From d3313a44a4302b60bcfd7db4824b8870d9218648 Mon Sep 17 00:00:00 2001 From: Elizabeth Campolongo <38985481+egrace479@users.noreply.github.com> Date: Mon, 8 Jun 2026 11:24:07 -0400 Subject: [PATCH 1/9] Fix URLs based on repo transfer --- README.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 10576f5..9ac798f 100644 --- a/README.md +++ b/README.md @@ -2,16 +2,16 @@ ![PyPI Downloads](https://static.pepy.tech/badge/saev) ![MIT License](https://img.shields.io/badge/License-MIT-efefef) -![GitHub Repo stars](https://img.shields.io/github/stars/OSU-NLP-group/saev?style=flat&label=GitHub%20%E2%AD%90) +![GitHub Repo stars](https://img.shields.io/github/stars/Imageomics/saev?style=flat&label=GitHub%20%E2%AD%90) Training sparse autoencoders (SAEs) on vision transformers (ViTs), implemented in PyTorch. ## Docs -- [Docs](https://osu-nlp-group.github.io/saev/api) -- [Colab Notebook for SAE Inference](https://colab.research.google.com/github/OSU-NLP-Group/saev/blob/main/examples/inference.ipynb) -- [User guide](https://osu-nlp-group.github.io/saev/api/users/guide) -- [API reference](https://osu-nlp-group.github.io/saev/api/api/saev/) +- [Docs](https://imageomics.github.io/saev/api) +- [Colab Notebook for SAE Inference](https://colab.research.google.com/github/Imageomics/saev/blob/main/examples/inference.ipynb) +- [User guide](https://imageomics.github.io/saev/api/users/guide) +- [API reference](https://imageomics.github.io/saev/api/api/saev/) ## Research @@ -29,10 +29,9 @@ If you want to cite the software, please cite it as: ```bib @software{stevens2025saev, author = {Stevens, Samuel}, - license = {CC-BY-4.0}, month = apr, title = {{saev}}, - url = {https://github.com/OSU-NLP-Group/saev}, + url = {https://github.com/Imageomics/saev}, year = {2025} } ``` From 8e76166c2fcf511418a2d599695de6c217597381 Mon Sep 17 00:00:00 2001 From: egrace479 Date: Tue, 9 Jun 2026 10:45:56 -0400 Subject: [PATCH 2/9] Reset links to 
Imageomics GitHub --- CITATION.cff | 4 +- contrib/trait_discovery/CONTRIBUTING.md | 4 +- .../trait_discovery/scripts/push_dinov3.py | 8 +- docs/api/404.html | 2 +- docs/api/api/colors/index.html | 4 +- docs/api/api/configs/index.html | 4 +- docs/api/api/data/bird_mae/index.html | 4 +- docs/api/api/data/buffers/index.html | 4 +- docs/api/api/data/clip/index.html | 4 +- docs/api/api/data/datasets/index.html | 4 +- docs/api/api/data/dinov2/index.html | 4 +- docs/api/api/data/dinov3/index.html | 4 +- docs/api/api/data/fake_clip/index.html | 4 +- docs/api/api/data/indexed/index.html | 4 +- docs/api/api/data/models/index.html | 4 +- docs/api/api/data/ordered/index.html | 4 +- docs/api/api/data/pe/index.html | 4 +- docs/api/api/data/saev.data/index.html | 4 +- docs/api/api/data/shards/index.html | 4 +- docs/api/api/data/shuffled/index.html | 4 +- docs/api/api/data/siglip/index.html | 4 +- docs/api/api/data/transforms/index.html | 4 +- docs/api/api/disk/index.html | 4 +- docs/api/api/framework/inference/index.html | 4 +- .../api/framework/saev.framework/index.html | 4 +- docs/api/api/framework/shards/index.html | 4 +- docs/api/api/framework/train/index.html | 4 +- docs/api/api/helpers/index.html | 4 +- docs/api/api/metrics/index.html | 4 +- docs/api/api/nn/modeling/index.html | 4 +- docs/api/api/nn/objectives/index.html | 4 +- docs/api/api/nn/saev.nn/index.html | 4 +- docs/api/api/saev/index.html | 4 +- docs/api/api/summary/index.html | 4 +- docs/api/api/utils/monitoring/index.html | 4 +- docs/api/api/utils/saev.utils/index.html | 4 +- docs/api/api/utils/scheduling/index.html | 4 +- docs/api/api/utils/statistics/index.html | 4 +- docs/api/api/utils/wandb/index.html | 4 +- docs/api/api/viz/index.html | 4 +- docs/api/developers/contributing/index.html | 4 +- docs/api/developers/datapoint-init/index.html | 4 +- docs/api/developers/disk-layout/index.html | 4 +- docs/api/developers/naming/index.html | 4 +- docs/api/developers/protocol/index.html | 4 +- 
docs/api/developers/workflows/index.html | 4 +- docs/api/index.html | 6 +- docs/api/search/search_index.json | 2 +- docs/api/sitemap.xml | 98 +- docs/api/users/bird-mae-debugging/index.html | 4 +- docs/api/users/glossary/index.html | 4 +- docs/api/users/guide/index.html | 4 +- docs/api/users/inference/index.html | 12 +- docs/api/users/new-project/index.html | 4 +- docs/api/users/sweeps/index.html | 4 +- .../SAE_BioCLIP_24K_ViT-B-16_iNat21.md | 10 +- .../modelcards/SAE_CLIP_24K_ViT-B-16_IN1K.md | 10 +- .../SAE_DINOv2_24K_ViT-B-14_IN1K.md | 8 +- docs/demos/classification/dist/app.js | 2 +- docs/demos/semseg/dist/app.js | 2 +- docs/index.html | 6 +- .../archive/reports/2025-10-03/report.typ | 2 +- docs/internal/handoff/main.typ | 2 +- docs/mkdocs.yml | 4 +- docs/src/index.md | 2 +- docs/src/users/inference.md | 8 +- examples/inference.ipynb | 4052 ++++++++--------- pyproject.toml | 4 +- scripts/export_notebook.py | 2 +- scripts/push_models.py | 4 +- src/web/apps/classification/dist/app.js | 2 +- src/web/apps/semseg/dist/app.js | 2 +- src/web/src/Classification.elm | 2 +- src/web/src/Semseg.elm | 2 +- 74 files changed, 2225 insertions(+), 2225 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 0d10c04..d09bd8c 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -13,8 +13,8 @@ authors: email: samuel.robert.stevens@gmail.com orcid: 'https://orcid.org/0009-0000-9493-7766' affiliation: The Ohio State University -repository-code: 'https://github.com/OSU-NLP-Group/saev' -url: 'https://osu-nlp-group.github.io/saev/' +repository-code: 'https://github.com/Imageomics/saev' +url: 'https://imageomics.github.io/saev/' repository-artifact: 'https://pypi.org/project/saev/' abstract: >- saev is a package for training sparse autoencoders (SAEs) diff --git a/contrib/trait_discovery/CONTRIBUTING.md b/contrib/trait_discovery/CONTRIBUTING.md index 307a003..c726770 100644 --- a/contrib/trait_discovery/CONTRIBUTING.md +++ b/contrib/trait_discovery/CONTRIBUTING.md @@ -8,7 +8,7 @@ This 
project aims to use sparse autoencoders (SAEs) on vision transformers like Sparse autoencoders were recently applied to interpreting large language models by many groups. [Anthropic's work](https://transformer-circuits.pub/2024/scaling-monosemanticity/index.html) is probably the most well known, but [OpenAI has some work](https://cdn.openai.com/papers/sparse-autoencoders.pdf) and [Google does too](https://arxiv.org/abs/2408.05147). -I have some prior work ([website](https://osu-nlp-group.github.io/saev/), [arxiv](https://arxiv.org/abs/2502.06755)) that shows that sparse autoencoders can also be applied to vision transformer activations and nice-looking qualitative examples are discovered in ViT activations. +I have some prior work ([website](https://imageomics.github.io/saev/), [arxiv](https://arxiv.org/abs/2502.06755)) that shows that sparse autoencoders can also be applied to vision transformer activations and nice-looking qualitative examples are discovered in ViT activations. **"Interesting and scientifically meaningful"** @@ -75,7 +75,7 @@ train_baseline.py ## Environment ```sh -git clone https://github.com/OSU-NLP-Group/saev +git clone https://github.com/Imageomics/saev git checkout ring-buffer # Check that saev/ installed okay. 
diff --git a/contrib/trait_discovery/scripts/push_dinov3.py b/contrib/trait_discovery/scripts/push_dinov3.py index 332be14..8b24865 100644 --- a/contrib/trait_discovery/scripts/push_dinov3.py +++ b/contrib/trait_discovery/scripts/push_dinov3.py @@ -251,10 +251,10 @@ def make_readme(repo: Repo, staged: list[StagedRun]) -> str: # SAE for Meta's {repo.title} trained on ImageNet-1K Activations -* **Homepage:** https://osu-nlp-group.github.io/saev -* **Code:** https://github.com/OSU-NLP-Group/saev +* **Homepage:** https://imageomics.github.io/saev +* **Code:** https://github.com/Imageomics/saev * **Preprint:** https://arxiv.org/abs/2511.17735 -* **Demos:** https://osu-nlp-group.github.io/saev#demos +* **Demos:** https://imageomics.github.io/saev#demos * **Point of Contact:** [Sam Stevens](mailto:stevens.994@buckeyemail.osu.edu) ## Checkpoints @@ -280,7 +280,7 @@ def make_readme(repo: Repo, staged: list[StagedRun]) -> str: ## Inference Instructions -Follow the instructions [here](https://osu-nlp-group.github.io/saev/api/saev/#inference-instructions). +Follow the instructions [here](https://imageomics.github.io/saev/api/saev/#inference-instructions). """ diff --git a/docs/api/404.html b/docs/api/404.html index 830762c..9264179 100644 --- a/docs/api/404.html +++ b/docs/api/404.html @@ -1910,7 +1910,7 @@

404 - Not found

- + diff --git a/docs/api/api/colors/index.html b/docs/api/api/colors/index.html index 9e4280a..b260136 100644 --- a/docs/api/api/colors/index.html +++ b/docs/api/api/colors/index.html @@ -8,7 +8,7 @@ - + @@ -2111,7 +2111,7 @@

saev.colors

- + diff --git a/docs/api/api/configs/index.html b/docs/api/api/configs/index.html index dd51f57..c2295fb 100644 --- a/docs/api/api/configs/index.html +++ b/docs/api/api/configs/index.html @@ -8,7 +8,7 @@ - + @@ -2717,7 +2717,7 @@

- + diff --git a/docs/api/api/data/bird_mae/index.html b/docs/api/api/data/bird_mae/index.html index d148d84..8845f07 100644 --- a/docs/api/api/data/bird_mae/index.html +++ b/docs/api/api/data/bird_mae/index.html @@ -8,7 +8,7 @@ - + @@ -3146,7 +3146,7 @@

- + diff --git a/docs/api/api/data/buffers/index.html b/docs/api/api/data/buffers/index.html index a66da2f..e9b6440 100644 --- a/docs/api/api/data/buffers/index.html +++ b/docs/api/api/data/buffers/index.html @@ -8,7 +8,7 @@ - + @@ -2707,7 +2707,7 @@

- + diff --git a/docs/api/api/data/clip/index.html b/docs/api/api/data/clip/index.html index 745d0a9..5c11a8c 100644 --- a/docs/api/api/data/clip/index.html +++ b/docs/api/api/data/clip/index.html @@ -8,7 +8,7 @@ - + @@ -2329,7 +2329,7 @@

- + diff --git a/docs/api/api/data/datasets/index.html b/docs/api/api/data/datasets/index.html index 621c32a..395c0f1 100644 --- a/docs/api/api/data/datasets/index.html +++ b/docs/api/api/data/datasets/index.html @@ -8,7 +8,7 @@ - + @@ -4384,7 +4384,7 @@

- + diff --git a/docs/api/api/data/dinov2/index.html b/docs/api/api/data/dinov2/index.html index c7ebe9d..5b0e21b 100644 --- a/docs/api/api/data/dinov2/index.html +++ b/docs/api/api/data/dinov2/index.html @@ -8,7 +8,7 @@ - + @@ -2109,7 +2109,7 @@

saev.data.dinov2

- + diff --git a/docs/api/api/data/dinov3/index.html b/docs/api/api/data/dinov3/index.html index f450c24..fa507ea 100644 --- a/docs/api/api/data/dinov3/index.html +++ b/docs/api/api/data/dinov3/index.html @@ -8,7 +8,7 @@ - + @@ -3264,7 +3264,7 @@

- + diff --git a/docs/api/api/data/fake_clip/index.html b/docs/api/api/data/fake_clip/index.html index 0d97be7..6bf9328 100644 --- a/docs/api/api/data/fake_clip/index.html +++ b/docs/api/api/data/fake_clip/index.html @@ -8,7 +8,7 @@ - + @@ -2366,7 +2366,7 @@

- + diff --git a/docs/api/api/data/indexed/index.html b/docs/api/api/data/indexed/index.html index e3a0d89..6454396 100644 --- a/docs/api/api/data/indexed/index.html +++ b/docs/api/api/data/indexed/index.html @@ -8,7 +8,7 @@ - + @@ -2529,7 +2529,7 @@

- + diff --git a/docs/api/api/data/models/index.html b/docs/api/api/data/models/index.html index 7f24bdf..7de7986 100644 --- a/docs/api/api/data/models/index.html +++ b/docs/api/api/data/models/index.html @@ -8,7 +8,7 @@ - + @@ -2572,7 +2572,7 @@

- + diff --git a/docs/api/api/data/ordered/index.html b/docs/api/api/data/ordered/index.html index 2103e14..43858a3 100644 --- a/docs/api/api/data/ordered/index.html +++ b/docs/api/api/data/ordered/index.html @@ -8,7 +8,7 @@ - + @@ -2634,7 +2634,7 @@

- + diff --git a/docs/api/api/data/pe/index.html b/docs/api/api/data/pe/index.html index 77bb749..7c11b01 100644 --- a/docs/api/api/data/pe/index.html +++ b/docs/api/api/data/pe/index.html @@ -8,7 +8,7 @@ - + @@ -2286,7 +2286,7 @@

- + diff --git a/docs/api/api/data/saev.data/index.html b/docs/api/api/data/saev.data/index.html index 22fbcea..ebba51e 100644 --- a/docs/api/api/data/saev.data/index.html +++ b/docs/api/api/data/saev.data/index.html @@ -8,7 +8,7 @@ - + @@ -4819,7 +4819,7 @@

- + diff --git a/docs/api/api/data/shards/index.html b/docs/api/api/data/shards/index.html index 3786b80..fd9b62b 100644 --- a/docs/api/api/data/shards/index.html +++ b/docs/api/api/data/shards/index.html @@ -8,7 +8,7 @@ - + @@ -5839,7 +5839,7 @@

- + diff --git a/docs/api/api/data/shuffled/index.html b/docs/api/api/data/shuffled/index.html index 344c051..940bd2f 100644 --- a/docs/api/api/data/shuffled/index.html +++ b/docs/api/api/data/shuffled/index.html @@ -8,7 +8,7 @@ - + @@ -3021,7 +3021,7 @@

- + diff --git a/docs/api/api/data/siglip/index.html b/docs/api/api/data/siglip/index.html index 6ea32c7..314e6c2 100644 --- a/docs/api/api/data/siglip/index.html +++ b/docs/api/api/data/siglip/index.html @@ -8,7 +8,7 @@ - + @@ -2352,7 +2352,7 @@

- + diff --git a/docs/api/api/data/transforms/index.html b/docs/api/api/data/transforms/index.html index f77c3a0..6f2e738 100644 --- a/docs/api/api/data/transforms/index.html +++ b/docs/api/api/data/transforms/index.html @@ -8,7 +8,7 @@ - + @@ -2359,7 +2359,7 @@

- + diff --git a/docs/api/api/disk/index.html b/docs/api/api/disk/index.html index 62dd5b6..0dfe4f7 100644 --- a/docs/api/api/disk/index.html +++ b/docs/api/api/disk/index.html @@ -8,7 +8,7 @@ - + @@ -3006,7 +3006,7 @@

- + diff --git a/docs/api/api/framework/inference/index.html b/docs/api/api/framework/inference/index.html index 5e4487b..40d0505 100644 --- a/docs/api/api/framework/inference/index.html +++ b/docs/api/api/framework/inference/index.html @@ -8,7 +8,7 @@ - + @@ -2830,7 +2830,7 @@

- + diff --git a/docs/api/api/framework/saev.framework/index.html b/docs/api/api/framework/saev.framework/index.html index a5f8f6d..c5abfdd 100644 --- a/docs/api/api/framework/saev.framework/index.html +++ b/docs/api/api/framework/saev.framework/index.html @@ -8,7 +8,7 @@ - + @@ -2113,7 +2113,7 @@

saev.framework

- + diff --git a/docs/api/api/framework/shards/index.html b/docs/api/api/framework/shards/index.html index 0f268ee..bc7f833 100644 --- a/docs/api/api/framework/shards/index.html +++ b/docs/api/api/framework/shards/index.html @@ -8,7 +8,7 @@ - + @@ -2942,7 +2942,7 @@

- + diff --git a/docs/api/api/framework/train/index.html b/docs/api/api/framework/train/index.html index 0ab3c7d..6dfae60 100644 --- a/docs/api/api/framework/train/index.html +++ b/docs/api/api/framework/train/index.html @@ -8,7 +8,7 @@ - + @@ -4631,7 +4631,7 @@

- + diff --git a/docs/api/api/helpers/index.html b/docs/api/api/helpers/index.html index c11c066..48ebf39 100644 --- a/docs/api/api/helpers/index.html +++ b/docs/api/api/helpers/index.html @@ -8,7 +8,7 @@ - + @@ -3965,7 +3965,7 @@

- + diff --git a/docs/api/api/metrics/index.html b/docs/api/api/metrics/index.html index 9ec48cf..dfc1f68 100644 --- a/docs/api/api/metrics/index.html +++ b/docs/api/api/metrics/index.html @@ -8,7 +8,7 @@ - + @@ -2403,7 +2403,7 @@

- + diff --git a/docs/api/api/nn/modeling/index.html b/docs/api/api/nn/modeling/index.html index 87eb072..7d9c4cd 100644 --- a/docs/api/api/nn/modeling/index.html +++ b/docs/api/api/nn/modeling/index.html @@ -8,7 +8,7 @@ - + @@ -4149,7 +4149,7 @@

- + diff --git a/docs/api/api/nn/objectives/index.html b/docs/api/api/nn/objectives/index.html index 8ef2117..de37e7f 100644 --- a/docs/api/api/nn/objectives/index.html +++ b/docs/api/api/nn/objectives/index.html @@ -8,7 +8,7 @@ - + @@ -2936,7 +2936,7 @@

- + diff --git a/docs/api/api/nn/saev.nn/index.html b/docs/api/api/nn/saev.nn/index.html index 517df73..2f9ab87 100644 --- a/docs/api/api/nn/saev.nn/index.html +++ b/docs/api/api/nn/saev.nn/index.html @@ -8,7 +8,7 @@ - + @@ -3394,7 +3394,7 @@

- + diff --git a/docs/api/api/saev/index.html b/docs/api/api/saev/index.html index e7a4219..ff17eca 100644 --- a/docs/api/api/saev/index.html +++ b/docs/api/api/saev/index.html @@ -8,7 +8,7 @@ - + @@ -2111,7 +2111,7 @@

saev

- + diff --git a/docs/api/api/summary/index.html b/docs/api/api/summary/index.html index 95e89a4..6efadd3 100644 --- a/docs/api/api/summary/index.html +++ b/docs/api/api/summary/index.html @@ -8,7 +8,7 @@ - + @@ -1966,7 +1966,7 @@

Summary

- + diff --git a/docs/api/api/utils/monitoring/index.html b/docs/api/api/utils/monitoring/index.html index 0b99bbf..7242ae6 100644 --- a/docs/api/api/utils/monitoring/index.html +++ b/docs/api/api/utils/monitoring/index.html @@ -8,7 +8,7 @@ - + @@ -2187,7 +2187,7 @@

- + diff --git a/docs/api/api/utils/saev.utils/index.html b/docs/api/api/utils/saev.utils/index.html index 2448e21..9f29f0d 100644 --- a/docs/api/api/utils/saev.utils/index.html +++ b/docs/api/api/utils/saev.utils/index.html @@ -8,7 +8,7 @@ - + @@ -2109,7 +2109,7 @@

saev.utils

- + diff --git a/docs/api/api/utils/scheduling/index.html b/docs/api/api/utils/scheduling/index.html index d9259a9..2aeb2ea 100644 --- a/docs/api/api/utils/scheduling/index.html +++ b/docs/api/api/utils/scheduling/index.html @@ -8,7 +8,7 @@ - + @@ -2388,7 +2388,7 @@

- + diff --git a/docs/api/api/utils/statistics/index.html b/docs/api/api/utils/statistics/index.html index 34481f2..1682dad 100644 --- a/docs/api/api/utils/statistics/index.html +++ b/docs/api/api/utils/statistics/index.html @@ -8,7 +8,7 @@ - + @@ -2407,7 +2407,7 @@

- + diff --git a/docs/api/api/utils/wandb/index.html b/docs/api/api/utils/wandb/index.html index e38e3cb..688b234 100644 --- a/docs/api/api/utils/wandb/index.html +++ b/docs/api/api/utils/wandb/index.html @@ -8,7 +8,7 @@ - + @@ -2210,7 +2210,7 @@

- + diff --git a/docs/api/api/viz/index.html b/docs/api/api/viz/index.html index 8be8358..8b90bb7 100644 --- a/docs/api/api/viz/index.html +++ b/docs/api/api/viz/index.html @@ -8,7 +8,7 @@ - + @@ -2203,7 +2203,7 @@

- + diff --git a/docs/api/developers/contributing/index.html b/docs/api/developers/contributing/index.html index 8dce31d..c807bfa 100644 --- a/docs/api/developers/contributing/index.html +++ b/docs/api/developers/contributing/index.html @@ -8,7 +8,7 @@ - + @@ -2085,7 +2085,7 @@

Project layout + diff --git a/docs/api/developers/datapoint-init/index.html b/docs/api/developers/datapoint-init/index.html index 24180c3..07a2909 100644 --- a/docs/api/developers/datapoint-init/index.html +++ b/docs/api/developers/datapoint-init/index.html @@ -8,7 +8,7 @@ - + @@ -2042,7 +2042,7 @@

Datapoint Initialization + diff --git a/docs/api/developers/disk-layout/index.html b/docs/api/developers/disk-layout/index.html index e9a65e1..e916e9d 100644 --- a/docs/api/developers/disk-layout/index.html +++ b/docs/api/developers/disk-layout/index.html @@ -8,7 +8,7 @@ - + @@ -2153,7 +2153,7 @@

FAQs&par - + diff --git a/docs/api/developers/naming/index.html b/docs/api/developers/naming/index.html index 611385c..9a53d19 100644 --- a/docs/api/developers/naming/index.html +++ b/docs/api/developers/naming/index.html @@ -8,7 +8,7 @@ - + @@ -1933,7 +1933,7 @@

Variable Naming + diff --git a/docs/api/developers/protocol/index.html b/docs/api/developers/protocol/index.html index 235e389..9762ef2 100644 --- a/docs/api/developers/protocol/index.html +++ b/docs/api/developers/protocol/index.html @@ -8,7 +8,7 @@ - + @@ -2389,7 +2389,7 @@

5 Versioning & compatibility + diff --git a/docs/api/developers/workflows/index.html b/docs/api/developers/workflows/index.html index 8c6670c..fcbe01d 100644 --- a/docs/api/developers/workflows/index.html +++ b/docs/api/developers/workflows/index.html @@ -8,7 +8,7 @@ - + @@ -1932,7 +1932,7 @@

Workflows

- + diff --git a/docs/api/index.html b/docs/api/index.html index 92b58f9..1cf20ae 100644 --- a/docs/api/index.html +++ b/docs/api/index.html @@ -8,7 +8,7 @@ - + @@ -1963,7 +1963,7 @@

saev

PyPI Downloads MIT License -GitHub Repo stars

+GitHub Repo stars

saev is a framework for training and evaluating sparse autoencoders (SAEs) for vision transformers (ViTs), implemented in PyTorch.

Installation

Installation is supported with uv. @@ -2074,7 +2074,7 @@

Why saev? + diff --git a/docs/api/search/search_index.json b/docs/api/search/search_index.json index e81c5f4..4b95a4f 100644 --- a/docs/api/search/search_index.json +++ b/docs/api/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"],"fields":{"title":{"boost":1000.0},"text":{"boost":1.0},"tags":{"boost":1000000.0}}},"docs":[{"location":"","title":"saev","text":"

saev is a framework for training and evaluating sparse autoencoders (SAEs) for vision transformers (ViTs), implemented in PyTorch.

"},{"location":"#installation","title":"Installation","text":"

Installation is supported with uv. saev will likely work with pure pip, conda, etc., but I will not formally support them.

Clone this repository, then from the root directory:

uv run scripts/launch.py --help\n

This will create a virtual environment and display the help for all the provided framework scripts.

"},{"location":"#quick-start","title":"Quick Start","text":"

Save some activations to disk:

uv run scripts/launch.py shards \\\n  --shards-root /$SCRATCH/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-32/openai \\\n  --d-model 768 \\\n  --layers 11 \\\n  --patches-per-ex 49 \\\n  --batch-size 256 \\\n  data:cifar10\n

Read the guide for details.

"},{"location":"#why-saev","title":"Why saev?","text":"

There are plenty of alternative libraries for SAEs:

However, saev has some benefits:

  1. saev is more of a framework than a library. The reason for this is that SAEs require lots of activations to train a relatively small neural network; while you can implement it with a simple inference loop, efficient training requires some caching on disk. This means using saev is a little more like Keras or PyTorch Lightning than Hugging Face's Transformers or Datasets libraries.
  2. saev offers lots of tools for interacting with sparse autoencoders after training, including interactive notebooks and evaluations.
  3. saev includes complete code from preprints in the contrib/ directory, along with logbooks describing how the authors used and developed saev.
"},{"location":"api/colors/","title":"saev.colors","text":"

Utility color palettes used across saev visualizations.

"},{"location":"api/configs/","title":"saev.configs","text":""},{"location":"api/configs/#saev.configs.dict_to_dataclass","title":"dict_to_dataclass(data, cls)","text":"

Recursively convert a dictionary to a dataclass instance.

Source code in src/saev/configs.py
@beartype.beartype\ndef dict_to_dataclass(data: dict, cls: type[T]) -> T:\n    \"\"\"Recursively convert a dictionary to a dataclass instance.\"\"\"\n    if not dataclasses.is_dataclass(cls):\n        return data\n\n    field_types = {f.name: f.type for f in dataclasses.fields(cls)}\n    kwargs = {}\n\n    for field_name, field_type in field_types.items():\n        if field_name not in data:\n            continue\n\n        value = data[field_name]\n\n        # Handle Optional types\n        origin = tp.get_origin(field_type)\n        args = tp.get_args(field_type)\n\n        # Handle tuple[str, ...]\n        if origin is tuple and args:\n            kwargs[field_name] = tuple(value) if isinstance(value, list) else value\n        # Handle list[DataclassType]\n        elif origin is list and args and dataclasses.is_dataclass(args[0]):\n            kwargs[field_name] = [dict_to_dataclass(item, args[0]) for item in value]\n        # Handle regular dataclass fields\n        elif dataclasses.is_dataclass(field_type):\n            kwargs[field_name] = dict_to_dataclass(value, field_type)\n        # Handle pathlib.Path\n        elif field_type is pathlib.Path:\n            # Required Path field - always convert\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is tp.Union and pathlib.Path in args:\n            # Optional Path field (typing.Union style)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is types.UnionType and pathlib.Path in args:\n            # Optional Path field (Python 3.10+ union style with |)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        else:\n            kwargs[field_name] = value\n\n    return cls(**kwargs)\n
"},{"location":"api/configs/#saev.configs.expand","title":"expand(config)","text":"

Expand a nested dict that may contain lists into many dicts.

Source code in src/saev/configs.py
@beartype.beartype\ndef expand(config: dict[str, object]) -> Iterator[dict[str, object]]:\n    \"\"\"Expand a nested dict that may contain lists into many dicts.\"\"\"\n    yield from _expand_discrete(dict(config))\n
"},{"location":"api/configs/#saev.configs.get_non_default_values","title":"get_non_default_values(obj, default_obj)","text":"

Recursively find fields that differ from defaults.

Source code in src/saev/configs.py
@beartype.beartype\ndef get_non_default_values(obj: T, default_obj: T) -> dict:\n    \"\"\"Recursively find fields that differ from defaults.\"\"\"\n    # Check that obj and default_obj are instances of a dataclass.\n    assert dataclasses.is_dataclass(obj) and not isinstance(obj, type)\n    assert dataclasses.is_dataclass(default_obj) and not isinstance(default_obj, type)\n\n    diff = {}\n    for field in dataclasses.fields(obj):\n        obj_value = getattr(obj, field.name)\n        default_value = getattr(default_obj, field.name)\n\n        if obj_value == default_value:\n            continue\n\n        # If both are dataclasses of the same type, recurse to find nested differences\n        if (\n            dataclasses.is_dataclass(obj_value)\n            and dataclasses.is_dataclass(default_value)\n            and type(obj_value) is type(default_value)\n        ):\n            nested_diff = get_non_default_values(obj_value, default_value)\n            if nested_diff:\n                diff[field.name] = nested_diff\n        else:\n            # For non-dataclass fields or different types, just record the value\n            diff[field.name] = obj_value\n\n    return diff\n
"},{"location":"api/configs/#saev.configs.load_cfgs","title":"load_cfgs(override, *, default, sweep_dcts)","text":"

Load a list of configs from a combination of sources.

Parameters:

Name Type Description Default override T

Command-line overridden values.

required default T

The default values for a config.

required sweep_dcts list[dict]

A list of dictionaries from Python sweep files. Each dictionary may contain list values that will be expanded.

required

Returns:

Type Description tuple[list[T], list[str]]

A list of configs and a list of errors.

Source code in src/saev/configs.py
@beartype.beartype\ndef load_cfgs(\n    override: T, *, default: T, sweep_dcts: list[dict]\n) -> tuple[list[T], list[str]]:\n    \"\"\"\n    Load a list of configs from a combination of sources.\n\n    Args:\n        override: Command-line overridden values.\n        default: The default values for a config.\n        sweep_dcts: A list of dictionaries from Python sweep files. Each dictionary may contain list values that will be expanded.\n\n    Returns:\n        A list of configs and a list of errors.\n    \"\"\"\n    # Check that override and default are instances of a dataclass.\n    assert dataclasses.is_dataclass(override) and not isinstance(override, type)\n    assert dataclasses.is_dataclass(default) and not isinstance(default, type)\n\n    # If there's nothing to sweep, return just the override\n    if not sweep_dcts:\n        return [override], []\n\n    # Find which fields were overridden (differ from default)\n    overridden_fields = get_non_default_values(override, default)\n\n    cfgs: list[T] = []\n    errs: list[str] = []\n\n    d = 0  # Global counter for seed incrementing across all expanded configs\n\n    for sweep_dct in sweep_dcts:\n        # Filter out overridden fields from this sweep dict\n        filtered_dct = _filter_overridden_fields(sweep_dct, overridden_fields)\n\n        # If there's nothing to sweep after filtering, just use override\n        if not filtered_dct:\n            cfgs.append(override)\n            d += 1\n            continue\n\n        # Apply the sweep dict to create a config\n        try:\n            updates = _recursive_dataclass_update(override, filtered_dct, override, d)\n\n            if hasattr(override, \"seed\") and \"seed\" not in updates:\n                updates[\"seed\"] = getattr(override, \"seed\", 0) + d\n\n            cfgs.append(dataclasses.replace(override, **updates))\n            d += 1\n        except Exception as err:\n            errs.append(str(err))\n            d += 1\n\n    return cfgs, errs\n
"},{"location":"api/configs/#saev.configs.load_sweep","title":"load_sweep(sweep_fpath)","text":"

Load a sweep file and return the list of config dicts.

Parameters:

Name Type Description Default sweep_fpath Path

Path to a Python file with a make_cfgs() function.

required

Returns:

Type Description list[dict]

List of config dictionaries from make_cfgs(). Returns empty list if any error occurs.

Source code in src/saev/configs.py
@beartype.beartype\ndef load_sweep(sweep_fpath: pathlib.Path) -> list[dict]:\n    \"\"\"\n    Load a sweep file and return the list of config dicts.\n\n    Args:\n        sweep_fpath: Path to a Python file with a `make_cfgs()` function.\n\n    Returns:\n        List of config dictionaries from `make_cfgs()`. Returns empty list if any error occurs.\n    \"\"\"\n    try:\n        namespace = {}\n        exec(sweep_fpath.read_text(), namespace)\n        result = namespace[\"make_cfgs\"]()\n        if not isinstance(result, list):\n            logger.warning(\n                f\"make_cfgs() in {sweep_fpath} returned {type(result).__name__}, expected list\"\n            )\n            return []\n        return result\n    except Exception as err:\n        logger.warning(f\"Failed to load sweep from {sweep_fpath}: {err}\")\n        return []\n
"},{"location":"api/disk/","title":"saev.disk","text":"

Helpers for sticking with the layout described in disk-layout.md.

"},{"location":"api/disk/#saev.disk.Run","title":"Run(run_dir)","text":"

Represents an SAE training run and some associated data.

Parameters:

Name Type Description Default run_dir Path

Run directory; should be $SAEV_NFS/saev/runs/<run_id>. Assumes the run already exists and validates the structure. Use Run.new() to create a new run. required Source code in src/saev/disk.py

def __init__(self, run_dir: pathlib.Path):\n    self.run_dir = run_dir\n\n    if len(self.run_dir.parts) < 3 or self.run_dir.parts[-3:-1] != (\"saev\", \"runs\"):\n        raise ValueError(\"Run directory is invalid.\")\n\n    if not self.run_dir.exists():\n        raise FileNotFoundError(\n            f\"Run directory does not exist: {self.run_dir}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"checkpoint\").exists():\n        raise FileNotFoundError(\n            f\"Checkpoint directory does not exist: {self.run_dir / 'checkpoint'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"links\").exists():\n        raise FileNotFoundError(\n            f\"Links directory does not exist: {self.run_dir / 'links'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"inference\").exists():\n        raise FileNotFoundError(\n            f\"Inference directory does not exist: {self.run_dir / 'inference'}. Use Run.new() to create a new run.\"\n        )\n
"},{"location":"api/disk/#saev.disk.Run.ckpt","title":"ckpt property","text":"

Path to the sae.pt checkpoint.

"},{"location":"api/disk/#saev.disk.Run.config","title":"config property","text":"

The training run config. Not a train.Config object because we don't want to import from train.py.

"},{"location":"api/disk/#saev.disk.Run.inference","title":"inference property","text":"

Path to the inference/ directory.

"},{"location":"api/disk/#saev.disk.Run.run_id","title":"run_id property","text":"

The run ID, created by wandb.

"},{"location":"api/disk/#saev.disk.Run.train_shards","title":"train_shards property","text":"

Path to shard root with metadata.json and acts*.bin files.

"},{"location":"api/disk/#saev.disk.Run.val_shards","title":"val_shards property","text":"

Path to shard root with metadata.json and acts*.bin files.

"},{"location":"api/disk/#saev.disk.Run.new","title":"new(run_id, *, train_shards_dir, val_shards_dir, runs_root) classmethod","text":"

Create a new run with directory structure and symlinks.

Parameters:

Name Type Description Default run_id str

The run ID (typically from wandb).

required train_shards_dir Path

Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>). required val_shards_dir Path

Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>). required runs_root Path

Root directory for runs (typically $SAEV_NFS/saev/runs).

required

Returns:

Type Description Run

A new Run instance with all directories and symlinks created.

Source code in src/saev/disk.py
@classmethod\ndef new(\n    cls,\n    run_id: str,\n    *,\n    train_shards_dir: pathlib.Path,\n    val_shards_dir: pathlib.Path,\n    runs_root: pathlib.Path,\n) -> \"Run\":\n    \"\"\"\n    Create a new run with directory structure and symlinks.\n\n    Args:\n        run_id: The run ID (typically from wandb).\n        train_shards_dir: Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>).\n        val_shards_dir: Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>).\n        runs_root: Root directory for runs (typically $SAEV_NFS/saev/runs).\n\n    Returns:\n        A new Run instance with all directories and symlinks created.\n    \"\"\"\n    run_dir = runs_root / run_id\n    run_dir.mkdir(parents=True)\n    (run_dir / \"checkpoint\").mkdir()\n    (run_dir / \"links\").mkdir()\n    (run_dir / \"inference\").mkdir()\n\n    (run_dir / \"links\" / \"train-shards\").symlink_to(train_shards_dir)\n    (run_dir / \"links\" / \"val-shards\").symlink_to(val_shards_dir)\n\n    return cls(run_dir)\n
"},{"location":"api/disk/#saev.disk.is_runs_root","title":"is_runs_root(path)","text":"

Check if path is a valid runs root directory.

A valid runs root ends with saev/runs and exists as a directory.

Parameters:

Name Type Description Default path Path

Path to check.

required

Returns:

Type Description bool

True if path is a directory ending in saev/runs.

Source code in src/saev/disk.py
@beartype.beartype\ndef is_runs_root(path: pathlib.Path) -> bool:\n    \"\"\"\n    Check if `path` is a valid runs root directory.\n\n    A valid runs root ends with `saev/runs` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/runs.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"runs\")\n
"},{"location":"api/disk/#saev.disk.is_shards_dir","title":"is_shards_dir(path)","text":"

Check if path is a specific shards directory.

A valid shards directory ends with saev/shards/<hash> for any hash value, exists as a directory, and contains the required files (metadata.json, shards.json, labels.bin).

Parameters:

Name Type Description Default path Path

Path to check.

required

Returns:

Type Description bool

True if path is a directory ending in saev/shards/<hash> with required files. Source code in src/saev/disk.py

@beartype.beartype\ndef is_shards_dir(path: pathlib.Path) -> bool:\n    \"\"\"\n    Check if `path` is a specific shards directory.\n\n    A valid shards directory ends with `saev/shards/<hash>` for any hash value, exists as a directory, and contains the required files (metadata.json, shards.json, labels.bin).\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards/<hash> with required files.\n    \"\"\"\n    if not path.is_dir():\n        return False\n\n    if len(path.parts) < 3 or path.parts[-3:-1] != (\"saev\", \"shards\"):\n        return False\n\n    return True\n
"},{"location":"api/disk/#saev.disk.is_shards_root","title":"is_shards_root(path)","text":"

Check if path is a valid shards root directory.

A valid shards root ends with saev/shards and exists as a directory.

Parameters:

Name Type Description Default path Path

Path to check.

required

Returns:

Type Description bool

True if path is a directory ending in saev/shards.

Source code in src/saev/disk.py
@beartype.beartype\ndef is_shards_root(path: pathlib.Path) -> bool:\n    \"\"\"\n    Check if `path` is a valid shards root directory.\n\n    A valid shards root ends with `saev/shards` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"shards\")\n
"},{"location":"api/helpers/","title":"saev.helpers","text":""},{"location":"api/helpers/#saev.helpers.RemovedFeatureError","title":"RemovedFeatureError","text":"

Bases: RuntimeError

Feature existed before but is no longer supported.

"},{"location":"api/helpers/#saev.helpers.batched_idx","title":"batched_idx(total_size, batch_size)","text":"

Iterate over (start, end) indices for total_size examples, where end - start is at most batch_size.

Parameters:

Name Type Description Default total_size int

total number of examples

required batch_size int

maximum distance between the generated indices.

required

Returns:

Type Description

A generator of (int, int) tuples that can slice up a list or a tensor.

Source code in src/saev/helpers.py
def __init__(self, total_size: int, batch_size: int):\n    self.total_size = total_size\n    self.batch_size = batch_size\n
"},{"location":"api/helpers/#saev.helpers.batched_idx.__iter__","title":"__iter__()","text":"

Yield (start, end) index pairs for batching.

Source code in src/saev/helpers.py
def __iter__(self) -> Iterator[tuple[int, int]]:\n    \"\"\"Yield (start, end) index pairs for batching.\"\"\"\n    for start in range(0, self.total_size, self.batch_size):\n        stop = min(start + self.batch_size, self.total_size)\n        yield start, stop\n
"},{"location":"api/helpers/#saev.helpers.batched_idx.__len__","title":"__len__()","text":"

Return the number of batches.

Source code in src/saev/helpers.py
def __len__(self) -> int:\n    \"\"\"Return the number of batches.\"\"\"\n    return (self.total_size + self.batch_size - 1) // self.batch_size\n
"},{"location":"api/helpers/#saev.helpers.progress","title":"progress(it, *, every=10, desc='progress', total=0)","text":"

Wraps an iterable with a logger like tqdm but doesn't use any control codes to manipulate a progress bar, which doesn't work well when your output is redirected to a file. Instead, simple logging statements are used, but it includes quality-of-life features like iteration speed and predicted time to finish.

Parameters:

Name Type Description Default it Iterable

Iterable to wrap.

required every int

How many iterations between logging progress.

10 desc str

What to name the logger.

'progress' total int

If non-zero, how long the iterable is.

0 Source code in src/saev/helpers.py
def __init__(\n    self, it: Iterable, *, every: int = 10, desc: str = \"progress\", total: int = 0\n):\n    self.it = it\n    self.every = max(every, 1)\n    self.logger = logging.getLogger(desc)\n    self.total = total\n
"},{"location":"api/helpers/#saev.helpers.csr_topk","title":"csr_topk(arr, *, k, axis=0, batch_size=1024)","text":"

Takes the top k values of a sparse CSR array.

We can only iterate efficiently over rows because it's a CSR array.

Parameters:

Name Type Description Default arr csr_array | csr_matrix

The CSR array of values with shape (rows, cols).

required k int

The k in \"top-k\".

required axis int

The dimension to sort along.

0 batch_size int

How many rows to process at once.

1024

Returns:

Type Description NumpyTopK

saev.helpers.NumpyTopK

Source code in src/saev/helpers.py
@beartype.beartype\ndef csr_topk(\n    arr: scipy.sparse.csr_array | scipy.sparse.csr_matrix,\n    *,\n    k: int,\n    axis: int = 0,\n    batch_size: int = 1024,\n) -> NumpyTopK:\n    \"\"\"\n    Takes the top k values of a sparse CSR array.\n\n    We can only iterate efficiently over *rows* because it's a a *CSR* array.\n\n    Args:\n        arr: The CSR array of values with shape (rows, cols).\n        k: The k in \"top-k\".\n        axis: The dimension to sort along.\n        batch_size: How many rows to process at once.\n\n    Returns:\n        saev.helpers.NumpyTopK\n    \"\"\"\n    if axis == 0:\n        return _csr_topk_axis0(arr, k, batch_size)\n    elif axis == 1:\n        return _csr_topk_axis1(arr, k)\n    else:\n        raise ValueError(f\"axis must be 0 or 1, got {axis}\")\n
"},{"location":"api/helpers/#saev.helpers.current_git_commit","title":"current_git_commit()","text":"

Best-effort short SHA of the repo containing this file.

Returns None when the git executable is missing, we\u2019re not inside a git repo (e.g. installed wheel), or any git call errors out.

Source code in src/saev/helpers.py
@beartype.beartype\ndef current_git_commit() -> str | None:\n    \"\"\"\n    Best-effort short SHA of the repo containing *this* file.\n\n    Returns `None` when\n    * `git` executable is missing,\n    * we\u2019re not inside a git repo (e.g. installed wheel),\n    * or any git call errors out.\n    \"\"\"\n    try:\n        # Walk up until we either hit a .git dir or the FS root\n        here = pathlib.Path(__file__).resolve()\n        for parent in (here, *here.parents):\n            if (parent / \".git\").exists():\n                break\n        else:  # no .git found\n            return None\n\n        result = subprocess.run(\n            [\"git\", \"-C\", str(parent), \"rev-parse\", \"--short\", \"HEAD\"],\n            stdout=subprocess.PIPE,\n            stderr=subprocess.DEVNULL,\n            text=True,\n            check=True,\n        )\n        return result.stdout.strip() or None\n    except (FileNotFoundError, subprocess.CalledProcessError):\n        return None\n
"},{"location":"api/helpers/#saev.helpers.flattened","title":"flattened(dct, *, sep='.')","text":"

Flatten a potentially nested dict to a single-level dict with .-separated keys.

Source code in src/saev/helpers.py
@beartype.beartype\ndef flattened(\n    dct: dict[str, object], *, sep: str = \".\"\n) -> dict[str, str | int | float | bool | None]:\n    \"\"\"\n    Flatten a potentially nested dict to a single-level dict with `.`-separated keys.\n    \"\"\"\n    new = {}\n    for key, value in dct.items():\n        if isinstance(value, dict):\n            for nested_key, nested_value in flattened(value).items():\n                new[key + \".\" + nested_key] = nested_value\n            continue\n\n        new[key] = value\n\n    return new\n
"},{"location":"api/helpers/#saev.helpers.fssafe","title":"fssafe(s)","text":"

Convert a string to be filesystem-safe by replacing special characters.

This is particularly useful for checkpoint names that contain characters like 'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like 'hf-hub_timm_ViT-L-16-SigLIP2-256'.

Parameters:

Name Type Description Default s str

String to make filesystem-safe.

required

Returns:

Type Description str

Filesystem-safe version of the string.

Source code in src/saev/helpers.py
@beartype.beartype\ndef fssafe(s: str) -> str:\n    \"\"\"\n    Convert a string to be filesystem-safe by replacing special characters.\n\n    This is particularly useful for checkpoint names that contain characters like\n    'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like\n    'hf-hub_timm_ViT-L-16-SigLIP2-256'.\n\n    Args:\n        s: String to make filesystem-safe.\n\n    Returns:\n        Filesystem-safe version of the string.\n    \"\"\"\n    # Replace common problematic characters with underscores\n    replacements = {\n        \"/\": \"_\",\n        \"\\\\\": \"_\",\n        \":\": \"_\",\n        \"*\": \"_\",\n        \"?\": \"_\",\n        '\"': \"_\",\n        \"<\": \"_\",\n        \">\": \"_\",\n        \"|\": \"_\",\n        \" \": \"_\",\n    }\n    for old, new in replacements.items():\n        s = s.replace(old, new)\n    # Remove any remaining non-alphanumeric characters except - _ .\n    return \"\".join(c if c.isalnum() or c in \"-_.\" else \"_\" for c in s)\n
"},{"location":"api/helpers/#saev.helpers.get_cache_dir","title":"get_cache_dir()","text":"

Get cache directory from environment variables, defaulting to the current working directory (.)

Returns:

Type Description str

A path to a cache directory (might not exist yet).

Source code in src/saev/helpers.py
@beartype.beartype\ndef get_cache_dir() -> str:\n    \"\"\"\n    Get cache directory from environment variables, defaulting to the current working directory (.)\n\n    Returns:\n        A path to a cache directory (might not exist yet).\n    \"\"\"\n    cache_dir = \"\"\n    for var in (\"SAEV_CACHE\", \"HF_HOME\", \"HF_HUB_CACHE\"):\n        cache_dir = cache_dir or os.environ.get(var, \"\")\n    return cache_dir or \".\"\n
"},{"location":"api/helpers/#saev.helpers.get_slurm_job_count","title":"get_slurm_job_count()","text":"

Get the current number of jobs in the queue for the current user.

Uses squeue's -r flag to properly count job array elements individually. For example, a job array 12345_[0-99] will be counted as 100 jobs.

Source code in src/saev/helpers.py
@beartype.beartype\ndef get_slurm_job_count() -> int:\n    \"\"\"\n    Get the current number of jobs in the queue for the current user.\n\n    Uses squeue's -r flag to properly count job array elements individually.\n    For example, a job array 12345_[0-99] will be counted as 100 jobs.\n    \"\"\"\n    try:\n        # Use -r to display each array element on its own line\n        result = subprocess.run(\n            [\"squeue\", \"--me\", \"-h\", \"-r\"], capture_output=True, text=True, check=True\n        )\n\n        # Count non-empty lines\n        lines = result.stdout.strip().split(\"\\n\")\n        return len([line for line in lines if line.strip()])\n\n    except (subprocess.SubprocessError, FileNotFoundError):\n        # If we can't check, assume no jobs\n        return 0\n
"},{"location":"api/helpers/#saev.helpers.get_slurm_max_array_size","title":"get_slurm_max_array_size()","text":"

Get the MaxArraySize configuration from the current Slurm cluster.

Returns:

Name Type Description int int

The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.

Source code in src/saev/helpers.py
@beartype.beartype\ndef get_slurm_max_array_size() -> int:\n    \"\"\"\n    Get the MaxArraySize configuration from the current Slurm cluster.\n\n    Returns:\n        int: The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # Run scontrol command to get config information\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"config\"], capture_output=True, text=True, check=True\n        )\n\n        # Search for MaxArraySize in the output\n        match = re.search(r\"MaxArraySize\\s*=\\s*(\\d+)\", result.stdout)\n        if match:\n            max_array_size = int(match.group(1))\n            logger.info(\"Detected MaxArraySize = %d\", max_array_size)\n            return max_array_size\n        else:\n            logger.warning(\n                \"Could not find MaxArraySize in scontrol output, using default of 1000\"\n            )\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error running scontrol: %s\", e)\n        return 1000  # Safe default\n    except ValueError as e:\n        logger.error(\"Error parsing MaxArraySize: %s\", e)\n        return 1000  # Safe default\n    except FileNotFoundError:\n        logger.warning(\n            \"scontrol command not found. Assuming not in Slurm environment. Returning default MaxArraySize=1000.\"\n        )\n        return 1000\n
"},{"location":"api/helpers/#saev.helpers.get_slurm_max_submit_jobs","title":"get_slurm_max_submit_jobs()","text":"

Get the MaxSubmitJobs limit from the current user's QOS.

Returns:

Name Type Description int int

The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.

Source code in src/saev/helpers.py
@beartype.beartype\ndef get_slurm_max_submit_jobs() -> int:\n    \"\"\"\n    Get the MaxSubmitJobs limit from the current user's QOS.\n\n    Returns:\n        int: The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # First, try to get the QOS from a recent job\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"job\", \"-o\"],\n            capture_output=True,\n            text=True,\n            check=False,\n        )\n\n        qos_name = None\n        if result.returncode == 0 and result.stdout:\n            # Extract QOS from job info\n            match = re.search(r\"QOS=(\\S+)\", result.stdout)\n            if match:\n                qos_name = match.group(1)\n\n        if not qos_name:\n            # If no jobs, try to get default QOS from association\n            # This is less reliable but better than nothing\n            logger.warning(\"No active jobs to determine QOS, using default of 1000\")\n            return 1000\n\n        # Get the MaxSubmitJobs for this QOS\n        result = subprocess.run(\n            [\"sacctmgr\", \"show\", \"qos\", qos_name, \"format=maxsubmitjobs\", \"-n\", \"-P\"],\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        max_submit = result.stdout.strip()\n        if max_submit and max_submit.isdigit():\n            limit = int(max_submit)\n            logger.info(\"Detected MaxSubmitJobs = %d for QOS %s\", limit, qos_name)\n            return limit\n        else:\n            logger.warning(\"Could not parse MaxSubmitJobs, using default of 1000\")\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error getting MaxSubmitJobs: %s\", e)\n        return 1000\n    except (ValueError, FileNotFoundError) as e:\n        logger.error(\"Error: %s\", e)\n        return 1000\n
"},{"location":"api/helpers/#saev.helpers.np_topk","title":"np_topk(arr, k, axis=None)","text":"

A numpy implementation of torch.topk.

Returns the k largest elements along the given axis. If axis is None, the array is flattened first.

Parameters:

Name Type Description Default arr ndarray

Input array.

required k int

Number of top elements to return.

required axis int | None

Axis along which to find top k elements. If None, flattens array first.

None

Returns:

Type Description NumpyTopK

Array of k largest values along the specified axis, sorted in descending order.

Source code in src/saev/helpers.py
@beartype.beartype\ndef np_topk(arr: np.ndarray, k: int, axis: int | None = None) -> NumpyTopK:\n    \"\"\"A numpy implementation of torch.topk.\n\n    Returns the k largest elements along the given axis. If axis is None, the array is flattened first.\n\n    Args:\n        arr: Input array.\n        k: Number of top elements to return.\n        axis: Axis along which to find top k elements. If None, flattens array first.\n\n    Returns:\n        Array of k largest values along the specified axis, sorted in descending order.\n    \"\"\"\n    if axis is None:\n        arr = arr.flatten()\n        axis = 0\n\n    # Handle negative axis\n    if axis < 0:\n        axis = arr.ndim + axis\n\n    # For each position along other axes, sort and take top k\n    # Use argsort which is stable and will preserve order for equal values\n    sort_indices = np.argsort(-arr, axis=axis, kind=\"stable\")\n\n    # Take the first k sorted indices\n    topk_indices = np.take(sort_indices, np.arange(k), axis=axis)\n\n    # Gather the top k values\n    topk_values = np.take_along_axis(arr, topk_indices, axis=axis)\n\n    return NumpyTopK(values=topk_values, indices=topk_indices)\n
"},{"location":"api/helpers/#saev.helpers.submit_job_array","title":"submit_job_array(executor, fn, args_list, *, logger=None, margin=0.8)","text":"

Submit jobs in batches to respect Slurm's MaxArraySize limit.

Yields (index, result) tuples as jobs complete. Batches are submitted sequentially - each batch must complete before the next is submitted.

Parameters:

Name Type Description Default executor

A submitit executor (SlurmExecutor or LocalExecutor).

required fn Callable

Worker function to call for each config.

required args_list list

List of arguments to pass to fn.

required logger Logger | None

Optional logger for progress messages.

None margin float

Fraction of MaxArraySize to use (default 0.8).

0.8

Yields:

Type Description int

Tuples of (global_index, result) for successful jobs.

object

For failed jobs, yields (global_index, None) and logs a warning.

Example
executor = submitit.SlurmExecutor(folder=\"./logs\")\nexecutor.update_parameters(...)\n\nfor idx, result in submit_job_array(executor, worker_fn, configs):\n    print(f\"Job {idx} returned {result}\")\n
Source code in src/saev/helpers.py
@beartype.beartype\ndef submit_job_array(\n    executor,\n    fn: tp.Callable,\n    args_list: list,\n    *,\n    logger: logging.Logger | None = None,\n    margin: float = 0.8,\n) -> Iterator[tuple[int, object]]:\n    \"\"\"\n    Submit jobs in batches to respect Slurm's MaxArraySize limit.\n\n    Yields (index, result) tuples as jobs complete. Batches are submitted sequentially - each batch must complete before the next is submitted.\n\n    Args:\n        executor: A submitit executor (SlurmExecutor or LocalExecutor).\n        fn: Worker function to call for each config.\n        args_list: List of arguments to pass to fn.\n        logger: Optional logger for progress messages.\n        margin: Fraction of MaxArraySize to use (default 0.8).\n\n    Yields:\n        Tuples of (global_index, result) for successful jobs.\n        For failed jobs, yields (global_index, None) and logs a warning.\n\n    Example:\n        ```\n        executor = submitit.SlurmExecutor(folder=\"./logs\")\n        executor.update_parameters(...)\n\n        for idx, result in submit_job_array(executor, worker_fn, configs):\n            print(f\"Job {idx} returned {result}\")\n        ```\n    \"\"\"\n    from submitit.core.utils import UncompletedJobError\n\n    arr_size = int(get_slurm_max_array_size() * margin)\n    n_total = len(args_list)\n\n    for arr_start, arr_end in batched_idx(n_total, arr_size):\n        batch_args = args_list[arr_start:arr_end]\n\n        if logger:\n            logger.info(\n                \"Submitting batch of %d jobs (%d-%d of %d).\",\n                len(batch_args),\n                arr_start + 1,\n                arr_end,\n                n_total,\n            )\n\n        with executor.batch():\n            jobs = [executor.submit(fn, arg) for arg in batch_args]\n\n        time.sleep(5.0)\n\n        for i, job in enumerate(jobs):\n            global_idx = arr_start + i\n            try:\n                result = job.result()\n                yield 
global_idx, result\n            except UncompletedJobError:\n                if logger:\n                    logger.warning(\n                        \"Job %s (%d) did not finish.\", job.job_id, global_idx\n                    )\n                yield global_idx, None\n
"},{"location":"api/metrics/","title":"saev.metrics","text":""},{"location":"api/metrics/#saev.metrics.Metrics","title":"Metrics(mse_per_dim, mse_per_token, normalized_mse, baseline_mse_per_dim, baseline_mse_per_token, sse_recon, sse_baseline, n_tokens, d_model, n_elements) dataclass","text":"

Validated reconstruction metrics aggregated over one evaluation corpus.

The primary totals are sse_recon (SAE reconstruction SSE) and sse_baseline (mean-baseline SSE). Derived terms are: - normalized_mse = sse_recon / sse_baseline - mse_per_dim = sse_recon / n_elements - mse_per_token = sse_recon / n_tokens - baseline_mse_per_dim = sse_baseline / n_elements - baseline_mse_per_token = sse_baseline / n_tokens

Size terms are: - n_tokens: number of tokens included in aggregation - d_model: embedding width per token - n_elements = n_tokens * d_model

"},{"location":"api/metrics/#saev.metrics.Metrics.from_accumulators","title":"from_accumulators(*, sse_recon, sse_baseline, n_tokens, d_model) classmethod","text":"

Construct metrics from aggregate sums and shape information.

Parameters:

Name Type Description Default sse_recon float

Sum of squared reconstruction errors over all selected tokens and dimensions.

required sse_baseline float

Sum of squared mean-baseline errors over the same tokens and dimensions.

required n_tokens int

Number of selected tokens in the aggregation set.

required d_model int

Activation dimension per token.

required

Returns:

Type Description Metrics

A validated Metrics object with all derived fields populated.

Source code in src/saev/metrics.py
@classmethod\ndef from_accumulators(\n    cls, *, sse_recon: float, sse_baseline: float, n_tokens: int, d_model: int\n) -> \"Metrics\":\n    \"\"\"Construct metrics from aggregate sums and shape information.\n\n    Args:\n        sse_recon: Sum of squared reconstruction errors over all selected tokens and dimensions.\n        sse_baseline: Sum of squared mean-baseline errors over the same tokens and dimensions.\n        n_tokens: Number of selected tokens in the aggregation set.\n        d_model: Activation dimension per token.\n\n    Returns:\n        A validated `Metrics` object with all derived fields populated.\n    \"\"\"\n\n    msg = f\"n_tokens must be positive, got {n_tokens}.\"\n    assert n_tokens > 0, msg\n    msg = f\"d_model must be positive, got {d_model}.\"\n    assert d_model > 0, msg\n    msg = f\"sse_recon must be >= 0, got {sse_recon}.\"\n    assert sse_recon >= 0.0, msg\n    msg = f\"sse_baseline must be > 0, got {sse_baseline}.\"\n    assert sse_baseline > 0.0, msg\n\n    n_elements = n_tokens * d_model\n    return cls(\n        mse_per_dim=sse_recon / n_elements,\n        mse_per_token=sse_recon / n_tokens,\n        normalized_mse=sse_recon / sse_baseline,\n        baseline_mse_per_dim=sse_baseline / n_elements,\n        baseline_mse_per_token=sse_baseline / n_tokens,\n        sse_recon=sse_recon,\n        sse_baseline=sse_baseline,\n        n_tokens=n_tokens,\n        d_model=d_model,\n        n_elements=n_elements,\n    )\n
"},{"location":"api/saev/","title":"saev","text":"

saev is a Python package for training sparse autoencoders (SAEs) on vision transformers (ViTs) in PyTorch.

"},{"location":"api/summary/","title":"Summary","text":""},{"location":"api/viz/","title":"saev.viz","text":""},{"location":"api/viz/#saev.viz.load_palette","title":"load_palette(path)","text":"

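Load a color palette from a text file with one color per line. Each non-empty line is parsed with parse_color; each blank line reserves a slot that is filled with an additional color generated by glasbey (extending the parsed colors, or creating a fresh palette if none were given). Returns a list of (r, g, b) tuples of floats, each channel in [0, 1].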

Source code in src/saev/viz.py
@beartype.beartype\ndef load_palette(path: pathlib.Path) -> list[tuple[float, float, float]]:\n    \"\"\"TODO: docstring.\"\"\"\n    import glasbey\n\n    palette = []\n\n    for i, line in enumerate(path.read_text().split(\"\\n\")):\n        line = line.strip()\n        if not line:\n            palette.append(None)\n            continue\n\n        palette.append(parse_color(line))\n\n    # Extend the palette using https://glasbey.readthedocs.io/en/latest/extending_palettes.html\n    n_missing = sum(color is None for color in palette)\n    if n_missing:\n        seed_palette = [color for color in palette if color is not None]\n        if seed_palette:\n            extended = glasbey.extend_palette(\n                seed_palette, palette_size=len(seed_palette) + n_missing, as_hex=False\n            )\n            fill_colors = extended[len(seed_palette) :]\n        else:\n            fill_colors = glasbey.create_palette(palette_size=n_missing, as_hex=False)\n\n        fill_iter = iter(fill_colors)\n        for i, color in enumerate(palette):\n            if color is not None:\n                continue\n            next_color = tuple(float(chan) for chan in next(fill_iter))\n            palette[i] = next_color\n\n    for i, color in enumerate(palette):\n        assert color is not None\n        msg = f\"Color {i} is invalid: {color}\"\n        assert all(0 <= chan <= 1 and isinstance(chan, float) for chan in color), msg\n\n    return palette\n
"},{"location":"api/data/bird_mae/","title":"saev.data.bird_mae","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.Encoder","title":"Encoder(cfg)","text":"

Bases: Module

Pure PyTorch Bird-MAE backbone (no HF).

Source code in src/saev/data/bird_mae.py
def __init__(self, cfg: Config) -> None:\n    super().__init__()\n    self.cfg = cfg\n\n    self.patch_embed = PatchEmbed(\n        img_size=(cfg.img_size_x, cfg.img_size_y),\n        patch_size=(cfg.patch_size, cfg.patch_size),\n        in_chans=cfg.in_chans,\n        embed_dim=cfg.embed_dim,\n    )\n\n    self.cls_token = nn.Parameter(torch.zeros(1, 1, cfg.embed_dim))\n    self.pos_embed = nn.Parameter(\n        torch.zeros(1, cfg.n_patches + 1, cfg.embed_dim),\n        requires_grad=cfg.pos_trainable,\n    )\n\n    if self.pos_embed.data.shape[1] == cfg.n_tokens:\n        pos_embed_np = get_2d_sincos_pos_embed_flexible(\n            self.pos_embed.shape[-1],\n            self.patch_embed.patch_hw,\n            cls_token=True,\n        )\n        self.pos_embed.data.copy_(\n            torch.from_numpy(pos_embed_np).float().unsqueeze(0)\n        )\n    else:\n        logger.warning(\n            \"Positional embedding shape mismatch. Will not initialize sin-cos pos embed.\"\n        )\n\n    dpr = [x.item() for x in torch.linspace(0, cfg.drop_rate, cfg.depth)]\n    self.blocks = nn.ModuleList([\n        Block(\n            dim=cfg.embed_dim,\n            n_heads=cfg.n_heads,\n            mlp_ratio=cfg.mlp_ratio,\n            qkv_bias=cfg.qkv_bias,\n            qk_norm=cfg.qk_norm,\n            init_values=cfg.init_values,\n            proj_drop=cfg.drop_rate,\n            attn_drop=cfg.drop_rate,\n            drop_path=dpr[i],\n            norm_layer=functools.partial(nn.LayerNorm, eps=cfg.norm_layer_eps),\n        )\n        for i in range(cfg.depth)\n    ])\n\n    self.pos_drop = nn.Dropout(p=cfg.drop_rate)\n    self.norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n    self.fc_norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n\n    nn.init.trunc_normal_(self.cls_token, std=0.02)\n    self.apply(self._init_weights)\n
"},{"location":"api/data/bird_mae/#saev.data.bird_mae.PatchEmbed","title":"PatchEmbed(img_size=(512, 128), patch_size=(16, 16), in_chans=1, embed_dim=768)","text":"

Bases: Module

Image (time x mel) to patch embeddings.

Source code in src/saev/data/bird_mae.py
def __init__(\n    self,\n    img_size: tuple[int, int] = (512, 128),\n    patch_size: tuple[int, int] = (16, 16),\n    in_chans: int = 1,\n    embed_dim: int = 768,\n) -> None:\n    super().__init__()\n    img_size = _ntuple(2)(img_size)\n    patch_size = _ntuple(2)(patch_size)\n    n_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])\n    self.patch_hw = (img_size[1] // patch_size[1], img_size[0] // patch_size[0])\n    self.img_size = img_size\n    self.patch_size = patch_size\n    self.n_patches = n_patches\n\n    self.proj = nn.Conv2d(\n        in_chans,\n        embed_dim,\n        kernel_size=patch_size,\n        stride=patch_size,\n    )\n
"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer","title":"Transformer(ckpt)","text":"

Bases: Module, Transformer

Source code in src/saev/data/bird_mae.py
def __init__(self, ckpt: str):\n    super().__init__()\n    self.model = load(ckpt)\n\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(ckpt.lower())\n
"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_resize","title":"make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

Create resize transform for visualization.

Source code in src/saev/data/bird_mae.py
@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization.\"\"\"\n    raise NotImplementedError(\"Bird-MAE uses audio spectrograms, not images.\")\n
"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

Create transforms for preprocessing: (data_transform, dict_transform | None).

Source code in src/saev/data/bird_mae.py
@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n    return transform, None\n
"},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio","title":"filter_audio(waveform, sample_rate, patches, *, mode='time')","text":"

Filter audio based on SAE patch activations over the log-mel spectrogram.

Given a waveform and the SAE activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.

Parameters:

Name Type Description Default waveform Float[Tensor, ' samples']

Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.

required sample_rate int

Audio sample rate in Hz. Should be 32000 for Bird-MAE.

required patches Bool[Tensor, ' content_tokens_per_example']

Boolean SAE activation values per patch, shape [256]. Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.

required mode Literal['time', 'time+freq']

Filtering mode. - \"time\": Clip to time segments with high activations (preserves all frequencies). - \"time+freq\": Clip time AND apply frequency masking via STFT.

'time'

Returns:

Type Description Float[Tensor, ' clipped']

Filtered audio waveform as a 1D torch tensor.

Example

waveform_np, sr = librosa.load(audio_path, sr=32000) mel = bird_mae.transform(waveform_np) # [512, 128] waveform = torch.from_numpy(waveform_np)

Source code in src/saev/data/bird_mae.py
@jaxtyped(typechecker=beartype.beartype)\ndef filter_audio(\n    waveform: Float[Tensor, \" samples\"],\n    sample_rate: int,\n    patches: Bool[Tensor, \" content_tokens_per_example\"],\n    *,\n    mode: tp.Literal[\"time\", \"time+freq\"] = \"time\",\n) -> Float[Tensor, \" clipped\"]:\n    \"\"\"\n    Filter audio based on SAE patch activations over the log-mel spectrogram.\n\n    Given a waveform and the SAE activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.\n\n    Args:\n        waveform: Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.\n        sample_rate: Audio sample rate in Hz. Should be 32000 for Bird-MAE.\n        patches: Boolean SAE activation values per patch, shape [256].\n            Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.\n        mode: Filtering mode.\n            - \"time\": Clip to time segments with high activations (preserves all frequencies).\n            - \"time+freq\": Clip time AND apply frequency masking via STFT.\n\n    Returns:\n        Filtered audio waveform as a 1D torch tensor.\n\n    Example:\n        >>> waveform_np, sr = librosa.load(audio_path, sr=32000)\n        >>> mel = bird_mae.transform(waveform_np)  # [512, 128]\n        >>> waveform = torch.from_numpy(waveform_np)\n        >>> # ... run through SAE to get patch_activations [256] ...\n        >>> # ... 
covert SAE activations to bool with > 0 ...\n        >>> time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\")\n        >>> time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")\n    \"\"\"\n    msg = f\"Bird-MAE expects sample_rate={BIRDMAE_SR_HZ}, got {sample_rate}.\"\n    assert sample_rate == BIRDMAE_SR_HZ, msg\n    assert patches.shape == (BIRDMAE_N_TIME_PATCHES * BIRDMAE_N_MEL_PATCHES,)\n    assert waveform.ndim == 1, waveform.shape\n\n    # Match transform(): pad/truncate to exactly 5s\n    waveform_t = waveform.to(torch.float32)\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if waveform_t.numel() < max_len:\n        pad = max_len - waveform_t.numel()\n        waveform_t = F.pad(waveform_t, (0, pad))\n    else:\n        waveform_t = waveform_t[:max_len]\n    if mode == \"time+freq\":\n        # STFT parameters matching Kaldi/BirdMAE assumptions approximately\n        n_fft = BIRDMAE_STFT_N_FFT\n        hop_length = BIRDMAE_STFT_HOP_LENGTH\n        win_length = BIRDMAE_STFT_WIN_LENGTH\n        window = torch.hann_window(win_length)\n\n        stft = torch.stft(\n            waveform_t,\n            n_fft=n_fft,\n            hop_length=hop_length,\n            win_length=win_length,\n            window=window,\n            center=True,\n            return_complex=True,\n        )\n        # stft shape: [freq_bins, time_frames]\n        # freq_bins = 513\n        # time_frames ~ 498 for 160000 samples\n\n        freqs = torch.linspace(0, sample_rate / 2, stft.shape[0])\n        mask = torch.zeros_like(stft, dtype=torch.bool)\n\n        # Mel range\n        low_freq = BIRDMAE_STFT_LOW_FREQ_HZ\n        high_freq = sample_rate / 2\n        min_mel = hz_to_mel(low_freq)\n        max_mel = hz_to_mel(high_freq)\n        mel_range = max_mel - min_mel\n\n        active_patch_i = torch.nonzero(patches, as_tuple=False).flatten().tolist()\n        for i in active_patch_i:\n            time_idx = i // 
BIRDMAE_N_MEL_PATCHES\n            mel_idx = i % BIRDMAE_N_MEL_PATCHES\n\n            # Time range (frames)\n            t_start = time_idx * BIRDMAE_FRAMES_PER_PATCH\n            t_end = (time_idx + 1) * BIRDMAE_FRAMES_PER_PATCH\n\n            # Frequency range (Hz)\n            # 128 mel bins total, 16 bins per patch\n            p_mel_low = (\n                min_mel\n                + (mel_idx * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n            p_mel_high = (\n                min_mel\n                + ((mel_idx + 1) * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n\n            hz_low = mel_to_hz(p_mel_low)\n            hz_high = mel_to_hz(p_mel_high)\n\n            freq_mask = (freqs >= hz_low) & (freqs < hz_high)\n\n            # Apply mask to valid frames\n            valid_t_end = min(t_end, stft.shape[1])\n            if t_start < valid_t_end:\n                mask[freq_mask, t_start:valid_t_end] = True\n\n        stft_filtered = stft * mask\n        waveform_t = torch.istft(\n            stft_filtered,\n            n_fft=n_fft,\n            hop_length=hop_length,\n            win_length=win_length,\n            window=window,\n            center=True,\n            length=waveform_t.shape[0],\n        )\n\n    # Time clipping (applies to both modes)\n    active_time_indices = torch.unique(\n        torch.nonzero(patches, as_tuple=False).flatten() // BIRDMAE_N_MEL_PATCHES\n    ).tolist()\n    segments = []\n\n    for t in active_time_indices:\n        start = t * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        end = (t + 1) * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        if start >= waveform_t.shape[0]:\n            continue\n        seg = waveform_t[start : min(end, waveform_t.shape[0])]\n        segments.append(seg)\n\n    if not segments:\n        return waveform_t[:0]\n\n    return torch.cat(segments, dim=0)\n
"},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--run-through-sae-to-get-patch_activations-256","title":"... run through SAE to get patch_activations [256] ...","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--covert-sae-activations-to-bool-with-0","title":"... convert SAE activations to bool with > 0 ...","text":"

time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\") time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")

"},{"location":"api/data/bird_mae/#saev.data.bird_mae.transform","title":"transform(waveform)","text":"

waveform: 1D NumPy array [samples]. Returns: 2D tensor [512, 128] matching HF's feature extractor output.

Source code in src/saev/data/bird_mae.py
@jaxtyped(typechecker=beartype.beartype)\ndef transform(waveform: Float[np.ndarray, \" samples\"]) -> Float[Tensor, \"time mels\"]:\n    \"\"\"\n    waveform: 1D tensor [samples]\n    returns: 2D tensor [512, 128] matching HF's feature extractor output\n    \"\"\"\n    import torchaudio.compliance.kaldi\n\n    waveform = torch.from_numpy(waveform).to(torch.float32)\n    (n_samples,) = waveform.shape\n    # 1) pad/truncate to exactly 5 s\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if n_samples < max_len:\n        pad = max_len - n_samples\n        waveform = F.pad(waveform, (0, pad))\n    else:\n        waveform = waveform[:max_len]\n\n    # 2) mean-center (per clip)\n    waveform = waveform - waveform.mean(dim=0, keepdim=True)\n\n    # 3) Kaldi fbank: [T, 128]\n    fb = torchaudio.compliance.kaldi.fbank(\n        waveform[None, :],\n        htk_compat=True,\n        sample_frequency=BIRDMAE_SR_HZ,\n        use_energy=False,\n        window_type=\"hanning\",\n        num_mel_bins=BIRDMAE_N_MELS,\n        dither=0.0,\n        frame_shift=10.0,\n    )  # [T, 128]\n\n    # 4) pad to 512 frames with min value\n    t, _ = fb.shape\n    if t < BIRDMAE_TARGET_T:\n        diff = BIRDMAE_TARGET_T - t\n        min_val = fb.min()\n        fb = F.pad(fb, (0, 0, 0, diff), value=min_val.item())\n    elif t > BIRDMAE_TARGET_T:\n        fb = fb[:BIRDMAE_TARGET_T]\n\n    fb = (fb - BIRDMAE_MEAN) / (BIRDMAE_STD * 2.0)\n\n    assert fb.shape == (BIRDMAE_TARGET_T, BIRDMAE_N_MELS), fb.shape\n\n    return fb\n
"},{"location":"api/data/buffers/","title":"saev.data.buffers","text":""},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer","title":"ReservoirBuffer(capacity, shape, *, dtype=torch.float32, meta_shape=(2,), meta_dtype=torch.int32, seed=0, collate_fn=None)","text":"

Pool of (tensor, meta) pairs. Multiple producers call put(batch_x, batch_meta). Multiple consumers call get(batch_size) -> (x, meta). Random order, each sample delivered once, blocking semantics.

Source code in src/saev/data/buffers.py
def __init__(\n    self,\n    capacity: int,\n    shape: tuple[int, ...],\n    *,\n    dtype: torch.dtype = torch.float32,\n    meta_shape: tuple[int, ...] = (2,),\n    meta_dtype: torch.dtype = torch.int32,\n    seed: int = 0,\n    collate_fn: collections.abc.Callable | None = None,\n):\n    self.capacity = capacity\n    self._empty = 123456789\n\n    self.data = torch.full((capacity, *shape), self._empty, dtype=dtype)\n    self.data.share_memory_()\n\n    self.meta = torch.full((capacity, *meta_shape), self._empty, dtype=meta_dtype)\n    self.meta.share_memory_()\n\n    self.ctx = mp.get_context()\n\n    self.size = self.ctx.Value(\"L\", 0)  # current live items\n    self.lock = self.ctx.Lock()  # guards size+swap\n    self.free = self.ctx.Semaphore(capacity)\n    self.full = self.ctx.Semaphore(0)\n    # Each process has its own RNG.\n    self.rng = np.random.default_rng(seed)\n\n    self.collate_fn = collate_fn\n\n    self.logger = logging.getLogger(f\"reservoir({os.getpid()})\")\n
"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.close","title":"close()","text":"

Release the shared-memory backing store (call once in the parent).

Source code in src/saev/data/buffers.py
def close(self) -> None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n    try:\n        self.data.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n
"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.fill","title":"fill()","text":"

Approximate proportion of filled slots (race-safe enough for tests).

Source code in src/saev/data/buffers.py
def fill(self) -> float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for tests).\"\"\"\n    return self.qsize() / self.capacity\n
"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.qsize","title":"qsize()","text":"

Approximate number of filled slots (race-safe enough for tests).

Source code in src/saev/data/buffers.py
def qsize(self) -> int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return self.size.value\n
"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer","title":"RingBuffer(slots, shape, dtype)","text":"

Fixed-capacity, multiple-producer / multiple-consumer queue backed by a shared-memory tensor.

"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer--parameters","title":"Parameters","text":"

slots : int capacity in number of items (tensor rows) shape : tuple[int] shape of one item, e.g. (batch, dim) dtype : torch.dtype tensor dtype

put(tensor) : blocks if full get() -> tensor : blocks if empty qsize() -> int advisory size (approximate) close() frees shared storage (call in the main process)

Source code in src/saev/data/buffers.py
def __init__(self, slots: int, shape: tuple[int, ...], dtype: torch.dtype):\n    assert slots > 0, \"slots must be positive\"\n    self.slots = slots\n    # 123456789 -> Should make you very worried.\n    self.buf = torch.full((slots, *shape), 123456789, dtype=dtype)\n    self.buf.share_memory_()\n\n    ctx = mp.get_context()  # obeys the global start method (\"spawn\")\n\n    # shared, lock-free counters\n    self.head = ctx.Value(\"L\", 0, lock=False)  # next free slot\n    self.tail = ctx.Value(\"L\", 0, lock=False)  # next occupied slot\n\n    # semaphores for blocking semantics\n    self.free = ctx.Semaphore(slots)  # initially all slots free\n    self.full = ctx.Semaphore(0)  # no filled slots yet\n\n    # one mutex for pointer updates\n    self.mutex = ctx.Lock()\n
"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.close","title":"close()","text":"

Release the shared-memory backing store (call once in the parent).

Source code in src/saev/data/buffers.py
def close(self) -> None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n    try:\n        self.buf.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n
"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.fill","title":"fill()","text":"

Approximate proportion of filled slots (race-safe enough for tests).

Source code in src/saev/data/buffers.py
def fill(self) -> float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for tests).\"\"\"\n    return self.qsize() / self.capacity\n
"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.get","title":"get()","text":"

Return a copy of the next item; blocks if the queue is empty.

Source code in src/saev/data/buffers.py
def get(self) -> torch.Tensor:\n    \"\"\"Return a view of the next item; blocks if the queue is empty.\"\"\"\n    self.full.acquire()  # wait for data\n    with self.mutex:  # exclusive update of tail\n        idx = self.tail.value % self.slots\n        out = self.buf[idx].clone()\n        self.tail.value += 1\n    self.free.release()  # signal one more free slot\n    return out\n
"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.put","title":"put(tensor)","text":"

Copy tensor into the next free slot; blocks if the queue is full.

Source code in src/saev/data/buffers.py
def put(self, tensor: torch.Tensor) -> None:\n    \"\"\"Copy `tensor` into the next free slot; blocks if the queue is full.\"\"\"\n    if tensor.shape != self.buf.shape[1:] or tensor.dtype != self.buf.dtype:\n        raise ValueError(\"tensor shape / dtype mismatch\")\n\n    self.free.acquire()  # wait for a free slot\n    with self.mutex:  # exclusive update of head\n        idx = self.head.value % self.slots\n        self.buf[idx].copy_(tensor)\n        self.head.value += 1\n    self.full.release()  # signal there is data\n
"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.qsize","title":"qsize()","text":"

Approximate number of filled slots (race-safe enough for tests).

Source code in src/saev/data/buffers.py
def qsize(self) -> int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return (self.head.value - self.tail.value) % (1 << 64)\n
"},{"location":"api/data/clip/","title":"saev.data.clip","text":""},{"location":"api/data/clip/#saev.data.clip.Vit","title":"Vit(ckpt)","text":"

Bases: Transformer, Module

Source code in src/saev/data/clip.py
def __init__(self, ckpt: str):\n    super().__init__()\n\n    import open_clip\n\n    from .. import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n        _, ckpt = ckpt.split(\"hf-hub:\")\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n\n    assert not isinstance(self.model, open_clip.timm_model.TimmModel)\n
"},{"location":"api/data/clip/#saev.data.clip.Vit.patch_size","title":"patch_size property","text":"

Get patch size for CLIP models.

"},{"location":"api/data/clip/#saev.data.clip.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

Create transforms for preprocessing: (img_transform, sample_transform | None).

Source code in src/saev/data/clip.py
@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    import open_clip\n\n    from .. import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n
"},{"location":"api/data/datasets/","title":"saev.data.datasets","text":""},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025","title":"BirdClef2025(root=pathlib.Path('data/birdclef-2025'), split='train_audio') dataclass","text":"

Bases: DatasetConfig

Configuration for BirdCLEF 2025 dataset, filtering to only bird species (Aves).

See https://www.kaggle.com/competitions/birdclef-2025/data for more information.

"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.n_examples","title":"n_examples property","text":"

Number of bird audio samples in the dataset.

"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.root","title":"root = pathlib.Path('data/birdclef-2025') class-attribute instance-attribute","text":"

Root directory containing the BirdCLEF 2025 data.

"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.split","title":"split = 'train_audio' class-attribute instance-attribute","text":"

Which data split to use.

"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset","title":"BirdClef2025Dataset(cfg, *, audio_transform=None, mask_transform=None, sample_transform=None)","text":"

Bases: Dataset

Dataset for BirdCLEF 2025 filtered to bird species only (class_name == 'Aves').

Source code in src/saev/data/datasets.py
def __init__(\n    self,\n    cfg: BirdClef2025,\n    *,\n    audio_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    import polars as pl\n\n    self.cfg = cfg\n    self.audio_transform = audio_transform\n    self.sample_transform = sample_transform\n\n    # Load taxonomy and filter to birds only\n    taxonomy = pl.read_csv(cfg.root / \"taxonomy.csv\", infer_schema_length=None)\n    taxonomy = taxonomy.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n    birds = taxonomy.filter(pl.col(\"class_name\") == \"Aves\")\n    bird_labels = set(birds[\"primary_label\"].to_list())\n\n    # Build label -> target mapping from bird species only\n    sorted_labels = sorted(bird_labels)\n    self.label_to_target = {label: i for i, label in enumerate(sorted_labels)}\n    self.target_to_label = {i: label for label, i in self.label_to_target.items()}\n\n    if cfg.split == \"train_audio\":\n        train = pl.read_csv(cfg.root / \"train.csv\", infer_schema_length=None)\n        train = train.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n        train_birds = train.filter(pl.col(\"primary_label\").is_in(bird_labels))\n        self.samples = [\n            {\"label\": row[\"primary_label\"], \"filename\": row[\"filename\"]}\n            for row in train_birds.iter_rows(named=True)\n        ]\n    elif cfg.split == \"train_soundscapes\":\n        soundscapes_dpath = cfg.root / \"train_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    elif cfg.split == \"test_soundscapes\":\n        soundscapes_dpath = cfg.root / \"test_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    else:\n        tp.assert_never(cfg.split)\n
"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset.n_classes","title":"n_classes property","text":"

Number of bird species.

"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10","title":"Cifar10(name='uoft-cs/cifar10', split='train') dataclass","text":"

Bases: DatasetConfig

Configuration for HuggingFace CIFAR-10.

"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.n_examples","title":"n_examples property","text":"

Number of images in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.

"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.name","title":"name = 'uoft-cs/cifar10' class-attribute instance-attribute","text":"

Dataset name on HuggingFace. Don't need to change this.

"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.root","title":"root property","text":"

Dummy path for the dataset.

"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.split","title":"split = 'train' class-attribute instance-attribute","text":"

Dataset split. Can be 'train' or 'test'.

"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig","title":"DatasetConfig","text":"

Bases: ABC

Abstract base class for dataset configurations.

"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.n_examples","title":"n_examples abstractmethod property","text":"

Number of examples in the dataset.

"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.root","title":"root abstractmethod property","text":"

Root directory path for the dataset.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImg","title":"FakeImg(n_examples=10) dataclass","text":"

Bases: DatasetConfig

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImg.root","title":"root property","text":"

Root directory path for the dataset.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg","title":"FakeImgSeg(n_examples=10, content_tokens_per_example=16, n_classes=3, bg_label=0) dataclass","text":"

Bases: DatasetConfig

Tiny synthetic segmentation dataset for tests.

Generates dummy RGB images and pixel-level segmentation masks, mimicking the behavior of real segmentation datasets like ImgSegFolder.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.bg_label","title":"bg_label = 0 class-attribute instance-attribute","text":"

Which class index is considered background.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.content_tokens_per_example","title":"content_tokens_per_example = 16 class-attribute instance-attribute","text":"

Number of content tokens per example.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_classes","title":"n_classes = 3 class-attribute instance-attribute","text":"

Number of segmentation classes.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_examples","title":"n_examples = 10 class-attribute instance-attribute","text":"

Number of examples.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.root","title":"root property","text":"

Root directory path for the dataset.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSegDataset","title":"FakeImgSegDataset(cfg, *, img_transform=None, mask_transform=None, sample_transform=None)","text":"

Bases: Dataset

Synthetic segmentation dataset providing pixel-level segmentation masks.

Mimics ImgSegFolderDataset by providing:

Source code in src/saev/data/datasets.py
def __init__(\n    self,\n    cfg: FakeImgSeg,\n    *,\n    img_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    self.cfg = cfg\n    self.img_transform = img_transform\n    self.mask_transform = mask_transform\n    self.sample_transform = sample_transform\n
"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet","title":"Imagenet(name='ILSVRC/imagenet-1k', split='train') dataclass","text":"

Bases: DatasetConfig

Configuration for HuggingFace Imagenet.

"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.n_examples","title":"n_examples property","text":"

Number of images in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.

"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.name","title":"name = 'ILSVRC/imagenet-1k' class-attribute instance-attribute","text":"

Dataset name on HuggingFace. Don't need to change this.

"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.root","title":"root property","text":"

Root directory path for the dataset.

"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.split","title":"split = 'train' class-attribute instance-attribute","text":"

Dataset split. For the default ImageNet-1K dataset, can either be 'train', 'validation' or 'test'.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder","title":"ImgFolder(root=pathlib.Path('./data/split')) dataclass","text":"

Bases: DatasetConfig

Configuration for a generic image folder dataset that matches the structure used in PyTorch's ImageFolder.

The dataset must be laid out in:

root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n

If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.n_examples","title":"n_examples property","text":"

Number of examples in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires walking the directory structure. If you need to reference this number very often, cache it in a local variable.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.root","title":"root = pathlib.Path('./data/split') class-attribute instance-attribute","text":"

Where the class folders with images are stored. Can be a glob pattern to match multiple directories.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset","title":"ImgFolderDataset(*args, sample_transform=None, **kwargs)","text":"

Bases: ImageFolder

A generic image folder dataset that matches the structure used in PyTorch's ImageFolder.

The dataset must be laid out in:

root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n

If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.

Source code in src/saev/data/datasets.py
def __init__(self, *args, sample_transform: Callable | None = None, **kwargs):\n    super().__init__(*args, **kwargs)\n    self.sample_transform = sample_transform\n
"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset.__getitem__","title":"__getitem__(index)","text":"

Parameters:

Name Type Description Default index int

Index

required

Returns:

Type Description dict[str, object]

dict with keys 'data', 'index', 'target' and 'label'.

Source code in src/saev/data/datasets.py
def __getitem__(self, index: int) -> dict[str, object]:\n    \"\"\"\n    Args:\n        index: Index\n\n    Returns:\n        dict with keys 'data', 'index', 'target' and 'label'.\n    \"\"\"\n    path, target = self.samples[index]\n    image = self.loader(path)\n    if self.transform is not None:\n        image = self.transform(image)\n    if self.target_transform is not None:\n        target = self.target_transform(target)\n\n    sample = {\n        \"data\": image,\n        \"target\": target,\n        \"label\": self.classes[target],\n        \"index\": index,\n    }\n\n    if self.sample_transform is not None:\n        sample = self.sample_transform(sample)\n\n    return sample\n
"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder","title":"ImgSegFolder(root=pathlib.Path('./data/segdataset'), split='training', labels_csv='labels.csv', bg_label=0) dataclass","text":"

Bases: DatasetConfig

"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.bg_label","title":"bg_label = 0 class-attribute instance-attribute","text":"

Background label.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.labels_csv","title":"labels_csv = 'labels.csv' class-attribute instance-attribute","text":"

CSV file with columns: stem,label1,label2,... First column must be 'stem'.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.n_examples","title":"n_examples property","text":"

Number of examples in the dataset. Calculated on the fly by counting image files in root/images/split.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.root","title":"root = pathlib.Path('./data/segdataset') class-attribute instance-attribute","text":"

Where the class folders with images are stored.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.split","title":"split = 'training' class-attribute instance-attribute","text":"

Data split.

"},{"location":"api/data/datasets/#saev.data.datasets.get_dataset","title":"get_dataset(cfg, *, data_transform=None, mask_transform=None, sample_transform=None)","text":"

Gets the dataset for the current experiment; delegates construction to dataset-specific functions.

Parameters:

Name Type Description Default cfg Config

Config for the dataset.

required data_transform

Transform to be applied to each 'data' key (typically the raw data).

required mask_transform

Transform to be applied to masks.

required sample_transform

Transform to be applied to the entire sample dict.

required

Returns: A dataset that has dictionaries with 'data', 'index', 'target', and 'label' keys containing examples.

Source code in src/saev/data/datasets.py
@beartype.beartype\ndef get_dataset(\n    cfg: Config,\n    *,\n    data_transform: Callable = None,\n    mask_transform: Callable | None = None,\n    sample_transform: Callable | None = None,\n):\n    \"\"\"\n    Gets the dataset for the current experiment; delegates construction to dataset-specific functions.\n\n    Args:\n        cfg: Config for the dataset.\n        data_tr: Transform to be applied to each 'data' key (typically the raw data).\n        mask_tr: Transform to be applied to masks.\n        dict_tr: Transform to be applied to the entire sample dict.\n    Returns:\n        A dataset that has dictionaries with `'data'`, `'index'`, `'target'`, and `'label'` keys containing examples.\n    \"\"\"\n    # TODO: Can we reduce duplication? Or is it nice to see that there is no magic here?\n    if isinstance(cfg, Imagenet):\n        return ImagenetDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, Cifar10):\n        return Cifar10Dataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, ImgSegFolder):\n        return ImgSegFolderDataset(\n            cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, ImgFolder):\n        ds = [\n            ImgFolderDataset(\n                root, transform=data_transform, sample_transform=sample_transform\n            )\n            for root in glob.glob(str(cfg.root), recursive=True)\n        ]\n        if len(ds) == 1:\n            return ds[0]\n        else:\n            return torch.utils.data.ConcatDataset(ds)\n    elif isinstance(cfg, FakeImg):\n        return FakeImgDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, FakeImgSeg):\n        return FakeImgSegDataset(\n           
 cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, BirdClef2025):\n        return BirdClef2025Dataset(\n            cfg, audio_transform=data_transform, sample_transform=sample_transform\n        )\n    else:\n        tp.assert_never(cfg)\n
"},{"location":"api/data/datasets/#saev.data.datasets.is_img_seg_dataset","title":"is_img_seg_dataset(data_cfg)","text":"

Check if a dataset configuration is for an image segmentation dataset.

Parameters:

Name Type Description Default data_cfg DatasetConfig

Dataset configuration

required

Returns:

Type Description bool

True if this is an image segmentation dataset that should have labels.bin

Source code in src/saev/data/datasets.py
@beartype.beartype\ndef is_img_seg_dataset(data_cfg: DatasetConfig) -> bool:\n    \"\"\"\n    Check if a dataset configuration is for an image segmentation dataset.\n\n    Args:\n        data_cfg: Dataset configuration\n\n    Returns:\n        True if this is an image segmentation dataset that should have labels.bin\n    \"\"\"\n    return isinstance(data_cfg, (FakeImgSeg, ImgSegFolder))\n
"},{"location":"api/data/dinov2/","title":"saev.data.dinov2","text":""},{"location":"api/data/dinov3/","title":"saev.data.dinov3","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config","title":"Config(img_size=224, patch_size=16, in_chans=3, pos_embed_rope_base=100.0, pos_embed_rope_min_period=None, pos_embed_rope_max_period=None, pos_embed_rope_normalize_coords='separate', pos_embed_rope_dtype='bf16', embed_dim=768, depth=12, num_heads=12, ffn_ratio=4.0, qkv_bias=True, ffn_layer='mlp', ffn_bias=True, proj_bias=True, n_storage_tokens=0, mask_k_bias=False, untie_global_and_local_cls_norm=False, device=None) dataclass","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config.depth","title":"depth = 12 class-attribute instance-attribute","text":"

Number of transformer blocks.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.device","title":"device = None class-attribute instance-attribute","text":"

Device for tensor operations.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.embed_dim","title":"embed_dim = 768 class-attribute instance-attribute","text":"

Embedding dimension for transformer.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_bias","title":"ffn_bias = True class-attribute instance-attribute","text":"

Whether to use bias in feed-forward network.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_layer","title":"ffn_layer = 'mlp' class-attribute instance-attribute","text":"

Type of feed-forward network layer.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_ratio","title":"ffn_ratio = 4.0 class-attribute instance-attribute","text":"

Feed-forward network expansion ratio.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.img_size","title":"img_size = 224 class-attribute instance-attribute","text":"

Image width and height in pixels.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.in_chans","title":"in_chans = 3 class-attribute instance-attribute","text":"

Number of input image channels.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.mask_k_bias","title":"mask_k_bias = False class-attribute instance-attribute","text":"

Whether to mask K bias in attention.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.n_storage_tokens","title":"n_storage_tokens = 0 class-attribute instance-attribute","text":"

Number of storage/register tokens.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.num_heads","title":"num_heads = 12 class-attribute instance-attribute","text":"

Number of attention heads.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.patch_size","title":"patch_size = 16 class-attribute instance-attribute","text":"

Size of each patch in pixels.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_base","title":"pos_embed_rope_base = 100.0 class-attribute instance-attribute","text":"

Base frequency for RoPE positional encoding.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_dtype","title":"pos_embed_rope_dtype = 'bf16' class-attribute instance-attribute","text":"

Data type for RoPE positional encoding.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_max_period","title":"pos_embed_rope_max_period = None class-attribute instance-attribute","text":"

Maximum period for RoPE positional encoding.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_min_period","title":"pos_embed_rope_min_period = None class-attribute instance-attribute","text":"

Minimum period for RoPE positional encoding.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_normalize_coords","title":"pos_embed_rope_normalize_coords = 'separate' class-attribute instance-attribute","text":"

Coordinate normalization method for RoPE encoding.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.proj_bias","title":"proj_bias = True class-attribute instance-attribute","text":"

Whether to use bias in output projection.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.qkv_bias","title":"qkv_bias = True class-attribute instance-attribute","text":"

Whether to use bias in QKV projection.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.untie_global_and_local_cls_norm","title":"untie_global_and_local_cls_norm = False class-attribute instance-attribute","text":"

Whether to use separate norms for global and local CLS tokens.

"},{"location":"api/data/dinov3/#saev.data.dinov3.PatchEmbed","title":"PatchEmbed(img_size=224, patch_size=16, in_chans=3, embed_dim=768, flatten_embedding=True)","text":"

Bases: Module

2D image to patch embedding: (B,C,H,W) -> (B,N,D)

Parameters:

Name Type Description Default img_size int | tuple[int, int]

Image size.

224 patch_size int | tuple[int, int]

Patch token size.

16 in_chans int

Number of input image channels.

3 embed_dim int

Number of linear projection output channels.

768 Source code in src/saev/data/dinov3.py
def __init__(\n    self,\n    img_size: int | tuple[int, int] = 224,\n    patch_size: int | tuple[int, int] = 16,\n    in_chans: int = 3,\n    embed_dim: int = 768,\n    flatten_embedding: bool = True,\n) -> None:\n    super().__init__()\n\n    image_hw = make_2tuple(img_size)\n    patch_hw = make_2tuple(patch_size)\n\n    self.image_hw = image_hw\n    self.patch_hw = patch_hw\n\n    self.in_chans = in_chans\n    self.embed_dim = embed_dim\n\n    self.proj = nn.Conv2d(\n        in_chans, embed_dim, kernel_size=patch_hw, stride=patch_hw\n    )\n    self.k = patch_hw[0]\n    assert self.proj.kernel_size == (self.k, self.k)\n    assert self.proj.stride == (self.k, self.k)\n    assert self.proj.padding == (0, 0)\n    assert self.proj.groups == 1\n    assert self.proj.dilation == (1, 1)\n
"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit","title":"Vit(ckpt)","text":"

Bases: Module, Transformer

Source code in src/saev/data/dinov3.py
def __init__(self, ckpt: str):\n    super().__init__()\n    name = self._parse_name(ckpt)\n    self.model = load(name, ckpt)\n\n    self._ckpt = name\n    self.logger = logging.getLogger(f\"dinov3/{name}\")\n
"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_resize","title":"make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.

Source code in src/saev/data/dinov3.py
@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    import functools\n\n    return functools.partial(\n        transforms.resize_to_patch_grid,\n        p=int(16 * scale),\n        n=n_patches_per_img,\n        resample=resample,\n    )\n
"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

Create transforms for preprocessing: (img_transform, sample_transform | None).

Source code in src/saev/data/dinov3.py
@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    img_transform = v2.Compose([\n        transforms.FlexResize(patch_size=16, n_patches=n_patches_per_img),\n        v2.ToImage(),\n        v2.ToDtype(torch.float32, scale=True),\n        v2.Normalize(mean=[0.4850, 0.4560, 0.4060], std=[0.2290, 0.2240, 0.2250]),\n    ])\n    sample_transform = transforms.Patchify(\n        patch_size=16, n_patches=n_patches_per_img\n    )\n    return img_transform, sample_transform\n
"},{"location":"api/data/fake_clip/","title":"saev.data.fake_clip","text":"

Fake CLIP model for testing with tiny-open-clip-model.

This module provides a test-only vision transformer that works with the tiny-open-clip-model from HuggingFace, which uses 8x8 images and 2x2 patches instead of the standard 224x224 images with 16x16 patches.

"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit","title":"Vit(ckpt)","text":"

Bases: Transformer, Module

Source code in src/saev/data/fake_clip.py
def __init__(self, ckpt: str):\n    super().__init__()\n\n    # Only support the tiny test model\n    assert ckpt == \"hf-hub:hf-internal-testing/tiny-open-clip-model\", (\n        f\"FakeClip only supports tiny-open-clip-model, got {ckpt}\"\n    )\n\n    clip, _ = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n
"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.patch_size","title":"patch_size property","text":"

Tiny model uses 2x2 patches.

"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_resize","title":"make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

Create resize transform for tiny model (8x8 images).

Source code in src/saev/data/fake_clip.py
@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for tiny model (8x8 images).\"\"\"\n\n    def resize(img: Image.Image) -> Image.Image:\n        # Tiny model uses 8x8 images\n        size_px = (int(8 * scale), int(8 * scale))\n        return img.resize(size_px, resample=resample)\n\n    return resize\n
"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

Create transforms for preprocessing: (img_transform, sample_transform | None).

Source code in src/saev/data/fake_clip.py
@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    _, img_transform = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    return img_transform, None\n
"},{"location":"api/data/indexed/","title":"saev.data.indexed","text":""},{"location":"api/data/indexed/#saev.data.indexed.Config","title":"Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False) dataclass","text":"

Configuration for loading indexed activation data from disk

Attributes:

Name Type Description shards Path

Directory with .bin shards and a metadata.json file.

tokens Literal['special', 'content', 'all']

Which kinds of tokens to use. 'special' indicates the special tokens (if any). 'content' returns content tokens. 'all' returns both content and special tokens.

layer int | Literal['all']

Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

debug bool

Whether the dataloader process should log debug messages.

"},{"location":"api/data/indexed/#saev.data.indexed.Dataset","title":"Dataset(cfg)","text":"

Bases: Dataset

Dataset of activations from disk.

Attributes:

Name Type Description cfg Config

Configuration set via CLI args.

md Metadata

Activations metadata; automatically loaded from disk.

layer_idx int

Layer index into the shards if we are choosing a specific layer.

Source code in src/saev/data/indexed.py
def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n
"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.d_model","title":"d_model property","text":"

Dimension of the underlying vision transformer's embedding space.

"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.Example","title":"Example","text":"

Bases: TypedDict

Individual example.

"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.__len__","title":"__len__()","text":"

Dataset length depends on tokens and layer.

Source code in src/saev/data/indexed.py
def __len__(self) -> int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n
"},{"location":"api/data/models/","title":"saev.data.models","text":""},{"location":"api/data/models/#saev.data.models.Transformer","title":"Transformer","text":"

Bases: ABC

Protocol defining the interface for all Transformer models.

"},{"location":"api/data/models/#saev.data.models.Transformer.patch_size","title":"patch_size abstractmethod property","text":"

Patch size in pixels (e.g., 14 or 16).

"},{"location":"api/data/models/#saev.data.models.Transformer.forward","title":"forward(batch) abstractmethod","text":"

Run forward pass on batch of images.

Source code in src/saev/data/models.py
@abc.abstractmethod\ndef forward(\n    self, batch: Float[Tensor, \"batch 3 width height\"]\n) -> Float[Tensor, \"batch patches dim\"]:\n    \"\"\"Run forward pass on batch of images.\"\"\"\n
"},{"location":"api/data/models/#saev.data.models.Transformer.get_residuals","title":"get_residuals() abstractmethod","text":"

Return the list of residual blocks/layers for hook registration.

Source code in src/saev/data/models.py
@abc.abstractmethod\ndef get_residuals(self) -> list[torch.nn.Module]:\n    \"\"\"Return the list of residual blocks/layers for hook registration.\"\"\"\n
"},{"location":"api/data/models/#saev.data.models.Transformer.get_token_i","title":"get_token_i(content_tokens_per_example) abstractmethod","text":"

Return indices for selecting relevant tokens from activations.

Source code in src/saev/data/models.py
@abc.abstractmethod\ndef get_token_i(self, content_tokens_per_example: int) -> slice | torch.Tensor:\n    \"\"\"Return indices for selecting relevant tokens from activations.\"\"\"\n
"},{"location":"api/data/models/#saev.data.models.Transformer.make_resize","title":"make_resize(ckpt, content_tokens_per_example, *, scale=1.0, resample=Image.LANCZOS) abstractmethod staticmethod","text":"

Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.

Source code in src/saev/data/models.py
@staticmethod\n@abc.abstractmethod\ndef make_resize(\n    ckpt: str,\n    content_tokens_per_example: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n
"},{"location":"api/data/models/#saev.data.models.Transformer.make_transforms","title":"make_transforms(ckpt, content_tokens_per_example) abstractmethod staticmethod","text":"

Create transforms for preprocessing: (data_transform, dict_transform | None).

Source code in src/saev/data/models.py
@staticmethod\n@abc.abstractmethod\ndef make_transforms(\n    ckpt: str, content_tokens_per_example: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n
"},{"location":"api/data/models/#saev.data.models.list_families","title":"list_families()","text":"

List all ViT family names.

Source code in src/saev/data/models.py
def list_families() -> list[str]:\n    \"\"\"List all ViT family names.\"\"\"\n    return list(_global_model_registry.keys())\n
"},{"location":"api/data/models/#saev.data.models.load_model_cls","title":"load_model_cls(family)","text":"

Load a transformer family's class.

Source code in src/saev/data/models.py
@beartype.beartype\ndef load_model_cls(family: str) -> type[Transformer]:\n    \"\"\"Load a transformer family's class.\"\"\"\n    if family not in _global_model_registry:\n        raise ValueError(f\"Family '{family}' not found.\")\n\n    return _global_model_registry[family]\n
"},{"location":"api/data/models/#saev.data.models.register_family","title":"register_family(cls)","text":"

Register a new transformer family's class.

Source code in src/saev/data/models.py
@beartype.beartype\ndef register_family(cls: type[Transformer]):\n    \"\"\"Register a new transformer family's class.\"\"\"\n    if cls.family in _global_model_registry:\n        logger.warning(\"Overwriting key '%s' in registry.\", cls.family)\n    _global_model_registry[cls.family] = cls\n
"},{"location":"api/data/ordered/","title":"saev.data.ordered","text":"

Ordered (sequential) dataloader for activation data.

This module provides a high-throughput dataloader that reads activation data from disk shards in sequential order, without shuffling. The implementation uses a single-threaded manager process to ensure data is delivered in the exact order it appears on disk.

Patch labels are provided if there is a labels.bin file on disk.

See the design decisions in src/saev/data/performance.md.

Usage

cfg = Config(shards=\"./shards\", layer=13, batch_size=4096) dataloader = DataLoader(cfg) for batch in dataloader: ... activations = batch[\"act\"] # [batch_size, d_model] ... image_indices = batch[\"example_idx\"] # [batch_size] ... patch_indices = batch[\"token_idx\"] # [batch_size] ... patch_labels = batch[\"patch_labels\"] # [batch_size]

"},{"location":"api/data/ordered/#saev.data.ordered.Config","title":"Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0) dataclass","text":"

Configuration for loading ordered (non-shuffled) activation data from disk

Attributes:

Name Type Description shards Path

Directory with .bin shards and a metadata.json file.

tokens Literal['content']

Which kinds of tokens to use. 'special' indicates the special tokens (if any). 'content' returns content tokens. 'all' returns both content and special tokens.

layer int | Literal['all']

Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

batch_size int

Batch size.

batch_timeout_s float

How long to wait for at least one batch.

drop_last bool

Whether to drop the last batch if it's smaller than the others.

buffer_size int

Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

debug bool

Whether the dataloader process should log debug messages.

log_every_s float

How frequently the dataloader process should log (debug) performance messages.

"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader","title":"DataLoader(cfg)","text":"

High-throughput streaming loader that reads data from disk shards in order (no shuffling).

Source code in src/saev/data/ordered.py
def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. (debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n
"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.ExampleBatch","title":"ExampleBatch","text":"

Bases: TypedDict

Batch of examples.

"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__iter__","title":"__iter__()","text":"

Yields batches in order.

Source code in src/saev/data/ordered.py
def __iter__(self) -> collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n < self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size < self.cfg.batch_size\n                    and n + actual_batch_size >= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n
"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__len__","title":"__len__()","text":"

Returns the number of batches in an epoch.

Source code in src/saev/data/ordered.py
def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n
"},{"location":"api/data/pe/","title":"saev.data.pe","text":"

Perception Encoder (PE) models from Meta (Bolya et al., 2025).

PE-Core: CLIP-style model for language alignment. PE-Spatial: Dense prediction model distilled from SAM 2.1.

Both are available via timm.

"},{"location":"api/data/pe/#saev.data.pe.Core","title":"Core(ckpt)","text":"

Bases: _Base

PE-Core: CLIP-style model for language alignment.

Available checkpoints: - vit_pe_core_large_patch14_336.fb (L/14, 336px) - vit_pe_core_base_patch16_224.fb (B/16, 224px)

Source code in src/saev/data/pe.py
def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n
"},{"location":"api/data/pe/#saev.data.pe.Spatial","title":"Spatial(ckpt)","text":"

Bases: _Base

PE-Spatial: Dense prediction model distilled from SAM 2.1.

Available checkpoints: - vit_pe_spatial_large_patch14_448.fb (L/14, 448px) - vit_pe_spatial_base_patch16_512.fb (B/16, 512px)

Source code in src/saev/data/pe.py
def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n
"},{"location":"api/data/saev.data/","title":"saev.data","text":""},{"location":"api/data/saev.data/#saev.data.IndexedConfig","title":"IndexedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False) dataclass","text":"

Configuration for loading indexed activation data from disk

Attributes:

Name Type Description shards Path

Directory with .bin shards and a metadata.json file.

tokens Literal['special', 'content', 'all']

Which kinds of tokens to use. 'special' indicates the special tokens (if any). 'content' returns content tokens. 'all' returns both content and special tokens.

layer int | Literal['all']

Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

debug bool

Whether the dataloader process should log debug messages.

"},{"location":"api/data/saev.data/#saev.data.IndexedDataset","title":"IndexedDataset(cfg)","text":"

Bases: Dataset

Dataset of activations from disk.

Attributes:

Name Type Description cfg Config

Configuration set via CLI args.

md Metadata

Activations metadata; automatically loaded from disk.

layer_idx int

Layer index into the shards if we are choosing a specific layer.

Source code in src/saev/data/indexed.py
def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n
"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.d_model","title":"d_model property","text":"

Dimension of the underlying vision transformer's embedding space.

"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.Example","title":"Example","text":"

Bases: TypedDict

Individual example.

"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.__len__","title":"__len__()","text":"

Dataset length depends on tokens and layer.

Source code in src/saev/data/indexed.py
def __len__(self) -> int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n
"},{"location":"api/data/saev.data/#saev.data.Metadata","title":"Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1') dataclass","text":"

Metadata for a sharded set of transformer activations.

Parameters:

Name Type Description Default family Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']

The transformer family.

required ckpt str

The transformer checkpoint.

required layers tuple[int, ...]

Which layers were saved.

required content_tokens_per_example int

The number of content tokens per example.

required cls_token bool

Whether the transformer has a [CLS] token as well.

required d_model int

Model hidden dimension.

required n_examples int

Number of examples.

required max_tokens_per_shard int

The maximum number of tokens per shard.

required data str

base64-encoded string of pickle.dumps(dataset).

required dataset Path

Absolute path to the root directory of the original dataset.

required pixel_agg PixelAgg

(only for image segmentation datasets) how the pixel-level segmentation labels were aggregated to token-level labels.

MAJORITY dtype Literal['float32']

How activations are stored.

'float32' protocol Literal['1.0.0', '1.1', '2.1']

Protocol version.

'2.1'"},{"location":"api/data/saev.data/#saev.data.Metadata.examples_per_shard","title":"examples_per_shard property","text":"

The number of examples per shard based on the protocol.

Returns:

Type Description int

Number of examples that fit in a shard.

"},{"location":"api/data/saev.data/#saev.data.Metadata.hash","title":"hash property","text":"

First 8 bytes of a SHA256 hash of the metadata configuration.

Returns:

Type Description str

Hexadecimal hash string uniquely identifying this configuration.

"},{"location":"api/data/saev.data/#saev.data.Metadata.n_shards","title":"n_shards property","text":"

Total number of shards needed to store all examples.

Returns:

Type Description int

Number of shards required.

"},{"location":"api/data/saev.data/#saev.data.Metadata.shard_shape","title":"shard_shape property","text":"

Shape of each shard file.

Returns:

Type Description tuple[int, int, int, int]

Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).

"},{"location":"api/data/saev.data/#saev.data.Metadata.tokens_per_example","title":"tokens_per_example property","text":"

Total number of tokens per example including [CLS] token if present.

Returns:

Type Description int

Number of tokens plus one if [CLS] token is included.

"},{"location":"api/data/saev.data/#saev.data.Metadata.dump","title":"dump(shards_root)","text":"

Dumps a Metadata object to a metadata.json file in shards_root / hash.

Parameters:

Name Type Description Default shards_root Path

Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.

required Source code in src/saev/data/shards.py
def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n
"},{"location":"api/data/saev.data/#saev.data.Metadata.load","title":"load(shards_dir) classmethod","text":"

Loads a Metadata object from a metadata.json file in shards_dir.

Parameters:

Name Type Description Default shards_dir Path

Path to $SAEV_SCRATCH/saev/shards/<hash> as described in disk-layout.md. required Source code in src/saev/data/shards.py

@classmethod\ndef load(cls, shards_dir: pathlib.Path) -> tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/<hash> as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n
"},{"location":"api/data/saev.data/#saev.data.OrderedConfig","title":"OrderedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0) dataclass","text":"

Configuration for loading ordered (non-shuffled) activation data from disk

Attributes:

Name Type Description shards Path

Directory with .bin shards and a metadata.json file.

tokens Literal['content']

Which kinds of tokens to use. 'special' selects the special tokens (if any). 'content' returns content tokens. 'all' returns both content and special tokens. Note that this config's type currently only accepts 'content'.

layer int | Literal['all']

Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

batch_size int

Batch size.

batch_timeout_s float

How long to wait for at least one batch.

drop_last bool

Whether to drop the last batch if it's smaller than the others.

buffer_size int

Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

debug bool

Whether the dataloader process should log debug messages.

log_every_s float

How frequently the dataloader process should log (debug) performance messages.

"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader","title":"OrderedDataLoader(cfg)","text":"

High-throughput streaming loader that reads data from disk shards in order (no shuffling).

Source code in src/saev/data/ordered.py
def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. (debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n
"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.ExampleBatch","title":"ExampleBatch","text":"

Bases: TypedDict

A batch of examples.

"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__iter__","title":"__iter__()","text":"

Yields batches in order.

Source code in src/saev/data/ordered.py
def __iter__(self) -> collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n < self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size < self.cfg.batch_size\n                    and n + actual_batch_size >= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n
"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__len__","title":"__len__()","text":"

Returns the number of batches in an epoch.

Source code in src/saev/data/ordered.py
def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n
"},{"location":"api/data/saev.data/#saev.data.PixelAgg","title":"PixelAgg","text":"

Bases: Enum

How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig","title":"ShuffledConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False) dataclass","text":"

Configuration for loading shuffled activation data from disk.

Attributes:

Name Type Description shards Path

Directory with .bin shards and a metadata.json file.

tokens Literal['special', 'content', 'all']

Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 'all' returns all tokens.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_size","title":"batch_size = 1024 * 16 class-attribute instance-attribute","text":"

Batch size.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_timeout_s","title":"batch_timeout_s = 30.0 class-attribute instance-attribute","text":"

How long to wait for at least one batch.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.buffer_size","title":"buffer_size = 64 class-attribute instance-attribute","text":"

Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.debug","title":"debug = False class-attribute instance-attribute","text":"

Whether the dataloader process should log debug messages.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.drop_last","title":"drop_last = False class-attribute instance-attribute","text":"

Whether to drop the last batch if it's smaller than the others.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.ignore_labels","title":"ignore_labels = dataclasses.field(default_factory=list) class-attribute instance-attribute","text":"

If provided, exclude tokens with these label values. An empty list (the default) means no filtering. Common use: ignore_labels=[0] to exclude background.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.layer","title":"layer = -1 class-attribute instance-attribute","text":"

Which transformer layer(s) to read from disk. -1 is the default, but must be changed. \"all\" enumerates every recorded layer.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.log_every_s","title":"log_every_s = 30.0 class-attribute instance-attribute","text":"

How frequently the dataloader process should log (debug) performance messages.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.min_buffer_fill","title":"min_buffer_fill = 0.0 class-attribute instance-attribute","text":"

Fraction of the reservoir that must be populated before yielding batches.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.n_threads","title":"n_threads = 4 class-attribute instance-attribute","text":"

Number of dataloading threads.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.scale_norm","title":"scale_norm = False class-attribute instance-attribute","text":"

Whether to scale norms to sqrt(D).

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.seed","title":"seed = 17 class-attribute instance-attribute","text":"

Random seed.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.use_tmpdir","title":"use_tmpdir = False class-attribute instance-attribute","text":"

If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.

"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader","title":"ShuffledDataLoader(cfg)","text":"

High-throughput streaming loader that deterministically shuffles data from disk shards.

Source code in src/saev/data/shuffled.py
def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = None\n    self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n
"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.ExampleBatch","title":"ExampleBatch","text":"

Bases: TypedDict

A batch of activations together with their example and token indices.

"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__iter__","title":"__iter__()","text":"

Yields batches.

Source code in src/saev/data/shuffled.py
def __iter__(self) -> collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n < self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n            self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n
"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__len__","title":"__len__()","text":"

Returns the number of batches in an epoch.

Source code in src/saev/data/shuffled.py
def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / self.cfg.batch_size)\n
"},{"location":"api/data/saev.data/#saev.data.make_ordered_config","title":"make_ordered_config(shuffled_cfg, **overrides)","text":"

Create an OrderedConfig from a ShuffledConfig, with optional overrides.

Defaults come from shuffled_cfg for fields present in OrderedConfig, and overrides take precedence. Unknown override fields raise TypeError from the OrderedConfig constructor, mirroring dataclasses.replace.

Source code in src/saev/data/__init__.py
@beartype.beartype\ndef make_ordered_config(\n    shuffled_cfg: ShuffledConfig, **overrides: object\n) -> OrderedConfig:\n    \"\"\"Create an `OrderedConfig` from a `ShuffledConfig`, with optional overrides.\n\n    Defaults come from `shuffled_cfg` for fields present in `OrderedConfig`, and `overrides` take precedence. Unknown override fields raise `TypeError` from the `OrderedConfig` constructor, mirroring `dataclasses.replace`.\n    \"\"\"\n    params: dict[str, object] = {}\n    for f in dataclasses.fields(OrderedConfig):\n        name = f.name\n        if hasattr(shuffled_cfg, name):\n            params[name] = getattr(shuffled_cfg, name)\n    params.update(overrides)\n    return OrderedConfig(**params)\n
"},{"location":"api/data/shards/","title":"saev.data.shards","text":"

Library code for reading and writing sharded activations to disk.

"},{"location":"api/data/shards/#saev.data.shards.Index","title":"Index(*, idx, example_idx, content_token_idx, shard_idx, example_idx_in_shard, layer_idx_in_shard, token_idx_in_shard) dataclass","text":"

Attributes:

Name Type Description idx int

The index of the activation.

example_idx int

The index of the original example (image, audio clip etc).

content_token_idx int

The token's index within an example's content. -1 for all special tokens.

shard_idx int

The shard index.

example_idx_in_shard int

The example index along the examples axis in a shard.

token_idx_in_shard int

The token index along the tokens axis in a shard.

"},{"location":"api/data/shards/#saev.data.shards.IndexMap","title":"IndexMap(md, tokens, layer)","text":"

Attributes:

Name Type Description md Metadata

Metadata

tokens Literal['special', 'content', 'all']

Which subset of tokens to load.

layer int

Which layer to load.

layer_idx_lookup dict[int, int]

The lookup from a transformer layer to the layer idx in the shard.

Source code in src/saev/data/shards.py
def __init__(\n    self,\n    md: Metadata,\n    tokens: tp.Literal[\"special\", \"content\", \"all\"],\n    layer: int | tp.Literal[\"all\"],\n):\n    if tokens == \"special\":\n        assert md.cls_token\n\n    self.md = md\n    self.tokens = tokens\n    self.layer = layer\n\n    if isinstance(layer, int):\n        err_msg = f\"No matche for layer; {layer} not in {md.layers}.\"\n        assert layer in md.layers, err_msg\n\n    self.layer_idx_lookup = {layer: i for i, layer in enumerate(md.layers)}\n
"},{"location":"api/data/shards/#saev.data.shards.IndexMap.__len__","title":"__len__()","text":"

Dataset length depends on tokens and layer.

Source code in src/saev/data/shards.py
def __len__(self) -> int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    match (self.tokens, self.layer):\n        case (\"special\", \"all\"):\n            # Return a CLS token from a random example and random layer.\n            return self.md.n_examples * len(self.md.layers)\n        case (\"special\", int()):\n            # Return a CLS token from a random example and fixed layer.\n            return self.md.n_examples\n        case (\"content\", int()):\n            # Return a patch from a random example, fixed layer, and random patch.\n            return self.md.n_examples * self.md.content_tokens_per_example\n        case (\"content\", \"all\"):\n            # Return a patch from a random example, random layer and random patch.\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.content_tokens_per_example\n            )\n        case (\"all\", int()):\n            # Return a token from a random example, fixed layer, and random token (including special).\n            return self.md.n_examples * self.md.tokens_per_example\n        case (\"all\", \"all\"):\n            # Return a token from a random example, random layer and random token (including special).\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.tokens_per_example\n            )\n        case _:\n            tp.assert_never((self.cfg.tokens, self.cfg.layer))\n
"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter","title":"LabelsWriter(shards_dir, md)","text":"

LabelsWriter handles writing patch-level segmentation labels to a single binary file.

Parameters:

Name Type Description Default shards_dir Path

The shard directory; $SAEV_SCRATCH/saev/shards/<hash> required md Metadata

The Metadata object.

required

Attributes:

Name Type Description labels UInt8[ndarray, 'n_examples n_patches']

The integer patch labels.

labels_path Path

Where the integer patch labels are stored.

md Metadata

The dataset metadata.

has_written bool

Whether we have written any data to self.labels.

Source code in src/saev/data/shards.py
def __init__(self, shards_dir: pathlib.Path, md: Metadata):\n    assert disk.is_shards_dir(shards_dir)\n    self.logger = logging.getLogger(\"labels-writer\")\n    self.md = md\n    self.has_written = False\n\n    # Always create memory-mapped file for labels\n    # If nothing is written, it will be deleted in flush()\n    self.labels_path = shards_dir / \"labels.bin\"\n    self.labels = np.memmap(\n        self.labels_path,\n        mode=\"w+\",\n        dtype=np.uint8,\n        shape=(self.md.n_examples, self.md.content_tokens_per_example),\n    )\n    self.logger.info(\"Opened labels file '%s'.\", self.labels_path)\n
"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.flush","title":"flush()","text":"

Flush the memory-mapped file to disk if anything was written.

Source code in src/saev/data/shards.py
def flush(self) -> None:\n    \"\"\"Flush the memory-mapped file to disk if anything was written.\"\"\"\n    if self.has_written:\n        self.labels.flush()\n        self.logger.info(\"Flushed labels to '%s'.\", self.labels_path)\n
"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.write_batch","title":"write_batch(batch_labels, start_idx)","text":"

Write a batch of labels to the memory-mapped file.

Parameters:

Name Type Description Default batch_labels ndarray | Tensor

Array of shape (batch_size, content_tokens_per_example) with uint8 dtype

required start_idx int

Starting index in the global labels array

required Source code in src/saev/data/shards.py
@beartype.beartype\ndef write_batch(self, batch_labels: np.ndarray | Tensor, start_idx: int):\n    \"\"\"\n    Write a batch of labels to the memory-mapped file.\n\n    Args:\n        batch_labels: Array of shape (batch_size, content_tokens_per_example) with uint8 dtype\n        start_idx: Starting index in the global labels array\n    \"\"\"\n    # Convert to numpy if needed\n    if isinstance(batch_labels, torch.Tensor):\n        batch_labels = batch_labels.cpu().numpy()\n\n    batch_size = len(batch_labels)\n    assert start_idx + batch_size <= self.md.n_examples\n    assert batch_labels.shape == (batch_size, self.md.content_tokens_per_example)\n    assert batch_labels.dtype == np.uint8\n\n    self.labels[start_idx : start_idx + batch_size] = batch_labels\n    self.has_written = True\n
"},{"location":"api/data/shards/#saev.data.shards.Metadata","title":"Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1') dataclass","text":"

Metadata for a sharded set of transformer activations.

Parameters:

Name Type Description Default family Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']

The transformer family.

required ckpt str

The transformer checkpoint.

required layers tuple[int, ...]

Which layers were saved.

required content_tokens_per_example int

The number of content tokens per example.

required cls_token bool

Whether the transformer has a [CLS] token as well.

required d_model int

Model hidden dimension.

required n_examples int

Number of examples.

required max_tokens_per_shard int

The maximum number of tokens per shard.

required data str

base64-encoded string of pickle.dumps(dataset).

required dataset Path

Absolute path to the root directory of the original dataset.

required pixel_agg PixelAgg

(only for image segmentation datasets) how the pixel-level segmentation labels were aggregated to token-level labels.

MAJORITY dtype Literal['float32']

How activations are stored.

'float32' protocol Literal['1.0.0', '1.1', '2.1']

Protocol version.

'2.1'"},{"location":"api/data/shards/#saev.data.shards.Metadata.examples_per_shard","title":"examples_per_shard property","text":"

The number of examples per shard based on the protocol.

Returns:

Type Description int

Number of examples that fit in a shard.

"},{"location":"api/data/shards/#saev.data.shards.Metadata.hash","title":"hash property","text":"

First 8 bytes of a SHA256 hash of the metadata configuration.

Returns:

Type Description str

Hexadecimal hash string uniquely identifying this configuration.

"},{"location":"api/data/shards/#saev.data.shards.Metadata.n_shards","title":"n_shards property","text":"

Total number of shards needed to store all examples.

Returns:

Type Description int

Number of shards required.

"},{"location":"api/data/shards/#saev.data.shards.Metadata.shard_shape","title":"shard_shape property","text":"

Shape of each shard file.

Returns:

Type Description tuple[int, int, int, int]

Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).

"},{"location":"api/data/shards/#saev.data.shards.Metadata.tokens_per_example","title":"tokens_per_example property","text":"

Total number of tokens per example including [CLS] token if present.

Returns:

Type Description int

Number of tokens plus one if [CLS] token is included.

"},{"location":"api/data/shards/#saev.data.shards.Metadata.dump","title":"dump(shards_root)","text":"

Dumps a Metadata object to a metadata.json file in shards_root / hash.

Parameters:

Name Type Description Default shards_root Path

Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.

required Source code in src/saev/data/shards.py
def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n
"},{"location":"api/data/shards/#saev.data.shards.Metadata.load","title":"load(shards_dir) classmethod","text":"

Loads a Metadata object from a metadata.json file in shards_dir.

Parameters:

Name Type Description Default shards_dir Path

Path to $SAEV_SCRATCH/saev/shards/<hash> as described in disk-layout.md. required Source code in src/saev/data/shards.py

@classmethod\ndef load(cls, shards_dir: pathlib.Path) -> tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/<hash> as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n
"},{"location":"api/data/shards/#saev.data.shards.PixelAgg","title":"PixelAgg","text":"

Bases: Enum

How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).

"},{"location":"api/data/shards/#saev.data.shards.RecordedTransformer","title":"RecordedTransformer(model, content_tokens_per_example, cls_token, layers)","text":"

Bases: Module

A wrapper around a transformer model that records intermediate layer activations during forward passes.

Parameters:

Name Type Description Default model Module

The transformer model to wrap.

required content_tokens_per_example int

Number of content tokens per example.

required cls_token bool

Whether to record the [CLS] token in addition to content tokens.

required layers Sequence[int]

Which transformer layers to record activations from.

required

Attributes:

Name Type Description model Module

The wrapped transformer model.

content_tokens_per_example int

Number of content tokens per example.

cls_token bool

Whether the [CLS] token is included in recorded activations.

layers Sequence[int]

Tuple of layer indices being recorded.

token_i slice

Token indices to extract from model outputs.

logger

Logger instance for this recorder.

Source code in src/saev/data/shards.py
def __init__(\n    self,\n    model: torch.nn.Module,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    layers: Sequence[int],\n):\n    super().__init__()\n\n    self.model = model\n\n    self.content_tokens_per_example = content_tokens_per_example\n    self.cls_token = cls_token\n    self.layers = layers\n\n    self.token_i = model.get_token_i(content_tokens_per_example)\n\n    self._storage = None\n    self._i = 0\n\n    self.logger = logging.getLogger(f\"recorder({model.name})\")\n\n    for i in self.layers:\n        self.model.get_residuals()[i].register_forward_hook(self.hook)\n
"},{"location":"api/data/shards/#saev.data.shards.Shard","title":"Shard(name, n_examples) dataclass","text":"

A single shard entry in shards.json, recording the filename and number of examples.

Attributes:

Name Type Description name str

The filename of the shard (e.g., \"acts000000.bin\").

n_examples int

Number of examples stored in this shard.

"},{"location":"api/data/shards/#saev.data.shards.ShardInfo","title":"ShardInfo(shards=list()) dataclass","text":"

A container for shard metadata as recorded in shards.json.

Parameters:

Name Type Description Default shards list[Shard]

A list of Shard objects.

list()"},{"location":"api/data/shards/#saev.data.shards.ShardWriter","title":"ShardWriter(shards_root, md)","text":"

ShardWriter is a stateful object that handles sharded activation writing to disk.

Parameters:

Name Type Description Default shards_root Path

The $SAEV_SCRATCH/saev/shards path.

required md Metadata

The Metadata object for these shards.

required

Attributes:

Name Type Description shards Path

The $SAEV_SCRATCH/saev/shards/. shard int acts_path Path acts Float[ndarray, 'examples_per_shard n_layers all_patches d_model'] | None filled int labels_writer LabelsWriter

The LabelsWriter writer.

Source code in src/saev/data/shards.py
def __init__(self, shards_root: pathlib.Path, md: Metadata):\n    assert disk.is_shards_root(shards_root)\n    self.md = md\n\n    self.logger = logging.getLogger(\"shard-writer\")\n\n    self.shards_dir = shards_root / md.hash\n    self.shards_dir.mkdir(exist_ok=True)\n\n    # builder for shard manifest\n    self._shards: ShardInfo = ShardInfo()\n\n    # Always initialize labels writer (it handles non-seg datasets internally)\n    self.labels_writer = LabelsWriter(self.shards_dir, md)\n\n    self.shard = -1\n    self.acts = None\n    self.next_shard()\n
"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__enter__","title":"__enter__()","text":"

Context manager entry.

Source code in src/saev/data/shards.py
def __enter__(self):\n    \"\"\"Context manager entry.\"\"\"\n    return self\n
"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__exit__","title":"__exit__(exc_type, exc_val, exc_tb)","text":"

Context manager exit - handle cleanup.

Source code in src/saev/data/shards.py
def __exit__(self, exc_type, exc_val, exc_tb):\n    \"\"\"Context manager exit - handle cleanup.\"\"\"\n    self.flush()\n\n    # Delete empty labels file if nothing was written\n    if not self.labels_writer.has_written:\n        if os.path.exists(self.labels_writer.labels_path):\n            os.remove(self.labels_writer.labels_path)\n            self.logger.info(\n                \"Removed empty labels file '%s'.\", self.labels_writer.labels_path\n            )\n
"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.write_batch","title":"write_batch(activations, start_idx, patch_labels=None)","text":"

Write a batch of activations and (optionally) patch labels.

Parameters:

Name Type Description Default activations Float[Tensor, 'batch n_layers all_patches d_model']

Batch of activations to write.

required start_idx int

Starting index for this batch.

required patch_labels UInt8[Tensor, 'batch n_patches'] | None

Optional patch labels for segmentation datasets.

None Source code in src/saev/data/shards.py
def write_batch(\n    self,\n    activations: Float[Tensor, \"batch n_layers all_patches d_model\"],\n    start_idx: int,\n    patch_labels: UInt8[Tensor, \"batch n_patches\"] | None = None,\n) -> None:\n    \"\"\"Write a batch of activations and (optionally) patch labels.\n\n    Args:\n        activations: Batch of activations to write.\n        start_idx: Starting index for this batch.\n        patch_labels: Optional patch labels for segmentation datasets.\n    \"\"\"\n    batch_size = len(activations)\n    end_idx = start_idx + batch_size\n\n    # Write activations (handling sharding)\n    offset = self.md.examples_per_shard * self.shard\n\n    if end_idx >= offset + self.md.examples_per_shard:\n        # We have run out of space in this mmap'ed file. Let's fill it as much as we can.\n        n_fit = offset + self.md.examples_per_shard - start_idx\n        self.acts[start_idx - offset : start_idx - offset + n_fit] = activations[\n            :n_fit\n        ]\n        self.filled = start_idx - offset + n_fit\n\n        # Write labels for the portion that fits\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                labels_to_write = (\n                    patch_labels[:n_fit].cpu().numpy().astype(np.uint8)\n                )\n            elif not isinstance(patch_labels, np.ndarray):\n                labels_to_write = np.array(patch_labels[:n_fit], dtype=np.uint8)\n            else:\n                labels_to_write = patch_labels[:n_fit]\n\n            self.labels_writer.write_batch(labels_to_write, start_idx)\n\n        self.next_shard()\n\n        # Recursively call write_batch for remaining data\n        if n_fit < batch_size:\n            self.write_batch(\n                activations[n_fit:],\n                start_idx + n_fit,\n                patch_labels[n_fit:] if patch_labels is not None else None,\n            )\n    else:\n        msg = f\"0 <= 
{start_idx} - {offset} <= {offset} + {self.md.examples_per_shard}\"\n        assert 0 <= start_idx - offset <= offset + self.md.examples_per_shard, msg\n        msg = (\n            f\"0 <= {end_idx} - {offset} <= {offset} + {self.md.examples_per_shard}\"\n        )\n        assert 0 <= end_idx - offset <= offset + self.md.examples_per_shard, msg\n        self.acts[start_idx - offset : end_idx - offset] = activations\n        self.filled = end_idx - offset\n\n        # Write labels if provided\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                patch_labels = patch_labels.cpu().numpy().astype(np.uint8)\n            elif not isinstance(patch_labels, np.ndarray):\n                patch_labels = np.array(patch_labels, dtype=np.uint8)\n\n            self.labels_writer.write_batch(patch_labels, start_idx)\n
"},{"location":"api/data/shards/#saev.data.shards.get_dataloader","title":"get_dataloader(data, *, batch_size, n_workers, data_tr=None, mask_tr=None, sample_tr=None)","text":"

Get a dataloader for a default map-style dataset.

Parameters:

Name Type Description Default data Config

Config for the dataset.

required batch_size int

Batch size.

required n_workers int

Number of dataloader workers.

required data_tr Callable | None

Transform to be applied to each 'data' key (typically the raw data).

None mask_tr Callable | None

Transform to be applied to masks.

None sample_tr Callable | None

Transform to be applied to the entire sample dict.

None

Returns:

Type Description DataLoader

A PyTorch Dataloader that yields dictionaries with 'data' keys containing data batches, 'index' keys containing original dataset indices and 'label' keys containing label batches.

Source code in src/saev/data/shards.py
@beartype.beartype\ndef get_dataloader(\n    data: datasets.Config,\n    *,\n    batch_size: int,\n    n_workers: int,\n    data_tr: Callable | None = None,\n    mask_tr: Callable | None = None,\n    sample_tr: Callable | None = None,\n) -> torch.utils.data.DataLoader:\n    \"\"\"\n    Get a dataloader for a default map-style dataset.\n\n    Args:\n        data: Config for the dataset.\n        batch_size: Batch size.\n        n_workers: Number of dataloader workers.\n        data_tr: Transform to be applied to each 'data' key (typically the raw data).\n        mask_tr: Transform to be applied to masks.\n        sample_tr: Transform to be applied to the entire sample dict.\n\n    Returns:\n        A PyTorch Dataloader that yields dictionaries with `'data'` keys containing data batches, `'index'` keys containing original dataset indices and `'label'` keys containing label batches.\n    \"\"\"\n    dataset = datasets.get_dataset(\n        data, data_transform=data_tr, mask_transform=mask_tr, sample_transform=sample_tr\n    )\n\n    dataloader = torch.utils.data.DataLoader(\n        dataset=dataset,\n        batch_size=batch_size,\n        drop_last=False,\n        num_workers=n_workers,\n        persistent_workers=n_workers > 0,\n        shuffle=False,\n        pin_memory=False,\n    )\n    return dataloader\n
"},{"location":"api/data/shards/#saev.data.shards.pixel_to_patch_labels","title":"pixel_to_patch_labels(seg, n_patches, patch_size, pixel_agg=PixelAgg.MAJORITY, bg_label=0, max_classes=256)","text":"

Convert pixel-level segmentation to patch-level labels using vectorized operations.

Parameters:

Name Type Description Default seg Image

Pixel-level segmentation mask as PIL Image

required n_patches int

Total number of patches expected

required patch_size int

Size of each patch in pixels

required pixel_agg PixelAgg

How to aggregate pixel labels into patch labels

MAJORITY bg_label int

Background label index

0 max_classes int

Maximum number of classes (for bincount)

256

Returns:

Type Description UInt8[Tensor, ' n_patches']

Patch labels as uint8 tensor of shape (n_patches,)

Source code in src/saev/data/shards.py
@jaxtyped(typechecker=beartype.beartype)\ndef pixel_to_patch_labels(\n    seg: Image.Image,\n    n_patches: int,\n    patch_size: int,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n    bg_label: int = 0,\n    max_classes: int = 256,\n) -> UInt8[Tensor, \" n_patches\"]:\n    \"\"\"\n    Convert pixel-level segmentation to patch-level labels using vectorized operations.\n\n    Args:\n        seg: Pixel-level segmentation mask as PIL Image\n        n_patches: Total number of patches expected\n        patch_size: Size of each patch in pixels\n        pixel_agg: How to aggregate pixel labels into patch labels\n        bg_label: Background label index\n        max_classes: Maximum number of classes (for bincount)\n\n    Returns:\n        Patch labels as uint8 tensor of shape (n_patches,)\n    \"\"\"\n    # Convert to torch tensor for vectorized operations\n    seg_tensor = torch.from_numpy(np.array(seg, dtype=np.uint8))\n    assert seg_tensor.ndim == 2\n\n    h, w = seg_tensor.shape\n\n    # Calculate patch grid dimensions\n    patch_grid_h = h // patch_size\n    patch_grid_w = w // patch_size\n    assert patch_grid_w * patch_grid_h == n_patches, (\n        f\"Image size {w}x{h} with patch_size {patch_size} gives {patch_grid_w}x{patch_grid_h} = {patch_grid_w * patch_grid_h} patches, expected {n_patches}\"\n    )\n\n    # Reshape into patches using einops: (n_patches, patch_size * patch_size)\n    patches = einops.rearrange(\n        seg_tensor,\n        \"(h p1) (w p2) -> (h w) (p1 p2)\",\n        p1=patch_size,\n        p2=patch_size,\n        h=patch_grid_h,\n        w=patch_grid_w,\n    )\n\n    # Use vectorized bincount approach to get class counts for all patches at once\n    # counts[i, c] = number of times class c appears in patch i\n    offsets = torch.arange(n_patches, device=patches.device).unsqueeze(1) * max_classes\n    flat = (patches + offsets).reshape(-1)\n    counts = torch.bincount(flat, minlength=n_patches * max_classes).reshape(\n        n_patches, 
max_classes\n    )\n\n    if pixel_agg is PixelAgg.MAJORITY:\n        # Take the most common label in each patch\n        patch_labels = counts.argmax(dim=1)\n    elif pixel_agg is PixelAgg.PREFER_FG:\n        # Take the most common non-background label, or background if all background\n        nonbg = counts.clone()\n        nonbg[:, bg_label] = 0\n        has_nonbg = nonbg.sum(dim=1) > 0\n        nonbg_arg = nonbg.argmax(dim=1)\n        bg_tensor = torch.full_like(nonbg_arg, bg_label)\n        patch_labels = torch.where(has_nonbg, nonbg_arg, bg_tensor)\n    else:\n        tp.assert_never(pixel_agg)\n\n    return patch_labels.to(torch.uint8)\n
"},{"location":"api/data/shards/#saev.data.shards.worker_fn","title":"worker_fn(*, family, ckpt, content_tokens_per_example, cls_token, d_model, layers, data, batch_size, n_workers, max_tokens_per_shard, shards_root, device, pixel_agg=PixelAgg.MAJORITY)","text":"

Parameters:

Name Type Description Default family str

Transformer family (dinov2, dinov3, clip, etc).

required ckpt str

Transformer ckpt (hf-hub:imageomics/bioclip2, etc).

required content_tokens_per_example int

Number of content tokens per example.

required cls_token bool

Whether the transformer has a [CLS] token.

required d_model int

Hidden dimension of transformer.

required layers list[int]

The layers to record activations for.

required data Config

Config for the particular (image) dataset to load.

required batch_size int

Batch size for the dataset.

required n_workers int

Number of workers for loading examples fromm the dataset.

required max_tokens_per_shard int

Maximum number of tokens per disk shard.

required pixel_agg PixelAgg

Optional method for aggregating segmentation label pixels.

MAJORITY shards_root Path

Where to save shards. Should end with 'shards'. See disk-layout.md; this is $SAEV_SCRATCH/saev/shards.

required device str

Device for doing the computation.

required

Returns:

Type Description Path

Path to the shards directory.

Source code in src/saev/data/shards.py
@beartype.beartype\ndef worker_fn(\n    *,\n    family: str,\n    ckpt: str,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    d_model: int,\n    layers: list[int],\n    data: datasets.Config,\n    batch_size: int,\n    n_workers: int,\n    max_tokens_per_shard: int,\n    shards_root: pathlib.Path,\n    device: str,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n) -> pathlib.Path:\n    \"\"\"\n    Args:\n        family: Transformer family (dinov2, dinov3, clip, etc).\n        ckpt: Transformer ckpt (hf-hub:imageomics/bioclip2, etc).\n        content_tokens_per_example: Number of content tokens per example.\n        cls_token: Whether the transformer has a [CLS] token.\n        d_model: Hidden dimension of transformer.\n        layers: The layers to record activations for.\n        data: Config for the particular (image) dataset to load.\n        batch_size: Batch size for the dataset.\n        n_workers: Number of workers for loading examples fromm the dataset.\n        max_tokens_per_shard: Maximum number of tokens per disk shard.\n        pixel_agg: Optional method for aggregating segmentation label pixels.\n        shards_root: Where to save shards. Should end with 'shards'. 
See [disk-layout.md](../../developers/disk-layout.md); this is $SAEV_SCRATCH/saev/shards.\n        device: Device for doing the computation.\n\n    Returns:\n        Path to the shards directory.\n    \"\"\"\n    from saev import helpers\n    from saev.data import models\n\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n        torch.backends.cudnn.benchmark = True\n        torch.backends.cudnn.deterministic = True\n\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n    logger = logging.getLogger(\"worker_fn\")\n\n    if device == \"cuda\" and not torch.cuda.is_available():\n        logger.warning(\"No CUDA device available, using CPU.\")\n        device = \"cpu\"\n\n    assert shards_root.name == \"shards\"\n\n    model_cls = models.load_model_cls(family)\n    model_instance = model_cls(ckpt).to(device)\n    model = RecordedTransformer(\n        model_instance, content_tokens_per_example, cls_token, layers\n    )\n\n    data_tr, sample_tr = model_cls.make_transforms(ckpt, content_tokens_per_example)\n\n    mask_tr = None\n    if datasets.is_img_seg_dataset(data):\n        # For image segmentation datasets, create a transform that converts pixels to patches\n        # Use make_resize with NEAREST interpolation for segmentation masks\n        seg_resize_tr = model_cls.make_resize(\n            ckpt, content_tokens_per_example, scale=1.0, resample=Image.NEAREST\n        )\n\n        def seg_to_patches(seg):\n            \"\"\"Transform that resizes segmentation and converts to patch labels.\"\"\"\n\n            # Convert to patch labels\n            return pixel_to_patch_labels(\n                seg_resize_tr(seg),\n                content_tokens_per_example,\n   
             patch_size=model_instance.patch_size,\n                pixel_agg=pixel_agg,\n                bg_label=data.bg_label,\n            )\n\n        mask_tr = seg_to_patches\n\n    dataloader = get_dataloader(\n        data,\n        batch_size=batch_size,\n        n_workers=n_workers,\n        data_tr=data_tr,\n        mask_tr=mask_tr,\n        sample_tr=sample_tr,\n    )\n\n    n_batches = math.ceil(data.n_examples / batch_size)\n    logger.info(\"Dumping %d batches of %d examples.\", n_batches, batch_size)\n\n    model = model.to(device)\n\n    md = Metadata(\n        family=family,\n        ckpt=ckpt,\n        layers=tuple(layers),\n        content_tokens_per_example=content_tokens_per_example,\n        cls_token=cls_token,\n        d_model=d_model,\n        n_examples=data.n_examples,\n        max_tokens_per_shard=max_tokens_per_shard,\n        data=base64.b64encode(pickle.dumps(data)).decode(\"utf8\"),\n        dataset=data.root,\n        pixel_agg=pixel_agg,\n    )\n    md.dump(shards_root)\n\n    # Use context manager for proper cleanup\n    with ShardWriter(shards_root, md) as writer:\n        i = 0\n        # Calculate and write transformer activations.\n        with torch.inference_mode():\n            for batch in helpers.progress(dataloader, total=n_batches):\n                data = batch.get(\"data\").to(device)\n                grid = batch.get(\"grid\")\n                if grid is not None:\n                    grid = grid.to(device)\n                    out, cache = model(data, grid=grid)\n                else:\n                    out, cache = model(data)\n                # cache has shape [batch size, n layers, n patches + 1, d model]\n                del out\n\n                # Write activations and labels (if present) in one call\n                patch_labels = batch.get(\"patch_labels\")\n                if patch_labels is not None:\n                    logger.debug(\n                        \"Found patch_labels in batch: shape=%s\",\n 
                       patch_labels.shape\n                        if hasattr(patch_labels, \"shape\")\n                        else \"unknown\",\n                    )\n                    # Ensure correct shape\n                    assert patch_labels.shape == (\n                        len(cache),\n                        content_tokens_per_example,\n                    )\n                else:\n                    logger.debug(f\"No patch_labels in batch. Keys: {batch.keys()}\")\n\n                writer.write_batch(cache, i, patch_labels=patch_labels)\n\n                i += len(cache)\n\n    return shards_root / md.hash\n
"},{"location":"api/data/shuffled/","title":"saev.data.shuffled","text":""},{"location":"api/data/shuffled/#saev.data.shuffled.Config","title":"Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False) dataclass","text":"

Configuration for loading shuffled activation data from disk.

Attributes:

Name Type Description shards Path

Directory with .bin shards and a metadata.json file.

tokens Literal['special', 'content', 'all']

Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 'all' returns all tokens.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_size","title":"batch_size = 1024 * 16 class-attribute instance-attribute","text":"

Batch size.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_timeout_s","title":"batch_timeout_s = 30.0 class-attribute instance-attribute","text":"

How long to wait for at least one batch.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.buffer_size","title":"buffer_size = 64 class-attribute instance-attribute","text":"

Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.debug","title":"debug = False class-attribute instance-attribute","text":"

Whether the dataloader process should log debug messages.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.drop_last","title":"drop_last = False class-attribute instance-attribute","text":"

Whether to drop the last batch if it's smaller than the others.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.ignore_labels","title":"ignore_labels = dataclasses.field(default_factory=list) class-attribute instance-attribute","text":"

If provided, exclude tokens with these label values. None means no filtering. Common use: ignore_labels=[0] to exclude background.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.layer","title":"layer = -1 class-attribute instance-attribute","text":"

Which transformer layer(s) to read from disk. -1 is the default, but must be changed. \"all\" enumerates every recorded layer.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.log_every_s","title":"log_every_s = 30.0 class-attribute instance-attribute","text":"

How frequently the dataloader process should log (debug) performance messages.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.min_buffer_fill","title":"min_buffer_fill = 0.0 class-attribute instance-attribute","text":"

Fraction of the reservoir that must be populated before yielding batches.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.n_threads","title":"n_threads = 4 class-attribute instance-attribute","text":"

Number of dataloading threads.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.scale_norm","title":"scale_norm = False class-attribute instance-attribute","text":"

Whether to scale norms to sqrt(D).

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.seed","title":"seed = 17 class-attribute instance-attribute","text":"

Random seed.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.use_tmpdir","title":"use_tmpdir = False class-attribute instance-attribute","text":"

If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.

"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader","title":"DataLoader(cfg)","text":"

High-throughput streaming loader that deterministically shuffles data from disk shards.

Source code in src/saev/data/shuffled.py
def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = None\n    self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n
"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.ExampleBatch","title":"ExampleBatch","text":"

Bases: TypedDict

Individual example.

"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__iter__","title":"__iter__()","text":"

Yields batches.

Source code in src/saev/data/shuffled.py
def __iter__(self) -> collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n < self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n            self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n
"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__len__","title":"__len__()","text":"

Returns the number of batches in an epoch.

Source code in src/saev/data/shuffled.py
def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / self.cfg.batch_size)\n
"},{"location":"api/data/siglip/","title":"saev.data.siglip","text":""},{"location":"api/data/siglip/#saev.data.siglip.Vit","title":"Vit(ckpt)","text":"

Bases: Module, Transformer

Source code in src/saev/data/siglip.py
def __init__(self, ckpt: str):\n    super().__init__()\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model\n\n    assert isinstance(self.model, open_clip.timm_model.TimmModel)\n
"},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_resize","title":"make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.

Source code in src/saev/data/siglip.py
@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    from PIL import Image\n\n    def resize(img: Image.Image) -> Image.Image:\n        # SigLIP typically uses 224x224 or 384x384 images\n        # We'll assume 224x224 for simplicity\n        resize_size_px = (int(224 * scale), int(224 * scale))\n        return img.resize(resize_size_px, resample=resample)\n\n    return resize\n
"},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

Create transforms for preprocessing: (img_transform, sample_transform | None).

Source code in src/saev/data/siglip.py
@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n
"},{"location":"api/data/transforms/","title":"saev.data.transforms","text":""},{"location":"api/data/transforms/#saev.data.transforms.conv2d_to_tokens","title":"conv2d_to_tokens(x_bchw, conv)","text":"

Conv2d then flatten spatial to L, return (B, L, D).

Source code in src/saev/data/transforms.py
@jaxtyped(typechecker=beartype.beartype)\ndef conv2d_to_tokens(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -> Float[Tensor, \"b n d\"]:\n    \"\"\"Conv2d then flatten spatial to L, return (B, L, D).\"\"\"\n    y_bdhw = conv(x_bchw)\n    return einops.rearrange(y_bdhw, \"b d h w -> b (h w) d\")\n
"},{"location":"api/data/transforms/#saev.data.transforms.resize_to_patch_grid","title":"resize_to_patch_grid(img, *, p, n, resample=Image.LANCZOS)","text":"

Resize image to (w, h) so that: - w % p == 0, h % p == 0 - (h/p) * (w/p) == N - Minimizes change in aspect ratio.

Source code in src/saev/data/transforms.py
@beartype.beartype\ndef resize_to_patch_grid(\n    img: Image.Image,\n    *,\n    p: int,\n    n: int,\n    resample: Image.Resampling | int = Image.LANCZOS,\n) -> Image.Image:\n    \"\"\"\n    Resize image to (w, h) so that:\n      - w % p == 0, h % p == 0\n      - (h/p) * (w/p) == N\n      - Minimizes change in aspect ratio.\n    \"\"\"\n    if p <= 0 or n <= 0:\n        raise ValueError(\"p and n must be positive integers\")\n\n    w0, h0 = img.size\n    a0 = w0 / h0\n\n    # Find the aspect ratio closest to a0\n    best_c = 0\n    best_dist = float(\"inf\")\n    for i in range(1, int(math.sqrt(n) + 1)):\n        if n % i != 0:\n            continue\n\n        for d in (i, n // i):\n            c, r = d, n // d\n            aspect = c / r\n            dist = abs(aspect - a0)\n\n            if dist < best_dist:\n                best_c = d\n                best_dist = dist\n\n    c = best_c\n    r = n // c\n    w, h = c * p, r * p\n    return img.resize((w, h), resample=resample)\n
"},{"location":"api/data/transforms/#saev.data.transforms.unfolded_conv2d","title":"unfolded_conv2d(x_bchw, conv)","text":"

Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels. Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.

Source code in src/saev/data/transforms.py
@jaxtyped(typechecker=beartype.beartype)\ndef unfolded_conv2d(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -> Float[Tensor, \"b n d\"]:\n    \"\"\"\n    Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels.\n    Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.\n    \"\"\"\n    k = conv.kernel_size[0]\n\n    assert conv.kernel_size == (k, k)\n    assert conv.stride == (k, k)\n    assert conv.padding == (0, 0)\n    assert conv.groups == 1\n    assert conv.dilation == (1, 1)\n\n    *b, c, h, w = x_bchw.shape\n\n    assert h % k == 0 and w % k == 0\n\n    tokens_bnd = einops.rearrange(\n        x_bchw, \"b c (hp p1) (wp p2) -> b (hp wp) (c p1 p2)\", p1=k, p2=k\n    ).contiguous()\n    w_dp = conv.weight.reshape(conv.out_channels, c * k * k)\n    tokens_bnd = tokens_bnd @ w_dp.T\n    if conv.bias is not None:\n        tokens_bnd = tokens_bnd + conv.bias[None, None, :]\n    return tokens_bnd\n
"},{"location":"api/framework/inference/","title":"saev.framework.inference","text":"

Script for dumping SAE inference artifacts in a single pass over the dataset.

Default mode writes 5 files:

  1. mean_values.pt
  2. sparsity.pt
  3. distributions.pt
  4. token_acts.npz
  5. metrics.json

If save=False, only metrics.json is written.

metrics.json is serialized from saev.metrics.Metrics.

"},{"location":"api/framework/inference/#saev.framework.inference.Config","title":"Config(run=pathlib.Path('./runs/abcdefg'), data=OrderedConfig(), n_dists=25, ignore_labels=list(), force_recompute=False, save=True, device='cuda', slurm_acct='', slurm_partition='', n_hours=4.0, mem_gb=80, log_to=os.path.join('.', 'logs')) dataclass","text":"

Configuration for computing image activations.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.data","title":"data = OrderedConfig() class-attribute instance-attribute","text":"

Data configuration

"},{"location":"api/framework/inference/#saev.framework.inference.Config.device","title":"device = 'cuda' class-attribute instance-attribute","text":"

Which accelerator to use.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.force_recompute","title":"force_recompute = False class-attribute instance-attribute","text":"

Force recomputation even if files exist.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.ignore_labels","title":"ignore_labels = dataclasses.field(default_factory=list) class-attribute instance-attribute","text":"

Which token labels to ignore when calculating summarized image activations.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.log_to","title":"log_to = os.path.join('.', 'logs') class-attribute instance-attribute","text":"

Where to log Slurm job stdout/stderr.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.mem_gb","title":"mem_gb = 80 class-attribute instance-attribute","text":"

Node memory in GB.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.n_dists","title":"n_dists = 25 class-attribute instance-attribute","text":"

Number of features to save distributions for.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.n_hours","title":"n_hours = 4.0 class-attribute instance-attribute","text":"

Slurm job length in hours.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.run","title":"run = pathlib.Path('./runs/abcdefg') class-attribute instance-attribute","text":"

Path to the sae.pt file.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.save","title":"save = True class-attribute instance-attribute","text":"

Whether to write token_acts/statistics files. If False, only metrics.json is written.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_acct","title":"slurm_acct = '' class-attribute instance-attribute","text":"

Slurm account string. Empty means to not use Slurm.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_partition","title":"slurm_partition = '' class-attribute instance-attribute","text":"

Slurm partition.

"},{"location":"api/framework/inference/#saev.framework.inference.main","title":"main(cfg, sweep=None)","text":"

Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.

Parameters:

Name Type Description Default cfg Annotated[Config, arg(name='')]

Baseline config for inference.

required sweep Path | None

Path to .py file defining the sweep parameters.

None Source code in src/saev/framework/inference.py
@beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")], sweep: pathlib.Path | None = None\n):\n    \"\"\"\n    Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.\n\n    Args:\n        cfg: Baseline config inference.\n        sweep: Path to .py file defining the sweep parameters.\n    \"\"\"\n\n    if sweep is not None:\n        sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    assert all(c.slurm_acct == cfgs[0].slurm_acct for c in cfgs)\n    cfg = cfgs[0]\n\n    if not cfg.slurm_acct:\n        for i, cfg_item in enumerate(cfgs, start=1):\n            logger.info(\"Running config %d/%d locally.\", i, len(cfgs))\n            worker_fn(cfg_item)\n        logger.info(\"Jobs done.\")\n        return 0\n\n    import submitit\n    from submitit.core.utils import UncompletedJobError\n\n    executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n    executor.update_parameters(\n        time=int(cfg.n_hours * 60),\n        partition=cfg.slurm_partition,\n        gpus_per_node=1,\n        ntasks_per_node=1,\n        mem=f\"{cfg.mem_gb}GB\",\n        stderr_to_stdout=True,\n        account=cfg.slurm_acct,\n    )\n    with executor.batch():\n        jobs = []\n        for i, cfg in enumerate(cfgs):\n            do, reason, _ = need_compute(cfg)\n            if not do:\n                continue\n\n            logger.info(reason)\n            jobs.append(executor.submit(worker_fn, cfg))\n\n    time.sleep(5.0)\n\n    for i, job in enumerate(jobs, start=1):\n        logger.info(\"Job %d/%d: %s %s\", i, len(jobs), job.job_id, 
job.state)\n\n    for i, job in enumerate(jobs, start=1):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", i, len(jobs))\n        except UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, i)\n\n    logger.info(\"Jobs done.\")\n    return 0\n
"},{"location":"api/framework/saev.framework/","title":"saev.framework","text":"

Submitit entrypoint modules for SAE workflows.

saev.framework is for script-like modules (e.g. train/inference/shards) that need importable module paths for submitit launchers. Place reusable data/model utilities outside this package.

"},{"location":"api/framework/shards/","title":"saev.framework.shards","text":"

To save lots of activations, we want to do things in parallel, with lots of slurm jobs, and save multiple files, rather than just one.

This script handles that additional complexity.

Conceptually, activations are either thought of as

  1. A single [n_imgs x n_layers x (n_patches + 1), d_model] tensor. This is a dataset.
  2. Multiple [n_imgs_per_shard, n_layers, (n_patches + 1), d_model] tensors. This is a set of sharded activations.
"},{"location":"api/framework/shards/#saev.framework.shards.Config","title":"Config(data=datasets.Imagenet(), shards_root=pathlib.Path('$SAEV_SCRATCH/saev/shards/'), family='clip', ckpt='ViT-L-14/openai', batch_size=1024, n_workers=8, d_model=1024, layers=(lambda: [-2])(), content_tokens_per_example=256, cls_token=True, pixel_agg=PixelAgg.MAJORITY, max_tokens_per_shard=2400000, ssl=True, device='cuda', n_hours=24.0, slurm_acct='', slurm_partition='', log_to='./logs') dataclass","text":"

Configuration for calculating and saving ViT activations.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.batch_size","title":"batch_size = 1024 class-attribute instance-attribute","text":"

Batch size for ViT inference.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.ckpt","title":"ckpt = 'ViT-L-14/openai' class-attribute instance-attribute","text":"

Specific model checkpoint.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.cls_token","title":"cls_token = True class-attribute instance-attribute","text":"

Whether the model has a [CLS] token.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.content_tokens_per_example","title":"content_tokens_per_example = 256 class-attribute instance-attribute","text":"

Number of content tokens per example (depends on model).

"},{"location":"api/framework/shards/#saev.framework.shards.Config.d_model","title":"d_model = 1024 class-attribute instance-attribute","text":"

Dimension of the ViT activations (depends on model).

"},{"location":"api/framework/shards/#saev.framework.shards.Config.data","title":"data = dataclasses.field(default_factory=(datasets.Imagenet)) class-attribute instance-attribute","text":"

Which dataset to use.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.device","title":"device = 'cuda' class-attribute instance-attribute","text":"

Which device to use.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.family","title":"family = 'clip' class-attribute instance-attribute","text":"

Which model family.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.layers","title":"layers = dataclasses.field(default_factory=(lambda: [-2])) class-attribute instance-attribute","text":"

Which layers to save. By default, the second-to-last layer.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.log_to","title":"log_to = './logs' class-attribute instance-attribute","text":"

Where to log Slurm job stdout/stderr.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.max_tokens_per_shard","title":"max_tokens_per_shard = 2400000 class-attribute instance-attribute","text":"

Maximum number of activations per shard; 2.4M is approximately 10GB for 1024-dimensional 4-byte activations.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.n_hours","title":"n_hours = 24.0 class-attribute instance-attribute","text":"

Slurm job length in hours.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.n_workers","title":"n_workers = 8 class-attribute instance-attribute","text":"

Number of dataloader workers.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.shards_root","title":"shards_root = pathlib.Path('$SAEV_SCRATCH/saev/shards/') class-attribute instance-attribute","text":"

Where to write shards.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_acct","title":"slurm_acct = '' class-attribute instance-attribute","text":"

Slurm account string. Empty means to not use Slurm.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_partition","title":"slurm_partition = '' class-attribute instance-attribute","text":"

Slurm partition.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.ssl","title":"ssl = True class-attribute instance-attribute","text":"

Whether to use SSL.

"},{"location":"api/framework/shards/#saev.framework.shards.cli","title":"cli(cfg)","text":"

Save ViT activations for use later on.

Parameters:

Name Type Description Default cfg Annotated[Config, arg(name='')]

Configuration for activations.

required Source code in src/saev/framework/shards.py
@beartype.beartype\ndef cli(cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")]):\n    \"\"\"\n    Save ViT activations for use later on.\n\n    Args:\n        cfg: Configuration for activations.\n    \"\"\"\n    logger = logging.getLogger(\"dump\")\n\n    if not cfg.ssl:\n        logger.warning(\"Ignoring SSL certs. Try not to do this!\")\n        # https://github.com/openai/whisper/discussions/734#discussioncomment-4491761\n        # Ideally we don't have to disable SSL but we are only downloading weights.\n        import ssl\n\n        ssl._create_default_https_context = ssl._create_unverified_context\n\n    from saev.data import shards\n\n    kwargs = dict(\n        family=cfg.family,\n        ckpt=cfg.ckpt,\n        content_tokens_per_example=cfg.content_tokens_per_example,\n        cls_token=cfg.cls_token,\n        d_model=cfg.d_model,\n        layers=cfg.layers,\n        data=cfg.data,\n        batch_size=cfg.batch_size,\n        n_workers=cfg.n_workers,\n        max_tokens_per_shard=cfg.max_tokens_per_shard,\n        shards_root=cfg.shards_root,\n        device=cfg.device,\n        pixel_agg=cfg.pixel_agg,\n    )\n\n    # Actually record activations.\n    if cfg.slurm_acct:\n        import submitit\n\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n        executor.update_parameters(\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            cpus_per_task=cfg.n_workers + 4,\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n\n        job = executor.submit(shards.worker_fn, **kwargs)\n        logger.info(\"Running job '%s'.\", job.job_id)\n        job.result()\n\n    else:\n        shards.worker_fn(**kwargs)\n
"},{"location":"api/framework/train/","title":"saev.framework.train","text":"

Trains many SAEs in parallel to amortize the cost of loading a single batch of data over many SAE training runs.

Checklist for making sure your training doesn't suck:

"},{"location":"api/framework/train/#saev.framework.train.Config","title":"Config(train_data=saev.data.ShuffledConfig(), val_data=saev.data.ShuffledConfig(), n_train=100000000, n_val=10000000, sae=nn.SparseAutoencoderConfig(), objective=nn.objectives.Matryoshka(), n_sparsity_warmup=0, optim='adam', lr=0.0004, n_lr_warmup=500, grad_clip=1.0, track=True, wandb_project='saev', tags=(), log_every=25, runs_root=pathlib.Path('$SAEV_NFS/saev/runs'), device='cuda', seed=42, slurm_acct='', slurm_partition='', n_hours=24.0, mem_gb=128, log_to=os.path.join('.', 'logs')) dataclass","text":"

Configuration for training a sparse autoencoder on a vision transformer.

"},{"location":"api/framework/train/#saev.framework.train.Config.device","title":"device = 'cuda' class-attribute instance-attribute","text":"

Hardware device.

"},{"location":"api/framework/train/#saev.framework.train.Config.grad_clip","title":"grad_clip = 1.0 class-attribute instance-attribute","text":"

Maximum gradient norm across all SAE parameters.

"},{"location":"api/framework/train/#saev.framework.train.Config.log_every","title":"log_every = 25 class-attribute instance-attribute","text":"

How often to log to WandB.

"},{"location":"api/framework/train/#saev.framework.train.Config.log_to","title":"log_to = os.path.join('.', 'logs') class-attribute instance-attribute","text":"

Where to log Slurm job stdout/stderr.

"},{"location":"api/framework/train/#saev.framework.train.Config.lr","title":"lr = 0.0004 class-attribute instance-attribute","text":"

Learning rate.

"},{"location":"api/framework/train/#saev.framework.train.Config.mem_gb","title":"mem_gb = 128 class-attribute instance-attribute","text":"

Node memory in GB.

"},{"location":"api/framework/train/#saev.framework.train.Config.n_hours","title":"n_hours = 24.0 class-attribute instance-attribute","text":"

Slurm job length in hours.

"},{"location":"api/framework/train/#saev.framework.train.Config.n_lr_warmup","title":"n_lr_warmup = 500 class-attribute instance-attribute","text":"

Number of learning rate warmup steps.

"},{"location":"api/framework/train/#saev.framework.train.Config.n_sparsity_warmup","title":"n_sparsity_warmup = 0 class-attribute instance-attribute","text":"

Number of sparsity coefficient warmup steps.

"},{"location":"api/framework/train/#saev.framework.train.Config.n_train","title":"n_train = 100000000 class-attribute instance-attribute","text":"

Number of SAE training samples.

"},{"location":"api/framework/train/#saev.framework.train.Config.n_val","title":"n_val = 10000000 class-attribute instance-attribute","text":"

Number of SAE evaluation samples.

"},{"location":"api/framework/train/#saev.framework.train.Config.objective","title":"objective = nn.objectives.Matryoshka() class-attribute instance-attribute","text":"

SAE objective configuration.

"},{"location":"api/framework/train/#saev.framework.train.Config.optim","title":"optim = 'adam' class-attribute instance-attribute","text":"

Optimizer for training.

"},{"location":"api/framework/train/#saev.framework.train.Config.runs_root","title":"runs_root = pathlib.Path('$SAEV_NFS/saev/runs') class-attribute instance-attribute","text":"

Root directory for runs.

"},{"location":"api/framework/train/#saev.framework.train.Config.sae","title":"sae = nn.SparseAutoencoderConfig() class-attribute instance-attribute","text":"

SAE configuration.

"},{"location":"api/framework/train/#saev.framework.train.Config.seed","title":"seed = 42 class-attribute instance-attribute","text":"

Random seed.

"},{"location":"api/framework/train/#saev.framework.train.Config.slurm_acct","title":"slurm_acct = '' class-attribute instance-attribute","text":"

Slurm account string. Empty means to not use Slurm.

"},{"location":"api/framework/train/#saev.framework.train.Config.slurm_partition","title":"slurm_partition = '' class-attribute instance-attribute","text":"

Slurm partition.

"},{"location":"api/framework/train/#saev.framework.train.Config.tags","title":"tags = () class-attribute instance-attribute","text":"

Tags to add to WandB run.

"},{"location":"api/framework/train/#saev.framework.train.Config.track","title":"track = True class-attribute instance-attribute","text":"

Whether to track with WandB.

"},{"location":"api/framework/train/#saev.framework.train.Config.train_data","title":"train_data = saev.data.ShuffledConfig() class-attribute instance-attribute","text":"

Training data.

"},{"location":"api/framework/train/#saev.framework.train.Config.val_data","title":"val_data = saev.data.ShuffledConfig() class-attribute instance-attribute","text":"

Validation data.

"},{"location":"api/framework/train/#saev.framework.train.Config.wandb_project","title":"wandb_project = 'saev' class-attribute instance-attribute","text":"

WandB project name.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics","title":"EvalMetrics(l0, l1, mse, normalized_mse, sse_sae, sse_baseline, n_dead, n_almost_dead, n_dense, freqs, mean_values, almost_dead_threshold, dense_threshold) dataclass","text":"

Results of evaluating a trained SAE on a dataset.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.almost_dead_threshold","title":"almost_dead_threshold instance-attribute","text":"

Threshold for an \"almost dead\" neuron.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.dense_threshold","title":"dense_threshold instance-attribute","text":"

Threshold for a dense neuron.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.freqs","title":"freqs instance-attribute","text":"

How often each feature fired.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l0","title":"l0 instance-attribute","text":"

Mean L0 across all examples.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l1","title":"l1 instance-attribute","text":"

Mean L1 across all examples.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mean_values","title":"mean_values instance-attribute","text":"

The mean value for each feature when it did fire.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mse","title":"mse instance-attribute","text":"

Mean MSE across all examples.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_almost_dead","title":"n_almost_dead instance-attribute","text":"

Number of neurons that fired on fewer than almost_dead_threshold of examples.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dead","title":"n_dead instance-attribute","text":"

Number of neurons that never fired on any example.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dense","title":"n_dense instance-attribute","text":"

Number of neurons that fired on more than dense_threshold of examples.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.normalized_mse","title":"normalized_mse instance-attribute","text":"

Normalized reconstruction MSE (SAE SSE / mean-baseline SSE).

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_baseline","title":"sse_baseline instance-attribute","text":"

Total reconstruction sum-squared error for the mean baseline.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_sae","title":"sse_sae instance-attribute","text":"

Total reconstruction sum-squared error for the SAE.

"},{"location":"api/framework/train/#saev.framework.train.evaluate","title":"evaluate(cfgs, saes, objectives)","text":"

Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.

The metrics computed are mean L0/L1/MSE losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values. A list of EvalMetrics is returned, one for each SAE.

Source code in src/saev/framework/train.py
@beartype.beartype\n@torch.no_grad()\ndef evaluate(\n    cfgs: list[Config], saes: torch.nn.ModuleList, objectives: torch.nn.ModuleList\n) -> list[EvalMetrics]:\n    \"\"\"\n    Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.\n\n    The metrics computed are mean ``L0``/``L1``/``MSE`` losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values.  A list of `EvalMetrics` is returned, one for each SAE.\n    \"\"\"\n\n    torch.cuda.empty_cache()\n\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    saes.eval()\n    objectives.eval()\n\n    cfg = cfgs[0]\n\n    almost_dead_lim = 1e-7\n    dense_lim = 1e-2\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.val_data)\n    n_val = min(dataloader.n_samples, cfg.n_val)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, n_val)\n\n    n_fired = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    values = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    total_l0_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_l1_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_mse_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_sse_sae = torch.zeros(len(cfgs), dtype=torch.float64, device=cfg.device)\n    sum_sq = torch.zeros((), dtype=torch.float64, device=cfg.device)\n    sum_vec = torch.zeros(\n        (saes[0].cfg.d_model,), dtype=torch.float64, device=cfg.device\n    )\n    n_tokens = 0\n\n    for batch in helpers.progress(dataloader, desc=\"eval\", every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        batch_size = acts_BD.shape[0]\n        acts_BD_f64 = acts_BD.to(torch.float64)\n        sum_sq += torch.sum(acts_BD_f64 * acts_BD_f64)\n        sum_vec += 
acts_BD_f64.sum(dim=0)\n        n_tokens += batch_size\n        for i, (sae, objective) in enumerate(zip(saes, objectives)):\n            # Objective now handles the forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            # Get f_x for metrics\n            residual = acts_BD - fwd.x_hats[:, -1, :]\n            total_sse_sae[i] += torch.sum((residual.to(torch.float64)) ** 2)\n            n_fired[i] += einops.reduce(\n                fwd.f_x > 0, \"batch d_sae -> d_sae\", \"sum\"\n            ).cpu()\n            values[i] += einops.reduce(fwd.f_x, \"batch d_sae -> d_sae\", \"sum\").cpu()\n            total_l0_sum[i] += loss.l0.cpu().item() * batch_size\n            total_l1_sum[i] += loss.l1.cpu().item() * batch_size\n            total_mse_sum[i] += loss.mse.cpu().item() * batch_size\n\n    msg = \"Validation dataloader yielded zero tokens; cannot compute normalized MSE.\"\n    assert n_tokens > 0, msg\n    sum_vec_sq = torch.dot(sum_vec, sum_vec)\n    sse_baseline = sum_sq - sum_vec_sq / n_tokens\n    msg = (\n        f\"Validation baseline variance non-positive: \"\n        f\"sse_baseline={sse_baseline.item():.6e}\"\n    )\n    assert sse_baseline > 0, msg\n    sse_baseline_value = sse_baseline.item()\n\n    mean_values = values / n_fired\n    freqs = n_fired / n_tokens\n\n    l0 = (total_l0_sum / n_tokens).tolist()\n    l1 = (total_l1_sum / n_tokens).tolist()\n    mse = (total_mse_sum / n_tokens).tolist()\n    sse_sae = total_sse_sae.tolist()\n    normalized_mse = (total_sse_sae / sse_baseline_value).tolist()\n    sse_baseline_all = [sse_baseline_value] * len(cfgs)\n\n    n_dead = einops.reduce(freqs == 0, \"n_saes d_sae -> n_saes\", \"sum\").tolist()\n    n_almost_dead = einops.reduce(\n        freqs < almost_dead_lim, \"n_saes d_sae -> n_saes\", \"sum\"\n    ).tolist()\n    n_dense = einops.reduce(freqs > dense_lim, \"n_saes d_sae -> n_saes\", \"sum\").tolist()\n\n    metrics = []\n    for i in range(len(cfgs)):\n        
metrics.append(\n            EvalMetrics(\n                l0=l0[i],\n                l1=l1[i],\n                mse=mse[i],\n                normalized_mse=normalized_mse[i],\n                sse_sae=sse_sae[i],\n                sse_baseline=sse_baseline_all[i],\n                n_dead=n_dead[i],\n                n_almost_dead=n_almost_dead[i],\n                n_dense=n_dense[i],\n                freqs=freqs[i],\n                mean_values=mean_values[i],\n                almost_dead_threshold=almost_dead_lim,\n                dense_threshold=dense_lim,\n            )\n        )\n\n    return metrics\n
"},{"location":"api/framework/train/#saev.framework.train.main","title":"main(cfg, sweep=None, max_parallel=None)","text":"

Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.

Parameters:

Name Type Description Default cfg Annotated[Config, arg(name='')]

Baseline config for training an SAE.

required sweep Path | None

Path to .py file defining the sweep parameters.

None max_parallel int | None

Maximum SAEs to train concurrently within a single worker.

None Source code in src/saev/framework/train.py
@beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")],\n    sweep: pathlib.Path | None = None,\n    max_parallel: int | None = None,\n):\n    \"\"\"\n    Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.\n\n    Args:\n        cfg: Baseline config for training an SAE.\n        sweep: Path to .py file defining the sweep parameters.\n        max_parallel: Maximum SAEs to train concurrently within a single worker.\n    \"\"\"\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n\n    import submitit\n\n    if sweep is not None:\n        sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    cfgs = split_cfgs(cfgs)\n    # codex resume 019ac16a-dc07-78e3-82c7-e5c08a6c6f0c\n    if max_parallel:\n        cfgs = [\n            subgroup\n            for group in cfgs\n            for subgroup in [\n                group[start:end]\n                for start, end in helpers.batched_idx(len(group), max_parallel)\n            ]\n        ]\n\n    logger.info(\"Running %d training jobs.\", len(cfgs))\n\n    # Use the first resolved config for submitit parameters (n_hours, mem_gb, etc.) 
so that sweep values take effect instead of CLI defaults.\n    cfg = cfgs[0][0]\n\n    if cfg.slurm_acct:\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n        executor.update_parameters(\n            job_name=\"sae-train\",\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            mem=f\"{cfg.mem_gb}GB\",\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n    else:\n        executor = submitit.DebugExecutor(folder=cfg.log_to)\n\n    try:\n        cloudpickle.dumps(worker_fn)\n        for group in cfgs:\n            cloudpickle.dumps(group)\n    except TypeError as err:\n        raise AssertionError(f\"Failed to pickle: {err}\")\n\n    with executor.batch():\n        jobs = [executor.submit(worker_fn, group) for group in cfgs]\n\n    # Give the executor five seconds to fire the jobs off.\n    time.sleep(5.0)\n\n    # Log initial status.\n    for j, job in enumerate(jobs):\n        logger.info(\"Job %d/%d: %s %s\", j + 1, len(jobs), job.job_id, job.state)\n\n    for j, job in enumerate(jobs):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", j + 1, len(jobs))\n        except submitit.core.utils.UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, j)\n\n    logger.info(\"Jobs done.\")\n
"},{"location":"api/framework/train/#saev.framework.train.split_cfgs","title":"split_cfgs(cfgs)","text":"

Splits configs into groups that can be parallelized.

Parameters:

Name Type Description Default cfgs list[Config]

A list of configs from a sweep file.

required

Returns:

Type Description list[list[Config]]

A list of lists, where the configs in each sublist do not differ in any keys that are in CANNOT_PARALLELIZE. This means that each sublist is a valid \"parallel\" set of configs for train.

Source code in src/saev/framework/train.py
@beartype.beartype\ndef split_cfgs(cfgs: list[Config]) -> list[list[Config]]:\n    \"\"\"\n    Splits configs into groups that can be parallelized.\n\n    Arguments:\n        cfgs: A list of configs from a sweep file.\n\n    Returns:\n        A list of lists, where the configs in each sublist do not differ in any keys that are in `CANNOT_PARALLELIZE`. This means that each sublist is a valid \"parallel\" set of configs for `train`.\n    \"\"\"\n    groups = collections.defaultdict(list)\n    for cfg in cfgs:\n        key = _parallel_key(cfg)\n        groups[key].append(cfg)\n\n    return [\n        [\n            dataclasses.replace(\n                cfg,\n                train_data=dataclasses.replace(cfg.train_data, seed=cfg.seed),\n                val_data=dataclasses.replace(cfg.val_data, seed=cfg.seed),\n            )\n            for cfg in group\n        ]\n        for _, group in sorted(groups.items())\n    ]\n
"},{"location":"api/framework/train/#saev.framework.train.train","title":"train(cfgs)","text":"

Explicitly declare the optimizer, schedulers, dataloader, etc outside of main so that all the variables are dropped from scope and can be garbage collected.

Source code in src/saev/framework/train.py
@beartype.beartype\ndef train(\n    cfgs: list[Config],\n) -> tuple[\n    torch.nn.ModuleList, torch.nn.ModuleList, saev.utils.wandb.ParallelWandbRun, int\n]:\n    \"\"\"\n    Explicitly declare the optimizer, schedulers, dataloader, etc outside of `main` so that all the variables are dropped from scope and can be garbage collected.\n    \"\"\"\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    logger.info(\"Parallelizing %d runs.\", len(cfgs))\n\n    cfg = cfgs[0]\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.train_data)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, cfg.n_train)\n\n    saes, objectives, param_groups = make_saes(\n        [(c.sae, c.objective) for c in cfgs], dataloader\n    )\n\n    mode = \"online\" if cfg.track else \"disabled\"\n    tags = list(cfg.tags)\n\n    # Add metadata to configs for WandB logging\n    metadata_dict = dataclasses.asdict(dataloader.metadata)\n    wandb_configs = []\n    for c in cfgs:\n        cfg_dict = dataclasses.asdict(c)\n        cfg_dict[\"train_data\"][\"metadata\"] = metadata_dict\n        wandb_configs.append(cfg_dict)\n\n    run = saev.utils.wandb.ParallelWandbRun(\n        cfg.wandb_project, wandb_configs, mode, tags\n    )\n    slurm_job_id = os.environ.get(\"SLURM_JOB_ID\")\n    if slurm_job_id:\n        run.set_summary(\"slurm_job_id\", slurm_job_id)\n\n    # Build per-SAE bundles of optimizers/param_groups/schedulers so each config's LR and warmup drive both Muon and Adam param groups for that SAE. 
We reshape the flat param_groups into per-SAE lists because we need to:\n    #   (a) build schedulers with that SAE's cfg\n    #   (b) step/zero only that SAE's optimizers\n    #   (c) log that SAE's LR without fishing through a mixed flat list.\n    grouped_pgs: list[list[dict[str, object]]] = []\n    optimizers: list[list[torch.optim.Optimizer]] = []\n    lr_schedulers: list[list[saev.utils.scheduling.WarmupCosine]] = []\n\n    for i, (sae, cfg, param_group) in enumerate(zip(saes, cfgs, param_groups)):\n        if cfg.optim == \"adam\":\n            opts = [torch.optim.Adam([param_group], fused=True)]\n        elif cfg.optim == \"muon\":\n            muon_params = [p for p in sae.parameters() if p.ndim == 2]\n            msg = f\"Muon optimizer requires 2D params; SAE {i} has none.\"\n            assert muon_params, msg\n            adam_params = [p for p in sae.parameters() if p.ndim != 2]\n            msg = f\"Adam optimizer requires non-2D params; SAE {i} has none.\"\n            assert adam_params, msg\n\n            opts = [\n                torch.optim.Muon(muon_params, lr=0.0),\n                torch.optim.Adam(adam_params, lr=0.0, fused=True),\n            ]\n        else:\n            tp.assert_never(cfg.optim)\n\n        pgs = [pg for opt in opts for pg in opt.param_groups]\n        scheds = [\n            saev.utils.scheduling.WarmupCosine(\n                0.0, cfg.n_lr_warmup, cfg.lr, len(dataloader), 0.0\n            )\n            for _ in pgs\n        ]\n\n        optimizers.append(opts)\n        grouped_pgs.append(pgs)\n        lr_schedulers.append(scheds)\n\n    param_groups = grouped_pgs\n\n    saes.train()\n    saes = saes.to(cfg.device)\n    objectives.train()\n    objectives = objectives.to(cfg.device)\n\n    global_step, n_patches_seen = 0, 0\n    dl_monitor = DataloaderMonitor(dataloader)\n\n    for batch in helpers.progress(dataloader, every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        for 
sae in saes:\n            sae.normalize_w_dec()\n        # Forward passes and loss calculations.\n        losses = []\n        fwds = []\n        for sae, objective in zip(saes, objectives):\n            # Objective handles the SAE forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            losses.append(loss)\n            fwds.append(fwd)\n\n        n_patches_seen += len(acts_BD)\n\n        for loss in losses:\n            loss.loss.backward()\n\n        # remove parallel gradients or normalize columns?\n        for sae in saes:\n            sae.remove_parallel_grads()\n\n        # Calculate gradient norms before optimizer step\n        grad_norms = []\n        for sae, cfg in zip(saes, cfgs):\n            # Clip gradients and get the gradient norm\n            grad_norm = torch.nn.utils.clip_grad_norm_(\n                sae.parameters(), max_norm=cfg.grad_clip\n            )\n\n            grad_norms.append(grad_norm)\n\n        # Log metrics after gradient computation\n        if (global_step + 1) % cfg.log_every == 0:\n            with torch.no_grad():\n                now = time.time()\n                dl_metrics = dl_monitor.compute(now=now)\n\n                metadata = dataloader.metadata\n                entropy_metrics = statistics.calc_batch_entropy(\n                    batch[\"example_idx\"].to(\"cpu\"),\n                    batch[\"token_idx\"].to(\"cpu\"),\n                    metadata.n_examples,\n                    metadata.content_tokens_per_example,\n                )\n                dl_metrics.update(entropy_metrics)\n\n                acts_bd_f64 = acts_BD.to(torch.float64)\n                n_batch = acts_bd_f64.shape[0]\n                msg = \"Batch is empty; cannot compute normalized MSE.\"\n                assert n_batch > 0, msg\n                batch_sum_sq = torch.sum(acts_bd_f64 * acts_bd_f64)\n                batch_sum_vec = acts_bd_f64.sum(dim=0)\n                batch_baseline_sse = (\n                    
batch_sum_sq - torch.dot(batch_sum_vec, batch_sum_vec) / n_batch\n                )\n                msg = f\"Batch baseline variance non-positive: sse_baseline={batch_baseline_sse.item():.6e}\"\n                assert batch_baseline_sse > 0, msg\n                batch_baseline_sse_value = batch_baseline_sse.item()\n\n                metrics = []\n                for i, (loss, sae, objective, fwd) in enumerate(\n                    zip(losses, saes, objectives, fwds)\n                ):\n                    current_lr = param_groups[i][0][\"lr\"]\n                    # Explained variance: 1 - Var(x - x_hat) / Var(x)\n                    residual = acts_BD - fwd.x_hats[:, -1, :]\n                    batch_sse_sae_value = torch.sum(\n                        (residual.to(torch.float64)) ** 2\n                    ).item()\n                    normalized_mse_value = (\n                        batch_sse_sae_value / batch_baseline_sse_value\n                    )\n                    explained_var = 1 - residual.var() / acts_BD.var()\n\n                    # Dead unit percentage: fraction of units that never activate\n                    dead_pct = ((fwd.f_x.abs() > 1e-12).sum(0) == 0).float().mean()\n\n                    # Dictionary coherence: max |<w_i, w_j>| for i != j\n                    W = sae.W_dec  # (d_sae, d_model)\n                    # Normalize each row (each SAE feature)\n                    W_norm = W / W.norm(dim=1, keepdim=True)\n                    coherence = (W_norm @ W_norm.T).abs().triu(1).max()\n\n                    # Average decoder row L2 norm (since W_dec is d_sae x d_model)\n                    avg_w_row_norm = sae.W_dec.norm(dim=1).mean()\n\n                    metric = {\n                        **{f\"loss/{key}\": val for key, val in loss.metrics().items()},\n                        \"progress/n_patches_seen\": n_patches_seen,\n                        \"progress/learning_rate\": current_lr,\n                        
\"metrics/explained_variance\": explained_var.item(),\n                        \"metrics/dead_unit_pct\": dead_pct.item(),\n                        \"metrics/dictionary_coherence\": coherence.item(),\n                        \"metrics/avg_decoder_row_norm\": avg_w_row_norm.item(),\n                        \"metrics/grad_norm\": grad_norms[i].item(),\n                        \"metrics/sse_sae\": batch_sse_sae_value,\n                        \"metrics/sse_baseline\": batch_baseline_sse_value,\n                        \"metrics/normalized_mse\": normalized_mse_value,\n                        **dl_metrics,\n                    }\n\n                    metrics.append(metric)\n                run.log(metrics, step=global_step)\n\n                logger.info(\n                    \", \".join(\n                        f\"{key}: {value:.5f}\"\n                        for key, value in losses[0].metrics().items()\n                    )\n                )\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.step()\n\n        # Update LR and sparsity coefficients.\n        for pgs, scheds in zip(param_groups, lr_schedulers):\n            for pg, sched in zip(pgs, scheds):\n                pg[\"lr\"] = sched.step()\n\n        # for objective, scheduler in zip(objectives, sparsity_schedulers):\n        #     objective.sparsity_coeff = scheduler.step()\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.zero_grad()\n\n        global_step += 1\n\n    return saes, objectives, run, global_step\n
"},{"location":"api/nn/modeling/","title":"saev.nn.modeling","text":"

Neural network architectures for sparse autoencoders.

"},{"location":"api/nn/modeling/#saev.nn.modeling.AuxK","title":"AuxK(key='auxk', k_aux=512, alpha=1 / 32) dataclass","text":"

AuxK auxiliary reconstruction loss for dead latents.

"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK","title":"BatchTopK(key='batch-top-k', top_k=32, sparsity=NoSparsity(), momentum=0.1, aux=AuxK()) dataclass","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK.top_k","title":"top_k = 32 class-attribute instance-attribute","text":"

How many values are allowed to be non-zero per sample in the batch.

"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation","title":"BatchTopKActivation(cfg)","text":"

Bases: Module

BatchTopK activation and inference-time threshold for sparse autoencoders.

This module implements a BatchTopK nonlinearity that enforces a fixed sparsity budget across a batch, together with an inference-time approximation that replaces the batch-coupled operation with a simple elementwise threshold.

Training mode (model.train()): Given pre-activation codes x with shape [batch, d_sae], the BatchTopK activation flattens the batch to shape [batch * d_sae], selects the largest (batch * top_k) entries by value, and sets all other entries to zero. This enforces an average of exactly top_k active features per example while allowing the \"activation budget\" to move between examples in the batch.

During training, we also estimate an inference threshold theta that approximates the effective cutoff induced by BatchTopK. For each batch, we compute the minimum positive activation that survives the BatchTopK mask and update an exponential moving average of this quantity. This running estimate plays the same role as BatchNorm running statistics: it is updated only in training mode and treated as fixed at inference.\n

Eval mode (model.eval()): At inference time we do not apply a batch-coupled top-k, since that would make each example depend on the rest of the eval batch. Instead, we use the stored running threshold theta to define a JumpReLU nonlinearity:

    y = x if x > theta else 0\n\napplied elementwise and independently to each example. This preserves the approximate sparsity level learned during training, but makes the layer deterministic and sample-wise independent for evaluation, probing, and downstream use.\n
Inputs

x: Tensor of shape [batch, d_sae] containing pre-activation codes.

Outputs

Tensor of shape [batch, d_sae] with the same dtype and device as x, where either: - in training mode: exactly (batch * top_k) entries are non-zero across the batch due to the BatchTopK mask, or - in eval mode: entries are zeroed by an elementwise JumpReLU with the learned threshold theta.

Source code in src/saev/nn/modeling.py
def __init__(self, cfg: BatchTopK):\n    super().__init__()\n    self.cfg = cfg\n\n    self.register_buffer(\"threshold\", torch.tensor(0.0))\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation.forward","title":"forward(x)","text":"

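Apply the BatchTopK activation. In training mode, keep the (top_k * batch) largest entries across the whole flattened batch (not a separate top-k per sample) and update the running threshold. In eval mode, zero every entry that is not above the stored threshold (or not above 0 when the threshold is non-positive).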

Source code in src/saev/nn/modeling.py
def forward(self, x: Float[Tensor, \"batch d_sae\"]) -> Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to each sample in the batch.\n    \"\"\"\n\n    if not self.training:\n        if self.threshold <= 0:\n            return torch.where(x > 0, x, torch.zeros_like(x))\n\n        return torch.where(x > self.threshold, x, torch.zeros_like(x))\n\n    bsz, d_sae = x.shape\n    x_flat = x.flatten()\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k * bsz, d_sae * bsz)\n    _, idxs = torch.topk(x_flat, k, sorted=False)\n    mask = torch.zeros_like(x_flat).scatter(-1, idxs, 1.0).reshape(x.shape)\n\n    x = torch.mul(mask, x)\n\n    with torch.no_grad():\n        pos = x[x > 0]\n        if pos.numel() >= 0:\n            self.threshold.mul_(1 - self.cfg.momentum).add_(\n                self.cfg.momentum * pos.min()\n            )\n\n    return x\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.NoAux","title":"NoAux(key='no-aux') dataclass","text":"

No auxiliary loss (e.g., for ReLU).

"},{"location":"api/nn/modeling/#saev.nn.modeling.NoSparsity","title":"NoSparsity(key='no-sparsity') dataclass","text":"

No explicit sparsity penalty (e.g. for TopK/BatchTopK where k controls sparsity).

"},{"location":"api/nn/modeling/#saev.nn.modeling.Relu","title":"Relu(key='relu', sparsity=L1Sparsity(coeff=0.0004), aux=NoAux()) dataclass","text":"

Vanilla ReLU

"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder","title":"SparseAutoencoder(cfg)","text":"

Bases: Module

Sparse auto-encoder (SAE)

Source code in src/saev/nn/modeling.py
def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. That means load_state_dict overwrites W_dec when it loads W_enc.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.EncodeOut","title":"EncodeOut","text":"

Bases: NamedTuple

Outputs of encode: pre-activations and activated latents.

"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.Output","title":"Output","text":"

Bases: NamedTuple

Full SAE forward outputs for objectives and metrics.

"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.decode","title":"decode(f_x, *, prefixes=None)","text":"

Decode latent features to reconstructions.

Parameters:

Name Type Description Default f_x Float[Tensor, 'batch d_sae']

Latent features of shape (batch, d_sae)

required prefixes Int64[Tensor, ' n_prefixes'] | None

Optional tensor of prefix lengths for Matryoshka decoding.

None

Returns:

Type Description Float[Tensor, 'batch n_prefixes d_model']

Matryoshka reconstructions (batch, n_prefixes, d_model).

Source code in src/saev/nn/modeling.py
def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -> Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka cumulative decode\n    device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] > prefixes[:-1])\n    assert 1 <= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -> (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -> ... d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. 
Needs to be cleaned up.\n    return x_hats\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.forward","title":"forward(x)","text":"

Given x, calculates the reconstructed x_hat and the intermediate activations f_x.

Parameters:

Name Type Description Default x Float[Tensor, 'batch d_model']

a batch of transformer activations.

required Source code in src/saev/nn/modeling.py
def forward(self, x: Float[Tensor, \"batch d_model\"]) -> Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.normalize_w_dec","title":"normalize_w_dec()","text":"

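Normalize each row of W_dec to unit norm (W_dec has shape (d_sae, d_model), so there is one row per SAE latent). Does nothing when cfg.normalize_w_dec is false.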

Source code in src/saev/nn/modeling.py
@torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.remove_parallel_grads","title":"remove_parallel_grads()","text":"

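Remove from W_dec.grad the component that is parallel to the corresponding row of W_dec. Does nothing when cfg.remove_parallel_grads is false or when W_dec.grad is None.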

Source code in src/saev/nn/modeling.py
@torch.no_grad()\ndef remove_parallel_grads(self):\n    \"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model -> d_sae\",\n    )\n\n    norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq > 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -> d_sae d_model\",\n    )\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig","title":"SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True) dataclass","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.activation","title":"activation = TopK() class-attribute instance-attribute","text":"

Activation function.

"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_model","title":"d_model = 1024 class-attribute instance-attribute","text":"

Size of x.

"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_sae","title":"d_sae = 1024 * 16 class-attribute instance-attribute","text":"

Number of features in SAE latent space; size of f(x).

"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.normalize_w_dec","title":"normalize_w_dec = True class-attribute instance-attribute","text":"

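Whether to keep every decoder vector in W_dec at unit norm. W_dec is stored as (d_sae, d_model), so in this implementation the normalized vectors are its rows. See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".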

"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_blend","title":"reinit_blend = 0.8 class-attribute instance-attribute","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"reinit_enc_dec_tranpose = True class-attribute instance-attribute","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.remove_parallel_grads","title":"remove_parallel_grads = True class-attribute instance-attribute","text":"

Whether to remove gradients parallel to W_dec columns (which will be ignored because we force the columns to have unit norm). See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.

"},{"location":"api/nn/modeling/#saev.nn.modeling.TopK","title":"TopK(key='top-k', top_k=32, sparsity=NoSparsity(), aux=AuxK()) dataclass","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.TopK.top_k","title":"top_k = 32 class-attribute instance-attribute","text":"

How many values are allowed to be non-zero.

"},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation","title":"TopKActivation(cfg)","text":"

Bases: Module

Top-K activation function. For use as activation function of sparse encoder.

Source code in src/saev/nn/modeling.py
def __init__(self, cfg: TopK):\n    super().__init__()\n    self.cfg = cfg\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation.forward","title":"forward(x)","text":"

Apply top-k activation to the input tensor.

Source code in src/saev/nn/modeling.py
def forward(self, x: Float[Tensor, \"batch d_sae\"]) -> Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to the input tensor.\n    \"\"\"\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k, d_sae)\n    _, idxs = torch.topk(x, k, dim=-1, sorted=False)\n    mask = torch.zeros_like(x).scatter(-1, idxs, 1.0)\n\n    return torch.mul(mask, x)\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.dump","title":"dump(fpath, sae)","text":"

Save an SAE checkpoint to disk along with configuration, using the trick from equinox.

Parameters:

Name Type Description Default fpath Path | str

filepath to save checkpoint to.

required sae SparseAutoencoder

sparse autoencoder checkpoint to save.

required Source code in src/saev/nn/modeling.py
@beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.load","title":"load(fpath, *, device='cpu')","text":"

Loads a sparse autoencoder from disk.

Source code in src/saev/nn/modeling.py
@beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -> SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. 
Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        
cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n
"},{"location":"api/nn/objectives/","title":"saev.nn.objectives","text":""},{"location":"api/nn/objectives/#saev.nn.objectives.Loss","title":"Loss() dataclass","text":"

The loss term for an autoencoder training batch.

"},{"location":"api/nn/objectives/#saev.nn.objectives.Loss.loss","title":"loss property","text":"

Total loss.

"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka","title":"Matryoshka(n_prefixes=10, dead_threshold_tokens=10000000) dataclass","text":"

Config for the Matryoshka loss for another arbitrary SAE class.

Reference code is here: https://github.com/noanabeshima/matryoshka-saes and the original reading is https://sparselatents.com/matryoshka.html and https://arxiv.org/pdf/2503.17547

"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.dead_threshold_tokens","title":"dead_threshold_tokens = 10000000 class-attribute instance-attribute","text":"

Tokens without activation before a latent is considered dead.

"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.n_prefixes","title":"n_prefixes = 10 class-attribute instance-attribute","text":"

Number of random length prefixes to use for loss calculation.

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss","title":"MatryoshkaLoss(mse, sparsity, l0, l1, aux, n_dead) dataclass","text":"

Bases: Loss

The composite loss terms for a training batch.

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.aux","title":"aux instance-attribute","text":"

Auxiliary loss term (e.g., AuxK).

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l0","title":"l0 instance-attribute","text":"

Sum of L0 magnitudes of hidden activations for all prefix lengths.

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l1","title":"l1 instance-attribute","text":"

Sum of L1 magnitudes of hidden activations for all prefix lengths.

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.loss","title":"loss property","text":"

Total loss.

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.mse","title":"mse instance-attribute","text":"

Average of reconstruction loss (mean squared error) for all prefix lengths.

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.n_dead","title":"n_dead instance-attribute","text":"

Number of dead latents (per aux loss threshold).

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.sparsity","title":"sparsity instance-attribute","text":"

Sparsity loss, typically lambda * L1.

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaObjective","title":"MatryoshkaObjective(cfg)","text":"

Bases: Objective

Torch module for calculating the matryoshka loss for an SAE.

Source code in src/saev/nn/objectives.py
def __init__(self, cfg: Matryoshka):\n    super().__init__()\n    self.cfg = cfg\n    self.toks_since_active: Tensor | None = None\n
"},{"location":"api/nn/objectives/#saev.nn.objectives.sample_prefixes","title":"sample_prefixes(d_sae, n_prefixes, min_prefix_length=1, pareto_power=0.5)","text":"

Samples prefix lengths using a Pareto distribution. Derived from \"Learning Multi-Level Features with Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)

Parameters:

Name Type Description Default d_sae int

Total number of latent dimensions

required n_prefixes int

Number of prefixes to sample

required min_prefix_length int

Minimum length of any prefix

1 pareto_power float

Power parameter for Pareto distribution (lower = more uniform)

0.5

Returns:

Type Description Int64[Tensor, ' n_prefixes']

torch.Tensor: Sorted prefix lengths

Source code in src/saev/nn/objectives.py
@torch.no_grad()\n@jaxtyped(typechecker=beartype.beartype)\ndef sample_prefixes(\n    d_sae: int, n_prefixes: int, min_prefix_length: int = 1, pareto_power: float = 0.5\n) -> Int64[Tensor, \" n_prefixes\"]:\n    \"\"\"\n    Samples prefix lengths using a Pareto distribution. Derived from \"Learning Multi-Level Features with\n    Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)\n\n    Args:\n        d_sae: Total number of latent dimensions\n        n_prefixes: Number of prefixes to sample\n        min_prefix_length: Minimum length of any prefix\n        pareto_power: Power parameter for Pareto distribution (lower = more uniform)\n\n    Returns:\n        torch.Tensor: Sorted prefix lengths\n    \"\"\"\n    if n_prefixes <= 1:\n        return torch.tensor([d_sae], dtype=torch.int64)\n\n    assert n_prefixes <= d_sae\n\n    # Calculate probability distribution favoring shorter prefixes\n    lengths = torch.arange(1, d_sae)\n    pareto_cdf = 1 - ((min_prefix_length / lengths.float()) ** pareto_power)\n    pareto_pdf = torch.cat([pareto_cdf[:1], pareto_cdf[1:] - pareto_cdf[:-1]])\n    probability_dist = pareto_pdf / pareto_pdf.sum()\n\n    # Sample and sort prefix lengths\n    sampled_indices = torch.multinomial(\n        probability_dist, num_samples=n_prefixes - 1, replacement=False\n    )\n\n    # Convert indices to actual prefix lengths\n    prefixes = lengths[sampled_indices]\n\n    # Add n_latents as the final prefix\n    prefixes = torch.cat((prefixes.detach().clone(), torch.tensor([d_sae])))\n\n    prefixes, _ = torch.sort(prefixes, descending=False)\n\n    return prefixes.to(torch.int64)\n
"},{"location":"api/nn/saev.nn/","title":"saev.nn","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder","title":"SparseAutoencoder(cfg)","text":"

Bases: Module

Sparse auto-encoder (SAE)

Source code in src/saev/nn/modeling.py
def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. That means load_state_dict overwrites W_dec when it loads W_enc.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n
"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.EncodeOut","title":"EncodeOut","text":"

Bases: NamedTuple

Outputs of encode: pre-activations and activated latents.

"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.Output","title":"Output","text":"

Bases: NamedTuple

Full SAE forward outputs for objectives and metrics.

"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.decode","title":"decode(f_x, *, prefixes=None)","text":"

Decode latent features to reconstructions.

Parameters:

Name Type Description Default f_x Float[Tensor, 'batch d_sae']

Latent features of shape (batch, d_sae)

required prefixes Int64[Tensor, ' n_prefixes'] | None

Optional tensor of prefix lengths for Matryoshka decoding.

None

Returns:

Type Description Float[Tensor, 'batch n_prefixes d_model']

Matryoshka reconstructions (batch, n_prefixes, d_model).

Source code in src/saev/nn/modeling.py
def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -> Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka cumulative decode\n    device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] > prefixes[:-1])\n    assert 1 <= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -> (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -> ... d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. 
Needs to be cleaned up.\n    return x_hats\n
"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.forward","title":"forward(x)","text":"

Given x, calculates the reconstructed x_hat and the intermediate activations f_x.

Parameters:

Name Type Description Default x Float[Tensor, 'batch d_model']

a batch of transformer activations.

required Source code in src/saev/nn/modeling.py
def forward(self, x: Float[Tensor, \"batch d_model\"]) -> Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n
"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.normalize_w_dec","title":"normalize_w_dec()","text":"

Set W_dec to unit-norm columns.

Source code in src/saev/nn/modeling.py
@torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n
"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.remove_parallel_grads","title":"remove_parallel_grads()","text":"

Update grads so that they remove the parallel component

Source code in src/saev/nn/modeling.py
@torch.no_grad()\ndef remove_parallel_grads(self):\n    \"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model -> d_sae\",\n    )\n\n    norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq > 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -> d_sae d_model\",\n    )\n
"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig","title":"SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True) dataclass","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.activation","title":"activation = TopK() class-attribute instance-attribute","text":"

Activation function.

"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_model","title":"d_model = 1024 class-attribute instance-attribute","text":"

Size of x.

"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_sae","title":"d_sae = 1024 * 16 class-attribute instance-attribute","text":"

Number of features in SAE latent space; size of f(x).

"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.normalize_w_dec","title":"normalize_w_dec = True class-attribute instance-attribute","text":"

Whether to make sure W_dec has unit norm columns. See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".

"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_blend","title":"reinit_blend = 0.8 class-attribute instance-attribute","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"reinit_enc_dec_tranpose = True class-attribute instance-attribute","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.remove_parallel_grads","title":"remove_parallel_grads = True class-attribute instance-attribute","text":"

Whether to remove gradients parallel to W_dec columns (which will be ignored because we force the columns to have unit norm). See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.

"},{"location":"api/nn/saev.nn/#saev.nn.dump","title":"dump(fpath, sae)","text":"

Save an SAE checkpoint to disk along with configuration, using the trick from equinox.

Parameters:

Name Type Description Default fpath Path | str

filepath to save checkpoint to.

required sae SparseAutoencoder

sparse autoencoder checkpoint to save.

required Source code in src/saev/nn/modeling.py
@beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n
"},{"location":"api/nn/saev.nn/#saev.nn.load","title":"load(fpath, *, device='cpu')","text":"

Loads a sparse autoencoder from disk.

Source code in src/saev/nn/modeling.py
@beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -> SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. 
Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        
cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n
"},{"location":"api/utils/monitoring/","title":"saev.utils.monitoring","text":""},{"location":"api/utils/monitoring/#saev.utils.monitoring.DataloaderMonitor","title":"DataloaderMonitor(dataloader, process_factory=None)","text":"

Tracks IO and CPU activity for the dataloader manager process and its children.

The monitor owns the dataloader handle and psutil processes internally, so callers simply construct it with the dataloader and then call compute() whenever metrics are needed.

Source code in src/saev/utils/monitoring.py
def __init__(\n    self,\n    dataloader: object,\n    process_factory: Callable[[int], psutil.Process] | None = None,\n) -> None:\n    self.dataloader = dataloader\n    self.process_factory = process_factory or psutil.Process\n    self._reset_state()\n
"},{"location":"api/utils/saev.utils/","title":"saev.utils","text":""},{"location":"api/utils/scheduling/","title":"saev.utils.scheduling","text":""},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter","title":"BatchLimiter(dataloader, n_samples)","text":"

Limits the number of batches to only return n_samples total samples.

Source code in src/saev/utils/scheduling.py
def __init__(self, dataloader: DataLoaderLike, n_samples: int):\n    self.dataloader = dataloader\n    self.n_samples = n_samples\n    self.batch_size = dataloader.batch_size\n    self.drop_last = dataloader.drop_last\n
"},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter.__getattr__","title":"__getattr__(name)","text":"

Pass through attribute access to the wrapped dataloader.

Source code in src/saev/utils/scheduling.py
def __getattr__(self, name: str) -> Any:\n    \"\"\"Pass through attribute access to the wrapped dataloader.\"\"\"\n    # __getattr__ is only called when the attribute wasn't found on self\n    # So we delegate to the wrapped dataloader\n    try:\n        return getattr(self.dataloader, name)\n    except AttributeError:\n        # Re-raise with more context about where the attribute was not found\n        raise AttributeError(\n            f\"'{self.__class__.__name__}' object and its wrapped dataloader have no attribute '{name}'\"\n        )\n
"},{"location":"api/utils/scheduling/#saev.utils.scheduling.Warmup","title":"Warmup(init, final, n_steps)","text":"

Bases: Scheduler

Linearly increases from init to final over n_steps steps.

Source code in src/saev/utils/scheduling.py
def __init__(self, init: float, final: float, n_steps: int):\n    self.final = final\n    self.init = init\n    self.n_steps = n_steps\n    self._step = 0\n
"},{"location":"api/utils/scheduling/#saev.utils.scheduling.WarmupCosine","title":"WarmupCosine(init, n_warmup, peak, n_steps, final)","text":"

Bases: Scheduler

Linearly increases from init to peak over n_warmup steps, then decreases to final using cosine decay over the remaining n_steps - n_warmup steps.

Source code in src/saev/utils/scheduling.py
def __init__(\n    self, init: float, n_warmup: int, peak: float, n_steps: int, final: float\n):\n    self.init = init\n    self.peak = peak\n    self.final = final\n    self.n_warmup = n_warmup\n    self.n_steps = n_steps\n    self._step = 0\n
"},{"location":"api/utils/statistics/","title":"saev.utils.statistics","text":""},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator","title":"PercentileEstimator(percentile, total, lr=0.001, shape=())","text":"Source code in src/saev/utils/statistics.py
def __init__(\n    self,\n    percentile: float | int,\n    total: int,\n    lr: float = 1e-3,\n    shape: tuple[int, ...] = (),\n):\n    self.percentile = percentile\n    self.total = total\n    self.lr = lr\n\n    self._estimate = torch.zeros(shape)\n    self._step = 0\n
"},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator.update","title":"update(x)","text":"

Update the estimator with a new value.

This method maintains the marker positions using the P2 algorithm rules. When a new value arrives, it's placed in the appropriate position relative to existing markers, and marker positions are adjusted to maintain their desired percentile positions.

Parameters:

Name Type Description Default x float | Tensor

The new value to incorporate into the estimation

required Source code in src/saev/utils/statistics.py
def update(self, x: float | Tensor):\n    \"\"\"\n    Update the estimator with a new value.\n\n    This method maintains the marker positions using the P2 algorithm rules. When a new value arrives, it's placed in the appropriate position relative to existing markers, and marker positions are adjusted to maintain their desired percentile positions.\n\n    Arguments:\n        x: The new value to incorporate into the estimation\n    \"\"\"\n    self._step += 1\n\n    step_size = self.lr * (self.total - self._step) / self.total\n\n    # Is a no-op if it's already on the same device.\n    if isinstance(x, Tensor):\n        self._estimate = self._estimate.to(x.device)\n\n    self._estimate += step_size * (\n        torch.sign(x - self._estimate) + 2 * self.percentile / 100 - 1.0\n    )\n
"},{"location":"api/utils/statistics/#saev.utils.statistics.calc_batch_entropy","title":"calc_batch_entropy(example_idx, token_idx, n_examples, content_tokens_per_example)","text":"

Compute entropy and coverage metrics for a batch of shuffled indices.

The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.

Source code in src/saev/utils/statistics.py
@beartype.beartype\ndef calc_batch_entropy(\n    example_idx: IndexLike,\n    token_idx: IndexLike,\n    n_examples: int,\n    content_tokens_per_example: int,\n) -> dict[str, float]:\n    \"\"\"\n    Compute entropy and coverage metrics for a batch of shuffled indices.\n\n    The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.\n    \"\"\"\n    example_idx_t = _to_tensor(example_idx)\n    token_idx_t = _to_tensor(token_idx)\n    if n_examples <= 0:\n        raise ValueError(\"n_examples must be positive.\")\n    if content_tokens_per_example <= 0:\n        raise ValueError(\"content_tokens_per_example must be positive.\")\n\n    if example_idx_t.ndim != 1:\n        raise ValueError(\"example_idx must be 1D.\")\n    if token_idx_t.ndim != 1:\n        raise ValueError(\"token_idx must be 1D.\")\n    if example_idx_t.numel() == 0:\n        raise ValueError(\"example_idx must contain at least one element.\")\n\n    _assert_batch_dim(example_idx_t, token_idx_t)\n\n    example_metrics = _add_prefix(\n        \"loader/example\", _entropy_metrics(example_idx_t, n_examples)\n    )\n    token_metrics = _add_prefix(\n        \"loader/token\", _entropy_metrics(token_idx_t, content_tokens_per_example)\n    )\n\n    return {**example_metrics, **token_metrics}\n
"},{"location":"api/utils/wandb/","title":"saev.utils.wandb","text":""},{"location":"api/utils/wandb/#saev.utils.wandb.ParallelWandbRun","title":"ParallelWandbRun(project, cfgs, mode, tags, dir='.wandb')","text":"

Inspired by https://community.wandb.ai/t/is-it-possible-to-log-to-multiple-runs-simultaneously/4387

Source code in src/saev/utils/wandb.py
def __init__(\n    self,\n    project: str,\n    cfgs: list[dict[str, object]],\n    mode: str,\n    tags: list[str],\n    dir: str = \".wandb\",\n):\n    cfg, *cfgs = cfgs\n    self.project = project\n    self.cfgs = cfgs\n    self.mode = mode\n    self.tags = tags\n    self.dir = dir\n    self.summary_updates: dict[str, object] = {}\n\n    self.live_run = wandb.init(\n        project=project, config=cfg, mode=mode, tags=tags, dir=dir\n    )\n\n    self.metric_queues: list[MetricQueue] = [[] for _ in self.cfgs]\n
"},{"location":"developers/contributing/","title":"Contributing","text":""},{"location":"developers/contributing/#project-layout","title":"Project layout","text":"
docs/\n    mkdocs.yml    # The configuration file.\n    src/\n        index.md  # The documentation homepage.\n        ...       # Other markdown pages, images and other files.\n
"},{"location":"developers/datapoint-init/","title":"Datapoint Initialization","text":"

Datapoint initialization is an SAE weight initialization strategy independently proposed by Anthropic and Pierre Peigne for improving SAE training.

Conceptually, we initialize each decoder column to look like a real datapoint, so every latent starts with a patch of input space where it \"wins\" and gets some gradient. Here's the algorithm:

  1. Select \\(n\\) random data points from your training data.
  2. Compute the mean \\(\\mu\\) and zero-center the data: \\(x_0 = x - \\mu\\).
  3. Linearly blend each zero-centered datapoint with Kaiming initialization: \\(w = p \\cdot (x - \\mu) + (1 - p) \\cdot r\\) where \\(p\\) is your blend probability and \\(r\\) is a randomly sampled Kaiming initialization vector.
  4. Initialize \\(W_\\text{enc}\\) as a concatenation of \\(n\\) blended vectors.
  5. Initialize \\(W_\\text{dec}\\) as \\(W_\\text{enc}^T\\).

Anthropic suggests \\(p = 0.8\\) for SAEs and 0.4 for \"weakly causal crosscoders\". I interpret this to mean that there is no universally appropriate \\(p\\).

"},{"location":"developers/disk-layout/","title":"Storage & Run Manifest Spec (v1)","text":"

There are two main locations:

  1. $SAEV_SCRATCH/saev/shards: where we store transformer activations (referred to as shards_root in the codebase).
  2. $SAEV_NFS/saev/runs: where we store checkpoints and other computed intermediate stuff like example images, probe1d results, etc. (referred to as runs_root in the codebase).

Visually, these are:

$SAEV_SCRATCH/saev/\n  shards/\n    <shard_hash>/\n      metadata.json\n      shards.json\n      acts000000.bin\n      acts000001.bin\n      ...\n      labels.bin\n

and

$SAEV_NFS/saev/\n  runs/\n    <run_id>/\n      checkpoint/           # output of train.py on <shard_hash>\n        sae.pt\n        config.json\n      links/                # Symlinks\n        train-shards        # $SCRATCH/saev/shards/<shard_hash>\n        train-dataset       # Whatever the original image dataset was\n        val-shards          # $SCRATCH/saev/shards/<shard_hash>\n        val-dataset         # Whatever the original image dataset was\n      inference/            # outputs from dump.py\n        <shard_hash>/\n          config.json\n          token_acts.npz\n          visuals/          # output of visuals.py\n

Each $SAEV_SCRATCH/saev/shards/<shard_hash>/ MUST include:

Note

Immutability: Files under saev/shards/<shard_hash>/ MUST be treated as read-only after publication. Any change yields a new shard_hash.

All CLI entrypoints should accept a single --run <path> argument. Every other path MUST be resolved from the run root:

Example resolution:

run = pathlib.Path(cfg.run)\nshards_root = (run / \"links\" / \"shards\").resolve()\ndataset_root = (run / \"links\" / \"dataset\").resolve()\nckpt = run / \"checkpoint\" / \"sae.pt\"\nlabels = vit_root / \"labels.bin\"\n
"},{"location":"developers/disk-layout/#faqs","title":"FAQs","text":""},{"location":"developers/naming/","title":"Variable Naming","text":""},{"location":"developers/protocol/","title":"saev Sharded Activation File Protocol","text":"

saev caches activations to disk rather than running ViT or LLM inference when training SAEs. Gemma Scope makes this decision as well (see Section 3.3.2 of https://arxiv.org/pdf/2408.05147). saev.data has a specific protocol to support this on OSC, a supercomputer center, and to take advantage of OSC's specific disk performance.

Goal: loss-lessly persist very large Transformer (ViT or LLM) activations in a form that is:

This document is the single normative source. Any divergence in code is a bug.

"},{"location":"developers/protocol/#1-directory-layout","title":"1. Directory layout","text":"
<dump_to>/<HASH>/\n    metadata.json    # UTF-8 JSON, human-readable, describes data-generating config\n    shards.json      # UTF-8 JSON, human-readable, describes shards.\n    acts000000.bin   # shard 0\n    acts000001.bin   # shard 1\n    ...\n    actsNNNNNN.bin   # shard NNNNNN  (zero-padded width=6)\n    labels.bin       # patch labels (optional)\n

HASH = sha256(json.dumps(metadata, sort_keys=True, separators=(',', ':')).encode('utf-8')) Guards against silent config drift.

"},{"location":"developers/protocol/#2-json-file-schemas","title":"2. JSON file schemas","text":""},{"location":"developers/protocol/#21-metadatajson","title":"2.1. metadata.json","text":"field type semantic family string \"clip\" \\| \"siglip\" \\| \"dinov2\" ckpt string model identifier (OpenCLIP, HF, etc.) layers int[] ViT residual\u2010block indices recorded patches_per_ex int example patches only (excludes CLS) cls_token bool true -> patch 0 is CLS, else no CLS d_model int activation dimensionality n_examples int total examples in dataset patches_per_shard int logical activations per shard (see #3) data object opaque dataset description dataset string absolute path to original dataset root dtype string numpy dtype. Fixed \"float32\" for now. protocol string \"2.1\" (shards after big refactor)

The data object is base64.b64encode(pickle.dumps(img_ds)).decode('utf8').

The dataset field stores the absolute path to the root directory of the original image dataset, allowing runs to create symlinks back to the source images for visualization and analysis.

"},{"location":"developers/protocol/#22-shardsjson","title":"2.2. shards.json","text":"

A single array of shard objects, each of which has the following fields:

field type semantic name string shard filename (acts000000.bin). n_examples int the number of examples in the shard."},{"location":"developers/protocol/#3-shard-sizing-maths","title":"3. Shard sizing maths","text":"
tokens_per_ex = patches_per_ex + (1 if cls_token else 0)\n\nexamples_per_shard = floor(patches_per_shard / (tokens_per_ex * len(layers)))\n\nshape_per_shard = (\n    examples_per_shard, len(layers), tokens_per_ex, d_model,\n)\n

patches_per_shard is a budget (default ~2.4 M) chosen so a shard is approximately 10 GiB for Float32 @ d_model = 1024.

The last shard will have a smaller value for examples_per_shard; this value is recorded in the n_examples field of its entry in shards.json.

"},{"location":"developers/protocol/#4-data-layout-and-global-indexing","title":"4. Data Layout and Global Indexing","text":"

The entire dataset of activations is treated as a single logical 4D tensor with the shape (n_examples, len(layers), tokens_per_ex, d_model). This logical tensor is C-contiguous with axes ordered [Example, Layer, Token, Dimension].

Physically, this tensor is split along the first axis (Example) into multiple shards, where each shard is a single binary file. The number of examples in each shard is constant, except for the final shard, which may be smaller.

To locate an arbitrary activation vector, a reader must convert a logical coordinate (global_ex_idx, layer_value, token_idx) into a file path and an offset within that file.

"},{"location":"developers/protocol/#41-definitions","title":"4.1 Definitions","text":"

Let the parameters from metadata.json be:

"},{"location":"developers/protocol/#42-coordinate-transformations","title":"4.2 Coordinate Transformations","text":"

Given a logical coordinate:

The physical location is found as follows:

  1. Identify Shard:

    • shard_idx = global_ex_idx // S
    • ex_in_shard = global_ex_idx % S The target file is acts{shard_idx:06d}.bin.
  2. Identify Layer Index: The stored data contains a subset of the ViT's layers. The logical layer_value must be mapped to its index in the stored layers array.

    • layer_idx = layers.index(layer) A reader must raise an error if layer is not in layers.
  3. Calculate Offset: The data within a shard is a 4D tensor of shape (S, L, T, D). The offset to the first byte of the desired activation vector [ex_in_shard, layer_idx , token_idx] is:

    • offset_in_vectors = (ex_in_shard * L * T) + (layer_idx * T) + token_idx
    • offset_in_bytes = offset_in_vectors * D * 4 (assuming 4 bytes for float32)

A reader can then seek to offset_in_bytes and read \\(D \\times 4\\) bytes to retrieve the vector.

Alternatively, rather than calculate the offset, readers can memmap the shard, then use Numpy indexing to get the activation vector.

"},{"location":"developers/protocol/#43-token-axis-layout","title":"4.3 Token Axis Layout","text":"

The token axis of length \\(T\\) is ordered as follows: * If cls_token is true: * Index 0: [CLS] token activation * Indices 1 to \\(P\\): Patch token activations * If cls_token is false: * Indices 0 to \\(P-1\\): Patch token activations

The relative order of patch tokens is preserved exactly as produced by the upstream Vision Transformer.

"},{"location":"developers/protocol/#5-versioning-compatibility","title":"5 Versioning & compatibility","text":"

That's it. Anything else you find in code that contradicts this document, fix the code or update the spec.

"},{"location":"developers/workflows/","title":"Workflows","text":"
  1. Generate inference activations (and thus visuals) for both training and validation splits.
"},{"location":"users/bird-mae-debugging/","title":"Debugging Bird-MAE Activations","text":"

This is an example of the kind of debugging you might have to do when training SAEs on a new model. The short version: Bird-MAE has an \"emergent outlier feature\" in dimension 296 that blows up after the first MLP. The fix is to record activations after the pre-MLP LayerNorm (block.norm2) instead of the raw residual stream, because the LayerNorm learns to suppress the outlier.

"},{"location":"users/bird-mae-debugging/#symptom-80-dead-neurons","title":"Symptom: 80% dead neurons","text":"

While training TopK SAEs on BirdMAE activations taken from birdsong, ~80% of my neurons were dead from the very start of training.

"},{"location":"users/bird-mae-debugging/#comparing-to-known-good-activations","title":"Comparing to known-good activations","text":"

First, I compared activations from BirdMAE to DINOv3 activations (which I know are well-behaved). I recorded 300K content token activation vectors from layer 14/24 from DINOv3 ViT-L/16 and BirdMAE-L. Each vector has 1024 dimensions. I flattened these vectors; for each of BirdMAE and DINOv3, I have a list of 307.2M neuron activations (300K x 1024 = 307,200,000). I plotted a histogram below. Note the log scale on the y-axis.

I zoomed in on the left-most cluster, ignoring the right cluster. While BirdMAE is more spread out, the shapes look good enough for now.

"},{"location":"users/bird-mae-debugging/#finding-the-outlier-dimension-296","title":"Finding the outlier: dimension 296","text":"

Looking at the right cluster, I realized that all of these values are from neuron 296 of 1024. Here, I colored activations based on their neuron: all BirdMAE neurons besides 296 are blue, DINOv3 is orange, and neuron 296 is red.

My activation matrix is \\(\\mathbb{R}^{300K \\times 1024}\\) for each dataset. In code, what I see is:

bird_acts.shape  # (300K, 1024)\nbird_acts[:, 295].min()  # 2549.54\nbird_acts[:, 295].max()  # 4625.12\n

Something is broken inside of BirdMAE.

"},{"location":"users/bird-mae-debugging/#tracing-the-outlier-through-the-residual-stream","title":"Tracing the outlier through the residual stream","text":"

Where in BirdMAE does this abnormality show up? Consider transformers as residual streams. After what layer does dimension 296/1024 blow up? See this diagram below: for a single random example from BirdMAE, we will track both the average neuron and neuron 296's value through the 24 transformer layers.

BirdMAE uses 256 content tokens for a single example. We take the average value of each neuron in the residual stream before each transformer block (the green \"Graph #1\" circle in the above diagram) and after the final transformer block. We plot each of the 1023 \"well-behaved\" neurons in light blue. We plot our degenerate neuron 296 in red. Note the log scale on the y-axis.

Our well-behaved neurons mostly stay in (-10, 10). Neuron 296 jumps straight to ~2.2K after the first residual block and is never fixed again. It's well-behaved coming out of the patch embedding before the first residual block.

"},{"location":"users/bird-mae-debugging/#narrowing-it-down-the-first-mlp","title":"Narrowing it down: the first MLP","text":"

Below is the output from the attention layers (Graph #2) in our architecture diagram.

Neuron 296 is mostly well-behaved; it's a little big after the second attention layer, but not insane.

Here, we can see that the output of the first MLP produces an abnormally high value for neuron 296. Why?

Here's an architecture diagram of BirdMAE's MLPs according to the model definition on HuggingFace. Let's look at the trainable parameters in these MLPs across layers, starting from the end and working backwards.

fc2 has a weight parameter with shape (4096, 1024) and a bias parameter with shape (1024,). I take the L2 norm of fc2.weight's columns to see if col 296/1024 is different.

fc2.weight does appear to be different, and abnormally large (note the log scale). fc2.bias is also different, but it's not immediately obvious what's going on there to me.

"},{"location":"users/bird-mae-debugging/#root-cause-emergent-outlier-features","title":"Root cause: emergent outlier features","text":"

This is a known phenomenon in transformers called \"emergent outlier features.\" After extensive pretraining, a single dimension in the residual stream accumulates a very large magnitude. The model never needs to \"fix\" this because the pre-attention and pre-MLP LayerNorms learn to suppress it: the learned multiplicative weight for dimension 296 is very small, and the bias is approximately 1. So later layers never actually \"see\" the outlier in practice.

We verified this by inspecting norm2.weight across layers and confirming that the learned scale for dimension 296 is near-zero, but that analysis is not reproduced here.

The BirdMAE authors never had to deal with this because all downstream use of the model goes through LayerNorm first.

"},{"location":"users/bird-mae-debugging/#fix-record-after-layernorm","title":"Fix: record after LayerNorm","text":"

The fix is to record activations after block.norm2 (the pre-MLP LayerNorm) instead of from the raw residual stream. In saev, this is implemented as:

def get_residuals(self) -> list[torch.nn.Module]:\n    return [block.norm2 for block in self.model.blocks]\n

After this change, the outlier is suppressed and SAE training works normally.

"},{"location":"users/bird-mae-debugging/#lessons","title":"Lessons","text":"
  1. Compare activation distributions to a known-good model. Histogramming flattened activations from 300K tokens is cheap and can reveal outliers.
  2. Emergent outlier features are real. If a single dimension dominates your activation distribution, check whether it's a known artifact of pretraining before assuming your recording code is wrong.
  3. Record after LayerNorm, not from the raw residual stream. The residual stream can carry high-magnitude \"bookkeeping\" values that LayerNorm suppresses. Recording post-norm avoids this entirely.
"},{"location":"users/glossary/","title":"Glossary","text":"

Definitions for words used in the code and documentation.

Modality-specific vocab:

"},{"location":"users/guide/","title":"Guide","text":"

This guide explains how to transition from the ADE20K demo to using saev with your own custom datasets.

Here are the steps:

  1. Save ViT activations to disk
  2. Train SAEs on activations
  3. Evaluate the SAE checkpoints
  4. Visualize Learned Features

Note

saev assumes you are running on NVIDIA GPUs. On a multi-GPU system, prefix your commands with CUDA_VISIBLE_DEVICES=X to run on GPU X.

"},{"location":"users/guide/#save-vit-activations-to-disk","title":"Save ViT Activations to Disk","text":"

To save activations to disk, we need to specify:

  1. Which model we would like to use
  2. Which layers we would like to save.
  3. Where on disk and how we would like to save activations.
  4. Which images we want to save activations for.

The saev/framework/shards.py script does all of this for us.

Run uv run launch.py shards --help to see all the configuration.

In practice, you might run:

uv run launch.py shards \\\n  --shards-root /fs/scratch/PAS2136/samuelstevens/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-16/openai \\\n  --d-model 768 \\\n  --layers 6 7 8 9 10 11 \\\n  --content-tokens-per-example 196 \\\n  --batch-size 512 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  data:img-seg-folder \\\n  --data.root /fs/scratch/PAS2136/samuelstevens/datasets/ADEChallengeData2016/ \\\n  --data.split training\n

This will save activations for the CLIP-pretrained model ViT-B/16, which has a residual stream dimension of 768, and has 196 patches per image (224 / 16 = 14; 14 x 14 = 196). It will save the last 6 layers. It will write 2.4M patches per shard, and save shards to a new directory /fs/scratch/PAS2136/samuelstevens/saev/shards.

Note

A note on storage space: A ViT-B/16 on ImageNet-1K will save 1.2M images x 197 patches/layer/image x 1 layer = ~240M activations, each of which takes up 768 floats x 4 bytes/float = 3072 bytes, for a total of 723GB for the entire dataset. As you scale to larger models (ViT-L has 1024 dimensions, and 14x14-pixel patches on a 224x224 image yield 256 patches/layer/image), recorded activations will grow even larger.

This script will also save a metadata.json file that will record the relevant metadata for these activations, which will be read by future steps. The activations will be in .bin files, numbered starting from 000000.

To add your own models, see the guide to extending in saev.activations.

"},{"location":"users/guide/#train-saes-on-activations","title":"Train SAEs on Activations","text":"

To train an SAE, we need to specify:

  1. Which activations to use as input.
  2. SAE architectural stuff.
  3. Optimization-related stuff.

The train.py script handles this.

Run uv run train.py --help to see all the configuration.

The most important options are:

This is a full example:

uv run train.py \\\n  --runs-root /fs/ess/PAS2136/samuelstevens/saev/runs \\\n  --lr 4e-3 \\\n  --sae.exp-factor 16 \\\n  --sae.d-model 1024 \\\n  --tag ade20k-v0.1 \\\n  --n-train 100_000_000 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --train-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/51567c6c \\\n  --train-data.layer 11 \\\n  --val-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3e27794f \\\n  --val-data.layer 11 \\\n  sae.activation:relu \\\n  objective:matryoshka \\\n  --objective.sparsity-coeff 1e-3 \\\n

This will train one (1) sparse autoencoder on the data. See the section on sweeps to learn how to train multiple SAEs in parallel using one or more GPUs.

"},{"location":"users/guide/#loader-entropy-metrics","title":"Loader Entropy Metrics","text":"

The training loop logs additional loader diagnostics derived from calc_batch_entropy in train.py. Every batch contributes two entropy measurements in natural log units:

All eight metrics appear alongside the existing loader/read_mb counters, helping spot skewed sampling or under-covered patches mid-run.

"},{"location":"users/guide/#evaluation","title":"Evaluation","text":"

After training an SAE, you probably want to use the SAE. You can use the SAE as a regular PyTorch torch.nn.Module in combination with a saev.data.OrderedDataLoader or saev.data.IndexedDataset.

However, most SAEs are evaluated with a similar set of metrics (normalized MSE, L0, etc). The saev/framework/inference.py script calculates these metrics. You can run uv run launch.py inference --help to see all the options.

The most important options are:

uv run launch.py inference \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/z55bntm1/ \\\n  --data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/614861a0 \\\n  --data.layer 11\n
"},{"location":"users/guide/#visualize-learned-features","title":"Visualize Learned Features","text":"

Now that you've trained an SAE, you probably want to look at its learned features. One way to visualize an individual learned feature is by picking out images that maximize the activation of that feature. We use the saved sparse token_acts.npz file from the previous inference step.

Warning

Because there are so many different ways to visualize SAE features, I moved it to contrib/trait_discovery (used for our preprint \"Towards Open-Ended Visual Scientific Discovery with Sparse Autoencoders\").

The most important options:

So first, move into the contrib/trait_discovery:

cd contrib/trait_discovery\n

Then run the script that generates highlighted images:

uv run scripts/launch.py visuals \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/unu6dbfb \\\n  --shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3802cb66 \\\n  --latents 0 1 2 3 4 5 6 7 8 9 49 56 57 125 202 \\\n  --n-latents 20 \\\n

Note

Because of limitations in the SAE training process, not all SAE latents are equally interesting. Some latents are dead, some are dense, some only fire on two images, etc. Typically, you want neurons that fire very strongly (high value) and fairly infrequently (low frequency). You might be interested in particular, fixed latents (--include-latents). I recommend using saev/interactive/metrics.py with marimo to figure out good thresholds.

"},{"location":"users/guide/#sweeps","title":"Sweeps","text":"

tl;dr: basically the slow part of training SAEs is loading vit activations from disk, and since SAEs are pretty small compared to other models, you can train a bunch of different SAEs in parallel on the same data using a big GPU. That way you can sweep learning rate, lambda, etc. all on one GPU.

"},{"location":"users/guide/#why-parallel-sweeps","title":"Why Parallel Sweeps","text":"

SAE training faces a different bottleneck than typical ML workflows: disk I/O rather than GPU computation. When training on vision transformer activations, loading the pre-computed activation data from disk is often the slowest part of the process, not the SAE training itself.

A single set of ImageNet activations for a vision transformer can require terabytes of storage. Reading this data repeatedly for each hyperparameter configuration would be extremely inefficient.

"},{"location":"users/guide/#parallelized-training-architecture","title":"Parallelized Training Architecture","text":"

To address this bottleneck, we implement parallel training that allows multiple SAE configurations to train simultaneously on the same data batch:

\nflowchart TD\n    A[Pre-computed ViT Activations] -->|Slow I/O| B[Memory Buffer]\n    B -->|Shared Batch| C[SAE Model 1]\n    B -->|Shared Batch| D[SAE Model 2]\n    B -->|Shared Batch| E[SAE Model 3]\n    B -->|Shared Batch| F[...]\n

This approach:

"},{"location":"users/guide/#running-a-sweep","title":"Running a Sweep","text":"

The train command accepts a --sweep parameter that points to a TOML file defining the hyperparameter grid:

uv run python -m saev train --sweep configs/my_sweep.toml\n

Here's an example sweep configuration file:

[sae]\nsparsity_coeff = [1e-4, 2e-4, 3e-4]\nd_model = 768\nd_sae = [6144, 12288]\n\n[data]\nscale_mean = true\n

This would train 6 models (3 sparsity coefficients \u00d7 2 SAE widths), each sharing the same data loading operation.

"},{"location":"users/guide/#limitations","title":"Limitations","text":"

Not all parameters can be swept in parallel. Parameters that affect data loading (like batch_size or dataset configuration) will cause the sweep to split into separate parallel groups. The system automatically handles this division to maximize efficiency.

"},{"location":"users/inference/","title":"Inference","text":"

If you want to get started quickly, try the inference notebook in marimo or on Google Colab.

Briefly, you need to:

  1. Download a checkpoint.
  2. Get the code.
  3. Load the checkpoint.
  4. Get activations.

Details are below.

"},{"location":"users/inference/#download-a-checkpoint","title":"Download a Checkpoint","text":"

First, download an SAE checkpoint from the Huggingface collection.

"},{"location":"users/inference/#single-checkpoint-repos","title":"Single-checkpoint repos","text":"

Some repos (CLIP, BioCLIP, DINOv2) contain a single sae.pt at the root. For instance, the SAE trained on OpenAI's CLIP ViT-B/16 with ImageNet-1K activations is here.

You can use wget if you want:

wget https://huggingface.co/osunlp/SAE_CLIP_24K_ViT-B-16_IN1K/resolve/main/sae.pt\n
"},{"location":"users/inference/#multi-checkpoint-repos","title":"Multi-checkpoint repos","text":"

The DINOv3 repos contain multiple checkpoints organized by layer and sparsity level. Each repo has a manifest.jsonl with metadata (layer, L0, MSE) for every checkpoint, so you can pick the right one programmatically.

Download a specific checkpoint:

from huggingface_hub import hf_hub_download\n\n# Pick a specific layer and run ID from the repo's README or manifest.jsonl\npath = hf_hub_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\", \"layer_23/lnleoyf6/sae.pt\")\n

Download all checkpoints in a repo:

from huggingface_hub import snapshot_download\n\nsnapshot_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\")\n

Available DINOv3 repos:

"},{"location":"users/inference/#get-the-code","title":"Get the Code","text":"

The easiest way to do this is to clone the code:

git clone https://github.com/OSU-NLP-Group/saev\n

You can also install the package from git if you use uv (not sure about pip or cuda):

uv add git+https://github.com/OSU-NLP-Group/saev\n

Or clone it and install it as an editable package with pip, like pip install -e . in your virtual environment.

Then you can do things like from saev import ....

Note

If you struggle to get saev installed, open an issue on GitHub and I will figure out how to make it easier.

"},{"location":"users/inference/#load-the-checkpoint","title":"Load the Checkpoint","text":"
import saev.nn\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n

Now you have a pretrained SAE.

"},{"location":"users/inference/#get-activations","title":"Get Activations","text":"

This is the hardest part. We need to:

  1. Pass an image into a ViT
  2. Record the dense ViT activations at the same layer that the SAE was trained on.
  3. Pass the activations into the SAE to get sparse activations.
  4. Do something interesting with the sparse SAE activations.

There are examples of this in the demo code: for classification and semantic segmentation. If the permalinks change, you are looking for the get_sae_latents() functions in both files.

Below is example code to do it using the saev package.

import saev.nn\nimport saev.data.models\nimport saev.data.shards\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n\nvit_cls = saev.data.models.load_model_cls(\"clip\")\nvit = vit_cls(\"ViT-B-16/openai\").to(device)\nvit = saev.data.shards.RecordedTransformer(vit, 196, True, [10])\n\nimg_tr, _ = vit_cls.make_transforms(\"ViT-B-16/openai\", 196)\nimg = Image.open(\"example.jpg\")\n\nx = img_tr(img)\n# Add a batch dimension.\nx = x[None, ...]\n_, vit_acts = vit(x)\n# Select the only layer and ignore the CLS token.\nvit_acts = vit_acts[:, 0, 1:, :]\n\nout = sae(vit_acts)\n# out.f_x: sparse SAE latents (batch, d_sae)\n# out.x_hats: reconstructed activations (batch, n_prefixes, d_model)\n

Now you have the sparse representation of all patches in the image (out.f_x) and the reconstructed activations (out.x_hats).

You might select the dimensions with maximal values for each patch and see what other images are maximally activating.

"},{"location":"users/new-project/","title":"New Project Structure","text":"

saev is structured like big_vision, Google's ViT codebase. To get the most use out of saev, you should not use it as a requirement in your project; rather, you should build inside of the source code of saev. This is a guide to that process.

TL;DR:

  1. Fork saev.
  2. Clone your fork.
  3. Create a new directory in contrib/.
  4. Update both src/saev and your new contrib directory as necessary.
  5. (Hopefully) publish.
  6. If your changes to src/saev are broadly useful and not overly restrictive, open a PR with your changes to src/saev.

I am currently applying SAEs to audio of birdsong, so this is how I'll develop it.

First, fork and clone saev. Do this however you want, but GitHub has a guide on it.

Second, you probably want to store code related to your project in this repo. Make a new directory in contrib/. I'm calling my new subproject \"birdsong.\"

[I] samuelstevens@host ~/p/saev (main)> tree -L 1 contrib/\ncontrib/\n\u251c\u2500\u2500 birdsong\n\u251c\u2500\u2500 interactive_interp\n\u2514\u2500\u2500 trait_discovery\n

Use uv to make a new package inside your new project:

[I] samuelstevens@host ~/p/s/c/birdsong (main)> uv init --package .\nAdding `birdsong` as member of workspace `~/projects/saev`\nInitialized project `birdsong` at `~/projects/saev/contrib/birdsong`\n

Now you have some additional files.

[I] samuelstevens@ascend-login02 ~/p/s/c/birdsong (main)> tree\n.\n\u251c\u2500\u2500 pyproject.toml\n\u251c\u2500\u2500 README.md\n\u2514\u2500\u2500 src\n    \u2514\u2500\u2500 birdsong\n        \u2514\u2500\u2500 __init__.py\n

Now I can write scripts and source code for birdsong-specific stuff in here. I'll probably add a notebook for looking at instances of birdsongs before and after using SAEs to identify patterns under a new birdsong/notebooks directory, and will add birdsong/logbook.md to store ongoing TODO items, and so on.

To train SAEs on audio files, I'll need to add a new dataset type to save activations. In order to do this, I'll edit src/saev/data/datasets.py.

I'll also need to add another model to the dataset, one that expects audio files. Since I don't think that DINOv3, OpenCLIP, or the other existing model families will be suitable, I'll need to add a new model family. Again, this will need to go somewhere in src/saev/data.

If I'm smart about it, these changes will be nice and non-destructive, and other users of saev can benefit from them. After I publish some results, to share this code with others, I'll open a PR from my fork/branch to main with the new datasets/models. But I won't open a PR with birdsong because that's specific to me, rather than to the library.1

  1. Technically, birdsong will be in saev because I'm a sort of privileged user because I'm the main developer. But other folks probably want their project-specific code attached to their GitHub page, rather than OSU-NLP's.\u00a0\u21a9

"},{"location":"users/sweeps/","title":"Sweeps","text":"

Hyperparameter sweeps in saev train multiple SAE configurations in parallel on a single GPU, amortizing the cost of loading activation data from disk across all models. Furthermore, sweeps make it easy to train multiple SAEs with one command across multiple GPUs using Slurm.

"},{"location":"users/sweeps/#quick-start","title":"Quick Start","text":"

Create a Python file defining your sweep:

# sweeps/my_sweep.py\n\ndef make_cfgs() -> list[dict]:\n    cfgs = []\n\n    # Grid search over learning rate and sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"objective\": {\"sparsity_coeff\": sparsity},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

Run the sweep:

uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23\n

This trains 9 SAEs (3 learning rates x 3 sparsity coefficients) in parallel.

"},{"location":"users/sweeps/#why-parallel-sweeps","title":"Why Parallel Sweeps?","text":"

SAE training is bottlenecked by disk I/O, not GPU computation. Loading terabytes of pre-computed ViT activations from disk is the slowest part. By training multiple SAE configurations on the same batch simultaneously, we amortize the I/O cost:

\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502 ViT Activations (disk) \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n            \u2502 (slow I/O, once per batch)\n            \u25bc\n      \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n      \u2502  Batch   \u2502\n      \u2514\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2518\n            \u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n            \u25bc         \u25bc         \u25bc         \u25bc\n         SAE #1    SAE #2    SAE #3     ...\n        (lr=3e-4) (lr=1e-3) (lr=3e-3)\n
"},{"location":"users/sweeps/#sweep-configuration","title":"Sweep Configuration","text":""},{"location":"users/sweeps/#python-based-sweeps","title":"Python-Based Sweeps","text":"

Python sweeps give you full control over config generation. Your sweep file must define a make_cfgs() function that returns a list of dicts.

Grid search example:

def make_cfgs():\n    cfgs = []\n\n    for lr in [1e-4, 3e-4, 1e-3]:\n        for d_sae in [8192, 16384, 32768]:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

Paired parameters (not a grid):

def make_cfgs():\n    cfgs = []\n\n    # Grid over lr x sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            # Paired layers (train and val use same layer)\n            for layer in [6, 7, 8, 9, 10, 11]:\n                cfg = {\n                    \"lr\": lr,\n                    \"objective\": {\"sparsity_coeff\": sparsity},\n                    \"train_data\": {\"layer\": layer},\n                    \"val_data\": {\"layer\": layer},\n                }\n                cfgs.append(cfg)\n\n    return cfgs\n

This generates 54 configs (3 x 3 x 6) where each train/val pair uses the same layer, avoiding the 324 configs you'd get from a full grid (3 x 3 x 6 x 6).

Conditional sweeps:

def make_cfgs():\n    cfgs = []\n\n    for d_sae in [8192, 16384, 32768]:\n        # Use different LR for different SAE widths\n        lrs = [1e-3, 3e-3] if d_sae <= 16384 else [3e-4, 1e-3]\n\n        for lr in lrs:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n
"},{"location":"users/sweeps/#command-line-overrides","title":"Command-Line Overrides","text":"

Command-line arguments override sweep parameters with deep merging. The precedence order is: CLI > Sweep > Default.

uv run train.py --sweep sweeps/my_sweep.py \\\n  --lr 5e-4  # Overrides all LRs in the sweep\n

Override nested config fields with dotted notation:

uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23 \\\n  --sae.d-sae 16384\n

Deep merging means that when you override a nested field, only that specific field is replaced\u2014other fields in the nested config are preserved from the sweep or default values.

"},{"location":"users/sweeps/#parallel-groups","title":"Parallel Groups","text":"

Not all parameters can vary within a parallel sweep. Parameters that affect data loading (like train_data, n_train, device) must be identical across all configs in a parallel group.

When configs differ in these parameters, they're automatically split into separate Slurm jobs:

def make_cfgs():\n    cfgs = []\n\n    # These will run in 2 separate jobs\n    for layer in [6, 12]:  # Different data loading\n        for lr in [1e-4, 3e-4]:  # Can parallelize\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

This creates 2 parallel groups: - Job 1: layer=6, lr=[1e-4, 3e-4] - Job 2: layer=12, lr=[1e-4, 3e-4]

Implementation detail

See CANNOT_PARALLELIZE in train.py for the full list of parameters that split parallel groups. The split_cfgs() function handles grouping automatically.

"},{"location":"users/sweeps/#module-loading","title":"Module Loading","text":"

Your sweep file is executed as a Python module, so you can use imports and helper functions:

def make_cfgs():\n    cfgs = []\n\n    # You can use helper functions\n    base_layers = list(range(6, 24, 2))\n\n    for layer in base_layers:\n        for lr in [1e-4, 3e-4]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"val_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"sae\": {\"d_model\": 1024, \"d_sae\": 16384},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

Import mechanics

The sweep file is loaded with importlib.import_module(), so it must be importable as a Python module. Place sweep files in a location where Python can find them (typically the project root or a sweeps/ subdirectory).

"},{"location":"users/sweeps/#slurm-integration","title":"Slurm Integration","text":"

When running with --slurm-acct, each parallel group becomes a separate Slurm job:

uv run train.py --sweep sweeps/large.py \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --n-hours 24\n

The system automatically: - Groups configs that can parallelize - Submits one Slurm job per group - Waits for all jobs to complete - Reports results

"},{"location":"users/sweeps/#seed-management","title":"Seed Management","text":"

Seeds are automatically incremented for each config to ensure reproducibility:

# Base config has seed=42\n# Sweep generates 9 configs with seeds: 42, 43, 44, ..., 50\n

Override the base seed on the command line:

uv run train.py --sweep sweeps/my_sweep.py --seed 100\n
"},{"location":"users/sweeps/#examples","title":"Examples","text":"

Simple grid:

# sweeps/simple.py\ndef make_cfgs():\n    return [\n        {\"lr\": lr, \"objective\": {\"sparsity_coeff\": sp}}\n        for lr in [1e-4, 3e-4, 1e-3]\n        for sp in [4e-4, 8e-4, 1.6e-3]\n    ]\n

Layer sweep with paired train/val:

# sweeps/layers.py\ndef make_cfgs():\n    cfgs = []\n\n    for layer in range(6, 24, 2):  # Layers 6, 8, 10, ..., 22\n        for lr in [3e-4, 1e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n                \"val_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

Architecture sweep:

# sweeps/architecture.py\ndef make_cfgs():\n    cfgs = []\n\n    architectures = [\n        (\"small\", 8192, 1e-3),\n        (\"medium\", 16384, 5e-4),\n        (\"large\", 32768, 3e-4),\n    ]\n\n    for name, d_sae, lr in architectures:\n        cfg = {\n            \"lr\": lr,\n            \"sae\": {\"d_sae\": d_sae},\n            \"tag\": name,\n        }\n        cfgs.append(cfg)\n\n    return cfgs\n
"}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"],"fields":{"title":{"boost":1000.0},"text":{"boost":1.0},"tags":{"boost":1000000.0}}},"docs":[{"location":"","title":"saev","text":"

saev is a framework for training and evaluating Sparse autoencoders (SAEs) for vision transformers (ViTs), implemented in PyTorch.

"},{"location":"#installation","title":"Installation","text":"

Installation is supported with uv. saev will likely work with pure pip, conda, etc. but I will not formally support it.

Clone this repository, then from the root directory:

uv run scripts/launch.py --help\n

This will create a virtual environment and display the help for all the provided framework scripts.

"},{"location":"#quick-start","title":"Quick Start","text":"

Save some activations to disk:

uv run scripts/launch.py shards \\\n  --shards-root /$SCRATCH/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-32/openai \\\n  --d-model 768 \\\n  --layers 11 \\\n  --patches-per-ex 49 \\\n  --batch-size 256 \\\n  data:cifar10\n

Read the guide for details.

"},{"location":"#why-saev","title":"Why saev?","text":"

There are plenty of alternative libraries for SAEs:

However, saev has some benefits:

  1. saev is more of a framework, rather than a library. The reason for this is that SAEs require lots of activations to train a relatively small neural network; while you can implement it with a simple inference loop, efficient training requires some caching on disk. This means using saev is a little more like Keras or PyTorch Lightning than Huggingface's Transformers or Datasets libraries.
  2. saev offers lots of tools for interacting with sparse autoencoders after training, including interactive notebooks and evaluations.
  3. saev includes complete code from preprints in the contrib/ directory, along with logbooks describing how the authors used and developed saev.
"},{"location":"api/colors/","title":"saev.colors","text":"

Utility color palettes used across saev visualizations.

"},{"location":"api/configs/","title":"saev.configs","text":""},{"location":"api/configs/#saev.configs.dict_to_dataclass","title":"dict_to_dataclass(data, cls)","text":"

Recursively convert a dictionary to a dataclass instance.

Source code in src/saev/configs.py
@beartype.beartype\ndef dict_to_dataclass(data: dict, cls: type[T]) -> T:\n    \"\"\"Recursively convert a dictionary to a dataclass instance.\"\"\"\n    if not dataclasses.is_dataclass(cls):\n        return data\n\n    field_types = {f.name: f.type for f in dataclasses.fields(cls)}\n    kwargs = {}\n\n    for field_name, field_type in field_types.items():\n        if field_name not in data:\n            continue\n\n        value = data[field_name]\n\n        # Handle Optional types\n        origin = tp.get_origin(field_type)\n        args = tp.get_args(field_type)\n\n        # Handle tuple[str, ...]\n        if origin is tuple and args:\n            kwargs[field_name] = tuple(value) if isinstance(value, list) else value\n        # Handle list[DataclassType]\n        elif origin is list and args and dataclasses.is_dataclass(args[0]):\n            kwargs[field_name] = [dict_to_dataclass(item, args[0]) for item in value]\n        # Handle regular dataclass fields\n        elif dataclasses.is_dataclass(field_type):\n            kwargs[field_name] = dict_to_dataclass(value, field_type)\n        # Handle pathlib.Path\n        elif field_type is pathlib.Path:\n            # Required Path field - always convert\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is tp.Union and pathlib.Path in args:\n            # Optional Path field (typing.Union style)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is types.UnionType and pathlib.Path in args:\n            # Optional Path field (Python 3.10+ union style with |)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        else:\n            kwargs[field_name] = value\n\n    return cls(**kwargs)\n
"},{"location":"api/configs/#saev.configs.expand","title":"expand(config)","text":"

Expand a nested dict that may contain lists into many dicts.

Source code in src/saev/configs.py
@beartype.beartype\ndef expand(config: dict[str, object]) -> Iterator[dict[str, object]]:\n    \"\"\"Expand a nested dict that may contain lists into many dicts.\"\"\"\n    yield from _expand_discrete(dict(config))\n
"},{"location":"api/configs/#saev.configs.get_non_default_values","title":"get_non_default_values(obj, default_obj)","text":"

Recursively find fields that differ from defaults.

Source code in src/saev/configs.py
@beartype.beartype\ndef get_non_default_values(obj: T, default_obj: T) -> dict:\n    \"\"\"Recursively find fields that differ from defaults.\"\"\"\n    # Check that obj and default_obj are instances of a dataclass.\n    assert dataclasses.is_dataclass(obj) and not isinstance(obj, type)\n    assert dataclasses.is_dataclass(default_obj) and not isinstance(default_obj, type)\n\n    diff = {}\n    for field in dataclasses.fields(obj):\n        obj_value = getattr(obj, field.name)\n        default_value = getattr(default_obj, field.name)\n\n        if obj_value == default_value:\n            continue\n\n        # If both are dataclasses of the same type, recurse to find nested differences\n        if (\n            dataclasses.is_dataclass(obj_value)\n            and dataclasses.is_dataclass(default_value)\n            and type(obj_value) is type(default_value)\n        ):\n            nested_diff = get_non_default_values(obj_value, default_value)\n            if nested_diff:\n                diff[field.name] = nested_diff\n        else:\n            # For non-dataclass fields or different types, just record the value\n            diff[field.name] = obj_value\n\n    return diff\n
"},{"location":"api/configs/#saev.configs.load_cfgs","title":"load_cfgs(override, *, default, sweep_dcts)","text":"

Load a list of configs from a combination of sources.

Parameters:

Name Type Description Default override T

Command-line overridden values.

required default T

The default values for a config.

required sweep_dcts list[dict]

A list of dictionaries from Python sweep files. Each dictionary may contain list values that will be expanded.

required

Returns:

Type Description tuple[list[T], list[str]]

A list of configs and a list of errors.

Source code in src/saev/configs.py
@beartype.beartype\ndef load_cfgs(\n    override: T, *, default: T, sweep_dcts: list[dict]\n) -> tuple[list[T], list[str]]:\n    \"\"\"\n    Load a list of configs from a combination of sources.\n\n    Args:\n        override: Command-line overridden values.\n        default: The default values for a config.\n        sweep_dcts: A list of dictionaries from Python sweep files. Each dictionary may contain list values that will be expanded.\n\n    Returns:\n        A list of configs and a list of errors.\n    \"\"\"\n    # Check that override and default are instances of a dataclass.\n    assert dataclasses.is_dataclass(override) and not isinstance(override, type)\n    assert dataclasses.is_dataclass(default) and not isinstance(default, type)\n\n    # If there's nothing to sweep, return just the override\n    if not sweep_dcts:\n        return [override], []\n\n    # Find which fields were overridden (differ from default)\n    overridden_fields = get_non_default_values(override, default)\n\n    cfgs: list[T] = []\n    errs: list[str] = []\n\n    d = 0  # Global counter for seed incrementing across all expanded configs\n\n    for sweep_dct in sweep_dcts:\n        # Filter out overridden fields from this sweep dict\n        filtered_dct = _filter_overridden_fields(sweep_dct, overridden_fields)\n\n        # If there's nothing to sweep after filtering, just use override\n        if not filtered_dct:\n            cfgs.append(override)\n            d += 1\n            continue\n\n        # Apply the sweep dict to create a config\n        try:\n            updates = _recursive_dataclass_update(override, filtered_dct, override, d)\n\n            if hasattr(override, \"seed\") and \"seed\" not in updates:\n                updates[\"seed\"] = getattr(override, \"seed\", 0) + d\n\n            cfgs.append(dataclasses.replace(override, **updates))\n            d += 1\n        except Exception as err:\n            errs.append(str(err))\n            d += 1\n\n    return cfgs, errs\n
"},{"location":"api/configs/#saev.configs.load_sweep","title":"load_sweep(sweep_fpath)","text":"

Load a sweep file and return the list of config dicts.

Parameters:

Name Type Description Default sweep_fpath Path

Path to a Python file with a make_cfgs() function.

required

Returns:

Type Description list[dict]

List of config dictionaries from make_cfgs(). Returns empty list if any error occurs.

Source code in src/saev/configs.py
@beartype.beartype\ndef load_sweep(sweep_fpath: pathlib.Path) -> list[dict]:\n    \"\"\"\n    Load a sweep file and return the list of config dicts.\n\n    Args:\n        sweep_fpath: Path to a Python file with a `make_cfgs()` function.\n\n    Returns:\n        List of config dictionaries from `make_cfgs()`. Returns empty list if any error occurs.\n    \"\"\"\n    try:\n        namespace = {}\n        exec(sweep_fpath.read_text(), namespace)\n        result = namespace[\"make_cfgs\"]()\n        if not isinstance(result, list):\n            logger.warning(\n                f\"make_cfgs() in {sweep_fpath} returned {type(result).__name__}, expected list\"\n            )\n            return []\n        return result\n    except Exception as err:\n        logger.warning(f\"Failed to load sweep from {sweep_fpath}: {err}\")\n        return []\n
"},{"location":"api/disk/","title":"saev.disk","text":"

Helpers for sticking with the layout described in disk-layout.md.

"},{"location":"api/disk/#saev.disk.Run","title":"Run(run_dir)","text":"

Represents an SAE training run and some associated data.

Parameters:

Name Type Description Default run_dir Path

Run directory, should be $SAEV_NFS/saev/runs/<run_id>. Assumes the run already exists and validates the structure. Use Run.new() to create a new run. required Source code in src/saev/disk.py

def __init__(self, run_dir: pathlib.Path):\n    self.run_dir = run_dir\n\n    if len(self.run_dir.parts) < 3 or self.run_dir.parts[-3:-1] != (\"saev\", \"runs\"):\n        raise ValueError(\"Run directory is invalid.\")\n\n    if not self.run_dir.exists():\n        raise FileNotFoundError(\n            f\"Run directory does not exist: {self.run_dir}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"checkpoint\").exists():\n        raise FileNotFoundError(\n            f\"Checkpoint directory does not exist: {self.run_dir / 'checkpoint'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"links\").exists():\n        raise FileNotFoundError(\n            f\"Links directory does not exist: {self.run_dir / 'links'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"inference\").exists():\n        raise FileNotFoundError(\n            f\"Inference directory does not exist: {self.run_dir / 'inference'}. Use Run.new() to create a new run.\"\n        )\n
"},{"location":"api/disk/#saev.disk.Run.ckpt","title":"ckpt property","text":"

Path to the sae.pt checkpoint.

"},{"location":"api/disk/#saev.disk.Run.config","title":"config property","text":"

The training run config. Not a train.Config object because we don't want to import from train.py.

"},{"location":"api/disk/#saev.disk.Run.inference","title":"inference property","text":"

Path to the inference/ directory.

"},{"location":"api/disk/#saev.disk.Run.run_id","title":"run_id property","text":"

The run ID, created by wandb.

"},{"location":"api/disk/#saev.disk.Run.train_shards","title":"train_shards property","text":"

Path to shard root with metadata.json and acts*.bin files.

"},{"location":"api/disk/#saev.disk.Run.val_shards","title":"val_shards property","text":"

Path to shard root with metadata.json and acts*.bin files.

"},{"location":"api/disk/#saev.disk.Run.new","title":"new(run_id, *, train_shards_dir, val_shards_dir, runs_root) classmethod","text":"

Create a new run with directory structure and symlinks.

Parameters:

Name Type Description Default run_id str

The run ID (typically from wandb).

required train_shards_dir Path

Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>). required val_shards_dir Path

Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>). required runs_root Path

Root directory for runs (typically $SAEV_NFS/saev/runs).

required

Returns:

Type Description Run

A new Run instance with all directories and symlinks created.

Source code in src/saev/disk.py
@classmethod\ndef new(\n    cls,\n    run_id: str,\n    *,\n    train_shards_dir: pathlib.Path,\n    val_shards_dir: pathlib.Path,\n    runs_root: pathlib.Path,\n) -> \"Run\":\n    \"\"\"\n    Create a new run with directory structure and symlinks.\n\n    Args:\n        run_id: The run ID (typically from wandb).\n        train_shards_dir: Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>).\n        val_shards_dir: Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>).\n        runs_root: Root directory for runs (typically $SAEV_NFS/saev/runs).\n\n    Returns:\n        A new Run instance with all directories and symlinks created.\n    \"\"\"\n    run_dir = runs_root / run_id\n    run_dir.mkdir(parents=True)\n    (run_dir / \"checkpoint\").mkdir()\n    (run_dir / \"links\").mkdir()\n    (run_dir / \"inference\").mkdir()\n\n    (run_dir / \"links\" / \"train-shards\").symlink_to(train_shards_dir)\n    (run_dir / \"links\" / \"val-shards\").symlink_to(val_shards_dir)\n\n    return cls(run_dir)\n
"},{"location":"api/disk/#saev.disk.is_runs_root","title":"is_runs_root(path)","text":"

Check if path is a valid runs root directory.

A valid runs root ends with saev/runs and exists as a directory.

Parameters:

Name Type Description Default path Path

Path to check.

required

Returns:

Type Description bool

True if path is a directory ending in saev/runs.

Source code in src/saev/disk.py
@beartype.beartype\ndef is_runs_root(path: pathlib.Path) -> bool:\n    \"\"\"\n    Check if `path` is a valid runs root directory.\n\n    A valid runs root ends with `saev/runs` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/runs.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"runs\")\n
"},{"location":"api/disk/#saev.disk.is_shards_dir","title":"is_shards_dir(path)","text":"

Check if path is a specific shards directory.

A valid shards directory ends with saev/shards/<hash> for any hash value, exists as a directory, and contains the required files (metadata.json, shards.json, labels.bin).

Parameters:

Name Type Description Default path Path

Path to check.

required

Returns:

Type Description bool

True if path is a directory ending in saev/shards/<hash> with required files. Source code in src/saev/disk.py

@beartype.beartype\ndef is_shards_dir(path: pathlib.Path) -> bool:\n    \"\"\"\n    Check if `path` is a specific shards directory.\n\n    A valid shards directory ends with `saev/shards/<hash>` for any hash value, exists as a directory, and contains the required files (metadata.json, shards.json, labels.bin).\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards/<hash> with required files.\n    \"\"\"\n    if not path.is_dir():\n        return False\n\n    if len(path.parts) < 3 or path.parts[-3:-1] != (\"saev\", \"shards\"):\n        return False\n\n    return True\n
"},{"location":"api/disk/#saev.disk.is_shards_root","title":"is_shards_root(path)","text":"

Check if path is a valid shards root directory.

A valid shards root ends with saev/shards and exists as a directory.

Parameters:

Name Type Description Default path Path

Path to check.

required

Returns:

Type Description bool

True if path is a directory ending in saev/shards.

Source code in src/saev/disk.py
@beartype.beartype\ndef is_shards_root(path: pathlib.Path) -> bool:\n    \"\"\"\n    Check if `path` is a valid shards root directory.\n\n    A valid shards root ends with `saev/shards` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"shards\")\n
"},{"location":"api/helpers/","title":"saev.helpers","text":""},{"location":"api/helpers/#saev.helpers.RemovedFeatureError","title":"RemovedFeatureError","text":"

Bases: RuntimeError

Feature existed before but is no longer supported.

"},{"location":"api/helpers/#saev.helpers.batched_idx","title":"batched_idx(total_size, batch_size)","text":"

Iterate over (start, end) indices for total_size examples, where end - start is at most batch_size.

Parameters:

Name Type Description Default total_size int

total number of examples

required batch_size int

maximum distance between the generated indices.

required

Returns:

Type Description

A generator of (int, int) tuples that can slice up a list or a tensor.

Source code in src/saev/helpers.py
def __init__(self, total_size: int, batch_size: int):\n    self.total_size = total_size\n    self.batch_size = batch_size\n
"},{"location":"api/helpers/#saev.helpers.batched_idx.__iter__","title":"__iter__()","text":"

Yield (start, end) index pairs for batching.

Source code in src/saev/helpers.py
def __iter__(self) -> Iterator[tuple[int, int]]:\n    \"\"\"Yield (start, end) index pairs for batching.\"\"\"\n    for start in range(0, self.total_size, self.batch_size):\n        stop = min(start + self.batch_size, self.total_size)\n        yield start, stop\n
"},{"location":"api/helpers/#saev.helpers.batched_idx.__len__","title":"__len__()","text":"

Return the number of batches.

Source code in src/saev/helpers.py
def __len__(self) -> int:\n    \"\"\"Return the number of batches.\"\"\"\n    return (self.total_size + self.batch_size - 1) // self.batch_size\n
"},{"location":"api/helpers/#saev.helpers.progress","title":"progress(it, *, every=10, desc='progress', total=0)","text":"

Wraps an iterable with a logger like tqdm but doesn't use any control codes to manipulate a progress bar, which doesn't work well when your output is redirected to a file. Instead, simple logging statements are used, but it includes quality-of-life features like iteration speed and predicted time to finish.

Parameters:

Name Type Description Default it Iterable

Iterable to wrap.

required every int

How many iterations between logging progress.

10 desc str

What to name the logger.

'progress' total int

If non-zero, how long the iterable is.

0 Source code in src/saev/helpers.py
def __init__(\n    self, it: Iterable, *, every: int = 10, desc: str = \"progress\", total: int = 0\n):\n    self.it = it\n    self.every = max(every, 1)\n    self.logger = logging.getLogger(desc)\n    self.total = total\n
"},{"location":"api/helpers/#saev.helpers.csr_topk","title":"csr_topk(arr, *, k, axis=0, batch_size=1024)","text":"

Takes the top k values of a sparse CSR array.

We can only iterate efficiently over rows because it's a CSR array.

Parameters:

Name Type Description Default arr csr_array | csr_matrix

The CSR array of values with shape (rows, cols).

required k int

The k in \"top-k\".

required axis int

The dimension to sort along.

0 batch_size int

How many rows to process at once.

1024

Returns:

Type Description NumpyTopK

saev.helpers.NumpyTopK

Source code in src/saev/helpers.py
@beartype.beartype\ndef csr_topk(\n    arr: scipy.sparse.csr_array | scipy.sparse.csr_matrix,\n    *,\n    k: int,\n    axis: int = 0,\n    batch_size: int = 1024,\n) -> NumpyTopK:\n    \"\"\"\n    Takes the top k values of a sparse CSR array.\n\n    We can only iterate efficiently over *rows* because it's a a *CSR* array.\n\n    Args:\n        arr: The CSR array of values with shape (rows, cols).\n        k: The k in \"top-k\".\n        axis: The dimension to sort along.\n        batch_size: How many rows to process at once.\n\n    Returns:\n        saev.helpers.NumpyTopK\n    \"\"\"\n    if axis == 0:\n        return _csr_topk_axis0(arr, k, batch_size)\n    elif axis == 1:\n        return _csr_topk_axis1(arr, k)\n    else:\n        raise ValueError(f\"axis must be 0 or 1, got {axis}\")\n
"},{"location":"api/helpers/#saev.helpers.current_git_commit","title":"current_git_commit()","text":"

Best-effort short SHA of the repo containing this file.

Returns None when the git executable is missing, when we\u2019re not inside a git repo (e.g. installed wheel), or when any git call errors out.

Source code in src/saev/helpers.py
@beartype.beartype\ndef current_git_commit() -> str | None:\n    \"\"\"\n    Best-effort short SHA of the repo containing *this* file.\n\n    Returns `None` when\n    * `git` executable is missing,\n    * we\u2019re not inside a git repo (e.g. installed wheel),\n    * or any git call errors out.\n    \"\"\"\n    try:\n        # Walk up until we either hit a .git dir or the FS root\n        here = pathlib.Path(__file__).resolve()\n        for parent in (here, *here.parents):\n            if (parent / \".git\").exists():\n                break\n        else:  # no .git found\n            return None\n\n        result = subprocess.run(\n            [\"git\", \"-C\", str(parent), \"rev-parse\", \"--short\", \"HEAD\"],\n            stdout=subprocess.PIPE,\n            stderr=subprocess.DEVNULL,\n            text=True,\n            check=True,\n        )\n        return result.stdout.strip() or None\n    except (FileNotFoundError, subprocess.CalledProcessError):\n        return None\n
"},{"location":"api/helpers/#saev.helpers.flattened","title":"flattened(dct, *, sep='.')","text":"

Flatten a potentially nested dict to a single-level dict with .-separated keys.

Source code in src/saev/helpers.py
@beartype.beartype\ndef flattened(\n    dct: dict[str, object], *, sep: str = \".\"\n) -> dict[str, str | int | float | bool | None]:\n    \"\"\"\n    Flatten a potentially nested dict to a single-level dict with `.`-separated keys.\n    \"\"\"\n    new = {}\n    for key, value in dct.items():\n        if isinstance(value, dict):\n            for nested_key, nested_value in flattened(value).items():\n                new[key + \".\" + nested_key] = nested_value\n            continue\n\n        new[key] = value\n\n    return new\n
"},{"location":"api/helpers/#saev.helpers.fssafe","title":"fssafe(s)","text":"

Convert a string to be filesystem-safe by replacing special characters.

This is particularly useful for checkpoint names that contain characters like 'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like 'hf-hub_timm_ViT-L-16-SigLIP2-256'.

Parameters:

Name Type Description Default s str

String to make filesystem-safe.

required

Returns:

Type Description str

Filesystem-safe version of the string.

Source code in src/saev/helpers.py
@beartype.beartype\ndef fssafe(s: str) -> str:\n    \"\"\"\n    Convert a string to be filesystem-safe by replacing special characters.\n\n    This is particularly useful for checkpoint names that contain characters like\n    'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like\n    'hf-hub_timm_ViT-L-16-SigLIP2-256'.\n\n    Args:\n        s: String to make filesystem-safe.\n\n    Returns:\n        Filesystem-safe version of the string.\n    \"\"\"\n    # Replace common problematic characters with underscores\n    replacements = {\n        \"/\": \"_\",\n        \"\\\\\": \"_\",\n        \":\": \"_\",\n        \"*\": \"_\",\n        \"?\": \"_\",\n        '\"': \"_\",\n        \"<\": \"_\",\n        \">\": \"_\",\n        \"|\": \"_\",\n        \" \": \"_\",\n    }\n    for old, new in replacements.items():\n        s = s.replace(old, new)\n    # Remove any remaining non-alphanumeric characters except - _ .\n    return \"\".join(c if c.isalnum() or c in \"-_.\" else \"_\" for c in s)\n
"},{"location":"api/helpers/#saev.helpers.get_cache_dir","title":"get_cache_dir()","text":"

Get cache directory from environment variables, defaulting to the current working directory (.).

Returns:

Type Description str

A path to a cache directory (might not exist yet).

Source code in src/saev/helpers.py
@beartype.beartype\ndef get_cache_dir() -> str:\n    \"\"\"\n    Get cache directory from environment variables, defaulting to the current working directory (.)\n\n    Returns:\n        A path to a cache directory (might not exist yet).\n    \"\"\"\n    cache_dir = \"\"\n    for var in (\"SAEV_CACHE\", \"HF_HOME\", \"HF_HUB_CACHE\"):\n        cache_dir = cache_dir or os.environ.get(var, \"\")\n    return cache_dir or \".\"\n
"},{"location":"api/helpers/#saev.helpers.get_slurm_job_count","title":"get_slurm_job_count()","text":"

Get the current number of jobs in the queue for the current user.

Uses squeue's -r flag to properly count job array elements individually. For example, a job array 12345_[0-99] will be counted as 100 jobs.

Source code in src/saev/helpers.py
@beartype.beartype\ndef get_slurm_job_count() -> int:\n    \"\"\"\n    Get the current number of jobs in the queue for the current user.\n\n    Uses squeue's -r flag to properly count job array elements individually.\n    For example, a job array 12345_[0-99] will be counted as 100 jobs.\n    \"\"\"\n    try:\n        # Use -r to display each array element on its own line\n        result = subprocess.run(\n            [\"squeue\", \"--me\", \"-h\", \"-r\"], capture_output=True, text=True, check=True\n        )\n\n        # Count non-empty lines\n        lines = result.stdout.strip().split(\"\\n\")\n        return len([line for line in lines if line.strip()])\n\n    except (subprocess.SubprocessError, FileNotFoundError):\n        # If we can't check, assume no jobs\n        return 0\n
"},{"location":"api/helpers/#saev.helpers.get_slurm_max_array_size","title":"get_slurm_max_array_size()","text":"

Get the MaxArraySize configuration from the current Slurm cluster.

Returns:

Name Type Description int int

The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.

Source code in src/saev/helpers.py
@beartype.beartype\ndef get_slurm_max_array_size() -> int:\n    \"\"\"\n    Get the MaxArraySize configuration from the current Slurm cluster.\n\n    Returns:\n        int: The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # Run scontrol command to get config information\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"config\"], capture_output=True, text=True, check=True\n        )\n\n        # Search for MaxArraySize in the output\n        match = re.search(r\"MaxArraySize\\s*=\\s*(\\d+)\", result.stdout)\n        if match:\n            max_array_size = int(match.group(1))\n            logger.info(\"Detected MaxArraySize = %d\", max_array_size)\n            return max_array_size\n        else:\n            logger.warning(\n                \"Could not find MaxArraySize in scontrol output, using default of 1000\"\n            )\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error running scontrol: %s\", e)\n        return 1000  # Safe default\n    except ValueError as e:\n        logger.error(\"Error parsing MaxArraySize: %s\", e)\n        return 1000  # Safe default\n    except FileNotFoundError:\n        logger.warning(\n            \"scontrol command not found. Assuming not in Slurm environment. Returning default MaxArraySize=1000.\"\n        )\n        return 1000\n
"},{"location":"api/helpers/#saev.helpers.get_slurm_max_submit_jobs","title":"get_slurm_max_submit_jobs()","text":"

Get the MaxSubmitJobs limit from the current user's QOS.

Returns:

Name Type Description int int

The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.

Source code in src/saev/helpers.py
@beartype.beartype\ndef get_slurm_max_submit_jobs() -> int:\n    \"\"\"\n    Get the MaxSubmitJobs limit from the current user's QOS.\n\n    Returns:\n        int: The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # First, try to get the QOS from a recent job\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"job\", \"-o\"],\n            capture_output=True,\n            text=True,\n            check=False,\n        )\n\n        qos_name = None\n        if result.returncode == 0 and result.stdout:\n            # Extract QOS from job info\n            match = re.search(r\"QOS=(\\S+)\", result.stdout)\n            if match:\n                qos_name = match.group(1)\n\n        if not qos_name:\n            # If no jobs, try to get default QOS from association\n            # This is less reliable but better than nothing\n            logger.warning(\"No active jobs to determine QOS, using default of 1000\")\n            return 1000\n\n        # Get the MaxSubmitJobs for this QOS\n        result = subprocess.run(\n            [\"sacctmgr\", \"show\", \"qos\", qos_name, \"format=maxsubmitjobs\", \"-n\", \"-P\"],\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        max_submit = result.stdout.strip()\n        if max_submit and max_submit.isdigit():\n            limit = int(max_submit)\n            logger.info(\"Detected MaxSubmitJobs = %d for QOS %s\", limit, qos_name)\n            return limit\n        else:\n            logger.warning(\"Could not parse MaxSubmitJobs, using default of 1000\")\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error getting MaxSubmitJobs: %s\", e)\n        return 1000\n    except (ValueError, FileNotFoundError) as e:\n        logger.error(\"Error: %s\", e)\n        return 1000\n
"},{"location":"api/helpers/#saev.helpers.np_topk","title":"np_topk(arr, k, axis=None)","text":"

A numpy implementation of torch.topk.

Returns the k largest elements along the given axis. If axis is None, the array is flattened first.

Parameters:

Name Type Description Default arr ndarray

Input array.

required k int

Number of top elements to return.

required axis int | None

Axis along which to find top k elements. If None, flattens array first.

None

Returns:

Type Description NumpyTopK

A NumpyTopK holding the k largest values along the specified axis, sorted in descending order, together with their indices.

Source code in src/saev/helpers.py
@beartype.beartype\ndef np_topk(arr: np.ndarray, k: int, axis: int | None = None) -> NumpyTopK:\n    \"\"\"A numpy implementation of torch.topk.\n\n    Returns the k largest elements along the given axis. If axis is None, the array is flattened first.\n\n    Args:\n        arr: Input array.\n        k: Number of top elements to return.\n        axis: Axis along which to find top k elements. If None, flattens array first.\n\n    Returns:\n        Array of k largest values along the specified axis, sorted in descending order.\n    \"\"\"\n    if axis is None:\n        arr = arr.flatten()\n        axis = 0\n\n    # Handle negative axis\n    if axis < 0:\n        axis = arr.ndim + axis\n\n    # For each position along other axes, sort and take top k\n    # Use argsort which is stable and will preserve order for equal values\n    sort_indices = np.argsort(-arr, axis=axis, kind=\"stable\")\n\n    # Take the first k sorted indices\n    topk_indices = np.take(sort_indices, np.arange(k), axis=axis)\n\n    # Gather the top k values\n    topk_values = np.take_along_axis(arr, topk_indices, axis=axis)\n\n    return NumpyTopK(values=topk_values, indices=topk_indices)\n
"},{"location":"api/helpers/#saev.helpers.submit_job_array","title":"submit_job_array(executor, fn, args_list, *, logger=None, margin=0.8)","text":"

Submit jobs in batches to respect Slurm's MaxArraySize limit.

Yields (index, result) tuples in submission order, waiting on each job's result in turn. Batches are submitted sequentially - each batch must complete before the next is submitted.

Parameters:

Name Type Description Default executor

A submitit executor (SlurmExecutor or LocalExecutor).

required fn Callable

Worker function to call for each config.

required args_list list

List of arguments to pass to fn.

required logger Logger | None

Optional logger for progress messages.

None margin float

Fraction of MaxArraySize to use (default 0.8).

0.8

Yields:

Type Description int

Tuples of (global_index, result) for successful jobs.

object

For failed jobs, yields (global_index, None) and logs a warning.

Example
executor = submitit.SlurmExecutor(folder=\"./logs\")\nexecutor.update_parameters(...)\n\nfor idx, result in submit_job_array(executor, worker_fn, configs):\n    print(f\"Job {idx} returned {result}\")\n
Source code in src/saev/helpers.py
@beartype.beartype\ndef submit_job_array(\n    executor,\n    fn: tp.Callable,\n    args_list: list,\n    *,\n    logger: logging.Logger | None = None,\n    margin: float = 0.8,\n) -> Iterator[tuple[int, object]]:\n    \"\"\"\n    Submit jobs in batches to respect Slurm's MaxArraySize limit.\n\n    Yields (index, result) tuples as jobs complete. Batches are submitted sequentially - each batch must complete before the next is submitted.\n\n    Args:\n        executor: A submitit executor (SlurmExecutor or LocalExecutor).\n        fn: Worker function to call for each config.\n        args_list: List of arguments to pass to fn.\n        logger: Optional logger for progress messages.\n        margin: Fraction of MaxArraySize to use (default 0.8).\n\n    Yields:\n        Tuples of (global_index, result) for successful jobs.\n        For failed jobs, yields (global_index, None) and logs a warning.\n\n    Example:\n        ```\n        executor = submitit.SlurmExecutor(folder=\"./logs\")\n        executor.update_parameters(...)\n\n        for idx, result in submit_job_array(executor, worker_fn, configs):\n            print(f\"Job {idx} returned {result}\")\n        ```\n    \"\"\"\n    from submitit.core.utils import UncompletedJobError\n\n    arr_size = int(get_slurm_max_array_size() * margin)\n    n_total = len(args_list)\n\n    for arr_start, arr_end in batched_idx(n_total, arr_size):\n        batch_args = args_list[arr_start:arr_end]\n\n        if logger:\n            logger.info(\n                \"Submitting batch of %d jobs (%d-%d of %d).\",\n                len(batch_args),\n                arr_start + 1,\n                arr_end,\n                n_total,\n            )\n\n        with executor.batch():\n            jobs = [executor.submit(fn, arg) for arg in batch_args]\n\n        time.sleep(5.0)\n\n        for i, job in enumerate(jobs):\n            global_idx = arr_start + i\n            try:\n                result = job.result()\n                yield 
global_idx, result\n            except UncompletedJobError:\n                if logger:\n                    logger.warning(\n                        \"Job %s (%d) did not finish.\", job.job_id, global_idx\n                    )\n                yield global_idx, None\n
"},{"location":"api/metrics/","title":"saev.metrics","text":""},{"location":"api/metrics/#saev.metrics.Metrics","title":"Metrics(mse_per_dim, mse_per_token, normalized_mse, baseline_mse_per_dim, baseline_mse_per_token, sse_recon, sse_baseline, n_tokens, d_model, n_elements) dataclass","text":"

Validated reconstruction metrics aggregated over one evaluation corpus.

The primary totals are sse_recon (SAE reconstruction SSE) and sse_baseline (mean-baseline SSE). Derived terms are: - normalized_mse = sse_recon / sse_baseline - mse_per_dim = sse_recon / n_elements - mse_per_token = sse_recon / n_tokens - baseline_mse_per_dim = sse_baseline / n_elements - baseline_mse_per_token = sse_baseline / n_tokens

Size terms are: - n_tokens: number of tokens included in aggregation - d_model: embedding width per token - n_elements = n_tokens * d_model

"},{"location":"api/metrics/#saev.metrics.Metrics.from_accumulators","title":"from_accumulators(*, sse_recon, sse_baseline, n_tokens, d_model) classmethod","text":"

Construct metrics from aggregate sums and shape information.

Parameters:

Name Type Description Default sse_recon float

Sum of squared reconstruction errors over all selected tokens and dimensions.

required sse_baseline float

Sum of squared mean-baseline errors over the same tokens and dimensions.

required n_tokens int

Number of selected tokens in the aggregation set.

required d_model int

Activation dimension per token.

required

Returns:

Type Description Metrics

A validated Metrics object with all derived fields populated.

Source code in src/saev/metrics.py
@classmethod\ndef from_accumulators(\n    cls, *, sse_recon: float, sse_baseline: float, n_tokens: int, d_model: int\n) -> \"Metrics\":\n    \"\"\"Construct metrics from aggregate sums and shape information.\n\n    Args:\n        sse_recon: Sum of squared reconstruction errors over all selected tokens and dimensions.\n        sse_baseline: Sum of squared mean-baseline errors over the same tokens and dimensions.\n        n_tokens: Number of selected tokens in the aggregation set.\n        d_model: Activation dimension per token.\n\n    Returns:\n        A validated `Metrics` object with all derived fields populated.\n    \"\"\"\n\n    msg = f\"n_tokens must be positive, got {n_tokens}.\"\n    assert n_tokens > 0, msg\n    msg = f\"d_model must be positive, got {d_model}.\"\n    assert d_model > 0, msg\n    msg = f\"sse_recon must be >= 0, got {sse_recon}.\"\n    assert sse_recon >= 0.0, msg\n    msg = f\"sse_baseline must be > 0, got {sse_baseline}.\"\n    assert sse_baseline > 0.0, msg\n\n    n_elements = n_tokens * d_model\n    return cls(\n        mse_per_dim=sse_recon / n_elements,\n        mse_per_token=sse_recon / n_tokens,\n        normalized_mse=sse_recon / sse_baseline,\n        baseline_mse_per_dim=sse_baseline / n_elements,\n        baseline_mse_per_token=sse_baseline / n_tokens,\n        sse_recon=sse_recon,\n        sse_baseline=sse_baseline,\n        n_tokens=n_tokens,\n        d_model=d_model,\n        n_elements=n_elements,\n    )\n
"},{"location":"api/saev/","title":"saev","text":"

saev is a Python package for training sparse autoencoders (SAEs) on vision transformers (ViTs) in PyTorch.

"},{"location":"api/summary/","title":"Summary","text":""},{"location":"api/viz/","title":"saev.viz","text":""},{"location":"api/viz/#saev.viz.load_palette","title":"load_palette(path)","text":"

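Load a color palette from a text file with one color per line (each non-blank line is parsed with parse_color). Blank lines act as placeholders and are filled with additional colors generated by glasbey, extending the colors that were given. Returns a list of (r, g, b) float tuples with every channel in [0, 1].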

Source code in src/saev/viz.py
@beartype.beartype\ndef load_palette(path: pathlib.Path) -> list[tuple[float, float, float]]:\n    \"\"\"TODO: docstring.\"\"\"\n    import glasbey\n\n    palette = []\n\n    for i, line in enumerate(path.read_text().split(\"\\n\")):\n        line = line.strip()\n        if not line:\n            palette.append(None)\n            continue\n\n        palette.append(parse_color(line))\n\n    # Extend the palette using https://glasbey.readthedocs.io/en/latest/extending_palettes.html\n    n_missing = sum(color is None for color in palette)\n    if n_missing:\n        seed_palette = [color for color in palette if color is not None]\n        if seed_palette:\n            extended = glasbey.extend_palette(\n                seed_palette, palette_size=len(seed_palette) + n_missing, as_hex=False\n            )\n            fill_colors = extended[len(seed_palette) :]\n        else:\n            fill_colors = glasbey.create_palette(palette_size=n_missing, as_hex=False)\n\n        fill_iter = iter(fill_colors)\n        for i, color in enumerate(palette):\n            if color is not None:\n                continue\n            next_color = tuple(float(chan) for chan in next(fill_iter))\n            palette[i] = next_color\n\n    for i, color in enumerate(palette):\n        assert color is not None\n        msg = f\"Color {i} is invalid: {color}\"\n        assert all(0 <= chan <= 1 and isinstance(chan, float) for chan in color), msg\n\n    return palette\n
"},{"location":"api/data/bird_mae/","title":"saev.data.bird_mae","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.Encoder","title":"Encoder(cfg)","text":"

Bases: Module

Pure PyTorch Bird-MAE backbone (no HF).

Source code in src/saev/data/bird_mae.py
def __init__(self, cfg: Config) -> None:\n    super().__init__()\n    self.cfg = cfg\n\n    self.patch_embed = PatchEmbed(\n        img_size=(cfg.img_size_x, cfg.img_size_y),\n        patch_size=(cfg.patch_size, cfg.patch_size),\n        in_chans=cfg.in_chans,\n        embed_dim=cfg.embed_dim,\n    )\n\n    self.cls_token = nn.Parameter(torch.zeros(1, 1, cfg.embed_dim))\n    self.pos_embed = nn.Parameter(\n        torch.zeros(1, cfg.n_patches + 1, cfg.embed_dim),\n        requires_grad=cfg.pos_trainable,\n    )\n\n    if self.pos_embed.data.shape[1] == cfg.n_tokens:\n        pos_embed_np = get_2d_sincos_pos_embed_flexible(\n            self.pos_embed.shape[-1],\n            self.patch_embed.patch_hw,\n            cls_token=True,\n        )\n        self.pos_embed.data.copy_(\n            torch.from_numpy(pos_embed_np).float().unsqueeze(0)\n        )\n    else:\n        logger.warning(\n            \"Positional embedding shape mismatch. Will not initialize sin-cos pos embed.\"\n        )\n\n    dpr = [x.item() for x in torch.linspace(0, cfg.drop_rate, cfg.depth)]\n    self.blocks = nn.ModuleList([\n        Block(\n            dim=cfg.embed_dim,\n            n_heads=cfg.n_heads,\n            mlp_ratio=cfg.mlp_ratio,\n            qkv_bias=cfg.qkv_bias,\n            qk_norm=cfg.qk_norm,\n            init_values=cfg.init_values,\n            proj_drop=cfg.drop_rate,\n            attn_drop=cfg.drop_rate,\n            drop_path=dpr[i],\n            norm_layer=functools.partial(nn.LayerNorm, eps=cfg.norm_layer_eps),\n        )\n        for i in range(cfg.depth)\n    ])\n\n    self.pos_drop = nn.Dropout(p=cfg.drop_rate)\n    self.norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n    self.fc_norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n\n    nn.init.trunc_normal_(self.cls_token, std=0.02)\n    self.apply(self._init_weights)\n
"},{"location":"api/data/bird_mae/#saev.data.bird_mae.PatchEmbed","title":"PatchEmbed(img_size=(512, 128), patch_size=(16, 16), in_chans=1, embed_dim=768)","text":"

Bases: Module

Image (time x mel) to patch embeddings.

Source code in src/saev/data/bird_mae.py
def __init__(\n    self,\n    img_size: tuple[int, int] = (512, 128),\n    patch_size: tuple[int, int] = (16, 16),\n    in_chans: int = 1,\n    embed_dim: int = 768,\n) -> None:\n    super().__init__()\n    img_size = _ntuple(2)(img_size)\n    patch_size = _ntuple(2)(patch_size)\n    n_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])\n    self.patch_hw = (img_size[1] // patch_size[1], img_size[0] // patch_size[0])\n    self.img_size = img_size\n    self.patch_size = patch_size\n    self.n_patches = n_patches\n\n    self.proj = nn.Conv2d(\n        in_chans,\n        embed_dim,\n        kernel_size=patch_size,\n        stride=patch_size,\n    )\n
"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer","title":"Transformer(ckpt)","text":"

Bases: Module, Transformer

Source code in src/saev/data/bird_mae.py
def __init__(self, ckpt: str):\n    super().__init__()\n    self.model = load(ckpt)\n\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(ckpt.lower())\n
"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_resize","title":"make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

Create resize transform for visualization.

Source code in src/saev/data/bird_mae.py
@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization.\"\"\"\n    raise NotImplementedError(\"Bird-MAE uses audio spectrograms, not images.\")\n
"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

Create transforms for preprocessing: (data_transform, dict_transform | None).

Source code in src/saev/data/bird_mae.py
@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n    return transform, None\n
"},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio","title":"filter_audio(waveform, sample_rate, patches, *, mode='time')","text":"

Filter audio based on SAE patch activations over the log-mel spectrogram.

Given a waveform and the SAE activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.

Parameters:

Name Type Description Default waveform Float[Tensor, ' samples']

Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.

required sample_rate int

Audio sample rate in Hz. Should be 32000 for Bird-MAE.

required patches Bool[Tensor, ' content_tokens_per_example']

Boolean SAE activation values per patch, shape [256]. Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.

required mode Literal['time', 'time+freq']

Filtering mode. - \"time\": Clip to time segments with high activations (preserves all frequencies). - \"time+freq\": Clip time AND apply frequency masking via STFT.

'time'

Returns:

Type Description Float[Tensor, ' clipped']

Filtered audio waveform as a 1D torch tensor.

Example

waveform_np, sr = librosa.load(audio_path, sr=32000) mel = bird_mae.transform(waveform_np) # [512, 128] waveform = torch.from_numpy(waveform_np)

Source code in src/saev/data/bird_mae.py
@jaxtyped(typechecker=beartype.beartype)\ndef filter_audio(\n    waveform: Float[Tensor, \" samples\"],\n    sample_rate: int,\n    patches: Bool[Tensor, \" content_tokens_per_example\"],\n    *,\n    mode: tp.Literal[\"time\", \"time+freq\"] = \"time\",\n) -> Float[Tensor, \" clipped\"]:\n    \"\"\"\n    Filter audio based on SAE patch activations over the log-mel spectrogram.\n\n    Given a waveform and the SAE activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.\n\n    Args:\n        waveform: Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.\n        sample_rate: Audio sample rate in Hz. Should be 32000 for Bird-MAE.\n        patches: Boolean SAE activation values per patch, shape [256].\n            Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.\n        mode: Filtering mode.\n            - \"time\": Clip to time segments with high activations (preserves all frequencies).\n            - \"time+freq\": Clip time AND apply frequency masking via STFT.\n\n    Returns:\n        Filtered audio waveform as a 1D torch tensor.\n\n    Example:\n        >>> waveform_np, sr = librosa.load(audio_path, sr=32000)\n        >>> mel = bird_mae.transform(waveform_np)  # [512, 128]\n        >>> waveform = torch.from_numpy(waveform_np)\n        >>> # ... run through SAE to get patch_activations [256] ...\n        >>> # ... 
covert SAE activations to bool with > 0 ...\n        >>> time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\")\n        >>> time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")\n    \"\"\"\n    msg = f\"Bird-MAE expects sample_rate={BIRDMAE_SR_HZ}, got {sample_rate}.\"\n    assert sample_rate == BIRDMAE_SR_HZ, msg\n    assert patches.shape == (BIRDMAE_N_TIME_PATCHES * BIRDMAE_N_MEL_PATCHES,)\n    assert waveform.ndim == 1, waveform.shape\n\n    # Match transform(): pad/truncate to exactly 5s\n    waveform_t = waveform.to(torch.float32)\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if waveform_t.numel() < max_len:\n        pad = max_len - waveform_t.numel()\n        waveform_t = F.pad(waveform_t, (0, pad))\n    else:\n        waveform_t = waveform_t[:max_len]\n    if mode == \"time+freq\":\n        # STFT parameters matching Kaldi/BirdMAE assumptions approximately\n        n_fft = BIRDMAE_STFT_N_FFT\n        hop_length = BIRDMAE_STFT_HOP_LENGTH\n        win_length = BIRDMAE_STFT_WIN_LENGTH\n        window = torch.hann_window(win_length)\n\n        stft = torch.stft(\n            waveform_t,\n            n_fft=n_fft,\n            hop_length=hop_length,\n            win_length=win_length,\n            window=window,\n            center=True,\n            return_complex=True,\n        )\n        # stft shape: [freq_bins, time_frames]\n        # freq_bins = 513\n        # time_frames ~ 498 for 160000 samples\n\n        freqs = torch.linspace(0, sample_rate / 2, stft.shape[0])\n        mask = torch.zeros_like(stft, dtype=torch.bool)\n\n        # Mel range\n        low_freq = BIRDMAE_STFT_LOW_FREQ_HZ\n        high_freq = sample_rate / 2\n        min_mel = hz_to_mel(low_freq)\n        max_mel = hz_to_mel(high_freq)\n        mel_range = max_mel - min_mel\n\n        active_patch_i = torch.nonzero(patches, as_tuple=False).flatten().tolist()\n        for i in active_patch_i:\n            time_idx = i // 
BIRDMAE_N_MEL_PATCHES\n            mel_idx = i % BIRDMAE_N_MEL_PATCHES\n\n            # Time range (frames)\n            t_start = time_idx * BIRDMAE_FRAMES_PER_PATCH\n            t_end = (time_idx + 1) * BIRDMAE_FRAMES_PER_PATCH\n\n            # Frequency range (Hz)\n            # 128 mel bins total, 16 bins per patch\n            p_mel_low = (\n                min_mel\n                + (mel_idx * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n            p_mel_high = (\n                min_mel\n                + ((mel_idx + 1) * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n\n            hz_low = mel_to_hz(p_mel_low)\n            hz_high = mel_to_hz(p_mel_high)\n\n            freq_mask = (freqs >= hz_low) & (freqs < hz_high)\n\n            # Apply mask to valid frames\n            valid_t_end = min(t_end, stft.shape[1])\n            if t_start < valid_t_end:\n                mask[freq_mask, t_start:valid_t_end] = True\n\n        stft_filtered = stft * mask\n        waveform_t = torch.istft(\n            stft_filtered,\n            n_fft=n_fft,\n            hop_length=hop_length,\n            win_length=win_length,\n            window=window,\n            center=True,\n            length=waveform_t.shape[0],\n        )\n\n    # Time clipping (applies to both modes)\n    active_time_indices = torch.unique(\n        torch.nonzero(patches, as_tuple=False).flatten() // BIRDMAE_N_MEL_PATCHES\n    ).tolist()\n    segments = []\n\n    for t in active_time_indices:\n        start = t * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        end = (t + 1) * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        if start >= waveform_t.shape[0]:\n            continue\n        seg = waveform_t[start : min(end, waveform_t.shape[0])]\n        segments.append(seg)\n\n    if not segments:\n        return waveform_t[:0]\n\n    return torch.cat(segments, dim=0)\n
"},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--run-through-sae-to-get-patch_activations-256","title":"... run through SAE to get patch_activations [256] ...","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--covert-sae-activations-to-bool-with-0","title":"... covert SAE activations to bool with > 0 ...","text":"

time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\") time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")

"},{"location":"api/data/bird_mae/#saev.data.bird_mae.transform","title":"transform(waveform)","text":"

waveform: 1D NumPy array [samples]. Returns: 2D tensor [512, 128] matching HF's feature extractor output.

Source code in src/saev/data/bird_mae.py
@jaxtyped(typechecker=beartype.beartype)\ndef transform(waveform: Float[np.ndarray, \" samples\"]) -> Float[Tensor, \"time mels\"]:\n    \"\"\"\n    waveform: 1D tensor [samples]\n    returns: 2D tensor [512, 128] matching HF's feature extractor output\n    \"\"\"\n    import torchaudio.compliance.kaldi\n\n    waveform = torch.from_numpy(waveform).to(torch.float32)\n    (n_samples,) = waveform.shape\n    # 1) pad/truncate to exactly 5 s\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if n_samples < max_len:\n        pad = max_len - n_samples\n        waveform = F.pad(waveform, (0, pad))\n    else:\n        waveform = waveform[:max_len]\n\n    # 2) mean-center (per clip)\n    waveform = waveform - waveform.mean(dim=0, keepdim=True)\n\n    # 3) Kaldi fbank: [T, 128]\n    fb = torchaudio.compliance.kaldi.fbank(\n        waveform[None, :],\n        htk_compat=True,\n        sample_frequency=BIRDMAE_SR_HZ,\n        use_energy=False,\n        window_type=\"hanning\",\n        num_mel_bins=BIRDMAE_N_MELS,\n        dither=0.0,\n        frame_shift=10.0,\n    )  # [T, 128]\n\n    # 4) pad to 512 frames with min value\n    t, _ = fb.shape\n    if t < BIRDMAE_TARGET_T:\n        diff = BIRDMAE_TARGET_T - t\n        min_val = fb.min()\n        fb = F.pad(fb, (0, 0, 0, diff), value=min_val.item())\n    elif t > BIRDMAE_TARGET_T:\n        fb = fb[:BIRDMAE_TARGET_T]\n\n    fb = (fb - BIRDMAE_MEAN) / (BIRDMAE_STD * 2.0)\n\n    assert fb.shape == (BIRDMAE_TARGET_T, BIRDMAE_N_MELS), fb.shape\n\n    return fb\n
"},{"location":"api/data/buffers/","title":"saev.data.buffers","text":""},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer","title":"ReservoirBuffer(capacity, shape, *, dtype=torch.float32, meta_shape=(2,), meta_dtype=torch.int32, seed=0, collate_fn=None)","text":"

Pool of (tensor, meta) pairs. Multiple producers call put(batch_x, batch_meta). Multiple consumers call get(batch_size) -> (x, meta). Random order, each sample delivered once, blocking semantics.

Source code in src/saev/data/buffers.py
def __init__(\n    self,\n    capacity: int,\n    shape: tuple[int, ...],\n    *,\n    dtype: torch.dtype = torch.float32,\n    meta_shape: tuple[int, ...] = (2,),\n    meta_dtype: torch.dtype = torch.int32,\n    seed: int = 0,\n    collate_fn: collections.abc.Callable | None = None,\n):\n    self.capacity = capacity\n    self._empty = 123456789\n\n    self.data = torch.full((capacity, *shape), self._empty, dtype=dtype)\n    self.data.share_memory_()\n\n    self.meta = torch.full((capacity, *meta_shape), self._empty, dtype=meta_dtype)\n    self.meta.share_memory_()\n\n    self.ctx = mp.get_context()\n\n    self.size = self.ctx.Value(\"L\", 0)  # current live items\n    self.lock = self.ctx.Lock()  # guards size+swap\n    self.free = self.ctx.Semaphore(capacity)\n    self.full = self.ctx.Semaphore(0)\n    # Each process has its own RNG.\n    self.rng = np.random.default_rng(seed)\n\n    self.collate_fn = collate_fn\n\n    self.logger = logging.getLogger(f\"reservoir({os.getpid()})\")\n
"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.close","title":"close()","text":"

Release the shared-memory backing store (call once in the parent).

Source code in src/saev/data/buffers.py
def close(self) -> None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n    try:\n        self.data.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n
"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.fill","title":"fill()","text":"

Approximate proportion of filled slots (race-safe enough for tests).

Source code in src/saev/data/buffers.py
def fill(self) -> float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for tests).\"\"\"\n    return self.qsize() / self.capacity\n
"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.qsize","title":"qsize()","text":"

Approximate number of filled slots (race-safe enough for tests).

Source code in src/saev/data/buffers.py
def qsize(self) -> int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return self.size.value\n
"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer","title":"RingBuffer(slots, shape, dtype)","text":"

Fixed-capacity, multiple-producer / multiple-consumer queue backed by a shared-memory tensor.

"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer--parameters","title":"Parameters","text":"

slots : int - capacity in number of items (tensor rows). shape : tuple[int] - shape of one item, e.g. (batch, dim). dtype : torch.dtype - tensor dtype.

Methods: put(tensor) - blocks if full; get() -> tensor - blocks if empty; qsize() -> int - advisory size (approximate); close() - frees shared storage (call in the main process).

Source code in src/saev/data/buffers.py
def __init__(self, slots: int, shape: tuple[int, ...], dtype: torch.dtype):\n    assert slots > 0, \"slots must be positive\"\n    self.slots = slots\n    # 123456789 -> Should make you very worried.\n    self.buf = torch.full((slots, *shape), 123456789, dtype=dtype)\n    self.buf.share_memory_()\n\n    ctx = mp.get_context()  # obeys the global start method (\"spawn\")\n\n    # shared, lock-free counters\n    self.head = ctx.Value(\"L\", 0, lock=False)  # next free slot\n    self.tail = ctx.Value(\"L\", 0, lock=False)  # next occupied slot\n\n    # semaphores for blocking semantics\n    self.free = ctx.Semaphore(slots)  # initially all slots free\n    self.full = ctx.Semaphore(0)  # no filled slots yet\n\n    # one mutex for pointer updates\n    self.mutex = ctx.Lock()\n
"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.close","title":"close()","text":"

Release the shared-memory backing store (call once in the parent).

Source code in src/saev/data/buffers.py
def close(self) -> None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n    try:\n        self.buf.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n
"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.fill","title":"fill()","text":"

Approximate proportion of filled slots (race-safe enough for tests).

Source code in src/saev/data/buffers.py
def fill(self) -> float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for tests).\"\"\"\n    return self.qsize() / self.capacity\n
"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.get","title":"get()","text":"

Return a copy of the next item (the slot is cloned, so the result does not alias the shared buffer); blocks if the queue is empty.

Source code in src/saev/data/buffers.py
def get(self) -> torch.Tensor:\n    \"\"\"Return a view of the next item; blocks if the queue is empty.\"\"\"\n    self.full.acquire()  # wait for data\n    with self.mutex:  # exclusive update of tail\n        idx = self.tail.value % self.slots\n        out = self.buf[idx].clone()\n        self.tail.value += 1\n    self.free.release()  # signal one more free slot\n    return out\n
"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.put","title":"put(tensor)","text":"

Copy tensor into the next free slot; blocks if the queue is full.

Source code in src/saev/data/buffers.py
def put(self, tensor: torch.Tensor) -> None:\n    \"\"\"Copy `tensor` into the next free slot; blocks if the queue is full.\"\"\"\n    if tensor.shape != self.buf.shape[1:] or tensor.dtype != self.buf.dtype:\n        raise ValueError(\"tensor shape / dtype mismatch\")\n\n    self.free.acquire()  # wait for a free slot\n    with self.mutex:  # exclusive update of head\n        idx = self.head.value % self.slots\n        self.buf[idx].copy_(tensor)\n        self.head.value += 1\n    self.full.release()  # signal there is data\n
"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.qsize","title":"qsize()","text":"

Approximate number of filled slots (race-safe enough for tests).

Source code in src/saev/data/buffers.py
def qsize(self) -> int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return (self.head.value - self.tail.value) % (1 << 64)\n
"},{"location":"api/data/clip/","title":"saev.data.clip","text":""},{"location":"api/data/clip/#saev.data.clip.Vit","title":"Vit(ckpt)","text":"

Bases: Transformer, Module

Source code in src/saev/data/clip.py
def __init__(self, ckpt: str):\n    super().__init__()\n\n    import open_clip\n\n    from .. import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n        _, ckpt = ckpt.split(\"hf-hub:\")\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n\n    assert not isinstance(self.model, open_clip.timm_model.TimmModel)\n
"},{"location":"api/data/clip/#saev.data.clip.Vit.patch_size","title":"patch_size property","text":"

Get patch size for CLIP models.

"},{"location":"api/data/clip/#saev.data.clip.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

Create transforms for preprocessing: (img_transform, sample_transform | None).

Source code in src/saev/data/clip.py
@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    import open_clip\n\n    from .. import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n
"},{"location":"api/data/datasets/","title":"saev.data.datasets","text":""},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025","title":"BirdClef2025(root=pathlib.Path('data/birdclef-2025'), split='train_audio') dataclass","text":"

Bases: DatasetConfig

Configuration for BirdCLEF 2025 dataset, filtering to only bird species (Aves).

See https://www.kaggle.com/competitions/birdclef-2025/data for more information.

"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.n_examples","title":"n_examples property","text":"

Number of bird audio samples in the dataset.

"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.root","title":"root = pathlib.Path('data/birdclef-2025') class-attribute instance-attribute","text":"

Root directory containing the BirdCLEF 2025 data.

"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.split","title":"split = 'train_audio' class-attribute instance-attribute","text":"

Which data split to use.

"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset","title":"BirdClef2025Dataset(cfg, *, audio_transform=None, mask_transform=None, sample_transform=None)","text":"

Bases: Dataset

Dataset for BirdCLEF 2025 filtered to bird species only (class_name == 'Aves').

Source code in src/saev/data/datasets.py
def __init__(\n    self,\n    cfg: BirdClef2025,\n    *,\n    audio_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    import polars as pl\n\n    self.cfg = cfg\n    self.audio_transform = audio_transform\n    self.sample_transform = sample_transform\n\n    # Load taxonomy and filter to birds only\n    taxonomy = pl.read_csv(cfg.root / \"taxonomy.csv\", infer_schema_length=None)\n    taxonomy = taxonomy.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n    birds = taxonomy.filter(pl.col(\"class_name\") == \"Aves\")\n    bird_labels = set(birds[\"primary_label\"].to_list())\n\n    # Build label -> target mapping from bird species only\n    sorted_labels = sorted(bird_labels)\n    self.label_to_target = {label: i for i, label in enumerate(sorted_labels)}\n    self.target_to_label = {i: label for label, i in self.label_to_target.items()}\n\n    if cfg.split == \"train_audio\":\n        train = pl.read_csv(cfg.root / \"train.csv\", infer_schema_length=None)\n        train = train.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n        train_birds = train.filter(pl.col(\"primary_label\").is_in(bird_labels))\n        self.samples = [\n            {\"label\": row[\"primary_label\"], \"filename\": row[\"filename\"]}\n            for row in train_birds.iter_rows(named=True)\n        ]\n    elif cfg.split == \"train_soundscapes\":\n        soundscapes_dpath = cfg.root / \"train_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    elif cfg.split == \"test_soundscapes\":\n        soundscapes_dpath = cfg.root / \"test_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    else:\n        tp.assert_never(cfg.split)\n
"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset.n_classes","title":"n_classes property","text":"

Number of bird species.

"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10","title":"Cifar10(name='uoft-cs/cifar10', split='train') dataclass","text":"

Bases: DatasetConfig

Configuration for HuggingFace CIFAR-10.

"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.n_examples","title":"n_examples property","text":"

Number of images in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.

"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.name","title":"name = 'uoft-cs/cifar10' class-attribute instance-attribute","text":"

Dataset name on HuggingFace. Don't need to change this.

"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.root","title":"root property","text":"

Dummy path for the dataset.

"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.split","title":"split = 'train' class-attribute instance-attribute","text":"

Dataset split. Can be 'train' or 'test'.

"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig","title":"DatasetConfig","text":"

Bases: ABC

Abstract base class for dataset configurations.

"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.n_examples","title":"n_examples abstractmethod property","text":"

Number of examples in the dataset.

"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.root","title":"root abstractmethod property","text":"

Root directory path for the dataset.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImg","title":"FakeImg(n_examples=10) dataclass","text":"

Bases: DatasetConfig

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImg.root","title":"root property","text":"

Root directory path for the dataset.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg","title":"FakeImgSeg(n_examples=10, content_tokens_per_example=16, n_classes=3, bg_label=0) dataclass","text":"

Bases: DatasetConfig

Tiny synthetic segmentation dataset for tests.

Generates dummy RGB images and pixel-level segmentation masks, mimicking the behavior of real segmentation datasets like ImgSegFolder.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.bg_label","title":"bg_label = 0 class-attribute instance-attribute","text":"

Which class index is considered background.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.content_tokens_per_example","title":"content_tokens_per_example = 16 class-attribute instance-attribute","text":"

Number of content tokens per example.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_classes","title":"n_classes = 3 class-attribute instance-attribute","text":"

Number of segmentation classes.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_examples","title":"n_examples = 10 class-attribute instance-attribute","text":"

Number of examples.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.root","title":"root property","text":"

Root directory path for the dataset.

"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSegDataset","title":"FakeImgSegDataset(cfg, *, img_transform=None, mask_transform=None, sample_transform=None)","text":"

Bases: Dataset

Synthetic segmentation dataset providing pixel-level segmentation masks.

Mimics ImgSegFolderDataset by providing:

Source code in src/saev/data/datasets.py
def __init__(\n    self,\n    cfg: FakeImgSeg,\n    *,\n    img_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    self.cfg = cfg\n    self.img_transform = img_transform\n    self.mask_transform = mask_transform\n    self.sample_transform = sample_transform\n
"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet","title":"Imagenet(name='ILSVRC/imagenet-1k', split='train') dataclass","text":"

Bases: DatasetConfig

Configuration for HuggingFace Imagenet.

"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.n_examples","title":"n_examples property","text":"

Number of images in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.

"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.name","title":"name = 'ILSVRC/imagenet-1k' class-attribute instance-attribute","text":"

Dataset name on HuggingFace. Don't need to change this..

"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.root","title":"root property","text":"

Root directory path for the dataset.

"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.split","title":"split = 'train' class-attribute instance-attribute","text":"

Dataset split. For the default ImageNet-1K dataset, can either be 'train', 'validation' or 'test'.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder","title":"ImgFolder(root=pathlib.Path('./data/split')) dataclass","text":"

Bases: DatasetConfig

Configuration for a generic image folder dataset that matches the structure used in PyTorch's ImageFolder.

The datset must be laid out in:

root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n

If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.n_examples","title":"n_examples property","text":"

Number of examples in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires walking the directory structure. If you need to reference this number very often, cache it in a local variable.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.root","title":"root = pathlib.Path('./data/split') class-attribute instance-attribute","text":"

Where the class folders with images are stored. Can be a glob pattern to match multiple directories.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset","title":"ImgFolderDataset(*args, sample_transform=None, **kwargs)","text":"

Bases: ImageFolder

A generic image folder dataset that matches the structure used in PyTorch's ImageFolder.

The datset must be laid out in:

root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n

If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.

Source code in src/saev/data/datasets.py
def __init__(self, *args, sample_transform: Callable | None = None, **kwargs):\n    super().__init__(*args, **kwargs)\n    self.sample_transform = sample_transform\n
"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset.__getitem__","title":"__getitem__(index)","text":"

Parameters:

Name Type Description Default index int

Index

required

Returns:

Type Description dict[str, object]

dict with keys 'data', 'index', 'target' and 'label'.

Source code in src/saev/data/datasets.py
def __getitem__(self, index: int) -> dict[str, object]:\n    \"\"\"\n    Args:\n        index: Index\n\n    Returns:\n        dict with keys 'data', 'index', 'target' and 'label'.\n    \"\"\"\n    path, target = self.samples[index]\n    image = self.loader(path)\n    if self.transform is not None:\n        image = self.transform(image)\n    if self.target_transform is not None:\n        target = self.target_transform(target)\n\n    sample = {\n        \"data\": image,\n        \"target\": target,\n        \"label\": self.classes[target],\n        \"index\": index,\n    }\n\n    if self.sample_transform is not None:\n        sample = self.sample_transform(sample)\n\n    return sample\n
"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder","title":"ImgSegFolder(root=pathlib.Path('./data/segdataset'), split='training', labels_csv='labels.csv', bg_label=0) dataclass","text":"

Bases: DatasetConfig

"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.bg_label","title":"bg_label = 0 class-attribute instance-attribute","text":"

Background label.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.labels_csv","title":"labels_csv = 'labels.csv' class-attribute instance-attribute","text":"

CSV file with columns: stem,label1,label2,... First column must be 'stem'.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.n_examples","title":"n_examples property","text":"

Number of examples in the dataset. Calculated on the fly by counting image files in root/images/split.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.root","title":"root = pathlib.Path('./data/segdataset') class-attribute instance-attribute","text":"

Where the class folders with images are stored.

"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.split","title":"split = 'training' class-attribute instance-attribute","text":"

Data split.

"},{"location":"api/data/datasets/#saev.data.datasets.get_dataset","title":"get_dataset(cfg, *, data_transform=None, mask_transform=None, sample_transform=None)","text":"

Gets the dataset for the current experiment; delegates construction to dataset-specific functions.

Parameters:

Name Type Description Default cfg Config

Config for the dataset.

required data_tr

Transform to be applied to each 'data' key (typically the raw data).

required mask_tr

Transform to be applied to masks.

required dict_tr

Transform to be applied to the entire sample dict.

required

Returns: A dataset that has dictionaries with 'data', 'index', 'target', and 'label' keys containing examples.

Source code in src/saev/data/datasets.py
@beartype.beartype\ndef get_dataset(\n    cfg: Config,\n    *,\n    data_transform: Callable = None,\n    mask_transform: Callable | None = None,\n    sample_transform: Callable | None = None,\n):\n    \"\"\"\n    Gets the dataset for the current experiment; delegates construction to dataset-specific functions.\n\n    Args:\n        cfg: Config for the dataset.\n        data_tr: Transform to be applied to each 'data' key (typically the raw data).\n        mask_tr: Transform to be applied to masks.\n        dict_tr: Transform to be applied to the entire sample dict.\n    Returns:\n        A dataset that has dictionaries with `'data'`, `'index'`, `'target'`, and `'label'` keys containing examples.\n    \"\"\"\n    # TODO: Can we reduce duplication? Or is it nice to see that there is no magic here?\n    if isinstance(cfg, Imagenet):\n        return ImagenetDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, Cifar10):\n        return Cifar10Dataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, ImgSegFolder):\n        return ImgSegFolderDataset(\n            cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, ImgFolder):\n        ds = [\n            ImgFolderDataset(\n                root, transform=data_transform, sample_transform=sample_transform\n            )\n            for root in glob.glob(str(cfg.root), recursive=True)\n        ]\n        if len(ds) == 1:\n            return ds[0]\n        else:\n            return torch.utils.data.ConcatDataset(ds)\n    elif isinstance(cfg, FakeImg):\n        return FakeImgDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, FakeImgSeg):\n        return FakeImgSegDataset(\n           
 cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, BirdClef2025):\n        return BirdClef2025Dataset(\n            cfg, audio_transform=data_transform, sample_transform=sample_transform\n        )\n    else:\n        tp.assert_never(cfg)\n
"},{"location":"api/data/datasets/#saev.data.datasets.is_img_seg_dataset","title":"is_img_seg_dataset(data_cfg)","text":"

Check if a dataset configuration is for an image segmentation dataset.

Parameters:

Name Type Description Default data_cfg DatasetConfig

Dataset configuration

required

Returns:

Type Description bool

True if this is an image segmentation dataset that should have labels.bin

Source code in src/saev/data/datasets.py
@beartype.beartype\ndef is_img_seg_dataset(data_cfg: DatasetConfig) -> bool:\n    \"\"\"\n    Check if a dataset configuration is for an image segmentation dataset.\n\n    Args:\n        data_cfg: Dataset configuration\n\n    Returns:\n        True if this is an image segmentation dataset that should have labels.bin\n    \"\"\"\n    return isinstance(data_cfg, (FakeImgSeg, ImgSegFolder))\n
"},{"location":"api/data/dinov2/","title":"saev.data.dinov2","text":""},{"location":"api/data/dinov3/","title":"saev.data.dinov3","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config","title":"Config(img_size=224, patch_size=16, in_chans=3, pos_embed_rope_base=100.0, pos_embed_rope_min_period=None, pos_embed_rope_max_period=None, pos_embed_rope_normalize_coords='separate', pos_embed_rope_dtype='bf16', embed_dim=768, depth=12, num_heads=12, ffn_ratio=4.0, qkv_bias=True, ffn_layer='mlp', ffn_bias=True, proj_bias=True, n_storage_tokens=0, mask_k_bias=False, untie_global_and_local_cls_norm=False, device=None) dataclass","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config.depth","title":"depth = 12 class-attribute instance-attribute","text":"

Number of transformer blocks.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.device","title":"device = None class-attribute instance-attribute","text":"

Device for tensor operations.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.embed_dim","title":"embed_dim = 768 class-attribute instance-attribute","text":"

Embedding dimension for transformer.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_bias","title":"ffn_bias = True class-attribute instance-attribute","text":"

Whether to use bias in feed-forward network.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_layer","title":"ffn_layer = 'mlp' class-attribute instance-attribute","text":"

Type of feed-forward network layer.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_ratio","title":"ffn_ratio = 4.0 class-attribute instance-attribute","text":"

Feed-forward network expansion ratio.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.img_size","title":"img_size = 224 class-attribute instance-attribute","text":"

Image width and height in pixels.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.in_chans","title":"in_chans = 3 class-attribute instance-attribute","text":"

Number of input image channels.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.mask_k_bias","title":"mask_k_bias = False class-attribute instance-attribute","text":"

Whether to mask K bias in attention.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.n_storage_tokens","title":"n_storage_tokens = 0 class-attribute instance-attribute","text":"

Number of storage/register tokens.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.num_heads","title":"num_heads = 12 class-attribute instance-attribute","text":"

Number of attention heads.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.patch_size","title":"patch_size = 16 class-attribute instance-attribute","text":"

Size of each patch in pixels.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_base","title":"pos_embed_rope_base = 100.0 class-attribute instance-attribute","text":"

Base frequency for RoPE positional encoding.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_dtype","title":"pos_embed_rope_dtype = 'bf16' class-attribute instance-attribute","text":"

Data type for RoPE positional encoding.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_max_period","title":"pos_embed_rope_max_period = None class-attribute instance-attribute","text":"

Maximum period for RoPE positional encoding.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_min_period","title":"pos_embed_rope_min_period = None class-attribute instance-attribute","text":"

Minimum period for RoPE positional encoding.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_normalize_coords","title":"pos_embed_rope_normalize_coords = 'separate' class-attribute instance-attribute","text":"

Coordinate normalization method for RoPE encoding.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.proj_bias","title":"proj_bias = True class-attribute instance-attribute","text":"

Whether to use bias in output projection.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.qkv_bias","title":"qkv_bias = True class-attribute instance-attribute","text":"

Whether to use bias in QKV projection.

"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.untie_global_and_local_cls_norm","title":"untie_global_and_local_cls_norm = False class-attribute instance-attribute","text":"

Whether to use separate norms for global and local CLS tokens.

"},{"location":"api/data/dinov3/#saev.data.dinov3.PatchEmbed","title":"PatchEmbed(img_size=224, patch_size=16, in_chans=3, embed_dim=768, flatten_embedding=True)","text":"

Bases: Module

2D image to patch embedding: (B,C,H,W) -> (B,N,D)

Parameters:

Name Type Description Default img_size int | tuple[int, int]

Image size.

224 patch_size int | tuple[int, int]

Patch token size.

16 in_chans int

Number of input image channels.

3 embed_dim int

Number of linear projection output channels.

768 Source code in src/saev/data/dinov3.py
def __init__(\n    self,\n    img_size: int | tuple[int, int] = 224,\n    patch_size: int | tuple[int, int] = 16,\n    in_chans: int = 3,\n    embed_dim: int = 768,\n    flatten_embedding: bool = True,\n) -> None:\n    super().__init__()\n\n    image_hw = make_2tuple(img_size)\n    patch_hw = make_2tuple(patch_size)\n\n    self.image_hw = image_hw\n    self.patch_hw = patch_hw\n\n    self.in_chans = in_chans\n    self.embed_dim = embed_dim\n\n    self.proj = nn.Conv2d(\n        in_chans, embed_dim, kernel_size=patch_hw, stride=patch_hw\n    )\n    self.k = patch_hw[0]\n    assert self.proj.kernel_size == (self.k, self.k)\n    assert self.proj.stride == (self.k, self.k)\n    assert self.proj.padding == (0, 0)\n    assert self.proj.groups == 1\n    assert self.proj.dilation == (1, 1)\n
"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit","title":"Vit(ckpt)","text":"

Bases: Module, Transformer

Source code in src/saev/data/dinov3.py
def __init__(self, ckpt: str):\n    super().__init__()\n    name = self._parse_name(ckpt)\n    self.model = load(name, ckpt)\n\n    self._ckpt = name\n    self.logger = logging.getLogger(f\"dinov3/{name}\")\n
"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_resize","title":"make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.

Source code in src/saev/data/dinov3.py
@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    import functools\n\n    return functools.partial(\n        transforms.resize_to_patch_grid,\n        p=int(16 * scale),\n        n=n_patches_per_img,\n        resample=resample,\n    )\n
"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

Create transforms for preprocessing: (img_transform, sample_transform | None).

Source code in src/saev/data/dinov3.py
@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    img_transform = v2.Compose([\n        transforms.FlexResize(patch_size=16, n_patches=n_patches_per_img),\n        v2.ToImage(),\n        v2.ToDtype(torch.float32, scale=True),\n        v2.Normalize(mean=[0.4850, 0.4560, 0.4060], std=[0.2290, 0.2240, 0.2250]),\n    ])\n    sample_transform = transforms.Patchify(\n        patch_size=16, n_patches=n_patches_per_img\n    )\n    return img_transform, sample_transform\n
"},{"location":"api/data/fake_clip/","title":"saev.data.fake_clip","text":"

Fake CLIP model for testing with tiny-open-clip-model.

This module provides a test-only vision transformer that works with the tiny-open-clip-model from HuggingFace, which uses 8x8 images and 2x2 patches instead of the standard 224x224 images with 16x16 patches.

"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit","title":"Vit(ckpt)","text":"

Bases: Transformer, Module

Source code in src/saev/data/fake_clip.py
def __init__(self, ckpt: str):\n    super().__init__()\n\n    # Only support the tiny test model\n    assert ckpt == \"hf-hub:hf-internal-testing/tiny-open-clip-model\", (\n        f\"FakeClip only supports tiny-open-clip-model, got {ckpt}\"\n    )\n\n    clip, _ = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n
"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.patch_size","title":"patch_size property","text":"

Tiny model uses 2x2 patches.

"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_resize","title":"make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

Create resize transform for tiny model (8x8 images).

Source code in src/saev/data/fake_clip.py
@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for tiny model (8x8 images).\"\"\"\n\n    def resize(img: Image.Image) -> Image.Image:\n        # Tiny model uses 8x8 images\n        size_px = (int(8 * scale), int(8 * scale))\n        return img.resize(size_px, resample=resample)\n\n    return resize\n
"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

Create transforms for preprocessing: (img_transform, sample_transform | None).

Source code in src/saev/data/fake_clip.py
@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    _, img_transform = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    return img_transform, None\n
"},{"location":"api/data/indexed/","title":"saev.data.indexed","text":""},{"location":"api/data/indexed/#saev.data.indexed.Config","title":"Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False) dataclass","text":"

Configuration for loading indexed activation data from disk

Attributes:

Name Type Description shards Path

Directory with .bin shards and a metadata.json file.

tokens Literal['special', 'content', 'all']

Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.

layer int | Literal['all']

Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

debug bool

Whether the dataloader process should log debug messages.

"},{"location":"api/data/indexed/#saev.data.indexed.Dataset","title":"Dataset(cfg)","text":"

Bases: Dataset

Dataset of activations from disk.

Attributes:

Name Type Description cfg Config

Configuration set via CLI args.

md Metadata

Activations metadata; automatically loaded from disk.

layer_idx int

Layer index into the shards if we are choosing a specific layer.

Source code in src/saev/data/indexed.py
def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n
"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.d_model","title":"d_model property","text":"

Dimension of the underlying vision transformer's embedding space.

"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.Example","title":"Example","text":"

Bases: TypedDict

Individual example.

"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.__len__","title":"__len__()","text":"

Dataset length depends on patches and layer.

Source code in src/saev/data/indexed.py
def __len__(self) -> int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n
"},{"location":"api/data/models/","title":"saev.data.models","text":""},{"location":"api/data/models/#saev.data.models.Transformer","title":"Transformer","text":"

Bases: ABC

Protocol defining the interface for all Transformer models.

"},{"location":"api/data/models/#saev.data.models.Transformer.patch_size","title":"patch_size abstractmethod property","text":"

Patch size in pixels (e.g., 14 or 16).

"},{"location":"api/data/models/#saev.data.models.Transformer.forward","title":"forward(batch) abstractmethod","text":"

Run forward pass on batch of images.

Source code in src/saev/data/models.py
@abc.abstractmethod\ndef forward(\n    self, batch: Float[Tensor, \"batch 3 width height\"]\n) -> Float[Tensor, \"batch patches dim\"]:\n    \"\"\"Run forward pass on batch of images.\"\"\"\n
"},{"location":"api/data/models/#saev.data.models.Transformer.get_residuals","title":"get_residuals() abstractmethod","text":"

Return the list of residual blocks/layers for hook registration.

Source code in src/saev/data/models.py
@abc.abstractmethod\ndef get_residuals(self) -> list[torch.nn.Module]:\n    \"\"\"Return the list of residual blocks/layers for hook registration.\"\"\"\n
"},{"location":"api/data/models/#saev.data.models.Transformer.get_token_i","title":"get_token_i(content_tokens_per_example) abstractmethod","text":"

Return indices for selecting relevant tokens from activations.

Source code in src/saev/data/models.py
@abc.abstractmethod\ndef get_token_i(self, content_tokens_per_example: int) -> slice | torch.Tensor:\n    \"\"\"Return indices for selecting relevant tokens from activations.\"\"\"\n
"},{"location":"api/data/models/#saev.data.models.Transformer.make_resize","title":"make_resize(ckpt, content_tokens_per_example, *, scale=1.0, resample=Image.LANCZOS) abstractmethod staticmethod","text":"

Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.

Source code in src/saev/data/models.py
@staticmethod\n@abc.abstractmethod\ndef make_resize(\n    ckpt: str,\n    content_tokens_per_example: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n
"},{"location":"api/data/models/#saev.data.models.Transformer.make_transforms","title":"make_transforms(ckpt, content_tokens_per_example) abstractmethod staticmethod","text":"

Create transforms for preprocessing: (data_transform, dict_transform | None).

Source code in src/saev/data/models.py
@staticmethod\n@abc.abstractmethod\ndef make_transforms(\n    ckpt: str, content_tokens_per_example: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n
"},{"location":"api/data/models/#saev.data.models.list_families","title":"list_families()","text":"

List all ViT family names.

Source code in src/saev/data/models.py
def list_families() -> list[str]:\n    \"\"\"List all ViT family names.\"\"\"\n    return list(_global_model_registry.keys())\n
"},{"location":"api/data/models/#saev.data.models.load_model_cls","title":"load_model_cls(family)","text":"

Load a transformer family's class.

Source code in src/saev/data/models.py
@beartype.beartype\ndef load_model_cls(family: str) -> type[Transformer]:\n    \"\"\"Load a transformer family's class.\"\"\"\n    if family not in _global_model_registry:\n        raise ValueError(f\"Family '{family}' not found.\")\n\n    return _global_model_registry[family]\n
"},{"location":"api/data/models/#saev.data.models.register_family","title":"register_family(cls)","text":"

Register a new transformer family's class.

Source code in src/saev/data/models.py
@beartype.beartype\ndef register_family(cls: type[Transformer]):\n    \"\"\"Register a new transformer family's class.\"\"\"\n    if cls.family in _global_model_registry:\n        logger.warning(\"Overwriting key '%s' in registry.\", cls.family)\n    _global_model_registry[cls.family] = cls\n
"},{"location":"api/data/ordered/","title":"saev.data.ordered","text":"

Ordered (sequential) dataloader for activation data.

This module provides a high-throughput dataloader that reads activation data from disk shards in sequential order, without shuffling. The implementation uses a single-threaded manager process to ensure data is delivered in the exact order it appears on disk.

Patch labels are provided if there is a labels.bin file on disk.

See the design decisions in src/saev/data/performance.md.

Usage

cfg = Config(shards=\"./shards\", layer=13, batch_size=4096) dataloader = DataLoader(cfg) for batch in dataloader: ... activations = batch[\"act\"] # [batch_size, d_model] ... image_indices = batch[\"example_idx\"] # [batch_size] ... patch_indices = batch[\"token_idx\"] # [batch_size] ... patch_labels = batch[\"patch_labels\"] # [batch_size]

"},{"location":"api/data/ordered/#saev.data.ordered.Config","title":"Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0) dataclass","text":"

Configuration for loading ordered (non-shuffled) activation data from disk

Attributes:

Name Type Description shards Path

Directory with .bin shards and a metadata.json file.

tokens Literal['content']

Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.

layer int | Literal['all']

Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

batch_size int

Batch size.

batch_timeout_s float

How long to wait for at least one batch.

drop_last bool

Whether to drop the last batch if it's smaller than the others.

buffer_size int

Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

debug bool

Whether the dataloader process should log debug messages.

log_every_s float

How frequently the dataloader process should log (debug) performance messages.

"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader","title":"DataLoader(cfg)","text":"

High-throughput streaming loader that reads data from disk shards in order (no shuffling).

Source code in src/saev/data/ordered.py
def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. (debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n
"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.ExampleBatch","title":"ExampleBatch","text":"

Bases: TypedDict

Individual example.

"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__iter__","title":"__iter__()","text":"

Yields batches in order.

Source code in src/saev/data/ordered.py
def __iter__(self) -> collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n < self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size < self.cfg.batch_size\n                    and n + actual_batch_size >= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n
"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__len__","title":"__len__()","text":"

Returns the number of batches in an epoch.

Source code in src/saev/data/ordered.py
def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n
"},{"location":"api/data/pe/","title":"saev.data.pe","text":"

Perception Encoder (PE) models from Meta (Bolya et al., 2025).

PE-Core: CLIP-style model for language alignment. PE-Spatial: Dense prediction model distilled from SAM 2.1.

Both are available via timm.

"},{"location":"api/data/pe/#saev.data.pe.Core","title":"Core(ckpt)","text":"

Bases: _Base

PE-Core: CLIP-style model for language alignment.

Available checkpoints: - vit_pe_core_large_patch14_336.fb (L/14, 336px) - vit_pe_core_base_patch16_224.fb (B/16, 224px)

Source code in src/saev/data/pe.py
def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n
"},{"location":"api/data/pe/#saev.data.pe.Spatial","title":"Spatial(ckpt)","text":"

Bases: _Base

PE-Spatial: Dense prediction model distilled from SAM 2.1.

Available checkpoints: - vit_pe_spatial_large_patch14_448.fb (L/14, 448px) - vit_pe_spatial_base_patch16_512.fb (B/16, 512px)

Source code in src/saev/data/pe.py
def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n
"},{"location":"api/data/saev.data/","title":"saev.data","text":""},{"location":"api/data/saev.data/#saev.data.IndexedConfig","title":"IndexedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False) dataclass","text":"

Configuration for loading indexed activation data from disk

Attributes:

Name Type Description shards Path

Directory with .bin shards and a metadata.json file.

tokens Literal['special', 'content', 'all']

Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.

layer int | Literal['all']

Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

debug bool

Whether the dataloader process should log debug messages.

"},{"location":"api/data/saev.data/#saev.data.IndexedDataset","title":"IndexedDataset(cfg)","text":"

Bases: Dataset

Dataset of activations from disk.

Attributes:

Name Type Description cfg Config

Configuration set via CLI args.

md Metadata

Activations metadata; automatically loaded from disk.

layer_idx int

Layer index into the shards if we are choosing a specific layer.

Source code in src/saev/data/indexed.py
def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n
"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.d_model","title":"d_model property","text":"

Dimension of the underlying vision transformer's embedding space.

"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.Example","title":"Example","text":"

Bases: TypedDict

Individual example.

"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.__len__","title":"__len__()","text":"

Dataset length depends on patches and layer.

Source code in src/saev/data/indexed.py
def __len__(self) -> int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n
"},{"location":"api/data/saev.data/#saev.data.Metadata","title":"Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1') dataclass","text":"

Metadata for a sharded set of transformer activations.

Parameters:

Name Type Description Default family Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']

The transformer family.

required ckpt str

The transformer checkpoint.

required layers tuple[int, ...]

Which layers were saved.

required content_tokens_per_example int

The number of content tokens per example.

required cls_token bool

Whether the transformer has a [CLS] token as well.

required d_model int

Model hidden dimension.

required n_examples int

Number of examples.

required max_tokens_per_shard int

The maximum number of tokens per shard.

required data str

base64-encoded string of pickle.dumps(dataset).

required dataset Path

Absolute path to the root directory of the original dataset.

required pixel_agg PixelAgg

(only for image segmentation datasets) how the pixel-level segmentation labels were aggregated to token-level labels.

MAJORITY dtype Literal['float32']

How activations are stored.

'float32' protocol Literal['1.0.0', '1.1', '2.1']

Protocol version.

'2.1'"},{"location":"api/data/saev.data/#saev.data.Metadata.examples_per_shard","title":"examples_per_shard property","text":"

The number of examples per shard based on the protocol.

Returns:

Type Description int

Number of examples that fit in a shard.

"},{"location":"api/data/saev.data/#saev.data.Metadata.hash","title":"hash property","text":"

First 8 bytes of a SHA256 hash of the metadata configuration.

Returns:

Type Description str

Hexadecimal hash string uniquely identifying this configuration.

"},{"location":"api/data/saev.data/#saev.data.Metadata.n_shards","title":"n_shards property","text":"

Total number of shards needed to store all examples.

Returns:

Type Description int

Number of shards required.

"},{"location":"api/data/saev.data/#saev.data.Metadata.shard_shape","title":"shard_shape property","text":"

Shape of each shard file.

Returns:

Type Description tuple[int, int, int, int]

Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).

"},{"location":"api/data/saev.data/#saev.data.Metadata.tokens_per_example","title":"tokens_per_example property","text":"

Total number of tokens per example including [CLS] token if present.

Returns:

Type Description int

Number of tokens plus one if [CLS] token is included.

"},{"location":"api/data/saev.data/#saev.data.Metadata.dump","title":"dump(shards_root)","text":"

Dumps a Metadata object to a metadata.json file in shards_root / hash.

Parameters:

Name Type Description Default shards_root Path

Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.

required Source code in src/saev/data/shards.py
def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n
"},{"location":"api/data/saev.data/#saev.data.Metadata.load","title":"load(shards_dir) classmethod","text":"

Loads a Metadata object from a metadata.json file in shards_dir.

Parameters:

Name Type Description Default shards_dir Path

Path to $SAEV_SCRATCH/saev/shards/ as described in disk-layout.md. required Source code in src/saev/data/shards.py

@classmethod\ndef load(cls, shards_dir: pathlib.Path) -> tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/<hash> as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n
"},{"location":"api/data/saev.data/#saev.data.OrderedConfig","title":"OrderedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0) dataclass","text":"

Configuration for loading ordered (non-shuffled) activation data from disk

Attributes:

Name Type Description shards Path

Directory with .bin shards and a metadata.json file.

tokens Literal['content']

Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.

layer int | Literal['all']

Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

batch_size int

Batch size.

batch_timeout_s float

How long to wait for at least one batch.

drop_last bool

Whether to drop the last batch if it's smaller than the others.

buffer_size int

Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

debug bool

Whether the dataloader process should log debug messages.

log_every_s float

How frequently the dataloader process should log (debug) performance messages.

"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader","title":"OrderedDataLoader(cfg)","text":"

High-throughput streaming loader that reads data from disk shards in order (no shuffling).

Source code in src/saev/data/ordered.py
def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. (debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n
"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.ExampleBatch","title":"ExampleBatch","text":"

Bases: TypedDict

Individual example.

"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__iter__","title":"__iter__()","text":"

Yields batches in order.

Source code in src/saev/data/ordered.py
def __iter__(self) -> collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n < self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size < self.cfg.batch_size\n                    and n + actual_batch_size >= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n
"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__len__","title":"__len__()","text":"

Returns the number of batches in an epoch.

Source code in src/saev/data/ordered.py
def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n
"},{"location":"api/data/saev.data/#saev.data.PixelAgg","title":"PixelAgg","text":"

Bases: Enum

How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig","title":"ShuffledConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False) dataclass","text":"

Configuration for loading shuffled activation data from disk.

Attributes:

Name Type Description shards Path

Directory with .bin shards and a metadata.json file.

tokens Literal['special', 'content', 'all']

Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 'all' returns all tokens.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_size","title":"batch_size = 1024 * 16 class-attribute instance-attribute","text":"

Batch size.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_timeout_s","title":"batch_timeout_s = 30.0 class-attribute instance-attribute","text":"

How long to wait for at least one batch.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.buffer_size","title":"buffer_size = 64 class-attribute instance-attribute","text":"

Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.debug","title":"debug = False class-attribute instance-attribute","text":"

Whether the dataloader process should log debug messages.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.drop_last","title":"drop_last = False class-attribute instance-attribute","text":"

Whether to drop the last batch if it's smaller than the others.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.ignore_labels","title":"ignore_labels = dataclasses.field(default_factory=list) class-attribute instance-attribute","text":"

If provided, exclude tokens with these label values. None means no filtering. Common use: ignore_labels=[0] to exclude background.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.layer","title":"layer = -1 class-attribute instance-attribute","text":"

Which transformer layer(s) to read from disk. -1 is the default, but must be changed. \"all\" enumerates every recorded layer.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.log_every_s","title":"log_every_s = 30.0 class-attribute instance-attribute","text":"

How frequently the dataloader process should log (debug) performance messages.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.min_buffer_fill","title":"min_buffer_fill = 0.0 class-attribute instance-attribute","text":"

Fraction of the reservoir that must be populated before yielding batches.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.n_threads","title":"n_threads = 4 class-attribute instance-attribute","text":"

Number of dataloading threads.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.scale_norm","title":"scale_norm = False class-attribute instance-attribute","text":"

Whether to scale norms to sqrt(D).

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.seed","title":"seed = 17 class-attribute instance-attribute","text":"

Random seed.

"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.use_tmpdir","title":"use_tmpdir = False class-attribute instance-attribute","text":"

If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.

"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader","title":"ShuffledDataLoader(cfg)","text":"

High-throughput streaming loader that deterministically shuffles data from disk shards.

Source code in src/saev/data/shuffled.py
def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = None\n    self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n
"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.ExampleBatch","title":"ExampleBatch","text":"

Bases: TypedDict

Individual example.

"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__iter__","title":"__iter__()","text":"

Yields batches.

Source code in src/saev/data/shuffled.py
def __iter__(self) -> collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n < self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n            self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n
"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__len__","title":"__len__()","text":"

Returns the number of batches in an epoch.

Source code in src/saev/data/shuffled.py
def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / self.cfg.batch_size)\n
"},{"location":"api/data/saev.data/#saev.data.make_ordered_config","title":"make_ordered_config(shuffled_cfg, **overrides)","text":"

Create an OrderedConfig from a ShuffledConfig, with optional overrides.

Defaults come from shuffled_cfg for fields present in OrderedConfig, and overrides take precedence. Unknown override fields raise TypeError from the OrderedConfig constructor, mirroring dataclasses.replace.

Source code in src/saev/data/__init__.py
@beartype.beartype\ndef make_ordered_config(\n    shuffled_cfg: ShuffledConfig, **overrides: object\n) -> OrderedConfig:\n    \"\"\"Create an `OrderedConfig` from a `ShuffledConfig`, with optional overrides.\n\n    Defaults come from `shuffled_cfg` for fields present in `OrderedConfig`, and `overrides` take precedence. Unknown override fields raise `TypeError` from the `OrderedConfig` constructor, mirroring `dataclasses.replace`.\n    \"\"\"\n    params: dict[str, object] = {}\n    for f in dataclasses.fields(OrderedConfig):\n        name = f.name\n        if hasattr(shuffled_cfg, name):\n            params[name] = getattr(shuffled_cfg, name)\n    params.update(overrides)\n    return OrderedConfig(**params)\n
"},{"location":"api/data/shards/","title":"saev.data.shards","text":"

Library code for reading and writing sharded activations to disk.

"},{"location":"api/data/shards/#saev.data.shards.Index","title":"Index(*, idx, example_idx, content_token_idx, shard_idx, example_idx_in_shard, layer_idx_in_shard, token_idx_in_shard) dataclass","text":"

Attributes:

Name Type Description idx int

The index of the activation.

example_idx int

The index of the original example (image, audio clip etc).

content_token_idx int

The token's index within an example's content. -1 for all special tokens.

shard_idx int

The shard index.

example_idx_in_shard int

The example index along the examples axis in a shard.

token_idx_in_shard int

The token index along the tokens axis in a shard.

"},{"location":"api/data/shards/#saev.data.shards.IndexMap","title":"IndexMap(md, tokens, layer)","text":"

Attributes:

Name Type Description md Metadata

Metadata

tokens Literal['special', 'content', 'all']

Which subset of tokens to load.

layer int

Which layer to load.

layer_idx_lookup dict[int, int]

The lookup from a transformer layer to the layer idx in the shard.

Source code in src/saev/data/shards.py
def __init__(\n    self,\n    md: Metadata,\n    tokens: tp.Literal[\"special\", \"content\", \"all\"],\n    layer: int | tp.Literal[\"all\"],\n):\n    if tokens == \"special\":\n        assert md.cls_token\n\n    self.md = md\n    self.tokens = tokens\n    self.layer = layer\n\n    if isinstance(layer, int):\n        err_msg = f\"No matche for layer; {layer} not in {md.layers}.\"\n        assert layer in md.layers, err_msg\n\n    self.layer_idx_lookup = {layer: i for i, layer in enumerate(md.layers)}\n
"},{"location":"api/data/shards/#saev.data.shards.IndexMap.__len__","title":"__len__()","text":"

Dataset length depends on patches and layer.

Source code in src/saev/data/shards.py
def __len__(self) -> int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    match (self.tokens, self.layer):\n        case (\"special\", \"all\"):\n            # Return a CLS token from a random example and random layer.\n            return self.md.n_examples * len(self.md.layers)\n        case (\"special\", int()):\n            # Return a CLS token from a random example and fixed layer.\n            return self.md.n_examples\n        case (\"content\", int()):\n            # Return a patch from a random example, fixed layer, and random patch.\n            return self.md.n_examples * self.md.content_tokens_per_example\n        case (\"content\", \"all\"):\n            # Return a patch from a random example, random layer and random patch.\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.content_tokens_per_example\n            )\n        case (\"all\", int()):\n            # Return a token from a random example, fixed layer, and random token (including special).\n            return self.md.n_examples * self.md.tokens_per_example\n        case (\"all\", \"all\"):\n            # Return a token from a random example, random layer and random token (including special).\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.tokens_per_example\n            )\n        case _:\n            tp.assert_never((self.cfg.tokens, self.cfg.layer))\n
"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter","title":"LabelsWriter(shards_dir, md)","text":"

LabelsWriter handles writing patch-level segmentation labels to a single binary file.

Parameters:

Name Type Description Default shards_dir Path

The shard directory; $SAEV_SCRATCH/saev/shards/<hash> required md Metadata

The Metadata object.

required

Attributes:

Name Type Description labels UInt8[ndarray, 'n_examples n_patches']

The integer patch labels.

labels_path Path

Where the integer patch labels are stored.

md Metadata

The dataset metadata.

has_written bool

Whether we have written any data to self.labels.

Source code in src/saev/data/shards.py
def __init__(self, shards_dir: pathlib.Path, md: Metadata):\n    assert disk.is_shards_dir(shards_dir)\n    self.logger = logging.getLogger(\"labels-writer\")\n    self.md = md\n    self.has_written = False\n\n    # Always create memory-mapped file for labels\n    # If nothing is written, it will be deleted in flush()\n    self.labels_path = shards_dir / \"labels.bin\"\n    self.labels = np.memmap(\n        self.labels_path,\n        mode=\"w+\",\n        dtype=np.uint8,\n        shape=(self.md.n_examples, self.md.content_tokens_per_example),\n    )\n    self.logger.info(\"Opened labels file '%s'.\", self.labels_path)\n
"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.flush","title":"flush()","text":"

Flush the memory-mapped file to disk if anything was written.

Source code in src/saev/data/shards.py
def flush(self) -> None:\n    \"\"\"Flush the memory-mapped file to disk if anything was written.\"\"\"\n    if self.has_written:\n        self.labels.flush()\n        self.logger.info(\"Flushed labels to '%s'.\", self.labels_path)\n
"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.write_batch","title":"write_batch(batch_labels, start_idx)","text":"

Write a batch of labels to the memory-mapped file.

Parameters:

Name Type Description Default batch_labels ndarray | Tensor

Array of shape (batch_size, content_tokens_per_example) with uint8 dtype

required start_idx int

Starting index in the global labels array

required Source code in src/saev/data/shards.py
@beartype.beartype\ndef write_batch(self, batch_labels: np.ndarray | Tensor, start_idx: int):\n    \"\"\"\n    Write a batch of labels to the memory-mapped file.\n\n    Args:\n        batch_labels: Array of shape (batch_size, content_tokens_per_example) with uint8 dtype\n        start_idx: Starting index in the global labels array\n    \"\"\"\n    # Convert to numpy if needed\n    if isinstance(batch_labels, torch.Tensor):\n        batch_labels = batch_labels.cpu().numpy()\n\n    batch_size = len(batch_labels)\n    assert start_idx + batch_size <= self.md.n_examples\n    assert batch_labels.shape == (batch_size, self.md.content_tokens_per_example)\n    assert batch_labels.dtype == np.uint8\n\n    self.labels[start_idx : start_idx + batch_size] = batch_labels\n    self.has_written = True\n
"},{"location":"api/data/shards/#saev.data.shards.Metadata","title":"Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1') dataclass","text":"

Metadata for a sharded set of transformer activations.

Parameters:

Name Type Description Default family Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']

The transformer family.

required ckpt str

The transformer checkpoint.

required layers tuple[int, ...]

Which layers were saved.

required content_tokens_per_example int

The number of content tokens per example.

required cls_token bool

Whether the transformer has a [CLS] token as well.

required d_model int

Model hidden dimension.

required n_examples int

Number of examples.

required max_tokens_per_shard int

The maximum number of tokens per shard.

required data str

base64-encoded string of pickle.dumps(dataset).

required dataset Path

Absolute path to the root directory of the original dataset.

required pixel_agg PixelAgg

(only for image segmentation datasets) how the pixel-level segmentation labels were aggregated to token-level labels.

MAJORITY dtype Literal['float32']

How activations are stored.

'float32' protocol Literal['1.0.0', '1.1', '2.1']

Protocol version.

'2.1'"},{"location":"api/data/shards/#saev.data.shards.Metadata.examples_per_shard","title":"examples_per_shard property","text":"

The number of examples per shard based on the protocol.

Returns:

Type Description int

Number of examples that fit in a shard.

"},{"location":"api/data/shards/#saev.data.shards.Metadata.hash","title":"hash property","text":"

First 8 bytes of a SHA256 hash of the metadata configuration.

Returns:

Type Description str

Hexadecimal hash string uniquely identifying this configuration.

"},{"location":"api/data/shards/#saev.data.shards.Metadata.n_shards","title":"n_shards property","text":"

Total number of shards needed to store all examples.

Returns:

Type Description int

Number of shards required.

"},{"location":"api/data/shards/#saev.data.shards.Metadata.shard_shape","title":"shard_shape property","text":"

Shape of each shard file.

Returns:

Type Description tuple[int, int, int, int]

Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).

"},{"location":"api/data/shards/#saev.data.shards.Metadata.tokens_per_example","title":"tokens_per_example property","text":"

Total number of tokens per example including [CLS] token if present.

Returns:

Type Description int

Number of tokens plus one if [CLS] token is included.

"},{"location":"api/data/shards/#saev.data.shards.Metadata.dump","title":"dump(shards_root)","text":"

Dumps a Metadata object to a metadata.json file in shards_root / hash.

Parameters:

Name Type Description Default shards_root Path

Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.

required Source code in src/saev/data/shards.py
def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n
"},{"location":"api/data/shards/#saev.data.shards.Metadata.load","title":"load(shards_dir) classmethod","text":"

Loads a Metadata object from a metadata.json file in shards_dir.

Parameters:

Name Type Description Default shards_dir Path

Path to $SAEV_SCRATCH/saev/shards/<hash> as described in disk-layout.md. required Source code in src/saev/data/shards.py

@classmethod\ndef load(cls, shards_dir: pathlib.Path) -> tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/<hash> as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n
"},{"location":"api/data/shards/#saev.data.shards.PixelAgg","title":"PixelAgg","text":"

Bases: Enum

How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).

"},{"location":"api/data/shards/#saev.data.shards.RecordedTransformer","title":"RecordedTransformer(model, content_tokens_per_example, cls_token, layers)","text":"

Bases: Module

A wrapper around a transformer model that records intermediate layer activations during forward passes.

Parameters:

Name Type Description Default model Module

The transformer model to wrap.

required content_tokens_per_example int

Number of content tokens per example.

required cls_token bool

Whether to record the [CLS] token in addition to content tokens.

required layers Sequence[int]

Which transformer layers to record activations from.

required

Attributes:

Name Type Description model Module

The wrapped transformer model.

content_tokens_per_example int

Number of content tokens per example.

cls_token bool

Whether the [CLS] token is included in recorded activations.

layers Sequence[int]

Tuple of layer indices being recorded.

token_i slice

Token indices to extract from model outputs.

logger

Logger instance for this recorder.

Source code in src/saev/data/shards.py
def __init__(\n    self,\n    model: torch.nn.Module,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    layers: Sequence[int],\n):\n    super().__init__()\n\n    self.model = model\n\n    self.content_tokens_per_example = content_tokens_per_example\n    self.cls_token = cls_token\n    self.layers = layers\n\n    self.token_i = model.get_token_i(content_tokens_per_example)\n\n    self._storage = None\n    self._i = 0\n\n    self.logger = logging.getLogger(f\"recorder({model.name})\")\n\n    for i in self.layers:\n        self.model.get_residuals()[i].register_forward_hook(self.hook)\n
"},{"location":"api/data/shards/#saev.data.shards.Shard","title":"Shard(name, n_examples) dataclass","text":"

A single shard entry in shards.json, recording the filename and number of examples.

Attributes:

Name Type Description name str

The filename of the shard (e.g., \"acts000000.bin\").

n_examples int

Number of examples stored in this shard.

"},{"location":"api/data/shards/#saev.data.shards.ShardInfo","title":"ShardInfo(shards=list()) dataclass","text":"

A container for shard metadata as recorded in shards.json.

Parameters:

Name Type Description Default shards list[Shard]

A list of Shard objects.

list()"},{"location":"api/data/shards/#saev.data.shards.ShardWriter","title":"ShardWriter(shards_root, md)","text":"

ShardWriter is a stateful object that handles sharded activation writing to disk.

Parameters:

Name Type Description Default shards_root Path

The $SAEV_SCRATCH/saev/shards path.

required md Metadata

The Metadata object for these shards.

required

Attributes:

Name Type Description shards Path

The $SAEV_SCRATCH/saev/shards/<hash> directory. shard int acts_path Path acts Float[ndarray, 'examples_per_shard n_layers all_patches d_model'] | None filled int labels_writer LabelsWriter

The LabelsWriter writer.

Source code in src/saev/data/shards.py
def __init__(self, shards_root: pathlib.Path, md: Metadata):\n    assert disk.is_shards_root(shards_root)\n    self.md = md\n\n    self.logger = logging.getLogger(\"shard-writer\")\n\n    self.shards_dir = shards_root / md.hash\n    self.shards_dir.mkdir(exist_ok=True)\n\n    # builder for shard manifest\n    self._shards: ShardInfo = ShardInfo()\n\n    # Always initialize labels writer (it handles non-seg datasets internally)\n    self.labels_writer = LabelsWriter(self.shards_dir, md)\n\n    self.shard = -1\n    self.acts = None\n    self.next_shard()\n
"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__enter__","title":"__enter__()","text":"

Context manager entry.

Source code in src/saev/data/shards.py
def __enter__(self):\n    \"\"\"Context manager entry.\"\"\"\n    return self\n
"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__exit__","title":"__exit__(exc_type, exc_val, exc_tb)","text":"

Context manager exit - handle cleanup.

Source code in src/saev/data/shards.py
def __exit__(self, exc_type, exc_val, exc_tb):\n    \"\"\"Context manager exit - handle cleanup.\"\"\"\n    self.flush()\n\n    # Delete empty labels file if nothing was written\n    if not self.labels_writer.has_written:\n        if os.path.exists(self.labels_writer.labels_path):\n            os.remove(self.labels_writer.labels_path)\n            self.logger.info(\n                \"Removed empty labels file '%s'.\", self.labels_writer.labels_path\n            )\n
"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.write_batch","title":"write_batch(activations, start_idx, patch_labels=None)","text":"

Write a batch of activations and (optionally) patch labels.

Parameters:

Name Type Description Default activations Float[Tensor, 'batch n_layers all_patches d_model']

Batch of activations to write.

required start_idx int

Starting index for this batch.

required patch_labels UInt8[Tensor, 'batch n_patches'] | None

Optional patch labels for segmentation datasets.

None Source code in src/saev/data/shards.py
def write_batch(\n    self,\n    activations: Float[Tensor, \"batch n_layers all_patches d_model\"],\n    start_idx: int,\n    patch_labels: UInt8[Tensor, \"batch n_patches\"] | None = None,\n) -> None:\n    \"\"\"Write a batch of activations and (optionally) patch labels.\n\n    Args:\n        activations: Batch of activations to write.\n        start_idx: Starting index for this batch.\n        patch_labels: Optional patch labels for segmentation datasets.\n    \"\"\"\n    batch_size = len(activations)\n    end_idx = start_idx + batch_size\n\n    # Write activations (handling sharding)\n    offset = self.md.examples_per_shard * self.shard\n\n    if end_idx >= offset + self.md.examples_per_shard:\n        # We have run out of space in this mmap'ed file. Let's fill it as much as we can.\n        n_fit = offset + self.md.examples_per_shard - start_idx\n        self.acts[start_idx - offset : start_idx - offset + n_fit] = activations[\n            :n_fit\n        ]\n        self.filled = start_idx - offset + n_fit\n\n        # Write labels for the portion that fits\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                labels_to_write = (\n                    patch_labels[:n_fit].cpu().numpy().astype(np.uint8)\n                )\n            elif not isinstance(patch_labels, np.ndarray):\n                labels_to_write = np.array(patch_labels[:n_fit], dtype=np.uint8)\n            else:\n                labels_to_write = patch_labels[:n_fit]\n\n            self.labels_writer.write_batch(labels_to_write, start_idx)\n\n        self.next_shard()\n\n        # Recursively call write_batch for remaining data\n        if n_fit < batch_size:\n            self.write_batch(\n                activations[n_fit:],\n                start_idx + n_fit,\n                patch_labels[n_fit:] if patch_labels is not None else None,\n            )\n    else:\n        msg = f\"0 <= 
{start_idx} - {offset} <= {offset} + {self.md.examples_per_shard}\"\n        assert 0 <= start_idx - offset <= offset + self.md.examples_per_shard, msg\n        msg = (\n            f\"0 <= {end_idx} - {offset} <= {offset} + {self.md.examples_per_shard}\"\n        )\n        assert 0 <= end_idx - offset <= offset + self.md.examples_per_shard, msg\n        self.acts[start_idx - offset : end_idx - offset] = activations\n        self.filled = end_idx - offset\n\n        # Write labels if provided\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                patch_labels = patch_labels.cpu().numpy().astype(np.uint8)\n            elif not isinstance(patch_labels, np.ndarray):\n                patch_labels = np.array(patch_labels, dtype=np.uint8)\n\n            self.labels_writer.write_batch(patch_labels, start_idx)\n
"},{"location":"api/data/shards/#saev.data.shards.get_dataloader","title":"get_dataloader(data, *, batch_size, n_workers, data_tr=None, mask_tr=None, sample_tr=None)","text":"

Get a dataloader for a default map-style dataset.

Parameters:

Name Type Description Default data Config

Config for the dataset.

required batch_size int

Batch size.

required n_workers int

Number of dataloader workers.

required data_tr Callable | None

Transform to be applied to each 'data' key (typically the raw data).

None mask_tr Callable | None

Transform to be applied to masks.

None sample_tr Callable | None

Transform to be applied to the entire sample dict.

None

Returns:

Type Description DataLoader

A PyTorch Dataloader that yields dictionaries with 'data' keys containing data batches, 'index' keys containing original dataset indices and 'label' keys containing label batches.

Source code in src/saev/data/shards.py
@beartype.beartype\ndef get_dataloader(\n    data: datasets.Config,\n    *,\n    batch_size: int,\n    n_workers: int,\n    data_tr: Callable | None = None,\n    mask_tr: Callable | None = None,\n    sample_tr: Callable | None = None,\n) -> torch.utils.data.DataLoader:\n    \"\"\"\n    Get a dataloader for a default map-style dataset.\n\n    Args:\n        data: Config for the dataset.\n        batch_size: Batch size.\n        n_workers: Number of dataloader workers.\n        data_tr: Transform to be applied to each 'data' key (typically the raw data).\n        mask_tr: Transform to be applied to masks.\n        sample_tr: Transform to be applied to the entire sample dict.\n\n    Returns:\n        A PyTorch Dataloader that yields dictionaries with `'data'` keys containing data batches, `'index'` keys containing original dataset indices and `'label'` keys containing label batches.\n    \"\"\"\n    dataset = datasets.get_dataset(\n        data, data_transform=data_tr, mask_transform=mask_tr, sample_transform=sample_tr\n    )\n\n    dataloader = torch.utils.data.DataLoader(\n        dataset=dataset,\n        batch_size=batch_size,\n        drop_last=False,\n        num_workers=n_workers,\n        persistent_workers=n_workers > 0,\n        shuffle=False,\n        pin_memory=False,\n    )\n    return dataloader\n
"},{"location":"api/data/shards/#saev.data.shards.pixel_to_patch_labels","title":"pixel_to_patch_labels(seg, n_patches, patch_size, pixel_agg=PixelAgg.MAJORITY, bg_label=0, max_classes=256)","text":"

Convert pixel-level segmentation to patch-level labels using vectorized operations.

Parameters:

Name Type Description Default seg Image

Pixel-level segmentation mask as PIL Image

required n_patches int

Total number of patches expected

required patch_size int

Size of each patch in pixels

required pixel_agg PixelAgg

How to aggregate pixel labels into patch labels

MAJORITY bg_label int

Background label index

0 max_classes int

Maximum number of classes (for bincount)

256

Returns:

Type Description UInt8[Tensor, ' n_patches']

Patch labels as uint8 tensor of shape (n_patches,)

Source code in src/saev/data/shards.py
@jaxtyped(typechecker=beartype.beartype)\ndef pixel_to_patch_labels(\n    seg: Image.Image,\n    n_patches: int,\n    patch_size: int,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n    bg_label: int = 0,\n    max_classes: int = 256,\n) -> UInt8[Tensor, \" n_patches\"]:\n    \"\"\"\n    Convert pixel-level segmentation to patch-level labels using vectorized operations.\n\n    Args:\n        seg: Pixel-level segmentation mask as PIL Image\n        n_patches: Total number of patches expected\n        patch_size: Size of each patch in pixels\n        pixel_agg: How to aggregate pixel labels into patch labels\n        bg_label: Background label index\n        max_classes: Maximum number of classes (for bincount)\n\n    Returns:\n        Patch labels as uint8 tensor of shape (n_patches,)\n    \"\"\"\n    # Convert to torch tensor for vectorized operations\n    seg_tensor = torch.from_numpy(np.array(seg, dtype=np.uint8))\n    assert seg_tensor.ndim == 2\n\n    h, w = seg_tensor.shape\n\n    # Calculate patch grid dimensions\n    patch_grid_h = h // patch_size\n    patch_grid_w = w // patch_size\n    assert patch_grid_w * patch_grid_h == n_patches, (\n        f\"Image size {w}x{h} with patch_size {patch_size} gives {patch_grid_w}x{patch_grid_h} = {patch_grid_w * patch_grid_h} patches, expected {n_patches}\"\n    )\n\n    # Reshape into patches using einops: (n_patches, patch_size * patch_size)\n    patches = einops.rearrange(\n        seg_tensor,\n        \"(h p1) (w p2) -> (h w) (p1 p2)\",\n        p1=patch_size,\n        p2=patch_size,\n        h=patch_grid_h,\n        w=patch_grid_w,\n    )\n\n    # Use vectorized bincount approach to get class counts for all patches at once\n    # counts[i, c] = number of times class c appears in patch i\n    offsets = torch.arange(n_patches, device=patches.device).unsqueeze(1) * max_classes\n    flat = (patches + offsets).reshape(-1)\n    counts = torch.bincount(flat, minlength=n_patches * max_classes).reshape(\n        n_patches, 
max_classes\n    )\n\n    if pixel_agg is PixelAgg.MAJORITY:\n        # Take the most common label in each patch\n        patch_labels = counts.argmax(dim=1)\n    elif pixel_agg is PixelAgg.PREFER_FG:\n        # Take the most common non-background label, or background if all background\n        nonbg = counts.clone()\n        nonbg[:, bg_label] = 0\n        has_nonbg = nonbg.sum(dim=1) > 0\n        nonbg_arg = nonbg.argmax(dim=1)\n        bg_tensor = torch.full_like(nonbg_arg, bg_label)\n        patch_labels = torch.where(has_nonbg, nonbg_arg, bg_tensor)\n    else:\n        tp.assert_never(pixel_agg)\n\n    return patch_labels.to(torch.uint8)\n
"},{"location":"api/data/shards/#saev.data.shards.worker_fn","title":"worker_fn(*, family, ckpt, content_tokens_per_example, cls_token, d_model, layers, data, batch_size, n_workers, max_tokens_per_shard, shards_root, device, pixel_agg=PixelAgg.MAJORITY)","text":"

Parameters:

Name Type Description Default family str

Transformer family (dinov2, dinov3, clip, etc).

required ckpt str

Transformer ckpt (hf-hub:imageomics/bioclip2, etc).

required content_tokens_per_example int

Number of content tokens per example.

required cls_token bool

Whether the transformer has a [CLS] token.

required d_model int

Hidden dimension of transformer.

required layers list[int]

The layers to record activations for.

required data Config

Config for the particular (image) dataset to load.

required batch_size int

Batch size for the dataset.

required n_workers int

Number of workers for loading examples from the dataset.

required max_tokens_per_shard int

Maximum number of tokens per disk shard.

required pixel_agg PixelAgg

Optional method for aggregating segmentation label pixels.

MAJORITY shards_root Path

Where to save shards. Should end with 'shards'. See disk-layout.md; this is $SAEV_SCRATCH/saev/shards.

required device str

Device for doing the computation.

required

Returns:

Type Description Path

Path to the shards directory.

Source code in src/saev/data/shards.py
@beartype.beartype\ndef worker_fn(\n    *,\n    family: str,\n    ckpt: str,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    d_model: int,\n    layers: list[int],\n    data: datasets.Config,\n    batch_size: int,\n    n_workers: int,\n    max_tokens_per_shard: int,\n    shards_root: pathlib.Path,\n    device: str,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n) -> pathlib.Path:\n    \"\"\"\n    Args:\n        family: Transformer family (dinov2, dinov3, clip, etc).\n        ckpt: Transformer ckpt (hf-hub:imageomics/bioclip2, etc).\n        content_tokens_per_example: Number of content tokens per example.\n        cls_token: Whether the transformer has a [CLS] token.\n        d_model: Hidden dimension of transformer.\n        layers: The layers to record activations for.\n        data: Config for the particular (image) dataset to load.\n        batch_size: Batch size for the dataset.\n        n_workers: Number of workers for loading examples fromm the dataset.\n        max_tokens_per_shard: Maximum number of tokens per disk shard.\n        pixel_agg: Optional method for aggregating segmentation label pixels.\n        shards_root: Where to save shards. Should end with 'shards'. 
See [disk-layout.md](../../developers/disk-layout.md); this is $SAEV_SCRATCH/saev/shards.\n        device: Device for doing the computation.\n\n    Returns:\n        Path to the shards directory.\n    \"\"\"\n    from saev import helpers\n    from saev.data import models\n\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n        torch.backends.cudnn.benchmark = True\n        torch.backends.cudnn.deterministic = True\n\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n    logger = logging.getLogger(\"worker_fn\")\n\n    if device == \"cuda\" and not torch.cuda.is_available():\n        logger.warning(\"No CUDA device available, using CPU.\")\n        device = \"cpu\"\n\n    assert shards_root.name == \"shards\"\n\n    model_cls = models.load_model_cls(family)\n    model_instance = model_cls(ckpt).to(device)\n    model = RecordedTransformer(\n        model_instance, content_tokens_per_example, cls_token, layers\n    )\n\n    data_tr, sample_tr = model_cls.make_transforms(ckpt, content_tokens_per_example)\n\n    mask_tr = None\n    if datasets.is_img_seg_dataset(data):\n        # For image segmentation datasets, create a transform that converts pixels to patches\n        # Use make_resize with NEAREST interpolation for segmentation masks\n        seg_resize_tr = model_cls.make_resize(\n            ckpt, content_tokens_per_example, scale=1.0, resample=Image.NEAREST\n        )\n\n        def seg_to_patches(seg):\n            \"\"\"Transform that resizes segmentation and converts to patch labels.\"\"\"\n\n            # Convert to patch labels\n            return pixel_to_patch_labels(\n                seg_resize_tr(seg),\n                content_tokens_per_example,\n   
             patch_size=model_instance.patch_size,\n                pixel_agg=pixel_agg,\n                bg_label=data.bg_label,\n            )\n\n        mask_tr = seg_to_patches\n\n    dataloader = get_dataloader(\n        data,\n        batch_size=batch_size,\n        n_workers=n_workers,\n        data_tr=data_tr,\n        mask_tr=mask_tr,\n        sample_tr=sample_tr,\n    )\n\n    n_batches = math.ceil(data.n_examples / batch_size)\n    logger.info(\"Dumping %d batches of %d examples.\", n_batches, batch_size)\n\n    model = model.to(device)\n\n    md = Metadata(\n        family=family,\n        ckpt=ckpt,\n        layers=tuple(layers),\n        content_tokens_per_example=content_tokens_per_example,\n        cls_token=cls_token,\n        d_model=d_model,\n        n_examples=data.n_examples,\n        max_tokens_per_shard=max_tokens_per_shard,\n        data=base64.b64encode(pickle.dumps(data)).decode(\"utf8\"),\n        dataset=data.root,\n        pixel_agg=pixel_agg,\n    )\n    md.dump(shards_root)\n\n    # Use context manager for proper cleanup\n    with ShardWriter(shards_root, md) as writer:\n        i = 0\n        # Calculate and write transformer activations.\n        with torch.inference_mode():\n            for batch in helpers.progress(dataloader, total=n_batches):\n                data = batch.get(\"data\").to(device)\n                grid = batch.get(\"grid\")\n                if grid is not None:\n                    grid = grid.to(device)\n                    out, cache = model(data, grid=grid)\n                else:\n                    out, cache = model(data)\n                # cache has shape [batch size, n layers, n patches + 1, d model]\n                del out\n\n                # Write activations and labels (if present) in one call\n                patch_labels = batch.get(\"patch_labels\")\n                if patch_labels is not None:\n                    logger.debug(\n                        \"Found patch_labels in batch: shape=%s\",\n 
                       patch_labels.shape\n                        if hasattr(patch_labels, \"shape\")\n                        else \"unknown\",\n                    )\n                    # Ensure correct shape\n                    assert patch_labels.shape == (\n                        len(cache),\n                        content_tokens_per_example,\n                    )\n                else:\n                    logger.debug(f\"No patch_labels in batch. Keys: {batch.keys()}\")\n\n                writer.write_batch(cache, i, patch_labels=patch_labels)\n\n                i += len(cache)\n\n    return shards_root / md.hash\n
"},{"location":"api/data/shuffled/","title":"saev.data.shuffled","text":""},{"location":"api/data/shuffled/#saev.data.shuffled.Config","title":"Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False) dataclass","text":"

Configuration for loading shuffled activation data from disk.

Attributes:

Name Type Description shards Path

Directory with .bin shards and a metadata.json file.

tokens Literal['special', 'content', 'all']

Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 'all' returns all tokens.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_size","title":"batch_size = 1024 * 16 class-attribute instance-attribute","text":"

Batch size.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_timeout_s","title":"batch_timeout_s = 30.0 class-attribute instance-attribute","text":"

How long to wait for at least one batch.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.buffer_size","title":"buffer_size = 64 class-attribute instance-attribute","text":"

Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.debug","title":"debug = False class-attribute instance-attribute","text":"

Whether the dataloader process should log debug messages.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.drop_last","title":"drop_last = False class-attribute instance-attribute","text":"

Whether to drop the last batch if it's smaller than the others.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.ignore_labels","title":"ignore_labels = dataclasses.field(default_factory=list) class-attribute instance-attribute","text":"

If provided, exclude tokens with these label values. An empty list (the default) means no filtering. Common use: ignore_labels=[0] to exclude background.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.layer","title":"layer = -1 class-attribute instance-attribute","text":"

Which transformer layer(s) to read from disk. -1 is the default, but must be changed. \"all\" enumerates every recorded layer.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.log_every_s","title":"log_every_s = 30.0 class-attribute instance-attribute","text":"

How frequently the dataloader process should log (debug) performance messages.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.min_buffer_fill","title":"min_buffer_fill = 0.0 class-attribute instance-attribute","text":"

Fraction of the reservoir that must be populated before yielding batches.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.n_threads","title":"n_threads = 4 class-attribute instance-attribute","text":"

Number of dataloading threads.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.scale_norm","title":"scale_norm = False class-attribute instance-attribute","text":"

Whether to scale norms to sqrt(D).

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.seed","title":"seed = 17 class-attribute instance-attribute","text":"

Random seed.

"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.use_tmpdir","title":"use_tmpdir = False class-attribute instance-attribute","text":"

If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.

"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader","title":"DataLoader(cfg)","text":"

High-throughput streaming loader that deterministically shuffles data from disk shards.

Source code in src/saev/data/shuffled.py
def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = None\n    self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n
"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.ExampleBatch","title":"ExampleBatch","text":"

Bases: TypedDict

A batch of examples: activations (act) with the example index and token index for each.

"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__iter__","title":"__iter__()","text":"

Yields batches.

Source code in src/saev/data/shuffled.py
def __iter__(self) -> collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n < self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n            self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n
"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__len__","title":"__len__()","text":"

Returns the number of batches in an epoch.

Source code in src/saev/data/shuffled.py
def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / self.cfg.batch_size)\n
"},{"location":"api/data/siglip/","title":"saev.data.siglip","text":""},{"location":"api/data/siglip/#saev.data.siglip.Vit","title":"Vit(ckpt)","text":"

Bases: Module, Transformer

Source code in src/saev/data/siglip.py
def __init__(self, ckpt: str):\n    super().__init__()\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model\n\n    assert isinstance(self.model, open_clip.timm_model.TimmModel)\n
"},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_resize","title":"make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.

Source code in src/saev/data/siglip.py
@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    from PIL import Image\n\n    def resize(img: Image.Image) -> Image.Image:\n        # SigLIP typically uses 224x224 or 384x384 images\n        # We'll assume 224x224 for simplicity\n        resize_size_px = (int(224 * scale), int(224 * scale))\n        return img.resize(resize_size_px, resample=resample)\n\n    return resize\n
"},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

Create transforms for preprocessing: (img_transform, sample_transform | None).

Source code in src/saev/data/siglip.py
@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n
"},{"location":"api/data/transforms/","title":"saev.data.transforms","text":""},{"location":"api/data/transforms/#saev.data.transforms.conv2d_to_tokens","title":"conv2d_to_tokens(x_bchw, conv)","text":"

Conv2d then flatten spatial to L, return (B, L, D).

Source code in src/saev/data/transforms.py
@jaxtyped(typechecker=beartype.beartype)\ndef conv2d_to_tokens(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -> Float[Tensor, \"b n d\"]:\n    \"\"\"Conv2d then flatten spatial to L, return (B, L, D).\"\"\"\n    y_bdhw = conv(x_bchw)\n    return einops.rearrange(y_bdhw, \"b d h w -> b (h w) d\")\n
"},{"location":"api/data/transforms/#saev.data.transforms.resize_to_patch_grid","title":"resize_to_patch_grid(img, *, p, n, resample=Image.LANCZOS)","text":"

Resize image to (w, h) so that: (1) w % p == 0 and h % p == 0; (2) (h/p) * (w/p) == n; (3) the change in aspect ratio is minimized.

Source code in src/saev/data/transforms.py
@beartype.beartype\ndef resize_to_patch_grid(\n    img: Image.Image,\n    *,\n    p: int,\n    n: int,\n    resample: Image.Resampling | int = Image.LANCZOS,\n) -> Image.Image:\n    \"\"\"\n    Resize image to (w, h) so that:\n      - w % p == 0, h % p == 0\n      - (h/p) * (w/p) == N\n      - Minimizes change in aspect ratio.\n    \"\"\"\n    if p <= 0 or n <= 0:\n        raise ValueError(\"p and n must be positive integers\")\n\n    w0, h0 = img.size\n    a0 = w0 / h0\n\n    # Find the aspect ratio closest to a0\n    best_c = 0\n    best_dist = float(\"inf\")\n    for i in range(1, int(math.sqrt(n) + 1)):\n        if n % i != 0:\n            continue\n\n        for d in (i, n // i):\n            c, r = d, n // d\n            aspect = c / r\n            dist = abs(aspect - a0)\n\n            if dist < best_dist:\n                best_c = d\n                best_dist = dist\n\n    c = best_c\n    r = n // c\n    w, h = c * p, r * p\n    return img.resize((w, h), resample=resample)\n
"},{"location":"api/data/transforms/#saev.data.transforms.unfolded_conv2d","title":"unfolded_conv2d(x_bchw, conv)","text":"

Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels. Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.

Source code in src/saev/data/transforms.py
@jaxtyped(typechecker=beartype.beartype)\ndef unfolded_conv2d(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -> Float[Tensor, \"b n d\"]:\n    \"\"\"\n    Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels.\n    Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.\n    \"\"\"\n    k = conv.kernel_size[0]\n\n    assert conv.kernel_size == (k, k)\n    assert conv.stride == (k, k)\n    assert conv.padding == (0, 0)\n    assert conv.groups == 1\n    assert conv.dilation == (1, 1)\n\n    *b, c, h, w = x_bchw.shape\n\n    assert h % k == 0 and w % k == 0\n\n    tokens_bnd = einops.rearrange(\n        x_bchw, \"b c (hp p1) (wp p2) -> b (hp wp) (c p1 p2)\", p1=k, p2=k\n    ).contiguous()\n    w_dp = conv.weight.reshape(conv.out_channels, c * k * k)\n    tokens_bnd = tokens_bnd @ w_dp.T\n    if conv.bias is not None:\n        tokens_bnd = tokens_bnd + conv.bias[None, None, :]\n    return tokens_bnd\n
"},{"location":"api/framework/inference/","title":"saev.framework.inference","text":"

Script for dumping SAE inference artifacts in a single pass over the dataset.

Default mode writes 5 files:

  1. mean_values.pt
  2. sparsity.pt
  3. distributions.pt
  4. token_acts.npz
  5. metrics.json

If save=False, only metrics.json is written.

metrics.json is serialized from saev.metrics.Metrics.

"},{"location":"api/framework/inference/#saev.framework.inference.Config","title":"Config(run=pathlib.Path('./runs/abcdefg'), data=OrderedConfig(), n_dists=25, ignore_labels=list(), force_recompute=False, save=True, device='cuda', slurm_acct='', slurm_partition='', n_hours=4.0, mem_gb=80, log_to=os.path.join('.', 'logs')) dataclass","text":"

Configuration for computing image activations.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.data","title":"data = OrderedConfig() class-attribute instance-attribute","text":"

Data configuration

"},{"location":"api/framework/inference/#saev.framework.inference.Config.device","title":"device = 'cuda' class-attribute instance-attribute","text":"

Which accelerator to use.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.force_recompute","title":"force_recompute = False class-attribute instance-attribute","text":"

Force recomputation even if files exist.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.ignore_labels","title":"ignore_labels = dataclasses.field(default_factory=list) class-attribute instance-attribute","text":"

Which token labels to ignore when calculating summarized image activations.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.log_to","title":"log_to = os.path.join('.', 'logs') class-attribute instance-attribute","text":"

Where to log Slurm job stdout/stderr.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.mem_gb","title":"mem_gb = 80 class-attribute instance-attribute","text":"

Node memory in GB.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.n_dists","title":"n_dists = 25 class-attribute instance-attribute","text":"

Number of features to save distributions for.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.n_hours","title":"n_hours = 4.0 class-attribute instance-attribute","text":"

Slurm job length in hours.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.run","title":"run = pathlib.Path('./runs/abcdefg') class-attribute instance-attribute","text":"

Path to the sae.pt file.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.save","title":"save = True class-attribute instance-attribute","text":"

Whether to write token_acts/statistics files. If False, only metrics.json is written.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_acct","title":"slurm_acct = '' class-attribute instance-attribute","text":"

Slurm account string. Empty means to not use Slurm.

"},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_partition","title":"slurm_partition = '' class-attribute instance-attribute","text":"

Slurm partition.

"},{"location":"api/framework/inference/#saev.framework.inference.main","title":"main(cfg, sweep=None)","text":"

Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.

Parameters:

Name Type Description Default cfg Annotated[Config, arg(name='')]

Baseline config for inference.

required sweep Path | None

Path to .py file defining the sweep parameters.

None Source code in src/saev/framework/inference.py
@beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")], sweep: pathlib.Path | None = None\n):\n    \"\"\"\n    Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.\n\n    Args:\n        cfg: Baseline config inference.\n        sweep: Path to .py file defining the sweep parameters.\n    \"\"\"\n\n    if sweep is not None:\n        sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    assert all(c.slurm_acct == cfgs[0].slurm_acct for c in cfgs)\n    cfg = cfgs[0]\n\n    if not cfg.slurm_acct:\n        for i, cfg_item in enumerate(cfgs, start=1):\n            logger.info(\"Running config %d/%d locally.\", i, len(cfgs))\n            worker_fn(cfg_item)\n        logger.info(\"Jobs done.\")\n        return 0\n\n    import submitit\n    from submitit.core.utils import UncompletedJobError\n\n    executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n    executor.update_parameters(\n        time=int(cfg.n_hours * 60),\n        partition=cfg.slurm_partition,\n        gpus_per_node=1,\n        ntasks_per_node=1,\n        mem=f\"{cfg.mem_gb}GB\",\n        stderr_to_stdout=True,\n        account=cfg.slurm_acct,\n    )\n    with executor.batch():\n        jobs = []\n        for i, cfg in enumerate(cfgs):\n            do, reason, _ = need_compute(cfg)\n            if not do:\n                continue\n\n            logger.info(reason)\n            jobs.append(executor.submit(worker_fn, cfg))\n\n    time.sleep(5.0)\n\n    for i, job in enumerate(jobs, start=1):\n        logger.info(\"Job %d/%d: %s %s\", i, len(jobs), job.job_id, 
job.state)\n\n    for i, job in enumerate(jobs, start=1):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", i, len(jobs))\n        except UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, i)\n\n    logger.info(\"Jobs done.\")\n    return 0\n
"},{"location":"api/framework/saev.framework/","title":"saev.framework","text":"

Submitit entrypoint modules for SAE workflows.

saev.framework is for script-like modules (e.g. train/inference/shards) that need importable module paths for submitit launchers. Place reusable data/model utilities outside this package.

"},{"location":"api/framework/shards/","title":"saev.framework.shards","text":"

To save lots of activations, we want to do things in parallel, with lots of slurm jobs, and save multiple files, rather than just one.

This script handles that additional complexity.

Conceptually, activations are either thought of as

  1. A single [n_imgs x n_layers x (n_patches + 1), d_model] tensor. This is a dataset.
  2. Multiple [n_imgs_per_shard, n_layers, (n_patches + 1), d_model] tensors. This is a set of sharded activations.
"},{"location":"api/framework/shards/#saev.framework.shards.Config","title":"Config(data=datasets.Imagenet(), shards_root=pathlib.Path('$SAEV_SCRATCH/saev/shards/'), family='clip', ckpt='ViT-L-14/openai', batch_size=1024, n_workers=8, d_model=1024, layers=(lambda: [-2])(), content_tokens_per_example=256, cls_token=True, pixel_agg=PixelAgg.MAJORITY, max_tokens_per_shard=2400000, ssl=True, device='cuda', n_hours=24.0, slurm_acct='', slurm_partition='', log_to='./logs') dataclass","text":"

Configuration for calculating and saving ViT activations.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.batch_size","title":"batch_size = 1024 class-attribute instance-attribute","text":"

Batch size for ViT inference.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.ckpt","title":"ckpt = 'ViT-L-14/openai' class-attribute instance-attribute","text":"

Specific model checkpoint.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.cls_token","title":"cls_token = True class-attribute instance-attribute","text":"

Whether the model has a [CLS] token.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.content_tokens_per_example","title":"content_tokens_per_example = 256 class-attribute instance-attribute","text":"

Number of content tokens per example (depends on model).

"},{"location":"api/framework/shards/#saev.framework.shards.Config.d_model","title":"d_model = 1024 class-attribute instance-attribute","text":"

Dimension of the ViT activations (depends on model).

"},{"location":"api/framework/shards/#saev.framework.shards.Config.data","title":"data = dataclasses.field(default_factory=(datasets.Imagenet)) class-attribute instance-attribute","text":"

Which dataset to use.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.device","title":"device = 'cuda' class-attribute instance-attribute","text":"

Which device to use.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.family","title":"family = 'clip' class-attribute instance-attribute","text":"

Which model family.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.layers","title":"layers = dataclasses.field(default_factory=(lambda: [-2])) class-attribute instance-attribute","text":"

Which layers to save. By default, the second-to-last layer.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.log_to","title":"log_to = './logs' class-attribute instance-attribute","text":"

Where to log Slurm job stdout/stderr.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.max_tokens_per_shard","title":"max_tokens_per_shard = 2400000 class-attribute instance-attribute","text":"

Maximum number of activations per shard; 2.4M is approximately 10GB for 1024-dimensional 4-byte activations.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.n_hours","title":"n_hours = 24.0 class-attribute instance-attribute","text":"

Slurm job length in hours.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.n_workers","title":"n_workers = 8 class-attribute instance-attribute","text":"

Number of dataloader workers.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.shards_root","title":"shards_root = pathlib.Path('$SAEV_SCRATCH/saev/shards/') class-attribute instance-attribute","text":"

Where to write shards.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_acct","title":"slurm_acct = '' class-attribute instance-attribute","text":"

Slurm account string. Empty means to not use Slurm.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_partition","title":"slurm_partition = '' class-attribute instance-attribute","text":"

Slurm partition.

"},{"location":"api/framework/shards/#saev.framework.shards.Config.ssl","title":"ssl = True class-attribute instance-attribute","text":"

Whether to use SSL.

"},{"location":"api/framework/shards/#saev.framework.shards.cli","title":"cli(cfg)","text":"

Save ViT activations for use later on.

Parameters:

Name Type Description Default cfg Annotated[Config, arg(name='')]

Configuration for activations.

required Source code in src/saev/framework/shards.py
@beartype.beartype\ndef cli(cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")]):\n    \"\"\"\n    Save ViT activations for use later on.\n\n    Args:\n        cfg: Configuration for activations.\n    \"\"\"\n    logger = logging.getLogger(\"dump\")\n\n    if not cfg.ssl:\n        logger.warning(\"Ignoring SSL certs. Try not to do this!\")\n        # https://github.com/openai/whisper/discussions/734#discussioncomment-4491761\n        # Ideally we don't have to disable SSL but we are only downloading weights.\n        import ssl\n\n        ssl._create_default_https_context = ssl._create_unverified_context\n\n    from saev.data import shards\n\n    kwargs = dict(\n        family=cfg.family,\n        ckpt=cfg.ckpt,\n        content_tokens_per_example=cfg.content_tokens_per_example,\n        cls_token=cfg.cls_token,\n        d_model=cfg.d_model,\n        layers=cfg.layers,\n        data=cfg.data,\n        batch_size=cfg.batch_size,\n        n_workers=cfg.n_workers,\n        max_tokens_per_shard=cfg.max_tokens_per_shard,\n        shards_root=cfg.shards_root,\n        device=cfg.device,\n        pixel_agg=cfg.pixel_agg,\n    )\n\n    # Actually record activations.\n    if cfg.slurm_acct:\n        import submitit\n\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n        executor.update_parameters(\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            cpus_per_task=cfg.n_workers + 4,\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n\n        job = executor.submit(shards.worker_fn, **kwargs)\n        logger.info(\"Running job '%s'.\", job.job_id)\n        job.result()\n\n    else:\n        shards.worker_fn(**kwargs)\n
"},{"location":"api/framework/train/","title":"saev.framework.train","text":"

Trains many SAEs in parallel to amortize the cost of loading a single batch of data over many SAE training runs.

Checklist for making sure your training doesn't suck:

"},{"location":"api/framework/train/#saev.framework.train.Config","title":"Config(train_data=saev.data.ShuffledConfig(), val_data=saev.data.ShuffledConfig(), n_train=100000000, n_val=10000000, sae=nn.SparseAutoencoderConfig(), objective=nn.objectives.Matryoshka(), n_sparsity_warmup=0, optim='adam', lr=0.0004, n_lr_warmup=500, grad_clip=1.0, track=True, wandb_project='saev', tags=(), log_every=25, runs_root=pathlib.Path('$SAEV_NFS/saev/runs'), device='cuda', seed=42, slurm_acct='', slurm_partition='', n_hours=24.0, mem_gb=128, log_to=os.path.join('.', 'logs')) dataclass","text":"

Configuration for training a sparse autoencoder on a vision transformer.

"},{"location":"api/framework/train/#saev.framework.train.Config.device","title":"device = 'cuda' class-attribute instance-attribute","text":"

Hardware device.

"},{"location":"api/framework/train/#saev.framework.train.Config.grad_clip","title":"grad_clip = 1.0 class-attribute instance-attribute","text":"

Maximum gradient norm across all SAE parameters.

"},{"location":"api/framework/train/#saev.framework.train.Config.log_every","title":"log_every = 25 class-attribute instance-attribute","text":"

How often to log to WandB.

"},{"location":"api/framework/train/#saev.framework.train.Config.log_to","title":"log_to = os.path.join('.', 'logs') class-attribute instance-attribute","text":"

Where to log Slurm job stdout/stderr.

"},{"location":"api/framework/train/#saev.framework.train.Config.lr","title":"lr = 0.0004 class-attribute instance-attribute","text":"

Learning rate.

"},{"location":"api/framework/train/#saev.framework.train.Config.mem_gb","title":"mem_gb = 128 class-attribute instance-attribute","text":"

Node memory in GB.

"},{"location":"api/framework/train/#saev.framework.train.Config.n_hours","title":"n_hours = 24.0 class-attribute instance-attribute","text":"

Slurm job length in hours.

"},{"location":"api/framework/train/#saev.framework.train.Config.n_lr_warmup","title":"n_lr_warmup = 500 class-attribute instance-attribute","text":"

Number of learning rate warmup steps.

"},{"location":"api/framework/train/#saev.framework.train.Config.n_sparsity_warmup","title":"n_sparsity_warmup = 0 class-attribute instance-attribute","text":"

Number of sparsity coefficient warmup steps.

"},{"location":"api/framework/train/#saev.framework.train.Config.n_train","title":"n_train = 100000000 class-attribute instance-attribute","text":"

Number of SAE training samples.

"},{"location":"api/framework/train/#saev.framework.train.Config.n_val","title":"n_val = 10000000 class-attribute instance-attribute","text":"

Number of SAE evaluation samples.

"},{"location":"api/framework/train/#saev.framework.train.Config.objective","title":"objective = nn.objectives.Matryoshka() class-attribute instance-attribute","text":"

SAE objective configuration.

"},{"location":"api/framework/train/#saev.framework.train.Config.optim","title":"optim = 'adam' class-attribute instance-attribute","text":"

Optimizer for training.

"},{"location":"api/framework/train/#saev.framework.train.Config.runs_root","title":"runs_root = pathlib.Path('$SAEV_NFS/saev/runs') class-attribute instance-attribute","text":"

Root directory for runs.

"},{"location":"api/framework/train/#saev.framework.train.Config.sae","title":"sae = nn.SparseAutoencoderConfig() class-attribute instance-attribute","text":"

SAE configuration.

"},{"location":"api/framework/train/#saev.framework.train.Config.seed","title":"seed = 42 class-attribute instance-attribute","text":"

Random seed.

"},{"location":"api/framework/train/#saev.framework.train.Config.slurm_acct","title":"slurm_acct = '' class-attribute instance-attribute","text":"

Slurm account string. Empty means to not use Slurm.

"},{"location":"api/framework/train/#saev.framework.train.Config.slurm_partition","title":"slurm_partition = '' class-attribute instance-attribute","text":"

Slurm partition.

"},{"location":"api/framework/train/#saev.framework.train.Config.tags","title":"tags = () class-attribute instance-attribute","text":"

Tags to add to WandB run.

"},{"location":"api/framework/train/#saev.framework.train.Config.track","title":"track = True class-attribute instance-attribute","text":"

Whether to track with WandB.

"},{"location":"api/framework/train/#saev.framework.train.Config.train_data","title":"train_data = saev.data.ShuffledConfig() class-attribute instance-attribute","text":"

Training data.

"},{"location":"api/framework/train/#saev.framework.train.Config.val_data","title":"val_data = saev.data.ShuffledConfig() class-attribute instance-attribute","text":"

Validation data.

"},{"location":"api/framework/train/#saev.framework.train.Config.wandb_project","title":"wandb_project = 'saev' class-attribute instance-attribute","text":"

WandB project name.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics","title":"EvalMetrics(l0, l1, mse, normalized_mse, sse_sae, sse_baseline, n_dead, n_almost_dead, n_dense, freqs, mean_values, almost_dead_threshold, dense_threshold) dataclass","text":"

Results of evaluating a trained SAE on a dataset.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.almost_dead_threshold","title":"almost_dead_threshold instance-attribute","text":"

Threshold for an \"almost dead\" neuron.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.dense_threshold","title":"dense_threshold instance-attribute","text":"

Threshold for a dense neuron.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.freqs","title":"freqs instance-attribute","text":"

How often each feature fired.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l0","title":"l0 instance-attribute","text":"

Mean L0 across all examples.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l1","title":"l1 instance-attribute","text":"

Mean L1 across all examples.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mean_values","title":"mean_values instance-attribute","text":"

The mean value for each feature when it did fire.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mse","title":"mse instance-attribute","text":"

Mean MSE across all examples.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_almost_dead","title":"n_almost_dead instance-attribute","text":"

Number of neurons that fired on fewer than almost_dead_threshold of examples.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dead","title":"n_dead instance-attribute","text":"

Number of neurons that never fired on any example.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dense","title":"n_dense instance-attribute","text":"

Number of neurons that fired on more than dense_threshold of examples.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.normalized_mse","title":"normalized_mse instance-attribute","text":"

Normalized reconstruction MSE (SAE SSE / mean-baseline SSE).

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_baseline","title":"sse_baseline instance-attribute","text":"

Total reconstruction sum-squared error for the mean baseline.

"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_sae","title":"sse_sae instance-attribute","text":"

Total reconstruction sum-squared error for the SAE.

"},{"location":"api/framework/train/#saev.framework.train.evaluate","title":"evaluate(cfgs, saes, objectives)","text":"

Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.

The metrics computed are mean L0/L1/MSE losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values. A list of EvalMetrics is returned, one for each SAE.

Source code in src/saev/framework/train.py
@beartype.beartype\n@torch.no_grad()\ndef evaluate(\n    cfgs: list[Config], saes: torch.nn.ModuleList, objectives: torch.nn.ModuleList\n) -> list[EvalMetrics]:\n    \"\"\"\n    Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.\n\n    The metrics computed are mean ``L0``/``L1``/``MSE`` losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values.  A list of `EvalMetrics` is returned, one for each SAE.\n    \"\"\"\n\n    torch.cuda.empty_cache()\n\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    saes.eval()\n    objectives.eval()\n\n    cfg = cfgs[0]\n\n    almost_dead_lim = 1e-7\n    dense_lim = 1e-2\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.val_data)\n    n_val = min(dataloader.n_samples, cfg.n_val)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, n_val)\n\n    n_fired = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    values = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    total_l0_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_l1_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_mse_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_sse_sae = torch.zeros(len(cfgs), dtype=torch.float64, device=cfg.device)\n    sum_sq = torch.zeros((), dtype=torch.float64, device=cfg.device)\n    sum_vec = torch.zeros(\n        (saes[0].cfg.d_model,), dtype=torch.float64, device=cfg.device\n    )\n    n_tokens = 0\n\n    for batch in helpers.progress(dataloader, desc=\"eval\", every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        batch_size = acts_BD.shape[0]\n        acts_BD_f64 = acts_BD.to(torch.float64)\n        sum_sq += torch.sum(acts_BD_f64 * acts_BD_f64)\n        sum_vec += 
acts_BD_f64.sum(dim=0)\n        n_tokens += batch_size\n        for i, (sae, objective) in enumerate(zip(saes, objectives)):\n            # Objective now handles the forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            # Get f_x for metrics\n            residual = acts_BD - fwd.x_hats[:, -1, :]\n            total_sse_sae[i] += torch.sum((residual.to(torch.float64)) ** 2)\n            n_fired[i] += einops.reduce(\n                fwd.f_x > 0, \"batch d_sae -> d_sae\", \"sum\"\n            ).cpu()\n            values[i] += einops.reduce(fwd.f_x, \"batch d_sae -> d_sae\", \"sum\").cpu()\n            total_l0_sum[i] += loss.l0.cpu().item() * batch_size\n            total_l1_sum[i] += loss.l1.cpu().item() * batch_size\n            total_mse_sum[i] += loss.mse.cpu().item() * batch_size\n\n    msg = \"Validation dataloader yielded zero tokens; cannot compute normalized MSE.\"\n    assert n_tokens > 0, msg\n    sum_vec_sq = torch.dot(sum_vec, sum_vec)\n    sse_baseline = sum_sq - sum_vec_sq / n_tokens\n    msg = (\n        f\"Validation baseline variance non-positive: \"\n        f\"sse_baseline={sse_baseline.item():.6e}\"\n    )\n    assert sse_baseline > 0, msg\n    sse_baseline_value = sse_baseline.item()\n\n    mean_values = values / n_fired\n    freqs = n_fired / n_tokens\n\n    l0 = (total_l0_sum / n_tokens).tolist()\n    l1 = (total_l1_sum / n_tokens).tolist()\n    mse = (total_mse_sum / n_tokens).tolist()\n    sse_sae = total_sse_sae.tolist()\n    normalized_mse = (total_sse_sae / sse_baseline_value).tolist()\n    sse_baseline_all = [sse_baseline_value] * len(cfgs)\n\n    n_dead = einops.reduce(freqs == 0, \"n_saes d_sae -> n_saes\", \"sum\").tolist()\n    n_almost_dead = einops.reduce(\n        freqs < almost_dead_lim, \"n_saes d_sae -> n_saes\", \"sum\"\n    ).tolist()\n    n_dense = einops.reduce(freqs > dense_lim, \"n_saes d_sae -> n_saes\", \"sum\").tolist()\n\n    metrics = []\n    for i in range(len(cfgs)):\n        
metrics.append(\n            EvalMetrics(\n                l0=l0[i],\n                l1=l1[i],\n                mse=mse[i],\n                normalized_mse=normalized_mse[i],\n                sse_sae=sse_sae[i],\n                sse_baseline=sse_baseline_all[i],\n                n_dead=n_dead[i],\n                n_almost_dead=n_almost_dead[i],\n                n_dense=n_dense[i],\n                freqs=freqs[i],\n                mean_values=mean_values[i],\n                almost_dead_threshold=almost_dead_lim,\n                dense_threshold=dense_lim,\n            )\n        )\n\n    return metrics\n
"},{"location":"api/framework/train/#saev.framework.train.main","title":"main(cfg, sweep=None, max_parallel=None)","text":"

Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.

Parameters:

Name Type Description Default cfg Annotated[Config, arg(name='')]

Baseline config for training an SAE.

required sweep Path | None

Path to .py file defining the sweep parameters.

None max_parallel int | None

Maximum SAEs to train concurrently within a single worker.

None Source code in src/saev/framework/train.py
@beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")],\n    sweep: pathlib.Path | None = None,\n    max_parallel: int | None = None,\n):\n    \"\"\"\n    Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.\n\n    Args:\n        cfg: Baseline config for training an SAE.\n        sweep: Path to .py file defining the sweep parameters.\n        max_parallel: Maximum SAEs to train concurrently within a single worker.\n    \"\"\"\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n\n    import submitit\n\n    if sweep is not None:\n        sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    cfgs = split_cfgs(cfgs)\n    # codex resume 019ac16a-dc07-78e3-82c7-e5c08a6c6f0c\n    if max_parallel:\n        cfgs = [\n            subgroup\n            for group in cfgs\n            for subgroup in [\n                group[start:end]\n                for start, end in helpers.batched_idx(len(group), max_parallel)\n            ]\n        ]\n\n    logger.info(\"Running %d training jobs.\", len(cfgs))\n\n    # Use the first resolved config for submitit parameters (n_hours, mem_gb, etc.) 
so that sweep values take effect instead of CLI defaults.\n    cfg = cfgs[0][0]\n\n    if cfg.slurm_acct:\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n        executor.update_parameters(\n            job_name=\"sae-train\",\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            mem=f\"{cfg.mem_gb}GB\",\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n    else:\n        executor = submitit.DebugExecutor(folder=cfg.log_to)\n\n    try:\n        cloudpickle.dumps(worker_fn)\n        for group in cfgs:\n            cloudpickle.dumps(group)\n    except TypeError as err:\n        raise AssertionError(f\"Failed to pickle: {err}\")\n\n    with executor.batch():\n        jobs = [executor.submit(worker_fn, group) for group in cfgs]\n\n    # Give the executor five seconds to fire the jobs off.\n    time.sleep(5.0)\n\n    # Log initial status.\n    for j, job in enumerate(jobs):\n        logger.info(\"Job %d/%d: %s %s\", j + 1, len(jobs), job.job_id, job.state)\n\n    for j, job in enumerate(jobs):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", j + 1, len(jobs))\n        except submitit.core.utils.UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, j)\n\n    logger.info(\"Jobs done.\")\n
"},{"location":"api/framework/train/#saev.framework.train.split_cfgs","title":"split_cfgs(cfgs)","text":"

Splits configs into groups that can be parallelized.

Parameters:

Name Type Description Default cfgs list[Config]

A list of configs from a sweep file.

required

Returns:

Type Description list[list[Config]]

A list of lists, where the configs in each sublist do not differ in any keys that are in CANNOT_PARALLELIZE. This means that each sublist is a valid \"parallel\" set of configs for train.

Source code in src/saev/framework/train.py
@beartype.beartype\ndef split_cfgs(cfgs: list[Config]) -> list[list[Config]]:\n    \"\"\"\n    Splits configs into groups that can be parallelized.\n\n    Arguments:\n        cfgs: A list of configs from a sweep file.\n\n    Returns:\n        A list of lists, where the configs in each sublist do not differ in any keys that are in `CANNOT_PARALLELIZE`. This means that each sublist is a valid \"parallel\" set of configs for `train`.\n    \"\"\"\n    groups = collections.defaultdict(list)\n    for cfg in cfgs:\n        key = _parallel_key(cfg)\n        groups[key].append(cfg)\n\n    return [\n        [\n            dataclasses.replace(\n                cfg,\n                train_data=dataclasses.replace(cfg.train_data, seed=cfg.seed),\n                val_data=dataclasses.replace(cfg.val_data, seed=cfg.seed),\n            )\n            for cfg in group\n        ]\n        for _, group in sorted(groups.items())\n    ]\n
"},{"location":"api/framework/train/#saev.framework.train.train","title":"train(cfgs)","text":"

Explicitly declare the optimizer, schedulers, dataloader, etc outside of main so that all the variables are dropped from scope and can be garbage collected.

Source code in src/saev/framework/train.py
@beartype.beartype\ndef train(\n    cfgs: list[Config],\n) -> tuple[\n    torch.nn.ModuleList, torch.nn.ModuleList, saev.utils.wandb.ParallelWandbRun, int\n]:\n    \"\"\"\n    Explicitly declare the optimizer, schedulers, dataloader, etc outside of `main` so that all the variables are dropped from scope and can be garbage collected.\n    \"\"\"\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    logger.info(\"Parallelizing %d runs.\", len(cfgs))\n\n    cfg = cfgs[0]\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.train_data)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, cfg.n_train)\n\n    saes, objectives, param_groups = make_saes(\n        [(c.sae, c.objective) for c in cfgs], dataloader\n    )\n\n    mode = \"online\" if cfg.track else \"disabled\"\n    tags = list(cfg.tags)\n\n    # Add metadata to configs for WandB logging\n    metadata_dict = dataclasses.asdict(dataloader.metadata)\n    wandb_configs = []\n    for c in cfgs:\n        cfg_dict = dataclasses.asdict(c)\n        cfg_dict[\"train_data\"][\"metadata\"] = metadata_dict\n        wandb_configs.append(cfg_dict)\n\n    run = saev.utils.wandb.ParallelWandbRun(\n        cfg.wandb_project, wandb_configs, mode, tags\n    )\n    slurm_job_id = os.environ.get(\"SLURM_JOB_ID\")\n    if slurm_job_id:\n        run.set_summary(\"slurm_job_id\", slurm_job_id)\n\n    # Build per-SAE bundles of optimizers/param_groups/schedulers so each config's LR and warmup drive both Muon and Adam param groups for that SAE. 
We reshape the flat param_groups into per-SAE lists because we need to:\n    #   (a) build schedulers with that SAE's cfg\n    #   (b) step/zero only that SAE's optimizers\n    #   (c) log that SAE's LR without fishing through a mixed flat list.\n    grouped_pgs: list[list[dict[str, object]]] = []\n    optimizers: list[list[torch.optim.Optimizer]] = []\n    lr_schedulers: list[list[saev.utils.scheduling.WarmupCosine]] = []\n\n    for i, (sae, cfg, param_group) in enumerate(zip(saes, cfgs, param_groups)):\n        if cfg.optim == \"adam\":\n            opts = [torch.optim.Adam([param_group], fused=True)]\n        elif cfg.optim == \"muon\":\n            muon_params = [p for p in sae.parameters() if p.ndim == 2]\n            msg = f\"Muon optimizer requires 2D params; SAE {i} has none.\"\n            assert muon_params, msg\n            adam_params = [p for p in sae.parameters() if p.ndim != 2]\n            msg = f\"Adam optimizer requires non-2D params; SAE {i} has none.\"\n            assert adam_params, msg\n\n            opts = [\n                torch.optim.Muon(muon_params, lr=0.0),\n                torch.optim.Adam(adam_params, lr=0.0, fused=True),\n            ]\n        else:\n            tp.assert_never(cfg.optim)\n\n        pgs = [pg for opt in opts for pg in opt.param_groups]\n        scheds = [\n            saev.utils.scheduling.WarmupCosine(\n                0.0, cfg.n_lr_warmup, cfg.lr, len(dataloader), 0.0\n            )\n            for _ in pgs\n        ]\n\n        optimizers.append(opts)\n        grouped_pgs.append(pgs)\n        lr_schedulers.append(scheds)\n\n    param_groups = grouped_pgs\n\n    saes.train()\n    saes = saes.to(cfg.device)\n    objectives.train()\n    objectives = objectives.to(cfg.device)\n\n    global_step, n_patches_seen = 0, 0\n    dl_monitor = DataloaderMonitor(dataloader)\n\n    for batch in helpers.progress(dataloader, every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        for 
sae in saes:\n            sae.normalize_w_dec()\n        # Forward passes and loss calculations.\n        losses = []\n        fwds = []\n        for sae, objective in zip(saes, objectives):\n            # Objective handles the SAE forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            losses.append(loss)\n            fwds.append(fwd)\n\n        n_patches_seen += len(acts_BD)\n\n        for loss in losses:\n            loss.loss.backward()\n\n        # remove parallel gradients or normalize columns?\n        for sae in saes:\n            sae.remove_parallel_grads()\n\n        # Calculate gradient norms before optimizer step\n        grad_norms = []\n        for sae, cfg in zip(saes, cfgs):\n            # Clip gradients and get the gradient norm\n            grad_norm = torch.nn.utils.clip_grad_norm_(\n                sae.parameters(), max_norm=cfg.grad_clip\n            )\n\n            grad_norms.append(grad_norm)\n\n        # Log metrics after gradient computation\n        if (global_step + 1) % cfg.log_every == 0:\n            with torch.no_grad():\n                now = time.time()\n                dl_metrics = dl_monitor.compute(now=now)\n\n                metadata = dataloader.metadata\n                entropy_metrics = statistics.calc_batch_entropy(\n                    batch[\"example_idx\"].to(\"cpu\"),\n                    batch[\"token_idx\"].to(\"cpu\"),\n                    metadata.n_examples,\n                    metadata.content_tokens_per_example,\n                )\n                dl_metrics.update(entropy_metrics)\n\n                acts_bd_f64 = acts_BD.to(torch.float64)\n                n_batch = acts_bd_f64.shape[0]\n                msg = \"Batch is empty; cannot compute normalized MSE.\"\n                assert n_batch > 0, msg\n                batch_sum_sq = torch.sum(acts_bd_f64 * acts_bd_f64)\n                batch_sum_vec = acts_bd_f64.sum(dim=0)\n                batch_baseline_sse = (\n                    
batch_sum_sq - torch.dot(batch_sum_vec, batch_sum_vec) / n_batch\n                )\n                msg = f\"Batch baseline variance non-positive: sse_baseline={batch_baseline_sse.item():.6e}\"\n                assert batch_baseline_sse > 0, msg\n                batch_baseline_sse_value = batch_baseline_sse.item()\n\n                metrics = []\n                for i, (loss, sae, objective, fwd) in enumerate(\n                    zip(losses, saes, objectives, fwds)\n                ):\n                    current_lr = param_groups[i][0][\"lr\"]\n                    # Explained variance: 1 - Var(x - x_hat) / Var(x)\n                    residual = acts_BD - fwd.x_hats[:, -1, :]\n                    batch_sse_sae_value = torch.sum(\n                        (residual.to(torch.float64)) ** 2\n                    ).item()\n                    normalized_mse_value = (\n                        batch_sse_sae_value / batch_baseline_sse_value\n                    )\n                    explained_var = 1 - residual.var() / acts_BD.var()\n\n                    # Dead unit percentage: fraction of units that never activate\n                    dead_pct = ((fwd.f_x.abs() > 1e-12).sum(0) == 0).float().mean()\n\n                    # Dictionary coherence: max |<w_i, w_j>| for i != j\n                    W = sae.W_dec  # (d_sae, d_model)\n                    # Normalize each row (each SAE feature)\n                    W_norm = W / W.norm(dim=1, keepdim=True)\n                    coherence = (W_norm @ W_norm.T).abs().triu(1).max()\n\n                    # Average decoder row L2 norm (since W_dec is d_sae x d_model)\n                    avg_w_row_norm = sae.W_dec.norm(dim=1).mean()\n\n                    metric = {\n                        **{f\"loss/{key}\": val for key, val in loss.metrics().items()},\n                        \"progress/n_patches_seen\": n_patches_seen,\n                        \"progress/learning_rate\": current_lr,\n                        
\"metrics/explained_variance\": explained_var.item(),\n                        \"metrics/dead_unit_pct\": dead_pct.item(),\n                        \"metrics/dictionary_coherence\": coherence.item(),\n                        \"metrics/avg_decoder_row_norm\": avg_w_row_norm.item(),\n                        \"metrics/grad_norm\": grad_norms[i].item(),\n                        \"metrics/sse_sae\": batch_sse_sae_value,\n                        \"metrics/sse_baseline\": batch_baseline_sse_value,\n                        \"metrics/normalized_mse\": normalized_mse_value,\n                        **dl_metrics,\n                    }\n\n                    metrics.append(metric)\n                run.log(metrics, step=global_step)\n\n                logger.info(\n                    \", \".join(\n                        f\"{key}: {value:.5f}\"\n                        for key, value in losses[0].metrics().items()\n                    )\n                )\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.step()\n\n        # Update LR and sparsity coefficients.\n        for pgs, scheds in zip(param_groups, lr_schedulers):\n            for pg, sched in zip(pgs, scheds):\n                pg[\"lr\"] = sched.step()\n\n        # for objective, scheduler in zip(objectives, sparsity_schedulers):\n        #     objective.sparsity_coeff = scheduler.step()\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.zero_grad()\n\n        global_step += 1\n\n    return saes, objectives, run, global_step\n
"},{"location":"api/nn/modeling/","title":"saev.nn.modeling","text":"

Neural network architectures for sparse autoencoders.

"},{"location":"api/nn/modeling/#saev.nn.modeling.AuxK","title":"AuxK(key='auxk', k_aux=512, alpha=1 / 32) dataclass","text":"

AuxK auxiliary reconstruction loss for dead latents.

"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK","title":"BatchTopK(key='batch-top-k', top_k=32, sparsity=NoSparsity(), momentum=0.1, aux=AuxK()) dataclass","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK.top_k","title":"top_k = 32 class-attribute instance-attribute","text":"

How many values are allowed to be non-zero per sample in the batch.

"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation","title":"BatchTopKActivation(cfg)","text":"

Bases: Module

BatchTopK activation and inference-time threshold for sparse autoencoders.

This module implements a BatchTopK nonlinearity that enforces a fixed sparsity budget across a batch, together with an inference-time approximation that replaces the batch-coupled operation with a simple elementwise threshold.

Training mode (model.train()): Given pre-activation codes x with shape [batch, d_sae], the BatchTopK activation flattens the batch to shape [batch * d_sae], selects the largest (batch * top_k) entries by value, and sets all other entries to zero. This enforces an average of exactly top_k active features per example while allowing the \"activation budget\" to move between examples in the batch.

During training, we also estimate an inference threshold theta that approximates the effective cutoff induced by BatchTopK. For each batch, we compute the minimum positive activation that survives the BatchTopK mask and update an exponential moving average of this quantity. This running estimate plays the same role as BatchNorm running statistics: it is updated only in training mode and treated as fixed at inference.\n

Eval mode (model.eval()): At inference time we do not apply a batch-coupled top-k, since that would make each example depend on the rest of the eval batch. Instead, we use the stored running threshold theta to define a JumpReLU nonlinearity:

    y = x if x > theta else 0\n\napplied elementwise and independently to each example. This preserves the approximate sparsity level learned during training, but makes the layer deterministic and sample-wise independent for evaluation, probing, and downstream use.\n
Inputs

x: Tensor of shape [batch, d_sae] containing pre-activation codes.

Outputs

Tensor of shape [batch, d_sae] with the same dtype and device as x, where either: - in training mode: exactly (batch * top_k) entries are non-zero across the batch due to the BatchTopK mask, or - in eval mode: entries are zeroed by an elementwise JumpReLU with the learned threshold theta.

Source code in src/saev/nn/modeling.py
def __init__(self, cfg: BatchTopK):\n    super().__init__()\n    self.cfg = cfg\n\n    self.register_buffer(\"threshold\", torch.tensor(0.0))\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation.forward","title":"forward(x)","text":"

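Apply the BatchTopK activation: in training mode, keep the largest (batch * top_k) entries across the whole flattened batch and update the running threshold; in eval mode, apply the stored elementwise threshold independently to each sample.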

Source code in src/saev/nn/modeling.py
def forward(self, x: Float[Tensor, \"batch d_sae\"]) -> Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to each sample in the batch.\n    \"\"\"\n\n    if not self.training:\n        if self.threshold <= 0:\n            return torch.where(x > 0, x, torch.zeros_like(x))\n\n        return torch.where(x > self.threshold, x, torch.zeros_like(x))\n\n    bsz, d_sae = x.shape\n    x_flat = x.flatten()\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k * bsz, d_sae * bsz)\n    _, idxs = torch.topk(x_flat, k, sorted=False)\n    mask = torch.zeros_like(x_flat).scatter(-1, idxs, 1.0).reshape(x.shape)\n\n    x = torch.mul(mask, x)\n\n    with torch.no_grad():\n        pos = x[x > 0]\n        if pos.numel() >= 0:\n            self.threshold.mul_(1 - self.cfg.momentum).add_(\n                self.cfg.momentum * pos.min()\n            )\n\n    return x\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.NoAux","title":"NoAux(key='no-aux') dataclass","text":"

No auxiliary loss (e.g., for ReLU).

"},{"location":"api/nn/modeling/#saev.nn.modeling.NoSparsity","title":"NoSparsity(key='no-sparsity') dataclass","text":"

No explicit sparsity penalty (e.g. for TopK/BatchTopK where k controls sparsity).

"},{"location":"api/nn/modeling/#saev.nn.modeling.Relu","title":"Relu(key='relu', sparsity=L1Sparsity(coeff=0.0004), aux=NoAux()) dataclass","text":"

Vanilla ReLU

"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder","title":"SparseAutoencoder(cfg)","text":"

Bases: Module

Sparse auto-encoder (SAE)

Source code in src/saev/nn/modeling.py
def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. That means load_state_dict overwrites W_dec when it loads W_enc.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.EncodeOut","title":"EncodeOut","text":"

Bases: NamedTuple

Outputs of encode: pre-activations and activated latents.

"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.Output","title":"Output","text":"

Bases: NamedTuple

Full SAE forward outputs for objectives and metrics.

"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.decode","title":"decode(f_x, *, prefixes=None)","text":"

Decode latent features to reconstructions.

Parameters:

Name Type Description Default f_x Float[Tensor, 'batch d_sae']

Latent features of shape (batch, d_sae)

required prefixes Int64[Tensor, ' n_prefixes'] | None

Optional tensor of prefix lengths for Matryoshka decoding.

None

Returns:

Type Description Float[Tensor, 'batch n_prefixes d_model']

Matryoshka reconstructions (batch, n_prefixes, d_model).

Source code in src/saev/nn/modeling.py
def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -> Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka cumulative decode\n    device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] > prefixes[:-1])\n    assert 1 <= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -> (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -> ... d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. 
Needs to be cleaned up.\n    return x_hats\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.forward","title":"forward(x)","text":"

Given x, calculates the reconstructed x_hat and the intermediate activations f_x.

Parameters:

Name Type Description Default x Float[Tensor, 'batch d_model']

a batch of transformer activations.

required Source code in src/saev/nn/modeling.py
def forward(self, x: Float[Tensor, \"batch d_model\"]) -> Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.normalize_w_dec","title":"normalize_w_dec()","text":"

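Normalize each latent's decoder vector (each row of W_dec, which has shape (d_sae, d_model)) to unit norm; does nothing unless cfg.normalize_w_dec is set.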

Source code in src/saev/nn/modeling.py
@torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.remove_parallel_grads","title":"remove_parallel_grads()","text":"

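Project out of W_dec.grad the component that is parallel to each latent's decoder vector; does nothing if cfg.remove_parallel_grads is unset or W_dec has no gradient.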

Source code in src/saev/nn/modeling.py
@torch.no_grad()\ndef remove_parallel_grads(self):\n    \"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model -> d_sae\",\n    )\n\n    norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq > 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -> d_sae d_model\",\n    )\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig","title":"SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True) dataclass","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.activation","title":"activation = TopK() class-attribute instance-attribute","text":"

Activation function.

"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_model","title":"d_model = 1024 class-attribute instance-attribute","text":"

Size of x.

"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_sae","title":"d_sae = 1024 * 16 class-attribute instance-attribute","text":"

Number of features in SAE latent space; size of f(x).

"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.normalize_w_dec","title":"normalize_w_dec = True class-attribute instance-attribute","text":"

Whether to make sure W_dec has unit norm columns. See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".

"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_blend","title":"reinit_blend = 0.8 class-attribute instance-attribute","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"reinit_enc_dec_tranpose = True class-attribute instance-attribute","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.remove_parallel_grads","title":"remove_parallel_grads = True class-attribute instance-attribute","text":"

Whether to remove gradients parallel to W_dec columns (which will be ignored because we force the columns to have unit norm). See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.

"},{"location":"api/nn/modeling/#saev.nn.modeling.TopK","title":"TopK(key='top-k', top_k=32, sparsity=NoSparsity(), aux=AuxK()) dataclass","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.TopK.top_k","title":"top_k = 32 class-attribute instance-attribute","text":"

How many values are allowed to be non-zero.

"},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation","title":"TopKActivation(cfg)","text":"

Bases: Module

Top-K activation function. For use as activation function of sparse encoder.

Source code in src/saev/nn/modeling.py
def __init__(self, cfg: TopK):\n    super().__init__()\n    self.cfg = cfg\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation.forward","title":"forward(x)","text":"

Apply top-k activation to the input tensor.

Source code in src/saev/nn/modeling.py
def forward(self, x: Float[Tensor, \"batch d_sae\"]) -> Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to the input tensor.\n    \"\"\"\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k, d_sae)\n    _, idxs = torch.topk(x, k, dim=-1, sorted=False)\n    mask = torch.zeros_like(x).scatter(-1, idxs, 1.0)\n\n    return torch.mul(mask, x)\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.dump","title":"dump(fpath, sae)","text":"

Save an SAE checkpoint to disk along with configuration, using the trick from equinox.

Parameters:

Name Type Description Default fpath Path | str

filepath to save checkpoint to.

required sae SparseAutoencoder

sparse autoencoder checkpoint to save.

required Source code in src/saev/nn/modeling.py
@beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n
"},{"location":"api/nn/modeling/#saev.nn.modeling.load","title":"load(fpath, *, device='cpu')","text":"

Loads a sparse autoencoder from disk.

Source code in src/saev/nn/modeling.py
@beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -> SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. 
Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        
cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n
"},{"location":"api/nn/objectives/","title":"saev.nn.objectives","text":""},{"location":"api/nn/objectives/#saev.nn.objectives.Loss","title":"Loss() dataclass","text":"

The loss term for an autoencoder training batch.

"},{"location":"api/nn/objectives/#saev.nn.objectives.Loss.loss","title":"loss property","text":"

Total loss.

"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka","title":"Matryoshka(n_prefixes=10, dead_threshold_tokens=10000000) dataclass","text":"

Config for the Matryoshka loss for another arbitrary SAE class.

Reference code is here: https://github.com/noanabeshima/matryoshka-saes and the original reading is https://sparselatents.com/matryoshka.html and https://arxiv.org/pdf/2503.17547

"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.dead_threshold_tokens","title":"dead_threshold_tokens = 10000000 class-attribute instance-attribute","text":"

Tokens without activation before a latent is considered dead.

"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.n_prefixes","title":"n_prefixes = 10 class-attribute instance-attribute","text":"

Number of random length prefixes to use for loss calculation.

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss","title":"MatryoshkaLoss(mse, sparsity, l0, l1, aux, n_dead) dataclass","text":"

Bases: Loss

The composite loss terms for a training batch.

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.aux","title":"aux instance-attribute","text":"

Auxiliary loss term (e.g., AuxK).

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l0","title":"l0 instance-attribute","text":"

Sum of L0 magnitudes of hidden activations for all prefix lengths.

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l1","title":"l1 instance-attribute","text":"

Sum of L1 magnitudes of hidden activations for all prefix lengths.

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.loss","title":"loss property","text":"

Total loss.

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.mse","title":"mse instance-attribute","text":"

Average of reconstruction loss (mean squared error) for all prefix lengths.

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.n_dead","title":"n_dead instance-attribute","text":"

Number of dead latents (per aux loss threshold).

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.sparsity","title":"sparsity instance-attribute","text":"

Sparsity loss, typically lambda * L1.

"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaObjective","title":"MatryoshkaObjective(cfg)","text":"

Bases: Objective

Torch module for calculating the matryoshka loss for an SAE.

Source code in src/saev/nn/objectives.py
def __init__(self, cfg: Matryoshka):\n    super().__init__()\n    self.cfg = cfg\n    self.toks_since_active: Tensor | None = None\n
"},{"location":"api/nn/objectives/#saev.nn.objectives.sample_prefixes","title":"sample_prefixes(d_sae, n_prefixes, min_prefix_length=1, pareto_power=0.5)","text":"

Samples prefix lengths using a Pareto distribution. Derived from \"Learning Multi-Level Features with Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)

Parameters:

Name Type Description Default d_sae int

Total number of latent dimensions

required n_prefixes int

Number of prefixes to sample

required min_prefix_length int

Minimum length of any prefix

1 pareto_power float

Power parameter for Pareto distribution (lower = more uniform)

0.5

Returns:

Type Description Int64[Tensor, ' n_prefixes']

torch.Tensor: Sorted prefix lengths

Source code in src/saev/nn/objectives.py
@torch.no_grad()\n@jaxtyped(typechecker=beartype.beartype)\ndef sample_prefixes(\n    d_sae: int, n_prefixes: int, min_prefix_length: int = 1, pareto_power: float = 0.5\n) -> Int64[Tensor, \" n_prefixes\"]:\n    \"\"\"\n    Samples prefix lengths using a Pareto distribution. Derived from \"Learning Multi-Level Features with\n    Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)\n\n    Args:\n        d_sae: Total number of latent dimensions\n        n_prefixes: Number of prefixes to sample\n        min_prefix_length: Minimum length of any prefix\n        pareto_power: Power parameter for Pareto distribution (lower = more uniform)\n\n    Returns:\n        torch.Tensor: Sorted prefix lengths\n    \"\"\"\n    if n_prefixes <= 1:\n        return torch.tensor([d_sae], dtype=torch.int64)\n\n    assert n_prefixes <= d_sae\n\n    # Calculate probability distribution favoring shorter prefixes\n    lengths = torch.arange(1, d_sae)\n    pareto_cdf = 1 - ((min_prefix_length / lengths.float()) ** pareto_power)\n    pareto_pdf = torch.cat([pareto_cdf[:1], pareto_cdf[1:] - pareto_cdf[:-1]])\n    probability_dist = pareto_pdf / pareto_pdf.sum()\n\n    # Sample and sort prefix lengths\n    sampled_indices = torch.multinomial(\n        probability_dist, num_samples=n_prefixes - 1, replacement=False\n    )\n\n    # Convert indices to actual prefix lengths\n    prefixes = lengths[sampled_indices]\n\n    # Add n_latents as the final prefix\n    prefixes = torch.cat((prefixes.detach().clone(), torch.tensor([d_sae])))\n\n    prefixes, _ = torch.sort(prefixes, descending=False)\n\n    return prefixes.to(torch.int64)\n
"},{"location":"api/nn/saev.nn/","title":"saev.nn","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder","title":"SparseAutoencoder(cfg)","text":"

Bases: Module

Sparse auto-encoder (SAE)

Source code in src/saev/nn/modeling.py
def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. That means load_state_dict overwrites W_dec when it loads W_enc.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n
"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.EncodeOut","title":"EncodeOut","text":"

Bases: NamedTuple

Outputs of encode: pre-activations and activated latents.

"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.Output","title":"Output","text":"

Bases: NamedTuple

Full SAE forward outputs for objectives and metrics.

"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.decode","title":"decode(f_x, *, prefixes=None)","text":"

Decode latent features to reconstructions.

Parameters:

Name Type Description Default f_x Float[Tensor, 'batch d_sae']

Latent features of shape (batch, d_sae)

required prefixes Int64[Tensor, ' n_prefixes'] | None

Optional tensor of prefix lengths for Matryoshka decoding.

None

Returns:

Type Description Float[Tensor, 'batch n_prefixes d_model']

Matryoshka reconstructions (batch, n_prefixes, d_model).

Source code in src/saev/nn/modeling.py
def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -> Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka cumulative decode\n    device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] > prefixes[:-1])\n    assert 1 <= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -> (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -> ... d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. 
Needs to be cleaned up.\n    return x_hats\n
"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.forward","title":"forward(x)","text":"

Given x, calculates the reconstructed x_hat and the intermediate activations f_x.

Parameters:

Name Type Description Default x Float[Tensor, 'batch d_model']

a batch of transformer activations.

required Source code in src/saev/nn/modeling.py
def forward(self, x: Float[Tensor, \"batch d_model\"]) -> Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n
"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.normalize_w_dec","title":"normalize_w_dec()","text":"

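Normalize each latent's decoder vector (each row of W_dec, which has shape (d_sae, d_model)) to unit norm; does nothing unless cfg.normalize_w_dec is set.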

Source code in src/saev/nn/modeling.py
@torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n
"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.remove_parallel_grads","title":"remove_parallel_grads()","text":"

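Project out of W_dec.grad the component that is parallel to each latent's decoder vector; does nothing if cfg.remove_parallel_grads is unset or W_dec has no gradient.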

Source code in src/saev/nn/modeling.py
@torch.no_grad()\ndef remove_parallel_grads(self):\n    \"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model -> d_sae\",\n    )\n\n    norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq > 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -> d_sae d_model\",\n    )\n
"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig","title":"SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True) dataclass","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.activation","title":"activation = TopK() class-attribute instance-attribute","text":"

Activation function.

"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_model","title":"d_model = 1024 class-attribute instance-attribute","text":"

Size of x.

"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_sae","title":"d_sae = 1024 * 16 class-attribute instance-attribute","text":"

Number of features in SAE latent space; size of f(x).

"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.normalize_w_dec","title":"normalize_w_dec = True class-attribute instance-attribute","text":"

Whether to make sure W_dec has unit norm columns. See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".

"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_blend","title":"reinit_blend = 0.8 class-attribute instance-attribute","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"reinit_enc_dec_tranpose = True class-attribute instance-attribute","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.remove_parallel_grads","title":"remove_parallel_grads = True class-attribute instance-attribute","text":"

Whether to remove gradients parallel to W_dec columns (which will be ignored because we force the columns to have unit norm). See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.

"},{"location":"api/nn/saev.nn/#saev.nn.dump","title":"dump(fpath, sae)","text":"

Save an SAE checkpoint to disk along with configuration, using the trick from equinox.

Parameters:

Name Type Description Default fpath Path | str

filepath to save checkpoint to.

required sae SparseAutoencoder

sparse autoencoder checkpoint to save.

required Source code in src/saev/nn/modeling.py
@beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n
"},{"location":"api/nn/saev.nn/#saev.nn.load","title":"load(fpath, *, device='cpu')","text":"

Loads a sparse autoencoder from disk.

Source code in src/saev/nn/modeling.py
@beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -> SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. 
Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        
cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n
"},{"location":"api/utils/monitoring/","title":"saev.utils.monitoring","text":""},{"location":"api/utils/monitoring/#saev.utils.monitoring.DataloaderMonitor","title":"DataloaderMonitor(dataloader, process_factory=None)","text":"

Tracks IO and CPU activity for the dataloader manager process and its children.

The monitor owns the dataloader handle and psutil processes internally, so callers simply construct it with the dataloader and then call compute() whenever metrics are needed.

Source code in src/saev/utils/monitoring.py
def __init__(\n    self,\n    dataloader: object,\n    process_factory: Callable[[int], psutil.Process] | None = None,\n) -> None:\n    self.dataloader = dataloader\n    self.process_factory = process_factory or psutil.Process\n    self._reset_state()\n
"},{"location":"api/utils/saev.utils/","title":"saev.utils","text":""},{"location":"api/utils/scheduling/","title":"saev.utils.scheduling","text":""},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter","title":"BatchLimiter(dataloader, n_samples)","text":"

Limits the number of batches to only return n_samples total samples.

Source code in src/saev/utils/scheduling.py
def __init__(self, dataloader: DataLoaderLike, n_samples: int):\n    self.dataloader = dataloader\n    self.n_samples = n_samples\n    self.batch_size = dataloader.batch_size\n    self.drop_last = dataloader.drop_last\n
"},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter.__getattr__","title":"__getattr__(name)","text":"

Pass through attribute access to the wrapped dataloader.

Source code in src/saev/utils/scheduling.py
def __getattr__(self, name: str) -> Any:\n    \"\"\"Pass through attribute access to the wrapped dataloader.\"\"\"\n    # __getattr__ is only called when the attribute wasn't found on self\n    # So we delegate to the wrapped dataloader\n    try:\n        return getattr(self.dataloader, name)\n    except AttributeError:\n        # Re-raise with more context about where the attribute was not found\n        raise AttributeError(\n            f\"'{self.__class__.__name__}' object and its wrapped dataloader have no attribute '{name}'\"\n        )\n
"},{"location":"api/utils/scheduling/#saev.utils.scheduling.Warmup","title":"Warmup(init, final, n_steps)","text":"

Bases: Scheduler

Linearly increases from init to final over n_steps steps.

Source code in src/saev/utils/scheduling.py
def __init__(self, init: float, final: float, n_steps: int):\n    self.final = final\n    self.init = init\n    self.n_steps = n_steps\n    self._step = 0\n
"},{"location":"api/utils/scheduling/#saev.utils.scheduling.WarmupCosine","title":"WarmupCosine(init, n_warmup, peak, n_steps, final)","text":"

Bases: Scheduler

Linearly increases from init to peak over n_warmup steps, then decreases to final using cosine decay over the remaining n_steps - n_warmup steps.

Source code in src/saev/utils/scheduling.py
def __init__(\n    self, init: float, n_warmup: int, peak: float, n_steps: int, final: float\n):\n    self.init = init\n    self.peak = peak\n    self.final = final\n    self.n_warmup = n_warmup\n    self.n_steps = n_steps\n    self._step = 0\n
"},{"location":"api/utils/statistics/","title":"saev.utils.statistics","text":""},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator","title":"PercentileEstimator(percentile, total, lr=0.001, shape=())","text":"Source code in src/saev/utils/statistics.py
def __init__(\n    self,\n    percentile: float | int,\n    total: int,\n    lr: float = 1e-3,\n    shape: tuple[int, ...] = (),\n):\n    self.percentile = percentile\n    self.total = total\n    self.lr = lr\n\n    self._estimate = torch.zeros(shape)\n    self._step = 0\n
"},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator.update","title":"update(x)","text":"

Update the estimator with a new value.

This method maintains the marker positions using the P2 algorithm rules. When a new value arrives, it's placed in the appropriate position relative to existing markers, and marker positions are adjusted to maintain their desired percentile positions.

Parameters:

Name Type Description Default x float | Tensor

The new value to incorporate into the estimation

required Source code in src/saev/utils/statistics.py
def update(self, x: float | Tensor):\n    \"\"\"\n    Update the estimator with a new value.\n\n    This method maintains the marker positions using the P2 algorithm rules. When a new value arrives, it's placed in the appropriate position relative to existing markers, and marker positions are adjusted to maintain their desired percentile positions.\n\n    Arguments:\n        x: The new value to incorporate into the estimation\n    \"\"\"\n    self._step += 1\n\n    step_size = self.lr * (self.total - self._step) / self.total\n\n    # Is a no-op if it's already on the same device.\n    if isinstance(x, Tensor):\n        self._estimate = self._estimate.to(x.device)\n\n    self._estimate += step_size * (\n        torch.sign(x - self._estimate) + 2 * self.percentile / 100 - 1.0\n    )\n
"},{"location":"api/utils/statistics/#saev.utils.statistics.calc_batch_entropy","title":"calc_batch_entropy(example_idx, token_idx, n_examples, content_tokens_per_example)","text":"

Compute entropy and coverage metrics for a batch of shuffled indices.

The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.

Source code in src/saev/utils/statistics.py
@beartype.beartype\ndef calc_batch_entropy(\n    example_idx: IndexLike,\n    token_idx: IndexLike,\n    n_examples: int,\n    content_tokens_per_example: int,\n) -> dict[str, float]:\n    \"\"\"\n    Compute entropy and coverage metrics for a batch of shuffled indices.\n\n    The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.\n    \"\"\"\n    example_idx_t = _to_tensor(example_idx)\n    token_idx_t = _to_tensor(token_idx)\n    if n_examples <= 0:\n        raise ValueError(\"n_examples must be positive.\")\n    if content_tokens_per_example <= 0:\n        raise ValueError(\"content_tokens_per_example must be positive.\")\n\n    if example_idx_t.ndim != 1:\n        raise ValueError(\"example_idx must be 1D.\")\n    if token_idx_t.ndim != 1:\n        raise ValueError(\"token_idx must be 1D.\")\n    if example_idx_t.numel() == 0:\n        raise ValueError(\"example_idx must contain at least one element.\")\n\n    _assert_batch_dim(example_idx_t, token_idx_t)\n\n    example_metrics = _add_prefix(\n        \"loader/example\", _entropy_metrics(example_idx_t, n_examples)\n    )\n    token_metrics = _add_prefix(\n        \"loader/token\", _entropy_metrics(token_idx_t, content_tokens_per_example)\n    )\n\n    return {**example_metrics, **token_metrics}\n
"},{"location":"api/utils/wandb/","title":"saev.utils.wandb","text":""},{"location":"api/utils/wandb/#saev.utils.wandb.ParallelWandbRun","title":"ParallelWandbRun(project, cfgs, mode, tags, dir='.wandb')","text":"

Inspired by https://community.wandb.ai/t/is-it-possible-to-log-to-multiple-runs-simultaneously/4387

Source code in src/saev/utils/wandb.py
def __init__(\n    self,\n    project: str,\n    cfgs: list[dict[str, object]],\n    mode: str,\n    tags: list[str],\n    dir: str = \".wandb\",\n):\n    cfg, *cfgs = cfgs\n    self.project = project\n    self.cfgs = cfgs\n    self.mode = mode\n    self.tags = tags\n    self.dir = dir\n    self.summary_updates: dict[str, object] = {}\n\n    self.live_run = wandb.init(\n        project=project, config=cfg, mode=mode, tags=tags, dir=dir\n    )\n\n    self.metric_queues: list[MetricQueue] = [[] for _ in self.cfgs]\n
"},{"location":"developers/contributing/","title":"Contributing","text":""},{"location":"developers/contributing/#project-layout","title":"Project layout","text":"
docs/\n    mkdocs.yml    # The configuration file.\n    src/\n        index.md  # The documentation homepage.\n        ...       # Other markdown pages, images and other files.\n
"},{"location":"developers/datapoint-init/","title":"Datapoint Initialization","text":"

Datapoint initialization is an SAE weight initialization strategy independently proposed by Anthropic and Pierre Peigne for improving SAE training.

Conceptually, we initialize each decoder column to look like a real datapoint, so every latent starts with a patch of input space where it \"wins\" and gets some gradient. Here's the algorithm:

  1. Select \\(n\\) random data points from your training data.
  2. Compute the mean \\(\\mu\\) and zero-center the data: \\(x_0 = x - \\mu\\).
  3. Linearly blend each zero-centered datapoint with Kaiming initialization: \\(w = p \\cdot (x - \\mu) + (1 - p) \\cdot r\\) where \\(p\\) is your blend probability and \\(r\\) is a randomly sampled Kaiming initialization vector.
  4. Initialize \\(W_\\text{enc}\\) as a concatenation of \\(n\\) blended vectors.
  5. Initialize \\(W_\\text{dec}\\) as \\(W_\\text{enc}^T\\).

Anthropic suggests \\(p = 0.8\\) for SAEs and 0.4 for \"weakly causal crosscoders\". I interpret this to mean that there is no universally appropriate \\(p\\).

"},{"location":"developers/disk-layout/","title":"Storage & Run Manifest Spec (v1)","text":"

There are two main locations:

  1. $SAEV_SCRATCH/saev/shards: where we store transformer activations (referred to as shards_root in the codebase).
  2. $SAEV_NFS/saev/runs: where we store checkpoints and other computed intermediate stuff like example images, probe1d results, etc. (referred to as runs_root in the codebase).

Visually, these are:

$SAEV_SCRATCH/saev/\n  shards/\n    <shard_hash>/\n      metadata.json\n      shards.json\n      acts000000.bin\n      acts000001.bin\n      ...\n      labels.bin\n

and

$SAEV_NFS/saev/\n  runs/\n    <run_id>/\n      checkpoint/           # output of train.py on <shard_hash>\n        sae.pt\n        config.json\n      links/                # Symlinks\n        train-shards        # $SCRATCH/saev/shards/<shard_hash>\n        train-dataset       # Whatever the original image dataset was\n        val-shards          # $SCRATCH/saev/shards/<shard_hash>\n        val-dataset         # Whatever the original image dataset was\n      inference/            # outputs from dump.py\n        <shard_hash>/\n          config.json\n          token_acts.npz\n          visuals/          # output of visuals.py\n

Each $SAEV_SCRATCH/saev/shards/<shard_hash>/ MUST include:

Note

Immutability: Files under saev/shards/<shard_hash>/ MUST be treated as read-only after publication. Any change yields a new shard_hash.

All CLI entrypoints should accept a single --run <path> argument. Every other path MUST be resolved from the run root:

Example resolution:

run = pathlib.Path(cfg.run)\nshards_root = (run / \"links\" / \"shards\").resolve()\ndataset_root = (run / \"links\" / \"dataset\").resolve()\nckpt = run / \"checkpoint\" / \"sae.pt\"\nlabels = shards_root / \"labels.bin\"\n
"},{"location":"developers/disk-layout/#faqs","title":"FAQs","text":""},{"location":"developers/naming/","title":"Variable Naming","text":""},{"location":"developers/protocol/","title":"saev Sharded Activation File Protocol","text":"

saev caches activations to disk rather than run ViT or LLM inference when training SAEs. Gemma Scope makes this decision as well (see Section 3.3.2 of https://arxiv.org/pdf/2408.05147). saev.data has a specific protocol to support this on OSC, a supercomputer center, and take advantage of OSC's specific disk performance.

Goal: loss-lessly persist very large Transformer (ViT or LLM) activations in a form that is:

This document is the single normative source. Any divergence in code is a bug.

"},{"location":"developers/protocol/#1-directory-layout","title":"1. Directory layout","text":"
<dump_to>/<HASH>/\n    metadata.json    # UTF-8 JSON, human-readable, describes data-generating config\n    shards.json      # UTF-8 JSON, human-readable, describes shards.\n    acts000000.bin   # shard 0\n    acts000001.bin   # shard 1\n    ...\n    actsNNNNNN.bin   # shard NNNNNN  (zero-padded width=6)\n    labels.bin       # patch labels (optional)\n

HASH = sha256(json.dumps(metadata, sort_keys=True, separators=(',', ':')).encode('utf-8')) Guards against silent config drift.

"},{"location":"developers/protocol/#2-json-file-schemas","title":"2. JSON file schemas","text":""},{"location":"developers/protocol/#21-metadatajson","title":"2.1. metadata.json","text":"field type semantic family string \"clip\" \\| \"siglip\" \\| \"dinov2\" ckpt string model identifier (OpenCLIP, HF, etc.) layers int[] ViT residual\u2010block indices recorded patches_per_ex int example patches only (excludes CLS) cls_token bool true -> patch 0 is CLS, else no CLS d_model int activation dimensionality n_examples int total examples in dataset patches_per_shard int logical activations per shard (see #3) data object opaque dataset description dataset string absolute path to original dataset root dtype string numpy dtype. Fixed \"float32\" for now. protocol string \"2.1\" (shards after big refactor)

The data object is base64.b64encode(pickle.dumps(img_ds)).decode('utf8').

The dataset field stores the absolute path to the root directory of the original image dataset, allowing runs to create symlinks back to the source images for visualization and analysis.

"},{"location":"developers/protocol/#22-shardsjson","title":"2.2. shards.json","text":"

A single array of shard objects, each of which has the following fields:

field type semantic name string shard filename (acts000000.bin). n_examples int the number of examples in the shard."},{"location":"developers/protocol/#3-shard-sizing-maths","title":"3. Shard sizing maths","text":"
tokens_per_ex = patches_per_ex + (1 if cls_token else 0)\n\nexamples_per_shard = floor(patches_per_shard / (tokens_per_ex * len(layers)))\n\nshape_per_shard = (\n    examples_per_shard, len(layers), tokens_per_ex, d_model,\n)\n

patches_per_shard is a budget (default ~2.4 M) chosen so a shard is approximately 10 GiB for Float32 @ d_model = 1024.

The last shard may contain fewer examples than examples_per_shard; its actual count is recorded in the n_examples field of its entry in shards.json.

"},{"location":"developers/protocol/#4-data-layout-and-global-indexing","title":"4. Data Layout and Global Indexing","text":"

The entire dataset of activations is treated as a single logical 4D tensor with the shape (n_examples, len(layers), tokens_per_ex, d_model). This logical tensor is C-contiguous with axes ordered [Example, Layer, Token, Dimension].

Physically, this tensor is split along the first axis (Example) into multiple shards, where each shard is a single binary file. The number of examples in each shard is constant, except for the final shard, which may be smaller.

To locate an arbitrary activation vector, a reader must convert a logical coordinate (global_ex_idx, layer_value, token_idx) into a file path and an offset within that file.

"},{"location":"developers/protocol/#41-definitions","title":"4.1 Definitions","text":"

Let the parameters from metadata.json be:

"},{"location":"developers/protocol/#42-coordinate-transformations","title":"4.2 Coordinate Transformations","text":"

Given a logical coordinate:

The physical location is found as follows:

  1. Identify Shard:

    • shard_idx = global_ex_idx // S
    • ex_in_shard = global_ex_idx % S The target file is acts{shard_idx:06d}.bin.
  2. Identify Layer Index: The stored data contains a subset of the ViT's layers. The logical layer_value must be mapped to its index in the stored layers array.

    • layer_idx = layers.index(layer_value) A reader must raise an error if layer_value is not in layers.
  3. Calculate Offset: The data within a shard is a 4D tensor of shape (S, L, T, D). The offset to the first byte of the desired activation vector [ex_in_shard, layer_idx , token_idx] is:

    • offset_in_vectors = (ex_in_shard * L * T) + (layer_idx * T) + token_idx
    • offset_in_bytes = offset_in_vectors * D * 4 (assuming 4 bytes for float32)

A reader can then seek to offset_in_bytes and read \\(D \\times 4\\) bytes to retrieve the vector.

Alternatively, rather than calculate the offset, readers can memmap the shard, then use Numpy indexing to get the activation vector.

"},{"location":"developers/protocol/#43-token-axis-layout","title":"4.3 Token Axis Layout","text":"

The token axis of length \\(T\\) is ordered as follows: * If cls_token is true: * Index 0: [CLS] token activation * Indices 1 to \\(P\\): Patch token activations * If cls_token is false: * Indices 0 to \\(P-1\\): Patch token activations

The relative order of patch tokens is preserved exactly as produced by the upstream Vision Transformer.

"},{"location":"developers/protocol/#5-versioning-compatibility","title":"5 Versioning & compatibility","text":"

That's it. Anything else you find in code that contradicts this document, fix the code or update the spec.

"},{"location":"developers/workflows/","title":"Workflows","text":"
  1. Generate inference activations (and thus visuals) for both training and validation splits.
"},{"location":"users/bird-mae-debugging/","title":"Debugging Bird-MAE Activations","text":"

This is an example of the kind of debugging you might have to do when training SAEs on a new model. The short version: Bird-MAE has an \"emergent outlier feature\" in dimension 296 that blows up after the first MLP. The fix is to record activations after the pre-MLP LayerNorm (block.norm2) instead of the raw residual stream, because the LayerNorm learns to suppress the outlier.

"},{"location":"users/bird-mae-debugging/#symptom-80-dead-neurons","title":"Symptom: 80% dead neurons","text":"

While training TopK SAEs on BirdMAE activations taken from birdsong, ~80% of my neurons were dead from the very start of training.

"},{"location":"users/bird-mae-debugging/#comparing-to-known-good-activations","title":"Comparing to known-good activations","text":"

First, I compared activations from BirdMAE to DINOv3 activations (which I know are well-behaved). I recorded 300K content token activation vectors from layer 14/24 from DINOv3 ViT-L/16 and BirdMAE-L. Each vector has 1024 dimensions. I flattened these vectors; for each of BirdMAE and DINOv3, I have a list of 307.2M neuron activations (300K x 1024 = 307,200,000). I plotted a histogram below. Note the log scale on the y-axis.

I zoomed in on the left-most cluster, ignoring the right cluster. While BirdMAE is more spread out, the shapes look good enough for now.

"},{"location":"users/bird-mae-debugging/#finding-the-outlier-dimension-296","title":"Finding the outlier: dimension 296","text":"

Looking at the right cluster, I realized that all of these values are from neuron 296 of 1024. Here, I colored activations based on their neuron: all BirdMAE neurons besides 296 are blue, DINOv3 is orange, and neuron 296 is red.

My activation matrix is \\(\\mathbb{R}^{300K \\times 1024}\\) for each dataset. In code, what I see is:

bird_acts.shape  # (300K, 1024)\nbird_acts[:, 295].min()  # 2549.54\nbird_acts[:, 295].max()  # 4625.12\n

Something is broken inside of BirdMAE.

"},{"location":"users/bird-mae-debugging/#tracing-the-outlier-through-the-residual-stream","title":"Tracing the outlier through the residual stream","text":"

Where in BirdMAE does this abnormality show up? Consider transformers as residual streams. After what layer does dimension 296/1024 blow up? See this diagram below: for a single random example from BirdMAE, we will track both the average neuron and neuron 296's value through the 24 transformer layers.

BirdMAE uses 256 content tokens for a single example. We take the average value of each neuron in the residual stream before each transformer block (the green \"Graph #1\" circle in the above diagram) and after the final transformer block. We plot each of the 1023 \"well-behaved\" neurons in light blue. We plot our degenerate neuron 296 in red. Note the log scale on the y-axis.

Our well-behaved neurons mostly stay in (-10, 10). Neuron 296 jumps straight to ~2.2K after the first residual block and is never fixed again. It's well-behaved coming out of the patch embedding before the first residual block.

"},{"location":"users/bird-mae-debugging/#narrowing-it-down-the-first-mlp","title":"Narrowing it down: the first MLP","text":"

Below is the output from the attention layers (Graph #2) in our architecture diagram.

Neuron 296 is mostly well-behaved; it's a little big after the second attention layer, but not insane.

Here, we can see that the output of the first MLP produces an abnormally high value for neuron 296. Why?

Here's an architecture diagram of BirdMAE's MLPs according to the model definition on HuggingFace. Let's look at the trainable parameters in these MLPs across layers, starting from the end and working backwards.

fc2 has a weight parameter with shape (4096, 1024) and a bias parameter with shape (1024,). I take the L2 norm of fc2.weight's columns to see if col 296/1024 is different.

fc2.weight does appear to be different, and abnormally large (note the log scale). fc2.bias is also different, but it's not immediately obvious what's going on there to me.

"},{"location":"users/bird-mae-debugging/#root-cause-emergent-outlier-features","title":"Root cause: emergent outlier features","text":"

This is a known phenomenon in transformers called \"emergent outlier features.\" After extensive pretraining, a single dimension in the residual stream accumulates a very large magnitude. The model never needs to \"fix\" this because the pre-attention and pre-MLP LayerNorms learn to suppress it: the learned multiplicative weight for dimension 296 is very small, and the bias is approximately 1. So later layers never actually \"see\" the outlier in practice.

We verified this by inspecting norm2.weight across layers and confirming that the learned scale for dimension 296 is near-zero, but that analysis is not reproduced here.

The BirdMAE authors never had to deal with this because all downstream use of the model goes through LayerNorm first.

"},{"location":"users/bird-mae-debugging/#fix-record-after-layernorm","title":"Fix: record after LayerNorm","text":"

The fix is to record activations after block.norm2 (the pre-MLP LayerNorm) instead of from the raw residual stream. In saev, this is implemented as:

def get_residuals(self) -> list[torch.nn.Module]:\n    return [block.norm2 for block in self.model.blocks]\n

After this change, the outlier is suppressed and SAE training works normally.

"},{"location":"users/bird-mae-debugging/#lessons","title":"Lessons","text":"
  1. Compare activation distributions to a known-good model. Histogramming flattened activations from 300K tokens is cheap and can reveal outliers.
  2. Emergent outlier features are real. If a single dimension dominates your activation distribution, check whether it's a known artifact of pretraining before assuming your recording code is wrong.
  3. Record after LayerNorm, not from the raw residual stream. The residual stream can carry high-magnitude \"bookkeeping\" values that LayerNorm suppresses. Recording post-norm avoids this entirely.
"},{"location":"users/glossary/","title":"Glossary","text":"

Definitions for words used in the code and documentation.

Modality-specific vocab:

"},{"location":"users/guide/","title":"Guide","text":"

This guide explains how to transition from the ADE20K demo to using saev with your own custom datasets.

Here are the steps:

  1. Save ViT activations to disk
  2. Train SAEs on activations
  3. Evaluate the SAE checkpoints
  4. Visualize Learned Features

Note

saev assumes you are running on NVIDIA GPUs. On a multi-GPU system, prefix your commands with CUDA_VISIBLE_DEVICES=X to run on GPU X.

"},{"location":"users/guide/#save-vit-activations-to-disk","title":"Save ViT Activations to Disk","text":"

To save activations to disk, we need to specify:

  1. Which model we would like to use
  2. Which layers we would like to save.
  3. Where on disk and how we would like to save activations.
  4. Which images we want to save activations for.

The saev/framework/shards.py script does all of this for us.

Run uv run launch.py shards --help to see all the configuration.

In practice, you might run:

uv run launch.py shards \\\n  --shards-root /fs/scratch/PAS2136/samuelstevens/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-16/openai \\\n  --d-model 768 \\\n  --layers 6 7 8 9 10 11 \\\n  --content-tokens-per-example 196 \\\n  --batch-size 512 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  data:img-seg-folder \\\n  --data.root /fs/scratch/PAS2136/samuelstevens/datasets/ADEChallengeData2016/ \\\n  --data.split training\n

This will save activations for the CLIP-pretrained model ViT-B/16, which has a residual stream dimension of 768, and has 196 patches per image (224 / 16 = 14; 14 x 14 = 196). It will save the last 6 layers. It will write 2.4M patches per shard, and save shards to a new directory /fs/scratch/PAS2136/samuelstevens/saev/shards.

Note

A note on storage space: A ViT-B/16 on ImageNet-1K will save 1.2M images x 197 patches/layer/image x 1 layer = ~240M activations, each of which takes up 768 floats x 4 bytes/float = 3072 bytes, for a total of 723GB for the entire dataset. As you scale to larger models (ViT-L/14 has 1024 dimensions, and 14x14-pixel patches on a 224x224 image give 256 patches/layer/image), recorded activations will grow even larger.

This script will also save a metadata.json file that will record the relevant metadata for these activations, which will be read by future steps. The activations will be in .bin files, numbered starting from 000000.

To add your own models, see the guide to extending in saev.activations.

"},{"location":"users/guide/#train-saes-on-activations","title":"Train SAEs on Activations","text":"

To train an SAE, we need to specify:

  1. Which activations to use as input.
  2. SAE architectural stuff.
  3. Optimization-related stuff.

The train.py script handles this.

Run uv run train.py --help to see all the configuration.

The most important options are:

This is a full example:

uv run train.py \\\n  --runs-root /fs/ess/PAS2136/samuelstevens/saev/runs \\\n  --lr 4e-3 \\\n  --sae.exp-factor 16 \\\n  --sae.d-model 1024 \\\n  --tag ade20k-v0.1 \\\n  --n-train 100_000_000 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --train-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/51567c6c \\\n  --train-data.layer 11 \\\n  --val-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3e27794f \\\n  --val-data.layer 11 \\\n  sae.activation:relu \\\n  objective:matryoshka \\\n  --objective.sparsity-coeff 1e-3 \\\n

This will train one (1) sparse autoencoder on the data. See the section on sweeps to learn how to train multiple SAEs in parallel using one or more GPUs.

"},{"location":"users/guide/#loader-entropy-metrics","title":"Loader Entropy Metrics","text":"

The training loop logs additional loader diagnostics derived from calc_batch_entropy in train.py. Every batch contributes two entropy measurements in natural log units:

All eight metrics appear alongside the existing loader/read_mb counters, helping spot skewed sampling or under-covered patches mid-run.

"},{"location":"users/guide/#evaluation","title":"Evaluation","text":"

After training an SAE, you probably want to use the SAE. You can use the SAE as a regular PyTorch torch.nn.Module in combination with a saev.data.OrderedDataLoader or saev.data.IndexedDataset.

However, most SAEs are evaluated with a similar set of metrics (normalized MSE, L0, etc). The saev/framework/inference.py script calculates these metrics. You can run uv run launch.py inference --help to see all the options.

The most important options are:

uv run launch.py inference \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/z55bntm1/ \\\n  --data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/614861a0 \\\n  --data.layer 11\n
"},{"location":"users/guide/#visualize-learned-features","title":"Visualize Learned Features","text":"

Now that you've trained an SAE, you probably want to look at its learned features. One way to visualize an individual learned feature is by picking out images that maximize the activation of that feature. We use the saved sparse token_acts.npz file from the previous inference step.

Warning

Because there are so many different ways to visualize SAE features, I moved it to contrib/trait_discovery (used for our preprint \"Towards Open-Ended Visual Scientific Discovery with Sparse Autoencoders\").

The most important options:

So first, move into the contrib/trait_discovery directory:

cd contrib/trait_discovery\n

Then run the script that generates highlighted images:

uv run scripts/launch.py visuals \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/unu6dbfb \\\n  --shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3802cb66 \\\n  --latents 0 1 2 3 4 5 6 7 8 9 49 56 57 125 202 \\\n  --n-latents 20\n

Note

Because of limitations in the SAE training process, not all SAE latents are equally interesting. Some latents are dead, some are dense, some only fire on two images, etc. Typically, you want neurons that fire very strongly (high value) and fairly infrequently (low frequency). You might be interested in particular, fixed latents (--include-latents). I recommend using saev/interactive/metrics.py with marimo to figure out good thresholds.

"},{"location":"users/guide/#sweeps","title":"Sweeps","text":"

tl;dr: basically the slow part of training SAEs is loading ViT activations from disk, and since SAEs are pretty small compared to other models, you can train a bunch of different SAEs in parallel on the same data using a big GPU. That way you can sweep learning rate, lambda, etc. all on one GPU.

"},{"location":"users/guide/#why-parallel-sweeps","title":"Why Parallel Sweeps","text":"

SAE training has a different bottleneck than typical ML workflows: disk I/O rather than GPU computation. When training on vision transformer activations, loading the pre-computed activation data from disk is often the slowest part of the process, not the SAE training itself.

A single set of ImageNet activations for a vision transformer can require terabytes of storage. Reading this data repeatedly for each hyperparameter configuration would be extremely inefficient.

"},{"location":"users/guide/#parallelized-training-architecture","title":"Parallelized Training Architecture","text":"

To address this bottleneck, we implement parallel training that allows multiple SAE configurations to train simultaneously on the same data batch:

\nflowchart TD\n    A[Pre-computed ViT Activations] -->|Slow I/O| B[Memory Buffer]\n    B -->|Shared Batch| C[SAE Model 1]\n    B -->|Shared Batch| D[SAE Model 2]\n    B -->|Shared Batch| E[SAE Model 3]\n    B -->|Shared Batch| F[...]\n

This approach:

"},{"location":"users/guide/#running-a-sweep","title":"Running a Sweep","text":"

The train command accepts a --sweep parameter that points to a TOML file defining the hyperparameter grid:

uv run python -m saev train --sweep configs/my_sweep.toml\n

Here's an example sweep configuration file:

[sae]\nsparsity_coeff = [1e-4, 2e-4, 3e-4]\nd_model = 768\nd_sae = [6144, 12288]\n\n[data]\nscale_mean = true\n

This would train 6 models (3 sparsity coefficients \u00d7 2 SAE widths), each sharing the same data loading operation.

"},{"location":"users/guide/#limitations","title":"Limitations","text":"

Not all parameters can be swept in parallel. Parameters that affect data loading (like batch_size or dataset configuration) will cause the sweep to split into separate parallel groups. The system automatically handles this division to maximize efficiency.

"},{"location":"users/inference/","title":"Inference","text":"

If you want to get started quickly, try the inference notebook in marimo or on Google Colab.

Briefly, you need to:

  1. Download a checkpoint.
  2. Get the code.
  3. Load the checkpoint.
  4. Get activations.

Details are below.

"},{"location":"users/inference/#download-a-checkpoint","title":"Download a Checkpoint","text":"

First, download an SAE checkpoint from the Huggingface collection.

"},{"location":"users/inference/#single-checkpoint-repos","title":"Single-checkpoint repos","text":"

Some repos (CLIP, BioCLIP, DINOv2) contain a single sae.pt at the root. For instance, the SAE trained on OpenAI's CLIP ViT-B/16 with ImageNet-1K activations is here.

You can use wget if you want:

wget https://huggingface.co/osunlp/SAE_CLIP_24K_ViT-B-16_IN1K/resolve/main/sae.pt\n
"},{"location":"users/inference/#multi-checkpoint-repos","title":"Multi-checkpoint repos","text":"

The DINOv3 repos contain multiple checkpoints organized by layer and sparsity level. Each repo has a manifest.jsonl with metadata (layer, L0, MSE) for every checkpoint, so you can pick the right one programmatically.

Download a specific checkpoint:

from huggingface_hub import hf_hub_download\n\n# Pick a specific layer and run ID from the repo's README or manifest.jsonl\npath = hf_hub_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\", \"layer_23/lnleoyf6/sae.pt\")\n

Download all checkpoints in a repo:

from huggingface_hub import snapshot_download\n\nsnapshot_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\")\n

Available DINOv3 repos:

"},{"location":"users/inference/#get-the-code","title":"Get the Code","text":"

The easiest way to do this is to clone the code:

git clone https://github.com/Imageomics/saev\n

You can also install the package from git if you use uv (not sure about pip or cuda):

uv add git+https://github.com/Imageomics/saev\n

Or clone it and install it as an editable with pip, like pip install -e . in your virtual environment.

Then you can do things like from saev import ....

Note

If you struggle to get saev installed, open an issue on GitHub and I will figure out how to make it easier.

"},{"location":"users/inference/#load-the-checkpoint","title":"Load the Checkpoint","text":"
import saev.nn\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n

Now you have a pretrained SAE.

"},{"location":"users/inference/#get-activations","title":"Get Activations","text":"

This is the hardest part. We need to:

  1. Pass an image into a ViT
  2. Record the dense ViT activations at the same layer that the SAE was trained on.
  3. Pass the activations into the SAE to get sparse activations.
  4. Do something interesting with the sparse SAE activations.

There are examples of this in the demo code: for classification and semantic segmentation. If the permalinks change, you are looking for the get_sae_latents() functions in both files.

Below is example code to do it using the saev package.

import saev.nn\nimport saev.data.models\nimport saev.data.shards\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n\nvit_cls = saev.data.models.load_model_cls(\"clip\")\nvit = vit_cls(\"ViT-B-16/openai\").to(device)\nvit = saev.data.shards.RecordedTransformer(vit, 196, True, [10])\n\nimg_tr, _ = vit_cls.make_transforms(\"ViT-B-16/openai\", 196)\nimg = Image.open(\"example.jpg\")\n\nx = img_tr(img)\n# Add a batch dimension.\nx = x[None, ...]\n_, vit_acts = vit(x)\n# Select the only layer and ignore the CLS token.\nvit_acts = vit_acts[:, 0, 1:, :]\n\nout = sae(vit_acts)\n# out.f_x: sparse SAE latents (batch, d_sae)\n# out.x_hats: reconstructed activations (batch, n_prefixes, d_model)\n

Now you have the sparse representation of all patches in the image (out.f_x) and the reconstructed activations (out.x_hats).

You might select the dimensions with maximal values for each patch and see what other images are maximally activating.

"},{"location":"users/new-project/","title":"New Project Structure","text":"

saev is structured like big_vision, Google's ViT codebase. To get the most use out of saev, you should not use it as a requirement in your project; rather, you should build inside of the source code of saev. This is a guide to that process.

TL;DR:

  1. Fork saev.
  2. Clone your fork.
  3. Create a new directory in contrib/.
  4. Update both src/saev and your new contrib directory as necessary.
  5. (Hopefully) publish.
  6. If your changes to src/saev are broadly useful and not overly restrictive, open a PR with your changes to src/saev.

I am currently applying SAEs to audio of birdsong, so this is how I'll develop it.

First, fork and clone saev. Do this however you want, but GitHub has a guide on it.

Second, you probably want to store code related to your project in this repo. Make a new directory in contrib/. I'm calling my new subproject \"birdsong.\"

[I] samuelstevens@host ~/p/saev (main)> tree -L 1 contrib/\ncontrib/\n\u251c\u2500\u2500 birdsong\n\u251c\u2500\u2500 interactive_interp\n\u2514\u2500\u2500 trait_discovery\n

Use uv to make a new package inside your new project:

[I] samuelstevens@host ~/p/s/c/birdsong (main)> uv init --package .\nAdding `birdsong` as member of workspace `~/projects/saev`\nInitialized project `birdsong` at `~/projects/saev/contrib/birdsong`\n

Now you have some additional files.

[I] samuelstevens@ascend-login02 ~/p/s/c/birdsong (main)> tree\n.\n\u251c\u2500\u2500 pyproject.toml\n\u251c\u2500\u2500 README.md\n\u2514\u2500\u2500 src\n    \u2514\u2500\u2500 birdsong\n        \u2514\u2500\u2500 __init__.py\n

Now I can write scripts and source code for birdsong-specific stuff in here. I'll probably add a notebook for looking at instances of birdsongs before and after using SAEs to identify patterns under a new birdsong/notebooks directory, and will add birdsong/logbook.md to store ongoing TODO items, and so on.

To train SAEs on audio files, I'll need to add a new dataset type to save activations. In order to do this, I'll edit src/saev/data/datasets.py.

I'll also need to add another model to the dataset, one that expects audio files. Since I don't think that DINOv3, OpenCLIP, or the other existing model families will be suitable, I'll need to add a new model family. Again, this will need to go somewhere in src/saev/data.

If I'm smart about it, these changes will be nice and non-destructive, and other users of saev can benefit from them. After I publish some results, to share this code with others, I'll open a PR from my fork/branch to main with the new datasets/models. But I won't open a PR with birdsong because that's specific to me, rather than to the library.1

  1. Technically, birdsong will be in saev because I'm a sort of privileged user because I'm the main developer. But other folks probably want their project-specific code attached to their GitHub page, rather than OSU-NLP's.\u00a0\u21a9

"},{"location":"users/sweeps/","title":"Sweeps","text":"

Hyperparameter sweeps in saev train multiple SAE configurations in parallel on a single GPU, amortizing the cost of loading activation data from disk across all models. Furthermore, sweeps make it easy to train multiple SAEs with one command across multiple GPUs using Slurm.

"},{"location":"users/sweeps/#quick-start","title":"Quick Start","text":"

Create a Python file defining your sweep:

# sweeps/my_sweep.py\n\ndef make_cfgs() -> list[dict]:\n    cfgs = []\n\n    # Grid search over learning rate and sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"objective\": {\"sparsity_coeff\": sparsity},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

Run the sweep:

uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23\n

This trains 9 SAEs (3 learning rates x 3 sparsity coefficients) in parallel.

"},{"location":"users/sweeps/#why-parallel-sweeps","title":"Why Parallel Sweeps?","text":"

SAE training is bottlenecked by disk I/O, not GPU computation. Loading terabytes of pre-computed ViT activations from disk is the slowest part. By training multiple SAE configurations on the same batch simultaneously, we amortize the I/O cost:

\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502 ViT Activations (disk) \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n            \u2502 (slow I/O, once per batch)\n            \u25bc\n      \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n      \u2502  Batch   \u2502\n      \u2514\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2518\n            \u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n            \u25bc         \u25bc         \u25bc         \u25bc\n         SAE #1    SAE #2    SAE #3     ...\n        (lr=3e-4) (lr=1e-3) (lr=3e-3)\n
"},{"location":"users/sweeps/#sweep-configuration","title":"Sweep Configuration","text":""},{"location":"users/sweeps/#python-based-sweeps","title":"Python-Based Sweeps","text":"

Python sweeps give you full control over config generation. Your sweep file must define a make_cfgs() function that returns a list of dicts.

Grid search example:

def make_cfgs():\n    cfgs = []\n\n    for lr in [1e-4, 3e-4, 1e-3]:\n        for d_sae in [8192, 16384, 32768]:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

Paired parameters (not a grid):

def make_cfgs():\n    cfgs = []\n\n    # Grid over lr x sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            # Paired layers (train and val use same layer)\n            for layer in [6, 7, 8, 9, 10, 11]:\n                cfg = {\n                    \"lr\": lr,\n                    \"objective\": {\"sparsity_coeff\": sparsity},\n                    \"train_data\": {\"layer\": layer},\n                    \"val_data\": {\"layer\": layer},\n                }\n                cfgs.append(cfg)\n\n    return cfgs\n

This generates 54 configs (3 x 3 x 6) where each train/val pair uses the same layer, avoiding the 324 configs you'd get from a full grid (3 x 3 x 6 x 6).

Conditional sweeps:

def make_cfgs():\n    cfgs = []\n\n    for d_sae in [8192, 16384, 32768]:\n        # Use different LR for different SAE widths\n        lrs = [1e-3, 3e-3] if d_sae <= 16384 else [3e-4, 1e-3]\n\n        for lr in lrs:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n
"},{"location":"users/sweeps/#command-line-overrides","title":"Command-Line Overrides","text":"

Command-line arguments override sweep parameters with deep merging. The precedence order is: CLI > Sweep > Default.

uv run train.py --sweep sweeps/my_sweep.py \\\n  --lr 5e-4  # Overrides all LRs in the sweep\n

Override nested config fields with dotted notation:

uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23 \\\n  --sae.d-sae 16384\n

Deep merging means that when you override a nested field, only that specific field is replaced\u2014other fields in the nested config are preserved from the sweep or default values.

"},{"location":"users/sweeps/#parallel-groups","title":"Parallel Groups","text":"

Not all parameters can vary within a parallel sweep. Parameters that affect data loading (like train_data, n_train, device) must be identical across all configs in a parallel group.

When configs differ in these parameters, they're automatically split into separate Slurm jobs:

def make_cfgs():\n    cfgs = []\n\n    # These will run in 2 separate jobs\n    for layer in [6, 12]:  # Different data loading\n        for lr in [1e-4, 3e-4]:  # Can parallelize\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

This creates 2 parallel groups: - Job 1: layer=6, lr=[1e-4, 3e-4] - Job 2: layer=12, lr=[1e-4, 3e-4]

Implementation detail

See CANNOT_PARALLELIZE in train.py for the full list of parameters that split parallel groups. The split_cfgs() function handles grouping automatically.

"},{"location":"users/sweeps/#module-loading","title":"Module Loading","text":"

Your sweep file is executed as a Python module, so you can use imports and helper functions:

def make_cfgs():\n    cfgs = []\n\n    # You can use helper functions\n    base_layers = list(range(6, 24, 2))\n\n    for layer in base_layers:\n        for lr in [1e-4, 3e-4]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"val_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"sae\": {\"d_model\": 1024, \"d_sae\": 16384},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

Import mechanics

The sweep file is loaded with importlib.import_module(), so it must be importable as a Python module. Place sweep files in a location where Python can find them (typically the project root or a sweeps/ subdirectory).

"},{"location":"users/sweeps/#slurm-integration","title":"Slurm Integration","text":"

When running with --slurm-acct, each parallel group becomes a separate Slurm job:

uv run train.py --sweep sweeps/large.py \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --n-hours 24\n

The system automatically: - Groups configs that can parallelize - Submits one Slurm job per group - Waits for all jobs to complete - Reports results

"},{"location":"users/sweeps/#seed-management","title":"Seed Management","text":"

Seeds are automatically incremented for each config to ensure reproducibility:

# Base config has seed=42\n# Sweep generates 9 configs with seeds: 42, 43, 44, ..., 50\n

Override the base seed on the command line:

uv run train.py --sweep sweeps/my_sweep.py --seed 100\n
"},{"location":"users/sweeps/#examples","title":"Examples","text":"

Simple grid:

# sweeps/simple.py\ndef make_cfgs():\n    return [\n        {\"lr\": lr, \"objective\": {\"sparsity_coeff\": sp}}\n        for lr in [1e-4, 3e-4, 1e-3]\n        for sp in [4e-4, 8e-4, 1.6e-3]\n    ]\n

Layer sweep with paired train/val:

# sweeps/layers.py\ndef make_cfgs():\n    cfgs = []\n\n    for layer in range(6, 24, 2):  # Layers 6, 8, 10, ..., 22\n        for lr in [3e-4, 1e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n                \"val_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

Architecture sweep:

# sweeps/architecture.py\ndef make_cfgs():\n    cfgs = []\n\n    architectures = [\n        (\"small\", 8192, 1e-3),\n        (\"medium\", 16384, 5e-4),\n        (\"large\", 32768, 3e-4),\n    ]\n\n    for name, d_sae, lr in architectures:\n        cfg = {\n            \"lr\": lr,\n            \"sae\": {\"d_sae\": d_sae},\n            \"tag\": name,\n        }\n        cfgs.append(cfg)\n\n    return cfgs\n
"}]} \ No newline at end of file diff --git a/docs/api/sitemap.xml b/docs/api/sitemap.xml index 178dc5c..d9f457c 100644 --- a/docs/api/sitemap.xml +++ b/docs/api/sitemap.xml @@ -1,199 +1,199 @@ - https://osu-nlp-group.github.io/saev/api/ + https://imageomics.github.io/saev/api/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/colors/ + https://imageomics.github.io/saev/api/api/colors/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/configs/ + https://imageomics.github.io/saev/api/api/configs/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/disk/ + https://imageomics.github.io/saev/api/api/disk/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/helpers/ + https://imageomics.github.io/saev/api/api/helpers/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/metrics/ + https://imageomics.github.io/saev/api/api/metrics/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/saev/ + https://imageomics.github.io/saev/api/api/saev/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/summary/ + https://imageomics.github.io/saev/api/api/summary/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/viz/ + https://imageomics.github.io/saev/api/api/viz/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/bird_mae/ + https://imageomics.github.io/saev/api/api/data/bird_mae/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/buffers/ + https://imageomics.github.io/saev/api/api/data/buffers/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/clip/ + https://imageomics.github.io/saev/api/api/data/clip/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/datasets/ + https://imageomics.github.io/saev/api/api/data/datasets/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/dinov2/ + https://imageomics.github.io/saev/api/api/data/dinov2/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/dinov3/ + https://imageomics.github.io/saev/api/api/data/dinov3/ 
2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/fake_clip/ + https://imageomics.github.io/saev/api/api/data/fake_clip/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/indexed/ + https://imageomics.github.io/saev/api/api/data/indexed/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/models/ + https://imageomics.github.io/saev/api/api/data/models/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/ordered/ + https://imageomics.github.io/saev/api/api/data/ordered/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/pe/ + https://imageomics.github.io/saev/api/api/data/pe/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/saev.data/ + https://imageomics.github.io/saev/api/api/data/saev.data/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/shards/ + https://imageomics.github.io/saev/api/api/data/shards/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/shuffled/ + https://imageomics.github.io/saev/api/api/data/shuffled/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/siglip/ + https://imageomics.github.io/saev/api/api/data/siglip/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/data/transforms/ + https://imageomics.github.io/saev/api/api/data/transforms/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/framework/inference/ + https://imageomics.github.io/saev/api/api/framework/inference/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/framework/saev.framework/ + https://imageomics.github.io/saev/api/api/framework/saev.framework/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/framework/shards/ + https://imageomics.github.io/saev/api/api/framework/shards/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/framework/train/ + https://imageomics.github.io/saev/api/api/framework/train/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/nn/modeling/ + https://imageomics.github.io/saev/api/api/nn/modeling/ 
2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/nn/objectives/ + https://imageomics.github.io/saev/api/api/nn/objectives/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/nn/saev.nn/ + https://imageomics.github.io/saev/api/api/nn/saev.nn/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/utils/monitoring/ + https://imageomics.github.io/saev/api/api/utils/monitoring/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/utils/saev.utils/ + https://imageomics.github.io/saev/api/api/utils/saev.utils/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/utils/scheduling/ + https://imageomics.github.io/saev/api/api/utils/scheduling/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/utils/statistics/ + https://imageomics.github.io/saev/api/api/utils/statistics/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/api/utils/wandb/ + https://imageomics.github.io/saev/api/api/utils/wandb/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/developers/contributing/ + https://imageomics.github.io/saev/api/developers/contributing/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/developers/datapoint-init/ + https://imageomics.github.io/saev/api/developers/datapoint-init/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/developers/disk-layout/ + https://imageomics.github.io/saev/api/developers/disk-layout/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/developers/naming/ + https://imageomics.github.io/saev/api/developers/naming/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/developers/protocol/ + https://imageomics.github.io/saev/api/developers/protocol/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/developers/workflows/ + https://imageomics.github.io/saev/api/developers/workflows/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/users/bird-mae-debugging/ + https://imageomics.github.io/saev/api/users/bird-mae-debugging/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/users/glossary/ + 
https://imageomics.github.io/saev/api/users/glossary/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/users/guide/ + https://imageomics.github.io/saev/api/users/guide/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/users/inference/ + https://imageomics.github.io/saev/api/users/inference/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/users/new-project/ + https://imageomics.github.io/saev/api/users/new-project/ 2026-03-06 - https://osu-nlp-group.github.io/saev/api/users/sweeps/ + https://imageomics.github.io/saev/api/users/sweeps/ 2026-03-06 \ No newline at end of file diff --git a/docs/api/users/bird-mae-debugging/index.html b/docs/api/users/bird-mae-debugging/index.html index ce71365..e945e8a 100644 --- a/docs/api/users/bird-mae-debugging/index.html +++ b/docs/api/users/bird-mae-debugging/index.html @@ -8,7 +8,7 @@ - + @@ -2229,7 +2229,7 @@

Lessons + diff --git a/docs/api/users/glossary/index.html b/docs/api/users/glossary/index.html index e917ae2..dbda65d 100644 --- a/docs/api/users/glossary/index.html +++ b/docs/api/users/glossary/index.html @@ -8,7 +8,7 @@ - + @@ -2048,7 +2048,7 @@

Glossary + diff --git a/docs/api/users/guide/index.html b/docs/api/users/guide/index.html index a9f8f06..e6fa015 100644 --- a/docs/api/users/guide/index.html +++ b/docs/api/users/guide/index.html @@ -8,7 +8,7 @@ - + @@ -2371,7 +2371,7 @@

Limitations + diff --git a/docs/api/users/inference/index.html b/docs/api/users/inference/index.html index bb07909..e2a6eb8 100644 --- a/docs/api/users/inference/index.html +++ b/docs/api/users/inference/index.html @@ -8,7 +8,7 @@ - + @@ -2046,7 +2046,7 @@

Inference

-

If you want to get started quickly, try the inference notebook in marimo or on Google Colab.

+

If you want to get started quickly, try the inference notebook in marimo or on Google Colab.

Briefly, you need to:

    @@ -2085,16 +2085,16 @@

    Multi-checkpoint repos

    Get the Code

    The easiest way to do this is to clone the code:

    -
    git clone https://github.com/OSU-NLP-Group/saev
    +
    git clone https://github.com/Imageomics/saev
     

    You can also install the package from git if you use uv (not sure about pip or cuda):

    -
    uv add git+https://github.com/OSU-NLP-Group/saev
    +
    uv add git+https://github.com/Imageomics/saev
     

    Or clone it and install it as an editable with pip, like pip install -e . in your virtual environment.

    Then you can do things like from saev import ....

    Note

    -

    If you struggle to get saev installed, open an issue on GitHub and I will figure out how to make it easier.

    +

    If you struggle to get saev installed, open an issue on GitHub and I will figure out how to make it easier.

    Load the Checkpoint

    import saev.nn
    @@ -2234,7 +2234,7 @@ 

    Get Activations + diff --git a/docs/api/users/new-project/index.html b/docs/api/users/new-project/index.html index 9be1ba2..ffbab7b 100644 --- a/docs/api/users/new-project/index.html +++ b/docs/api/users/new-project/index.html @@ -8,7 +8,7 @@ - + @@ -2085,7 +2085,7 @@

    New Project Structure + diff --git a/docs/api/users/sweeps/index.html b/docs/api/users/sweeps/index.html index c9ca900..35da9bd 100644 --- a/docs/api/users/sweeps/index.html +++ b/docs/api/users/sweeps/index.html @@ -8,7 +8,7 @@ - + @@ -2405,7 +2405,7 @@

    Examples + diff --git a/docs/assets/modelcards/SAE_BioCLIP_24K_ViT-B-16_iNat21.md b/docs/assets/modelcards/SAE_BioCLIP_24K_ViT-B-16_iNat21.md index cc5637f..da58371 100644 --- a/docs/assets/modelcards/SAE_BioCLIP_24K_ViT-B-16_iNat21.md +++ b/docs/assets/modelcards/SAE_BioCLIP_24K_ViT-B-16_iNat21.md @@ -4,15 +4,15 @@ license: mit # SAE for Imageomics's BioCLIP ViT-B/16 trained on iNat2021 Activations -![Overview of a the features found by a BioCLIP-trained SAE](https://osu-nlp-group.github.io/saev/assets/overview2-bioclip.webp) +![Overview of a the features found by a BioCLIP-trained SAE](https://imageomics.github.io/saev/assets/overview2-bioclip.webp) -* **Homepage:** https://osu-nlp-group.github.io/saev -* **Code:** https://github.com/OSU-NLP-Group/saev +* **Homepage:** https://imageomics.github.io/saev +* **Code:** https://github.com/Imageomics/saev * **Preprint:** https://arxiv.org/abs/2502.06755 -* **Demos:** https://osu-nlp-group.github.io/saev#demos +* **Demos:** https://imageomics.github.io/saev#demos * **Point of Contact:** [Sam Stevens](mailto:stevens.994@buckeyemail.osu.edu) ## Inference Instructions -Follow the instructions [here](https://osu-nlp-group.github.io/saev/api/saev/#inference-instructions). +Follow the instructions [here](https://imageomics.github.io/saev/api/saev/#inference-instructions). 
diff --git a/docs/assets/modelcards/SAE_CLIP_24K_ViT-B-16_IN1K.md b/docs/assets/modelcards/SAE_CLIP_24K_ViT-B-16_IN1K.md index e55df03..7ecac82 100644 --- a/docs/assets/modelcards/SAE_CLIP_24K_ViT-B-16_IN1K.md +++ b/docs/assets/modelcards/SAE_CLIP_24K_ViT-B-16_IN1K.md @@ -4,14 +4,14 @@ license: mit # SAE for OpenAI's CLIP ViT-B/16 trained on ImageNet-1K Activations -![Overview of a CLIP-trained SAE](https://osu-nlp-group.github.io/saev/assets/overview2.webp) +![Overview of a CLIP-trained SAE](https://imageomics.github.io/saev/assets/overview2.webp) -* **Homepage:** https://osu-nlp-group.github.io/saev -* **Code:** https://github.com/OSU-NLP-Group/saev +* **Homepage:** https://imageomics.github.io/saev +* **Code:** https://github.com/Imageomics/saev * **Preprint:** https://arxiv.org/abs/2502.06755 -* **Demos:** https://osu-nlp-group.github.io/saev#demos +* **Demos:** https://imageomics.github.io/saev#demos * **Point of Contact:** [Sam Stevens](mailto:stevens.994@buckeyemail.osu.edu) ## Inference Instructions -Follow the instructions [here](https://osu-nlp-group.github.io/saev/api/saev/#inference-instructions). +Follow the instructions [here](https://imageomics.github.io/saev/api/saev/#inference-instructions). 
diff --git a/docs/assets/modelcards/SAE_DINOv2_24K_ViT-B-14_IN1K.md b/docs/assets/modelcards/SAE_DINOv2_24K_ViT-B-14_IN1K.md index 1c86474..11b772b 100644 --- a/docs/assets/modelcards/SAE_DINOv2_24K_ViT-B-14_IN1K.md +++ b/docs/assets/modelcards/SAE_DINOv2_24K_ViT-B-14_IN1K.md @@ -4,12 +4,12 @@ license: mit # SAE for Meta's DINOv2 ViT-B/14 trained on ImageNet-1K Activations -* **Homepage:** https://osu-nlp-group.github.io/saev -* **Code:** https://github.com/OSU-NLP-Group/saev +* **Homepage:** https://imageomics.github.io/saev +* **Code:** https://github.com/Imageomics/saev * **Preprint:** https://arxiv.org/abs/2502.06755 -* **Demos:** https://osu-nlp-group.github.io/saev#demos +* **Demos:** https://imageomics.github.io/saev#demos * **Point of Contact:** [Sam Stevens](mailto:stevens.994@buckeyemail.osu.edu) ## Inference Instructions -Follow the instructions [here](https://osu-nlp-group.github.io/saev/api/saev/#inference-instructions). +Follow the instructions [here](https://imageomics.github.io/saev/api/saev/#inference-instructions). 
diff --git a/docs/demos/classification/dist/app.js b/docs/demos/classification/dist/app.js index 38025e0..94d05be 100644 --- a/docs/demos/classification/dist/app.js +++ b/docs/demos/classification/dist/app.js @@ -8416,7 +8416,7 @@ var $author$project$Classification$explainGradioError = function (err) { $elm$html$Html$a, _List_fromArray( [ - $elm$html$Html$Attributes$href('https://github.com/OSU-NLP-Group/saev/issues/new'), + $elm$html$Html$Attributes$href('https://github.com/Imageomics/saev/issues/new'), $elm$html$Html$Attributes$class('text-sky-500 hover:underline') ]), _List_fromArray( diff --git a/docs/demos/semseg/dist/app.js b/docs/demos/semseg/dist/app.js index 2ffc1e3..5f445f8 100644 --- a/docs/demos/semseg/dist/app.js +++ b/docs/demos/semseg/dist/app.js @@ -8422,7 +8422,7 @@ var $author$project$Semseg$explainGradioError = function (err) { $elm$html$Html$a, _List_fromArray( [ - $elm$html$Html$Attributes$href('https://github.com/OSU-NLP-Group/saev/issues/new'), + $elm$html$Html$Attributes$href('https://github.com/Imageomics/saev/issues/new'), $elm$html$Html$Attributes$class('text-sky-500 hover:underline') ]), _List_fromArray( diff --git a/docs/index.html b/docs/index.html index 93e9f07..a9277c1 100644 --- a/docs/index.html +++ b/docs/index.html @@ -33,7 +33,7 @@

    Sparse Autoencoders for Scientifically Rigorous Interpretation of Vision Mod stevens.994@osu.edu

    - + Code @@ -75,7 +75,7 @@

    saev

    It also includes some interactive demos for scientifically rigorous interpretation of ViTs.

    - API reference docs are available below, as well as the source code on GitHub. + API reference docs are available below, as well as the source code on GitHub.

    API Docs

    @@ -94,7 +94,7 @@

    References & Citations

    title = {{saev}}, author = {Stevens, Samuel and Wei-Lun Chao and Tanya Berger-Wolf and Yu Su}, license = {MIT}, - url = {https://github.com/osu-nlp-group/saev} + url = {https://github.com/Imageomics/saev} }

    Preprint: diff --git a/docs/internal/archive/reports/2025-10-03/report.typ b/docs/internal/archive/reports/2025-10-03/report.typ index c9f8893..f48b1a4 100644 --- a/docs/internal/archive/reports/2025-10-03/report.typ +++ b/docs/internal/archive/reports/2025-10-03/report.typ @@ -62,7 +62,7 @@ I haven't tried it yet because of some other blockers (see below). = Refactors -+ #link("https://osu-nlp-group.github.io/saev/api/")[User-facing documentation]. ++ #link("https://imageomics.github.io/saev/api/")[User-facing documentation]. + [in progress] Update disk layout to make it easier to refer from a particular run to the original sharded activations and the image dataset on disk. + [in progress] Removing references to images and vision transformers in order to support non-vision but still bi-directional transformers, like audio or other modalities. diff --git a/docs/internal/handoff/main.typ b/docs/internal/handoff/main.typ index 08e635a..c3b40b3 100644 --- a/docs/internal/handoff/main.typ +++ b/docs/internal/handoff/main.typ @@ -201,7 +201,7 @@ Three risks that could stall or kill the project: = Infrastructure and Data -- *Code:* Currently under the `OSU-NLP-Group` GitHub org. Needs discussion with Tanya about whether to transfer to the Imageomics GitHub org. +- *Code:* ~Currently under the `OSU-NLP-Group` GitHub org. Needs discussion with Tanya about whether to transfer to~ Now under the Imageomics GitHub org. - *Cluster data:* Trained checkpoints, activation caches, and intermediate results live on shared cluster scratch/project space (e.g., `/fs/scratch/PAS2136/`). This data persists but is regenerable from the configs if lost. - *No personal storage risk:* All important data is on shared infrastructure, not personal directories. 
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index d80bc4e..32eb583 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -1,7 +1,7 @@ site_name: saev docs_dir: src site_dir: api -site_url: https://osu-nlp-group.github.io/saev/api +site_url: https://imageomics.github.io/saev/api theme: name: material @@ -30,7 +30,7 @@ extra_css: extra: social: - icon: fontawesome/brands/github - link: https://github.com/OSU-NLP-Group/saev + link: https://github.com/Imageomics/saev nav: - Home: index.md diff --git a/docs/src/index.md b/docs/src/index.md index 8c41e9c..3cb2fff 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -2,7 +2,7 @@ ![PyPI Downloads](https://static.pepy.tech/badge/saev) ![MIT License](https://img.shields.io/badge/License-MIT-efefef) -![GitHub Repo stars](https://img.shields.io/github/stars/OSU-NLP-group/saev?style=flat&label=GitHub%20%E2%AD%90) +![GitHub Repo stars](https://img.shields.io/github/stars/Imageomics/saev?style=flat&label=GitHub%20%E2%AD%90) saev is a framework for training and evaluating **S**parse **a**uto**e**ncoders (SAEs) for **v**ision transformers (ViTs), implemented in PyTorch. diff --git a/docs/src/users/inference.md b/docs/src/users/inference.md index 718ac1c..642bb44 100644 --- a/docs/src/users/inference.md +++ b/docs/src/users/inference.md @@ -1,6 +1,6 @@ # Inference -> If you want to get started quickly, try the [inference notebook](https://github.com/OSU-NLP-Group/saev/blob/main/examples/inference.ipynb) in marimo or on [Google Colab](https://colab.research.google.com/github/OSU-NLP-Group/saev/blob/main/examples/inference.ipynb). +> If you want to get started quickly, try the [inference notebook](https://github.com/Imageomics/saev/blob/main/examples/inference.ipynb) in marimo or on [Google Colab](https://colab.research.google.com/github/Imageomics/saev/blob/main/examples/inference.ipynb). 
Briefly, you need to: @@ -58,13 +58,13 @@ Available DINOv3 repos: The easiest way to do this is to clone the code: ``` -git clone https://github.com/OSU-NLP-Group/saev +git clone https://github.com/Imageomics/saev ``` You can also install the package from git if you use uv (not sure about pip or cuda): ```sh -uv add git+https://github.com/OSU-NLP-Group/saev +uv add git+https://github.com/Imageomics/saev ``` Or clone it and install it as an editable with pip, lik `pip install -e .` in your virtual environment. @@ -73,7 +73,7 @@ Then you can do things like `from saev import ...`. !!! note - If you struggle to get `saev` installed, open an issue on [GitHub](https://github.com/OSU-NLP-Group/saev) and I will figure out how to make it easier. + If you struggle to get `saev` installed, open an issue on [GitHub](https://github.com/Imageomics/saev) and I will figure out how to make it easier. ## Load the Checkpoint diff --git a/examples/inference.ipynb b/examples/inference.ipynb index c7d6e22..f46a1ec 100644 --- a/examples/inference.ipynb +++ b/examples/inference.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "62f8da60", "metadata": { "execution": { @@ -136,24 +136,24 @@ "output_type": "stream", "text": [ " Downloading duckdb-1.4.4-cp39-cp39-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl (20.4 MB)\r\n", - "\u001b[?25l\r", - "\u001b[K | | 10 kB 23.6 MB/s eta 0:00:01\r", - "\u001b[K | | 20 kB 6.5 MB/s eta 0:00:04\r", - "\u001b[K | | 30 kB 4.7 MB/s eta 0:00:05\r", - "\u001b[K | | 40 kB 4.3 MB/s eta 0:00:05\r", - "\u001b[K | | 51 kB 4.3 MB/s eta 0:00:05\r", - "\u001b[K | | 61 kB 3.1 MB/s eta 0:00:07\r", - "\u001b[K |▏ | 71 kB 3.5 MB/s eta 0:00:06\r", - "\u001b[K |▏ | 81 kB 4.0 MB/s eta 0:00:06\r", - "\u001b[K |▏ | 92 kB 4.5 MB/s eta 0:00:05\r", - "\u001b[K |▏ | 102 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▏ | 112 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▏ | 122 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▏ | 133 kB 4.8 
MB/s eta 0:00:05\r", - "\u001b[K |▎ | 143 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▎ | 153 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▎ | 163 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▎ | 174 kB 4.8 MB/s eta 0:00:05\r", + "\u001b[?25l\r\n", + "\u001b[K | | 10 kB 23.6 MB/s eta 0:00:01\r\n", + "\u001b[K | | 20 kB 6.5 MB/s eta 0:00:04\r\n", + "\u001b[K | | 30 kB 4.7 MB/s eta 0:00:05\r\n", + "\u001b[K | | 40 kB 4.3 MB/s eta 0:00:05\r\n", + "\u001b[K | | 51 kB 4.3 MB/s eta 0:00:05\r\n", + "\u001b[K | | 61 kB 3.1 MB/s eta 0:00:07\r\n", + "\u001b[K |▏ | 71 kB 3.5 MB/s eta 0:00:06\r\n", + "\u001b[K |▏ | 81 kB 4.0 MB/s eta 0:00:06\r\n", + "\u001b[K |▏ | 92 kB 4.5 MB/s eta 0:00:05\r\n", + "\u001b[K |▏ | 102 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▏ | 112 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▏ | 122 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▏ | 133 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▎ | 143 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▎ | 153 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▎ | 163 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▎ | 174 kB 4.8 MB/s eta 0:00:05\r\n", "\u001b[K |▎ | 184 kB 4.8 MB/s eta 0:00:05" ] }, @@ -161,384 +161,384 @@ "name": "stdout", "output_type": "stream", "text": [ - "\r", - "\u001b[K |▎ | 194 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▎ | 204 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▍ | 215 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▍ | 225 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▍ | 235 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▍ | 245 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▍ | 256 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▍ | 266 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▍ | 276 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▌ | 286 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▌ | 296 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▌ | 307 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▌ | 317 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▌ | 327 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▌ | 337 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▌ | 348 kB 4.8 MB/s eta 
0:00:05\r", - "\u001b[K |▋ | 358 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▋ | 368 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▋ | 378 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▋ | 389 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▋ | 399 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▋ | 409 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▋ | 419 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▊ | 430 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▊ | 440 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▊ | 450 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▊ | 460 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▊ | 471 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▊ | 481 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▊ | 491 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▉ | 501 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▉ | 512 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▉ | 522 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▉ | 532 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▉ | 542 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▉ | 552 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |▉ | 563 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█ | 573 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█ | 583 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█ | 593 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█ | 604 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█ | 614 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█ | 624 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█ | 634 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█ | 645 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█ | 655 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█ | 665 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█ | 675 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█ | 686 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█ | 696 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█ | 706 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▏ | 716 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▏ | 727 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▏ | 737 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▏ | 747 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▏ | 757 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▏ | 768 
kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▏ | 778 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▎ | 788 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▎ | 798 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▎ | 808 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▎ | 819 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▎ | 829 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▎ | 839 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▍ | 849 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▍ | 860 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▍ | 870 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▍ | 880 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▍ | 890 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▍ | 901 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▍ | 911 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▌ | 921 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▌ | 931 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▌ | 942 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▌ | 952 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▌ | 962 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▌ | 972 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▌ | 983 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▋ | 993 kB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▋ | 1.0 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▋ | 1.0 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▋ | 1.0 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▋ | 1.0 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▋ | 1.0 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▋ | 1.1 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▊ | 1.1 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▊ | 1.1 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▊ | 1.1 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▊ | 1.1 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▊ | 1.1 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▊ | 1.1 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▊ | 1.1 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▉ | 1.1 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▉ | 1.1 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▉ | 1.2 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▉ | 1.2 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▉ | 1.2 
MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▉ | 1.2 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |█▉ | 1.2 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██ | 1.2 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██ | 1.2 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██ | 1.2 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██ | 1.2 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██ | 1.2 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██▏ | 1.4 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██▏ | 1.4 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██▏ | 1.4 MB 4.8 MB/s eta 0:00:05\r", - "\u001b[K |██▏ | 1.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▏ | 1.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▏ | 1.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▏ | 1.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▎ | 1.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▎ | 1.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▎ | 1.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▎ | 1.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▎ | 1.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▎ | 1.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▎ | 1.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▍ | 1.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▍ | 1.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▍ | 1.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▍ | 1.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▍ | 1.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▍ | 1.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▍ | 1.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▌ | 1.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▌ | 1.6 MB 4.8 MB/s eta 
0:00:04\r", - "\u001b[K |██▌ | 1.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▌ | 1.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▌ | 1.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▌ | 1.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▌ | 1.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▋ | 1.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▋ | 1.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▋ | 1.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▋ | 1.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▋ | 1.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▋ | 1.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▊ | 1.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▊ | 1.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▊ | 1.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▊ | 1.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▊ | 1.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▊ | 1.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▊ | 1.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▉ | 1.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▉ | 1.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▉ | 1.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▉ | 1.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▉ | 1.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▉ | 1.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██▉ | 1.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███ | 1.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███ | 2.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███ | 2.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███ | 2.0 MB 4.8 MB/s eta 
0:00:04\r", - "\u001b[K |███▏ | 2.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▏ | 2.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▏ | 2.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▏ | 2.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▏ | 2.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▏ | 2.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▏ | 2.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▎ | 2.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▎ | 2.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▎ | 2.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▎ | 2.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▎ | 2.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▎ | 2.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▎ | 2.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▍ | 2.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▍ | 2.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▍ | 2.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▍ | 2.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▍ | 2.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▍ | 2.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▍ | 2.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▌ | 2.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▌ | 2.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▌ | 2.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▌ | 2.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▌ | 2.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▌ | 2.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▌ | 2.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▋ | 2.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▋ | 2.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▋ | 2.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▋ | 2.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▋ | 2.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▋ | 2.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▋ | 2.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▊ | 2.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▊ | 2.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▊ | 2.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K 
|███▊ | 2.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▊ | 2.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▊ | 2.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▊ | 2.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▉ | 2.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▉ | 2.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▉ | 2.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▉ | 2.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▉ | 2.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▉ | 2.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███▉ | 2.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████ | 2.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████ | 2.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████ | 2.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████ | 2.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████ | 2.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████ | 2.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████ | 2.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████ | 2.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████ | 2.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████ | 2.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████ | 2.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████ | 2.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████ | 2.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▏ | 2.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▏ | 2.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▏ | 2.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▏ | 2.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▏ | 2.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▏ | 2.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▏ | 2.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▎ | 2.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▎ | 2.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▎ | 2.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▎ | 2.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▎ | 2.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▎ | 2.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▎ | 2.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▍ | 
2.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▍ | 2.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▍ | 2.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▍ | 2.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▍ | 2.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▍ | 2.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▍ | 2.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▌ | 2.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▌ | 2.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▌ | 2.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▌ | 2.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▌ | 2.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▌ | 2.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▌ | 2.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▋ | 2.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▋ | 2.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▋ | 2.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▋ | 2.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▋ | 2.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▋ | 3.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▋ | 3.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▊ | 3.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▊ | 3.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▊ | 3.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▊ | 3.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▊ | 3.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▊ | 3.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▊ | 3.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▉ | 3.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▉ | 3.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▉ | 3.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▉ | 3.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▉ | 3.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▉ | 3.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████▉ | 3.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████ | 3.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████ | 3.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████ | 3.1 MB 4.8 MB/s eta 0:00:04\r", - 
"\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████ | 3.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▏ | 3.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▏ | 3.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▏ | 3.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▏ | 3.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▏ | 3.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▏ | 3.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▏ | 3.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▎ | 3.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▎ | 3.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▎ | 3.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▎ | 3.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▎ | 3.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▎ | 3.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▍ | 3.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▍ | 3.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▍ | 3.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▍ | 3.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▍ | 3.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▍ | 3.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▍ | 3.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▌ | 3.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▌ | 3.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▌ | 3.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▌ | 3.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▌ | 3.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▌ | 3.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K 
|█████▌ | 3.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▋ | 3.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▋ | 3.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▋ | 3.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▋ | 3.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▋ | 3.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▋ | 3.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▋ | 3.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▊ | 3.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▊ | 3.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▊ | 3.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▊ | 3.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▊ | 3.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▊ | 3.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▊ | 3.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▉ | 3.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▉ | 3.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▉ | 3.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▉ | 3.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▉ | 3.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▉ | 3.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████▉ | 3.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████ | 3.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████ | 3.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████ | 3.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████ | 3.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████ | 3.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▏ | 3.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K 
|██████▏ | 3.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▏ | 3.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▏ | 3.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▏ | 3.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▏ | 4.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▏ | 4.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▎ | 4.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▎ | 4.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▎ | 4.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▎ | 4.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▎ | 4.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▎ | 4.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▎ | 4.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▍ | 4.0 MB 4.8 MB/s eta 0:00:04\r", + "\r\n", + "\u001b[K |▎ | 194 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▎ | 204 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▍ | 215 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▍ | 225 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▍ | 235 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▍ | 245 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▍ | 256 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▍ | 266 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▍ | 276 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▌ | 286 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▌ | 296 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▌ | 307 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▌ | 317 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▌ | 327 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▌ | 337 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▌ | 348 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▋ | 358 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▋ | 368 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▋ | 378 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▋ | 389 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▋ | 399 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▋ | 409 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▋ | 419 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▊ | 430 kB 4.8 MB/s eta 0:00:05\r\n", + 
"\u001b[K |▊ | 440 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▊ | 450 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▊ | 460 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▊ | 471 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▊ | 481 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▊ | 491 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▉ | 501 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▉ | 512 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▉ | 522 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▉ | 532 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▉ | 542 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▉ | 552 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |▉ | 563 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█ | 573 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█ | 583 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█ | 593 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█ | 604 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█ | 614 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█ | 624 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█ | 634 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█ | 645 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█ | 655 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█ | 665 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█ | 675 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█ | 686 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█ | 696 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█ | 706 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▏ | 716 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▏ | 727 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▏ | 737 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▏ | 747 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▏ | 757 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▏ | 768 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▏ | 778 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▎ | 788 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▎ | 798 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▎ | 808 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▎ | 819 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▎ | 829 kB 4.8 MB/s eta 0:00:05\r\n", 
+ "\u001b[K |█▎ | 839 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▍ | 849 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▍ | 860 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▍ | 870 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▍ | 880 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▍ | 890 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▍ | 901 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▍ | 911 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▌ | 921 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▌ | 931 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▌ | 942 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▌ | 952 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▌ | 962 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▌ | 972 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▌ | 983 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▋ | 993 kB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▋ | 1.0 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▋ | 1.0 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▋ | 1.0 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▋ | 1.0 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▋ | 1.0 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▋ | 1.1 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▊ | 1.1 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▊ | 1.1 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▊ | 1.1 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▊ | 1.1 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▊ | 1.1 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▊ | 1.1 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▊ | 1.1 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▉ | 1.1 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▉ | 1.1 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▉ | 1.2 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▉ | 1.2 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▉ | 1.2 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▉ | 1.2 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |█▉ | 1.2 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██ | 1.2 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██ | 1.2 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██ | 1.2 
MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██ | 1.2 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██ | 1.2 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██ | 1.3 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██▏ | 1.4 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██▏ | 1.4 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██▏ | 1.4 MB 4.8 MB/s eta 0:00:05\r\n", + "\u001b[K |██▏ | 1.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▏ | 1.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▏ | 1.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▏ | 1.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▎ | 1.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▎ | 1.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▎ | 1.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▎ | 1.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▎ | 1.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▎ | 1.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▎ | 1.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▍ | 1.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▍ | 1.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▍ | 1.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▍ | 1.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▍ | 1.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▍ | 1.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▍ | 1.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▌ | 1.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▌ | 1.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▌ | 1.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▌ | 1.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▌ | 1.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▌ | 
1.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▌ | 1.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▋ | 1.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▋ | 1.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▋ | 1.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▋ | 1.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▋ | 1.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▋ | 1.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▊ | 1.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▊ | 1.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▊ | 1.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▊ | 1.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▊ | 1.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▊ | 1.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▊ | 1.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▉ | 1.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▉ | 1.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▉ | 1.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▉ | 1.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▉ | 1.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▉ | 1.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██▉ | 1.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███ | 1.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███ | 1.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███ | 2.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███ | 2.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███ | 2.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▏ | 2.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▏ | 2.0 MB 4.8 MB/s eta 0:00:04\r\n", + 
"\u001b[K |███▏ | 2.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▏ | 2.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▏ | 2.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▏ | 2.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▏ | 2.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▎ | 2.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▎ | 2.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▎ | 2.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▎ | 2.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▎ | 2.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▎ | 2.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▎ | 2.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▍ | 2.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▍ | 2.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▍ | 2.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▍ | 2.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▍ | 2.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▍ | 2.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▍ | 2.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▌ | 2.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▌ | 2.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▌ | 2.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▌ | 2.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▌ | 2.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▌ | 2.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▌ | 2.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▋ | 2.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▋ | 2.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▋ | 2.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▋ | 2.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▋ | 2.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▋ | 2.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▋ | 2.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▊ | 2.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▊ | 2.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▊ | 2.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▊ | 2.4 MB 4.8 MB/s eta 0:00:04\r\n", + 
"\u001b[K |███▊ | 2.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▊ | 2.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▊ | 2.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▉ | 2.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▉ | 2.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▉ | 2.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▉ | 2.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▉ | 2.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▉ | 2.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███▉ | 2.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████ | 2.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████ | 2.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████ | 2.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████ | 2.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████ | 2.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████ | 2.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████ | 2.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████ | 2.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████ | 2.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████ | 2.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████ | 2.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████ | 2.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████ | 2.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▏ | 2.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▏ | 2.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▏ | 2.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▏ | 2.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▏ | 2.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▏ | 2.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▏ | 2.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▎ | 2.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▎ | 2.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▎ | 2.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▎ | 2.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▎ | 2.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▎ | 2.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▎ | 2.8 MB 4.8 MB/s eta 
0:00:04\r\n", + "\u001b[K |████▍ | 2.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▍ | 2.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▍ | 2.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▍ | 2.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▍ | 2.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▍ | 2.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▍ | 2.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▌ | 2.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▌ | 2.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▌ | 2.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▌ | 2.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▌ | 2.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▌ | 2.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▌ | 2.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▋ | 2.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▋ | 2.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▋ | 2.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▋ | 2.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▋ | 2.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▋ | 3.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▋ | 3.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▊ | 3.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▊ | 3.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▊ | 3.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▊ | 3.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▊ | 3.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▊ | 3.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▊ | 3.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▉ | 3.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▉ | 3.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▉ | 3.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▉ | 3.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▉ | 3.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▉ | 3.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████▉ | 3.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████ | 3.1 MB 4.8 MB/s eta 0:00:04\r\n", + 
"\u001b[K |█████ | 3.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████ | 3.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████ | 3.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████ | 3.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▏ | 3.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▏ | 3.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▏ | 3.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▏ | 3.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▏ | 3.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▏ | 3.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▏ | 3.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▎ | 3.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▎ | 3.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▎ | 3.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▎ | 3.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▎ | 3.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▎ | 3.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▍ | 3.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▍ | 3.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▍ | 3.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▍ | 3.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▍ | 3.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▍ | 3.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▍ | 3.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▌ | 3.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▌ | 3.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▌ | 3.5 MB 4.8 MB/s eta 
0:00:04\r\n", + "\u001b[K |█████▌ | 3.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▌ | 3.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▌ | 3.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▌ | 3.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▋ | 3.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▋ | 3.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▋ | 3.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▋ | 3.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▋ | 3.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▋ | 3.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▋ | 3.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▊ | 3.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▊ | 3.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▊ | 3.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▊ | 3.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▊ | 3.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▊ | 3.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▊ | 3.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▉ | 3.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▉ | 3.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▉ | 3.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▉ | 3.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▉ | 3.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▉ | 3.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████▉ | 3.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████ | 3.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████ | 3.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████ | 3.9 
MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████ | 3.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████ | 3.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████ | 3.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▏ | 3.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▏ | 3.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▏ | 3.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▏ | 3.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▏ | 3.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▏ | 4.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▏ | 4.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▎ | 4.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▎ | 4.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▎ | 4.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▎ | 4.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▎ | 4.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▎ | 4.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▎ | 4.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▍ | 4.0 MB 4.8 MB/s eta 0:00:04\r\n", "\u001b[K |██████▍ | 4.1 MB 4.8 MB/s eta 0:00:04" ] }, @@ -546,643 +546,643 @@ "name": "stdout", "output_type": "stream", "text": [ - "\r", - "\u001b[K |██████▍ | 4.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▍ | 4.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▍ | 4.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▍ | 4.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▍ | 4.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▌ | 4.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▌ | 4.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▌ | 4.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▌ | 4.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▌ | 4.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▌ | 4.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▌ | 4.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▋ | 4.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▋ | 4.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▋ | 4.2 MB 4.8 
MB/s eta 0:00:04\r", - "\u001b[K |██████▋ | 4.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▋ | 4.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▋ | 4.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▊ | 4.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▊ | 4.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▊ | 4.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▊ | 4.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▊ | 4.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▊ | 4.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▊ | 4.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▉ | 4.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▉ | 4.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▉ | 4.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▉ | 4.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▉ | 4.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▉ | 4.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |██████▉ | 4.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████ | 4.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████ | 4.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████ | 4.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████ | 4.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████ | 4.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████ | 4.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▏ | 4.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▏ | 4.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▏ | 4.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▏ | 4.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▏ | 4.6 MB 4.8 MB/s eta 
0:00:04\r", - "\u001b[K |███████▏ | 4.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▏ | 4.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▎ | 4.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▎ | 4.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▎ | 4.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▎ | 4.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▎ | 4.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▎ | 4.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▎ | 4.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▍ | 4.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▍ | 4.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▍ | 4.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▍ | 4.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▍ | 4.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▍ | 4.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▍ | 4.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▌ | 4.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▌ | 4.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▌ | 4.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▌ | 4.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▌ | 4.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▌ | 4.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▌ | 4.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▋ | 4.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▋ | 4.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▋ | 4.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▋ | 4.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▋ | 4.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▋ | 4.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▋ | 4.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▊ | 4.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▊ | 4.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▊ | 4.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▊ | 4.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▊ | 4.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▊ | 4.9 
MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▊ | 5.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▉ | 5.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▉ | 5.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▉ | 5.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▉ | 5.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▉ | 5.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▉ | 5.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |███████▉ | 5.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████ | 5.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████ | 5.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████ | 5.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████ | 5.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▏ | 5.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▏ | 5.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▏ | 5.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▏ | 5.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▏ | 5.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▏ | 5.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▏ | 5.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▎ | 5.2 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▎ | 5.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▎ | 5.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▎ | 5.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▎ | 5.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▎ | 5.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▎ | 5.3 MB 4.8 MB/s eta 
0:00:04\r", - "\u001b[K |████████▍ | 5.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▍ | 5.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▍ | 5.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▍ | 5.3 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▍ | 5.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▍ | 5.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▍ | 5.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▌ | 5.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▌ | 5.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▌ | 5.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▌ | 5.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▌ | 5.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▌ | 5.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▌ | 5.4 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▋ | 5.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▋ | 5.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▋ | 5.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▋ | 5.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▋ | 5.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▋ | 5.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▋ | 5.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▊ | 5.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▊ | 5.5 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▊ | 5.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▊ | 5.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▊ | 5.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▊ | 5.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▊ | 5.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▉ | 5.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▉ | 5.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▉ | 5.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▉ | 5.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▉ | 5.6 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▉ | 5.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |████████▉ | 5.7 MB 4.8 MB/s eta 
0:00:04\r", - "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████ | 5.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████ | 5.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████ | 5.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████ | 5.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████ | 5.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████ | 5.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▏ | 5.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▏ | 5.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▏ | 5.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▏ | 5.8 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▏ | 5.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▏ | 5.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▏ | 5.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▎ | 5.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▎ | 5.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▎ | 5.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▎ | 5.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▎ | 5.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▎ | 5.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▍ | 5.9 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▍ | 6.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▍ | 6.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▍ | 6.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▍ | 6.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▍ | 6.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▍ | 6.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▌ | 6.0 
MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▌ | 6.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▌ | 6.0 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▌ | 6.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▌ | 6.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▌ | 6.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▌ | 6.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▋ | 6.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▋ | 6.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▋ | 6.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▋ | 6.1 MB 4.8 MB/s eta 0:00:04\r", - "\u001b[K |█████████▋ | 6.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▋ | 6.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▋ | 6.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▊ | 6.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▊ | 6.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▊ | 6.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▊ | 6.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▊ | 6.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▊ | 6.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▊ | 6.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▉ | 6.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▉ | 6.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▉ | 6.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▉ | 6.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▉ | 6.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▉ | 6.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████▉ | 6.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████ | 6.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████ | 6.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████ | 6.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████ | 6.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████ | 6.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r", 
- "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▏ | 6.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▏ | 6.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▏ | 6.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▏ | 6.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▏ | 6.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▏ | 6.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▏ | 6.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▎ | 6.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▎ | 6.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▎ | 6.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▎ | 6.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▎ | 6.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▎ | 6.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▎ | 6.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▍ | 6.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▍ | 6.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▍ | 6.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▍ | 6.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▍ | 6.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▍ | 6.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▍ | 6.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▌ | 6.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▌ | 6.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▌ | 6.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▌ | 6.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▌ | 6.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▌ | 6.7 MB 4.8 MB/s eta 0:00:03\r", 
- "\u001b[K |██████████▌ | 6.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▋ | 6.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▋ | 6.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▋ | 6.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▋ | 6.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▋ | 6.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▋ | 6.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▊ | 6.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▊ | 6.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▊ | 6.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▊ | 6.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▊ | 6.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▊ | 6.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▊ | 6.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▉ | 6.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▉ | 6.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▉ | 6.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▉ | 6.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▉ | 6.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▉ | 6.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████▉ | 6.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████ | 6.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████ | 7.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████ | 7.1 MB 4.8 MB/s eta 
0:00:03\r", - "\u001b[K |███████████ | 7.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▏ | 7.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▏ | 7.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▏ | 7.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▏ | 7.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▏ | 7.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▏ | 7.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▏ | 7.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▎ | 7.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▎ | 7.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▎ | 7.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▎ | 7.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▎ | 7.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▎ | 7.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▎ | 7.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▍ | 7.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▍ | 7.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▍ | 7.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▍ | 7.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▍ | 7.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▍ | 7.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▍ | 7.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▌ | 7.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▌ | 7.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▌ | 7.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▌ | 7.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▌ | 7.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▌ | 7.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▌ | 7.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▋ | 7.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▋ | 7.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▋ | 7.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▋ | 7.4 MB 4.8 MB/s eta 0:00:03\r", - 
"\u001b[K |███████████▋ | 7.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▋ | 7.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▋ | 7.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▊ | 7.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▊ | 7.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▊ | 7.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▊ | 7.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▊ | 7.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▊ | 7.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▊ | 7.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▉ | 7.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▉ | 7.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▉ | 7.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▉ | 7.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▉ | 7.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▉ | 7.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████▉ | 7.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████ | 7.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████ | 7.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████ | 7.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████ | 7.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████ | 7.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████ | 7.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████ | 7.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████ | 7.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████ | 7.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████ | 7.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████ | 7.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████ | 7.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████ | 7.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▏ | 7.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▏ | 7.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▏ | 7.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K 
|████████████▏ | 7.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▏ | 7.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▏ | 7.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▏ | 7.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▎ | 7.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▎ | 7.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▎ | 7.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▎ | 7.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▎ | 7.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▎ | 7.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▎ | 7.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▍ | 7.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▍ | 7.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▍ | 7.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▍ | 7.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▍ | 7.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▍ | 7.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▍ | 7.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▌ | 7.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▌ | 7.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▌ | 8.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▌ | 8.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▌ | 8.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▌ | 8.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▌ | 8.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▋ | 8.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▋ | 8.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▋ | 8.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▋ | 8.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▋ | 8.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▋ | 8.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▋ | 8.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▊ | 8.1 MB 4.8 MB/s eta 
0:00:03\r", - "\u001b[K |████████████▊ | 8.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▊ | 8.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▊ | 8.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▊ | 8.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▊ | 8.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▊ | 8.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▉ | 8.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▉ | 8.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▉ | 8.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▉ | 8.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▉ | 8.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▉ | 8.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████▉ | 8.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████ | 8.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████ | 8.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████ | 8.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████ | 8.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▏ | 8.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▏ | 8.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▏ | 8.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▏ | 8.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▏ | 8.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▏ | 
8.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▏ | 8.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▎ | 8.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▎ | 8.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▎ | 8.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▎ | 8.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▎ | 8.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▎ | 8.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▍ | 8.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▍ | 8.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▍ | 8.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▍ | 8.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▍ | 8.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▍ | 8.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▍ | 8.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▌ | 8.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▌ | 8.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▌ | 8.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▌ | 8.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▌ | 8.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▌ | 8.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▌ | 8.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▋ | 8.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▋ | 8.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▋ | 8.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▋ | 8.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▋ | 8.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▋ | 8.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▋ | 8.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▊ | 8.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▊ | 8.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▊ | 8.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▊ | 8.7 MB 4.8 MB/s 
eta 0:00:03\r", - "\u001b[K |█████████████▊ | 8.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▊ | 8.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▊ | 8.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▉ | 8.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▉ | 8.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▉ | 8.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▉ | 8.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▉ | 8.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▉ | 8.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████▉ | 8.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████ | 9.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████ | 9.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████ | 9.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████ | 9.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▏ | 9.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▏ | 9.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▏ | 9.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▏ | 9.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▏ | 9.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▏ | 9.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▏ | 9.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▎ | 9.1 MB 4.8 MB/s eta 
0:00:03\r", - "\u001b[K |██████████████▎ | 9.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▎ | 9.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▎ | 9.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▎ | 9.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▎ | 9.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▎ | 9.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▍ | 9.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▍ | 9.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▍ | 9.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▍ | 9.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▍ | 9.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▍ | 9.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▍ | 9.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▌ | 9.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▌ | 9.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▌ | 9.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▌ | 9.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▌ | 9.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▌ | 9.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▌ | 9.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▋ | 9.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▋ | 9.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▋ | 9.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▋ | 9.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▋ | 9.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▋ | 9.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▊ | 9.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▊ | 9.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▊ | 9.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▊ | 9.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▊ | 9.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▊ | 9.4 
MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▊ | 9.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▉ | 9.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▉ | 9.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▉ | 9.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▉ | 9.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▉ | 9.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▉ | 9.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |██████████████▉ | 9.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████ | 9.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████ | 9.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████ | 9.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████ | 9.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████ | 9.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████ | 9.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▏ | 9.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▏ | 9.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▏ | 9.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▏ | 9.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▏ | 9.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▏ | 9.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▏ | 9.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▎ | 9.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▎ | 9.7 MB 4.8 MB/s eta 0:00:03\r", - 
"\u001b[K |███████████████▎ | 9.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▎ | 9.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▎ | 9.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▎ | 9.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▎ | 9.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▍ | 9.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▍ | 9.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▍ | 9.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▍ | 9.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▍ | 9.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▍ | 9.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▍ | 9.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▌ | 9.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▌ | 9.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▌ | 9.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▌ | 9.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▌ | 9.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▌ | 9.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▌ | 9.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▋ | 9.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▋ | 9.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▋ | 9.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▋ | 10.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▋ | 10.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▋ | 10.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▋ | 10.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▊ | 10.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▊ | 10.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▊ | 10.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▊ | 10.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▊ | 10.0 MB 4.8 MB/s eta 0:00:03\r", - 
"\u001b[K |███████████████▊ | 10.0 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▊ | 10.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▉ | 10.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▉ | 10.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▉ | 10.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▉ | 10.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▉ | 10.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▉ | 10.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |███████████████▉ | 10.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████ | 10.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████ | 10.1 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████ | 10.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████ | 10.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▏ | 10.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▏ | 10.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▏ | 10.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▏ | 10.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▏ | 10.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▏ | 10.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▏ | 10.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▎ | 10.3 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▎ | 10.4 MB 
4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▎ | 10.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▎ | 10.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▎ | 10.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▎ | 10.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▎ | 10.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▍ | 10.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▍ | 10.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▍ | 10.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▍ | 10.4 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▍ | 10.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▍ | 10.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▍ | 10.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▌ | 10.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▌ | 10.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▌ | 10.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▌ | 10.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▌ | 10.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▌ | 10.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▌ | 10.5 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▋ | 10.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▋ | 10.6 MB 4.8 MB/s eta 0:00:03\r", + "\r\n", + "\u001b[K |██████▍ | 4.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▍ | 4.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▍ | 4.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▍ | 4.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▍ | 4.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▌ | 4.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▌ | 4.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▌ | 4.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▌ | 4.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▌ | 4.2 MB 4.8 MB/s eta 0:00:04\r\n", + 
"\u001b[K |██████▌ | 4.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▌ | 4.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▋ | 4.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▋ | 4.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▋ | 4.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▋ | 4.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▋ | 4.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▋ | 4.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▊ | 4.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▊ | 4.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▊ | 4.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▊ | 4.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▊ | 4.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▊ | 4.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▊ | 4.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▉ | 4.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▉ | 4.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▉ | 4.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▉ | 4.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▉ | 4.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▉ | 4.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |██████▉ | 4.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████ | 4.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████ | 4.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████ | 4.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████ | 4.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████ | 4.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████ | 4.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r\n", + 
"\u001b[K |███████ | 4.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▏ | 4.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▏ | 4.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▏ | 4.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▏ | 4.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▏ | 4.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▏ | 4.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▏ | 4.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▎ | 4.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▎ | 4.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▎ | 4.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▎ | 4.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▎ | 4.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▎ | 4.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▎ | 4.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▍ | 4.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▍ | 4.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▍ | 4.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▍ | 4.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▍ | 4.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▍ | 4.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▍ | 4.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▌ | 4.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▌ | 4.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▌ | 4.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▌ | 4.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▌ | 4.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▌ | 4.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▌ | 4.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▋ | 4.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▋ | 4.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▋ | 4.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▋ | 4.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▋ | 4.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▋ | 4.9 MB 
4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▋ | 4.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▊ | 4.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▊ | 4.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▊ | 4.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▊ | 4.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▊ | 4.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▊ | 4.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▊ | 5.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▉ | 5.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▉ | 5.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▉ | 5.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▉ | 5.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▉ | 5.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▉ | 5.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |███████▉ | 5.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████ | 5.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████ | 5.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████ | 5.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████ | 5.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████ | 5.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▏ | 5.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▏ | 5.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▏ | 5.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▏ | 5.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▏ | 5.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▏ | 5.2 MB 4.8 MB/s eta 
0:00:04\r\n", + "\u001b[K |████████▏ | 5.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▎ | 5.2 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▎ | 5.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▎ | 5.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▎ | 5.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▎ | 5.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▎ | 5.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▎ | 5.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▍ | 5.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▍ | 5.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▍ | 5.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▍ | 5.3 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▍ | 5.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▍ | 5.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▍ | 5.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▌ | 5.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▌ | 5.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▌ | 5.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▌ | 5.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▌ | 5.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▌ | 5.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▌ | 5.4 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▋ | 5.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▋ | 5.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▋ | 5.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▋ | 5.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▋ | 5.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▋ | 5.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▋ | 5.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▊ | 5.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▊ | 5.5 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▊ | 5.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▊ | 5.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▊ | 5.6 MB 4.8 
MB/s eta 0:00:04\r\n", + "\u001b[K |████████▊ | 5.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▊ | 5.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▉ | 5.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▉ | 5.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▉ | 5.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▉ | 5.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▉ | 5.6 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▉ | 5.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |████████▉ | 5.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████ | 5.7 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████ | 5.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████ | 5.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████ | 5.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████ | 5.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████ | 5.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████ | 5.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▏ | 5.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▏ | 5.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▏ | 5.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▏ | 5.8 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▏ | 5.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▏ | 5.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▏ | 5.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▎ | 5.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▎ | 5.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▎ | 5.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K 
|█████████▎ | 5.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▎ | 5.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▎ | 5.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▍ | 5.9 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▍ | 6.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▍ | 6.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▍ | 6.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▍ | 6.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▍ | 6.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▍ | 6.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▌ | 6.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▌ | 6.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▌ | 6.0 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▌ | 6.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▌ | 6.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▌ | 6.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▌ | 6.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▋ | 6.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▋ | 6.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▋ | 6.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▋ | 6.1 MB 4.8 MB/s eta 0:00:04\r\n", + "\u001b[K |█████████▋ | 6.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▋ | 6.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▋ | 6.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▊ | 6.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▊ | 6.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▊ | 6.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▊ | 6.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▊ | 6.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▊ | 6.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▊ | 6.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▉ | 6.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▉ | 6.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▉ | 6.3 
MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▉ | 6.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▉ | 6.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▉ | 6.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████▉ | 6.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████ | 6.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████ | 6.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████ | 6.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████ | 6.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████ | 6.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████ | 6.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▏ | 6.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▏ | 6.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▏ | 6.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▏ | 6.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▏ | 6.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▏ | 6.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▏ | 6.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▎ | 6.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▎ | 6.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▎ | 6.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▎ | 6.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▎ | 6.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▎ | 6.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▎ | 6.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▍ | 6.6 MB 
4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▍ | 6.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▍ | 6.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▍ | 6.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▍ | 6.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▍ | 6.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▍ | 6.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▌ | 6.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▌ | 6.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▌ | 6.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▌ | 6.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▌ | 6.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▌ | 6.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▌ | 6.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▋ | 6.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▋ | 6.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▋ | 6.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▋ | 6.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▋ | 6.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▋ | 6.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▊ | 6.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▊ | 6.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▊ | 6.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▊ | 6.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▊ | 6.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▊ | 6.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▊ | 6.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▉ | 6.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▉ | 6.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▉ | 6.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▉ | 6.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▉ | 6.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████▉ | 6.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K 
|██████████▉ | 6.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████ | 6.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████ | 7.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████ | 7.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████ | 7.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████ | 7.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▏ | 7.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▏ | 7.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▏ | 7.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▏ | 7.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▏ | 7.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▏ | 7.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▏ | 7.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▎ | 7.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▎ | 7.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▎ | 7.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▎ | 7.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▎ | 7.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▎ | 7.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▎ | 7.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▍ | 7.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▍ | 7.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▍ | 7.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▍ | 7.3 MB 4.8 
MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▍ | 7.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▍ | 7.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▍ | 7.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▌ | 7.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▌ | 7.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▌ | 7.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▌ | 7.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▌ | 7.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▌ | 7.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▌ | 7.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▋ | 7.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▋ | 7.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▋ | 7.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▋ | 7.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▋ | 7.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▋ | 7.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▋ | 7.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▊ | 7.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▊ | 7.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▊ | 7.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▊ | 7.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▊ | 7.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▊ | 7.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▊ | 7.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▉ | 7.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▉ | 7.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▉ | 7.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▉ | 7.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▉ | 7.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▉ | 7.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████▉ | 7.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████ | 7.6 MB 4.8 MB/s eta 
0:00:03\r\n", + "\u001b[K |████████████ | 7.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████ | 7.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████ | 7.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████ | 7.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████ | 7.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████ | 7.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████ | 7.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████ | 7.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████ | 7.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████ | 7.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████ | 7.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████ | 7.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▏ | 7.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▏ | 7.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▏ | 7.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▏ | 7.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▏ | 7.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▏ | 7.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▏ | 7.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▎ | 7.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▎ | 7.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▎ | 7.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▎ | 7.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▎ | 7.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▎ | 7.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▎ | 7.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▍ | 7.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▍ | 7.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▍ | 7.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▍ | 7.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▍ | 7.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▍ | 7.9 MB 4.8 MB/s 
eta 0:00:03\r\n", + "\u001b[K |████████████▍ | 7.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▌ | 7.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▌ | 7.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▌ | 8.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▌ | 8.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▌ | 8.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▌ | 8.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▌ | 8.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▋ | 8.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▋ | 8.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▋ | 8.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▋ | 8.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▋ | 8.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▋ | 8.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▋ | 8.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▊ | 8.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▊ | 8.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▊ | 8.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▊ | 8.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▊ | 8.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▊ | 8.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▊ | 8.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▉ | 8.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▉ | 8.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▉ | 8.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▉ | 8.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▉ | 8.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▉ | 8.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████▉ | 8.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████ | 8.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████ | 8.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████ | 
8.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████ | 8.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████ | 8.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▏ | 8.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▏ | 8.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▏ | 8.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▏ | 8.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▏ | 8.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▏ | 8.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▏ | 8.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▎ | 8.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▎ | 8.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▎ | 8.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▎ | 8.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▎ | 8.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▎ | 8.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▍ | 8.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▍ | 8.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▍ | 8.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▍ | 8.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▍ | 8.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▍ | 8.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▍ | 8.6 MB 4.8 MB/s eta 
0:00:03\r\n", + "\u001b[K |█████████████▌ | 8.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▌ | 8.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▌ | 8.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▌ | 8.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▌ | 8.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▌ | 8.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▌ | 8.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▋ | 8.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▋ | 8.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▋ | 8.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▋ | 8.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▋ | 8.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▋ | 8.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▋ | 8.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▊ | 8.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▊ | 8.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▊ | 8.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▊ | 8.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▊ | 8.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▊ | 8.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▊ | 8.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▉ | 8.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▉ | 8.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▉ | 8.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▉ | 8.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▉ | 8.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▉ | 8.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████▉ | 8.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r\n", + 
"\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████ | 8.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████ | 9.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████ | 9.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████ | 9.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████ | 9.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▏ | 9.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▏ | 9.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▏ | 9.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▏ | 9.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▏ | 9.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▏ | 9.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▏ | 9.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▎ | 9.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▎ | 9.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▎ | 9.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▎ | 9.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▎ | 9.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▎ | 9.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▎ | 9.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▍ | 9.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▍ | 9.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▍ | 9.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▍ | 9.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▍ | 9.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▍ | 9.2 MB 4.8 MB/s eta 
0:00:03\r\n", + "\u001b[K |██████████████▍ | 9.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▌ | 9.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▌ | 9.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▌ | 9.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▌ | 9.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▌ | 9.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▌ | 9.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▌ | 9.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▋ | 9.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▋ | 9.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▋ | 9.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▋ | 9.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▋ | 9.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▋ | 9.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▊ | 9.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▊ | 9.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▊ | 9.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▊ | 9.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▊ | 9.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▊ | 9.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▊ | 9.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▉ | 9.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▉ | 9.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▉ | 9.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▉ | 9.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▉ | 9.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▉ | 9.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |██████████████▉ | 9.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████ | 9.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████ | 9.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████ | 9.5 
MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████ | 9.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████ | 9.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████ | 9.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████ | 9.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▏ | 9.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▏ | 9.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▏ | 9.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▏ | 9.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▏ | 9.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▏ | 9.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▏ | 9.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▎ | 9.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▎ | 9.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▎ | 9.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▎ | 9.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▎ | 9.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▎ | 9.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▎ | 9.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▍ | 9.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▍ | 9.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▍ | 9.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▍ | 9.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▍ | 9.8 MB 4.8 MB/s eta 0:00:03\r\n", 
+ "\u001b[K |███████████████▍ | 9.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▍ | 9.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▌ | 9.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▌ | 9.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▌ | 9.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▌ | 9.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▌ | 9.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▌ | 9.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▌ | 9.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▋ | 9.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▋ | 9.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▋ | 9.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▋ | 10.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▋ | 10.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▋ | 10.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▋ | 10.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▊ | 10.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▊ | 10.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▊ | 10.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▊ | 10.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▊ | 10.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▊ | 10.0 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▊ | 10.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▉ | 10.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▉ | 10.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▉ | 10.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▉ | 10.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▉ | 10.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▉ | 10.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |███████████████▉ | 10.1 MB 4.8 MB/s eta 0:00:03\r\n", + 
"\u001b[K |████████████████ | 10.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████ | 10.1 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████ | 10.2 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████ | 10.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████ | 10.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▏ | 10.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▏ | 10.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▏ | 10.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▏ | 10.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▏ | 10.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▏ | 10.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▏ | 10.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▎ | 10.3 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▎ | 10.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▎ | 10.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▎ | 10.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▎ | 10.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▎ | 10.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▎ | 10.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▍ | 10.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▍ | 10.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▍ | 10.4 MB 
4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▍ | 10.4 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▍ | 10.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▍ | 10.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▍ | 10.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▌ | 10.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▌ | 10.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▌ | 10.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▌ | 10.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▌ | 10.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▌ | 10.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▌ | 10.5 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▋ | 10.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▋ | 10.6 MB 4.8 MB/s eta 0:00:03\r\n", "\u001b[K |████████████████▋ | 10.6 MB 4.8 MB/s eta 0:00:03" ] }, @@ -1190,406 +1190,406 @@ "name": "stdout", "output_type": "stream", "text": [ - "\r", - "\u001b[K |████████████████▋ | 10.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▋ | 10.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▋ | 10.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▋ | 10.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▊ | 10.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▊ | 10.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▊ | 10.6 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▊ | 10.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▊ | 10.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▊ | 10.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▊ | 10.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▉ | 10.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▉ | 10.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▉ | 10.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K 
|████████████████▉ | 10.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▉ | 10.7 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▉ | 10.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |████████████████▉ | 10.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████████ | 10.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████████ | 10.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████████ | 10.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████████ | 10.9 MB 4.8 MB/s eta 0:00:03\r", - "\u001b[K |█████████████████ | 10.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████ | 10.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▏ | 10.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▏ | 10.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▏ | 10.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▏ | 10.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▏ | 11.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▏ | 11.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▏ | 11.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▎ | 11.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▎ | 11.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▎ | 11.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▎ | 11.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▎ | 11.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K 
|█████████████████▎ | 11.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▍ | 11.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▍ | 11.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▍ | 11.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▍ | 11.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▍ | 11.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▍ | 11.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▍ | 11.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▌ | 11.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▌ | 11.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▌ | 11.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▌ | 11.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▌ | 11.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▌ | 11.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▌ | 11.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▋ | 11.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▋ | 11.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▋ | 11.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▋ | 11.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▋ | 11.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▋ | 11.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▋ | 11.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▊ | 11.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▊ | 11.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▊ | 11.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▊ | 11.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▊ | 11.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▊ | 11.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▊ | 11.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▉ | 11.3 MB 4.8 MB/s eta 0:00:02\r", - 
"\u001b[K |█████████████████▉ | 11.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▉ | 11.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▉ | 11.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▉ | 11.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▉ | 11.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████▉ | 11.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████ | 11.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████ | 11.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████ | 11.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████ | 11.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████ | 11.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▏ | 11.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▏ | 11.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▏ | 11.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▏ | 11.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▏ | 11.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▏ | 11.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▏ | 11.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▎ | 11.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▎ | 11.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▎ | 11.6 MB 4.8 
MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▎ | 11.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▎ | 11.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▎ | 11.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▎ | 11.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▍ | 11.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▍ | 11.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▍ | 11.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▍ | 11.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▍ | 11.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▍ | 11.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▍ | 11.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▌ | 11.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▌ | 11.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▌ | 11.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▌ | 11.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▌ | 11.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▌ | 11.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▌ | 11.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▋ | 11.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▋ | 11.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▋ | 11.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▋ | 11.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▋ | 11.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▋ | 11.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▊ | 11.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▊ | 11.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▊ | 11.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▊ | 11.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▊ | 11.9 MB 4.8 MB/s eta 0:00:02\r", - 
"\u001b[K |██████████████████▊ | 12.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▊ | 12.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▉ | 12.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▉ | 12.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▉ | 12.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▉ | 12.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▉ | 12.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▉ | 12.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████▉ | 12.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████ | 12.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████ | 12.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████ | 12.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████ | 12.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▏ | 12.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▏ | 12.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▏ | 12.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▏ | 12.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▏ | 12.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▏ | 12.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K 
|███████████████████▏ | 12.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▎ | 12.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▎ | 12.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▎ | 12.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▎ | 12.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▎ | 12.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▎ | 12.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▎ | 12.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▍ | 12.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▍ | 12.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▍ | 12.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▍ | 12.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▍ | 12.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▍ | 12.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▍ | 12.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▌ | 12.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▌ | 12.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▌ | 12.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▌ | 12.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▌ | 12.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▌ | 12.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▌ | 12.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▋ | 12.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▋ | 12.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▋ | 12.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▋ | 12.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▋ | 12.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▋ | 12.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▋ | 12.5 MB 4.8 MB/s eta 0:00:02\r", - 
"\u001b[K |███████████████████▊ | 12.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▊ | 12.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▊ | 12.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▊ | 12.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▊ | 12.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▊ | 12.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▊ | 12.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▉ | 12.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▉ | 12.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▉ | 12.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▉ | 12.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▉ | 12.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████▉ | 12.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████ | 12.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████ | 12.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████ | 12.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████ | 12.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████ | 12.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████ | 12.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▏ | 12.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▏ | 12.8 MB 4.8 MB/s eta 
0:00:02\r", - "\u001b[K |████████████████████▏ | 12.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▏ | 12.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▏ | 12.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▏ | 12.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▏ | 12.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▎ | 12.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▎ | 12.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▎ | 12.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▎ | 12.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▎ | 12.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▎ | 12.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▎ | 13.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▍ | 13.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▍ | 13.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▍ | 13.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▍ | 13.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▍ | 13.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▍ | 13.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▍ | 13.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▌ | 13.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▌ | 13.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▌ | 13.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▌ | 13.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▌ | 13.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▌ | 13.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▌ | 13.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▋ | 13.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▋ | 13.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K 
|████████████████████▋ | 13.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▋ | 13.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▋ | 13.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▋ | 13.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▋ | 13.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▊ | 13.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▊ | 13.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▊ | 13.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▊ | 13.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▊ | 13.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▊ | 13.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▊ | 13.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▉ | 13.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▉ | 13.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▉ | 13.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▉ | 13.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▉ | 13.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▉ | 13.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████▉ | 13.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████ | 13.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████ | 13.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████ | 13.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████ | 13.4 MB 4.8 
MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████ | 13.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▏ | 13.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▏ | 13.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▏ | 13.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▏ | 13.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▏ | 13.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▏ | 13.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▎ | 13.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▎ | 13.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▎ | 13.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▎ | 13.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▎ | 13.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▎ | 13.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▎ | 13.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▍ | 13.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▍ | 13.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▍ | 13.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▍ | 13.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▍ | 13.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▍ | 13.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▍ | 13.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▌ | 13.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▌ | 13.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▌ | 13.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▌ | 13.7 MB 4.8 MB/s eta 
0:00:02\r", - "\u001b[K |█████████████████████▌ | 13.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▌ | 13.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▌ | 13.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▋ | 13.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▋ | 13.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▋ | 13.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▋ | 13.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▋ | 13.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▋ | 13.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▋ | 13.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▊ | 13.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▊ | 13.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▊ | 13.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▊ | 13.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▊ | 13.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▊ | 13.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▊ | 13.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▉ | 13.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▉ | 13.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▉ | 13.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▉ | 13.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▉ | 13.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▉ | 13.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |█████████████████████▉ | 13.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r", 
- "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████ | 14.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████ | 14.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████ | 14.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████ | 14.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▏ | 14.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▏ | 14.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▏ | 14.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▏ | 14.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▏ | 14.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▏ | 14.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▏ | 14.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▎ | 14.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▎ | 14.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▎ | 14.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▎ | 14.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▎ | 14.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▎ | 14.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▎ | 14.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▍ | 14.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▍ | 14.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▍ | 14.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▍ | 14.3 MB 4.8 MB/s eta 
0:00:02\r", - "\u001b[K |██████████████████████▍ | 14.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▍ | 14.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▍ | 14.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▌ | 14.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▌ | 14.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▌ | 14.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▌ | 14.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▌ | 14.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▌ | 14.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▋ | 14.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▋ | 14.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▋ | 14.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▋ | 14.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▋ | 14.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▋ | 14.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▋ | 14.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▊ | 14.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▊ | 14.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▊ | 14.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▊ | 14.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▊ | 14.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▊ | 14.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▊ | 14.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▉ | 14.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▉ | 14.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▉ | 14.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▉ | 14.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▉ | 14.6 
MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▉ | 14.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |██████████████████████▉ | 14.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████ | 14.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████ | 14.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████ | 14.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████ | 14.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████ | 14.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████ | 14.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████ | 14.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████ | 14.7 MB 4.8 MB/s eta 0:00:02\r", + "\r\n", + "\u001b[K |████████████████▋ | 10.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▋ | 10.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▋ | 10.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▋ | 10.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▊ | 10.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▊ | 10.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▊ | 10.6 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▊ | 10.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▊ | 10.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▊ | 10.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▊ | 10.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▉ | 10.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▉ | 10.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▉ | 10.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▉ | 10.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▉ | 10.7 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▉ | 10.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |████████████████▉ | 10.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K 
|█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████████ | 10.8 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████████ | 10.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████████ | 10.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████████ | 10.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████████ | 10.9 MB 4.8 MB/s eta 0:00:03\r\n", + "\u001b[K |█████████████████ | 10.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████ | 10.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▏ | 10.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▏ | 10.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▏ | 10.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▏ | 10.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▏ | 11.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▏ | 11.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▏ | 11.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▎ | 11.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▎ | 11.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▎ | 11.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▎ | 11.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▎ | 11.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▎ | 11.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▍ | 11.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▍ | 11.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K 
|█████████████████▍ | 11.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▍ | 11.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▍ | 11.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▍ | 11.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▍ | 11.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▌ | 11.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▌ | 11.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▌ | 11.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▌ | 11.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▌ | 11.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▌ | 11.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▌ | 11.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▋ | 11.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▋ | 11.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▋ | 11.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▋ | 11.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▋ | 11.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▋ | 11.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▋ | 11.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▊ | 11.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▊ | 11.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▊ | 11.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▊ | 11.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▊ | 11.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▊ | 11.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▊ | 11.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▉ | 11.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▉ | 11.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▉ | 11.4 MB 4.8 MB/s eta 0:00:02\r\n", + 
"\u001b[K |█████████████████▉ | 11.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▉ | 11.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▉ | 11.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████▉ | 11.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████ | 11.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████ | 11.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████ | 11.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████ | 11.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████ | 11.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████ | 11.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▏ | 11.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▏ | 11.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▏ | 11.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▏ | 11.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▏ | 11.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▏ | 11.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▏ | 11.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▎ | 11.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▎ | 11.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▎ | 11.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▎ | 11.7 MB 4.8 MB/s eta 
0:00:02\r\n", + "\u001b[K |██████████████████▎ | 11.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▎ | 11.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▎ | 11.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▍ | 11.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▍ | 11.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▍ | 11.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▍ | 11.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▍ | 11.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▍ | 11.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▍ | 11.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▌ | 11.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▌ | 11.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▌ | 11.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▌ | 11.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▌ | 11.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▌ | 11.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▌ | 11.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▋ | 11.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▋ | 11.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▋ | 11.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▋ | 11.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▋ | 11.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▋ | 11.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▊ | 11.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▊ | 11.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▊ | 11.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▊ | 11.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▊ | 11.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K 
|██████████████████▊ | 12.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▊ | 12.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▉ | 12.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▉ | 12.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▉ | 12.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▉ | 12.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▉ | 12.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▉ | 12.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████▉ | 12.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████ | 12.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████ | 12.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████ | 12.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████ | 12.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████ | 12.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▏ | 12.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▏ | 12.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▏ | 12.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▏ | 12.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▏ | 12.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▏ | 12.2 MB 
4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▏ | 12.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▎ | 12.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▎ | 12.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▎ | 12.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▎ | 12.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▎ | 12.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▎ | 12.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▎ | 12.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▍ | 12.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▍ | 12.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▍ | 12.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▍ | 12.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▍ | 12.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▍ | 12.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▍ | 12.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▌ | 12.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▌ | 12.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▌ | 12.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▌ | 12.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▌ | 12.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▌ | 12.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▌ | 12.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▋ | 12.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▋ | 12.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▋ | 12.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▋ | 12.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▋ | 12.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▋ | 12.5 MB 4.8 MB/s 
eta 0:00:02\r\n", + "\u001b[K |███████████████████▋ | 12.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▊ | 12.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▊ | 12.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▊ | 12.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▊ | 12.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▊ | 12.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▊ | 12.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▊ | 12.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▉ | 12.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▉ | 12.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▉ | 12.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▉ | 12.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▉ | 12.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████▉ | 12.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████ | 12.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████ | 12.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████ | 12.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████ | 12.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████ | 12.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████ | 12.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████ | 12.8 MB 4.8 MB/s eta 
0:00:02\r\n", + "\u001b[K |████████████████████▏ | 12.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▏ | 12.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▏ | 12.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▏ | 12.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▏ | 12.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▏ | 12.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▏ | 12.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▎ | 12.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▎ | 12.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▎ | 12.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▎ | 12.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▎ | 12.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▎ | 12.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▎ | 13.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▍ | 13.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▍ | 13.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▍ | 13.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▍ | 13.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▍ | 13.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▍ | 13.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▍ | 13.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▌ | 13.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▌ | 13.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▌ | 13.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▌ | 13.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▌ | 13.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▌ | 13.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▌ | 13.1 
MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▋ | 13.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▋ | 13.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▋ | 13.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▋ | 13.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▋ | 13.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▋ | 13.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▋ | 13.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▊ | 13.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▊ | 13.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▊ | 13.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▊ | 13.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▊ | 13.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▊ | 13.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▊ | 13.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▉ | 13.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▉ | 13.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▉ | 13.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▉ | 13.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▉ | 13.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▉ | 13.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████▉ | 13.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████ | 13.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████ | 13.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████ | 13.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K 
|█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████ | 13.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████ | 13.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▏ | 13.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▏ | 13.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▏ | 13.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▏ | 13.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▏ | 13.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▏ | 13.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▎ | 13.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▎ | 13.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▎ | 13.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▎ | 13.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▎ | 13.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▎ | 13.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▎ | 13.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▍ | 13.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▍ | 13.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▍ | 13.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▍ | 13.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▍ | 13.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▍ | 13.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▍ | 13.7 MB 
4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▌ | 13.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▌ | 13.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▌ | 13.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▌ | 13.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▌ | 13.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▌ | 13.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▌ | 13.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▋ | 13.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▋ | 13.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▋ | 13.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▋ | 13.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▋ | 13.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▋ | 13.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▋ | 13.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▊ | 13.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▊ | 13.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▊ | 13.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▊ | 13.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▊ | 13.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▊ | 13.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▊ | 13.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▉ | 13.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▉ | 13.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▉ | 13.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▉ | 13.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▉ | 13.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |█████████████████████▉ | 13.9 MB 4.8 MB/s eta 0:00:02\r\n", + 
"\u001b[K |█████████████████████▉ | 13.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████ | 14.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████ | 14.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████ | 14.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████ | 14.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████ | 14.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▏ | 14.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▏ | 14.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▏ | 14.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▏ | 14.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▏ | 14.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▏ | 14.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▏ | 14.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▎ | 14.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▎ | 14.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▎ | 14.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▎ | 14.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▎ | 14.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K 
|██████████████████████▎ | 14.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▎ | 14.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▍ | 14.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▍ | 14.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▍ | 14.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▍ | 14.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▍ | 14.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▍ | 14.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▍ | 14.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▌ | 14.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▌ | 14.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▌ | 14.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▌ | 14.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▌ | 14.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▌ | 14.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▋ | 14.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▋ | 14.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▋ | 14.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▋ | 14.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▋ | 14.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▋ | 14.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▋ | 14.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▊ | 14.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▊ | 14.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▊ | 14.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▊ | 14.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▊ | 14.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K 
|██████████████████████▊ | 14.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▊ | 14.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▉ | 14.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▉ | 14.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▉ | 14.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▉ | 14.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▉ | 14.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▉ | 14.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |██████████████████████▉ | 14.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████ | 14.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████ | 14.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████ | 14.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████ | 14.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████ | 14.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████ | 14.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████ | 14.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████ | 14.7 MB 4.8 MB/s eta 0:00:02\r\n", "\u001b[K |███████████████████████ | 14.7 MB 4.8 MB/s eta 0:00:02" ] }, @@ -1597,418 +1597,418 @@ "name": "stdout", "output_type": "stream", "text": [ - "\r", - "\u001b[K |███████████████████████ | 14.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████ | 14.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████ | 14.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████ | 14.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████ | 14.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▏ | 14.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▏ | 14.7 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▏ | 14.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K 
|███████████████████████▏ | 14.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▏ | 14.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▏ | 14.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▏ | 14.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▎ | 14.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▎ | 14.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▎ | 14.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▎ | 14.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▎ | 14.8 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▎ | 14.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▎ | 14.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▍ | 14.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▍ | 14.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▍ | 14.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▍ | 14.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▍ | 14.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▍ | 14.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▍ | 14.9 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▌ | 15.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▌ | 15.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▌ | 15.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▌ | 15.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▌ | 15.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▌ | 15.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▌ | 15.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▋ | 15.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▋ | 15.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▋ | 
15.0 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▋ | 15.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▋ | 15.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▋ | 15.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▋ | 15.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▊ | 15.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▊ | 15.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▊ | 15.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▊ | 15.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▊ | 15.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▊ | 15.1 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▊ | 15.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▉ | 15.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▉ | 15.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▉ | 15.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▉ | 15.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▉ | 15.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |███████████████████████▉ | 15.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████ | 15.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████ | 15.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████ | 15.2 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 
0:00:02\r", - "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████ | 15.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▏ | 15.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▏ | 15.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▏ | 15.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▏ | 15.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▏ | 15.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▏ | 15.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▏ | 15.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▎ | 15.4 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▎ | 15.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▎ | 15.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▎ | 15.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▎ | 15.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▎ | 15.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▎ | 15.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▍ | 15.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▍ | 15.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▍ | 15.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▍ | 15.5 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▍ | 15.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▍ | 15.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▍ | 15.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▌ | 15.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▌ | 15.6 MB 4.8 MB/s eta 
0:00:02\r", - "\u001b[K |████████████████████████▌ | 15.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▌ | 15.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▌ | 15.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▌ | 15.6 MB 4.8 MB/s eta 0:00:02\r", - "\u001b[K |████████████████████████▌ | 15.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▋ | 15.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▋ | 15.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▋ | 15.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▋ | 15.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▋ | 15.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▋ | 15.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▋ | 15.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▊ | 15.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▊ | 15.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▊ | 15.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▊ | 15.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▊ | 15.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▊ | 15.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▊ | 15.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▉ | 15.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▉ | 15.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▉ | 15.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▉ | 15.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▉ | 15.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▉ | 15.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▉ | 15.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 15.9 MB 4.8 MB/s eta 
0:00:01\r", - "\u001b[K |█████████████████████████ | 15.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 15.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 15.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 15.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 15.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 15.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 15.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 16.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 16.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 16.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 16.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 16.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 16.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▏ | 16.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▏ | 16.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▏ | 16.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▏ | 16.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▏ | 16.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▏ | 16.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▎ | 16.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▎ | 16.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▎ | 16.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▎ | 16.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▎ | 16.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▎ | 16.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▎ | 16.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▍ | 16.1 MB 
4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▍ | 16.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▍ | 16.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▍ | 16.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▍ | 16.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▍ | 16.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▍ | 16.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▌ | 16.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▌ | 16.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▌ | 16.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▌ | 16.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▌ | 16.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▌ | 16.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▌ | 16.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▋ | 16.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▋ | 16.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▋ | 16.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▋ | 16.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▋ | 16.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▋ | 16.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▋ | 16.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▊ | 16.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▊ | 16.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▊ | 16.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▊ | 16.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▊ | 16.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▊ | 16.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K 
|█████████████████████████▊ | 16.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▉ | 16.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▉ | 16.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▉ | 16.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▉ | 16.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▉ | 16.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▉ | 16.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▉ | 16.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 16.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 16.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 16.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 16.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 16.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▏ | 16.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▏ | 16.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▏ | 16.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▏ | 16.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▏ | 16.7 MB 4.8 
MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▏ | 16.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▏ | 16.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▎ | 16.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▎ | 16.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▎ | 16.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▎ | 16.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▎ | 16.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▎ | 16.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▎ | 16.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▍ | 16.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▍ | 16.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▍ | 16.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▍ | 16.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▍ | 16.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▍ | 16.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▍ | 16.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▌ | 16.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▌ | 16.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▌ | 16.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▌ | 16.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▌ | 16.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▌ | 16.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▋ | 16.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▋ | 16.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▋ | 16.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▋ | 17.0 MB 4.8 MB/s eta 0:00:01\r", - 
"\u001b[K |██████████████████████████▋ | 17.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▋ | 17.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▋ | 17.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▊ | 17.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▊ | 17.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▊ | 17.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▊ | 17.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▊ | 17.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▊ | 17.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▊ | 17.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▉ | 17.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▉ | 17.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▉ | 17.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▉ | 17.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▉ | 17.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▉ | 17.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▉ | 17.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 17.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K 
|███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 17.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 17.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 17.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▏ | 17.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▏ | 17.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▏ | 17.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▏ | 17.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▏ | 17.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▏ | 17.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▏ | 17.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▎ | 17.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▎ | 17.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▎ | 17.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▎ | 17.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▎ | 17.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▎ | 17.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▎ | 17.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▍ | 17.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▍ | 17.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▍ | 17.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▍ | 17.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▍ | 17.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▍ | 17.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▍ | 17.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K 
|███████████████████████████▌ | 17.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▌ | 17.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▌ | 17.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▌ | 17.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▌ | 17.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▌ | 17.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▌ | 17.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▋ | 17.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▋ | 17.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▋ | 17.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▋ | 17.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▋ | 17.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▋ | 17.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▋ | 17.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▊ | 17.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▊ | 17.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▊ | 17.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▊ | 17.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▊ | 17.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▊ | 17.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▊ | 17.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▉ | 17.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▉ | 17.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▉ | 17.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▉ | 17.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▉ | 17.8 MB 4.8 MB/s eta 0:00:01\r", - 
"\u001b[K |███████████████████████████▉ | 17.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 17.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 17.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 17.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 17.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 17.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 17.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▏ | 17.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▏ | 17.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▏ | 17.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▏ | 18.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▏ | 18.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▏ | 18.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▏ | 18.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▎ | 18.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▎ | 18.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▎ | 18.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▎ | 18.0 MB 4.8 MB/s eta 
0:00:01\r", - "\u001b[K |████████████████████████████▎ | 18.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▎ | 18.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▎ | 18.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▍ | 18.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▍ | 18.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▍ | 18.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▍ | 18.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▍ | 18.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▍ | 18.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▍ | 18.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▌ | 18.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▌ | 18.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▌ | 18.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▌ | 18.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▌ | 18.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▌ | 18.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▌ | 18.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▋ | 18.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▋ | 18.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▋ | 18.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▋ | 18.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▋ | 18.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▋ | 18.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▋ | 18.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▊ | 18.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K 
|████████████████████████████▊ | 18.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▊ | 18.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▊ | 18.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▊ | 18.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▊ | 18.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▊ | 18.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▉ | 18.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▉ | 18.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▉ | 18.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▉ | 18.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▉ | 18.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▉ | 18.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▉ | 18.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 18.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 18.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 18.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s 
eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 18.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▏ | 18.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▏ | 18.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▏ | 18.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▏ | 18.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▏ | 18.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▏ | 18.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▎ | 18.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▎ | 18.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▎ | 18.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▎ | 18.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▎ | 18.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▎ | 18.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▎ | 18.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▍ | 18.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▍ | 18.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▍ | 18.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▍ | 18.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▍ | 18.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▍ | 18.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▍ | 18.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▌ | 18.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▌ | 18.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▌ | 18.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▌ | 18.8 MB 4.8 MB/s eta 0:00:01\r", - 
"\u001b[K |█████████████████████████████▌ | 18.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▌ | 18.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▌ | 18.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▋ | 18.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▋ | 18.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▋ | 18.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▋ | 18.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▋ | 18.9 MB 4.8 MB/s eta 0:00:01\r", + "\r\n", + "\u001b[K |███████████████████████ | 14.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████ | 14.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████ | 14.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████ | 14.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████ | 14.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▏ | 14.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▏ | 14.7 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▏ | 14.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▏ | 14.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▏ | 14.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▏ | 14.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▏ | 14.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▎ | 14.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▎ | 14.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▎ | 14.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▎ | 14.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▎ | 14.8 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▎ | 14.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K 
|███████████████████████▎ | 14.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▍ | 14.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▍ | 14.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▍ | 14.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▍ | 14.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▍ | 14.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▍ | 14.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▍ | 14.9 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▌ | 15.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▌ | 15.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▌ | 15.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▌ | 15.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▌ | 15.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▌ | 15.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▌ | 15.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▋ | 15.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▋ | 15.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▋ | 15.0 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▋ | 15.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▋ | 15.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▋ | 15.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▋ | 15.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▊ | 15.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▊ | 15.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▊ | 15.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▊ | 15.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▊ | 15.1 MB 4.8 MB/s eta 
0:00:02\r\n", + "\u001b[K |███████████████████████▊ | 15.1 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▊ | 15.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▉ | 15.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▉ | 15.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▉ | 15.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▉ | 15.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▉ | 15.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |███████████████████████▉ | 15.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████ | 15.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████ | 15.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████ | 15.2 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████ | 15.3 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████ | 15.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▏ | 15.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▏ | 15.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▏ | 15.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▏ | 15.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K 
|████████████████████████▏ | 15.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▏ | 15.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▏ | 15.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▎ | 15.4 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▎ | 15.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▎ | 15.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▎ | 15.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▎ | 15.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▎ | 15.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▎ | 15.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▍ | 15.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▍ | 15.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▍ | 15.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▍ | 15.5 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▍ | 15.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▍ | 15.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▍ | 15.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▌ | 15.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▌ | 15.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▌ | 15.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▌ | 15.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▌ | 15.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▌ | 15.6 MB 4.8 MB/s eta 0:00:02\r\n", + "\u001b[K |████████████████████████▌ | 15.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▋ | 15.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▋ | 15.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K 
|████████████████████████▋ | 15.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▋ | 15.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▋ | 15.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▋ | 15.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▋ | 15.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▊ | 15.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▊ | 15.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▊ | 15.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▊ | 15.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▊ | 15.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▊ | 15.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▊ | 15.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▉ | 15.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▉ | 15.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▉ | 15.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▉ | 15.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▉ | 15.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▉ | 15.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████▉ | 15.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████ | 15.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████ | 15.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████ | 15.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████ | 15.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████ | 15.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████ | 15.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████ | 15.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K 
|█████████████████████████ | 15.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████ | 16.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████ | 16.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████ | 16.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████ | 16.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████ | 16.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████ | 16.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▏ | 16.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▏ | 16.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▏ | 16.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▏ | 16.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▏ | 16.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▏ | 16.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▎ | 16.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▎ | 16.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▎ | 16.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▎ | 16.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▎ | 16.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▎ | 16.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▎ | 16.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▍ | 16.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▍ | 16.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▍ | 16.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▍ | 16.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▍ | 16.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▍ | 16.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K 
|█████████████████████████▍ | 16.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▌ | 16.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▌ | 16.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▌ | 16.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▌ | 16.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▌ | 16.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▌ | 16.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▌ | 16.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▋ | 16.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▋ | 16.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▋ | 16.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▋ | 16.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▋ | 16.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▋ | 16.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▋ | 16.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▊ | 16.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▊ | 16.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▊ | 16.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▊ | 16.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▊ | 16.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▊ | 16.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▊ | 16.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▉ | 16.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▉ | 16.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▉ | 16.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▉ | 16.5 MB 4.8 MB/s eta 0:00:01\r\n", + 
"\u001b[K |█████████████████████████▉ | 16.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▉ | 16.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▉ | 16.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████ | 16.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████ | 16.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████ | 16.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████ | 16.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████ | 16.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████ | 16.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▏ | 16.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▏ | 16.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▏ | 16.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▏ | 16.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▏ | 16.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▏ | 16.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▏ | 16.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▎ | 16.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▎ | 16.7 MB 4.8 MB/s eta 
0:00:01\r\n", + "\u001b[K |██████████████████████████▎ | 16.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▎ | 16.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▎ | 16.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▎ | 16.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▎ | 16.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▍ | 16.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▍ | 16.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▍ | 16.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▍ | 16.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▍ | 16.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▍ | 16.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▍ | 16.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▌ | 16.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▌ | 16.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▌ | 16.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▌ | 16.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▌ | 16.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▌ | 16.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▋ | 16.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▋ | 16.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▋ | 16.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▋ | 17.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▋ | 17.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▋ | 17.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▋ | 17.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K 
|██████████████████████████▊ | 17.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▊ | 17.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▊ | 17.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▊ | 17.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▊ | 17.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▊ | 17.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▊ | 17.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▉ | 17.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▉ | 17.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▉ | 17.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▉ | 17.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▉ | 17.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▉ | 17.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████▉ | 17.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████ | 17.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████ | 17.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████ | 17.3 MB 4.8 MB/s 
eta 0:00:01\r\n", + "\u001b[K |███████████████████████████ | 17.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████ | 17.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▏ | 17.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▏ | 17.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▏ | 17.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▏ | 17.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▏ | 17.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▏ | 17.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▏ | 17.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▎ | 17.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▎ | 17.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▎ | 17.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▎ | 17.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▎ | 17.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▎ | 17.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▎ | 17.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▍ | 17.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▍ | 17.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▍ | 17.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▍ | 17.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▍ | 17.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▍ | 17.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▍ | 17.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▌ | 17.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▌ | 17.5 MB 4.8 MB/s eta 0:00:01\r\n", + 
"\u001b[K |███████████████████████████▌ | 17.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▌ | 17.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▌ | 17.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▌ | 17.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▌ | 17.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▋ | 17.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▋ | 17.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▋ | 17.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▋ | 17.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▋ | 17.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▋ | 17.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▋ | 17.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▊ | 17.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▊ | 17.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▊ | 17.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▊ | 17.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▊ | 17.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▊ | 17.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▊ | 17.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▉ | 17.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▉ | 17.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▉ | 17.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▉ | 17.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▉ | 17.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▉ | 17.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K 
|████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████ | 17.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████ | 17.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████ | 17.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████ | 17.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████ | 17.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████ | 17.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████ | 17.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▏ | 17.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▏ | 17.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▏ | 17.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▏ | 18.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▏ | 18.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▏ | 18.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▏ | 18.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▎ | 18.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▎ | 18.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▎ | 18.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▎ | 18.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K 
|████████████████████████████▎ | 18.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▎ | 18.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▎ | 18.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▍ | 18.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▍ | 18.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▍ | 18.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▍ | 18.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▍ | 18.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▍ | 18.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▍ | 18.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▌ | 18.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▌ | 18.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▌ | 18.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▌ | 18.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▌ | 18.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▌ | 18.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▌ | 18.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▋ | 18.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▋ | 18.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▋ | 18.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▋ | 18.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▋ | 18.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▋ | 18.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▋ | 18.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▊ | 18.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K 
|████████████████████████████▊ | 18.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▊ | 18.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▊ | 18.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▊ | 18.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▊ | 18.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▊ | 18.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▉ | 18.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▉ | 18.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▉ | 18.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▉ | 18.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▉ | 18.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▉ | 18.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████▉ | 18.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████ | 18.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████ | 18.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████ | 18.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K 
|█████████████████████████████ | 18.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████ | 18.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▏ | 18.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▏ | 18.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▏ | 18.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▏ | 18.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▏ | 18.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▏ | 18.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▎ | 18.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▎ | 18.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▎ | 18.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▎ | 18.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▎ | 18.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▎ | 18.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▎ | 18.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▍ | 18.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▍ | 18.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▍ | 18.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▍ | 18.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▍ | 18.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▍ | 18.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▍ | 18.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▌ | 18.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▌ | 18.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▌ | 18.8 MB 4.8 MB/s eta 
0:00:01\r\n", + "\u001b[K |█████████████████████████████▌ | 18.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▌ | 18.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▌ | 18.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▌ | 18.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▋ | 18.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▋ | 18.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▋ | 18.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▋ | 18.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▋ | 18.9 MB 4.8 MB/s eta 0:00:01\r\n", "\u001b[K |█████████████████████████████▋ | 18.9 MB 4.8 MB/s eta 0:00:01" ] }, @@ -2016,152 +2016,152 @@ "name": "stdout", "output_type": "stream", "text": [ - "\r", - "\u001b[K |█████████████████████████████▋ | 18.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▊ | 18.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▊ | 18.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▊ | 18.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▊ | 18.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▊ | 19.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▊ | 19.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▊ | 19.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▉ | 19.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▉ | 19.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▉ | 19.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▉ | 19.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▉ | 19.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▉ | 19.0 MB 4.8 MB/s 
eta 0:00:01\r", - "\u001b[K |█████████████████████████████▉ | 19.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 19.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 19.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 19.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 19.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▏ | 19.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▏ | 19.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 
0:00:01\r", - "\u001b[K |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▍ | 19.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▌ | 19.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▌ | 19.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▊ | 19.5 MB 4.8 MB/s eta 
0:00:01\r", - "\u001b[K |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▉ | 19.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▉ | 19.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▉ | 19.6 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 
0:00:01\r", - "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▏| 19.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▏| 19.8 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▎| 20.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▎| 20.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▌| 20.0 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 
0:00:01\r", - "\u001b[K |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▋| 20.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▋| 20.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▋| 20.1 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▊| 20.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 
0:00:01\r", - "\u001b[K |████████████████████████████████| 20.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████████| 20.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████████| 20.3 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████████| 20.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████████| 20.4 MB 4.8 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████████| 20.4 MB 4.8 MB/s eta 0:00:01\r", + "\r\n", + "\u001b[K |█████████████████████████████▋ | 18.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▊ | 18.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▊ | 18.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▊ | 18.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▊ | 18.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▊ | 19.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▊ | 19.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▊ | 19.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▉ | 19.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▉ | 19.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▉ | 19.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▉ | 19.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▉ | 19.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▉ | 19.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▉ | 19.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K 
|██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████ | 19.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████ | 19.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████ | 19.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████ | 19.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████ | 19.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▏ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▏ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▎ | 19.3 MB 
4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▍ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▌ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▌ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▊ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n", + 
"\u001b[K |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▉ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▉ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▉ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K 
|███████████████████████████████▏| 19.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▏| 19.8 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▎| 20.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▎| 20.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▌| 20.0 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K 
|███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▋| 20.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▋| 20.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▋| 20.1 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▊| 20.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K 
|████████████████████████████████| 20.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████████| 20.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████████| 20.3 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████████| 20.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████████| 20.4 MB 4.8 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████████████████████| 20.4 MB 4.8 MB/s eta 0:00:01\r\n", "\u001b[K |████████████████████████████████| 20.4 MB 4.8 MB/s eta 0:00:01" ] }, @@ -2169,7 +2169,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\r", + "\r\n", "\u001b[K |████████████████████████████████| 20.4 MB 4.8 MB/s \r\n" ] }, @@ -2179,14 +2179,14 @@ "text": [ "\u001b[?25hCollecting tqdm>=4.66.5\r\n", " Downloading tqdm-4.67.3-py3-none-any.whl (78 kB)\r\n", - "\u001b[?25l\r", - "\u001b[K |████▏ | 10 kB 37.6 MB/s eta 0:00:01\r", - "\u001b[K |████████▍ | 20 kB 52.7 MB/s eta 0:00:01\r", - "\u001b[K |████████████▌ | 30 kB 68.0 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▊ | 40 kB 77.6 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████ | 51 kB 84.9 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 61 kB 93.1 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▎ | 71 kB 12.4 MB/s eta 0:00:01\r", + "\u001b[?25l\r\n", + "\u001b[K |████▏ | 10 kB 37.6 MB/s eta 0:00:01\r\n", + "\u001b[K |████████▍ | 20 kB 52.7 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████▌ | 30 kB 68.0 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████▊ | 40 kB 77.6 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████ | 51 kB 84.9 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████ | 61 kB 93.1 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▎ | 71 kB 12.4 MB/s eta 0:00:01\r\n", "\u001b[K |████████████████████████████████| 78 kB 6.4 MB/s \r\n" ] }, @@ -2196,21 +2196,21 @@ "text": [ "\u001b[?25hCollecting psutil>=6.1.0\r\n", " Downloading 
psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl (155 kB)\r\n", - "\u001b[?25l\r", - "\u001b[K |██ | 10 kB 1.7 MB/s eta 0:00:01\r", - "\u001b[K |████▏ | 20 kB 3.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▎ | 30 kB 5.0 MB/s eta 0:00:01\r", - "\u001b[K |████████▍ | 40 kB 6.6 MB/s eta 0:00:01\r", - "\u001b[K |██████████▌ | 51 kB 8.2 MB/s eta 0:00:01\r", - "\u001b[K |████████████▋ | 61 kB 9.7 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▊ | 71 kB 11.2 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▉ | 81 kB 12.7 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████ | 92 kB 14.2 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████ | 102 kB 15.6 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▏ | 112 kB 15.6 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▎ | 122 kB 15.6 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▍ | 133 kB 15.6 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▌ | 143 kB 15.6 MB/s eta 0:00:01\r", + "\u001b[?25l\r\n", + "\u001b[K |██ | 10 kB 1.7 MB/s eta 0:00:01\r\n", + "\u001b[K |████▏ | 20 kB 3.4 MB/s eta 0:00:01\r\n", + "\u001b[K |██████▎ | 30 kB 5.0 MB/s eta 0:00:01\r\n", + "\u001b[K |████████▍ | 40 kB 6.6 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████▌ | 51 kB 8.2 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████▋ | 61 kB 9.7 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████▊ | 71 kB 11.2 MB/s eta 0:00:01\r\n", + "\u001b[K |████████████████▉ | 81 kB 12.7 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████ | 92 kB 14.2 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████ | 102 kB 15.6 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████▏ | 112 kB 15.6 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▎ | 122 kB 15.6 MB/s eta 0:00:01\r\n", + "\u001b[K |███████████████████████████▍ | 133 kB 15.6 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████████▌ | 143 kB 15.6 MB/s eta 0:00:01\r\n", "\u001b[K 
|███████████████████████████████▋| 153 kB 15.6 MB/s eta 0:00:01" ] }, @@ -2218,7 +2218,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\r", + "\r\n", "\u001b[K |████████████████████████████████| 155 kB 15.6 MB/s \r\n", "\u001b[?25hCollecting submitit>=1.5.2\r\n" ] @@ -2228,14 +2228,14 @@ "output_type": "stream", "text": [ " Downloading submitit-1.5.4-py3-none-any.whl (76 kB)\r\n", - "\u001b[?25l\r", - "\u001b[K |████▎ | 10 kB 45.3 MB/s eta 0:00:01\r", - "\u001b[K |████████▋ | 20 kB 65.5 MB/s eta 0:00:01\r", - "\u001b[K |█████████████ | 30 kB 82.9 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▎ | 40 kB 92.5 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▌ | 51 kB 99.9 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▉ | 61 kB 108.5 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▏ | 71 kB 112.0 MB/s eta 0:00:01\r", + "\u001b[?25l\r\n", + "\u001b[K |████▎ | 10 kB 45.3 MB/s eta 0:00:01\r\n", + "\u001b[K |████████▋ | 20 kB 65.5 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████ | 30 kB 82.9 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████▎ | 40 kB 92.5 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████▌ | 51 kB 99.9 MB/s eta 0:00:01\r\n", + "\u001b[K |█████████████████████████▉ | 61 kB 108.5 MB/s eta 0:00:01\r\n", + "\u001b[K |██████████████████████████████▏ | 71 kB 112.0 MB/s eta 0:00:01\r\n", "\u001b[K |████████████████████████████████| 76 kB 2.0 MB/s \r\n" ] }, @@ -2256,7 +2256,7 @@ } ], "source": [ - "!pip install \"saev @ git+https://github.com/OSU-NLP-Group/saev@6d6eff52c4ae04f5153badc0a553adddc8d3e3cc\"" + "!pip install \"saev @ git+https://github.com/Imageomics/saev@6d6eff52c4ae04f5153badc0a553adddc8d3e3cc\"" ] }, { diff --git a/pyproject.toml b/pyproject.toml index a511a49..05b0da8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,8 +38,8 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project.urls] -Homepage = "https://osu-nlp-group.github.io/saev" -Issues = 
"https://github.com/OSU-NLP-Group/saev/issues" +Homepage = "https://imageomics.github.io/saev" +Issues = "https://github.com/Imageomics/saev/issues" [tool.ruff] extend-exclude = ["tests/sweeps"] diff --git a/scripts/export_notebook.py b/scripts/export_notebook.py index 4c446fe..2411ee0 100644 --- a/scripts/export_notebook.py +++ b/scripts/export_notebook.py @@ -11,7 +11,7 @@ ROOT = Path(__file__).resolve().parent.parent MARIMO_SRC = ROOT / "examples" / "inference.py" IPYNB_DST = ROOT / "examples" / "inference.ipynb" -REPO = "OSU-NLP-Group/saev" +REPO = "Imageomics/saev" def get_commit_hash() -> str: diff --git a/scripts/push_models.py b/scripts/push_models.py index 1d10d38..559ab2d 100644 --- a/scripts/push_models.py +++ b/scripts/push_models.py @@ -1,5 +1,5 @@ -repo_url = "https://github.com/OSU-NLP-Group/saev" -docs_url = "https://osu-nlp-group.github.io/saev" +repo_url = "https://github.com/Imageomics/saev" +docs_url = "https://imageomics.github.io/saev" def main( diff --git a/src/web/apps/classification/dist/app.js b/src/web/apps/classification/dist/app.js index 38025e0..94d05be 100644 --- a/src/web/apps/classification/dist/app.js +++ b/src/web/apps/classification/dist/app.js @@ -8416,7 +8416,7 @@ var $author$project$Classification$explainGradioError = function (err) { $elm$html$Html$a, _List_fromArray( [ - $elm$html$Html$Attributes$href('https://github.com/OSU-NLP-Group/saev/issues/new'), + $elm$html$Html$Attributes$href('https://github.com/Imageomics/saev/issues/new'), $elm$html$Html$Attributes$class('text-sky-500 hover:underline') ]), _List_fromArray( diff --git a/src/web/apps/semseg/dist/app.js b/src/web/apps/semseg/dist/app.js index 2ffc1e3..5f445f8 100644 --- a/src/web/apps/semseg/dist/app.js +++ b/src/web/apps/semseg/dist/app.js @@ -8422,7 +8422,7 @@ var $author$project$Semseg$explainGradioError = function (err) { $elm$html$Html$a, _List_fromArray( [ - $elm$html$Html$Attributes$href('https://github.com/OSU-NLP-Group/saev/issues/new'), + 
$elm$html$Html$Attributes$href('https://github.com/Imageomics/saev/issues/new'), $elm$html$Html$Attributes$class('text-sky-500 hover:underline') ]), _List_fromArray( diff --git a/src/web/src/Classification.elm b/src/web/src/Classification.elm index 6283a74..b7eb7eb 100644 --- a/src/web/src/Classification.elm +++ b/src/web/src/Classification.elm @@ -536,7 +536,7 @@ explainGradioError err = let githubLink = Html.a - [ Html.Attributes.href "https://github.com/OSU-NLP-Group/saev/issues/new" + [ Html.Attributes.href "https://github.com/Imageomics/saev/issues/new" , class "text-sky-500 hover:underline" ] [ Html.text "GitHub" ] diff --git a/src/web/src/Semseg.elm b/src/web/src/Semseg.elm index b0e092d..0b7bc4a 100644 --- a/src/web/src/Semseg.elm +++ b/src/web/src/Semseg.elm @@ -507,7 +507,7 @@ explainGradioError err = let githubLink = Html.a - [ Html.Attributes.href "https://github.com/OSU-NLP-Group/saev/issues/new" + [ Html.Attributes.href "https://github.com/Imageomics/saev/issues/new" , class "text-sky-500 hover:underline" ] [ Html.text "GitHub" ] From 01cb2acf959967acb063576581527d61ab78348b Mon Sep 17 00:00:00 2001 From: egrace479 Date: Tue, 9 Jun 2026 10:56:37 -0400 Subject: [PATCH 3/9] Clarify license @samuelstevens first commit: a0f8528474f4f19c6a292f01ee1544ed03ff1f22 --- CITATION.cff | 2 +- LICENSE | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CITATION.cff b/CITATION.cff index d09bd8c..e3af6c7 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -23,7 +23,7 @@ keywords: - sparse autoencoders - interpretability - computer vision -license: CC-BY-4.0 +license: MIT commit: 6a34b6916fda7b04cc2d89749b3ad6425a8f39e6 date-released: '2025-11-16' diff --git a/LICENSE b/LICENSE index 2968c24..15ec217 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,7 @@ MIT License +Copyright (c) 2024-2026 Samuel Stevens + Copyright (c) 2023 Joseph Bloom Permission is hereby granted, free of charge, to any person obtaining a copy From 12bce2eeae89ccb4fa3a896e7d5ce03de65183e3 
Mon Sep 17 00:00:00 2001 From: egrace479 Date: Tue, 9 Jun 2026 11:16:23 -0400 Subject: [PATCH 4/9] Add provenance files for Zenodo snapshot --- .github/workflows/validate-zenodo.yaml | 23 ++++++++++++++++ .zenodo.json | 37 ++++++++++++++++++++++++++ CITATION.cff | 13 ++++++--- 3 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/validate-zenodo.yaml create mode 100644 .zenodo.json diff --git a/.github/workflows/validate-zenodo.yaml b/.github/workflows/validate-zenodo.yaml new file mode 100644 index 0000000..838b1fa --- /dev/null +++ b/.github/workflows/validate-zenodo.yaml @@ -0,0 +1,23 @@ +name: Check zenodo metadata + +on: + push: + paths: + - '.zenodo.json' + - '.github/workflows/validate-zenodo.yaml' + +jobs: + check-zenodo-metadata: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + - uses: actions/setup-node@v6 + with: + node-version: '24' + - name: Install dependencies + run: npm install zenodraft@0.14.1 + - name: Check .zenodo.json file + run: | + npx zenodraft metadata validate .zenodo.json diff --git a/.zenodo.json b/.zenodo.json new file mode 100644 index 0000000..d4ae2b4 --- /dev/null +++ b/.zenodo.json @@ -0,0 +1,37 @@ +{ + "creators": [ + { + "name": "Stevens, Samuel", + "orcid": "https://orcid.org/0009-0000-9493-7766", + "affiliation": "The Ohio State University" + } + ], + "description": "saev is a package for training sparse autoencoders (SAEs) on vision transformers (ViTs) in PyTorch.", + "keywords": [ + "imageomics", + "sparse autoencoders", + "interpretability", + "computer vision" + ], + "title": "saev: Sparse Autoencoders for Vision Transformers", + "version": "0.1.0", + "license": "MIT", + "publication_date": "2026-06-10", + "grants": [ + { + "id": "021nxhr62::2118240" + } + ], + "related_identifiers": [ + { + "identifier": "10.48550/arXiv.2502.06755", + "relation": "isSupplementTo", + "resource_type": "publication-preprint" + }, + { + "identifier": "10.48550/arXiv.2511.17735", + 
"relation": "isSupplementTo", + "resource_type": "publication-preprint" + } + ] +} \ No newline at end of file diff --git a/CITATION.cff b/CITATION.cff index e3af6c7..e716abc 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -24,6 +24,13 @@ keywords: - interpretability - computer vision license: MIT -commit: 6a34b6916fda7b04cc2d89749b3ad6425a8f39e6 -date-released: '2025-11-16' - +date-released: '2026-06-10' +identifiers: + - description: "The GitHub release URL of tag v0.1.0." + type: url + value: "https://github.com/Imageomics/saev/releases/tag/v0.1.0" + - description: "The GitHub URL of the commit tagged with v0.1.0." + type: url + value: "https://github.com/Imageomics/saev/tree/" # Update on release +version: 0.1.0 +#doi: From 7155c39032cb047f9d606b4fe5c68b37ad220c81 Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Fri, 12 Jun 2026 12:14:13 -0400 Subject: [PATCH 5/9] Update docs build --- docs/api/api/framework/inference/index.html | 4 +- docs/api/api/framework/train/index.html | 32 +++--- docs/api/api/nn/modeling/index.html | 110 +++++++++++--------- docs/api/api/nn/saev.nn/index.html | 110 +++++++++++--------- docs/api/api/utils/wandb/index.html | 14 ++- docs/api/developers/protocol/index.html | 5 +- docs/api/search/search_index.json | 2 +- docs/api/sitemap.xml | 98 ++++++++--------- docs/api/sitemap.xml.gz | Bin 556 -> 553 bytes docs/api/users/guide/index.html | 2 +- docs/api/users/sweeps/index.html | 1 + 11 files changed, 209 insertions(+), 169 deletions(-) diff --git a/docs/api/api/framework/inference/index.html b/docs/api/api/framework/inference/index.html index 40d0505..914cb00 100644 --- a/docs/api/api/framework/inference/index.html +++ b/docs/api/api/framework/inference/index.html @@ -2647,7 +2647,8 @@

    360 361 362 -363

    @beartype.beartype
    +363
    +364
    @beartype.beartype
     def main(
         cfg: tp.Annotated[Config, tyro.conf.arg(name="")], sweep: pathlib.Path | None = None
     ):
    @@ -2691,6 +2692,7 @@ 

    executor = submitit.SlurmExecutor(folder=cfg.log_to) executor.update_parameters( + job_name="sae-inference", time=int(cfg.n_hours * 60), partition=cfg.slurm_partition, gpus_per_node=1, diff --git a/docs/api/api/framework/train/index.html b/docs/api/api/framework/train/index.html index 6dfae60..d8507f7 100644 --- a/docs/api/api/framework/train/index.html +++ b/docs/api/api/framework/train/index.html @@ -3425,9 +3425,7 @@

    Source code in src/saev/framework/train.py -
    508
    -509
    -510
    +              
    510
     511
     512
     513
    @@ -3533,7 +3531,9 @@ 

    613 614 615 -616

    @beartype.beartype
    +616
    +617
    +618
    @beartype.beartype
     @torch.no_grad()
     def evaluate(
         cfgs: list[Config], saes: torch.nn.ModuleList, objectives: torch.nn.ModuleList
    @@ -3731,9 +3731,7 @@ 

    Source code in src/saev/framework/train.py -
    703
    -704
    -705
    +              
    705
     706
     707
     708
    @@ -3823,7 +3821,9 @@ 

    792 793 794 -795

    @beartype.beartype
    +795
    +796
    +797
    @beartype.beartype
     def main(
         cfg: tp.Annotated[Config, tyro.conf.arg(name="")],
         sweep: pathlib.Path | None = None,
    @@ -4000,9 +4000,7 @@ 

    Source code in src/saev/framework/train.py -
    667
    -668
    -669
    +              
    669
     670
     671
     672
    @@ -4026,7 +4024,9 @@ 

    690 691 692 -693

    @beartype.beartype
    +693
    +694
    +695
    @beartype.beartype
     def split_cfgs(cfgs: list[Config]) -> list[list[Config]]:
         """
         Splits configs into groups that can be parallelized.
    @@ -4075,9 +4075,7 @@ 

    Source code in src/saev/framework/train.py -
    236
    -237
    -238
    +              
    238
     239
     240
     241
    @@ -4299,7 +4297,9 @@ 

    457 458 459 -460

    @beartype.beartype
    +460
    +461
    +462
    @beartype.beartype
     def train(
         cfgs: list[Config],
     ) -> tuple[
    diff --git a/docs/api/api/nn/modeling/index.html b/docs/api/api/nn/modeling/index.html
    index 7d9c4cd..97623e7 100644
    --- a/docs/api/api/nn/modeling/index.html
    +++ b/docs/api/api/nn/modeling/index.html
    @@ -2836,7 +2836,13 @@ 

    320 321 322 -323

    def __init__(self, cfg: SparseAutoencoderConfig):
    +323
    +324
    +325
    +326
    +327
    +328
    +329
    def __init__(self, cfg: SparseAutoencoderConfig):
         super().__init__()
     
         self.cfg = cfg
    @@ -2849,7 +2855,13 @@ 

    self.normalize_w_dec() - # Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. That means load_state_dict overwrites W_dec when it loads W_enc. + # Initialize W_enc to the transpose of W_dec. + # + # .clone() is critical here. Without it, W_enc is a transposed VIEW sharing storage with W_dec, which causes two bugs: + # + # 1. load_state_dict breaks: loading W_enc overwrites W_dec (shared memory), then loading W_dec overwrites W_enc. The loaded SAE ends up with one weight being the transpose of the other instead of the independently-trained values. + # + # 2. Any code that mutates W_dec in-place (e.g. normalize_w_dec) silently mutates W_enc too. The datapoint init in train.make_saes() relied on this accident: normalize_w_dec() kept W_enc columns unit-norm via shared storage. With .clone(), make_saes() must explicitly sync W_enc after normalizing W_dec. self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone()) self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae)) @@ -3052,13 +3064,7 @@

    Source code in src/saev/nn/modeling.py -
    345
    -346
    -347
    -348
    -349
    -350
    -351
    +              
    351
     352
     353
     354
    @@ -3110,7 +3116,13 @@ 

    400 401 402 -403

    def decode(
    +403
    +404
    +405
    +406
    +407
    +408
    +409
    def decode(
         self,
         f_x: Float[Tensor, "batch d_sae"],
         *,
    @@ -3222,17 +3234,17 @@ 

    Source code in src/saev/nn/modeling.py -
    325
    -326
    -327
    -328
    -329
    -330
    -331
    +              
    331
     332
     333
     334
    -335
    def forward(self, x: Float[Tensor, "batch d_model"]) -> Output:
    +335
    +336
    +337
    +338
    +339
    +340
    +341
    def forward(self, x: Float[Tensor, "batch d_model"]) -> Output:
         """
         Given x, calculates the reconstructed x_hat and the intermediate activations f_x.
     
    @@ -3265,13 +3277,13 @@ 

    Source code in src/saev/nn/modeling.py -
    405
    -406
    -407
    -408
    -409
    -410
    -411
    @torch.no_grad()
    +              
    411
    +412
    +413
    +414
    +415
    +416
    +417
    @torch.no_grad()
     def normalize_w_dec(self):
         """
         Set W_dec to unit-norm columns.
    @@ -3300,13 +3312,7 @@ 

    Source code in src/saev/nn/modeling.py -
    413
    -414
    -415
    -416
    -417
    -418
    -419
    +              
    419
     420
     421
     422
    @@ -3326,7 +3332,13 @@ 

    436 437 438 -439

    @torch.no_grad()
    +439
    +440
    +441
    +442
    +443
    +444
    +445
    @torch.no_grad()
     def remove_parallel_grads(self):
         """
         Update grads so that they remove the parallel component
    @@ -3804,13 +3816,7 @@ 

    Source code in src/saev/nn/modeling.py -
    542
    -543
    -544
    -545
    -546
    -547
    -548
    +              
    548
     549
     550
     551
    @@ -3830,7 +3836,13 @@ 

    565 566 567 -568

    @beartype.beartype
    +568
    +569
    +570
    +571
    +572
    +573
    +574
    @beartype.beartype
     def dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):
         """
         Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).
    @@ -3879,13 +3891,7 @@ 

    Source code in src/saev/nn/modeling.py -
    571
    -572
    -573
    -574
    -575
    -576
    -577
    +              
    577
     578
     579
     580
    @@ -3960,7 +3966,13 @@ 

    649 650 651 -652

    @beartype.beartype
    +652
    +653
    +654
    +655
    +656
    +657
    +658
    @beartype.beartype
     def load(fpath: pathlib.Path | str, *, device="cpu") -> SparseAutoencoder:
         """
         Loads a sparse autoencoder from disk.
    diff --git a/docs/api/api/nn/saev.nn/index.html b/docs/api/api/nn/saev.nn/index.html
    index 2f9ab87..8874d27 100644
    --- a/docs/api/api/nn/saev.nn/index.html
    +++ b/docs/api/api/nn/saev.nn/index.html
    @@ -2250,7 +2250,13 @@ 

    320 321 322 -323

    def __init__(self, cfg: SparseAutoencoderConfig):
    +323
    +324
    +325
    +326
    +327
    +328
    +329
    def __init__(self, cfg: SparseAutoencoderConfig):
         super().__init__()
     
         self.cfg = cfg
    @@ -2263,7 +2269,13 @@ 

    self.normalize_w_dec() - # Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. That means load_state_dict overwrites W_dec when it loads W_enc. + # Initialize W_enc to the transpose of W_dec. + # + # .clone() is critical here. Without it, W_enc is a transposed VIEW sharing storage with W_dec, which causes two bugs: + # + # 1. load_state_dict breaks: loading W_enc overwrites W_dec (shared memory), then loading W_dec overwrites W_enc. The loaded SAE ends up with one weight being the transpose of the other instead of the independently-trained values. + # + # 2. Any code that mutates W_dec in-place (e.g. normalize_w_dec) silently mutates W_enc too. The datapoint init in train.make_saes() relied on this accident: normalize_w_dec() kept W_enc columns unit-norm via shared storage. With .clone(), make_saes() must explicitly sync W_enc after normalizing W_dec. self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone()) self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae)) @@ -2466,13 +2478,7 @@

    Source code in src/saev/nn/modeling.py -
    345
    -346
    -347
    -348
    -349
    -350
    -351
    +              
    351
     352
     353
     354
    @@ -2524,7 +2530,13 @@ 

    400 401 402 -403

    def decode(
    +403
    +404
    +405
    +406
    +407
    +408
    +409
    def decode(
         self,
         f_x: Float[Tensor, "batch d_sae"],
         *,
    @@ -2636,17 +2648,17 @@ 

    Source code in src/saev/nn/modeling.py -
    325
    -326
    -327
    -328
    -329
    -330
    -331
    +              
    331
     332
     333
     334
    -335
    def forward(self, x: Float[Tensor, "batch d_model"]) -> Output:
    +335
    +336
    +337
    +338
    +339
    +340
    +341
    def forward(self, x: Float[Tensor, "batch d_model"]) -> Output:
         """
         Given x, calculates the reconstructed x_hat and the intermediate activations f_x.
     
    @@ -2679,13 +2691,13 @@ 

    Source code in src/saev/nn/modeling.py -
    405
    -406
    -407
    -408
    -409
    -410
    -411
    @torch.no_grad()
    +              
    411
    +412
    +413
    +414
    +415
    +416
    +417
    @torch.no_grad()
     def normalize_w_dec(self):
         """
         Set W_dec to unit-norm columns.
    @@ -2714,13 +2726,7 @@ 

    Source code in src/saev/nn/modeling.py -
    413
    -414
    -415
    -416
    -417
    -418
    -419
    +              
    419
     420
     421
     422
    @@ -2740,7 +2746,13 @@ 

    436 437 438 -439

    @torch.no_grad()
    +439
    +440
    +441
    +442
    +443
    +444
    +445
    @torch.no_grad()
     def remove_parallel_grads(self):
         """
         Update grads so that they remove the parallel component
    @@ -3049,13 +3061,7 @@ 

    Source code in src/saev/nn/modeling.py -
    542
    -543
    -544
    -545
    -546
    -547
    -548
    +              
    548
     549
     550
     551
    @@ -3075,7 +3081,13 @@ 

    565 566 567 -568

    @beartype.beartype
    +568
    +569
    +570
    +571
    +572
    +573
    +574
    @beartype.beartype
     def dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):
         """
         Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).
    @@ -3124,13 +3136,7 @@ 

    Source code in src/saev/nn/modeling.py -
    571
    -572
    -573
    -574
    -575
    -576
    -577
    +              
    577
     578
     579
     580
    @@ -3205,7 +3211,13 @@ 

    649 650 651 -652

    @beartype.beartype
    +652
    +653
    +654
    +655
    +656
    +657
    +658
    @beartype.beartype
     def load(fpath: pathlib.Path | str, *, device="cpu") -> SparseAutoencoder:
         """
         Loads a sparse autoencoder from disk.
    diff --git a/docs/api/api/utils/wandb/index.html b/docs/api/api/utils/wandb/index.html
    index 688b234..d44bbcc 100644
    --- a/docs/api/api/utils/wandb/index.html
    +++ b/docs/api/api/utils/wandb/index.html
    @@ -2063,7 +2063,12 @@ 

    30 31 32 -33

    def __init__(
    +33
    +34
    +35
    +36
    +37
    +38
    def __init__(
         self,
         project: str,
         cfgs: list[dict[str, object]],
    @@ -2080,7 +2085,12 @@ 

    self.summary_updates: dict[str, object] = {} self.live_run = wandb.init( - project=project, config=cfg, mode=mode, tags=tags, dir=dir + project=project, + config=cfg, + mode=mode, + tags=tags, + dir=dir, + settings=wandb.Settings(init_timeout=300), ) self.metric_queues: list[MetricQueue] = [[] for _ in self.cfgs] diff --git a/docs/api/developers/protocol/index.html b/docs/api/developers/protocol/index.html index 9762ef2..94d021d 100644 --- a/docs/api/developers/protocol/index.html +++ b/docs/api/developers/protocol/index.html @@ -2225,7 +2225,10 @@

    3. Shard sizing mathssaev is a framework for training and evaluating Sparse autoencoders (SAEs) for vision transformers (ViTs), implemented in PyTorch.

    "},{"location":"#installation","title":"Installation","text":"

    Installation is supported with uv. saev will likely work with pure pip, conda, etc. but I will not formally support it.

    Clone this repository, then from the root directory:

    uv run scripts/launch.py --help\n

    This will create a virtual environment and display the help for all the provided framework scripts.

    "},{"location":"#quick-start","title":"Quick Start","text":"

    Save some activations to disk:

    uv run scripts/launch.py shards \\\n  --shards-root /$SCRATCH/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-32/openai \\\n  --d-model 768 \\\n  --layers 11 \\\n  --patches-per-ex 49 \\\n  --batch-size 256 \\\n  data:cifar10\n

    Read the guide for details.

    "},{"location":"#why-saev","title":"Why saev?","text":"

    There are plenty of alternative libraries for SAEs:

    • Overcomplete, primarily developed by Thomas Fel.

    However, saev has some benefits:

    1. saev is more of a framework, rather than a library. The reason for this is that SAEs require lots of activations to train a relatively small neural network; while you can implement it with a simple inference loop, efficient training requires some caching on disk. This means using saev is a little more like Keras or PyTorch Lightning than Huggingface's Transformers or Datasets libraries.
    2. saev offers lots of tools for interacting with sparse autoencoders after training, including interactive notebooks and evaluations.
    3. saev includes complete code from preprints in the contrib/ directory, along with logbooks describing how the authors used and developed saev.
    "},{"location":"api/colors/","title":"saev.colors","text":"

    Utility color palettes used across saev visualizations.

    "},{"location":"api/configs/","title":"saev.configs","text":""},{"location":"api/configs/#saev.configs.dict_to_dataclass","title":"dict_to_dataclass(data, cls)","text":"

    Recursively convert a dictionary to a dataclass instance.

    Source code in src/saev/configs.py
    @beartype.beartype\ndef dict_to_dataclass(data: dict, cls: type[T]) -> T:\n    \"\"\"Recursively convert a dictionary to a dataclass instance.\"\"\"\n    if not dataclasses.is_dataclass(cls):\n        return data\n\n    field_types = {f.name: f.type for f in dataclasses.fields(cls)}\n    kwargs = {}\n\n    for field_name, field_type in field_types.items():\n        if field_name not in data:\n            continue\n\n        value = data[field_name]\n\n        # Handle Optional types\n        origin = tp.get_origin(field_type)\n        args = tp.get_args(field_type)\n\n        # Handle tuple[str, ...]\n        if origin is tuple and args:\n            kwargs[field_name] = tuple(value) if isinstance(value, list) else value\n        # Handle list[DataclassType]\n        elif origin is list and args and dataclasses.is_dataclass(args[0]):\n            kwargs[field_name] = [dict_to_dataclass(item, args[0]) for item in value]\n        # Handle regular dataclass fields\n        elif dataclasses.is_dataclass(field_type):\n            kwargs[field_name] = dict_to_dataclass(value, field_type)\n        # Handle pathlib.Path\n        elif field_type is pathlib.Path:\n            # Required Path field - always convert\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is tp.Union and pathlib.Path in args:\n            # Optional Path field (typing.Union style)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is types.UnionType and pathlib.Path in args:\n            # Optional Path field (Python 3.10+ union style with |)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        else:\n            kwargs[field_name] = value\n\n    return cls(**kwargs)\n
    "},{"location":"api/configs/#saev.configs.expand","title":"expand(config)","text":"

    Expand a nested dict that may contain lists into many dicts.

    Source code in src/saev/configs.py
    @beartype.beartype\ndef expand(config: dict[str, object]) -> Iterator[dict[str, object]]:\n    \"\"\"Expand a nested dict that may contain lists into many dicts.\"\"\"\n    yield from _expand_discrete(dict(config))\n
    "},{"location":"api/configs/#saev.configs.get_non_default_values","title":"get_non_default_values(obj, default_obj)","text":"

    Recursively find fields that differ from defaults.

    Source code in src/saev/configs.py
    @beartype.beartype\ndef get_non_default_values(obj: T, default_obj: T) -> dict:\n    \"\"\"Recursively find fields that differ from defaults.\"\"\"\n    # Check that obj and default_obj are instances of a dataclass.\n    assert dataclasses.is_dataclass(obj) and not isinstance(obj, type)\n    assert dataclasses.is_dataclass(default_obj) and not isinstance(default_obj, type)\n\n    diff = {}\n    for field in dataclasses.fields(obj):\n        obj_value = getattr(obj, field.name)\n        default_value = getattr(default_obj, field.name)\n\n        if obj_value == default_value:\n            continue\n\n        # If both are dataclasses of the same type, recurse to find nested differences\n        if (\n            dataclasses.is_dataclass(obj_value)\n            and dataclasses.is_dataclass(default_value)\n            and type(obj_value) is type(default_value)\n        ):\n            nested_diff = get_non_default_values(obj_value, default_value)\n            if nested_diff:\n                diff[field.name] = nested_diff\n        else:\n            # For non-dataclass fields or different types, just record the value\n            diff[field.name] = obj_value\n\n    return diff\n
    "},{"location":"api/configs/#saev.configs.load_cfgs","title":"load_cfgs(override, *, default, sweep_dcts)","text":"

    Load a list of configs from a combination of sources.

    Parameters:

    Name Type Description Default override T

    Command-line overridden values.

    required default T

    The default values for a config.

    required sweep_dcts list[dict]

    A list of dictionaries from Python sweep files. Each dictionary may contain list values that will be expanded.

    required

    Returns:

    Type Description tuple[list[T], list[str]]

    A list of configs and a list of errors.

    Source code in src/saev/configs.py
    @beartype.beartype\ndef load_cfgs(\n    override: T, *, default: T, sweep_dcts: list[dict]\n) -> tuple[list[T], list[str]]:\n    \"\"\"\n    Load a list of configs from a combination of sources.\n\n    Args:\n        override: Command-line overridden values.\n        default: The default values for a config.\n        sweep_dcts: A list of dictionaries from Python sweep files. Each dictionary may contain list values that will be expanded.\n\n    Returns:\n        A list of configs and a list of errors.\n    \"\"\"\n    # Check that override and default are instances of a dataclass.\n    assert dataclasses.is_dataclass(override) and not isinstance(override, type)\n    assert dataclasses.is_dataclass(default) and not isinstance(default, type)\n\n    # If there's nothing to sweep, return just the override\n    if not sweep_dcts:\n        return [override], []\n\n    # Find which fields were overridden (differ from default)\n    overridden_fields = get_non_default_values(override, default)\n\n    cfgs: list[T] = []\n    errs: list[str] = []\n\n    d = 0  # Global counter for seed incrementing across all expanded configs\n\n    for sweep_dct in sweep_dcts:\n        # Filter out overridden fields from this sweep dict\n        filtered_dct = _filter_overridden_fields(sweep_dct, overridden_fields)\n\n        # If there's nothing to sweep after filtering, just use override\n        if not filtered_dct:\n            cfgs.append(override)\n            d += 1\n            continue\n\n        # Apply the sweep dict to create a config\n        try:\n            updates = _recursive_dataclass_update(override, filtered_dct, override, d)\n\n            if hasattr(override, \"seed\") and \"seed\" not in updates:\n                updates[\"seed\"] = getattr(override, \"seed\", 0) + d\n\n            cfgs.append(dataclasses.replace(override, **updates))\n            d += 1\n        except Exception as err:\n            errs.append(str(err))\n            d += 1\n\n    return cfgs, 
errs\n
    "},{"location":"api/configs/#saev.configs.load_sweep","title":"load_sweep(sweep_fpath)","text":"

    Load a sweep file and return the list of config dicts.

    Parameters:

    Name Type Description Default sweep_fpath Path

    Path to a Python file with a make_cfgs() function.

    required

    Returns:

    Type Description list[dict]

    List of config dictionaries from make_cfgs(). Returns empty list if any error occurs.

    Source code in src/saev/configs.py
    @beartype.beartype\ndef load_sweep(sweep_fpath: pathlib.Path) -> list[dict]:\n    \"\"\"\n    Load a sweep file and return the list of config dicts.\n\n    Args:\n        sweep_fpath: Path to a Python file with a `make_cfgs()` function.\n\n    Returns:\n        List of config dictionaries from `make_cfgs()`. Returns empty list if any error occurs.\n    \"\"\"\n    try:\n        namespace = {}\n        exec(sweep_fpath.read_text(), namespace)\n        result = namespace[\"make_cfgs\"]()\n        if not isinstance(result, list):\n            logger.warning(\n                f\"make_cfgs() in {sweep_fpath} returned {type(result).__name__}, expected list\"\n            )\n            return []\n        return result\n    except Exception as err:\n        logger.warning(f\"Failed to load sweep from {sweep_fpath}: {err}\")\n        return []\n
    "},{"location":"api/disk/","title":"saev.disk","text":"

    Helpers for sticking with the layout described in disk-layout.md.

    "},{"location":"api/disk/#saev.disk.Run","title":"Run(run_dir)","text":"

    Represents an SAE training run and some associated data.

    Parameters:

    Name Type Description Default run_dir Path

    Run directory, should be $SAEV_NFS/saev/runs/<run_id>. Assumes the run already exists and validates the structure. Use Run.new() to create a new run. required Source code in src/saev/disk.py

    def __init__(self, run_dir: pathlib.Path):\n    self.run_dir = run_dir\n\n    if len(self.run_dir.parts) < 3 or self.run_dir.parts[-3:-1] != (\"saev\", \"runs\"):\n        raise ValueError(\"Run directory is invalid.\")\n\n    if not self.run_dir.exists():\n        raise FileNotFoundError(\n            f\"Run directory does not exist: {self.run_dir}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"checkpoint\").exists():\n        raise FileNotFoundError(\n            f\"Checkpoint directory does not exist: {self.run_dir / 'checkpoint'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"links\").exists():\n        raise FileNotFoundError(\n            f\"Links directory does not exist: {self.run_dir / 'links'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"inference\").exists():\n        raise FileNotFoundError(\n            f\"Inference directory does not exist: {self.run_dir / 'inference'}. Use Run.new() to create a new run.\"\n        )\n
    "},{"location":"api/disk/#saev.disk.Run.ckpt","title":"ckpt property","text":"

    Path to the sae.pt checkpoint.

    "},{"location":"api/disk/#saev.disk.Run.config","title":"config property","text":"

    The training run config. Not a train.Config object because we don't want to import from train.py.

    "},{"location":"api/disk/#saev.disk.Run.inference","title":"inference property","text":"

    Path to the inference/ directory.

    "},{"location":"api/disk/#saev.disk.Run.run_id","title":"run_id property","text":"

    The run ID, created by wandb.

    "},{"location":"api/disk/#saev.disk.Run.train_shards","title":"train_shards property","text":"

    Path to shard root with metadata.json and acts*.bin files.

    "},{"location":"api/disk/#saev.disk.Run.val_shards","title":"val_shards property","text":"

    Path to shard root with metadata.json and acts*.bin files.

    "},{"location":"api/disk/#saev.disk.Run.new","title":"new(run_id, *, train_shards_dir, val_shards_dir, runs_root) classmethod","text":"

    Create a new run with directory structure and symlinks.

    Parameters:

    Name Type Description Default run_id str

    The run ID (typically from wandb).

    required train_shards_dir Path

    Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>). required val_shards_dir Path

    Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>). required runs_root Path

    Root directory for runs (typically $SAEV_NFS/saev/runs).

    required

    Returns:

    Type Description Run

    A new Run instance with all directories and symlinks created.

    Source code in src/saev/disk.py
    @classmethod\ndef new(\n    cls,\n    run_id: str,\n    *,\n    train_shards_dir: pathlib.Path,\n    val_shards_dir: pathlib.Path,\n    runs_root: pathlib.Path,\n) -> \"Run\":\n    \"\"\"\n    Create a new run with directory structure and symlinks.\n\n    Args:\n        run_id: The run ID (typically from wandb).\n        train_shards_dir: Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>).\n        val_shards_dir: Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>).\n        runs_root: Root directory for runs (typically $SAEV_NFS/saev/runs).\n\n    Returns:\n        A new Run instance with all directories and symlinks created.\n    \"\"\"\n    run_dir = runs_root / run_id\n    run_dir.mkdir(parents=True)\n    (run_dir / \"checkpoint\").mkdir()\n    (run_dir / \"links\").mkdir()\n    (run_dir / \"inference\").mkdir()\n\n    (run_dir / \"links\" / \"train-shards\").symlink_to(train_shards_dir)\n    (run_dir / \"links\" / \"val-shards\").symlink_to(val_shards_dir)\n\n    return cls(run_dir)\n
    "},{"location":"api/disk/#saev.disk.is_runs_root","title":"is_runs_root(path)","text":"

    Check if path is a valid runs root directory.

    A valid runs root ends with saev/runs and exists as a directory.

    Parameters:

    Name Type Description Default path Path

    Path to check.

    required

    Returns:

    Type Description bool

    True if path is a directory ending in saev/runs.

    Source code in src/saev/disk.py
    @beartype.beartype\ndef is_runs_root(path: pathlib.Path) -> bool:\n    \"\"\"\n    Check if `path` is a valid runs root directory.\n\n    A valid runs root ends with `saev/runs` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/runs.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"runs\")\n
    "},{"location":"api/disk/#saev.disk.is_shards_dir","title":"is_shards_dir(path)","text":"

    Check if path is a specific shards directory.

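    A valid shards directory ends with saev/shards/<hash> for any hash value, exists as a directory, and is expected to contain the required files (metadata.json, shards.json, labels.bin). Note that the implementation shown in the source listing only checks that the path is a directory with the saev/shards/<hash> layout; it does not verify that these files are present.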

    Parameters:

    Name Type Description Default path Path

    Path to check.

    required

    Returns:

    Type Description bool

    True if path is a directory ending in saev/shards/<hash> with required files. Source code in src/saev/disk.py

    @beartype.beartype\ndef is_shards_dir(path: pathlib.Path) -> bool:\n    \"\"\"\n    Check if `path` is a specific shards directory.\n\n    A valid shards directory ends with `saev/shards/<hash>` for any hash value, exists as a directory, and contains the required files (metadata.json, shards.json, labels.bin).\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards/<hash> with required files.\n    \"\"\"\n    if not path.is_dir():\n        return False\n\n    if len(path.parts) < 3 or path.parts[-3:-1] != (\"saev\", \"shards\"):\n        return False\n\n    return True\n
    "},{"location":"api/disk/#saev.disk.is_shards_root","title":"is_shards_root(path)","text":"

    Check if path is a valid shards root directory.

    A valid shards root ends with saev/shards and exists as a directory.

    Parameters:

    Name Type Description Default path Path

    Path to check.

    required

    Returns:

    Type Description bool

    True if path is a directory ending in saev/shards.

    Source code in src/saev/disk.py
    @beartype.beartype\ndef is_shards_root(path: pathlib.Path) -> bool:\n    \"\"\"\n    Check if `path` is a valid shards root directory.\n\n    A valid shards root ends with `saev/shards` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"shards\")\n
    "},{"location":"api/helpers/","title":"saev.helpers","text":""},{"location":"api/helpers/#saev.helpers.RemovedFeatureError","title":"RemovedFeatureError","text":"

    Bases: RuntimeError

    Feature existed before but is no longer supported.

    "},{"location":"api/helpers/#saev.helpers.batched_idx","title":"batched_idx(total_size, batch_size)","text":"

    Iterate over (start, end) indices for total_size examples, where end - start is at most batch_size.

    Parameters:

    Name Type Description Default total_size int

    total number of examples

    required batch_size int

    maximum distance between the generated indices.

    required

    Returns:

    Type Description

    A generator of (int, int) tuples that can slice up a list or a tensor.

    Source code in src/saev/helpers.py
    def __init__(self, total_size: int, batch_size: int):\n    self.total_size = total_size\n    self.batch_size = batch_size\n
    "},{"location":"api/helpers/#saev.helpers.batched_idx.__iter__","title":"__iter__()","text":"

    Yield (start, end) index pairs for batching.

    Source code in src/saev/helpers.py
    def __iter__(self) -> Iterator[tuple[int, int]]:\n    \"\"\"Yield (start, end) index pairs for batching.\"\"\"\n    for start in range(0, self.total_size, self.batch_size):\n        stop = min(start + self.batch_size, self.total_size)\n        yield start, stop\n
    "},{"location":"api/helpers/#saev.helpers.batched_idx.__len__","title":"__len__()","text":"

    Return the number of batches.

    Source code in src/saev/helpers.py
    def __len__(self) -> int:\n    \"\"\"Return the number of batches.\"\"\"\n    return (self.total_size + self.batch_size - 1) // self.batch_size\n
    "},{"location":"api/helpers/#saev.helpers.progress","title":"progress(it, *, every=10, desc='progress', total=0)","text":"

    Wraps an iterable with a logger like tqdm but doesn't use any control codes to manipulate a progress bar, which doesn't work well when your output is redirected to a file. Instead, simple logging statements are used, but it includes quality-of-life features like iteration speed and predicted time to finish.

    Parameters:

    Name Type Description Default it Iterable

    Iterable to wrap.

    required every int

    How many iterations between logging progress.

    10 desc str

    What to name the logger.

    'progress' total int

    If non-zero, how long the iterable is.

    0 Source code in src/saev/helpers.py
    def __init__(\n    self, it: Iterable, *, every: int = 10, desc: str = \"progress\", total: int = 0\n):\n    self.it = it\n    self.every = max(every, 1)\n    self.logger = logging.getLogger(desc)\n    self.total = total\n
    "},{"location":"api/helpers/#saev.helpers.csr_topk","title":"csr_topk(arr, *, k, axis=0, batch_size=1024)","text":"

    Takes the top k values of a sparse CSR array.

    We can only iterate efficiently over rows because it's a CSR array.

    Parameters:

    Name Type Description Default arr csr_array | csr_matrix

    The CSR array of values with shape (rows, cols).

    required k int

    The k in \"top-k\".

    required axis int

    The dimension to sort along.

    0 batch_size int

    How many rows to process at once.

    1024

    Returns:

    Type Description NumpyTopK

    saev.helpers.NumpyTopK

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef csr_topk(\n    arr: scipy.sparse.csr_array | scipy.sparse.csr_matrix,\n    *,\n    k: int,\n    axis: int = 0,\n    batch_size: int = 1024,\n) -> NumpyTopK:\n    \"\"\"\n    Takes the top k values of a sparse CSR array.\n\n    We can only iterate efficiently over *rows* because it's a a *CSR* array.\n\n    Args:\n        arr: The CSR array of values with shape (rows, cols).\n        k: The k in \"top-k\".\n        axis: The dimension to sort along.\n        batch_size: How many rows to process at once.\n\n    Returns:\n        saev.helpers.NumpyTopK\n    \"\"\"\n    if axis == 0:\n        return _csr_topk_axis0(arr, k, batch_size)\n    elif axis == 1:\n        return _csr_topk_axis1(arr, k)\n    else:\n        raise ValueError(f\"axis must be 0 or 1, got {axis}\")\n
    "},{"location":"api/helpers/#saev.helpers.current_git_commit","title":"current_git_commit()","text":"

    Best-effort short SHA of the repo containing this file.

    Returns None when any of the following holds: the git executable is missing; we\u2019re not inside a git repo (e.g. installed wheel); or any git call errors out.

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef current_git_commit() -> str | None:\n    \"\"\"\n    Best-effort short SHA of the repo containing *this* file.\n\n    Returns `None` when\n    * `git` executable is missing,\n    * we\u2019re not inside a git repo (e.g. installed wheel),\n    * or any git call errors out.\n    \"\"\"\n    try:\n        # Walk up until we either hit a .git dir or the FS root\n        here = pathlib.Path(__file__).resolve()\n        for parent in (here, *here.parents):\n            if (parent / \".git\").exists():\n                break\n        else:  # no .git found\n            return None\n\n        result = subprocess.run(\n            [\"git\", \"-C\", str(parent), \"rev-parse\", \"--short\", \"HEAD\"],\n            stdout=subprocess.PIPE,\n            stderr=subprocess.DEVNULL,\n            text=True,\n            check=True,\n        )\n        return result.stdout.strip() or None\n    except (FileNotFoundError, subprocess.CalledProcessError):\n        return None\n
    "},{"location":"api/helpers/#saev.helpers.flattened","title":"flattened(dct, *, sep='.')","text":"

    Flatten a potentially nested dict to a single-level dict with .-separated keys.

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef flattened(\n    dct: dict[str, object], *, sep: str = \".\"\n) -> dict[str, str | int | float | bool | None]:\n    \"\"\"\n    Flatten a potentially nested dict to a single-level dict with `.`-separated keys.\n    \"\"\"\n    new = {}\n    for key, value in dct.items():\n        if isinstance(value, dict):\n            for nested_key, nested_value in flattened(value).items():\n                new[key + \".\" + nested_key] = nested_value\n            continue\n\n        new[key] = value\n\n    return new\n
    "},{"location":"api/helpers/#saev.helpers.fssafe","title":"fssafe(s)","text":"

    Convert a string to be filesystem-safe by replacing special characters.

    This is particularly useful for checkpoint names that contain characters like 'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like 'hf-hub_timm_ViT-L-16-SigLIP2-256'.

    Parameters:

    Name Type Description Default s str

    String to make filesystem-safe.

    required

    Returns:

    Type Description str

    Filesystem-safe version of the string.

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef fssafe(s: str) -> str:\n    \"\"\"\n    Convert a string to be filesystem-safe by replacing special characters.\n\n    This is particularly useful for checkpoint names that contain characters like\n    'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like\n    'hf-hub_timm_ViT-L-16-SigLIP2-256'.\n\n    Args:\n        s: String to make filesystem-safe.\n\n    Returns:\n        Filesystem-safe version of the string.\n    \"\"\"\n    # Replace common problematic characters with underscores\n    replacements = {\n        \"/\": \"_\",\n        \"\\\\\": \"_\",\n        \":\": \"_\",\n        \"*\": \"_\",\n        \"?\": \"_\",\n        '\"': \"_\",\n        \"<\": \"_\",\n        \">\": \"_\",\n        \"|\": \"_\",\n        \" \": \"_\",\n    }\n    for old, new in replacements.items():\n        s = s.replace(old, new)\n    # Remove any remaining non-alphanumeric characters except - _ .\n    return \"\".join(c if c.isalnum() or c in \"-_.\" else \"_\" for c in s)\n
    "},{"location":"api/helpers/#saev.helpers.get_cache_dir","title":"get_cache_dir()","text":"

    Get cache directory from environment variables, defaulting to the current working directory (.)

    Returns:

    Type Description str

    A path to a cache directory (might not exist yet).

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef get_cache_dir() -> str:\n    \"\"\"\n    Get cache directory from environment variables, defaulting to the current working directory (.)\n\n    Returns:\n        A path to a cache directory (might not exist yet).\n    \"\"\"\n    cache_dir = \"\"\n    for var in (\"SAEV_CACHE\", \"HF_HOME\", \"HF_HUB_CACHE\"):\n        cache_dir = cache_dir or os.environ.get(var, \"\")\n    return cache_dir or \".\"\n
    "},{"location":"api/helpers/#saev.helpers.get_slurm_job_count","title":"get_slurm_job_count()","text":"

    Get the current number of jobs in the queue for the current user.

    Uses squeue's -r flag to properly count job array elements individually. For example, a job array 12345_[0-99] will be counted as 100 jobs.

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef get_slurm_job_count() -> int:\n    \"\"\"\n    Get the current number of jobs in the queue for the current user.\n\n    Uses squeue's -r flag to properly count job array elements individually.\n    For example, a job array 12345_[0-99] will be counted as 100 jobs.\n    \"\"\"\n    try:\n        # Use -r to display each array element on its own line\n        result = subprocess.run(\n            [\"squeue\", \"--me\", \"-h\", \"-r\"], capture_output=True, text=True, check=True\n        )\n\n        # Count non-empty lines\n        lines = result.stdout.strip().split(\"\\n\")\n        return len([line for line in lines if line.strip()])\n\n    except (subprocess.SubprocessError, FileNotFoundError):\n        # If we can't check, assume no jobs\n        return 0\n
    "},{"location":"api/helpers/#saev.helpers.get_slurm_max_array_size","title":"get_slurm_max_array_size()","text":"

    Get the MaxArraySize configuration from the current Slurm cluster.

    Returns:

    Name Type Description int int

    The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef get_slurm_max_array_size() -> int:\n    \"\"\"\n    Get the MaxArraySize configuration from the current Slurm cluster.\n\n    Returns:\n        int: The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # Run scontrol command to get config information\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"config\"], capture_output=True, text=True, check=True\n        )\n\n        # Search for MaxArraySize in the output\n        match = re.search(r\"MaxArraySize\\s*=\\s*(\\d+)\", result.stdout)\n        if match:\n            max_array_size = int(match.group(1))\n            logger.info(\"Detected MaxArraySize = %d\", max_array_size)\n            return max_array_size\n        else:\n            logger.warning(\n                \"Could not find MaxArraySize in scontrol output, using default of 1000\"\n            )\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error running scontrol: %s\", e)\n        return 1000  # Safe default\n    except ValueError as e:\n        logger.error(\"Error parsing MaxArraySize: %s\", e)\n        return 1000  # Safe default\n    except FileNotFoundError:\n        logger.warning(\n            \"scontrol command not found. Assuming not in Slurm environment. Returning default MaxArraySize=1000.\"\n        )\n        return 1000\n
    "},{"location":"api/helpers/#saev.helpers.get_slurm_max_submit_jobs","title":"get_slurm_max_submit_jobs()","text":"

    Get the MaxSubmitJobs limit from the current user's QOS.

    Returns:

    Name Type Description int int

    The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef get_slurm_max_submit_jobs() -> int:\n    \"\"\"\n    Get the MaxSubmitJobs limit from the current user's QOS.\n\n    Returns:\n        int: The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # First, try to get the QOS from a recent job\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"job\", \"-o\"],\n            capture_output=True,\n            text=True,\n            check=False,\n        )\n\n        qos_name = None\n        if result.returncode == 0 and result.stdout:\n            # Extract QOS from job info\n            match = re.search(r\"QOS=(\\S+)\", result.stdout)\n            if match:\n                qos_name = match.group(1)\n\n        if not qos_name:\n            # If no jobs, try to get default QOS from association\n            # This is less reliable but better than nothing\n            logger.warning(\"No active jobs to determine QOS, using default of 1000\")\n            return 1000\n\n        # Get the MaxSubmitJobs for this QOS\n        result = subprocess.run(\n            [\"sacctmgr\", \"show\", \"qos\", qos_name, \"format=maxsubmitjobs\", \"-n\", \"-P\"],\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        max_submit = result.stdout.strip()\n        if max_submit and max_submit.isdigit():\n            limit = int(max_submit)\n            logger.info(\"Detected MaxSubmitJobs = %d for QOS %s\", limit, qos_name)\n            return limit\n        else:\n            logger.warning(\"Could not parse MaxSubmitJobs, using default of 1000\")\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error getting MaxSubmitJobs: %s\", e)\n        return 1000\n    except (ValueError, FileNotFoundError) as e:\n        logger.error(\"Error: %s\", e)\n        return 1000\n
    "},{"location":"api/helpers/#saev.helpers.np_topk","title":"np_topk(arr, k, axis=None)","text":"

    A numpy implementation of torch.topk.

    Returns the k largest elements along the given axis. If axis is None, the array is flattened first.

    Parameters:

    Name Type Description Default arr ndarray

    Input array.

    required k int

    Number of top elements to return.

    required axis int | None

    Axis along which to find top k elements. If None, flattens array first.

    None

    Returns:

    Type Description NumpyTopK

    A NumpyTopK holding the k largest values along the specified axis, sorted in descending order, together with the indices of those values.

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef np_topk(arr: np.ndarray, k: int, axis: int | None = None) -> NumpyTopK:\n    \"\"\"A numpy implementation of torch.topk.\n\n    Returns the k largest elements along the given axis. If axis is None, the array is flattened first.\n\n    Args:\n        arr: Input array.\n        k: Number of top elements to return.\n        axis: Axis along which to find top k elements. If None, flattens array first.\n\n    Returns:\n        Array of k largest values along the specified axis, sorted in descending order.\n    \"\"\"\n    if axis is None:\n        arr = arr.flatten()\n        axis = 0\n\n    # Handle negative axis\n    if axis < 0:\n        axis = arr.ndim + axis\n\n    # For each position along other axes, sort and take top k\n    # Use argsort which is stable and will preserve order for equal values\n    sort_indices = np.argsort(-arr, axis=axis, kind=\"stable\")\n\n    # Take the first k sorted indices\n    topk_indices = np.take(sort_indices, np.arange(k), axis=axis)\n\n    # Gather the top k values\n    topk_values = np.take_along_axis(arr, topk_indices, axis=axis)\n\n    return NumpyTopK(values=topk_values, indices=topk_indices)\n
    "},{"location":"api/helpers/#saev.helpers.submit_job_array","title":"submit_job_array(executor, fn, args_list, *, logger=None, margin=0.8)","text":"

    Submit jobs in batches to respect Slurm's MaxArraySize limit.

    Yields (index, result) tuples as jobs complete. Batches are submitted sequentially - each batch must complete before the next is submitted.

    Parameters:

    Name Type Description Default executor

    A submitit executor (SlurmExecutor or LocalExecutor).

    required fn Callable

    Worker function to call for each config.

    required args_list list

    List of arguments to pass to fn.

    required logger Logger | None

    Optional logger for progress messages.

    None margin float

    Fraction of MaxArraySize to use (default 0.8).

    0.8

    Yields:

    Type Description int

    Tuples of (global_index, result) for successful jobs.

    object

    For failed jobs, yields (global_index, None) and logs a warning.

    Example
    executor = submitit.SlurmExecutor(folder=\"./logs\")\nexecutor.update_parameters(...)\n\nfor idx, result in submit_job_array(executor, worker_fn, configs):\n    print(f\"Job {idx} returned {result}\")\n
    Source code in src/saev/helpers.py
    @beartype.beartype\ndef submit_job_array(\n    executor,\n    fn: tp.Callable,\n    args_list: list,\n    *,\n    logger: logging.Logger | None = None,\n    margin: float = 0.8,\n) -> Iterator[tuple[int, object]]:\n    \"\"\"\n    Submit jobs in batches to respect Slurm's MaxArraySize limit.\n\n    Yields (index, result) tuples as jobs complete. Batches are submitted sequentially - each batch must complete before the next is submitted.\n\n    Args:\n        executor: A submitit executor (SlurmExecutor or LocalExecutor).\n        fn: Worker function to call for each config.\n        args_list: List of arguments to pass to fn.\n        logger: Optional logger for progress messages.\n        margin: Fraction of MaxArraySize to use (default 0.8).\n\n    Yields:\n        Tuples of (global_index, result) for successful jobs.\n        For failed jobs, yields (global_index, None) and logs a warning.\n\n    Example:\n        ```\n        executor = submitit.SlurmExecutor(folder=\"./logs\")\n        executor.update_parameters(...)\n\n        for idx, result in submit_job_array(executor, worker_fn, configs):\n            print(f\"Job {idx} returned {result}\")\n        ```\n    \"\"\"\n    from submitit.core.utils import UncompletedJobError\n\n    arr_size = int(get_slurm_max_array_size() * margin)\n    n_total = len(args_list)\n\n    for arr_start, arr_end in batched_idx(n_total, arr_size):\n        batch_args = args_list[arr_start:arr_end]\n\n        if logger:\n            logger.info(\n                \"Submitting batch of %d jobs (%d-%d of %d).\",\n                len(batch_args),\n                arr_start + 1,\n                arr_end,\n                n_total,\n            )\n\n        with executor.batch():\n            jobs = [executor.submit(fn, arg) for arg in batch_args]\n\n        time.sleep(5.0)\n\n        for i, job in enumerate(jobs):\n            global_idx = arr_start + i\n            try:\n                result = job.result()\n                yield 
global_idx, result\n            except UncompletedJobError:\n                if logger:\n                    logger.warning(\n                        \"Job %s (%d) did not finish.\", job.job_id, global_idx\n                    )\n                yield global_idx, None\n
    "},{"location":"api/metrics/","title":"saev.metrics","text":""},{"location":"api/metrics/#saev.metrics.Metrics","title":"Metrics(mse_per_dim, mse_per_token, normalized_mse, baseline_mse_per_dim, baseline_mse_per_token, sse_recon, sse_baseline, n_tokens, d_model, n_elements) dataclass","text":"

    Validated reconstruction metrics aggregated over one evaluation corpus.

    The primary totals are sse_recon (SAE reconstruction SSE) and sse_baseline (mean-baseline SSE). The derived terms are: normalized_mse = sse_recon / sse_baseline; mse_per_dim = sse_recon / n_elements; mse_per_token = sse_recon / n_tokens; baseline_mse_per_dim = sse_baseline / n_elements; baseline_mse_per_token = sse_baseline / n_tokens.

    The size terms are: n_tokens, the number of tokens included in aggregation; d_model, the embedding width per token; and n_elements = n_tokens * d_model.

    "},{"location":"api/metrics/#saev.metrics.Metrics.from_accumulators","title":"from_accumulators(*, sse_recon, sse_baseline, n_tokens, d_model) classmethod","text":"

    Construct metrics from aggregate sums and shape information.

    Parameters:

    Name Type Description Default sse_recon float

    Sum of squared reconstruction errors over all selected tokens and dimensions.

    required sse_baseline float

    Sum of squared mean-baseline errors over the same tokens and dimensions.

    required n_tokens int

    Number of selected tokens in the aggregation set.

    required d_model int

    Activation dimension per token.

    required

    Returns:

    Type Description Metrics

    A validated Metrics object with all derived fields populated.

    Source code in src/saev/metrics.py
    @classmethod\ndef from_accumulators(\n    cls, *, sse_recon: float, sse_baseline: float, n_tokens: int, d_model: int\n) -> \"Metrics\":\n    \"\"\"Construct metrics from aggregate sums and shape information.\n\n    Args:\n        sse_recon: Sum of squared reconstruction errors over all selected tokens and dimensions.\n        sse_baseline: Sum of squared mean-baseline errors over the same tokens and dimensions.\n        n_tokens: Number of selected tokens in the aggregation set.\n        d_model: Activation dimension per token.\n\n    Returns:\n        A validated `Metrics` object with all derived fields populated.\n    \"\"\"\n\n    msg = f\"n_tokens must be positive, got {n_tokens}.\"\n    assert n_tokens > 0, msg\n    msg = f\"d_model must be positive, got {d_model}.\"\n    assert d_model > 0, msg\n    msg = f\"sse_recon must be >= 0, got {sse_recon}.\"\n    assert sse_recon >= 0.0, msg\n    msg = f\"sse_baseline must be > 0, got {sse_baseline}.\"\n    assert sse_baseline > 0.0, msg\n\n    n_elements = n_tokens * d_model\n    return cls(\n        mse_per_dim=sse_recon / n_elements,\n        mse_per_token=sse_recon / n_tokens,\n        normalized_mse=sse_recon / sse_baseline,\n        baseline_mse_per_dim=sse_baseline / n_elements,\n        baseline_mse_per_token=sse_baseline / n_tokens,\n        sse_recon=sse_recon,\n        sse_baseline=sse_baseline,\n        n_tokens=n_tokens,\n        d_model=d_model,\n        n_elements=n_elements,\n    )\n
    "},{"location":"api/saev/","title":"saev","text":"

    saev is a Python package for training sparse autoencoders (SAEs) on vision transformers (ViTs) in PyTorch.

    "},{"location":"api/summary/","title":"Summary","text":"
    • saev
    • saev.colors
    • saev.configs
    • saev.data
    • saev.data.bird_mae
    • saev.data.buffers
    • saev.data.clip
    • saev.data.datasets
    • saev.data.dinov2
    • saev.data.dinov3
    • saev.data.fake_clip
    • saev.data.indexed
    • saev.data.models
    • saev.data.ordered
    • saev.data.pe
    • saev.data.shards
    • saev.data.shuffled
    • saev.data.siglip
    • saev.data.transforms
    • saev.disk
    • saev.framework
    • saev.framework.inference
    • saev.framework.shards
    • saev.framework.train
    • saev.helpers
    • saev.metrics
    • saev.nn
    • saev.nn.modeling
    • saev.nn.objectives
    • saev.utils
    • saev.utils.monitoring
    • saev.utils.scheduling
    • saev.utils.statistics
    • saev.utils.wandb
    • saev.viz
    "},{"location":"api/viz/","title":"saev.viz","text":""},{"location":"api/viz/#saev.viz.load_palette","title":"load_palette(path)","text":"

    TODO: docstring.

    Source code in src/saev/viz.py
    @beartype.beartype\ndef load_palette(path: pathlib.Path) -> list[tuple[float, float, float]]:\n    \"\"\"TODO: docstring.\"\"\"\n    import glasbey\n\n    palette = []\n\n    for i, line in enumerate(path.read_text().split(\"\\n\")):\n        line = line.strip()\n        if not line:\n            palette.append(None)\n            continue\n\n        palette.append(parse_color(line))\n\n    # Extend the palette using https://glasbey.readthedocs.io/en/latest/extending_palettes.html\n    n_missing = sum(color is None for color in palette)\n    if n_missing:\n        seed_palette = [color for color in palette if color is not None]\n        if seed_palette:\n            extended = glasbey.extend_palette(\n                seed_palette, palette_size=len(seed_palette) + n_missing, as_hex=False\n            )\n            fill_colors = extended[len(seed_palette) :]\n        else:\n            fill_colors = glasbey.create_palette(palette_size=n_missing, as_hex=False)\n\n        fill_iter = iter(fill_colors)\n        for i, color in enumerate(palette):\n            if color is not None:\n                continue\n            next_color = tuple(float(chan) for chan in next(fill_iter))\n            palette[i] = next_color\n\n    for i, color in enumerate(palette):\n        assert color is not None\n        msg = f\"Color {i} is invalid: {color}\"\n        assert all(0 <= chan <= 1 and isinstance(chan, float) for chan in color), msg\n\n    return palette\n
    "},{"location":"api/data/bird_mae/","title":"saev.data.bird_mae","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.Encoder","title":"Encoder(cfg)","text":"

    Bases: Module

    Pure PyTorch Bird-MAE backbone (no HF).

    Source code in src/saev/data/bird_mae.py
    def __init__(self, cfg: Config) -> None:\n    super().__init__()\n    self.cfg = cfg\n\n    self.patch_embed = PatchEmbed(\n        img_size=(cfg.img_size_x, cfg.img_size_y),\n        patch_size=(cfg.patch_size, cfg.patch_size),\n        in_chans=cfg.in_chans,\n        embed_dim=cfg.embed_dim,\n    )\n\n    self.cls_token = nn.Parameter(torch.zeros(1, 1, cfg.embed_dim))\n    self.pos_embed = nn.Parameter(\n        torch.zeros(1, cfg.n_patches + 1, cfg.embed_dim),\n        requires_grad=cfg.pos_trainable,\n    )\n\n    if self.pos_embed.data.shape[1] == cfg.n_tokens:\n        pos_embed_np = get_2d_sincos_pos_embed_flexible(\n            self.pos_embed.shape[-1],\n            self.patch_embed.patch_hw,\n            cls_token=True,\n        )\n        self.pos_embed.data.copy_(\n            torch.from_numpy(pos_embed_np).float().unsqueeze(0)\n        )\n    else:\n        logger.warning(\n            \"Positional embedding shape mismatch. Will not initialize sin-cos pos embed.\"\n        )\n\n    dpr = [x.item() for x in torch.linspace(0, cfg.drop_rate, cfg.depth)]\n    self.blocks = nn.ModuleList([\n        Block(\n            dim=cfg.embed_dim,\n            n_heads=cfg.n_heads,\n            mlp_ratio=cfg.mlp_ratio,\n            qkv_bias=cfg.qkv_bias,\n            qk_norm=cfg.qk_norm,\n            init_values=cfg.init_values,\n            proj_drop=cfg.drop_rate,\n            attn_drop=cfg.drop_rate,\n            drop_path=dpr[i],\n            norm_layer=functools.partial(nn.LayerNorm, eps=cfg.norm_layer_eps),\n        )\n        for i in range(cfg.depth)\n    ])\n\n    self.pos_drop = nn.Dropout(p=cfg.drop_rate)\n    self.norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n    self.fc_norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n\n    nn.init.trunc_normal_(self.cls_token, std=0.02)\n    self.apply(self._init_weights)\n
    "},{"location":"api/data/bird_mae/#saev.data.bird_mae.PatchEmbed","title":"PatchEmbed(img_size=(512, 128), patch_size=(16, 16), in_chans=1, embed_dim=768)","text":"

    Bases: Module

    Image (time x mel) to patch embeddings.

    Source code in src/saev/data/bird_mae.py
    def __init__(\n    self,\n    img_size: tuple[int, int] = (512, 128),\n    patch_size: tuple[int, int] = (16, 16),\n    in_chans: int = 1,\n    embed_dim: int = 768,\n) -> None:\n    super().__init__()\n    img_size = _ntuple(2)(img_size)\n    patch_size = _ntuple(2)(patch_size)\n    n_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])\n    self.patch_hw = (img_size[1] // patch_size[1], img_size[0] // patch_size[0])\n    self.img_size = img_size\n    self.patch_size = patch_size\n    self.n_patches = n_patches\n\n    self.proj = nn.Conv2d(\n        in_chans,\n        embed_dim,\n        kernel_size=patch_size,\n        stride=patch_size,\n    )\n
    "},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer","title":"Transformer(ckpt)","text":"

    Bases: Module, Transformer

    Source code in src/saev/data/bird_mae.py
    def __init__(self, ckpt: str):\n    super().__init__()\n    self.model = load(ckpt)\n\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(ckpt.lower())\n
    "},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_resize","title":"make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

    Create resize transform for visualization.

    Source code in src/saev/data/bird_mae.py
    @staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization.\"\"\"\n    raise NotImplementedError(\"Bird-MAE uses audio spectrograms, not images.\")\n
    "},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

    Create transforms for preprocessing: (data_transform, dict_transform | None).

    Source code in src/saev/data/bird_mae.py
    @staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n    return transform, None\n
    "},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio","title":"filter_audio(waveform, sample_rate, patches, *, mode='time')","text":"

    Filter audio based on SAE patch activations over the log-mel spectrogram.

    Given a waveform and the SAE activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.

    Parameters:

    • waveform (Float[Tensor, ' samples'], required): Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.

    • sample_rate (int, required): Audio sample rate in Hz. Should be 32000 for Bird-MAE.

    • patches (Bool[Tensor, ' content_tokens_per_example'], required): Boolean SAE activation values per patch, shape [256]. Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.

    • mode (Literal['time', 'time+freq'], default 'time'): Filtering mode. \"time\" clips to time segments with high activations (preserves all frequencies); \"time+freq\" clips time AND applies frequency masking via STFT.

    Returns:

    Float[Tensor, ' clipped']: Filtered audio waveform as a 1D torch tensor.

    Example

    waveform_np, sr = librosa.load(audio_path, sr=32000) mel = bird_mae.transform(waveform_np) # [512, 128] waveform = torch.from_numpy(waveform_np)

    Source code in src/saev/data/bird_mae.py
    @jaxtyped(typechecker=beartype.beartype)\ndef filter_audio(\n    waveform: Float[Tensor, \" samples\"],\n    sample_rate: int,\n    patches: Bool[Tensor, \" content_tokens_per_example\"],\n    *,\n    mode: tp.Literal[\"time\", \"time+freq\"] = \"time\",\n) -> Float[Tensor, \" clipped\"]:\n    \"\"\"\n    Filter audio based on SAE patch activations over the log-mel spectrogram.\n\n    Given a waveform and the SAE activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.\n\n    Args:\n        waveform: Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.\n        sample_rate: Audio sample rate in Hz. Should be 32000 for Bird-MAE.\n        patches: Boolean SAE activation values per patch, shape [256].\n            Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.\n        mode: Filtering mode.\n            - \"time\": Clip to time segments with high activations (preserves all frequencies).\n            - \"time+freq\": Clip time AND apply frequency masking via STFT.\n\n    Returns:\n        Filtered audio waveform as a 1D torch tensor.\n\n    Example:\n        >>> waveform_np, sr = librosa.load(audio_path, sr=32000)\n        >>> mel = bird_mae.transform(waveform_np)  # [512, 128]\n        >>> waveform = torch.from_numpy(waveform_np)\n        >>> # ... run through SAE to get patch_activations [256] ...\n        >>> # ... 
covert SAE activations to bool with > 0 ...\n        >>> time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\")\n        >>> time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")\n    \"\"\"\n    msg = f\"Bird-MAE expects sample_rate={BIRDMAE_SR_HZ}, got {sample_rate}.\"\n    assert sample_rate == BIRDMAE_SR_HZ, msg\n    assert patches.shape == (BIRDMAE_N_TIME_PATCHES * BIRDMAE_N_MEL_PATCHES,)\n    assert waveform.ndim == 1, waveform.shape\n\n    # Match transform(): pad/truncate to exactly 5s\n    waveform_t = waveform.to(torch.float32)\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if waveform_t.numel() < max_len:\n        pad = max_len - waveform_t.numel()\n        waveform_t = F.pad(waveform_t, (0, pad))\n    else:\n        waveform_t = waveform_t[:max_len]\n    if mode == \"time+freq\":\n        # STFT parameters matching Kaldi/BirdMAE assumptions approximately\n        n_fft = BIRDMAE_STFT_N_FFT\n        hop_length = BIRDMAE_STFT_HOP_LENGTH\n        win_length = BIRDMAE_STFT_WIN_LENGTH\n        window = torch.hann_window(win_length)\n\n        stft = torch.stft(\n            waveform_t,\n            n_fft=n_fft,\n            hop_length=hop_length,\n            win_length=win_length,\n            window=window,\n            center=True,\n            return_complex=True,\n        )\n        # stft shape: [freq_bins, time_frames]\n        # freq_bins = 513\n        # time_frames ~ 498 for 160000 samples\n\n        freqs = torch.linspace(0, sample_rate / 2, stft.shape[0])\n        mask = torch.zeros_like(stft, dtype=torch.bool)\n\n        # Mel range\n        low_freq = BIRDMAE_STFT_LOW_FREQ_HZ\n        high_freq = sample_rate / 2\n        min_mel = hz_to_mel(low_freq)\n        max_mel = hz_to_mel(high_freq)\n        mel_range = max_mel - min_mel\n\n        active_patch_i = torch.nonzero(patches, as_tuple=False).flatten().tolist()\n        for i in active_patch_i:\n            time_idx = i // 
BIRDMAE_N_MEL_PATCHES\n            mel_idx = i % BIRDMAE_N_MEL_PATCHES\n\n            # Time range (frames)\n            t_start = time_idx * BIRDMAE_FRAMES_PER_PATCH\n            t_end = (time_idx + 1) * BIRDMAE_FRAMES_PER_PATCH\n\n            # Frequency range (Hz)\n            # 128 mel bins total, 16 bins per patch\n            p_mel_low = (\n                min_mel\n                + (mel_idx * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n            p_mel_high = (\n                min_mel\n                + ((mel_idx + 1) * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n\n            hz_low = mel_to_hz(p_mel_low)\n            hz_high = mel_to_hz(p_mel_high)\n\n            freq_mask = (freqs >= hz_low) & (freqs < hz_high)\n\n            # Apply mask to valid frames\n            valid_t_end = min(t_end, stft.shape[1])\n            if t_start < valid_t_end:\n                mask[freq_mask, t_start:valid_t_end] = True\n\n        stft_filtered = stft * mask\n        waveform_t = torch.istft(\n            stft_filtered,\n            n_fft=n_fft,\n            hop_length=hop_length,\n            win_length=win_length,\n            window=window,\n            center=True,\n            length=waveform_t.shape[0],\n        )\n\n    # Time clipping (applies to both modes)\n    active_time_indices = torch.unique(\n        torch.nonzero(patches, as_tuple=False).flatten() // BIRDMAE_N_MEL_PATCHES\n    ).tolist()\n    segments = []\n\n    for t in active_time_indices:\n        start = t * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        end = (t + 1) * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        if start >= waveform_t.shape[0]:\n            continue\n        seg = waveform_t[start : min(end, waveform_t.shape[0])]\n        segments.append(seg)\n\n    if not segments:\n        return waveform_t[:0]\n\n    return torch.cat(segments, dim=0)\n
    "},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--run-through-sae-to-get-patch_activations-256","title":"... run through SAE to get patch_activations [256] ...","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--covert-sae-activations-to-bool-with-0","title":"... covert SAE activations to bool with > 0 ...","text":"

    time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\") time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")

    "},{"location":"api/data/bird_mae/#saev.data.bird_mae.transform","title":"transform(waveform)","text":"

    waveform: 1D NumPy array [samples]. Returns: 2D tensor [512, 128] matching HF's feature extractor output.

    Source code in src/saev/data/bird_mae.py
    @jaxtyped(typechecker=beartype.beartype)\ndef transform(waveform: Float[np.ndarray, \" samples\"]) -> Float[Tensor, \"time mels\"]:\n    \"\"\"\n    waveform: 1D tensor [samples]\n    returns: 2D tensor [512, 128] matching HF's feature extractor output\n    \"\"\"\n    import torchaudio.compliance.kaldi\n\n    waveform = torch.from_numpy(waveform).to(torch.float32)\n    (n_samples,) = waveform.shape\n    # 1) pad/truncate to exactly 5 s\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if n_samples < max_len:\n        pad = max_len - n_samples\n        waveform = F.pad(waveform, (0, pad))\n    else:\n        waveform = waveform[:max_len]\n\n    # 2) mean-center (per clip)\n    waveform = waveform - waveform.mean(dim=0, keepdim=True)\n\n    # 3) Kaldi fbank: [T, 128]\n    fb = torchaudio.compliance.kaldi.fbank(\n        waveform[None, :],\n        htk_compat=True,\n        sample_frequency=BIRDMAE_SR_HZ,\n        use_energy=False,\n        window_type=\"hanning\",\n        num_mel_bins=BIRDMAE_N_MELS,\n        dither=0.0,\n        frame_shift=10.0,\n    )  # [T, 128]\n\n    # 4) pad to 512 frames with min value\n    t, _ = fb.shape\n    if t < BIRDMAE_TARGET_T:\n        diff = BIRDMAE_TARGET_T - t\n        min_val = fb.min()\n        fb = F.pad(fb, (0, 0, 0, diff), value=min_val.item())\n    elif t > BIRDMAE_TARGET_T:\n        fb = fb[:BIRDMAE_TARGET_T]\n\n    fb = (fb - BIRDMAE_MEAN) / (BIRDMAE_STD * 2.0)\n\n    assert fb.shape == (BIRDMAE_TARGET_T, BIRDMAE_N_MELS), fb.shape\n\n    return fb\n
    "},{"location":"api/data/buffers/","title":"saev.data.buffers","text":""},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer","title":"ReservoirBuffer(capacity, shape, *, dtype=torch.float32, meta_shape=(2,), meta_dtype=torch.int32, seed=0, collate_fn=None)","text":"

    Pool of (tensor, meta) pairs. Multiple producers call put(batch_x, batch_meta). Multiple consumers call get(batch_size) -> (x, meta). Random order, each sample delivered once, blocking semantics.

    Source code in src/saev/data/buffers.py
    def __init__(\n    self,\n    capacity: int,\n    shape: tuple[int, ...],\n    *,\n    dtype: torch.dtype = torch.float32,\n    meta_shape: tuple[int, ...] = (2,),\n    meta_dtype: torch.dtype = torch.int32,\n    seed: int = 0,\n    collate_fn: collections.abc.Callable | None = None,\n):\n    self.capacity = capacity\n    self._empty = 123456789\n\n    self.data = torch.full((capacity, *shape), self._empty, dtype=dtype)\n    self.data.share_memory_()\n\n    self.meta = torch.full((capacity, *meta_shape), self._empty, dtype=meta_dtype)\n    self.meta.share_memory_()\n\n    self.ctx = mp.get_context()\n\n    self.size = self.ctx.Value(\"L\", 0)  # current live items\n    self.lock = self.ctx.Lock()  # guards size+swap\n    self.free = self.ctx.Semaphore(capacity)\n    self.full = self.ctx.Semaphore(0)\n    # Each process has its own RNG.\n    self.rng = np.random.default_rng(seed)\n\n    self.collate_fn = collate_fn\n\n    self.logger = logging.getLogger(f\"reservoir({os.getpid()})\")\n
    "},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.close","title":"close()","text":"

    Release the shared-memory backing store (call once in the parent).

    Source code in src/saev/data/buffers.py
    def close(self) -> None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n    try:\n        self.data.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n
    "},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.fill","title":"fill()","text":"

    Approximate proportion of filled slots (race-safe enough for tests).

    Source code in src/saev/data/buffers.py
    def fill(self) -> float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for tests).\"\"\"\n    return self.qsize() / self.capacity\n
    "},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.qsize","title":"qsize()","text":"

    Approximate number of filled slots (race-safe enough for tests).

    Source code in src/saev/data/buffers.py
    def qsize(self) -> int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return self.size.value\n
    "},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer","title":"RingBuffer(slots, shape, dtype)","text":"

    Fixed-capacity, multiple-producer / multiple-consumer queue backed by a shared-memory tensor.

    "},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer--parameters","title":"Parameters","text":"

    slots : int, capacity in number of items (tensor rows). shape : tuple[int], shape of one item, e.g. (batch, dim). dtype : torch.dtype, tensor dtype.

    Methods: put(tensor) blocks if full; get() -> tensor blocks if empty; qsize() -> int returns an advisory (approximate) size; close() frees shared storage (call in the main process).

    Source code in src/saev/data/buffers.py
    def __init__(self, slots: int, shape: tuple[int, ...], dtype: torch.dtype):\n    assert slots > 0, \"slots must be positive\"\n    self.slots = slots\n    # 123456789 -> Should make you very worried.\n    self.buf = torch.full((slots, *shape), 123456789, dtype=dtype)\n    self.buf.share_memory_()\n\n    ctx = mp.get_context()  # obeys the global start method (\"spawn\")\n\n    # shared, lock-free counters\n    self.head = ctx.Value(\"L\", 0, lock=False)  # next free slot\n    self.tail = ctx.Value(\"L\", 0, lock=False)  # next occupied slot\n\n    # semaphores for blocking semantics\n    self.free = ctx.Semaphore(slots)  # initially all slots free\n    self.full = ctx.Semaphore(0)  # no filled slots yet\n\n    # one mutex for pointer updates\n    self.mutex = ctx.Lock()\n
    "},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.close","title":"close()","text":"

    Release the shared-memory backing store (call once in the parent).

    Source code in src/saev/data/buffers.py
    def close(self) -> None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n    try:\n        self.buf.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n
    "},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.fill","title":"fill()","text":"

    Approximate proportion of filled slots (race-safe enough for tests).

    Source code in src/saev/data/buffers.py
    def fill(self) -> float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for tests).\"\"\"\n    return self.qsize() / self.capacity\n
    "},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.get","title":"get()","text":"

    Return a copy of the next item; blocks if the queue is empty.

    Source code in src/saev/data/buffers.py
    def get(self) -> torch.Tensor:\n    \"\"\"Return a view of the next item; blocks if the queue is empty.\"\"\"\n    self.full.acquire()  # wait for data\n    with self.mutex:  # exclusive update of tail\n        idx = self.tail.value % self.slots\n        out = self.buf[idx].clone()\n        self.tail.value += 1\n    self.free.release()  # signal one more free slot\n    return out\n
    "},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.put","title":"put(tensor)","text":"

    Copy tensor into the next free slot; blocks if the queue is full.

    Source code in src/saev/data/buffers.py
    def put(self, tensor: torch.Tensor) -> None:\n    \"\"\"Copy `tensor` into the next free slot; blocks if the queue is full.\"\"\"\n    if tensor.shape != self.buf.shape[1:] or tensor.dtype != self.buf.dtype:\n        raise ValueError(\"tensor shape / dtype mismatch\")\n\n    self.free.acquire()  # wait for a free slot\n    with self.mutex:  # exclusive update of head\n        idx = self.head.value % self.slots\n        self.buf[idx].copy_(tensor)\n        self.head.value += 1\n    self.full.release()  # signal there is data\n
    "},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.qsize","title":"qsize()","text":"

    Approximate number of filled slots (race-safe enough for tests).

    Source code in src/saev/data/buffers.py
    def qsize(self) -> int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return (self.head.value - self.tail.value) % (1 << 64)\n
    "},{"location":"api/data/clip/","title":"saev.data.clip","text":""},{"location":"api/data/clip/#saev.data.clip.Vit","title":"Vit(ckpt)","text":"

    Bases: Transformer, Module

    Source code in src/saev/data/clip.py
    def __init__(self, ckpt: str):\n    super().__init__()\n\n    import open_clip\n\n    from .. import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n        _, ckpt = ckpt.split(\"hf-hub:\")\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n\n    assert not isinstance(self.model, open_clip.timm_model.TimmModel)\n
    "},{"location":"api/data/clip/#saev.data.clip.Vit.patch_size","title":"patch_size property","text":"

    Get patch size for CLIP models.

    "},{"location":"api/data/clip/#saev.data.clip.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

    Create transforms for preprocessing: (img_transform, sample_transform | None).

    Source code in src/saev/data/clip.py
    @staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    import open_clip\n\n    from .. import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n
    "},{"location":"api/data/datasets/","title":"saev.data.datasets","text":""},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025","title":"BirdClef2025(root=pathlib.Path('data/birdclef-2025'), split='train_audio') dataclass","text":"

    Bases: DatasetConfig

    Configuration for BirdCLEF 2025 dataset, filtering to only bird species (Aves).

    See https://www.kaggle.com/competitions/birdclef-2025/data for more information.

    "},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.n_examples","title":"n_examples property","text":"

    Number of bird audio samples in the dataset.

    "},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.root","title":"root = pathlib.Path('data/birdclef-2025') class-attribute instance-attribute","text":"

    Root directory containing the BirdCLEF 2025 data.

    "},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.split","title":"split = 'train_audio' class-attribute instance-attribute","text":"

    Which data split to use.

    "},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset","title":"BirdClef2025Dataset(cfg, *, audio_transform=None, mask_transform=None, sample_transform=None)","text":"

    Bases: Dataset

    Dataset for BirdCLEF 2025 filtered to bird species only (class_name == 'Aves').

    Source code in src/saev/data/datasets.py
    def __init__(\n    self,\n    cfg: BirdClef2025,\n    *,\n    audio_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    import polars as pl\n\n    self.cfg = cfg\n    self.audio_transform = audio_transform\n    self.sample_transform = sample_transform\n\n    # Load taxonomy and filter to birds only\n    taxonomy = pl.read_csv(cfg.root / \"taxonomy.csv\", infer_schema_length=None)\n    taxonomy = taxonomy.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n    birds = taxonomy.filter(pl.col(\"class_name\") == \"Aves\")\n    bird_labels = set(birds[\"primary_label\"].to_list())\n\n    # Build label -> target mapping from bird species only\n    sorted_labels = sorted(bird_labels)\n    self.label_to_target = {label: i for i, label in enumerate(sorted_labels)}\n    self.target_to_label = {i: label for label, i in self.label_to_target.items()}\n\n    if cfg.split == \"train_audio\":\n        train = pl.read_csv(cfg.root / \"train.csv\", infer_schema_length=None)\n        train = train.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n        train_birds = train.filter(pl.col(\"primary_label\").is_in(bird_labels))\n        self.samples = [\n            {\"label\": row[\"primary_label\"], \"filename\": row[\"filename\"]}\n            for row in train_birds.iter_rows(named=True)\n        ]\n    elif cfg.split == \"train_soundscapes\":\n        soundscapes_dpath = cfg.root / \"train_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    elif cfg.split == \"test_soundscapes\":\n        soundscapes_dpath = cfg.root / \"test_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    else:\n        tp.assert_never(cfg.split)\n
    "},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset.n_classes","title":"n_classes property","text":"

    Number of bird species.

    "},{"location":"api/data/datasets/#saev.data.datasets.Cifar10","title":"Cifar10(name='uoft-cs/cifar10', split='train') dataclass","text":"

    Bases: DatasetConfig

    Configuration for HuggingFace CIFAR-10.

    "},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.n_examples","title":"n_examples property","text":"

    Number of images in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.

    "},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.name","title":"name = 'uoft-cs/cifar10' class-attribute instance-attribute","text":"

    Dataset name on HuggingFace. Don't need to change this.

    "},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.root","title":"root property","text":"

    Dummy path for the dataset.

    "},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.split","title":"split = 'train' class-attribute instance-attribute","text":"

    Dataset split. Can be 'train' or 'test'.

    "},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig","title":"DatasetConfig","text":"

    Bases: ABC

    Abstract base class for dataset configurations.

    "},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.n_examples","title":"n_examples abstractmethod property","text":"

    Number of examples in the dataset.

    "},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.root","title":"root abstractmethod property","text":"

    Root directory path for the dataset.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImg","title":"FakeImg(n_examples=10) dataclass","text":"

    Bases: DatasetConfig

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImg.root","title":"root property","text":"

    Root directory path for the dataset.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg","title":"FakeImgSeg(n_examples=10, content_tokens_per_example=16, n_classes=3, bg_label=0) dataclass","text":"

    Bases: DatasetConfig

    Tiny synthetic segmentation dataset for tests.

    Generates dummy RGB images and pixel-level segmentation masks, mimicking the behavior of real segmentation datasets like ImgSegFolder.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.bg_label","title":"bg_label = 0 class-attribute instance-attribute","text":"

    Which class index is considered background.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.content_tokens_per_example","title":"content_tokens_per_example = 16 class-attribute instance-attribute","text":"

    Number of content tokens per example.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_classes","title":"n_classes = 3 class-attribute instance-attribute","text":"

    Number of segmentation classes.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_examples","title":"n_examples = 10 class-attribute instance-attribute","text":"

    Number of examples.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.root","title":"root property","text":"

    Root directory path for the dataset.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSegDataset","title":"FakeImgSegDataset(cfg, *, img_transform=None, mask_transform=None, sample_transform=None)","text":"

    Bases: Dataset

    Synthetic segmentation dataset providing pixel-level segmentation masks.

    Mimics ImgSegFolderDataset by providing:

    • image: a dummy RGB PIL image
    • segmentation: a PIL image with pixel-level class labels
    • index, target, label
    Source code in src/saev/data/datasets.py
    def __init__(\n    self,\n    cfg: FakeImgSeg,\n    *,\n    img_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    self.cfg = cfg\n    self.img_transform = img_transform\n    self.mask_transform = mask_transform\n    self.sample_transform = sample_transform\n
    "},{"location":"api/data/datasets/#saev.data.datasets.Imagenet","title":"Imagenet(name='ILSVRC/imagenet-1k', split='train') dataclass","text":"

    Bases: DatasetConfig

    Configuration for HuggingFace Imagenet.

    "},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.n_examples","title":"n_examples property","text":"

    Number of images in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.

    "},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.name","title":"name = 'ILSVRC/imagenet-1k' class-attribute instance-attribute","text":"

    Dataset name on HuggingFace. You don't need to change this.

    "},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.root","title":"root property","text":"

    Root directory path for the dataset.

    "},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.split","title":"split = 'train' class-attribute instance-attribute","text":"

    Dataset split. For the default ImageNet-1K dataset, can either be 'train', 'validation' or 'test'.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder","title":"ImgFolder(root=pathlib.Path('./data/split')) dataclass","text":"

    Bases: DatasetConfig

    Configuration for a generic image folder dataset that matches the structure used in PyTorch's ImageFolder.

    The dataset must be laid out in:

    root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n

    If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.n_examples","title":"n_examples property","text":"

    Number of examples in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires walking the directory structure. If you need to reference this number very often, cache it in a local variable.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.root","title":"root = pathlib.Path('./data/split') class-attribute instance-attribute","text":"

    Where the class folders with images are stored. Can be a glob pattern to match multiple directories.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset","title":"ImgFolderDataset(*args, sample_transform=None, **kwargs)","text":"

    Bases: ImageFolder

    A generic image folder dataset that matches the structure used in PyTorch's ImageFolder.

    The dataset must be laid out in:

    root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n

    If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.

    Source code in src/saev/data/datasets.py
    def __init__(self, *args, sample_transform: Callable | None = None, **kwargs):\n    super().__init__(*args, **kwargs)\n    self.sample_transform = sample_transform\n
    "},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset.__getitem__","title":"__getitem__(index)","text":"

    Parameters:

    Name Type Description Default index int

    Index

    required

    Returns:

    Type Description dict[str, object]

    dict with keys 'data', 'index', 'target' and 'label'.

    Source code in src/saev/data/datasets.py
    def __getitem__(self, index: int) -> dict[str, object]:\n    \"\"\"\n    Args:\n        index: Index\n\n    Returns:\n        dict with keys 'data', 'index', 'target' and 'label'.\n    \"\"\"\n    path, target = self.samples[index]\n    image = self.loader(path)\n    if self.transform is not None:\n        image = self.transform(image)\n    if self.target_transform is not None:\n        target = self.target_transform(target)\n\n    sample = {\n        \"data\": image,\n        \"target\": target,\n        \"label\": self.classes[target],\n        \"index\": index,\n    }\n\n    if self.sample_transform is not None:\n        sample = self.sample_transform(sample)\n\n    return sample\n
    "},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder","title":"ImgSegFolder(root=pathlib.Path('./data/segdataset'), split='training', labels_csv='labels.csv', bg_label=0) dataclass","text":"

    Bases: DatasetConfig

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.bg_label","title":"bg_label = 0 class-attribute instance-attribute","text":"

    Background label.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.labels_csv","title":"labels_csv = 'labels.csv' class-attribute instance-attribute","text":"

    CSV file with columns: stem,label1,label2,... First column must be 'stem'.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.n_examples","title":"n_examples property","text":"

    Number of examples in the dataset. Calculated on the fly by counting image files in root/images/split.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.root","title":"root = pathlib.Path('./data/segdataset') class-attribute instance-attribute","text":"

    Where the class folders with images are stored.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.split","title":"split = 'training' class-attribute instance-attribute","text":"

    Data split.

    "},{"location":"api/data/datasets/#saev.data.datasets.get_dataset","title":"get_dataset(cfg, *, data_transform=None, mask_transform=None, sample_transform=None)","text":"

    Gets the dataset for the current experiment; delegates construction to dataset-specific functions.

    Parameters:

    Name Type Description Default cfg Config

    Config for the dataset.

    required data_transform

    Transform to be applied to each 'data' key (typically the raw data).

    None mask_transform

    Transform to be applied to masks.

    None sample_transform

    Transform to be applied to the entire sample dict.

    None

    Returns: A dataset that has dictionaries with 'data', 'index', 'target', and 'label' keys containing examples.

    Source code in src/saev/data/datasets.py
    @beartype.beartype\ndef get_dataset(\n    cfg: Config,\n    *,\n    data_transform: Callable = None,\n    mask_transform: Callable | None = None,\n    sample_transform: Callable | None = None,\n):\n    \"\"\"\n    Gets the dataset for the current experiment; delegates construction to dataset-specific functions.\n\n    Args:\n        cfg: Config for the dataset.\n        data_tr: Transform to be applied to each 'data' key (typically the raw data).\n        mask_tr: Transform to be applied to masks.\n        dict_tr: Transform to be applied to the entire sample dict.\n    Returns:\n        A dataset that has dictionaries with `'data'`, `'index'`, `'target'`, and `'label'` keys containing examples.\n    \"\"\"\n    # TODO: Can we reduce duplication? Or is it nice to see that there is no magic here?\n    if isinstance(cfg, Imagenet):\n        return ImagenetDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, Cifar10):\n        return Cifar10Dataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, ImgSegFolder):\n        return ImgSegFolderDataset(\n            cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, ImgFolder):\n        ds = [\n            ImgFolderDataset(\n                root, transform=data_transform, sample_transform=sample_transform\n            )\n            for root in glob.glob(str(cfg.root), recursive=True)\n        ]\n        if len(ds) == 1:\n            return ds[0]\n        else:\n            return torch.utils.data.ConcatDataset(ds)\n    elif isinstance(cfg, FakeImg):\n        return FakeImgDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, FakeImgSeg):\n        return FakeImgSegDataset(\n       
     cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, BirdClef2025):\n        return BirdClef2025Dataset(\n            cfg, audio_transform=data_transform, sample_transform=sample_transform\n        )\n    else:\n        tp.assert_never(cfg)\n
    "},{"location":"api/data/datasets/#saev.data.datasets.is_img_seg_dataset","title":"is_img_seg_dataset(data_cfg)","text":"

    Check if a dataset configuration is for an image segmentation dataset.

    Parameters:

    Name Type Description Default data_cfg DatasetConfig

    Dataset configuration

    required

    Returns:

    Type Description bool

    True if this is an image segmentation dataset that should have labels.bin

    Source code in src/saev/data/datasets.py
    @beartype.beartype\ndef is_img_seg_dataset(data_cfg: DatasetConfig) -> bool:\n    \"\"\"\n    Check if a dataset configuration is for an image segmentation dataset.\n\n    Args:\n        data_cfg: Dataset configuration\n\n    Returns:\n        True if this is an image segmentation dataset that should have labels.bin\n    \"\"\"\n    return isinstance(data_cfg, (FakeImgSeg, ImgSegFolder))\n
    "},{"location":"api/data/dinov2/","title":"saev.data.dinov2","text":""},{"location":"api/data/dinov3/","title":"saev.data.dinov3","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config","title":"Config(img_size=224, patch_size=16, in_chans=3, pos_embed_rope_base=100.0, pos_embed_rope_min_period=None, pos_embed_rope_max_period=None, pos_embed_rope_normalize_coords='separate', pos_embed_rope_dtype='bf16', embed_dim=768, depth=12, num_heads=12, ffn_ratio=4.0, qkv_bias=True, ffn_layer='mlp', ffn_bias=True, proj_bias=True, n_storage_tokens=0, mask_k_bias=False, untie_global_and_local_cls_norm=False, device=None) dataclass","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config.depth","title":"depth = 12 class-attribute instance-attribute","text":"

    Number of transformer blocks.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.device","title":"device = None class-attribute instance-attribute","text":"

    Device for tensor operations.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.embed_dim","title":"embed_dim = 768 class-attribute instance-attribute","text":"

    Embedding dimension for transformer.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_bias","title":"ffn_bias = True class-attribute instance-attribute","text":"

    Whether to use bias in feed-forward network.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_layer","title":"ffn_layer = 'mlp' class-attribute instance-attribute","text":"

    Type of feed-forward network layer.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_ratio","title":"ffn_ratio = 4.0 class-attribute instance-attribute","text":"

    Feed-forward network expansion ratio.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.img_size","title":"img_size = 224 class-attribute instance-attribute","text":"

    Image width and height in pixels.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.in_chans","title":"in_chans = 3 class-attribute instance-attribute","text":"

    Number of input image channels.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.mask_k_bias","title":"mask_k_bias = False class-attribute instance-attribute","text":"

    Whether to mask K bias in attention.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.n_storage_tokens","title":"n_storage_tokens = 0 class-attribute instance-attribute","text":"

    Number of storage/register tokens.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.num_heads","title":"num_heads = 12 class-attribute instance-attribute","text":"

    Number of attention heads.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.patch_size","title":"patch_size = 16 class-attribute instance-attribute","text":"

    Size of each patch in pixels.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_base","title":"pos_embed_rope_base = 100.0 class-attribute instance-attribute","text":"

    Base frequency for RoPE positional encoding.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_dtype","title":"pos_embed_rope_dtype = 'bf16' class-attribute instance-attribute","text":"

    Data type for RoPE positional encoding.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_max_period","title":"pos_embed_rope_max_period = None class-attribute instance-attribute","text":"

    Maximum period for RoPE positional encoding.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_min_period","title":"pos_embed_rope_min_period = None class-attribute instance-attribute","text":"

    Minimum period for RoPE positional encoding.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_normalize_coords","title":"pos_embed_rope_normalize_coords = 'separate' class-attribute instance-attribute","text":"

    Coordinate normalization method for RoPE encoding.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.proj_bias","title":"proj_bias = True class-attribute instance-attribute","text":"

    Whether to use bias in output projection.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.qkv_bias","title":"qkv_bias = True class-attribute instance-attribute","text":"

    Whether to use bias in QKV projection.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.untie_global_and_local_cls_norm","title":"untie_global_and_local_cls_norm = False class-attribute instance-attribute","text":"

    Whether to use separate norms for global and local CLS tokens.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.PatchEmbed","title":"PatchEmbed(img_size=224, patch_size=16, in_chans=3, embed_dim=768, flatten_embedding=True)","text":"

    Bases: Module

    2D image to patch embedding: (B,C,H,W) -> (B,N,D)

    Parameters:

    Name Type Description Default img_size int | tuple[int, int]

    Image size.

    224 patch_size int | tuple[int, int]

    Patch token size.

    16 in_chans int

    Number of input image channels.

    3 embed_dim int

    Number of linear projection output channels.

    768 Source code in src/saev/data/dinov3.py
    def __init__(\n    self,\n    img_size: int | tuple[int, int] = 224,\n    patch_size: int | tuple[int, int] = 16,\n    in_chans: int = 3,\n    embed_dim: int = 768,\n    flatten_embedding: bool = True,\n) -> None:\n    super().__init__()\n\n    image_hw = make_2tuple(img_size)\n    patch_hw = make_2tuple(patch_size)\n\n    self.image_hw = image_hw\n    self.patch_hw = patch_hw\n\n    self.in_chans = in_chans\n    self.embed_dim = embed_dim\n\n    self.proj = nn.Conv2d(\n        in_chans, embed_dim, kernel_size=patch_hw, stride=patch_hw\n    )\n    self.k = patch_hw[0]\n    assert self.proj.kernel_size == (self.k, self.k)\n    assert self.proj.stride == (self.k, self.k)\n    assert self.proj.padding == (0, 0)\n    assert self.proj.groups == 1\n    assert self.proj.dilation == (1, 1)\n
    "},{"location":"api/data/dinov3/#saev.data.dinov3.Vit","title":"Vit(ckpt)","text":"

    Bases: Module, Transformer

    Source code in src/saev/data/dinov3.py
    def __init__(self, ckpt: str):\n    super().__init__()\n    name = self._parse_name(ckpt)\n    self.model = load(name, ckpt)\n\n    self._ckpt = name\n    self.logger = logging.getLogger(f\"dinov3/{name}\")\n
    "},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_resize","title":"make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

    Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.

    Source code in src/saev/data/dinov3.py
    @staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    import functools\n\n    return functools.partial(\n        transforms.resize_to_patch_grid,\n        p=int(16 * scale),\n        n=n_patches_per_img,\n        resample=resample,\n    )\n
    "},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

    Create transforms for preprocessing: (img_transform, sample_transform | None).

    Source code in src/saev/data/dinov3.py
    @staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    img_transform = v2.Compose([\n        transforms.FlexResize(patch_size=16, n_patches=n_patches_per_img),\n        v2.ToImage(),\n        v2.ToDtype(torch.float32, scale=True),\n        v2.Normalize(mean=[0.4850, 0.4560, 0.4060], std=[0.2290, 0.2240, 0.2250]),\n    ])\n    sample_transform = transforms.Patchify(\n        patch_size=16, n_patches=n_patches_per_img\n    )\n    return img_transform, sample_transform\n
    "},{"location":"api/data/fake_clip/","title":"saev.data.fake_clip","text":"

    Fake CLIP model for testing with tiny-open-clip-model.

    This module provides a test-only vision transformer that works with the tiny-open-clip-model from HuggingFace, which uses 8x8 images and 2x2 patches instead of the standard 224x224 images with 16x16 patches.

    "},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit","title":"Vit(ckpt)","text":"

    Bases: Transformer, Module

    Source code in src/saev/data/fake_clip.py
    def __init__(self, ckpt: str):\n    super().__init__()\n\n    # Only support the tiny test model\n    assert ckpt == \"hf-hub:hf-internal-testing/tiny-open-clip-model\", (\n        f\"FakeClip only supports tiny-open-clip-model, got {ckpt}\"\n    )\n\n    clip, _ = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n
    "},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.patch_size","title":"patch_size property","text":"

    Tiny model uses 2x2 patches.

    "},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_resize","title":"make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

    Create resize transform for tiny model (8x8 images).

    Source code in src/saev/data/fake_clip.py
    @staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for tiny model (8x8 images).\"\"\"\n\n    def resize(img: Image.Image) -> Image.Image:\n        # Tiny model uses 8x8 images\n        size_px = (int(8 * scale), int(8 * scale))\n        return img.resize(size_px, resample=resample)\n\n    return resize\n
    "},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

    Create transforms for preprocessing: (img_transform, sample_transform | None).

    Source code in src/saev/data/fake_clip.py
    @staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    _, img_transform = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    return img_transform, None\n
    "},{"location":"api/data/indexed/","title":"saev.data.indexed","text":""},{"location":"api/data/indexed/#saev.data.indexed.Config","title":"Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False) dataclass","text":"

    Configuration for loading indexed activation data from disk

    Attributes:

    Name Type Description shards Path

    Directory with .bin shards and a metadata.json file.

    tokens Literal['special', 'content', 'all']

    Which kinds of tokens to use. 'special' returns only the special tokens (if any). 'content' returns content tokens. 'all' returns both content and special tokens.

    layer int | Literal['all']

    Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

    debug bool

    Whether the dataloader process should log debug messages.

    "},{"location":"api/data/indexed/#saev.data.indexed.Dataset","title":"Dataset(cfg)","text":"

    Bases: Dataset

    Dataset of activations from disk.

    Attributes:

    Name Type Description cfg Config

    Configuration set via CLI args.

    md Metadata

    Activations metadata; automatically loaded from disk.

    layer_idx int

    Layer index into the shards if we are choosing a specific layer.

    Source code in src/saev/data/indexed.py
    def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n
    "},{"location":"api/data/indexed/#saev.data.indexed.Dataset.d_model","title":"d_model property","text":"

    Dimension of the underlying vision transformer's embedding space.

    "},{"location":"api/data/indexed/#saev.data.indexed.Dataset.Example","title":"Example","text":"

    Bases: TypedDict

    Individual example.

    "},{"location":"api/data/indexed/#saev.data.indexed.Dataset.__len__","title":"__len__()","text":"

    Dataset length depends on tokens and layer.

    Source code in src/saev/data/indexed.py
    def __len__(self) -> int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n
    "},{"location":"api/data/models/","title":"saev.data.models","text":""},{"location":"api/data/models/#saev.data.models.Transformer","title":"Transformer","text":"

    Bases: ABC

    Protocol defining the interface for all Transformer models.

    "},{"location":"api/data/models/#saev.data.models.Transformer.patch_size","title":"patch_size abstractmethod property","text":"

    Patch size in pixels (e.g., 14 or 16).

    "},{"location":"api/data/models/#saev.data.models.Transformer.forward","title":"forward(batch) abstractmethod","text":"

    Run forward pass on batch of images.

    Source code in src/saev/data/models.py
    @abc.abstractmethod\ndef forward(\n    self, batch: Float[Tensor, \"batch 3 width height\"]\n) -> Float[Tensor, \"batch patches dim\"]:\n    \"\"\"Run forward pass on batch of images.\"\"\"\n
    "},{"location":"api/data/models/#saev.data.models.Transformer.get_residuals","title":"get_residuals() abstractmethod","text":"

    Return the list of residual blocks/layers for hook registration.

    Source code in src/saev/data/models.py
    @abc.abstractmethod\ndef get_residuals(self) -> list[torch.nn.Module]:\n    \"\"\"Return the list of residual blocks/layers for hook registration.\"\"\"\n
    "},{"location":"api/data/models/#saev.data.models.Transformer.get_token_i","title":"get_token_i(content_tokens_per_example) abstractmethod","text":"

    Return indices for selecting relevant tokens from activations.

    Source code in src/saev/data/models.py
    @abc.abstractmethod\ndef get_token_i(self, content_tokens_per_example: int) -> slice | torch.Tensor:\n    \"\"\"Return indices for selecting relevant tokens from activations.\"\"\"\n
    "},{"location":"api/data/models/#saev.data.models.Transformer.make_resize","title":"make_resize(ckpt, content_tokens_per_example, *, scale=1.0, resample=Image.LANCZOS) abstractmethod staticmethod","text":"

    Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.

    Source code in src/saev/data/models.py
    @staticmethod\n@abc.abstractmethod\ndef make_resize(\n    ckpt: str,\n    content_tokens_per_example: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n
    "},{"location":"api/data/models/#saev.data.models.Transformer.make_transforms","title":"make_transforms(ckpt, content_tokens_per_example) abstractmethod staticmethod","text":"

    Create transforms for preprocessing: (data_transform, dict_transform | None).

    Source code in src/saev/data/models.py
    @staticmethod\n@abc.abstractmethod\ndef make_transforms(\n    ckpt: str, content_tokens_per_example: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n
    "},{"location":"api/data/models/#saev.data.models.list_families","title":"list_families()","text":"

    List all ViT family names.

    Source code in src/saev/data/models.py
    def list_families() -> list[str]:\n    \"\"\"List all ViT family names.\"\"\"\n    return list(_global_model_registry.keys())\n
    "},{"location":"api/data/models/#saev.data.models.load_model_cls","title":"load_model_cls(family)","text":"

    Load a transformer family's class.

    Source code in src/saev/data/models.py
    @beartype.beartype\ndef load_model_cls(family: str) -> type[Transformer]:\n    \"\"\"Load a transformer family's class.\"\"\"\n    if family not in _global_model_registry:\n        raise ValueError(f\"Family '{family}' not found.\")\n\n    return _global_model_registry[family]\n
    "},{"location":"api/data/models/#saev.data.models.register_family","title":"register_family(cls)","text":"

    Register a new transformer family's class.

    Source code in src/saev/data/models.py
    @beartype.beartype\ndef register_family(cls: type[Transformer]):\n    \"\"\"Register a new transformer family's class.\"\"\"\n    if cls.family in _global_model_registry:\n        logger.warning(\"Overwriting key '%s' in registry.\", cls.family)\n    _global_model_registry[cls.family] = cls\n
    "},{"location":"api/data/ordered/","title":"saev.data.ordered","text":"

    Ordered (sequential) dataloader for activation data.

    This module provides a high-throughput dataloader that reads activation data from disk shards in sequential order, without shuffling. The implementation uses a single-threaded manager process to ensure data is delivered in the exact order it appears on disk.

    Patch labels are provided if there is a labels.bin file on disk.

    See the design decisions in src/saev/data/performance.md.

    Usage

    cfg = Config(shards=\"./shards\", layer=13, batch_size=4096)\ndataloader = DataLoader(cfg)\nfor batch in dataloader:\n    activations = batch[\"act\"]  # [batch_size, d_model]\n    image_indices = batch[\"example_idx\"]  # [batch_size]\n    patch_indices = batch[\"token_idx\"]  # [batch_size]\n    patch_labels = batch[\"patch_labels\"]  # [batch_size]\n

    "},{"location":"api/data/ordered/#saev.data.ordered.Config","title":"Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0) dataclass","text":"

    Configuration for loading ordered (non-shuffled) activation data from disk

    Attributes:

    Name Type Description shards Path

    Directory with .bin shards and a metadata.json file.

    tokens Literal['content']

    Which kinds of tokens to use. 'special' returns only the special tokens (if any). 'content' returns content tokens. 'all' returns both content and special tokens.

    layer int | Literal['all']

    Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

    batch_size int

    Batch size.

    batch_timeout_s float

    How long to wait for at least one batch.

    drop_last bool

    Whether to drop the last batch if it's smaller than the others.

    buffer_size int

    Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

    debug bool

    Whether the dataloader process should log debug messages.

    log_every_s float

    How frequently the dataloader process should log (debug) performance messages.

    "},{"location":"api/data/ordered/#saev.data.ordered.DataLoader","title":"DataLoader(cfg)","text":"

    High-throughput streaming loader that reads data from disk shards in order (no shuffling).

    Source code in src/saev/data/ordered.py
    def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. (debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n
    "},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.ExampleBatch","title":"ExampleBatch","text":"

    Bases: TypedDict

    A batch of examples.

    "},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__iter__","title":"__iter__()","text":"

    Yields batches in order.

    Source code in src/saev/data/ordered.py
    def __iter__(self) -> collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n < self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size < self.cfg.batch_size\n                    and n + actual_batch_size >= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n
    "},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__len__","title":"__len__()","text":"

    Returns the number of batches in an epoch.

    Source code in src/saev/data/ordered.py
    def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n
    "},{"location":"api/data/pe/","title":"saev.data.pe","text":"

    Perception Encoder (PE) models from Meta (Bolya et al., 2025).

    PE-Core: CLIP-style model for language alignment. PE-Spatial: Dense prediction model distilled from SAM 2.1.

    Both are available via timm.

    "},{"location":"api/data/pe/#saev.data.pe.Core","title":"Core(ckpt)","text":"

    Bases: _Base

    PE-Core: CLIP-style model for language alignment.

    Available checkpoints: - vit_pe_core_large_patch14_336.fb (L/14, 336px) - vit_pe_core_base_patch16_224.fb (B/16, 224px)

    Source code in src/saev/data/pe.py
    def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n
    "},{"location":"api/data/pe/#saev.data.pe.Spatial","title":"Spatial(ckpt)","text":"

    Bases: _Base

    PE-Spatial: Dense prediction model distilled from SAM 2.1.

    Available checkpoints: - vit_pe_spatial_large_patch14_448.fb (L/14, 448px) - vit_pe_spatial_base_patch16_512.fb (B/16, 512px)

    Source code in src/saev/data/pe.py
    def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n
    "},{"location":"api/data/saev.data/","title":"saev.data","text":""},{"location":"api/data/saev.data/#saev.data.IndexedConfig","title":"IndexedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False) dataclass","text":"

    Configuration for loading indexed activation data from disk

    Attributes:

    Name Type Description shards Path

    Directory with .bin shards and a metadata.json file.

    tokens Literal['special', 'content', 'all']

    Which kinds of tokens to use. 'special' indicates the special tokens (if any). 'content' returns content tokens. 'all' returns both content and special tokens.

    layer int | Literal['all']

    Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

    debug bool

    Whether the dataloader process should log debug messages.

    "},{"location":"api/data/saev.data/#saev.data.IndexedDataset","title":"IndexedDataset(cfg)","text":"

    Bases: Dataset

    Dataset of activations from disk.

    Attributes:

    Name Type Description cfg Config

    Configuration set via CLI args.

    md Metadata

    Activations metadata; automatically loaded from disk.

    layer_idx int

    Layer index into the shards if we are choosing a specific layer.

    Source code in src/saev/data/indexed.py
    def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n
    "},{"location":"api/data/saev.data/#saev.data.IndexedDataset.d_model","title":"d_model property","text":"

    Dimension of the underlying vision transformer's embedding space.

    "},{"location":"api/data/saev.data/#saev.data.IndexedDataset.Example","title":"Example","text":"

    Bases: TypedDict

    Individual example.

    "},{"location":"api/data/saev.data/#saev.data.IndexedDataset.__len__","title":"__len__()","text":"

    Dataset length depends on patches and layer.

    Source code in src/saev/data/indexed.py
    def __len__(self) -> int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n
    "},{"location":"api/data/saev.data/#saev.data.Metadata","title":"Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1') dataclass","text":"

    Metadata for a sharded set of transformer activations.

    Parameters:

    Name Type Description Default family Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']

    The transformer family.

    required ckpt str

    The transformer checkpoint.

    required layers tuple[int, ...]

    Which layers were saved.

    required content_tokens_per_example int

    The number of content tokens per example.

    required cls_token bool

    Whether the transformer has a [CLS] token as well.

    required d_model int

    Model hidden dimension.

    required n_examples int

    Number of examples.

    required max_tokens_per_shard int

    The maximum number of tokens per shard.

    required data str

    base64-encoded string of pickle.dumps(dataset).

    required dataset Path

    Absolute path to the root directory of the original dataset.

    required pixel_agg PixelAgg

    (only for image segmentation datasets) how the pixel-level segmentation labels were aggregated to token-level labels.

    MAJORITY dtype Literal['float32']

    How activations are stored.

    'float32' protocol Literal['1.0.0', '1.1', '2.1']

    Protocol version.

    '2.1'"},{"location":"api/data/saev.data/#saev.data.Metadata.examples_per_shard","title":"examples_per_shard property","text":"

    The number of examples per shard based on the protocol.

    Returns:

    Type Description int

    Number of examples that fit in a shard.

    "},{"location":"api/data/saev.data/#saev.data.Metadata.hash","title":"hash property","text":"

    First 8 bytes of a SHA256 hash of the metadata configuration.

    Returns:

    Type Description str

    Hexadecimal hash string uniquely identifying this configuration.

    "},{"location":"api/data/saev.data/#saev.data.Metadata.n_shards","title":"n_shards property","text":"

    Total number of shards needed to store all examples.

    Returns:

    Type Description int

    Number of shards required.

    "},{"location":"api/data/saev.data/#saev.data.Metadata.shard_shape","title":"shard_shape property","text":"

    Shape of each shard file.

    Returns:

    Type Description tuple[int, int, int, int]

    Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).

    "},{"location":"api/data/saev.data/#saev.data.Metadata.tokens_per_example","title":"tokens_per_example property","text":"

    Total number of tokens per example including [CLS] token if present.

    Returns:

    Type Description int

    Number of tokens plus one if [CLS] token is included.

    "},{"location":"api/data/saev.data/#saev.data.Metadata.dump","title":"dump(shards_root)","text":"

    Dumps a Metadata object to a metadata.json file in shards_root / hash.

    Parameters:

    Name Type Description Default shards_root Path

    Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.

    required Source code in src/saev/data/shards.py
    def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n
    "},{"location":"api/data/saev.data/#saev.data.Metadata.load","title":"load(shards_dir) classmethod","text":"

    Loads a Metadata object from a metadata.json file in shards_dir.

    Parameters:

    Name Type Description Default shards_dir Path

    Path to $SAEV_SCRATCH/saev/shards/<hash> as described in disk-layout.md. required Source code in src/saev/data/shards.py

    @classmethod\ndef load(cls, shards_dir: pathlib.Path) -> tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/<hash> as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n
    "},{"location":"api/data/saev.data/#saev.data.OrderedConfig","title":"OrderedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0) dataclass","text":"

    Configuration for loading ordered (non-shuffled) activation data from disk

    Attributes:

    Name Type Description shards Path

    Directory with .bin shards and a metadata.json file.

    tokens Literal['content']

    Which kinds of tokens to use. 'special' indicates the special tokens (if any). 'content' returns content tokens. 'all' returns both content and special tokens.

    layer int | Literal['all']

    Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

    batch_size int

    Batch size.

    batch_timeout_s float

    How long to wait for at least one batch.

    drop_last bool

    Whether to drop the last batch if it's smaller than the others.

    buffer_size int

    Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

    debug bool

    Whether the dataloader process should log debug messages.

    log_every_s float

    How frequently the dataloader process should log (debug) performance messages.

    "},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader","title":"OrderedDataLoader(cfg)","text":"

    High-throughput streaming loader that reads data from disk shards in order (no shuffling).

    Source code in src/saev/data/ordered.py
    def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. (debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n
    "},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.ExampleBatch","title":"ExampleBatch","text":"

    Bases: TypedDict

    Individual example.

    "},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__iter__","title":"__iter__()","text":"

    Yields batches in order.

    Source code in src/saev/data/ordered.py
    def __iter__(self) -> collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n < self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size < self.cfg.batch_size\n                    and n + actual_batch_size >= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n
    "},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__len__","title":"__len__()","text":"

    Returns the number of batches in an epoch.

    Source code in src/saev/data/ordered.py
    def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n
    "},{"location":"api/data/saev.data/#saev.data.PixelAgg","title":"PixelAgg","text":"

    Bases: Enum

    How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig","title":"ShuffledConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False) dataclass","text":"

    Configuration for loading shuffled activation data from disk.

    Attributes:

    Name Type Description shards Path

    Directory with .bin shards and a metadata.json file.

    tokens Literal['special', 'content', 'all']

    Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 'all' returns all tokens.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_size","title":"batch_size = 1024 * 16 class-attribute instance-attribute","text":"

    Batch size.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_timeout_s","title":"batch_timeout_s = 30.0 class-attribute instance-attribute","text":"

    How long to wait for at least one batch.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.buffer_size","title":"buffer_size = 64 class-attribute instance-attribute","text":"

    Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.debug","title":"debug = False class-attribute instance-attribute","text":"

    Whether the dataloader process should log debug messages.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.drop_last","title":"drop_last = False class-attribute instance-attribute","text":"

    Whether to drop the last batch if it's smaller than the others.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.ignore_labels","title":"ignore_labels = dataclasses.field(default_factory=list) class-attribute instance-attribute","text":"

    If provided, exclude tokens with these label values. None means no filtering. Common use: ignore_labels=[0] to exclude background.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.layer","title":"layer = -1 class-attribute instance-attribute","text":"

    Which transformer layer(s) to read from disk. -1 is the default, but must be changed. \"all\" enumerates every recorded layer.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.log_every_s","title":"log_every_s = 30.0 class-attribute instance-attribute","text":"

    How frequently the dataloader process should log (debug) performance messages.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.min_buffer_fill","title":"min_buffer_fill = 0.0 class-attribute instance-attribute","text":"

    Fraction of the reservoir that must be populated before yielding batches.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.n_threads","title":"n_threads = 4 class-attribute instance-attribute","text":"

    Number of dataloading threads.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.scale_norm","title":"scale_norm = False class-attribute instance-attribute","text":"

    Whether to scale norms to sqrt(D).

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.seed","title":"seed = 17 class-attribute instance-attribute","text":"

    Random seed.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.use_tmpdir","title":"use_tmpdir = False class-attribute instance-attribute","text":"

    If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader","title":"ShuffledDataLoader(cfg)","text":"

    High-throughput streaming loader that deterministically shuffles data from disk shards.

    Source code in src/saev/data/shuffled.py
    def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = None\n    self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n
    "},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.ExampleBatch","title":"ExampleBatch","text":"

    Bases: TypedDict

    Individual example.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__iter__","title":"__iter__()","text":"

    Yields batches.

    Source code in src/saev/data/shuffled.py
    def __iter__(self) -> collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n < self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n            self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n
    "},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__len__","title":"__len__()","text":"

    Returns the number of batches in an epoch.

    Source code in src/saev/data/shuffled.py
    def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / self.cfg.batch_size)\n
    "},{"location":"api/data/saev.data/#saev.data.make_ordered_config","title":"make_ordered_config(shuffled_cfg, **overrides)","text":"

    Create an OrderedConfig from a ShuffledConfig, with optional overrides.

    Defaults come from shuffled_cfg for fields present in OrderedConfig, and overrides take precedence. Unknown override fields raise TypeError from the OrderedConfig constructor, mirroring dataclasses.replace.

    Source code in src/saev/data/__init__.py
    @beartype.beartype\ndef make_ordered_config(\n    shuffled_cfg: ShuffledConfig, **overrides: object\n) -> OrderedConfig:\n    \"\"\"Create an `OrderedConfig` from a `ShuffledConfig`, with optional overrides.\n\n    Defaults come from `shuffled_cfg` for fields present in `OrderedConfig`, and `overrides` take precedence. Unknown override fields raise `TypeError` from the `OrderedConfig` constructor, mirroring `dataclasses.replace`.\n    \"\"\"\n    params: dict[str, object] = {}\n    for f in dataclasses.fields(OrderedConfig):\n        name = f.name\n        if hasattr(shuffled_cfg, name):\n            params[name] = getattr(shuffled_cfg, name)\n    params.update(overrides)\n    return OrderedConfig(**params)\n
    "},{"location":"api/data/shards/","title":"saev.data.shards","text":"

    Library code for reading and writing sharded activations to disk.

    "},{"location":"api/data/shards/#saev.data.shards.Index","title":"Index(*, idx, example_idx, content_token_idx, shard_idx, example_idx_in_shard, layer_idx_in_shard, token_idx_in_shard) dataclass","text":"

    Attributes:

    Name Type Description idx int

    The index of the activation.

    example_idx int

    The index of the original example (image, audio clip etc).

    content_token_idx int

    The token's index within an example's content. -1 for all special tokens.

    shard_idx int

    The shard index.

    example_idx_in_shard int

    The example index along the examples axis in a shard.

    token_idx_in_shard int

    The token index along the tokens axis in a shard.

    "},{"location":"api/data/shards/#saev.data.shards.IndexMap","title":"IndexMap(md, tokens, layer)","text":"

    Attributes:

    Name Type Description md Metadata

    Metadata

    tokens Literal['special', 'content', 'all']

    Which subset of tokens to load.

    layer int

    Which layer to load.

    layer_idx_lookup dict[int, int]

    The lookup from a transformer layer to the layer idx in the shard.

    Source code in src/saev/data/shards.py
    def __init__(\n    self,\n    md: Metadata,\n    tokens: tp.Literal[\"special\", \"content\", \"all\"],\n    layer: int | tp.Literal[\"all\"],\n):\n    if tokens == \"special\":\n        assert md.cls_token\n\n    self.md = md\n    self.tokens = tokens\n    self.layer = layer\n\n    if isinstance(layer, int):\n        err_msg = f\"No matche for layer; {layer} not in {md.layers}.\"\n        assert layer in md.layers, err_msg\n\n    self.layer_idx_lookup = {layer: i for i, layer in enumerate(md.layers)}\n
    "},{"location":"api/data/shards/#saev.data.shards.IndexMap.__len__","title":"__len__()","text":"

    Dataset length depends on patches and layer.

    Source code in src/saev/data/shards.py
    def __len__(self) -> int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    match (self.tokens, self.layer):\n        case (\"special\", \"all\"):\n            # Return a CLS token from a random example and random layer.\n            return self.md.n_examples * len(self.md.layers)\n        case (\"special\", int()):\n            # Return a CLS token from a random example and fixed layer.\n            return self.md.n_examples\n        case (\"content\", int()):\n            # Return a patch from a random example, fixed layer, and random patch.\n            return self.md.n_examples * self.md.content_tokens_per_example\n        case (\"content\", \"all\"):\n            # Return a patch from a random example, random layer and random patch.\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.content_tokens_per_example\n            )\n        case (\"all\", int()):\n            # Return a token from a random example, fixed layer, and random token (including special).\n            return self.md.n_examples * self.md.tokens_per_example\n        case (\"all\", \"all\"):\n            # Return a token from a random example, random layer and random token (including special).\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.tokens_per_example\n            )\n        case _:\n            tp.assert_never((self.cfg.tokens, self.cfg.layer))\n
    "},{"location":"api/data/shards/#saev.data.shards.LabelsWriter","title":"LabelsWriter(shards_dir, md)","text":"

    LabelsWriter handles writing patch-level segmentation labels to a single binary file.

    Parameters:

    Name Type Description Default shards_dir Path

    The shard directory; $SAEV_SCRATCH/saev/shards/<hash> required md Metadata

    The Metadata object.

    required

    Attributes:

    Name Type Description labels UInt8[ndarray, 'n_examples n_patches']

    The integer patch labels.

    labels_path Path

    Where the integer patch labels are stored.

    md Metadata

    The dataset metadata.

    has_written bool

    Whether we have written any data to self.labels.

    Source code in src/saev/data/shards.py
    def __init__(self, shards_dir: pathlib.Path, md: Metadata):\n    assert disk.is_shards_dir(shards_dir)\n    self.logger = logging.getLogger(\"labels-writer\")\n    self.md = md\n    self.has_written = False\n\n    # Always create memory-mapped file for labels\n    # If nothing is written, it will be deleted in flush()\n    self.labels_path = shards_dir / \"labels.bin\"\n    self.labels = np.memmap(\n        self.labels_path,\n        mode=\"w+\",\n        dtype=np.uint8,\n        shape=(self.md.n_examples, self.md.content_tokens_per_example),\n    )\n    self.logger.info(\"Opened labels file '%s'.\", self.labels_path)\n
    "},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.flush","title":"flush()","text":"

    Flush the memory-mapped file to disk if anything was written.

    Source code in src/saev/data/shards.py
    def flush(self) -> None:\n    \"\"\"Flush the memory-mapped file to disk if anything was written.\"\"\"\n    if self.has_written:\n        self.labels.flush()\n        self.logger.info(\"Flushed labels to '%s'.\", self.labels_path)\n
    "},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.write_batch","title":"write_batch(batch_labels, start_idx)","text":"

    Write a batch of labels to the memory-mapped file.

    Parameters:

    Name Type Description Default batch_labels ndarray | Tensor

    Array of shape (batch_size, content_tokens_per_example) with uint8 dtype

    required start_idx int

    Starting index in the global labels array

    required Source code in src/saev/data/shards.py
    @beartype.beartype\ndef write_batch(self, batch_labels: np.ndarray | Tensor, start_idx: int):\n    \"\"\"\n    Write a batch of labels to the memory-mapped file.\n\n    Args:\n        batch_labels: Array of shape (batch_size, content_tokens_per_example) with uint8 dtype\n        start_idx: Starting index in the global labels array\n    \"\"\"\n    # Convert to numpy if needed\n    if isinstance(batch_labels, torch.Tensor):\n        batch_labels = batch_labels.cpu().numpy()\n\n    batch_size = len(batch_labels)\n    assert start_idx + batch_size <= self.md.n_examples\n    assert batch_labels.shape == (batch_size, self.md.content_tokens_per_example)\n    assert batch_labels.dtype == np.uint8\n\n    self.labels[start_idx : start_idx + batch_size] = batch_labels\n    self.has_written = True\n
    "},{"location":"api/data/shards/#saev.data.shards.Metadata","title":"Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1') dataclass","text":"

    Metadata for a sharded set of transformer activations.

    Parameters:

    Name Type Description Default family Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']

    The transformer family.

    required ckpt str

    The transformer checkpoint.

    required layers tuple[int, ...]

    Which layers were saved.

    required content_tokens_per_example int

    The number of content tokens per example.

    required cls_token bool

    Whether the transformer has a [CLS] token as well.

    required d_model int

    Model hidden dimension.

    required n_examples int

    Number of examples.

    required max_tokens_per_shard int

    The maximum number of tokens per shard.

    required data str

    base64-encoded string of pickle.dumps(dataset).

    required dataset Path

    Absolute path to the root directory of the original dataset.

    required pixel_agg PixelAgg

    (only for image segmentation datasets) how the pixel-level segmentation labels were aggregated to token-level labels.

    MAJORITY dtype Literal['float32']

    How activations are stored.

    'float32' protocol Literal['1.0.0', '1.1', '2.1']

    Protocol version.

    '2.1'"},{"location":"api/data/shards/#saev.data.shards.Metadata.examples_per_shard","title":"examples_per_shard property","text":"

    The number of examples per shard based on the protocol.

    Returns:

    Type Description int

    Number of examples that fit in a shard.

    "},{"location":"api/data/shards/#saev.data.shards.Metadata.hash","title":"hash property","text":"

    First 8 bytes of a SHA256 hash of the metadata configuration.

    Returns:

    Type Description str

    Hexadecimal hash string uniquely identifying this configuration.

    "},{"location":"api/data/shards/#saev.data.shards.Metadata.n_shards","title":"n_shards property","text":"

    Total number of shards needed to store all examples.

    Returns:

    Type Description int

    Number of shards required.

    "},{"location":"api/data/shards/#saev.data.shards.Metadata.shard_shape","title":"shard_shape property","text":"

    Shape of each shard file.

    Returns:

    Type Description tuple[int, int, int, int]

    Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).

    "},{"location":"api/data/shards/#saev.data.shards.Metadata.tokens_per_example","title":"tokens_per_example property","text":"

    Total number of tokens per example including [CLS] token if present.

    Returns:

    Type Description int

    Number of tokens plus one if [CLS] token is included.

    "},{"location":"api/data/shards/#saev.data.shards.Metadata.dump","title":"dump(shards_root)","text":"

    Dumps a Metadata object to a metadata.json file in shards_root / hash.

    Parameters:

    Name Type Description Default shards_root Path

    Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.

    required Source code in src/saev/data/shards.py
    def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n
    "},{"location":"api/data/shards/#saev.data.shards.Metadata.load","title":"load(shards_dir) classmethod","text":"

    Loads a Metadata object from a metadata.json file in shards_dir.

    Parameters:

    Name Type Description Default shards_dir Path

    Path to $SAEV_SCRATCH/saev/shards/<hash> as described in disk-layout.md. required Source code in src/saev/data/shards.py

    @classmethod\ndef load(cls, shards_dir: pathlib.Path) -> tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/<hash> as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n
    "},{"location":"api/data/shards/#saev.data.shards.PixelAgg","title":"PixelAgg","text":"

    Bases: Enum

    How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).

    "},{"location":"api/data/shards/#saev.data.shards.RecordedTransformer","title":"RecordedTransformer(model, content_tokens_per_example, cls_token, layers)","text":"

    Bases: Module

    A wrapper around a transformer model that records intermediate layer activations during forward passes.

    Parameters:

    Name Type Description Default model Module

    The transformer model to wrap.

    required content_tokens_per_example int

    Number of content tokens per example.

    required cls_token bool

    Whether to record the [CLS] token in addition to content tokens.

    required layers Sequence[int]

    Which transformer layers to record activations from.

    required

    Attributes:

    Name Type Description model Module

    The wrapped transformer model.

    content_tokens_per_example int

    Number of content tokens per example.

    cls_token bool

    Whether the [CLS] token is included in recorded activations.

    layers Sequence[int]

    Tuple of layer indices being recorded.

    token_i slice

    Token indices to extract from model outputs.

    logger

    Logger instance for this recorder.

    Source code in src/saev/data/shards.py
    def __init__(\n    self,\n    model: torch.nn.Module,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    layers: Sequence[int],\n):\n    super().__init__()\n\n    self.model = model\n\n    self.content_tokens_per_example = content_tokens_per_example\n    self.cls_token = cls_token\n    self.layers = layers\n\n    self.token_i = model.get_token_i(content_tokens_per_example)\n\n    self._storage = None\n    self._i = 0\n\n    self.logger = logging.getLogger(f\"recorder({model.name})\")\n\n    for i in self.layers:\n        self.model.get_residuals()[i].register_forward_hook(self.hook)\n
    "},{"location":"api/data/shards/#saev.data.shards.Shard","title":"Shard(name, n_examples) dataclass","text":"

    A single shard entry in shards.json, recording the filename and number of examples.

    Attributes:

    Name Type Description name str

    The filename of the shard (e.g., \"acts000000.bin\").

    n_examples int

    Number of examples stored in this shard.

    "},{"location":"api/data/shards/#saev.data.shards.ShardInfo","title":"ShardInfo(shards=list()) dataclass","text":"

    A container for shard metadata as recorded in shards.json.

    Parameters:

    Name Type Description Default shards list[Shard]

    A list of Shard objects.

    list()"},{"location":"api/data/shards/#saev.data.shards.ShardWriter","title":"ShardWriter(shards_root, md)","text":"

    ShardWriter is a stateful object that handles sharded activation writing to disk.

    Parameters:

    Name Type Description Default shards_root Path

    The $SAEV_SCRATCH/saev/shards path.

    required md Metadata

    The Metadata object for these shards.

    required

    Attributes:

    Name Type Description shards Path

    The $SAEV_SCRATCH/saev/shards/<hash> directory. shard int acts_path Path acts Float[ndarray, 'examples_per_shard n_layers all_patches d_model'] | None filled int labels_writer LabelsWriter

    The LabelsWriter writer.

    Source code in src/saev/data/shards.py
    def __init__(self, shards_root: pathlib.Path, md: Metadata):\n    assert disk.is_shards_root(shards_root)\n    self.md = md\n\n    self.logger = logging.getLogger(\"shard-writer\")\n\n    self.shards_dir = shards_root / md.hash\n    self.shards_dir.mkdir(exist_ok=True)\n\n    # builder for shard manifest\n    self._shards: ShardInfo = ShardInfo()\n\n    # Always initialize labels writer (it handles non-seg datasets internally)\n    self.labels_writer = LabelsWriter(self.shards_dir, md)\n\n    self.shard = -1\n    self.acts = None\n    self.next_shard()\n
    "},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__enter__","title":"__enter__()","text":"

    Context manager entry.

    Source code in src/saev/data/shards.py
    def __enter__(self):\n    \"\"\"Context manager entry.\"\"\"\n    return self\n
    "},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__exit__","title":"__exit__(exc_type, exc_val, exc_tb)","text":"

    Context manager exit - handle cleanup.

    Source code in src/saev/data/shards.py
    def __exit__(self, exc_type, exc_val, exc_tb):\n    \"\"\"Context manager exit - handle cleanup.\"\"\"\n    self.flush()\n\n    # Delete empty labels file if nothing was written\n    if not self.labels_writer.has_written:\n        if os.path.exists(self.labels_writer.labels_path):\n            os.remove(self.labels_writer.labels_path)\n            self.logger.info(\n                \"Removed empty labels file '%s'.\", self.labels_writer.labels_path\n            )\n
    "},{"location":"api/data/shards/#saev.data.shards.ShardWriter.write_batch","title":"write_batch(activations, start_idx, patch_labels=None)","text":"

    Write a batch of activations and (optionally) patch labels.

    Parameters:

    Name Type Description Default activations Float[Tensor, 'batch n_layers all_patches d_model']

    Batch of activations to write.

    required start_idx int

    Starting index for this batch.

    required patch_labels UInt8[Tensor, 'batch n_patches'] | None

    Optional patch labels for segmentation datasets.

    None Source code in src/saev/data/shards.py
    def write_batch(\n    self,\n    activations: Float[Tensor, \"batch n_layers all_patches d_model\"],\n    start_idx: int,\n    patch_labels: UInt8[Tensor, \"batch n_patches\"] | None = None,\n) -> None:\n    \"\"\"Write a batch of activations and (optionally) patch labels.\n\n    Args:\n        activations: Batch of activations to write.\n        start_idx: Starting index for this batch.\n        patch_labels: Optional patch labels for segmentation datasets.\n    \"\"\"\n    batch_size = len(activations)\n    end_idx = start_idx + batch_size\n\n    # Write activations (handling sharding)\n    offset = self.md.examples_per_shard * self.shard\n\n    if end_idx >= offset + self.md.examples_per_shard:\n        # We have run out of space in this mmap'ed file. Let's fill it as much as we can.\n        n_fit = offset + self.md.examples_per_shard - start_idx\n        self.acts[start_idx - offset : start_idx - offset + n_fit] = activations[\n            :n_fit\n        ]\n        self.filled = start_idx - offset + n_fit\n\n        # Write labels for the portion that fits\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                labels_to_write = (\n                    patch_labels[:n_fit].cpu().numpy().astype(np.uint8)\n                )\n            elif not isinstance(patch_labels, np.ndarray):\n                labels_to_write = np.array(patch_labels[:n_fit], dtype=np.uint8)\n            else:\n                labels_to_write = patch_labels[:n_fit]\n\n            self.labels_writer.write_batch(labels_to_write, start_idx)\n\n        self.next_shard()\n\n        # Recursively call write_batch for remaining data\n        if n_fit < batch_size:\n            self.write_batch(\n                activations[n_fit:],\n                start_idx + n_fit,\n                patch_labels[n_fit:] if patch_labels is not None else None,\n            )\n    else:\n        msg = f\"0 <= 
{start_idx} - {offset} <= {offset} + {self.md.examples_per_shard}\"\n        assert 0 <= start_idx - offset <= offset + self.md.examples_per_shard, msg\n        msg = (\n            f\"0 <= {end_idx} - {offset} <= {offset} + {self.md.examples_per_shard}\"\n        )\n        assert 0 <= end_idx - offset <= offset + self.md.examples_per_shard, msg\n        self.acts[start_idx - offset : end_idx - offset] = activations\n        self.filled = end_idx - offset\n\n        # Write labels if provided\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                patch_labels = patch_labels.cpu().numpy().astype(np.uint8)\n            elif not isinstance(patch_labels, np.ndarray):\n                patch_labels = np.array(patch_labels, dtype=np.uint8)\n\n            self.labels_writer.write_batch(patch_labels, start_idx)\n
    "},{"location":"api/data/shards/#saev.data.shards.get_dataloader","title":"get_dataloader(data, *, batch_size, n_workers, data_tr=None, mask_tr=None, sample_tr=None)","text":"

    Get a dataloader for a default map-style dataset.

    Parameters:

    Name Type Description Default data Config

    Config for the dataset.

    required batch_size int

    Batch size.

    required n_workers int

    Number of dataloader workers.

    required data_tr Callable | None

    Transform to be applied to each 'data' key (typically the raw data).

    None mask_tr Callable | None

    Transform to be applied to masks.

    None sample_tr Callable | None

    Transform to be applied to the entire sample dict.

    None

    Returns:

    Type Description DataLoader

    A PyTorch Dataloader that yields dictionaries with 'data' keys containing data batches, 'index' keys containing original dataset indices and 'label' keys containing label batches.

    Source code in src/saev/data/shards.py
    @beartype.beartype\ndef get_dataloader(\n    data: datasets.Config,\n    *,\n    batch_size: int,\n    n_workers: int,\n    data_tr: Callable | None = None,\n    mask_tr: Callable | None = None,\n    sample_tr: Callable | None = None,\n) -> torch.utils.data.DataLoader:\n    \"\"\"\n    Get a dataloader for a default map-style dataset.\n\n    Args:\n        data: Config for the dataset.\n        batch_size: Batch size.\n        n_workers: Number of dataloader workers.\n        data_tr: Transform to be applied to each 'data' key (typically the raw data).\n        mask_tr: Transform to be applied to masks.\n        sample_tr: Transform to be applied to the entire sample dict.\n\n    Returns:\n        A PyTorch Dataloader that yields dictionaries with `'data'` keys containing data batches, `'index'` keys containing original dataset indices and `'label'` keys containing label batches.\n    \"\"\"\n    dataset = datasets.get_dataset(\n        data, data_transform=data_tr, mask_transform=mask_tr, sample_transform=sample_tr\n    )\n\n    dataloader = torch.utils.data.DataLoader(\n        dataset=dataset,\n        batch_size=batch_size,\n        drop_last=False,\n        num_workers=n_workers,\n        persistent_workers=n_workers > 0,\n        shuffle=False,\n        pin_memory=False,\n    )\n    return dataloader\n
    "},{"location":"api/data/shards/#saev.data.shards.pixel_to_patch_labels","title":"pixel_to_patch_labels(seg, n_patches, patch_size, pixel_agg=PixelAgg.MAJORITY, bg_label=0, max_classes=256)","text":"

    Convert pixel-level segmentation to patch-level labels using vectorized operations.

    Parameters:

    Name Type Description Default seg Image

    Pixel-level segmentation mask as PIL Image

    required n_patches int

    Total number of patches expected

    required patch_size int

    Size of each patch in pixels

    required pixel_agg PixelAgg

    How to aggregate pixel labels into patch labels

    MAJORITY bg_label int

    Background label index

    0 max_classes int

    Maximum number of classes (for bincount)

    256

    Returns:

    Type Description UInt8[Tensor, ' n_patches']

    Patch labels as uint8 tensor of shape (n_patches,)

    Source code in src/saev/data/shards.py
    @jaxtyped(typechecker=beartype.beartype)\ndef pixel_to_patch_labels(\n    seg: Image.Image,\n    n_patches: int,\n    patch_size: int,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n    bg_label: int = 0,\n    max_classes: int = 256,\n) -> UInt8[Tensor, \" n_patches\"]:\n    \"\"\"\n    Convert pixel-level segmentation to patch-level labels using vectorized operations.\n\n    Args:\n        seg: Pixel-level segmentation mask as PIL Image\n        n_patches: Total number of patches expected\n        patch_size: Size of each patch in pixels\n        pixel_agg: How to aggregate pixel labels into patch labels\n        bg_label: Background label index\n        max_classes: Maximum number of classes (for bincount)\n\n    Returns:\n        Patch labels as uint8 tensor of shape (n_patches,)\n    \"\"\"\n    # Convert to torch tensor for vectorized operations\n    seg_tensor = torch.from_numpy(np.array(seg, dtype=np.uint8))\n    assert seg_tensor.ndim == 2\n\n    h, w = seg_tensor.shape\n\n    # Calculate patch grid dimensions\n    patch_grid_h = h // patch_size\n    patch_grid_w = w // patch_size\n    assert patch_grid_w * patch_grid_h == n_patches, (\n        f\"Image size {w}x{h} with patch_size {patch_size} gives {patch_grid_w}x{patch_grid_h} = {patch_grid_w * patch_grid_h} patches, expected {n_patches}\"\n    )\n\n    # Reshape into patches using einops: (n_patches, patch_size * patch_size)\n    patches = einops.rearrange(\n        seg_tensor,\n        \"(h p1) (w p2) -> (h w) (p1 p2)\",\n        p1=patch_size,\n        p2=patch_size,\n        h=patch_grid_h,\n        w=patch_grid_w,\n    )\n\n    # Use vectorized bincount approach to get class counts for all patches at once\n    # counts[i, c] = number of times class c appears in patch i\n    offsets = torch.arange(n_patches, device=patches.device).unsqueeze(1) * max_classes\n    flat = (patches + offsets).reshape(-1)\n    counts = torch.bincount(flat, minlength=n_patches * max_classes).reshape(\n        
n_patches, max_classes\n    )\n\n    if pixel_agg is PixelAgg.MAJORITY:\n        # Take the most common label in each patch\n        patch_labels = counts.argmax(dim=1)\n    elif pixel_agg is PixelAgg.PREFER_FG:\n        # Take the most common non-background label, or background if all background\n        nonbg = counts.clone()\n        nonbg[:, bg_label] = 0\n        has_nonbg = nonbg.sum(dim=1) > 0\n        nonbg_arg = nonbg.argmax(dim=1)\n        bg_tensor = torch.full_like(nonbg_arg, bg_label)\n        patch_labels = torch.where(has_nonbg, nonbg_arg, bg_tensor)\n    else:\n        tp.assert_never(pixel_agg)\n\n    return patch_labels.to(torch.uint8)\n
    "},{"location":"api/data/shards/#saev.data.shards.worker_fn","title":"worker_fn(*, family, ckpt, content_tokens_per_example, cls_token, d_model, layers, data, batch_size, n_workers, max_tokens_per_shard, shards_root, device, pixel_agg=PixelAgg.MAJORITY)","text":"

    Parameters:

    Name Type Description Default family str

    Transformer family (dinov2, dinov3, clip, etc).

    required ckpt str

    Transformer ckpt (hf-hub:imageomics/bioclip2, etc).

    required content_tokens_per_example int

    Number of content tokens per example.

    required cls_token bool

    Whether the transformer has a [CLS] token.

    required d_model int

    Hidden dimension of transformer.

    required layers list[int]

    The layers to record activations for.

    required data Config

    Config for the particular (image) dataset to load.

    required batch_size int

    Batch size for the dataset.

    required n_workers int

    Number of workers for loading examples from the dataset.

    required max_tokens_per_shard int

    Maximum number of tokens per disk shard.

    required pixel_agg PixelAgg

    Optional method for aggregating segmentation label pixels.

    MAJORITY shards_root Path

    Where to save shards. Should end with 'shards'. See disk-layout.md; this is $SAEV_SCRATCH/saev/shards.

    required device str

    Device for doing the computation.

    required

    Returns:

    Type Description Path

    Path to the shards directory.

    Source code in src/saev/data/shards.py
    @beartype.beartype\ndef worker_fn(\n    *,\n    family: str,\n    ckpt: str,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    d_model: int,\n    layers: list[int],\n    data: datasets.Config,\n    batch_size: int,\n    n_workers: int,\n    max_tokens_per_shard: int,\n    shards_root: pathlib.Path,\n    device: str,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n) -> pathlib.Path:\n    \"\"\"\n    Args:\n        family: Transformer family (dinov2, dinov3, clip, etc).\n        ckpt: Transformer ckpt (hf-hub:imageomics/bioclip2, etc).\n        content_tokens_per_example: Number of content tokens per example.\n        cls_token: Whether the transformer has a [CLS] token.\n        d_model: Hidden dimension of transformer.\n        layers: The layers to record activations for.\n        data: Config for the particular (image) dataset to load.\n        batch_size: Batch size for the dataset.\n        n_workers: Number of workers for loading examples fromm the dataset.\n        max_tokens_per_shard: Maximum number of tokens per disk shard.\n        pixel_agg: Optional method for aggregating segmentation label pixels.\n        shards_root: Where to save shards. Should end with 'shards'. 
See [disk-layout.md](../../developers/disk-layout.md); this is $SAEV_SCRATCH/saev/shards.\n        device: Device for doing the computation.\n\n    Returns:\n        Path to the shards directory.\n    \"\"\"\n    from saev import helpers\n    from saev.data import models\n\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n        torch.backends.cudnn.benchmark = True\n        torch.backends.cudnn.deterministic = True\n\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n    logger = logging.getLogger(\"worker_fn\")\n\n    if device == \"cuda\" and not torch.cuda.is_available():\n        logger.warning(\"No CUDA device available, using CPU.\")\n        device = \"cpu\"\n\n    assert shards_root.name == \"shards\"\n\n    model_cls = models.load_model_cls(family)\n    model_instance = model_cls(ckpt).to(device)\n    model = RecordedTransformer(\n        model_instance, content_tokens_per_example, cls_token, layers\n    )\n\n    data_tr, sample_tr = model_cls.make_transforms(ckpt, content_tokens_per_example)\n\n    mask_tr = None\n    if datasets.is_img_seg_dataset(data):\n        # For image segmentation datasets, create a transform that converts pixels to patches\n        # Use make_resize with NEAREST interpolation for segmentation masks\n        seg_resize_tr = model_cls.make_resize(\n            ckpt, content_tokens_per_example, scale=1.0, resample=Image.NEAREST\n        )\n\n        def seg_to_patches(seg):\n            \"\"\"Transform that resizes segmentation and converts to patch labels.\"\"\"\n\n            # Convert to patch labels\n            return pixel_to_patch_labels(\n                seg_resize_tr(seg),\n                content_tokens_per_example,\n   
             patch_size=model_instance.patch_size,\n                pixel_agg=pixel_agg,\n                bg_label=data.bg_label,\n            )\n\n        mask_tr = seg_to_patches\n\n    dataloader = get_dataloader(\n        data,\n        batch_size=batch_size,\n        n_workers=n_workers,\n        data_tr=data_tr,\n        mask_tr=mask_tr,\n        sample_tr=sample_tr,\n    )\n\n    n_batches = math.ceil(data.n_examples / batch_size)\n    logger.info(\"Dumping %d batches of %d examples.\", n_batches, batch_size)\n\n    model = model.to(device)\n\n    md = Metadata(\n        family=family,\n        ckpt=ckpt,\n        layers=tuple(layers),\n        content_tokens_per_example=content_tokens_per_example,\n        cls_token=cls_token,\n        d_model=d_model,\n        n_examples=data.n_examples,\n        max_tokens_per_shard=max_tokens_per_shard,\n        data=base64.b64encode(pickle.dumps(data)).decode(\"utf8\"),\n        dataset=data.root,\n        pixel_agg=pixel_agg,\n    )\n    md.dump(shards_root)\n\n    # Use context manager for proper cleanup\n    with ShardWriter(shards_root, md) as writer:\n        i = 0\n        # Calculate and write transformer activations.\n        with torch.inference_mode():\n            for batch in helpers.progress(dataloader, total=n_batches):\n                data = batch.get(\"data\").to(device)\n                grid = batch.get(\"grid\")\n                if grid is not None:\n                    grid = grid.to(device)\n                    out, cache = model(data, grid=grid)\n                else:\n                    out, cache = model(data)\n                # cache has shape [batch size, n layers, n patches + 1, d model]\n                del out\n\n                # Write activations and labels (if present) in one call\n                patch_labels = batch.get(\"patch_labels\")\n                if patch_labels is not None:\n                    logger.debug(\n                        \"Found patch_labels in batch: shape=%s\",\n 
                       patch_labels.shape\n                        if hasattr(patch_labels, \"shape\")\n                        else \"unknown\",\n                    )\n                    # Ensure correct shape\n                    assert patch_labels.shape == (\n                        len(cache),\n                        content_tokens_per_example,\n                    )\n                else:\n                    logger.debug(f\"No patch_labels in batch. Keys: {batch.keys()}\")\n\n                writer.write_batch(cache, i, patch_labels=patch_labels)\n\n                i += len(cache)\n\n    return shards_root / md.hash\n
    "},{"location":"api/data/shuffled/","title":"saev.data.shuffled","text":""},{"location":"api/data/shuffled/#saev.data.shuffled.Config","title":"Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False) dataclass","text":"

    Configuration for loading shuffled activation data from disk.

    Attributes:

    Name Type Description shards Path

    Directory with .bin shards and a metadata.json file.

    tokens Literal['special', 'content', 'all']

    Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 'all' returns all tokens.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_size","title":"batch_size = 1024 * 16 class-attribute instance-attribute","text":"

    Batch size.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_timeout_s","title":"batch_timeout_s = 30.0 class-attribute instance-attribute","text":"

    How long to wait for at least one batch.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.buffer_size","title":"buffer_size = 64 class-attribute instance-attribute","text":"

    Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.debug","title":"debug = False class-attribute instance-attribute","text":"

    Whether the dataloader process should log debug messages.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.drop_last","title":"drop_last = False class-attribute instance-attribute","text":"

    Whether to drop the last batch if it's smaller than the others.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.ignore_labels","title":"ignore_labels = dataclasses.field(default_factory=list) class-attribute instance-attribute","text":"

    If provided, exclude tokens with these label values. None means no filtering. Common use: ignore_labels=[0] to exclude background.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.layer","title":"layer = -1 class-attribute instance-attribute","text":"

    Which transformer layer(s) to read from disk. -1 is the default, but must be changed. \"all\" enumerates every recorded layer.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.log_every_s","title":"log_every_s = 30.0 class-attribute instance-attribute","text":"

    How frequently the dataloader process should log (debug) performance messages.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.min_buffer_fill","title":"min_buffer_fill = 0.0 class-attribute instance-attribute","text":"

    Fraction of the reservoir that must be populated before yielding batches.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.n_threads","title":"n_threads = 4 class-attribute instance-attribute","text":"

    Number of dataloading threads.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.scale_norm","title":"scale_norm = False class-attribute instance-attribute","text":"

    Whether to scale norms to sqrt(D).

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.seed","title":"seed = 17 class-attribute instance-attribute","text":"

    Random seed.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.use_tmpdir","title":"use_tmpdir = False class-attribute instance-attribute","text":"

    If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader","title":"DataLoader(cfg)","text":"

    High-throughput streaming loader that deterministically shuffles data from disk shards.

    Source code in src/saev/data/shuffled.py
    def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = None\n    self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n
    "},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.ExampleBatch","title":"ExampleBatch","text":"

    Bases: TypedDict

    Individual example.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__iter__","title":"__iter__()","text":"

    Yields batches.

    Source code in src/saev/data/shuffled.py
    def __iter__(self) -> collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n < self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n            self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n
    "},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__len__","title":"__len__()","text":"

    Returns the number of batches in an epoch.

    Source code in src/saev/data/shuffled.py
    def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / self.cfg.batch_size)\n
    "},{"location":"api/data/siglip/","title":"saev.data.siglip","text":""},{"location":"api/data/siglip/#saev.data.siglip.Vit","title":"Vit(ckpt)","text":"

    Bases: Module, Transformer

    Source code in src/saev/data/siglip.py
    def __init__(self, ckpt: str):\n    super().__init__()\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model\n\n    assert isinstance(self.model, open_clip.timm_model.TimmModel)\n
    "},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_resize","title":"make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

    Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.

    Source code in src/saev/data/siglip.py
    @staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    from PIL import Image\n\n    def resize(img: Image.Image) -> Image.Image:\n        # SigLIP typically uses 224x224 or 384x384 images\n        # We'll assume 224x224 for simplicity\n        resize_size_px = (int(224 * scale), int(224 * scale))\n        return img.resize(resize_size_px, resample=resample)\n\n    return resize\n
    "},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

    Create transforms for preprocessing: (img_transform, sample_transform | None).

    Source code in src/saev/data/siglip.py
    @staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n
    "},{"location":"api/data/transforms/","title":"saev.data.transforms","text":""},{"location":"api/data/transforms/#saev.data.transforms.conv2d_to_tokens","title":"conv2d_to_tokens(x_bchw, conv)","text":"

    Conv2d then flatten spatial to L, return (B, L, D).

    Source code in src/saev/data/transforms.py
    @jaxtyped(typechecker=beartype.beartype)\ndef conv2d_to_tokens(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -> Float[Tensor, \"b n d\"]:\n    \"\"\"Conv2d then flatten spatial to L, return (B, L, D).\"\"\"\n    y_bdhw = conv(x_bchw)\n    return einops.rearrange(y_bdhw, \"b d h w -> b (h w) d\")\n
    "},{"location":"api/data/transforms/#saev.data.transforms.resize_to_patch_grid","title":"resize_to_patch_grid(img, *, p, n, resample=Image.LANCZOS)","text":"

    Resize image to (w, h) so that: - w % p == 0, h % p == 0 - (h/p) * (w/p) == N - Minimizes change in aspect ratio.

    Source code in src/saev/data/transforms.py
    @beartype.beartype\ndef resize_to_patch_grid(\n    img: Image.Image,\n    *,\n    p: int,\n    n: int,\n    resample: Image.Resampling | int = Image.LANCZOS,\n) -> Image.Image:\n    \"\"\"\n    Resize image to (w, h) so that:\n      - w % p == 0, h % p == 0\n      - (h/p) * (w/p) == N\n      - Minimizes change in aspect ratio.\n    \"\"\"\n    if p <= 0 or n <= 0:\n        raise ValueError(\"p and n must be positive integers\")\n\n    w0, h0 = img.size\n    a0 = w0 / h0\n\n    # Find the aspect ratio closest to a0\n    best_c = 0\n    best_dist = float(\"inf\")\n    for i in range(1, int(math.sqrt(n) + 1)):\n        if n % i != 0:\n            continue\n\n        for d in (i, n // i):\n            c, r = d, n // d\n            aspect = c / r\n            dist = abs(aspect - a0)\n\n            if dist < best_dist:\n                best_c = d\n                best_dist = dist\n\n    c = best_c\n    r = n // c\n    w, h = c * p, r * p\n    return img.resize((w, h), resample=resample)\n
    "},{"location":"api/data/transforms/#saev.data.transforms.unfolded_conv2d","title":"unfolded_conv2d(x_bchw, conv)","text":"

    Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels. Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.

    Source code in src/saev/data/transforms.py
    @jaxtyped(typechecker=beartype.beartype)\ndef unfolded_conv2d(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -> Float[Tensor, \"b n d\"]:\n    \"\"\"\n    Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels.\n    Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.\n    \"\"\"\n    k = conv.kernel_size[0]\n\n    assert conv.kernel_size == (k, k)\n    assert conv.stride == (k, k)\n    assert conv.padding == (0, 0)\n    assert conv.groups == 1\n    assert conv.dilation == (1, 1)\n\n    *b, c, h, w = x_bchw.shape\n\n    assert h % k == 0 and w % k == 0\n\n    tokens_bnd = einops.rearrange(\n        x_bchw, \"b c (hp p1) (wp p2) -> b (hp wp) (c p1 p2)\", p1=k, p2=k\n    ).contiguous()\n    w_dp = conv.weight.reshape(conv.out_channels, c * k * k)\n    tokens_bnd = tokens_bnd @ w_dp.T\n    if conv.bias is not None:\n        tokens_bnd = tokens_bnd + conv.bias[None, None, :]\n    return tokens_bnd\n
    "},{"location":"api/framework/inference/","title":"saev.framework.inference","text":"

    Script for dumping SAE inference artifacts in a single pass over the dataset.

    Default mode writes 5 files:

    1. mean_values.pt
    2. sparsity.pt
    3. distributions.pt
    4. token_acts.npz
    5. metrics.json

    If save=False, only metrics.json is written.

    metrics.json is serialized from saev.metrics.Metrics.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config","title":"Config(run=pathlib.Path('./runs/abcdefg'), data=OrderedConfig(), n_dists=25, ignore_labels=list(), force_recompute=False, save=True, device='cuda', slurm_acct='', slurm_partition='', n_hours=4.0, mem_gb=80, log_to=os.path.join('.', 'logs')) dataclass","text":"

    Configuration for computing image activations.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.data","title":"data = OrderedConfig() class-attribute instance-attribute","text":"

    Data configuration

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.device","title":"device = 'cuda' class-attribute instance-attribute","text":"

    Which accelerator to use.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.force_recompute","title":"force_recompute = False class-attribute instance-attribute","text":"

    Force recomputation even if files exist.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.ignore_labels","title":"ignore_labels = dataclasses.field(default_factory=list) class-attribute instance-attribute","text":"

    Which token labels to ignore when calculating summarized image activations.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.log_to","title":"log_to = os.path.join('.', 'logs') class-attribute instance-attribute","text":"

    Where to log Slurm job stdout/stderr.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.mem_gb","title":"mem_gb = 80 class-attribute instance-attribute","text":"

    Node memory in GB.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.n_dists","title":"n_dists = 25 class-attribute instance-attribute","text":"

    Number of features to save distributions for.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.n_hours","title":"n_hours = 4.0 class-attribute instance-attribute","text":"

    Slurm job length in hours.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.run","title":"run = pathlib.Path('./runs/abcdefg') class-attribute instance-attribute","text":"

    Path to the sae.pt file.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.save","title":"save = True class-attribute instance-attribute","text":"

    Whether to write token_acts/statistics files. If False, only metrics.json is written.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_acct","title":"slurm_acct = '' class-attribute instance-attribute","text":"

    Slurm account string. Empty means to not use Slurm.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_partition","title":"slurm_partition = '' class-attribute instance-attribute","text":"

    Slurm partition.

    "},{"location":"api/framework/inference/#saev.framework.inference.main","title":"main(cfg, sweep=None)","text":"

    Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.

    Parameters:

    Name Type Description Default cfg Annotated[Config, arg(name='')]

    Baseline config for inference.

    required sweep Path | None

    Path to .py file defining the sweep parameters.

    None Source code in src/saev/framework/inference.py
    @beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")], sweep: pathlib.Path | None = None\n):\n    \"\"\"\n    Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.\n\n    Args:\n        cfg: Baseline config inference.\n        sweep: Path to .py file defining the sweep parameters.\n    \"\"\"\n\n    if sweep is not None:\n        sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    assert all(c.slurm_acct == cfgs[0].slurm_acct for c in cfgs)\n    cfg = cfgs[0]\n\n    if not cfg.slurm_acct:\n        for i, cfg_item in enumerate(cfgs, start=1):\n            logger.info(\"Running config %d/%d locally.\", i, len(cfgs))\n            worker_fn(cfg_item)\n        logger.info(\"Jobs done.\")\n        return 0\n\n    import submitit\n    from submitit.core.utils import UncompletedJobError\n\n    executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n    executor.update_parameters(\n        time=int(cfg.n_hours * 60),\n        partition=cfg.slurm_partition,\n        gpus_per_node=1,\n        ntasks_per_node=1,\n        mem=f\"{cfg.mem_gb}GB\",\n        stderr_to_stdout=True,\n        account=cfg.slurm_acct,\n    )\n    with executor.batch():\n        jobs = []\n        for i, cfg in enumerate(cfgs):\n            do, reason, _ = need_compute(cfg)\n            if not do:\n                continue\n\n            logger.info(reason)\n            jobs.append(executor.submit(worker_fn, cfg))\n\n    time.sleep(5.0)\n\n    for i, job in enumerate(jobs, start=1):\n        logger.info(\"Job %d/%d: %s %s\", i, len(jobs), job.job_id, 
job.state)\n\n    for i, job in enumerate(jobs, start=1):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", i, len(jobs))\n        except UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, i)\n\n    logger.info(\"Jobs done.\")\n    return 0\n
    "},{"location":"api/framework/saev.framework/","title":"saev.framework","text":"

    Submitit entrypoint modules for SAE workflows.

    saev.framework is for script-like modules (e.g. train/inference/shards) that need importable module paths for submitit launchers. Place reusable data/model utilities outside this package.

    "},{"location":"api/framework/shards/","title":"saev.framework.shards","text":"

    To save lots of activations, we want to do things in parallel, with lots of slurm jobs, and save multiple files, rather than just one.

    This script handles that additional complexity.

    Conceptually, activations are either thought of as

    1. A single [n_imgs x n_layers x (n_patches + 1), d_model] tensor. This is a dataset.
    2. Multiple [n_imgs_per_shard, n_layers, (n_patches + 1), d_model] tensors. This is a set of sharded activations.
    "},{"location":"api/framework/shards/#saev.framework.shards.Config","title":"Config(data=datasets.Imagenet(), shards_root=pathlib.Path('$SAEV_SCRATCH/saev/shards/'), family='clip', ckpt='ViT-L-14/openai', batch_size=1024, n_workers=8, d_model=1024, layers=(lambda: [-2])(), content_tokens_per_example=256, cls_token=True, pixel_agg=PixelAgg.MAJORITY, max_tokens_per_shard=2400000, ssl=True, device='cuda', n_hours=24.0, slurm_acct='', slurm_partition='', log_to='./logs') dataclass","text":"

    Configuration for calculating and saving ViT activations.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.batch_size","title":"batch_size = 1024 class-attribute instance-attribute","text":"

    Batch size for ViT inference.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.ckpt","title":"ckpt = 'ViT-L-14/openai' class-attribute instance-attribute","text":"

    Specific model checkpoint.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.cls_token","title":"cls_token = True class-attribute instance-attribute","text":"

    Whether the model has a [CLS] token.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.content_tokens_per_example","title":"content_tokens_per_example = 256 class-attribute instance-attribute","text":"

    Number of content tokens per example (depends on model).

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.d_model","title":"d_model = 1024 class-attribute instance-attribute","text":"

    Dimension of the ViT activations (depends on model).

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.data","title":"data = dataclasses.field(default_factory=(datasets.Imagenet)) class-attribute instance-attribute","text":"

    Which dataset to use.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.device","title":"device = 'cuda' class-attribute instance-attribute","text":"

    Which device to use.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.family","title":"family = 'clip' class-attribute instance-attribute","text":"

    Which model family.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.layers","title":"layers = dataclasses.field(default_factory=(lambda: [-2])) class-attribute instance-attribute","text":"

    Which layers to save. By default, the second-to-last layer.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.log_to","title":"log_to = './logs' class-attribute instance-attribute","text":"

    Where to log Slurm job stdout/stderr.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.max_tokens_per_shard","title":"max_tokens_per_shard = 2400000 class-attribute instance-attribute","text":"

    Maximum number of activations per shard; 2.4M is approximately 10GB for 1024-dimensional 4-byte activations.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.n_hours","title":"n_hours = 24.0 class-attribute instance-attribute","text":"

    Slurm job length.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.n_workers","title":"n_workers = 8 class-attribute instance-attribute","text":"

    Number of dataloader workers.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.shards_root","title":"shards_root = pathlib.Path('$SAEV_SCRATCH/saev/shards/') class-attribute instance-attribute","text":"

    Where to write shards.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_acct","title":"slurm_acct = '' class-attribute instance-attribute","text":"

    Slurm account string.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_partition","title":"slurm_partition = '' class-attribute instance-attribute","text":"

    Slurm partition.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.ssl","title":"ssl = True class-attribute instance-attribute","text":"

    Whether to use SSL.

    "},{"location":"api/framework/shards/#saev.framework.shards.cli","title":"cli(cfg)","text":"

    Save ViT activations for use later on.

    Parameters:

    Name Type Description Default cfg Annotated[Config, arg(name='')]

    Configuration for activations.

    required Source code in src/saev/framework/shards.py
    @beartype.beartype\ndef cli(cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")]):\n    \"\"\"\n    Save ViT activations for use later on.\n\n    Args:\n        cfg: Configuration for activations.\n    \"\"\"\n    logger = logging.getLogger(\"dump\")\n\n    if not cfg.ssl:\n        logger.warning(\"Ignoring SSL certs. Try not to do this!\")\n        # https://github.com/openai/whisper/discussions/734#discussioncomment-4491761\n        # Ideally we don't have to disable SSL but we are only downloading weights.\n        import ssl\n\n        ssl._create_default_https_context = ssl._create_unverified_context\n\n    from saev.data import shards\n\n    kwargs = dict(\n        family=cfg.family,\n        ckpt=cfg.ckpt,\n        content_tokens_per_example=cfg.content_tokens_per_example,\n        cls_token=cfg.cls_token,\n        d_model=cfg.d_model,\n        layers=cfg.layers,\n        data=cfg.data,\n        batch_size=cfg.batch_size,\n        n_workers=cfg.n_workers,\n        max_tokens_per_shard=cfg.max_tokens_per_shard,\n        shards_root=cfg.shards_root,\n        device=cfg.device,\n        pixel_agg=cfg.pixel_agg,\n    )\n\n    # Actually record activations.\n    if cfg.slurm_acct:\n        import submitit\n\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n        executor.update_parameters(\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            cpus_per_task=cfg.n_workers + 4,\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n\n        job = executor.submit(shards.worker_fn, **kwargs)\n        logger.info(\"Running job '%s'.\", job.job_id)\n        job.result()\n\n    else:\n        shards.worker_fn(**kwargs)\n
    "},{"location":"api/framework/train/","title":"saev.framework.train","text":"

    Trains many SAEs in parallel to amortize the cost of loading a single batch of data over many SAE training runs.

    Checklist for making sure your training doesn't suck:

    • [ ] Data scaling: scale vectors so their average L2 norm is sqrt(n).
    • [ ] Initialize b_e such that each feature activates 10K * d_model / (n * d_sae) of the time, which means that on average, each example activates 10K features.
    • [x] Initialize b_d to 0.
    • [x] Sweep learning rate and sparsity coefficients.
    • [ ] Decay learning rate to 0 over the last 20% of training.
    • [ ] Warmup sparsity over all of training.
    • [x] Gradient clipping (clip at 1 with clip_grad_norm)
    • [x] Track dead latents through training
    "},{"location":"api/framework/train/#saev.framework.train.Config","title":"Config(train_data=saev.data.ShuffledConfig(), val_data=saev.data.ShuffledConfig(), n_train=100000000, n_val=10000000, sae=nn.SparseAutoencoderConfig(), objective=nn.objectives.Matryoshka(), n_sparsity_warmup=0, optim='adam', lr=0.0004, n_lr_warmup=500, grad_clip=1.0, track=True, wandb_project='saev', tags=(), log_every=25, runs_root=pathlib.Path('$SAEV_NFS/saev/runs'), device='cuda', seed=42, slurm_acct='', slurm_partition='', n_hours=24.0, mem_gb=128, log_to=os.path.join('.', 'logs')) dataclass","text":"

    Configuration for training a sparse autoencoder on a vision transformer.

    "},{"location":"api/framework/train/#saev.framework.train.Config.device","title":"device = 'cuda' class-attribute instance-attribute","text":"

    Hardware device.

    "},{"location":"api/framework/train/#saev.framework.train.Config.grad_clip","title":"grad_clip = 1.0 class-attribute instance-attribute","text":"

    Maximum gradient norm across all SAE parameters.

    "},{"location":"api/framework/train/#saev.framework.train.Config.log_every","title":"log_every = 25 class-attribute instance-attribute","text":"

    How often to log to WandB.

    "},{"location":"api/framework/train/#saev.framework.train.Config.log_to","title":"log_to = os.path.join('.', 'logs') class-attribute instance-attribute","text":"

    Where to log Slurm job stdout/stderr.

    "},{"location":"api/framework/train/#saev.framework.train.Config.lr","title":"lr = 0.0004 class-attribute instance-attribute","text":"

    Learning rate.

    "},{"location":"api/framework/train/#saev.framework.train.Config.mem_gb","title":"mem_gb = 128 class-attribute instance-attribute","text":"

    Node memory in GB.

    "},{"location":"api/framework/train/#saev.framework.train.Config.n_hours","title":"n_hours = 24.0 class-attribute instance-attribute","text":"

    Slurm job length in hours.

    "},{"location":"api/framework/train/#saev.framework.train.Config.n_lr_warmup","title":"n_lr_warmup = 500 class-attribute instance-attribute","text":"

    Number of learning rate warmup steps.

    "},{"location":"api/framework/train/#saev.framework.train.Config.n_sparsity_warmup","title":"n_sparsity_warmup = 0 class-attribute instance-attribute","text":"

    Number of sparsity coefficient warmup steps.

    "},{"location":"api/framework/train/#saev.framework.train.Config.n_train","title":"n_train = 100000000 class-attribute instance-attribute","text":"

    Number of SAE training samples.

    "},{"location":"api/framework/train/#saev.framework.train.Config.n_val","title":"n_val = 10000000 class-attribute instance-attribute","text":"

    Number of SAE evaluation samples.

    "},{"location":"api/framework/train/#saev.framework.train.Config.objective","title":"objective = nn.objectives.Matryoshka() class-attribute instance-attribute","text":"

    SAE objective configuration.

    "},{"location":"api/framework/train/#saev.framework.train.Config.optim","title":"optim = 'adam' class-attribute instance-attribute","text":"

    Optimizer for training.

    "},{"location":"api/framework/train/#saev.framework.train.Config.runs_root","title":"runs_root = pathlib.Path('$SAEV_NFS/saev/runs') class-attribute instance-attribute","text":"

    Root directory for runs.

    "},{"location":"api/framework/train/#saev.framework.train.Config.sae","title":"sae = nn.SparseAutoencoderConfig() class-attribute instance-attribute","text":"

    SAE configuration.

    "},{"location":"api/framework/train/#saev.framework.train.Config.seed","title":"seed = 42 class-attribute instance-attribute","text":"

    Random seed.

    "},{"location":"api/framework/train/#saev.framework.train.Config.slurm_acct","title":"slurm_acct = '' class-attribute instance-attribute","text":"

    Slurm account string. Empty means to not use Slurm.

    "},{"location":"api/framework/train/#saev.framework.train.Config.slurm_partition","title":"slurm_partition = '' class-attribute instance-attribute","text":"

    Slurm partition.

    "},{"location":"api/framework/train/#saev.framework.train.Config.tags","title":"tags = () class-attribute instance-attribute","text":"

    Tags to add to WandB run.

    "},{"location":"api/framework/train/#saev.framework.train.Config.track","title":"track = True class-attribute instance-attribute","text":"

    Whether to track with WandB.

    "},{"location":"api/framework/train/#saev.framework.train.Config.train_data","title":"train_data = saev.data.ShuffledConfig() class-attribute instance-attribute","text":"

    Training data.

    "},{"location":"api/framework/train/#saev.framework.train.Config.val_data","title":"val_data = saev.data.ShuffledConfig() class-attribute instance-attribute","text":"

    Validation data.

    "},{"location":"api/framework/train/#saev.framework.train.Config.wandb_project","title":"wandb_project = 'saev' class-attribute instance-attribute","text":"

    WandB project name.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics","title":"EvalMetrics(l0, l1, mse, normalized_mse, sse_sae, sse_baseline, n_dead, n_almost_dead, n_dense, freqs, mean_values, almost_dead_threshold, dense_threshold) dataclass","text":"

    Results of evaluating a trained SAE on a dataset.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.almost_dead_threshold","title":"almost_dead_threshold instance-attribute","text":"

    Threshold for an \"almost dead\" neuron.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.dense_threshold","title":"dense_threshold instance-attribute","text":"

    Threshold for a dense neuron.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.freqs","title":"freqs instance-attribute","text":"

    How often each feature fired.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l0","title":"l0 instance-attribute","text":"

    Mean L0 across all examples.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l1","title":"l1 instance-attribute","text":"

    Mean L1 across all examples.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mean_values","title":"mean_values instance-attribute","text":"

    The mean value for each feature when it did fire.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mse","title":"mse instance-attribute","text":"

    Mean MSE across all examples.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_almost_dead","title":"n_almost_dead instance-attribute","text":"

    Number of neurons that fired on fewer than almost_dead_threshold of examples.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dead","title":"n_dead instance-attribute","text":"

    Number of neurons that never fired on any example.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dense","title":"n_dense instance-attribute","text":"

    Number of neurons that fired on more than dense_threshold of examples.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.normalized_mse","title":"normalized_mse instance-attribute","text":"

    Normalized reconstruction MSE (SAE SSE / mean-baseline SSE).

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_baseline","title":"sse_baseline instance-attribute","text":"

    Total reconstruction sum-squared error for the mean baseline.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_sae","title":"sse_sae instance-attribute","text":"

    Total reconstruction sum-squared error for the SAE.

    "},{"location":"api/framework/train/#saev.framework.train.evaluate","title":"evaluate(cfgs, saes, objectives)","text":"

    Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.

    The metrics computed are mean L0/L1/MSE losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values. A list of EvalMetrics is returned, one for each SAE.

    Source code in src/saev/framework/train.py
    @beartype.beartype\n@torch.no_grad()\ndef evaluate(\n    cfgs: list[Config], saes: torch.nn.ModuleList, objectives: torch.nn.ModuleList\n) -> list[EvalMetrics]:\n    \"\"\"\n    Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.\n\n    The metrics computed are mean ``L0``/``L1``/``MSE`` losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values.  A list of `EvalMetrics` is returned, one for each SAE.\n    \"\"\"\n\n    torch.cuda.empty_cache()\n\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    saes.eval()\n    objectives.eval()\n\n    cfg = cfgs[0]\n\n    almost_dead_lim = 1e-7\n    dense_lim = 1e-2\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.val_data)\n    n_val = min(dataloader.n_samples, cfg.n_val)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, n_val)\n\n    n_fired = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    values = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    total_l0_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_l1_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_mse_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_sse_sae = torch.zeros(len(cfgs), dtype=torch.float64, device=cfg.device)\n    sum_sq = torch.zeros((), dtype=torch.float64, device=cfg.device)\n    sum_vec = torch.zeros(\n        (saes[0].cfg.d_model,), dtype=torch.float64, device=cfg.device\n    )\n    n_tokens = 0\n\n    for batch in helpers.progress(dataloader, desc=\"eval\", every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        batch_size = acts_BD.shape[0]\n        acts_BD_f64 = acts_BD.to(torch.float64)\n        sum_sq += torch.sum(acts_BD_f64 * acts_BD_f64)\n        sum_vec += 
acts_BD_f64.sum(dim=0)\n        n_tokens += batch_size\n        for i, (sae, objective) in enumerate(zip(saes, objectives)):\n            # Objective now handles the forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            # Get f_x for metrics\n            residual = acts_BD - fwd.x_hats[:, -1, :]\n            total_sse_sae[i] += torch.sum((residual.to(torch.float64)) ** 2)\n            n_fired[i] += einops.reduce(\n                fwd.f_x > 0, \"batch d_sae -> d_sae\", \"sum\"\n            ).cpu()\n            values[i] += einops.reduce(fwd.f_x, \"batch d_sae -> d_sae\", \"sum\").cpu()\n            total_l0_sum[i] += loss.l0.cpu().item() * batch_size\n            total_l1_sum[i] += loss.l1.cpu().item() * batch_size\n            total_mse_sum[i] += loss.mse.cpu().item() * batch_size\n\n    msg = \"Validation dataloader yielded zero tokens; cannot compute normalized MSE.\"\n    assert n_tokens > 0, msg\n    sum_vec_sq = torch.dot(sum_vec, sum_vec)\n    sse_baseline = sum_sq - sum_vec_sq / n_tokens\n    msg = (\n        f\"Validation baseline variance non-positive: \"\n        f\"sse_baseline={sse_baseline.item():.6e}\"\n    )\n    assert sse_baseline > 0, msg\n    sse_baseline_value = sse_baseline.item()\n\n    mean_values = values / n_fired\n    freqs = n_fired / n_tokens\n\n    l0 = (total_l0_sum / n_tokens).tolist()\n    l1 = (total_l1_sum / n_tokens).tolist()\n    mse = (total_mse_sum / n_tokens).tolist()\n    sse_sae = total_sse_sae.tolist()\n    normalized_mse = (total_sse_sae / sse_baseline_value).tolist()\n    sse_baseline_all = [sse_baseline_value] * len(cfgs)\n\n    n_dead = einops.reduce(freqs == 0, \"n_saes d_sae -> n_saes\", \"sum\").tolist()\n    n_almost_dead = einops.reduce(\n        freqs < almost_dead_lim, \"n_saes d_sae -> n_saes\", \"sum\"\n    ).tolist()\n    n_dense = einops.reduce(freqs > dense_lim, \"n_saes d_sae -> n_saes\", \"sum\").tolist()\n\n    metrics = []\n    for i in range(len(cfgs)):\n        
metrics.append(\n            EvalMetrics(\n                l0=l0[i],\n                l1=l1[i],\n                mse=mse[i],\n                normalized_mse=normalized_mse[i],\n                sse_sae=sse_sae[i],\n                sse_baseline=sse_baseline_all[i],\n                n_dead=n_dead[i],\n                n_almost_dead=n_almost_dead[i],\n                n_dense=n_dense[i],\n                freqs=freqs[i],\n                mean_values=mean_values[i],\n                almost_dead_threshold=almost_dead_lim,\n                dense_threshold=dense_lim,\n            )\n        )\n\n    return metrics\n
    "},{"location":"api/framework/train/#saev.framework.train.main","title":"main(cfg, sweep=None, max_parallel=None)","text":"

    Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.

    Parameters:

    Name Type Description Default cfg Annotated[Config, arg(name='')]

    Baseline config for training an SAE.

    required sweep Path | None

    Path to .py file defining the sweep parameters.

    None max_parallel int | None

    Maximum SAEs to train concurrently within a single worker.

    None Source code in src/saev/framework/train.py
    @beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")],\n    sweep: pathlib.Path | None = None,\n    max_parallel: int | None = None,\n):\n    \"\"\"\n    Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.\n\n    Args:\n        cfg: Baseline config for training an SAE.\n        sweep: Path to .py file defining the sweep parameters.\n        max_parallel: Maximum SAEs to train concurrently within a single worker.\n    \"\"\"\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n\n    import submitit\n\n    if sweep is not None:\n        sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    cfgs = split_cfgs(cfgs)\n    # codex resume 019ac16a-dc07-78e3-82c7-e5c08a6c6f0c\n    if max_parallel:\n        cfgs = [\n            subgroup\n            for group in cfgs\n            for subgroup in [\n                group[start:end]\n                for start, end in helpers.batched_idx(len(group), max_parallel)\n            ]\n        ]\n\n    logger.info(\"Running %d training jobs.\", len(cfgs))\n\n    # Use the first resolved config for submitit parameters (n_hours, mem_gb, etc.) 
so that sweep values take effect instead of CLI defaults.\n    cfg = cfgs[0][0]\n\n    if cfg.slurm_acct:\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n        executor.update_parameters(\n            job_name=\"sae-train\",\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            mem=f\"{cfg.mem_gb}GB\",\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n    else:\n        executor = submitit.DebugExecutor(folder=cfg.log_to)\n\n    try:\n        cloudpickle.dumps(worker_fn)\n        for group in cfgs:\n            cloudpickle.dumps(group)\n    except TypeError as err:\n        raise AssertionError(f\"Failed to pickle: {err}\")\n\n    with executor.batch():\n        jobs = [executor.submit(worker_fn, group) for group in cfgs]\n\n    # Give the executor five seconds to fire the jobs off.\n    time.sleep(5.0)\n\n    # Log initial status.\n    for j, job in enumerate(jobs):\n        logger.info(\"Job %d/%d: %s %s\", j + 1, len(jobs), job.job_id, job.state)\n\n    for j, job in enumerate(jobs):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", j + 1, len(jobs))\n        except submitit.core.utils.UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, j)\n\n    logger.info(\"Jobs done.\")\n
    "},{"location":"api/framework/train/#saev.framework.train.split_cfgs","title":"split_cfgs(cfgs)","text":"

    Splits configs into groups that can be parallelized.

    Parameters:

    Name Type Description Default cfgs list[Config]

    A list of configs from a sweep file.

    required

    Returns:

    Type Description list[list[Config]]

    A list of lists, where the configs in each sublist do not differ in any keys that are in CANNOT_PARALLELIZE. This means that each sublist is a valid \"parallel\" set of configs for train.

    Source code in src/saev/framework/train.py
    @beartype.beartype\ndef split_cfgs(cfgs: list[Config]) -> list[list[Config]]:\n    \"\"\"\n    Splits configs into groups that can be parallelized.\n\n    Arguments:\n        cfgs: A list of configs from a sweep file.\n\n    Returns:\n        A list of lists, where the configs in each sublist do not differ in any keys that are in `CANNOT_PARALLELIZE`. This means that each sublist is a valid \"parallel\" set of configs for `train`.\n    \"\"\"\n    groups = collections.defaultdict(list)\n    for cfg in cfgs:\n        key = _parallel_key(cfg)\n        groups[key].append(cfg)\n\n    return [\n        [\n            dataclasses.replace(\n                cfg,\n                train_data=dataclasses.replace(cfg.train_data, seed=cfg.seed),\n                val_data=dataclasses.replace(cfg.val_data, seed=cfg.seed),\n            )\n            for cfg in group\n        ]\n        for _, group in sorted(groups.items())\n    ]\n
    "},{"location":"api/framework/train/#saev.framework.train.train","title":"train(cfgs)","text":"

    Explicitly declare the optimizer, schedulers, dataloader, etc outside of main so that all the variables are dropped from scope and can be garbage collected.

    Source code in src/saev/framework/train.py
    @beartype.beartype\ndef train(\n    cfgs: list[Config],\n) -> tuple[\n    torch.nn.ModuleList, torch.nn.ModuleList, saev.utils.wandb.ParallelWandbRun, int\n]:\n    \"\"\"\n    Explicitly declare the optimizer, schedulers, dataloader, etc outside of `main` so that all the variables are dropped from scope and can be garbage collected.\n    \"\"\"\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    logger.info(\"Parallelizing %d runs.\", len(cfgs))\n\n    cfg = cfgs[0]\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.train_data)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, cfg.n_train)\n\n    saes, objectives, param_groups = make_saes(\n        [(c.sae, c.objective) for c in cfgs], dataloader\n    )\n\n    mode = \"online\" if cfg.track else \"disabled\"\n    tags = list(cfg.tags)\n\n    # Add metadata to configs for WandB logging\n    metadata_dict = dataclasses.asdict(dataloader.metadata)\n    wandb_configs = []\n    for c in cfgs:\n        cfg_dict = dataclasses.asdict(c)\n        cfg_dict[\"train_data\"][\"metadata\"] = metadata_dict\n        wandb_configs.append(cfg_dict)\n\n    run = saev.utils.wandb.ParallelWandbRun(\n        cfg.wandb_project, wandb_configs, mode, tags\n    )\n    slurm_job_id = os.environ.get(\"SLURM_JOB_ID\")\n    if slurm_job_id:\n        run.set_summary(\"slurm_job_id\", slurm_job_id)\n\n    # Build per-SAE bundles of optimizers/param_groups/schedulers so each config's LR and warmup drive both Muon and Adam param groups for that SAE. 
We reshape the flat param_groups into per-SAE lists because we need to:\n    #   (a) build schedulers with that SAE's cfg\n    #   (b) step/zero only that SAE's optimizers\n    #   (c) log that SAE's LR without fishing through a mixed flat list.\n    grouped_pgs: list[list[dict[str, object]]] = []\n    optimizers: list[list[torch.optim.Optimizer]] = []\n    lr_schedulers: list[list[saev.utils.scheduling.WarmupCosine]] = []\n\n    for i, (sae, cfg, param_group) in enumerate(zip(saes, cfgs, param_groups)):\n        if cfg.optim == \"adam\":\n            opts = [torch.optim.Adam([param_group], fused=True)]\n        elif cfg.optim == \"muon\":\n            muon_params = [p for p in sae.parameters() if p.ndim == 2]\n            msg = f\"Muon optimizer requires 2D params; SAE {i} has none.\"\n            assert muon_params, msg\n            adam_params = [p for p in sae.parameters() if p.ndim != 2]\n            msg = f\"Adam optimizer requires non-2D params; SAE {i} has none.\"\n            assert adam_params, msg\n\n            opts = [\n                torch.optim.Muon(muon_params, lr=0.0),\n                torch.optim.Adam(adam_params, lr=0.0, fused=True),\n            ]\n        else:\n            tp.assert_never(cfg.optim)\n\n        pgs = [pg for opt in opts for pg in opt.param_groups]\n        scheds = [\n            saev.utils.scheduling.WarmupCosine(\n                0.0, cfg.n_lr_warmup, cfg.lr, len(dataloader), 0.0\n            )\n            for _ in pgs\n        ]\n\n        optimizers.append(opts)\n        grouped_pgs.append(pgs)\n        lr_schedulers.append(scheds)\n\n    param_groups = grouped_pgs\n\n    saes.train()\n    saes = saes.to(cfg.device)\n    objectives.train()\n    objectives = objectives.to(cfg.device)\n\n    global_step, n_patches_seen = 0, 0\n    dl_monitor = DataloaderMonitor(dataloader)\n\n    for batch in helpers.progress(dataloader, every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        for 
sae in saes:\n            sae.normalize_w_dec()\n        # Forward passes and loss calculations.\n        losses = []\n        fwds = []\n        for sae, objective in zip(saes, objectives):\n            # Objective handles the SAE forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            losses.append(loss)\n            fwds.append(fwd)\n\n        n_patches_seen += len(acts_BD)\n\n        for loss in losses:\n            loss.loss.backward()\n\n        # remove parallel gradients or normalize columns?\n        for sae in saes:\n            sae.remove_parallel_grads()\n\n        # Calculate gradient norms before optimizer step\n        grad_norms = []\n        for sae, cfg in zip(saes, cfgs):\n            # Clip gradients and get the gradient norm\n            grad_norm = torch.nn.utils.clip_grad_norm_(\n                sae.parameters(), max_norm=cfg.grad_clip\n            )\n\n            grad_norms.append(grad_norm)\n\n        # Log metrics after gradient computation\n        if (global_step + 1) % cfg.log_every == 0:\n            with torch.no_grad():\n                now = time.time()\n                dl_metrics = dl_monitor.compute(now=now)\n\n                metadata = dataloader.metadata\n                entropy_metrics = statistics.calc_batch_entropy(\n                    batch[\"example_idx\"].to(\"cpu\"),\n                    batch[\"token_idx\"].to(\"cpu\"),\n                    metadata.n_examples,\n                    metadata.content_tokens_per_example,\n                )\n                dl_metrics.update(entropy_metrics)\n\n                acts_bd_f64 = acts_BD.to(torch.float64)\n                n_batch = acts_bd_f64.shape[0]\n                msg = \"Batch is empty; cannot compute normalized MSE.\"\n                assert n_batch > 0, msg\n                batch_sum_sq = torch.sum(acts_bd_f64 * acts_bd_f64)\n                batch_sum_vec = acts_bd_f64.sum(dim=0)\n                batch_baseline_sse = (\n                    
batch_sum_sq - torch.dot(batch_sum_vec, batch_sum_vec) / n_batch\n                )\n                msg = f\"Batch baseline variance non-positive: sse_baseline={batch_baseline_sse.item():.6e}\"\n                assert batch_baseline_sse > 0, msg\n                batch_baseline_sse_value = batch_baseline_sse.item()\n\n                metrics = []\n                for i, (loss, sae, objective, fwd) in enumerate(\n                    zip(losses, saes, objectives, fwds)\n                ):\n                    current_lr = param_groups[i][0][\"lr\"]\n                    # Explained variance: 1 - Var(x - x_hat) / Var(x)\n                    residual = acts_BD - fwd.x_hats[:, -1, :]\n                    batch_sse_sae_value = torch.sum(\n                        (residual.to(torch.float64)) ** 2\n                    ).item()\n                    normalized_mse_value = (\n                        batch_sse_sae_value / batch_baseline_sse_value\n                    )\n                    explained_var = 1 - residual.var() / acts_BD.var()\n\n                    # Dead unit percentage: fraction of units that never activate\n                    dead_pct = ((fwd.f_x.abs() > 1e-12).sum(0) == 0).float().mean()\n\n                    # Dictionary coherence: max |<w_i, w_j>| for i != j\n                    W = sae.W_dec  # (d_sae, d_model)\n                    # Normalize each row (each SAE feature)\n                    W_norm = W / W.norm(dim=1, keepdim=True)\n                    coherence = (W_norm @ W_norm.T).abs().triu(1).max()\n\n                    # Average decoder row L2 norm (since W_dec is d_sae x d_model)\n                    avg_w_row_norm = sae.W_dec.norm(dim=1).mean()\n\n                    metric = {\n                        **{f\"loss/{key}\": val for key, val in loss.metrics().items()},\n                        \"progress/n_patches_seen\": n_patches_seen,\n                        \"progress/learning_rate\": current_lr,\n                        
\"metrics/explained_variance\": explained_var.item(),\n                        \"metrics/dead_unit_pct\": dead_pct.item(),\n                        \"metrics/dictionary_coherence\": coherence.item(),\n                        \"metrics/avg_decoder_row_norm\": avg_w_row_norm.item(),\n                        \"metrics/grad_norm\": grad_norms[i].item(),\n                        \"metrics/sse_sae\": batch_sse_sae_value,\n                        \"metrics/sse_baseline\": batch_baseline_sse_value,\n                        \"metrics/normalized_mse\": normalized_mse_value,\n                        **dl_metrics,\n                    }\n\n                    metrics.append(metric)\n                run.log(metrics, step=global_step)\n\n                logger.info(\n                    \", \".join(\n                        f\"{key}: {value:.5f}\"\n                        for key, value in losses[0].metrics().items()\n                    )\n                )\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.step()\n\n        # Update LR and sparsity coefficients.\n        for pgs, scheds in zip(param_groups, lr_schedulers):\n            for pg, sched in zip(pgs, scheds):\n                pg[\"lr\"] = sched.step()\n\n        # for objective, scheduler in zip(objectives, sparsity_schedulers):\n        #     objective.sparsity_coeff = scheduler.step()\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.zero_grad()\n\n        global_step += 1\n\n    return saes, objectives, run, global_step\n
    "},{"location":"api/nn/modeling/","title":"saev.nn.modeling","text":"

    Neural network architectures for sparse autoencoders.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.AuxK","title":"AuxK(key='auxk', k_aux=512, alpha=1 / 32) dataclass","text":"

    AuxK auxiliary reconstruction loss for dead latents.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK","title":"BatchTopK(key='batch-top-k', top_k=32, sparsity=NoSparsity(), momentum=0.1, aux=AuxK()) dataclass","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK.top_k","title":"top_k = 32 class-attribute instance-attribute","text":"

    How many values are allowed to be non-zero per sample in the batch.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation","title":"BatchTopKActivation(cfg)","text":"

    Bases: Module

    BatchTopK activation and inference-time threshold for sparse autoencoders.

    This module implements a BatchTopK nonlinearity that enforces a fixed sparsity budget across a batch, together with an inference-time approximation that replaces the batch-coupled operation with a simple elementwise threshold.

    Training mode (model.train()): Given pre-activation codes x with shape [batch, d_sae], the BatchTopK activation flattens the batch to shape [batch * d_sae], selects the largest (batch * top_k) entries by value, and sets all other entries to zero. This enforces an average of exactly top_k active features per example while allowing the \"activation budget\" to move between examples in the batch.

    During training, we also estimate an inference threshold theta that approximates the effective cutoff induced by BatchTopK. For each batch, we compute the minimum positive activation that survives the BatchTopK mask and update an exponential moving average of this quantity. This running estimate plays the same role as BatchNorm running statistics: it is updated only in training mode and treated as fixed at inference.\n

    Eval mode (model.eval()): At inference time we do not apply a batch-coupled top-k, since that would make each example depend on the rest of the eval batch. Instead, we use the stored running threshold theta to define a JumpReLU nonlinearity:

        y = x if x > theta else 0\n\napplied elementwise and independently to each example. This preserves the approximate sparsity level learned during training, but makes the layer deterministic and sample-wise independent for evaluation, probing, and downstream use.\n
    Inputs

    x: Tensor of shape [batch, d_sae] containing pre-activation codes.

    Outputs

    Tensor of shape [batch, d_sae] with the same dtype and device as x, where either (a) in training mode, only the (batch * top_k) largest entries across the whole batch are kept by the BatchTopK mask and all others are set to zero; or (b) in eval mode, entries are zeroed by an elementwise JumpReLU with the learned threshold theta.

    Source code in src/saev/nn/modeling.py
    def __init__(self, cfg: BatchTopK):\n    super().__init__()\n    self.cfg = cfg\n\n    self.register_buffer(\"threshold\", torch.tensor(0.0))\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation.forward","title":"forward(x)","text":"

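    Apply the BatchTopK activation. In training mode, keep the largest (batch * top_k) entries across the whole flattened batch (not top_k per sample), zero the rest, and update the running threshold. In eval mode, apply the stored elementwise threshold instead (falling back to a plain x > 0 cutoff while the threshold is still non-positive).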

    Source code in src/saev/nn/modeling.py
    def forward(self, x: Float[Tensor, \"batch d_sae\"]) -> Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to each sample in the batch.\n    \"\"\"\n\n    if not self.training:\n        if self.threshold <= 0:\n            return torch.where(x > 0, x, torch.zeros_like(x))\n\n        return torch.where(x > self.threshold, x, torch.zeros_like(x))\n\n    bsz, d_sae = x.shape\n    x_flat = x.flatten()\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k * bsz, d_sae * bsz)\n    _, idxs = torch.topk(x_flat, k, sorted=False)\n    mask = torch.zeros_like(x_flat).scatter(-1, idxs, 1.0).reshape(x.shape)\n\n    x = torch.mul(mask, x)\n\n    with torch.no_grad():\n        pos = x[x > 0]\n        if pos.numel() >= 0:\n            self.threshold.mul_(1 - self.cfg.momentum).add_(\n                self.cfg.momentum * pos.min()\n            )\n\n    return x\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.NoAux","title":"NoAux(key='no-aux') dataclass","text":"

    No auxiliary loss (e.g., for ReLU).

    "},{"location":"api/nn/modeling/#saev.nn.modeling.NoSparsity","title":"NoSparsity(key='no-sparsity') dataclass","text":"

    No explicit sparsity penalty (e.g. for TopK/BatchTopK where k controls sparsity).

    "},{"location":"api/nn/modeling/#saev.nn.modeling.Relu","title":"Relu(key='relu', sparsity=L1Sparsity(coeff=0.0004), aux=NoAux()) dataclass","text":"

    Vanilla ReLU

    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder","title":"SparseAutoencoder(cfg)","text":"

    Bases: Module

    Sparse auto-encoder (SAE)

    Source code in src/saev/nn/modeling.py
    def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. That means load_state_dict overwrites W_dec when it loads W_enc.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.EncodeOut","title":"EncodeOut","text":"

    Bases: NamedTuple

    Outputs of encode: pre-activations and activated latents.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.Output","title":"Output","text":"

    Bases: NamedTuple

    Full SAE forward outputs for objectives and metrics.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.decode","title":"decode(f_x, *, prefixes=None)","text":"

    Decode latent features to reconstructions.

    Parameters:

    Name Type Description Default f_x Float[Tensor, 'batch d_sae']

    Latent features of shape (batch, d_sae)

    required prefixes Int64[Tensor, ' n_prefixes'] | None

    Optional tensor of prefix lengths for Matryoshka decoding.

    None

    Returns:

    Type Description Float[Tensor, 'batch n_prefixes d_model']

    Matryoshka reconstructions (batch, n_prefixes, d_model).

    Source code in src/saev/nn/modeling.py
    def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -> Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka cumulative decode\n    device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] > prefixes[:-1])\n    assert 1 <= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -> (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -> ... 
d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. Needs to be cleaned up.\n    return x_hats\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.forward","title":"forward(x)","text":"

    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.

    Parameters:

    Name Type Description Default x Float[Tensor, 'batch d_model']

    a batch of transformer activations.

    required Source code in src/saev/nn/modeling.py
    def forward(self, x: Float[Tensor, \"batch d_model\"]) -> Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.normalize_w_dec","title":"normalize_w_dec()","text":"

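    Rescale each row of W_dec (one row per SAE feature; W_dec has shape d_sae x d_model) to unit L2 norm. Does nothing unless cfg.normalize_w_dec is set.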

    Source code in src/saev/nn/modeling.py
    @torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.remove_parallel_grads","title":"remove_parallel_grads()","text":"

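    Remove from W_dec.grad the component parallel to the corresponding row of W_dec, i.e. project each feature's gradient onto the subspace orthogonal to its decoder vector. Does nothing if cfg.remove_parallel_grads is false or W_dec has no gradient yet.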

    Source code in src/saev/nn/modeling.py
    @torch.no_grad()\ndef remove_parallel_grads(self):\n    \"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model -> d_sae\",\n    )\n\n    norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq > 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -> d_sae d_model\",\n    )\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig","title":"SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True) dataclass","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.activation","title":"activation = TopK() class-attribute instance-attribute","text":"

    Activation function.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_model","title":"d_model = 1024 class-attribute instance-attribute","text":"

    Size of x.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_sae","title":"d_sae = 1024 * 16 class-attribute instance-attribute","text":"

    Number of features in SAE latent space; size of f(x).

    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.normalize_w_dec","title":"normalize_w_dec = True class-attribute instance-attribute","text":"

    Whether to make sure each decoder vector (a row of W_dec, which has shape d_sae x d_model) has unit norm. See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".

    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_blend","title":"reinit_blend = 0.8 class-attribute instance-attribute","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"reinit_enc_dec_tranpose = True class-attribute instance-attribute","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.remove_parallel_grads","title":"remove_parallel_grads = True class-attribute instance-attribute","text":"

    Whether to remove the gradient component parallel to each decoder vector (a row of W_dec), which would be discarded anyway because those vectors are renormalized to unit norm. See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.TopK","title":"TopK(key='top-k', top_k=32, sparsity=NoSparsity(), aux=AuxK()) dataclass","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.TopK.top_k","title":"top_k = 32 class-attribute instance-attribute","text":"

    How many values are allowed to be non-zero.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation","title":"TopKActivation(cfg)","text":"

    Bases: Module

    Top-K activation function. For use as activation function of sparse encoder.

    Source code in src/saev/nn/modeling.py
    def __init__(self, cfg: TopK):\n    super().__init__()\n    self.cfg = cfg\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation.forward","title":"forward(x)","text":"

    Apply top-k activation to the input tensor.

    Source code in src/saev/nn/modeling.py
    def forward(self, x: Float[Tensor, \"batch d_sae\"]) -> Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to the input tensor.\n    \"\"\"\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k, d_sae)\n    _, idxs = torch.topk(x, k, dim=-1, sorted=False)\n    mask = torch.zeros_like(x).scatter(-1, idxs, 1.0)\n\n    return torch.mul(mask, x)\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.dump","title":"dump(fpath, sae)","text":"

    Save an SAE checkpoint to disk along with configuration, using the trick from equinox.

    Parameters:

    Name Type Description Default fpath Path | str

    filepath to save checkpoint to.

    required sae SparseAutoencoder

    sparse autoencoder checkpoint to save.

    required Source code in src/saev/nn/modeling.py
    @beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.load","title":"load(fpath, *, device='cpu')","text":"

    Loads a sparse autoencoder from disk.

    Source code in src/saev/nn/modeling.py
    @beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -> SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. 
Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        
cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n
    "},{"location":"api/nn/objectives/","title":"saev.nn.objectives","text":""},{"location":"api/nn/objectives/#saev.nn.objectives.Loss","title":"Loss() dataclass","text":"

    The loss term for an autoencoder training batch.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.Loss.loss","title":"loss property","text":"

    Total loss.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka","title":"Matryoshka(n_prefixes=10, dead_threshold_tokens=10000000) dataclass","text":"

    Config for the Matryoshka loss for another arbitrary SAE class.

    Reference code is here: https://github.com/noanabeshima/matryoshka-saes and the original reading is https://sparselatents.com/matryoshka.html and https://arxiv.org/pdf/2503.17547

    "},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.dead_threshold_tokens","title":"dead_threshold_tokens = 10000000 class-attribute instance-attribute","text":"

    Tokens without activation before a latent is considered dead.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.n_prefixes","title":"n_prefixes = 10 class-attribute instance-attribute","text":"

    Number of random length prefixes to use for loss calculation.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss","title":"MatryoshkaLoss(mse, sparsity, l0, l1, aux, n_dead) dataclass","text":"

    Bases: Loss

    The composite loss terms for a training batch.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.aux","title":"aux instance-attribute","text":"

    Auxiliary loss term (e.g., AuxK).

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l0","title":"l0 instance-attribute","text":"

    Sum of L0 magnitudes of hidden activations for all prefix lengths.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l1","title":"l1 instance-attribute","text":"

    Sum of L1 magnitudes of hidden activations for all prefix lengths.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.loss","title":"loss property","text":"

    Total loss.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.mse","title":"mse instance-attribute","text":"

    Average of reconstruction loss (mean squared error) for all prefix lengths.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.n_dead","title":"n_dead instance-attribute","text":"

    Number of dead latents (per aux loss threshold).

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.sparsity","title":"sparsity instance-attribute","text":"

    Sparsity loss, typically lambda * L1.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaObjective","title":"MatryoshkaObjective(cfg)","text":"

    Bases: Objective

    Torch module for calculating the matryoshka loss for an SAE.

    Source code in src/saev/nn/objectives.py
    def __init__(self, cfg: Matryoshka):\n    super().__init__()\n    self.cfg = cfg\n    self.toks_since_active: Tensor | None = None\n
    "},{"location":"api/nn/objectives/#saev.nn.objectives.sample_prefixes","title":"sample_prefixes(d_sae, n_prefixes, min_prefix_length=1, pareto_power=0.5)","text":"

    Samples prefix lengths using a Pareto distribution. Derived from \"Learning Multi-Level Features with Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)

    Parameters:

    Name Type Description Default d_sae int

    Total number of latent dimensions

    required n_prefixes int

    Number of prefixes to sample

    required min_prefix_length int

    Minimum length of any prefix

    1 pareto_power float

    Power parameter for Pareto distribution (lower = more uniform)

    0.5

    Returns:

    Type Description Int64[Tensor, ' n_prefixes']

    torch.Tensor: Sorted prefix lengths

    Source code in src/saev/nn/objectives.py
    @torch.no_grad()\n@jaxtyped(typechecker=beartype.beartype)\ndef sample_prefixes(\n    d_sae: int, n_prefixes: int, min_prefix_length: int = 1, pareto_power: float = 0.5\n) -> Int64[Tensor, \" n_prefixes\"]:\n    \"\"\"\n    Samples prefix lengths using a Pareto distribution. Derived from \"Learning Multi-Level Features with\n    Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)\n\n    Args:\n        d_sae: Total number of latent dimensions\n        n_prefixes: Number of prefixes to sample\n        min_prefix_length: Minimum length of any prefix\n        pareto_power: Power parameter for Pareto distribution (lower = more uniform)\n\n    Returns:\n        torch.Tensor: Sorted prefix lengths\n    \"\"\"\n    if n_prefixes <= 1:\n        return torch.tensor([d_sae], dtype=torch.int64)\n\n    assert n_prefixes <= d_sae\n\n    # Calculate probability distribution favoring shorter prefixes\n    lengths = torch.arange(1, d_sae)\n    pareto_cdf = 1 - ((min_prefix_length / lengths.float()) ** pareto_power)\n    pareto_pdf = torch.cat([pareto_cdf[:1], pareto_cdf[1:] - pareto_cdf[:-1]])\n    probability_dist = pareto_pdf / pareto_pdf.sum()\n\n    # Sample and sort prefix lengths\n    sampled_indices = torch.multinomial(\n        probability_dist, num_samples=n_prefixes - 1, replacement=False\n    )\n\n    # Convert indices to actual prefix lengths\n    prefixes = lengths[sampled_indices]\n\n    # Add n_latents as the final prefix\n    prefixes = torch.cat((prefixes.detach().clone(), torch.tensor([d_sae])))\n\n    prefixes, _ = torch.sort(prefixes, descending=False)\n\n    return prefixes.to(torch.int64)\n
    "},{"location":"api/nn/saev.nn/","title":"saev.nn","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder","title":"SparseAutoencoder(cfg)","text":"

    Bases: Module

    Sparse auto-encoder (SAE)

    Source code in src/saev/nn/modeling.py
    def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. That means load_state_dict overwrites W_dec when it loads W_enc.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n
    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.EncodeOut","title":"EncodeOut","text":"

    Bases: NamedTuple

    Outputs of encode: pre-activations and activated latents.

    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.Output","title":"Output","text":"

    Bases: NamedTuple

    Full SAE forward outputs for objectives and metrics.

    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.decode","title":"decode(f_x, *, prefixes=None)","text":"

    Decode latent features to reconstructions.

    Parameters:

    Name Type Description Default f_x Float[Tensor, 'batch d_sae']

    Latent features of shape (batch, d_sae)

    required prefixes Int64[Tensor, ' n_prefixes'] | None

    Optional tensor of prefix lengths for Matryoshka decoding.

    None

    Returns:

    Type Description Float[Tensor, 'batch n_prefixes d_model']

    Matryoshka reconstructions (batch, n_prefixes, d_model).

    Source code in src/saev/nn/modeling.py
    def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -> Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka cumulative decode\n    device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] > prefixes[:-1])\n    assert 1 <= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -> (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -> ... 
d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. Needs to be cleaned up.\n    return x_hats\n
    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.forward","title":"forward(x)","text":"

    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.

    Parameters:

    Name Type Description Default x Float[Tensor, 'batch d_model']

    a batch of transformer activations.

    required Source code in src/saev/nn/modeling.py
    def forward(self, x: Float[Tensor, \"batch d_model\"]) -> Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n
    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.normalize_w_dec","title":"normalize_w_dec()","text":"

    Set W_dec to unit-norm columns.

    Source code in src/saev/nn/modeling.py
    @torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n
    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.remove_parallel_grads","title":"remove_parallel_grads()","text":"

    Update grads so that they remove the parallel component

    Source code in src/saev/nn/modeling.py
    @torch.no_grad()\ndef remove_parallel_grads(self):\n    \"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model -> d_sae\",\n    )\n\n    norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq > 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -> d_sae d_model\",\n    )\n
    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig","title":"SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True) dataclass","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.activation","title":"activation = TopK() class-attribute instance-attribute","text":"

    Activation function.

    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_model","title":"d_model = 1024 class-attribute instance-attribute","text":"

    Size of x.

    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_sae","title":"d_sae = 1024 * 16 class-attribute instance-attribute","text":"

    Number of features in SAE latent space; size of f(x).

    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.normalize_w_dec","title":"normalize_w_dec = True class-attribute instance-attribute","text":"

    Whether to make sure W_dec has unit norm columns. See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".

    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_blend","title":"reinit_blend = 0.8 class-attribute instance-attribute","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"reinit_enc_dec_tranpose = True class-attribute instance-attribute","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.remove_parallel_grads","title":"remove_parallel_grads = True class-attribute instance-attribute","text":"

    Whether to remove gradients parallel to W_dec columns (which will be ignored because we force the columns to have unit norm). See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.

    "},{"location":"api/nn/saev.nn/#saev.nn.dump","title":"dump(fpath, sae)","text":"

    Save an SAE checkpoint to disk along with configuration, using the trick from equinox.

    Parameters:

    Name Type Description Default fpath Path | str

    filepath to save checkpoint to.

    required sae SparseAutoencoder

    sparse autoencoder checkpoint to save.

    required Source code in src/saev/nn/modeling.py
    @beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n
    "},{"location":"api/nn/saev.nn/#saev.nn.load","title":"load(fpath, *, device='cpu')","text":"

    Loads a sparse autoencoder from disk.

    Source code in src/saev/nn/modeling.py
    @beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -> SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. 
Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        
cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n
    "},{"location":"api/utils/monitoring/","title":"saev.utils.monitoring","text":""},{"location":"api/utils/monitoring/#saev.utils.monitoring.DataloaderMonitor","title":"DataloaderMonitor(dataloader, process_factory=None)","text":"

    Tracks IO and CPU activity for the dataloader manager process and its children.

    The monitor owns the dataloader handle and psutil processes internally, so callers simply construct it with the dataloader and then call compute() whenever metrics are needed.

    Source code in src/saev/utils/monitoring.py
    def __init__(\n    self,\n    dataloader: object,\n    process_factory: Callable[[int], psutil.Process] | None = None,\n) -> None:\n    self.dataloader = dataloader\n    self.process_factory = process_factory or psutil.Process\n    self._reset_state()\n
    "},{"location":"api/utils/saev.utils/","title":"saev.utils","text":""},{"location":"api/utils/scheduling/","title":"saev.utils.scheduling","text":""},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter","title":"BatchLimiter(dataloader, n_samples)","text":"

    Limits the number of batches to only return n_samples total samples.

    Source code in src/saev/utils/scheduling.py
    def __init__(self, dataloader: DataLoaderLike, n_samples: int):\n    self.dataloader = dataloader\n    self.n_samples = n_samples\n    self.batch_size = dataloader.batch_size\n    self.drop_last = dataloader.drop_last\n
    "},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter.__getattr__","title":"__getattr__(name)","text":"

    Pass through attribute access to the wrapped dataloader.

    Source code in src/saev/utils/scheduling.py
    def __getattr__(self, name: str) -> Any:\n    \"\"\"Pass through attribute access to the wrapped dataloader.\"\"\"\n    # __getattr__ is only called when the attribute wasn't found on self\n    # So we delegate to the wrapped dataloader\n    try:\n        return getattr(self.dataloader, name)\n    except AttributeError:\n        # Re-raise with more context about where the attribute was not found\n        raise AttributeError(\n            f\"'{self.__class__.__name__}' object and its wrapped dataloader have no attribute '{name}'\"\n        )\n
    "},{"location":"api/utils/scheduling/#saev.utils.scheduling.Warmup","title":"Warmup(init, final, n_steps)","text":"

    Bases: Scheduler

    Linearly increases from init to final over n_steps steps.

    Source code in src/saev/utils/scheduling.py
    def __init__(self, init: float, final: float, n_steps: int):\n    self.final = final\n    self.init = init\n    self.n_steps = n_steps\n    self._step = 0\n
    "},{"location":"api/utils/scheduling/#saev.utils.scheduling.WarmupCosine","title":"WarmupCosine(init, n_warmup, peak, n_steps, final)","text":"

    Bases: Scheduler

    Linearly increases from init to peak over n_warmup steps, then decreases to final using cosine decay over the remaining n_steps - n_warmup steps.

    Source code in src/saev/utils/scheduling.py
    def __init__(\n    self, init: float, n_warmup: int, peak: float, n_steps: int, final: float\n):\n    self.init = init\n    self.peak = peak\n    self.final = final\n    self.n_warmup = n_warmup\n    self.n_steps = n_steps\n    self._step = 0\n
    "},{"location":"api/utils/statistics/","title":"saev.utils.statistics","text":""},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator","title":"PercentileEstimator(percentile, total, lr=0.001, shape=())","text":"Source code in src/saev/utils/statistics.py
    def __init__(\n    self,\n    percentile: float | int,\n    total: int,\n    lr: float = 1e-3,\n    shape: tuple[int, ...] = (),\n):\n    self.percentile = percentile\n    self.total = total\n    self.lr = lr\n\n    self._estimate = torch.zeros(shape)\n    self._step = 0\n
    "},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator.update","title":"update(x)","text":"

    Update the estimator with a new value.

    This method maintains the marker positions using the P2 algorithm rules. When a new value arrives, it's placed in the appropriate position relative to existing markers, and marker positions are adjusted to maintain their desired percentile positions.

    Parameters:

    Name Type Description Default x float | Tensor

    The new value to incorporate into the estimation

    required Source code in src/saev/utils/statistics.py
    def update(self, x: float | Tensor):\n    \"\"\"\n    Update the estimator with a new value.\n\n    This method maintains the marker positions using the P2 algorithm rules. When a new value arrives, it's placed in the appropriate position relative to existing markers, and marker positions are adjusted to maintain their desired percentile positions.\n\n    Arguments:\n        x: The new value to incorporate into the estimation\n    \"\"\"\n    self._step += 1\n\n    step_size = self.lr * (self.total - self._step) / self.total\n\n    # Is a no-op if it's already on the same device.\n    if isinstance(x, Tensor):\n        self._estimate = self._estimate.to(x.device)\n\n    self._estimate += step_size * (\n        torch.sign(x - self._estimate) + 2 * self.percentile / 100 - 1.0\n    )\n
    "},{"location":"api/utils/statistics/#saev.utils.statistics.calc_batch_entropy","title":"calc_batch_entropy(example_idx, token_idx, n_examples, content_tokens_per_example)","text":"

    Compute entropy and coverage metrics for a batch of shuffled indices.

    The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.

    Source code in src/saev/utils/statistics.py
    @beartype.beartype\ndef calc_batch_entropy(\n    example_idx: IndexLike,\n    token_idx: IndexLike,\n    n_examples: int,\n    content_tokens_per_example: int,\n) -> dict[str, float]:\n    \"\"\"\n    Compute entropy and coverage metrics for a batch of shuffled indices.\n\n    The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.\n    \"\"\"\n    example_idx_t = _to_tensor(example_idx)\n    token_idx_t = _to_tensor(token_idx)\n    if n_examples <= 0:\n        raise ValueError(\"n_examples must be positive.\")\n    if content_tokens_per_example <= 0:\n        raise ValueError(\"content_tokens_per_example must be positive.\")\n\n    if example_idx_t.ndim != 1:\n        raise ValueError(\"example_idx must be 1D.\")\n    if token_idx_t.ndim != 1:\n        raise ValueError(\"token_idx must be 1D.\")\n    if example_idx_t.numel() == 0:\n        raise ValueError(\"example_idx must contain at least one element.\")\n\n    _assert_batch_dim(example_idx_t, token_idx_t)\n\n    example_metrics = _add_prefix(\n        \"loader/example\", _entropy_metrics(example_idx_t, n_examples)\n    )\n    token_metrics = _add_prefix(\n        \"loader/token\", _entropy_metrics(token_idx_t, content_tokens_per_example)\n    )\n\n    return {**example_metrics, **token_metrics}\n
    "},{"location":"api/utils/wandb/","title":"saev.utils.wandb","text":""},{"location":"api/utils/wandb/#saev.utils.wandb.ParallelWandbRun","title":"ParallelWandbRun(project, cfgs, mode, tags, dir='.wandb')","text":"

    Inspired by https://community.wandb.ai/t/is-it-possible-to-log-to-multiple-runs-simultaneously/4387

    Source code in src/saev/utils/wandb.py
    def __init__(\n    self,\n    project: str,\n    cfgs: list[dict[str, object]],\n    mode: str,\n    tags: list[str],\n    dir: str = \".wandb\",\n):\n    cfg, *cfgs = cfgs\n    self.project = project\n    self.cfgs = cfgs\n    self.mode = mode\n    self.tags = tags\n    self.dir = dir\n    self.summary_updates: dict[str, object] = {}\n\n    self.live_run = wandb.init(\n        project=project, config=cfg, mode=mode, tags=tags, dir=dir\n    )\n\n    self.metric_queues: list[MetricQueue] = [[] for _ in self.cfgs]\n
    "},{"location":"developers/contributing/","title":"Contributing","text":""},{"location":"developers/contributing/#project-layout","title":"Project layout","text":"
    docs/\n    mkdocs.yml    # The configuration file.\n    src/\n        index.md  # The documentation homepage.\n        ...       # Other markdown pages, images and other files.\n
    "},{"location":"developers/datapoint-init/","title":"Datapoint Initialization","text":"

    Datapoint initialization is an SAE weight initialization strategy independently proposed by Anthropic and Pierre Peigne for improving SAE training.

    Conceptually, we initialize each decoder column to look like a real datapoint, so every latent starts with a patch of input space where it \"wins\" and gets some gradient. Here's the algorithm:

    1. Select \\(n\\) random data points from your training data.
    2. Compute the mean \\(\\mu\\) and zero-center the data: \\(x_0 = x - \\mu\\).
    3. Linearly blend each zero-centered datapoint with Kaiming initialization: \\(w = p \\cdot (x - \\mu) + (1 - p) \\cdot r\\) where \\(p\\) is your blend probability and \\(r\\) is a randomly sampled Kaiming initialization vector.
    4. Initialize \\(W_\\text{enc}\\) as a concatenation of \\(n\\) blended vectors.
    5. Initialize \\(W_\\text{dec}\\) as \\(W_\\text{enc}^T\\).

    Anthropic suggests \\(p = 0.8\\) for SAEs and 0.4 for \"weakly causal crosscoders\". I interpret this to mean that there is no universally appropriate \\(p\\).

    "},{"location":"developers/disk-layout/","title":"Storage & Run Manifest Spec (v1)","text":"

    There are two main locations:

    1. $SAEV_SCRATCH/saev/shards: where we store transformer activations (referred to as shards_root in the codebase).
    2. $SAEV_NFS/saev/runs: where we store checkpoints and other computed intermediate stuff like example images, probe1d results, etc. (referred to as runs_root in the codebase).

    Visually, these are:

    $SAEV_SCRATCH/saev/\n  shards/\n    <shard_hash>/\n      metadata.json\n      shards.json\n      acts000000.bin\n      acts000001.bin\n      ...\n      labels.bin\n

    and

    $SAEV_NFS/saev/\n  runs/\n    <run_id>/\n      checkpoint/           # output of train.py on <shard_hash>\n        sae.pt\n        config.json\n      links/                # Symlinks\n        train-shards        # $SCRATCH/saev/shards/<shard_hash>\n        train-dataset       # Whatever the original image dataset was\n        val-shards          # $SCRATCH/saev/shards/<shard_hash>\n        val-dataset         # Whatever the original image dataset was\n      inference/            # outputs from dump.py\n        <shard_hash>/\n          config.json\n          token_acts.npz\n          visuals/          # output of visuals.py\n

    Each $SAEV_SCRATCH/saev/shards/<shard_hash>/ MUST include:

    • metadata.json (UTF-8, canonical spec; see protocol.md)
    • shards.json (UTF-8, shard index and sizes; see protocol.md)
    • acts*.bin (binary shards; format in protocol.md)
    • labels.bin (binary patch labels aligned to shards; format in protocol.md)

    Note

    Immutability: Files under saev/shards/<shard_hash>/ MUST be treated as read-only after publication. Any change yields a new shard_hash.

    All CLI entrypoints should accept a single --run <path> argument. Every other path MUST be resolved from the run root:

    • ViT activations: links/shards \u2192 saev/shards/<shard_hash>
    • Dataset: links/dataset \u2192 Dataset root, wherever it is on disk.
    • SAE checkpoint: checkpoint/sae.pt

    Example resolution:

    run = pathlib.Path(cfg.run)\nshards_root = (run / \"links\" / \"shards\").resolve()\ndataset_root = (run / \"links\" / \"dataset\").resolve()\nckpt = run / \"checkpoint\" / \"sae.pt\"\nlabels = shards_root / \"labels.bin\"\n
    • $SAEV_SCRATCH and $SAEV_NFS should be set for all users/processes running saev tools.
    "},{"location":"developers/disk-layout/#faqs","title":"FAQs","text":"
    • Where do patch labels live? Next to acts*.bin in $SAEV_SCRATCH/saev/shards/<shard_hash>/labels.bin. Scripts discover them via links/shards/labels.bin.

    • Can I put datasets directly in $SAEV_SCRATCH? Sure, but not in $SAEV_SCRATCH/saev/shards.

    "},{"location":"developers/naming/","title":"Variable Naming","text":""},{"location":"developers/protocol/","title":"saev Sharded Activation File Protocol","text":"

    saev caches activations to disk rather than running ViT or LLM inference when training SAEs. Gemma Scope makes this decision as well (see Section 3.3.2 of https://arxiv.org/pdf/2408.05147). saev.data has a specific protocol to support this on OSC, a supercomputer center, and to take advantage of OSC's specific disk performance.

    Goal: loss-lessly persist very large Transformer (ViT or LLM) activations in a form that is:

    • mem-mappable
    • Parameterized solely by the experiment configuration (scripts/shards.py:Config)
    • Referenced by a content-hash, so identical configs collide, divergent ones never do
    • Can be read quickly in a random order for training, and can be read (slowly) with random-access for visuals.

    This document is the single normative source. Any divergence in code is a bug.

    "},{"location":"developers/protocol/#1-directory-layout","title":"1. Directory layout","text":"
    <dump_to>/<HASH>/\n    metadata.json    # UTF-8 JSON, human-readable, describes data-generating config\n    shards.json      # UTF-8 JSON, human-readable, describes shards.\n    acts000000.bin   # shard 0\n    acts000001.bin   # shard 1\n    ...\n    actsNNNNNN.bin   # shard NNNNNN  (zero-padded width=6)\n    labels.bin       # patch labels (optional)\n

    HASH = sha256(json.dumps(metadata, sort_keys=True, separators=(',', ':')).encode('utf-8')) Guards against silent config drift.

    "},{"location":"developers/protocol/#2-json-file-schemas","title":"2. JSON file schemas","text":""},{"location":"developers/protocol/#21-metadatajson","title":"2.1. metadata.json","text":"field type semantic family string \"clip\" \\| \"siglip\" \\| \"dinov2\" ckpt string model identifier (OpenCLIP, HF, etc.) layers int[] ViT residual\u2010block indices recorded patches_per_ex int example patches only (excludes CLS) cls_token bool true -> patch 0 is CLS, else no CLS d_model int activation dimensionality n_examples int total examples in dataset patches_per_shard int logical activations per shard (see #3) data object opaque dataset description dataset string absolute path to original dataset root dtype string numpy dtype. Fixed \"float32\" for now. protocol string \"2.1\" (shards after big refactor)

    The data object is base64.b64encode(pickle.dumps(img_ds)).decode('utf8').

    The dataset field stores the absolute path to the root directory of the original image dataset, allowing runs to create symlinks back to the source images for visualization and analysis.

    "},{"location":"developers/protocol/#22-shardsjson","title":"2.2. shards.json","text":"

    A single array of shard objects, each of which has the following fields:

    field type semantic name string shard filename (acts000000.bin). n_examples int the number of examples in the shard."},{"location":"developers/protocol/#3-shard-sizing-maths","title":"3. Shard sizing maths","text":"
    tokens_per_ex = patches_per_ex + (1 if cls_token else 0)\n\nexamples_per_shard = floor(patches_per_shard / (tokens_per_ex * len(layers)))\n\nshape_per_shard = (\n    examples_per_shard, len(layers), tokens_per_ex, d_model,\n)\n

    patches_per_shard is a budget (default ~2.4 M) chosen so a shard is approximately 10 GiB for Float32 @ d_model = 1024.

    The last shard will have a smaller value for examples_per_shard; this value is documented in n_examples in shards.json

    "},{"location":"developers/protocol/#4-data-layout-and-global-indexing","title":"4. Data Layout and Global Indexing","text":"

    The entire dataset of activations is treated as a single logical 4D tensor with the shape (n_examples, len(layers), tokens_per_ex, d_model). This logical tensor is C-contiguous with axes ordered [Example, Layer, Token, Dimension].

    Physically, this tensor is split along the first axis (Example) into multiple shards, where each shard is a single binary file. The number of examples in each shard is constant, except for the final shard, which may be smaller.

    To locate an arbitrary activation vector, a reader must convert a logical coordinate (global_ex_idx, layer_value, token_idx) into a file path and an offset within that file.

    "},{"location":"developers/protocol/#41-definitions","title":"4.1 Definitions","text":"

    Let the parameters from metadata.json be:

    • L = len(layers)
    • P = patches_per_ex
    • T = P + (1 if cls_token else 0) (Total tokens per example)
    • D = d_model
    • S = n_examples from shards.json or examples_per_shard from Section 3 (shard sizing).
    "},{"location":"developers/protocol/#42-coordinate-transformations","title":"4.2 Coordinate Transformations","text":"

    Given a logical coordinate:

    • global_ex_idx: integer, with 0 <= global_ex_idx < n_examples
    • layer: integer, must be an element of layers
    • token_idx: integer, 0 <= token_idx < T

    The physical location is found as follows:

    1. Identify Shard:

      • shard_idx = global_ex_idx // S
      • ex_in_shard = global_ex_idx % S The target file is acts{shard_idx:06d}.bin.
    2. Identify Layer Index: The stored data contains a subset of the ViT's layers. The logical layer_value must be mapped to its index in the stored layers array.

      • layer_idx = layers.index(layer) A reader must raise an error if layer is not in layers.
    3. Calculate Offset: The data within a shard is a 4D tensor of shape (S, L, T, D). The offset to the first byte of the desired activation vector [ex_in_shard, layer_idx , token_idx] is:

      • offset_in_vectors = (ex_in_shard * L * T) + (layer_idx * T) + token_idx
      • offset_in_bytes = offset_in_vectors * D * 4 (assuming 4 bytes for float32)

    A reader can then seek to offset_in_bytes and read \\(D \\times 4\\) bytes to retrieve the vector.

    Alternatively, rather than calculate the offset, readers can memmap the shard, then use Numpy indexing to get the activation vector.

    "},{"location":"developers/protocol/#43-token-axis-layout","title":"4.3 Token Axis Layout","text":"

    The token axis of length \\(T\\) is ordered as follows. If cls_token is true, index 0 is the [CLS] token activation and indices 1 to \\(P\\) are the patch token activations. If cls_token is false, indices 0 to \\(P-1\\) are the patch token activations.

    The relative order of patch tokens is preserved exactly as produced by the upstream Vision Transformer.

    "},{"location":"developers/protocol/#5-versioning-compatibility","title":"5 Versioning & compatibility","text":"
    • Major changes (shape reorder, dtype switch, new required JSON keys) increment the major protocol version number at the top of this document and must emit a breaking warning in loader code.
    • Minor, backward-compatible additions (new optional JSON key) merely update this doc and the minor protocol version number.

    That's it. Anything else you find in code that contradicts this document, fix the code or update the spec.

    "},{"location":"developers/workflows/","title":"Workflows","text":"
    1. Generate inference activations (and thus visuals) for both training and validation splits.
    "},{"location":"users/bird-mae-debugging/","title":"Debugging Bird-MAE Activations","text":"

    This is an example of the kind of debugging you might have to do when training SAEs on a new model. The short version: Bird-MAE has an \"emergent outlier feature\" in dimension 296 that blows up after the first MLP. The fix is to record activations after the pre-MLP LayerNorm (block.norm2) instead of the raw residual stream, because the LayerNorm learns to suppress the outlier.

    "},{"location":"users/bird-mae-debugging/#symptom-80-dead-neurons","title":"Symptom: 80% dead neurons","text":"

    While training TopK SAEs on BirdMAE activations taken from birdsong, ~80% of my neurons were dead from the very start of training.

    "},{"location":"users/bird-mae-debugging/#comparing-to-known-good-activations","title":"Comparing to known-good activations","text":"

    First, I compared activations from BirdMAE to DINOv3 activations (which I know are well-behaved). I recorded 300K content token activation vectors from layer 14/24 from DINOv3 ViT-L/16 and BirdMAE-L. Each vector has 1024 dimensions. I flattened these vectors; for each of BirdMAE and DINOv3, I have a list of 307.2M neuron activations (300K x 1024 = 307,200,000). I plotted a histogram below. Note the log scale on the y-axis.

    I zoomed in on the left-most cluster, ignoring the right cluster. While BirdMAE is more spread out, the shapes look good enough for now.

    "},{"location":"users/bird-mae-debugging/#finding-the-outlier-dimension-296","title":"Finding the outlier: dimension 296","text":"

    Looking at the right cluster, I realized that all of these values are from neuron 296 of 1024. Here, I colored activations based on their neuron: all BirdMAE neurons besides 296 are blue, DINOv3 is orange, and neuron 296 is red.

    My activation matrix is \\(\\mathbb{R}^{300K \\times 1024}\\) for each dataset. In code, what I see is:

    bird_acts.shape  # (300K, 1024)\nbird_acts[:, 295].min()  # 2549.54\nbird_acts[:, 295].max()  # 4625.12\n

    Something is broken inside of BirdMAE.

    "},{"location":"users/bird-mae-debugging/#tracing-the-outlier-through-the-residual-stream","title":"Tracing the outlier through the residual stream","text":"

    Where in BirdMAE does this abnormality show up? Consider transformers as residual streams. After what layer does dimension 296/1024 blow up? See this diagram below: for a single random example from BirdMAE, we will track both the average neuron and neuron 296's value through the 24 transformer layers.

    BirdMAE uses 256 content tokens for a single example. We take the average value of each neuron in the residual stream before each transformer block (the green \"Graph #1\" circle in the above diagram) and after the final transformer block. We plot each of the 1023 \"well-behaved\" neurons in light blue. We plot our degenerate neuron 296 in red. Note the log scale on the y-axis.

    Our well-behaved neurons mostly stay in (-10, 10). Neuron 296 jumps straight to ~2.2K after the first residual block and is never fixed again. It's well-behaved coming out of the patch embedding before the first residual block.

    "},{"location":"users/bird-mae-debugging/#narrowing-it-down-the-first-mlp","title":"Narrowing it down: the first MLP","text":"

    Below is the output from the attention layers (Graph #2) in our architecture diagram.

    Neuron 296 is mostly well-behaved; it's a little big after the second attention layer, but not insane.

    Here, we can see that the output of the first MLP produces an abnormally high value for neuron 296. Why?

    Here's an architecture diagram of BirdMAE's MLPs according to the model definition on HuggingFace. Let's look at the trainable parameters in these MLPs across layers, starting from the end and working backwards.

    fc2 has a weight parameter with shape (4096, 1024) and a bias parameter with shape (1024,). I take the L2 norm of fc2.weight's columns to see if col 296/1024 is different.

    fc2.weight does appear to be different, and abnormally large (note the log scale). fc2.bias is also different, but it's not immediately obvious what's going on there to me.

    "},{"location":"users/bird-mae-debugging/#root-cause-emergent-outlier-features","title":"Root cause: emergent outlier features","text":"

    This is a known phenomenon in transformers called \"emergent outlier features.\" After extensive pretraining, a single dimension in the residual stream accumulates a very large magnitude. The model never needs to \"fix\" this because the pre-attention and pre-MLP LayerNorms learn to suppress it: the learned multiplicative weight for dimension 296 is very small, and the bias is approximately 1. So later layers never actually \"see\" the outlier in practice.

    We verified this by inspecting norm2.weight across layers and confirming that the learned scale for dimension 296 is near-zero, but that analysis is not reproduced here.

    The BirdMAE authors never had to deal with this because all downstream use of the model goes through LayerNorm first.

    "},{"location":"users/bird-mae-debugging/#fix-record-after-layernorm","title":"Fix: record after LayerNorm","text":"

    The fix is to record activations after block.norm2 (the pre-MLP LayerNorm) instead of from the raw residual stream. In saev, this is implemented as:

    def get_residuals(self) -> list[torch.nn.Module]:\n    return [block.norm2 for block in self.model.blocks]\n

    After this change, the outlier is suppressed and SAE training works normally.

    "},{"location":"users/bird-mae-debugging/#lessons","title":"Lessons","text":"
    1. Compare activation distributions to a known-good model. Histogramming flattened activations from 300K tokens is cheap and can reveal outliers.
    2. Emergent outlier features are real. If a single dimension dominates your activation distribution, check whether it's a known artifact of pretraining before assuming your recording code is wrong.
    3. Record after LayerNorm, not from the raw residual stream. The residual stream can carry high-magnitude \"bookkeeping\" values that LayerNorm suppresses. Recording post-norm avoids this entirely.
    "},{"location":"users/glossary/","title":"Glossary","text":"

    Definitions for words used in the code and documentation.

    • example: one dataset item (image, sentence, audio clip, point cloud, graph instance).
    • token: one model position in the encoder\u2019s residual stream (the thing with hidden size d_model). Always \"token\" inside the model.
    • content token: tokens derived from the raw input (image patches, wordpieces, audio windows, nodes, etc.).
    • special token: tokens not directly derived from the raw input (class/summary token, [SEP], [MASK], [PAD], register tokens, etc.).
    • sequence length L: total tokens per example (content + special). If variable, call it \u201cragged\u201d.
    • layer: an integer index into the encoder\u2019s stack.
    • activation kind (optional but useful): which stream you saved (e.g., resid_pre, resid_post, mlp_out, attn_out, qkv, head_out).

    Modality-specific vocab:

    • patch (vision): a 2D content token. Often laid out on a grid with shape (H_patches, W_patches).
    • frame/token or tube (video): content token in time \u00d7 space; often (T, H, W).
    • wordpiece / subword (text): content token from a tokenizer.
    • window / frame (audio): time\u2013frequency window.
    • node (graph), point (point cloud).
    "},{"location":"users/guide/","title":"Guide","text":"

    This guide explains how to transition from the ADE20K demo to using saev with your own custom datasets.

    Here are the steps:

    1. Save ViT activations to disk
    2. Train SAEs on activations
    3. Evaluate the SAE checkpoints
    4. Visualize Learned Features

    Note

    saev assumes you are running on NVIDIA GPUs. On a multi-GPU system, prefix your commands with CUDA_VISIBLE_DEVICES=X to run on GPU X.

    "},{"location":"users/guide/#save-vit-activations-to-disk","title":"Save ViT Activations to Disk","text":"

    To save activations to disk, we need to specify:

    1. Which model we would like to use
    2. Which layers we would like to save.
    3. Where on disk and how we would like to save activations.
    4. Which images we want to save activations for.

    The saev/framework/shards.py script does all of this for us.

    Run uv run launch.py shards --help to see all the configuration.

    In practice, you might run:

    uv run launch.py shards \\\n  --shards-root /fs/scratch/PAS2136/samuelstevens/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-16/openai \\\n  --d-model 768 \\\n  --layers 6 7 8 9 10 11 \\\n  --content-tokens-per-example 196 \\\n  --batch-size 512 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  data:img-seg-folder \\\n  --data.root /fs/scratch/PAS2136/samuelstevens/datasets/ADEChallengeData2016/ \\\n  --data.split training\n

    This will save activations for the CLIP-pretrained model ViT-B/16, which has a residual stream dimension of 768, and has 196 patches per image (224 / 16 = 14; 14 x 14 = 196). It will save the last 6 layers. It will write 2.4M patches per shard, and save shards to a new directory /fs/scratch/PAS2136/samuelstevens/saev/shards.

    Note

    A note on storage space: A ViT-B/16 on ImageNet-1K will save 1.2M images x 197 patches/layer/image x 1 layer = ~240M activations, each of which takes up 768 floats x 4 bytes/float = 3072 bytes, for a total of 723GB for the entire dataset. As you scale to larger models (ViT-L/14 has 1024 dimensions, and 14x14-pixel patches on a 224x224 image give 16 x 16 = 256 patches/layer/image), recorded activations will grow even larger.

    This script will also save a metadata.json file that will record the relevant metadata for these activations, which will be read by future steps. The activations will be in .bin files, numbered starting from 000000.

    To add your own models, see the guide to extending in saev.activations.

    "},{"location":"users/guide/#train-saes-on-activations","title":"Train SAEs on Activations","text":"

    To train an SAE, we need to specify:

    1. Which activations to use as input.
    2. SAE architectural stuff.
    3. Optimization-related stuff.

    The train.py script handles this.

    Run uv run train.py --help to see all the configuration.

    The most important options are:

    • --runs-root: where to store runs.
    • --train-data and --val-data: How to load the training and validation data. You probably want to specify both --{train,val}-data.shards (the shard directory) and --{train,val}-data.layer (which layer to use).
    • sae.activation: sae.activation:relu to use the ReLU activation.

    This is a full example:

    uv run train.py \\\n  --runs-root /fs/ess/PAS2136/samuelstevens/saev/runs \\\n  --lr 4e-3 \\\n  --sae.exp-factor 16 \\\n  --sae.d-model 1024 \\\n  --tag ade20k-v0.1 \\\n  --n-train 100_000_000 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --train-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/51567c6c \\\n  --train-data.layer 11 \\\n  --val-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3e27794f \\\n  --val-data.layer 11 \\\n  sae.activation:relu \\\n  objective:matryoshka \\\n  --objective.sparsity-coeff 1e-3 \\\n

    This will train one (1) sparse autoencoder on the data. See the section on sweeps to learn how to train multiple SAEs in parallel using one or more GPUs.

    "},{"location":"users/guide/#loader-entropy-metrics","title":"Loader Entropy Metrics","text":"

    The training loop logs additional loader diagnostics derived from calc_batch_entropy in train.py. Every batch contributes two entropy measurements in natural log units:

    • loader/example_entropy and loader/example_entropy_normalized summarize how evenly the shuffled loader samples example indices. Normalization divides the raw entropy by ln(metadata.n_examples) so perfectly uniform sampling is 1.0.
    • loader/token_entropy and loader/token_entropy_normalized do the same for patch indices using ln(metadata.content_tokens_per_example) as the normalizer.
    • loader/example_coverage and loader/token_coverage report the fraction of distinct example or patch indices seen in the current batch relative to their theoretical support.

    All of these metrics appear alongside the existing loader/read_mb counters, helping spot skewed sampling or under-covered patches mid-run.

    "},{"location":"users/guide/#evaluation","title":"Evaluation","text":"

    After training an SAE, you probably want to use it. You can use the SAE as a regular PyTorch torch.nn.Module in combination with a saev.data.OrderedDataLoader or saev.data.IndexedDataset.

    However, most SAEs are evaluated with a similar set of metrics (normalized MSE, L0, etc). The saev/framework/inference.py script calculates these metrics. You can run uv run launch.py inference --help to see all the options.

    The most important options are:

    • --run: The path to the SAE run directory.
    • --data: The options for the OrderedDataLoader. Specifically, you need to set --data.shards and --data.layer, just like for training.
    uv run launch.py inference \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/z55bntm1/ \\\n  --data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/614861a0 \\\n  --data.layer 11\n
    "},{"location":"users/guide/#visualize-learned-features","title":"Visualize Learned Features","text":"

    Now that you've trained an SAE, you probably want to look at its learned features. One way to visualize an individual learned feature is by picking out images that maximize the activation of that feature. We use the saved sparse token_acts.npz file from the previous inference step.

    Warning

    Because there are so many different ways to visualize SAE features, I moved it to contrib/trait_discovery (used for our preprint \"Towards Open-Ended Visual Scientific Discovery with Sparse Autoencoders\").

    The most important options:

    • --run: The path to the SAE run directory.
    • --shards: The shards directory.
    • --latents: The 0-indexed latents to save images for.
    • --n-latents: The number of randomly selected latents to save images for.

    So first, move into the contrib/trait_discovery:

    cd contrib/trait_discovery\n

    Then run the script that generates highlighted images:

    uv run scripts/launch.py visuals \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/unu6dbfb \\\n  --shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3802cb66 \\\n  --latents 0 1 2 3 4 5 6 7 8 9 49 56 57 125 202 \\\n  --n-latents 20 \\\n

    Note

    Because of limitations in the SAE training process, not all SAE latents are equally interesting. Some latents are dead, some are dense, some only fire on two images, etc. Typically, you want neurons that fire very strongly (high value) and fairly infrequently (low frequency). You might be interested in particular, fixed latents (--latents). I recommend using saev/interactive/metrics.py with marimo to figure out good thresholds.

    "},{"location":"users/guide/#sweeps","title":"Sweeps","text":"

    tl;dr: basically the slow part of training SAEs is loading ViT activations from disk, and since SAEs are pretty small compared to other models, you can train a bunch of different SAEs in parallel on the same data using a big GPU. That way you can sweep learning rate, lambda, etc. all on one GPU.

    "},{"location":"users/guide/#why-parallel-sweeps","title":"Why Parallel Sweeps","text":"

    SAE training optimizes for a unique bottleneck compared to typical ML workflows: disk I/O rather than GPU computation. When training on vision transformer activations, loading the pre-computed activation data from disk is often the slowest part of the process, not the SAE training itself.

    A single set of ImageNet activations for a vision transformer can require terabytes of storage. Reading this data repeatedly for each hyperparameter configuration would be extremely inefficient.

    "},{"location":"users/guide/#parallelized-training-architecture","title":"Parallelized Training Architecture","text":"

    To address this bottleneck, we implement parallel training that allows multiple SAE configurations to train simultaneously on the same data batch:

    \nflowchart TD\n    A[Pre-computed ViT Activations] -->|Slow I/O| B[Memory Buffer]\n    B -->|Shared Batch| C[SAE Model 1]\n    B -->|Shared Batch| D[SAE Model 2]\n    B -->|Shared Batch| E[SAE Model 3]\n    B -->|Shared Batch| F[...]\n

    This approach:

    • Loads each batch of activations once from disk
    • Uses that same batch for multiple SAE models with different hyperparameters
    • Amortizes the slow I/O cost across all models in the sweep
    "},{"location":"users/guide/#running-a-sweep","title":"Running a Sweep","text":"

    The train command accepts a --sweep parameter that points to a TOML file defining the hyperparameter grid:

    uv run python -m saev train --sweep configs/my_sweep.toml\n

    Here's an example sweep configuration file:

    [sae]\nsparsity_coeff = [1e-4, 2e-4, 3e-4]\nd_model = 768\nd_sae = [6144, 12288]\n\n[data]\nscale_mean = true\n

    This would train 6 models (3 sparsity coefficients \u00d7 2 SAE widths), each sharing the same data loading operation.

    "},{"location":"users/guide/#limitations","title":"Limitations","text":"

    Not all parameters can be swept in parallel. Parameters that affect data loading (like batch_size or dataset configuration) will cause the sweep to split into separate parallel groups. The system automatically handles this division to maximize efficiency.

    "},{"location":"users/inference/","title":"Inference","text":"

    If you want to get started quickly, try the inference notebook in marimo or on Google Colab.

    Briefly, you need to:

    1. Download a checkpoint.
    2. Get the code.
    3. Load the checkpoint.
    4. Get activations.

    Details are below.

    "},{"location":"users/inference/#download-a-checkpoint","title":"Download a Checkpoint","text":"

    First, download an SAE checkpoint from the Huggingface collection.

    "},{"location":"users/inference/#single-checkpoint-repos","title":"Single-checkpoint repos","text":"

    Some repos (CLIP, BioCLIP, DINOv2) contain a single sae.pt at the root. For instance, the SAE trained on OpenAI's CLIP ViT-B/16 with ImageNet-1K activations is here.

    You can use wget if you want:

    wget https://huggingface.co/osunlp/SAE_CLIP_24K_ViT-B-16_IN1K/resolve/main/sae.pt\n
    "},{"location":"users/inference/#multi-checkpoint-repos","title":"Multi-checkpoint repos","text":"

    The DINOv3 repos contain multiple checkpoints organized by layer and sparsity level. Each repo has a manifest.jsonl with metadata (layer, L0, MSE) for every checkpoint, so you can pick the right one programmatically.

    Download a specific checkpoint:

    from huggingface_hub import hf_hub_download\n\n# Pick a specific layer and run ID from the repo's README or manifest.jsonl\npath = hf_hub_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\", \"layer_23/lnleoyf6/sae.pt\")\n

    Download all checkpoints in a repo:

    from huggingface_hub import snapshot_download\n\nsnapshot_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\")\n

    Available DINOv3 repos:

    • osunlp/SAE_DINOv3_ViT-S-16_IN1K (layers 6-11)
    • osunlp/SAE_DINOv3_ViT-B-16_IN1K (layers 6-11)
    • osunlp/SAE_DINOv3_ViT-L-16_IN1K (layers 13-23)
    • osunlp/SAE_DINOv3_TopK_ViT-L-16_IN1K (layers 13-23)
    "},{"location":"users/inference/#get-the-code","title":"Get the Code","text":"

    The easiest way to do this is to clone the code:

    git clone https://github.com/Imageomics/saev\n

    You can also install the package from git if you use uv (not sure about pip or cuda):

    uv add git+https://github.com/Imageomics/saev\n

    Or clone it and install it as an editable with pip, like pip install -e . in your virtual environment.

    Then you can do things like from saev import ....

    Note

    If you struggle to get saev installed, open an issue on GitHub and I will figure out how to make it easier.

    "},{"location":"users/inference/#load-the-checkpoint","title":"Load the Checkpoint","text":"
    import saev.nn\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n

    Now you have a pretrained SAE.

    "},{"location":"users/inference/#get-activations","title":"Get Activations","text":"

    This is the hardest part. We need to:

    1. Pass an image into a ViT
    2. Record the dense ViT activations at the same layer that the SAE was trained on.
    3. Pass the activations into the SAE to get sparse activations.
    4. Do something interesting with the sparse SAE activations.

    There are examples of this in the demo code: for classification and semantic segmentation. If the permalinks change, you are looking for the get_sae_latents() functions in both files.

    Below is example code to do it using the saev package.

    import saev.nn\nimport saev.data.models\nimport saev.data.shards\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n\nvit_cls = saev.data.models.load_model_cls(\"clip\")\nvit = vit_cls(\"ViT-B-16/openai\").to(device)\nvit = saev.data.shards.RecordedTransformer(vit, 196, True, [10])\n\nimg_tr, _ = vit_cls.make_transforms(\"ViT-B-16/openai\", 196)\nimg = Image.open(\"example.jpg\")\n\nx = img_tr(img)\n# Add a batch dimension.\nx = x[None, ...]\n_, vit_acts = vit(x)\n# Select the only layer and ignore the CLS token.\nvit_acts = vit_acts[:, 0, 1:, :]\n\nout = sae(vit_acts)\n# out.f_x: sparse SAE latents (batch, d_sae)\n# out.x_hats: reconstructed activations (batch, n_prefixes, d_model)\n

    Now you have the sparse representation of all patches in the image (out.f_x) and the reconstructed activations (out.x_hats).

    You might select the dimensions with maximal values for each patch and see what other images are maximally activating.

    "},{"location":"users/new-project/","title":"New Project Structure","text":"

    saev is structured like big_vision, Google's ViT codebase. To get the most use out of saev, you should not use it as a requirement in your project; rather, you should build inside of the source code of saev. This is a guide to that process.

    TL;DR:

    1. Fork saev.
    2. Clone your fork.
    3. Create a new directory in contrib/.
    4. Update both src/saev and your new contrib directory as necessary.
    5. (Hopefully) publish.
    6. If your changes to src/saev are broadly useful and not overly restrictive, open a PR with your changes to src/saev.

    I am currently applying SAEs to audio of birdsong, so this is how I'll develop it.

    First, fork and clone saev. Do this however you want, but GitHub has a guide on it.

    Second, you probably want to store code related to your project in this repo. Make a new directory in contrib/. I'm calling my new subproject \"birdsong.\"

    [I] samuelstevens@host ~/p/saev (main)> tree -L 1 contrib/\ncontrib/\n\u251c\u2500\u2500 birdsong\n\u251c\u2500\u2500 interactive_interp\n\u2514\u2500\u2500 trait_discovery\n

    Use uv to make a new package inside your new project:

    [I] samuelstevens@host ~/p/s/c/birdsong (main)> uv init --package .\nAdding `birdsong` as member of workspace `~/projects/saev`\nInitialized project `birdsong` at `~/projects/saev/contrib/birdsong`\n

    Now you have some additional files.

    [I] samuelstevens@ascend-login02 ~/p/s/c/birdsong (main)> tree\n.\n\u251c\u2500\u2500 pyproject.toml\n\u251c\u2500\u2500 README.md\n\u2514\u2500\u2500 src\n    \u2514\u2500\u2500 birdsong\n        \u2514\u2500\u2500 __init__.py\n

    Now I can write scripts and source code for birdsong-specific stuff in here. I'll probably add a notebook for looking at instances of birdsongs before and after using SAEs to identify patterns under a new birdsong/notebooks directory, and will add birdsong/logbook.md to store ongoing TODO items, and so on.

    To train SAEs on audio files, I'll need to add a new dataset type to save activations. In order to do this, I'll edit src/saev/data/datasets.py.

    I'll also need to add another model to the dataset, one that expects audio files. Since I don't think that DINOv3, OpenCLIP, or the other existing model families will be suitable, I'll need to add a new model family. Again, this will need to go somewhere in src/saev/data.

    If I'm smart about it, these changes will be nice and non-destructive, and other users of saev can benefit from them. After I publish some results, to share this code with others, I'll open a PR from my fork/branch to main with the new datasets/models. But I won't open a PR with birdsong because that's specific to me, rather than to the library.1

    1. Technically, birdsong will be in saev because I'm a sort of privileged user because I'm the main developer. But other folks probably want their project-specific code attached to their GitHub page, rather than OSU-NLP's.\u00a0\u21a9

    "},{"location":"users/sweeps/","title":"Sweeps","text":"

    Hyperparameter sweeps in saev train multiple SAE configurations in parallel on a single GPU, amortizing the cost of loading activation data from disk across all models. Furthermore, sweeps make it easy to train multiple SAEs with one command across multiple GPUs using Slurm.

    "},{"location":"users/sweeps/#quick-start","title":"Quick Start","text":"

    Create a Python file defining your sweep:

    # sweeps/my_sweep.py\n\ndef make_cfgs() -> list[dict]:\n    cfgs = []\n\n    # Grid search over learning rate and sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"objective\": {\"sparsity_coeff\": sparsity},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

    Run the sweep:

    uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23\n

    This trains 9 SAEs (3 learning rates x 3 sparsity coefficients) in parallel.

    "},{"location":"users/sweeps/#why-parallel-sweeps","title":"Why Parallel Sweeps?","text":"

    SAE training is bottlenecked by disk I/O, not GPU computation. Loading terabytes of pre-computed ViT activations from disk is the slowest part. By training multiple SAE configurations on the same batch simultaneously, we amortize the I/O cost:

    \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502 ViT Activations (disk) \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n            \u2502 (slow I/O, once per batch)\n            \u25bc\n      \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n      \u2502  Batch   \u2502\n      \u2514\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2518\n            \u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n            \u25bc         \u25bc         \u25bc         \u25bc\n         SAE #1    SAE #2    SAE #3     ...\n        (lr=3e-4) (lr=1e-3) (lr=3e-3)\n
    "},{"location":"users/sweeps/#sweep-configuration","title":"Sweep Configuration","text":""},{"location":"users/sweeps/#python-based-sweeps","title":"Python-Based Sweeps","text":"

    Python sweeps give you full control over config generation. Your sweep file must define a make_cfgs() function that returns a list of dicts.

    Grid search example:

    def make_cfgs():\n    cfgs = []\n\n    for lr in [1e-4, 3e-4, 1e-3]:\n        for d_sae in [8192, 16384, 32768]:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

    Paired parameters (not a grid):

    def make_cfgs():\n    cfgs = []\n\n    # Grid over lr x sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            # Paired layers (train and val use same layer)\n            for layer in [6, 7, 8, 9, 10, 11]:\n                cfg = {\n                    \"lr\": lr,\n                    \"objective\": {\"sparsity_coeff\": sparsity},\n                    \"train_data\": {\"layer\": layer},\n                    \"val_data\": {\"layer\": layer},\n                }\n                cfgs.append(cfg)\n\n    return cfgs\n

    This generates 54 configs (3 x 3 x 6) where each train/val pair uses the same layer, avoiding the 162 configs you'd get from a full grid (3 x 3 x 6 x 6).

    Conditional sweeps:

    def make_cfgs():\n    cfgs = []\n\n    for d_sae in [8192, 16384, 32768]:\n        # Use different LR for different SAE widths\n        lrs = [1e-3, 3e-3] if d_sae <= 16384 else [3e-4, 1e-3]\n\n        for lr in lrs:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n
    "},{"location":"users/sweeps/#command-line-overrides","title":"Command-Line Overrides","text":"

    Command-line arguments override sweep parameters with deep merging. The precedence order is: CLI > Sweep > Default.

    uv run train.py --sweep sweeps/my_sweep.py \\\n  --lr 5e-4  # Overrides all LRs in the sweep\n

    Override nested config fields with dotted notation:

    uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23 \\\n  --sae.d-sae 16384\n

    Deep merging means that when you override a nested field, only that specific field is replaced\u2014other fields in the nested config are preserved from the sweep or default values.

    "},{"location":"users/sweeps/#parallel-groups","title":"Parallel Groups","text":"

    Not all parameters can vary within a parallel sweep. Parameters that affect data loading (like train_data, n_train, device) must be identical across all configs in a parallel group.

    When configs differ in these parameters, they're automatically split into separate Slurm jobs:

    def make_cfgs():\n    cfgs = []\n\n    # These will run in 2 separate jobs\n    for layer in [6, 12]:  # Different data loading\n        for lr in [1e-4, 3e-4]:  # Can parallelize\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

    This creates 2 parallel groups: - Job 1: layer=6, lr=[1e-4, 3e-4] - Job 2: layer=12, lr=[1e-4, 3e-4]

    Implementation detail

    See CANNOT_PARALLELIZE in train.py for the full list of parameters that split parallel groups. The split_cfgs() function handles grouping automatically.

    "},{"location":"users/sweeps/#module-loading","title":"Module Loading","text":"

    Your sweep file is executed as a Python module, so you can use imports and helper functions:

    def make_cfgs():\n    cfgs = []\n\n    # You can use helper functions\n    base_layers = list(range(6, 24, 2))\n\n    for layer in base_layers:\n        for lr in [1e-4, 3e-4]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"val_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"sae\": {\"d_model\": 1024, \"d_sae\": 16384},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

    Import mechanics

    The sweep file is loaded with importlib.import_module(), so it must be importable as a Python module. Place sweep files in a location where Python can find them (typically the project root or a sweeps/ subdirectory).

    "},{"location":"users/sweeps/#slurm-integration","title":"Slurm Integration","text":"

    When running with --slurm-acct, each parallel group becomes a separate Slurm job:

    uv run train.py --sweep sweeps/large.py \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --n-hours 24\n

    The system automatically: - Groups configs that can parallelize - Submits one Slurm job per group - Waits for all jobs to complete - Reports results

    "},{"location":"users/sweeps/#seed-management","title":"Seed Management","text":"

    Seeds are automatically incremented for each config to ensure reproducibility:

    # Base config has seed=42\n# Sweep generates 9 configs with seeds: 42, 43, 44, ..., 50\n

    Override the base seed on the command line:

    uv run train.py --sweep sweeps/my_sweep.py --seed 100\n
    "},{"location":"users/sweeps/#examples","title":"Examples","text":"

    Simple grid:

    # sweeps/simple.py\ndef make_cfgs():\n    return [\n        {\"lr\": lr, \"objective\": {\"sparsity_coeff\": sp}}\n        for lr in [1e-4, 3e-4, 1e-3]\n        for sp in [4e-4, 8e-4, 1.6e-3]\n    ]\n

    Layer sweep with paired train/val:

    # sweeps/layers.py\ndef make_cfgs():\n    cfgs = []\n\n    for layer in range(6, 24, 2):  # Layers 6, 8, 10, ..., 22\n        for lr in [3e-4, 1e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n                \"val_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

    Architecture sweep:

    # sweeps/architecture.py\ndef make_cfgs():\n    cfgs = []\n\n    architectures = [\n        (\"small\", 8192, 1e-3),\n        (\"medium\", 16384, 5e-4),\n        (\"large\", 32768, 3e-4),\n    ]\n\n    for name, d_sae, lr in architectures:\n        cfg = {\n            \"lr\": lr,\n            \"sae\": {\"d_sae\": d_sae},\n            \"tag\": name,\n        }\n        cfgs.append(cfg)\n\n    return cfgs\n
    "}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"],"fields":{"title":{"boost":1000.0},"text":{"boost":1.0},"tags":{"boost":1000000.0}}},"docs":[{"location":"","title":"saev","text":"

    saev is a framework for training and evaluating Sparse autoencoders (SAEs) for vision transformers (ViTs), implemented in PyTorch.

    "},{"location":"#installation","title":"Installation","text":"

    Installation is supported with uv. saev will likely work with pure pip, conda, etc. but I will not formally support it.

    Clone this repository, then from the root directory:

    uv run scripts/launch.py --help\n

    This will create a virtual environment and display the help for all the provided framework scripts.

    "},{"location":"#quick-start","title":"Quick Start","text":"

    Save some activations to disk:

    uv run scripts/launch.py shards \\\n  --shards-root /$SCRATCH/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-32/openai \\\n  --d-model 768 \\\n  --layers 11 \\\n  --patches-per-ex 49 \\\n  --batch-size 256 \\\n  data:cifar10\n

    Read the guide for details.

    "},{"location":"#why-saev","title":"Why saev?","text":"

    There are plenty of alternative libraries for SAEs:

    • Overcomplete, primarily developed by Thomas Fel.

    However, saev has some benefits:

    1. saev is more of a framework, rather than a library. The reason for this is that SAEs require lots of activations to train a relatively small neural network; while you can implement it with a simple inference loop, efficient training requires some caching on disk. This means using saev is a little more like Keras or PyTorch Lightning than Huggingface's Transformers or Datasets libraries.
    2. saev offers lots of tools for interacting with sparse autoencoders after training, including interactive notebooks and evaluations.
    3. saev includes complete code from preprints in the contrib/ directory, along with logbooks describing how the authors used and developed saev.
    "},{"location":"api/colors/","title":"saev.colors","text":"

    Utility color palettes used across saev visualizations.

    "},{"location":"api/configs/","title":"saev.configs","text":""},{"location":"api/configs/#saev.configs.dict_to_dataclass","title":"dict_to_dataclass(data, cls)","text":"

    Recursively convert a dictionary to a dataclass instance.

    Source code in src/saev/configs.py
    @beartype.beartype\ndef dict_to_dataclass(data: dict, cls: type[T]) -> T:\n    \"\"\"Recursively convert a dictionary to a dataclass instance.\"\"\"\n    if not dataclasses.is_dataclass(cls):\n        return data\n\n    field_types = {f.name: f.type for f in dataclasses.fields(cls)}\n    kwargs = {}\n\n    for field_name, field_type in field_types.items():\n        if field_name not in data:\n            continue\n\n        value = data[field_name]\n\n        # Handle Optional types\n        origin = tp.get_origin(field_type)\n        args = tp.get_args(field_type)\n\n        # Handle tuple[str, ...]\n        if origin is tuple and args:\n            kwargs[field_name] = tuple(value) if isinstance(value, list) else value\n        # Handle list[DataclassType]\n        elif origin is list and args and dataclasses.is_dataclass(args[0]):\n            kwargs[field_name] = [dict_to_dataclass(item, args[0]) for item in value]\n        # Handle regular dataclass fields\n        elif dataclasses.is_dataclass(field_type):\n            kwargs[field_name] = dict_to_dataclass(value, field_type)\n        # Handle pathlib.Path\n        elif field_type is pathlib.Path:\n            # Required Path field - always convert\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is tp.Union and pathlib.Path in args:\n            # Optional Path field (typing.Union style)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is types.UnionType and pathlib.Path in args:\n            # Optional Path field (Python 3.10+ union style with |)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        else:\n            kwargs[field_name] = value\n\n    return cls(**kwargs)\n
    "},{"location":"api/configs/#saev.configs.expand","title":"expand(config)","text":"

    Expand a nested dict that may contain lists into many dicts.

    Source code in src/saev/configs.py
    @beartype.beartype\ndef expand(config: dict[str, object]) -> Iterator[dict[str, object]]:\n    \"\"\"Expand a nested dict that may contain lists into many dicts.\"\"\"\n    yield from _expand_discrete(dict(config))\n
    "},{"location":"api/configs/#saev.configs.get_non_default_values","title":"get_non_default_values(obj, default_obj)","text":"

    Recursively find fields that differ from defaults.

    Source code in src/saev/configs.py
    @beartype.beartype\ndef get_non_default_values(obj: T, default_obj: T) -> dict:\n    \"\"\"Recursively find fields that differ from defaults.\"\"\"\n    # Check that obj and default_obj are instances of a dataclass.\n    assert dataclasses.is_dataclass(obj) and not isinstance(obj, type)\n    assert dataclasses.is_dataclass(default_obj) and not isinstance(default_obj, type)\n\n    diff = {}\n    for field in dataclasses.fields(obj):\n        obj_value = getattr(obj, field.name)\n        default_value = getattr(default_obj, field.name)\n\n        if obj_value == default_value:\n            continue\n\n        # If both are dataclasses of the same type, recurse to find nested differences\n        if (\n            dataclasses.is_dataclass(obj_value)\n            and dataclasses.is_dataclass(default_value)\n            and type(obj_value) is type(default_value)\n        ):\n            nested_diff = get_non_default_values(obj_value, default_value)\n            if nested_diff:\n                diff[field.name] = nested_diff\n        else:\n            # For non-dataclass fields or different types, just record the value\n            diff[field.name] = obj_value\n\n    return diff\n
    "},{"location":"api/configs/#saev.configs.load_cfgs","title":"load_cfgs(override, *, default, sweep_dcts)","text":"

    Load a list of configs from a combination of sources.

    Parameters:

    Name Type Description Default override T

    Command-line overridden values.

    required default T

    The default values for a config.

    required sweep_dcts list[dict]

    A list of dictionaries from Python sweep files. Each dictionary may contain list values that will be expanded.

    required

    Returns:

    Type Description tuple[list[T], list[str]]

    A list of configs and a list of errors.

    Source code in src/saev/configs.py
    @beartype.beartype\ndef load_cfgs(\n    override: T, *, default: T, sweep_dcts: list[dict]\n) -> tuple[list[T], list[str]]:\n    \"\"\"\n    Load a list of configs from a combination of sources.\n\n    Args:\n        override: Command-line overridden values.\n        default: The default values for a config.\n        sweep_dcts: A list of dictionaries from Python sweep files. Each dictionary may contain list values that will be expanded.\n\n    Returns:\n        A list of configs and a list of errors.\n    \"\"\"\n    # Check that override and default are instances of a dataclass.\n    assert dataclasses.is_dataclass(override) and not isinstance(override, type)\n    assert dataclasses.is_dataclass(default) and not isinstance(default, type)\n\n    # If there's nothing to sweep, return just the override\n    if not sweep_dcts:\n        return [override], []\n\n    # Find which fields were overridden (differ from default)\n    overridden_fields = get_non_default_values(override, default)\n\n    cfgs: list[T] = []\n    errs: list[str] = []\n\n    d = 0  # Global counter for seed incrementing across all expanded configs\n\n    for sweep_dct in sweep_dcts:\n        # Filter out overridden fields from this sweep dict\n        filtered_dct = _filter_overridden_fields(sweep_dct, overridden_fields)\n\n        # If there's nothing to sweep after filtering, just use override\n        if not filtered_dct:\n            cfgs.append(override)\n            d += 1\n            continue\n\n        # Apply the sweep dict to create a config\n        try:\n            updates = _recursive_dataclass_update(override, filtered_dct, override, d)\n\n            if hasattr(override, \"seed\") and \"seed\" not in updates:\n                updates[\"seed\"] = getattr(override, \"seed\", 0) + d\n\n            cfgs.append(dataclasses.replace(override, **updates))\n            d += 1\n        except Exception as err:\n            errs.append(str(err))\n            d += 1\n\n    return cfgs, 
errs\n
    "},{"location":"api/configs/#saev.configs.load_sweep","title":"load_sweep(sweep_fpath)","text":"

    Load a sweep file and return the list of config dicts.

    Parameters:

    Name Type Description Default sweep_fpath Path

    Path to a Python file with a make_cfgs() function.

    required

    Returns:

    Type Description list[dict]

    List of config dictionaries from make_cfgs(). Returns empty list if any error occurs.

    Source code in src/saev/configs.py
    @beartype.beartype\ndef load_sweep(sweep_fpath: pathlib.Path) -> list[dict]:\n    \"\"\"\n    Load a sweep file and return the list of config dicts.\n\n    Args:\n        sweep_fpath: Path to a Python file with a `make_cfgs()` function.\n\n    Returns:\n        List of config dictionaries from `make_cfgs()`. Returns empty list if any error occurs.\n    \"\"\"\n    try:\n        namespace = {}\n        exec(sweep_fpath.read_text(), namespace)\n        result = namespace[\"make_cfgs\"]()\n        if not isinstance(result, list):\n            logger.warning(\n                f\"make_cfgs() in {sweep_fpath} returned {type(result).__name__}, expected list\"\n            )\n            return []\n        return result\n    except Exception as err:\n        logger.warning(f\"Failed to load sweep from {sweep_fpath}: {err}\")\n        return []\n
    "},{"location":"api/disk/","title":"saev.disk","text":"

    Helpers for sticking with the layout described in disk-layout.md.

    "},{"location":"api/disk/#saev.disk.Run","title":"Run(run_dir)","text":"

    Represents an SAE training run and some associated data.

    Parameters:

    Name Type Description Default run_dir Path

    Run directory, should be $SAEV_NFS/saev/runs/<run_id>. Assumes the run already exists and validates the structure. Use Run.new() to create a new run. required Source code in src/saev/disk.py

    def __init__(self, run_dir: pathlib.Path):\n    self.run_dir = run_dir\n\n    if len(self.run_dir.parts) < 3 or self.run_dir.parts[-3:-1] != (\"saev\", \"runs\"):\n        raise ValueError(\"Run directory is invalid.\")\n\n    if not self.run_dir.exists():\n        raise FileNotFoundError(\n            f\"Run directory does not exist: {self.run_dir}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"checkpoint\").exists():\n        raise FileNotFoundError(\n            f\"Checkpoint directory does not exist: {self.run_dir / 'checkpoint'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"links\").exists():\n        raise FileNotFoundError(\n            f\"Links directory does not exist: {self.run_dir / 'links'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"inference\").exists():\n        raise FileNotFoundError(\n            f\"Inference directory does not exist: {self.run_dir / 'inference'}. Use Run.new() to create a new run.\"\n        )\n
    "},{"location":"api/disk/#saev.disk.Run.ckpt","title":"ckpt property","text":"

    Path to the sae.pt checkpoint.

    "},{"location":"api/disk/#saev.disk.Run.config","title":"config property","text":"

    The training run config. Not a train.Config object because we don't want to import from train.py.

    "},{"location":"api/disk/#saev.disk.Run.inference","title":"inference property","text":"

    Path to the inference/ directory.

    "},{"location":"api/disk/#saev.disk.Run.run_id","title":"run_id property","text":"

    The run ID, created by wandb.

    "},{"location":"api/disk/#saev.disk.Run.train_shards","title":"train_shards property","text":"

    Path to shard root with metadata.json and acts*.bin files.

    "},{"location":"api/disk/#saev.disk.Run.val_shards","title":"val_shards property","text":"

    Path to shard root with metadata.json and acts*.bin files.

    "},{"location":"api/disk/#saev.disk.Run.new","title":"new(run_id, *, train_shards_dir, val_shards_dir, runs_root) classmethod","text":"

    Create a new run with directory structure and symlinks.

    Parameters:

    Name Type Description Default run_id str

    The run ID (typically from wandb).

    required train_shards_dir Path

    Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>). required val_shards_dir Path

    Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>). required runs_root Path

    Root directory for runs (typically $SAEV_NFS/saev/runs).

    required

    Returns:

    Type Description Run

    A new Run instance with all directories and symlinks created.

    Source code in src/saev/disk.py
    @classmethod\ndef new(\n    cls,\n    run_id: str,\n    *,\n    train_shards_dir: pathlib.Path,\n    val_shards_dir: pathlib.Path,\n    runs_root: pathlib.Path,\n) -> \"Run\":\n    \"\"\"\n    Create a new run with directory structure and symlinks.\n\n    Args:\n        run_id: The run ID (typically from wandb).\n        train_shards_dir: Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>).\n        val_shards_dir: Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/<shard_hash>).\n        runs_root: Root directory for runs (typically $SAEV_NFS/saev/runs).\n\n    Returns:\n        A new Run instance with all directories and symlinks created.\n    \"\"\"\n    run_dir = runs_root / run_id\n    run_dir.mkdir(parents=True)\n    (run_dir / \"checkpoint\").mkdir()\n    (run_dir / \"links\").mkdir()\n    (run_dir / \"inference\").mkdir()\n\n    (run_dir / \"links\" / \"train-shards\").symlink_to(train_shards_dir)\n    (run_dir / \"links\" / \"val-shards\").symlink_to(val_shards_dir)\n\n    return cls(run_dir)\n
    "},{"location":"api/disk/#saev.disk.is_runs_root","title":"is_runs_root(path)","text":"

    Check if path is a valid runs root directory.

    A valid runs root ends with saev/runs and exists as a directory.

    Parameters:

    Name Type Description Default path Path

    Path to check.

    required

    Returns:

    Type Description bool

    True if path is a directory ending in saev/runs.

    Source code in src/saev/disk.py
    @beartype.beartype\ndef is_runs_root(path: pathlib.Path) -> bool:\n    \"\"\"\n    Check if `path` is a valid runs root directory.\n\n    A valid runs root ends with `saev/runs` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/runs.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"runs\")\n
    "},{"location":"api/disk/#saev.disk.is_shards_dir","title":"is_shards_dir(path)","text":"

    Check if path is a specific shards directory.

    A valid shards directory ends with saev/shards/<hash> for any hash value, exists as a directory, and contains the required files (metadata.json, shards.json, labels.bin).

    Parameters:

    Name Type Description Default path Path

    Path to check.

    required

    Returns:

    Type Description bool

    True if path is a directory ending in saev/shards/<hash> with required files. Source code in src/saev/disk.py

    @beartype.beartype\ndef is_shards_dir(path: pathlib.Path) -> bool:\n    \"\"\"\n    Check if `path` is a specific shards directory.\n\n    A valid shards directory ends with `saev/shards/<hash>` for any hash value, exists as a directory, and contains the required files (metadata.json, shards.json, labels.bin).\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards/<hash> with required files.\n    \"\"\"\n    if not path.is_dir():\n        return False\n\n    if len(path.parts) < 3 or path.parts[-3:-1] != (\"saev\", \"shards\"):\n        return False\n\n    return True\n
    "},{"location":"api/disk/#saev.disk.is_shards_root","title":"is_shards_root(path)","text":"

    Check if path is a valid shards root directory.

    A valid shards root ends with saev/shards and exists as a directory.

    Parameters:

    Name Type Description Default path Path

    Path to check.

    required

    Returns:

    Type Description bool

    True if path is a directory ending in saev/shards.

    Source code in src/saev/disk.py
    @beartype.beartype\ndef is_shards_root(path: pathlib.Path) -> bool:\n    \"\"\"\n    Check if `path` is a valid shards root directory.\n\n    A valid shards root ends with `saev/shards` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"shards\")\n
    "},{"location":"api/helpers/","title":"saev.helpers","text":""},{"location":"api/helpers/#saev.helpers.RemovedFeatureError","title":"RemovedFeatureError","text":"

    Bases: RuntimeError

    Feature existed before but is no longer supported.

    "},{"location":"api/helpers/#saev.helpers.batched_idx","title":"batched_idx(total_size, batch_size)","text":"

    Iterate over (start, end) indices for total_size examples, where end - start is at most batch_size.

    Parameters:

    Name Type Description Default total_size int

    total number of examples

    required batch_size int

    maximum distance between the generated indices.

    required

    Returns:

    Type Description

    A generator of (int, int) tuples that can slice up a list or a tensor.

    Source code in src/saev/helpers.py
    def __init__(self, total_size: int, batch_size: int):\n    self.total_size = total_size\n    self.batch_size = batch_size\n
    "},{"location":"api/helpers/#saev.helpers.batched_idx.__iter__","title":"__iter__()","text":"

    Yield (start, end) index pairs for batching.

    Source code in src/saev/helpers.py
    def __iter__(self) -> Iterator[tuple[int, int]]:\n    \"\"\"Yield (start, end) index pairs for batching.\"\"\"\n    for start in range(0, self.total_size, self.batch_size):\n        stop = min(start + self.batch_size, self.total_size)\n        yield start, stop\n
    "},{"location":"api/helpers/#saev.helpers.batched_idx.__len__","title":"__len__()","text":"

    Return the number of batches.

    Source code in src/saev/helpers.py
    def __len__(self) -> int:\n    \"\"\"Return the number of batches.\"\"\"\n    return (self.total_size + self.batch_size - 1) // self.batch_size\n
    "},{"location":"api/helpers/#saev.helpers.progress","title":"progress(it, *, every=10, desc='progress', total=0)","text":"

    Wraps an iterable with a logger like tqdm but doesn't use any control codes to manipulate a progress bar, which doesn't work well when your output is redirected to a file. Instead, simple logging statements are used, but it includes quality-of-life features like iteration speed and predicted time to finish.

    Parameters:

    Name Type Description Default it Iterable

    Iterable to wrap.

    required every int

    How many iterations between logging progress.

    10 desc str

    What to name the logger.

    'progress' total int

    If non-zero, how long the iterable is.

    0 Source code in src/saev/helpers.py
    def __init__(\n    self, it: Iterable, *, every: int = 10, desc: str = \"progress\", total: int = 0\n):\n    self.it = it\n    self.every = max(every, 1)\n    self.logger = logging.getLogger(desc)\n    self.total = total\n
    "},{"location":"api/helpers/#saev.helpers.csr_topk","title":"csr_topk(arr, *, k, axis=0, batch_size=1024)","text":"

    Takes the top k values of a sparse CSR array.

    We can only iterate efficiently over rows because it's a CSR array.

    Parameters:

    Name Type Description Default arr csr_array | csr_matrix

    The CSR array of values with shape (rows, cols).

    required k int

    The k in \"top-k\".

    required axis int

    The dimension to sort along.

    0 batch_size int

    How many rows to process at once.

    1024

    Returns:

    Type Description NumpyTopK

    saev.helpers.NumpyTopK

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef csr_topk(\n    arr: scipy.sparse.csr_array | scipy.sparse.csr_matrix,\n    *,\n    k: int,\n    axis: int = 0,\n    batch_size: int = 1024,\n) -> NumpyTopK:\n    \"\"\"\n    Takes the top k values of a sparse CSR array.\n\n    We can only iterate efficiently over *rows* because it's a a *CSR* array.\n\n    Args:\n        arr: The CSR array of values with shape (rows, cols).\n        k: The k in \"top-k\".\n        axis: The dimension to sort along.\n        batch_size: How many rows to process at once.\n\n    Returns:\n        saev.helpers.NumpyTopK\n    \"\"\"\n    if axis == 0:\n        return _csr_topk_axis0(arr, k, batch_size)\n    elif axis == 1:\n        return _csr_topk_axis1(arr, k)\n    else:\n        raise ValueError(f\"axis must be 0 or 1, got {axis}\")\n
    "},{"location":"api/helpers/#saev.helpers.current_git_commit","title":"current_git_commit()","text":"

    Best-effort short SHA of the repo containing this file.

    Returns None when * git executable is missing, * we\u2019re not inside a git repo (e.g. installed wheel), * or any git call errors out.

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef current_git_commit() -> str | None:\n    \"\"\"\n    Best-effort short SHA of the repo containing *this* file.\n\n    Returns `None` when\n    * `git` executable is missing,\n    * we\u2019re not inside a git repo (e.g. installed wheel),\n    * or any git call errors out.\n    \"\"\"\n    try:\n        # Walk up until we either hit a .git dir or the FS root\n        here = pathlib.Path(__file__).resolve()\n        for parent in (here, *here.parents):\n            if (parent / \".git\").exists():\n                break\n        else:  # no .git found\n            return None\n\n        result = subprocess.run(\n            [\"git\", \"-C\", str(parent), \"rev-parse\", \"--short\", \"HEAD\"],\n            stdout=subprocess.PIPE,\n            stderr=subprocess.DEVNULL,\n            text=True,\n            check=True,\n        )\n        return result.stdout.strip() or None\n    except (FileNotFoundError, subprocess.CalledProcessError):\n        return None\n
    "},{"location":"api/helpers/#saev.helpers.flattened","title":"flattened(dct, *, sep='.')","text":"

    Flatten a potentially nested dict to a single-level dict with .-separated keys.

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef flattened(\n    dct: dict[str, object], *, sep: str = \".\"\n) -> dict[str, str | int | float | bool | None]:\n    \"\"\"\n    Flatten a potentially nested dict to a single-level dict with `.`-separated keys.\n    \"\"\"\n    new = {}\n    for key, value in dct.items():\n        if isinstance(value, dict):\n            for nested_key, nested_value in flattened(value).items():\n                new[key + \".\" + nested_key] = nested_value\n            continue\n\n        new[key] = value\n\n    return new\n
    "},{"location":"api/helpers/#saev.helpers.fssafe","title":"fssafe(s)","text":"

    Convert a string to be filesystem-safe by replacing special characters.

    This is particularly useful for checkpoint names that contain characters like 'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like 'hf-hub_timm_ViT-L-16-SigLIP2-256'.

    Parameters:

    Name Type Description Default s str

    String to make filesystem-safe.

    required

    Returns:

    Type Description str

    Filesystem-safe version of the string.

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef fssafe(s: str) -> str:\n    \"\"\"\n    Convert a string to be filesystem-safe by replacing special characters.\n\n    This is particularly useful for checkpoint names that contain characters like\n    'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like\n    'hf-hub_timm_ViT-L-16-SigLIP2-256'.\n\n    Args:\n        s: String to make filesystem-safe.\n\n    Returns:\n        Filesystem-safe version of the string.\n    \"\"\"\n    # Replace common problematic characters with underscores\n    replacements = {\n        \"/\": \"_\",\n        \"\\\\\": \"_\",\n        \":\": \"_\",\n        \"*\": \"_\",\n        \"?\": \"_\",\n        '\"': \"_\",\n        \"<\": \"_\",\n        \">\": \"_\",\n        \"|\": \"_\",\n        \" \": \"_\",\n    }\n    for old, new in replacements.items():\n        s = s.replace(old, new)\n    # Remove any remaining non-alphanumeric characters except - _ .\n    return \"\".join(c if c.isalnum() or c in \"-_.\" else \"_\" for c in s)\n
    "},{"location":"api/helpers/#saev.helpers.get_cache_dir","title":"get_cache_dir()","text":"

    Get cache directory from environment variables, defaulting to the current working directory (.)

    Returns:

    Type Description str

    A path to a cache directory (might not exist yet).

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef get_cache_dir() -> str:\n    \"\"\"\n    Get cache directory from environment variables, defaulting to the current working directory (.)\n\n    Returns:\n        A path to a cache directory (might not exist yet).\n    \"\"\"\n    cache_dir = \"\"\n    for var in (\"SAEV_CACHE\", \"HF_HOME\", \"HF_HUB_CACHE\"):\n        cache_dir = cache_dir or os.environ.get(var, \"\")\n    return cache_dir or \".\"\n
    "},{"location":"api/helpers/#saev.helpers.get_slurm_job_count","title":"get_slurm_job_count()","text":"

    Get the current number of jobs in the queue for the current user.

    Uses squeue's -r flag to properly count job array elements individually. For example, a job array 12345_[0-99] will be counted as 100 jobs.

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef get_slurm_job_count() -> int:\n    \"\"\"\n    Get the current number of jobs in the queue for the current user.\n\n    Uses squeue's -r flag to properly count job array elements individually.\n    For example, a job array 12345_[0-99] will be counted as 100 jobs.\n    \"\"\"\n    try:\n        # Use -r to display each array element on its own line\n        result = subprocess.run(\n            [\"squeue\", \"--me\", \"-h\", \"-r\"], capture_output=True, text=True, check=True\n        )\n\n        # Count non-empty lines\n        lines = result.stdout.strip().split(\"\\n\")\n        return len([line for line in lines if line.strip()])\n\n    except (subprocess.SubprocessError, FileNotFoundError):\n        # If we can't check, assume no jobs\n        return 0\n
    "},{"location":"api/helpers/#saev.helpers.get_slurm_max_array_size","title":"get_slurm_max_array_size()","text":"

    Get the MaxArraySize configuration from the current Slurm cluster.

    Returns:

    Name Type Description int int

    The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef get_slurm_max_array_size() -> int:\n    \"\"\"\n    Get the MaxArraySize configuration from the current Slurm cluster.\n\n    Returns:\n        int: The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # Run scontrol command to get config information\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"config\"], capture_output=True, text=True, check=True\n        )\n\n        # Search for MaxArraySize in the output\n        match = re.search(r\"MaxArraySize\\s*=\\s*(\\d+)\", result.stdout)\n        if match:\n            max_array_size = int(match.group(1))\n            logger.info(\"Detected MaxArraySize = %d\", max_array_size)\n            return max_array_size\n        else:\n            logger.warning(\n                \"Could not find MaxArraySize in scontrol output, using default of 1000\"\n            )\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error running scontrol: %s\", e)\n        return 1000  # Safe default\n    except ValueError as e:\n        logger.error(\"Error parsing MaxArraySize: %s\", e)\n        return 1000  # Safe default\n    except FileNotFoundError:\n        logger.warning(\n            \"scontrol command not found. Assuming not in Slurm environment. Returning default MaxArraySize=1000.\"\n        )\n        return 1000\n
    "},{"location":"api/helpers/#saev.helpers.get_slurm_max_submit_jobs","title":"get_slurm_max_submit_jobs()","text":"

    Get the MaxSubmitJobs limit from the current user's QOS.

    Returns:

    Name Type Description int int

    The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef get_slurm_max_submit_jobs() -> int:\n    \"\"\"\n    Get the MaxSubmitJobs limit from the current user's QOS.\n\n    Returns:\n        int: The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # First, try to get the QOS from a recent job\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"job\", \"-o\"],\n            capture_output=True,\n            text=True,\n            check=False,\n        )\n\n        qos_name = None\n        if result.returncode == 0 and result.stdout:\n            # Extract QOS from job info\n            match = re.search(r\"QOS=(\\S+)\", result.stdout)\n            if match:\n                qos_name = match.group(1)\n\n        if not qos_name:\n            # If no jobs, try to get default QOS from association\n            # This is less reliable but better than nothing\n            logger.warning(\"No active jobs to determine QOS, using default of 1000\")\n            return 1000\n\n        # Get the MaxSubmitJobs for this QOS\n        result = subprocess.run(\n            [\"sacctmgr\", \"show\", \"qos\", qos_name, \"format=maxsubmitjobs\", \"-n\", \"-P\"],\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        max_submit = result.stdout.strip()\n        if max_submit and max_submit.isdigit():\n            limit = int(max_submit)\n            logger.info(\"Detected MaxSubmitJobs = %d for QOS %s\", limit, qos_name)\n            return limit\n        else:\n            logger.warning(\"Could not parse MaxSubmitJobs, using default of 1000\")\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error getting MaxSubmitJobs: %s\", e)\n        return 1000\n    except (ValueError, FileNotFoundError) as e:\n        logger.error(\"Error: %s\", e)\n        return 1000\n
    "},{"location":"api/helpers/#saev.helpers.np_topk","title":"np_topk(arr, k, axis=None)","text":"

    A numpy implementation of torch.topk.

    Returns the k largest elements along the given axis. If axis is None, the array is flattened first.

    Parameters:

    Name Type Description Default arr ndarray

    Input array.

    required k int

    Number of top elements to return.

    required axis int | None

    Axis along which to find top k elements. If None, flattens array first.

    None

    Returns:

    Type Description NumpyTopK

    Array of k largest values along the specified axis, sorted in descending order.

    Source code in src/saev/helpers.py
    @beartype.beartype\ndef np_topk(arr: np.ndarray, k: int, axis: int | None = None) -> NumpyTopK:\n    \"\"\"A numpy implementation of torch.topk.\n\n    Returns the k largest elements along the given axis. If axis is None, the array is flattened first.\n\n    Args:\n        arr: Input array.\n        k: Number of top elements to return.\n        axis: Axis along which to find top k elements. If None, flattens array first.\n\n    Returns:\n        Array of k largest values along the specified axis, sorted in descending order.\n    \"\"\"\n    if axis is None:\n        arr = arr.flatten()\n        axis = 0\n\n    # Handle negative axis\n    if axis < 0:\n        axis = arr.ndim + axis\n\n    # For each position along other axes, sort and take top k\n    # Use argsort which is stable and will preserve order for equal values\n    sort_indices = np.argsort(-arr, axis=axis, kind=\"stable\")\n\n    # Take the first k sorted indices\n    topk_indices = np.take(sort_indices, np.arange(k), axis=axis)\n\n    # Gather the top k values\n    topk_values = np.take_along_axis(arr, topk_indices, axis=axis)\n\n    return NumpyTopK(values=topk_values, indices=topk_indices)\n
    "},{"location":"api/helpers/#saev.helpers.submit_job_array","title":"submit_job_array(executor, fn, args_list, *, logger=None, margin=0.8)","text":"

    Submit jobs in batches to respect Slurm's MaxArraySize limit.

    Yields (index, result) tuples as jobs complete. Batches are submitted sequentially - each batch must complete before the next is submitted.

    Parameters:

    Name Type Description Default executor

    A submitit executor (SlurmExecutor or LocalExecutor).

    required fn Callable

    Worker function to call for each config.

    required args_list list

    List of arguments to pass to fn.

    required logger Logger | None

    Optional logger for progress messages.

    None margin float

    Fraction of MaxArraySize to use (default 0.8).

    0.8

    Yields:

    Type Description int

    Tuples of (global_index, result) for successful jobs.

    object

    For failed jobs, yields (global_index, None) and logs a warning.

    Example
    executor = submitit.SlurmExecutor(folder=\"./logs\")\nexecutor.update_parameters(...)\n\nfor idx, result in submit_job_array(executor, worker_fn, configs):\n    print(f\"Job {idx} returned {result}\")\n
    Source code in src/saev/helpers.py
    @beartype.beartype\ndef submit_job_array(\n    executor,\n    fn: tp.Callable,\n    args_list: list,\n    *,\n    logger: logging.Logger | None = None,\n    margin: float = 0.8,\n) -> Iterator[tuple[int, object]]:\n    \"\"\"\n    Submit jobs in batches to respect Slurm's MaxArraySize limit.\n\n    Yields (index, result) tuples as jobs complete. Batches are submitted sequentially - each batch must complete before the next is submitted.\n\n    Args:\n        executor: A submitit executor (SlurmExecutor or LocalExecutor).\n        fn: Worker function to call for each config.\n        args_list: List of arguments to pass to fn.\n        logger: Optional logger for progress messages.\n        margin: Fraction of MaxArraySize to use (default 0.8).\n\n    Yields:\n        Tuples of (global_index, result) for successful jobs.\n        For failed jobs, yields (global_index, None) and logs a warning.\n\n    Example:\n        ```\n        executor = submitit.SlurmExecutor(folder=\"./logs\")\n        executor.update_parameters(...)\n\n        for idx, result in submit_job_array(executor, worker_fn, configs):\n            print(f\"Job {idx} returned {result}\")\n        ```\n    \"\"\"\n    from submitit.core.utils import UncompletedJobError\n\n    arr_size = int(get_slurm_max_array_size() * margin)\n    n_total = len(args_list)\n\n    for arr_start, arr_end in batched_idx(n_total, arr_size):\n        batch_args = args_list[arr_start:arr_end]\n\n        if logger:\n            logger.info(\n                \"Submitting batch of %d jobs (%d-%d of %d).\",\n                len(batch_args),\n                arr_start + 1,\n                arr_end,\n                n_total,\n            )\n\n        with executor.batch():\n            jobs = [executor.submit(fn, arg) for arg in batch_args]\n\n        time.sleep(5.0)\n\n        for i, job in enumerate(jobs):\n            global_idx = arr_start + i\n            try:\n                result = job.result()\n                yield 
global_idx, result\n            except UncompletedJobError:\n                if logger:\n                    logger.warning(\n                        \"Job %s (%d) did not finish.\", job.job_id, global_idx\n                    )\n                yield global_idx, None\n
    "},{"location":"api/metrics/","title":"saev.metrics","text":""},{"location":"api/metrics/#saev.metrics.Metrics","title":"Metrics(mse_per_dim, mse_per_token, normalized_mse, baseline_mse_per_dim, baseline_mse_per_token, sse_recon, sse_baseline, n_tokens, d_model, n_elements) dataclass","text":"

    Validated reconstruction metrics aggregated over one evaluation corpus.

    The primary totals are sse_recon (SAE reconstruction SSE) and sse_baseline (mean-baseline SSE). Derived terms are: - normalized_mse = sse_recon / sse_baseline - mse_per_dim = sse_recon / n_elements - mse_per_token = sse_recon / n_tokens - baseline_mse_per_dim = sse_baseline / n_elements - baseline_mse_per_token = sse_baseline / n_tokens

    Size terms are: - n_tokens: number of tokens included in aggregation - d_model: embedding width per token - n_elements = n_tokens * d_model

    "},{"location":"api/metrics/#saev.metrics.Metrics.from_accumulators","title":"from_accumulators(*, sse_recon, sse_baseline, n_tokens, d_model) classmethod","text":"

    Construct metrics from aggregate sums and shape information.

    Parameters:

    Name Type Description Default sse_recon float

    Sum of squared reconstruction errors over all selected tokens and dimensions.

    required sse_baseline float

    Sum of squared mean-baseline errors over the same tokens and dimensions.

    required n_tokens int

    Number of selected tokens in the aggregation set.

    required d_model int

    Activation dimension per token.

    required

    Returns:

    Type Description Metrics

    A validated Metrics object with all derived fields populated.

    Source code in src/saev/metrics.py
    @classmethod\ndef from_accumulators(\n    cls, *, sse_recon: float, sse_baseline: float, n_tokens: int, d_model: int\n) -> \"Metrics\":\n    \"\"\"Construct metrics from aggregate sums and shape information.\n\n    Args:\n        sse_recon: Sum of squared reconstruction errors over all selected tokens and dimensions.\n        sse_baseline: Sum of squared mean-baseline errors over the same tokens and dimensions.\n        n_tokens: Number of selected tokens in the aggregation set.\n        d_model: Activation dimension per token.\n\n    Returns:\n        A validated `Metrics` object with all derived fields populated.\n    \"\"\"\n\n    msg = f\"n_tokens must be positive, got {n_tokens}.\"\n    assert n_tokens > 0, msg\n    msg = f\"d_model must be positive, got {d_model}.\"\n    assert d_model > 0, msg\n    msg = f\"sse_recon must be >= 0, got {sse_recon}.\"\n    assert sse_recon >= 0.0, msg\n    msg = f\"sse_baseline must be > 0, got {sse_baseline}.\"\n    assert sse_baseline > 0.0, msg\n\n    n_elements = n_tokens * d_model\n    return cls(\n        mse_per_dim=sse_recon / n_elements,\n        mse_per_token=sse_recon / n_tokens,\n        normalized_mse=sse_recon / sse_baseline,\n        baseline_mse_per_dim=sse_baseline / n_elements,\n        baseline_mse_per_token=sse_baseline / n_tokens,\n        sse_recon=sse_recon,\n        sse_baseline=sse_baseline,\n        n_tokens=n_tokens,\n        d_model=d_model,\n        n_elements=n_elements,\n    )\n
    "},{"location":"api/saev/","title":"saev","text":"

    saev is a Python package for training sparse autoencoders (SAEs) on vision transformers (ViTs) in PyTorch.

    "},{"location":"api/summary/","title":"Summary","text":"
    • saev
    • saev.colors
    • saev.configs
    • saev.data
    • saev.data.bird_mae
    • saev.data.buffers
    • saev.data.clip
    • saev.data.datasets
    • saev.data.dinov2
    • saev.data.dinov3
    • saev.data.fake_clip
    • saev.data.indexed
    • saev.data.models
    • saev.data.ordered
    • saev.data.pe
    • saev.data.shards
    • saev.data.shuffled
    • saev.data.siglip
    • saev.data.transforms
    • saev.disk
    • saev.framework
    • saev.framework.inference
    • saev.framework.shards
    • saev.framework.train
    • saev.helpers
    • saev.metrics
    • saev.nn
    • saev.nn.modeling
    • saev.nn.objectives
    • saev.utils
    • saev.utils.monitoring
    • saev.utils.scheduling
    • saev.utils.statistics
    • saev.utils.wandb
    • saev.viz
    "},{"location":"api/viz/","title":"saev.viz","text":""},{"location":"api/viz/#saev.viz.load_palette","title":"load_palette(path)","text":"

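    Load a color palette from the text file at path, parsing one color per line; blank lines are filled in with additional colors generated by glasbey. Returns a list of (r, g, b) float tuples with every channel in [0, 1].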

    Source code in src/saev/viz.py
    @beartype.beartype\ndef load_palette(path: pathlib.Path) -> list[tuple[float, float, float]]:\n    \"\"\"TODO: docstring.\"\"\"\n    import glasbey\n\n    palette = []\n\n    for i, line in enumerate(path.read_text().split(\"\\n\")):\n        line = line.strip()\n        if not line:\n            palette.append(None)\n            continue\n\n        palette.append(parse_color(line))\n\n    # Extend the palette using https://glasbey.readthedocs.io/en/latest/extending_palettes.html\n    n_missing = sum(color is None for color in palette)\n    if n_missing:\n        seed_palette = [color for color in palette if color is not None]\n        if seed_palette:\n            extended = glasbey.extend_palette(\n                seed_palette, palette_size=len(seed_palette) + n_missing, as_hex=False\n            )\n            fill_colors = extended[len(seed_palette) :]\n        else:\n            fill_colors = glasbey.create_palette(palette_size=n_missing, as_hex=False)\n\n        fill_iter = iter(fill_colors)\n        for i, color in enumerate(palette):\n            if color is not None:\n                continue\n            next_color = tuple(float(chan) for chan in next(fill_iter))\n            palette[i] = next_color\n\n    for i, color in enumerate(palette):\n        assert color is not None\n        msg = f\"Color {i} is invalid: {color}\"\n        assert all(0 <= chan <= 1 and isinstance(chan, float) for chan in color), msg\n\n    return palette\n
    "},{"location":"api/data/bird_mae/","title":"saev.data.bird_mae","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.Encoder","title":"Encoder(cfg)","text":"

    Bases: Module

    Pure PyTorch Bird-MAE backbone (no HF).

    Source code in src/saev/data/bird_mae.py
    def __init__(self, cfg: Config) -> None:\n    super().__init__()\n    self.cfg = cfg\n\n    self.patch_embed = PatchEmbed(\n        img_size=(cfg.img_size_x, cfg.img_size_y),\n        patch_size=(cfg.patch_size, cfg.patch_size),\n        in_chans=cfg.in_chans,\n        embed_dim=cfg.embed_dim,\n    )\n\n    self.cls_token = nn.Parameter(torch.zeros(1, 1, cfg.embed_dim))\n    self.pos_embed = nn.Parameter(\n        torch.zeros(1, cfg.n_patches + 1, cfg.embed_dim),\n        requires_grad=cfg.pos_trainable,\n    )\n\n    if self.pos_embed.data.shape[1] == cfg.n_tokens:\n        pos_embed_np = get_2d_sincos_pos_embed_flexible(\n            self.pos_embed.shape[-1],\n            self.patch_embed.patch_hw,\n            cls_token=True,\n        )\n        self.pos_embed.data.copy_(\n            torch.from_numpy(pos_embed_np).float().unsqueeze(0)\n        )\n    else:\n        logger.warning(\n            \"Positional embedding shape mismatch. Will not initialize sin-cos pos embed.\"\n        )\n\n    dpr = [x.item() for x in torch.linspace(0, cfg.drop_rate, cfg.depth)]\n    self.blocks = nn.ModuleList([\n        Block(\n            dim=cfg.embed_dim,\n            n_heads=cfg.n_heads,\n            mlp_ratio=cfg.mlp_ratio,\n            qkv_bias=cfg.qkv_bias,\n            qk_norm=cfg.qk_norm,\n            init_values=cfg.init_values,\n            proj_drop=cfg.drop_rate,\n            attn_drop=cfg.drop_rate,\n            drop_path=dpr[i],\n            norm_layer=functools.partial(nn.LayerNorm, eps=cfg.norm_layer_eps),\n        )\n        for i in range(cfg.depth)\n    ])\n\n    self.pos_drop = nn.Dropout(p=cfg.drop_rate)\n    self.norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n    self.fc_norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n\n    nn.init.trunc_normal_(self.cls_token, std=0.02)\n    self.apply(self._init_weights)\n
    "},{"location":"api/data/bird_mae/#saev.data.bird_mae.PatchEmbed","title":"PatchEmbed(img_size=(512, 128), patch_size=(16, 16), in_chans=1, embed_dim=768)","text":"

    Bases: Module

    Image (time x mel) to patch embeddings.

    Source code in src/saev/data/bird_mae.py
    def __init__(\n    self,\n    img_size: tuple[int, int] = (512, 128),\n    patch_size: tuple[int, int] = (16, 16),\n    in_chans: int = 1,\n    embed_dim: int = 768,\n) -> None:\n    super().__init__()\n    img_size = _ntuple(2)(img_size)\n    patch_size = _ntuple(2)(patch_size)\n    n_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])\n    self.patch_hw = (img_size[1] // patch_size[1], img_size[0] // patch_size[0])\n    self.img_size = img_size\n    self.patch_size = patch_size\n    self.n_patches = n_patches\n\n    self.proj = nn.Conv2d(\n        in_chans,\n        embed_dim,\n        kernel_size=patch_size,\n        stride=patch_size,\n    )\n
    "},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer","title":"Transformer(ckpt)","text":"

    Bases: Module, Transformer

    Source code in src/saev/data/bird_mae.py
    def __init__(self, ckpt: str):\n    super().__init__()\n    self.model = load(ckpt)\n\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(ckpt.lower())\n
    "},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_resize","title":"make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

    Create resize transform for visualization.

    Source code in src/saev/data/bird_mae.py
    @staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization.\"\"\"\n    raise NotImplementedError(\"Bird-MAE uses audio spectrograms, not images.\")\n
    "},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

    Create transforms for preprocessing: (data_transform, dict_transform | None).

    Source code in src/saev/data/bird_mae.py
    @staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n    return transform, None\n
    "},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio","title":"filter_audio(waveform, sample_rate, patches, *, mode='time')","text":"

    Filter audio based on SAE patch activations over the log-mel spectrogram.

    Given a waveform and the SAE activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.

    Parameters:

    Name Type Description Default waveform Float[Tensor, ' samples']

    Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.

    required sample_rate int

    Audio sample rate in Hz. Should be 32000 for Bird-MAE.

    required patches Bool[Tensor, ' content_tokens_per_example']

    Boolean SAE activation values per patch, shape [256]. Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.

    required mode Literal['time', 'time+freq']

    Filtering mode. - \"time\": Clip to time segments with high activations (preserves all frequencies). - \"time+freq\": Clip time AND apply frequency masking via STFT.

    'time'

    Returns:

    Type Description Float[Tensor, ' clipped']

    Filtered audio waveform as a 1D torch tensor.

    Example

    waveform_np, sr = librosa.load(audio_path, sr=32000) mel = bird_mae.transform(waveform_np) # [512, 128] waveform = torch.from_numpy(waveform_np)

    Source code in src/saev/data/bird_mae.py
    @jaxtyped(typechecker=beartype.beartype)\ndef filter_audio(\n    waveform: Float[Tensor, \" samples\"],\n    sample_rate: int,\n    patches: Bool[Tensor, \" content_tokens_per_example\"],\n    *,\n    mode: tp.Literal[\"time\", \"time+freq\"] = \"time\",\n) -> Float[Tensor, \" clipped\"]:\n    \"\"\"\n    Filter audio based on SAE patch activations over the log-mel spectrogram.\n\n    Given a waveform and the SAE activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.\n\n    Args:\n        waveform: Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.\n        sample_rate: Audio sample rate in Hz. Should be 32000 for Bird-MAE.\n        patches: Boolean SAE activation values per patch, shape [256].\n            Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.\n        mode: Filtering mode.\n            - \"time\": Clip to time segments with high activations (preserves all frequencies).\n            - \"time+freq\": Clip time AND apply frequency masking via STFT.\n\n    Returns:\n        Filtered audio waveform as a 1D torch tensor.\n\n    Example:\n        >>> waveform_np, sr = librosa.load(audio_path, sr=32000)\n        >>> mel = bird_mae.transform(waveform_np)  # [512, 128]\n        >>> waveform = torch.from_numpy(waveform_np)\n        >>> # ... run through SAE to get patch_activations [256] ...\n        >>> # ... 
covert SAE activations to bool with > 0 ...\n        >>> time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\")\n        >>> time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")\n    \"\"\"\n    msg = f\"Bird-MAE expects sample_rate={BIRDMAE_SR_HZ}, got {sample_rate}.\"\n    assert sample_rate == BIRDMAE_SR_HZ, msg\n    assert patches.shape == (BIRDMAE_N_TIME_PATCHES * BIRDMAE_N_MEL_PATCHES,)\n    assert waveform.ndim == 1, waveform.shape\n\n    # Match transform(): pad/truncate to exactly 5s\n    waveform_t = waveform.to(torch.float32)\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if waveform_t.numel() < max_len:\n        pad = max_len - waveform_t.numel()\n        waveform_t = F.pad(waveform_t, (0, pad))\n    else:\n        waveform_t = waveform_t[:max_len]\n    if mode == \"time+freq\":\n        # STFT parameters matching Kaldi/BirdMAE assumptions approximately\n        n_fft = BIRDMAE_STFT_N_FFT\n        hop_length = BIRDMAE_STFT_HOP_LENGTH\n        win_length = BIRDMAE_STFT_WIN_LENGTH\n        window = torch.hann_window(win_length)\n\n        stft = torch.stft(\n            waveform_t,\n            n_fft=n_fft,\n            hop_length=hop_length,\n            win_length=win_length,\n            window=window,\n            center=True,\n            return_complex=True,\n        )\n        # stft shape: [freq_bins, time_frames]\n        # freq_bins = 513\n        # time_frames ~ 498 for 160000 samples\n\n        freqs = torch.linspace(0, sample_rate / 2, stft.shape[0])\n        mask = torch.zeros_like(stft, dtype=torch.bool)\n\n        # Mel range\n        low_freq = BIRDMAE_STFT_LOW_FREQ_HZ\n        high_freq = sample_rate / 2\n        min_mel = hz_to_mel(low_freq)\n        max_mel = hz_to_mel(high_freq)\n        mel_range = max_mel - min_mel\n\n        active_patch_i = torch.nonzero(patches, as_tuple=False).flatten().tolist()\n        for i in active_patch_i:\n            time_idx = i // 
BIRDMAE_N_MEL_PATCHES\n            mel_idx = i % BIRDMAE_N_MEL_PATCHES\n\n            # Time range (frames)\n            t_start = time_idx * BIRDMAE_FRAMES_PER_PATCH\n            t_end = (time_idx + 1) * BIRDMAE_FRAMES_PER_PATCH\n\n            # Frequency range (Hz)\n            # 128 mel bins total, 16 bins per patch\n            p_mel_low = (\n                min_mel\n                + (mel_idx * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n            p_mel_high = (\n                min_mel\n                + ((mel_idx + 1) * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n\n            hz_low = mel_to_hz(p_mel_low)\n            hz_high = mel_to_hz(p_mel_high)\n\n            freq_mask = (freqs >= hz_low) & (freqs < hz_high)\n\n            # Apply mask to valid frames\n            valid_t_end = min(t_end, stft.shape[1])\n            if t_start < valid_t_end:\n                mask[freq_mask, t_start:valid_t_end] = True\n\n        stft_filtered = stft * mask\n        waveform_t = torch.istft(\n            stft_filtered,\n            n_fft=n_fft,\n            hop_length=hop_length,\n            win_length=win_length,\n            window=window,\n            center=True,\n            length=waveform_t.shape[0],\n        )\n\n    # Time clipping (applies to both modes)\n    active_time_indices = torch.unique(\n        torch.nonzero(patches, as_tuple=False).flatten() // BIRDMAE_N_MEL_PATCHES\n    ).tolist()\n    segments = []\n\n    for t in active_time_indices:\n        start = t * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        end = (t + 1) * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        if start >= waveform_t.shape[0]:\n            continue\n        seg = waveform_t[start : min(end, waveform_t.shape[0])]\n        segments.append(seg)\n\n    if not segments:\n        return waveform_t[:0]\n\n    return torch.cat(segments, dim=0)\n
    "},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--run-through-sae-to-get-patch_activations-256","title":"... run through SAE to get patch_activations [256] ...","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--covert-sae-activations-to-bool-with-0","title":"... covert SAE activations to bool with > 0 ...","text":"

    time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\") time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")

    "},{"location":"api/data/bird_mae/#saev.data.bird_mae.transform","title":"transform(waveform)","text":"

    waveform: 1D tensor [samples] returns: 2D tensor [512, 128] matching HF's feature extractor output

    Source code in src/saev/data/bird_mae.py
    @jaxtyped(typechecker=beartype.beartype)\ndef transform(waveform: Float[np.ndarray, \" samples\"]) -> Float[Tensor, \"time mels\"]:\n    \"\"\"\n    waveform: 1D tensor [samples]\n    returns: 2D tensor [512, 128] matching HF's feature extractor output\n    \"\"\"\n    import torchaudio.compliance.kaldi\n\n    waveform = torch.from_numpy(waveform).to(torch.float32)\n    (n_samples,) = waveform.shape\n    # 1) pad/truncate to exactly 5 s\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if n_samples < max_len:\n        pad = max_len - n_samples\n        waveform = F.pad(waveform, (0, pad))\n    else:\n        waveform = waveform[:max_len]\n\n    # 2) mean-center (per clip)\n    waveform = waveform - waveform.mean(dim=0, keepdim=True)\n\n    # 3) Kaldi fbank: [T, 128]\n    fb = torchaudio.compliance.kaldi.fbank(\n        waveform[None, :],\n        htk_compat=True,\n        sample_frequency=BIRDMAE_SR_HZ,\n        use_energy=False,\n        window_type=\"hanning\",\n        num_mel_bins=BIRDMAE_N_MELS,\n        dither=0.0,\n        frame_shift=10.0,\n    )  # [T, 128]\n\n    # 4) pad to 512 frames with min value\n    t, _ = fb.shape\n    if t < BIRDMAE_TARGET_T:\n        diff = BIRDMAE_TARGET_T - t\n        min_val = fb.min()\n        fb = F.pad(fb, (0, 0, 0, diff), value=min_val.item())\n    elif t > BIRDMAE_TARGET_T:\n        fb = fb[:BIRDMAE_TARGET_T]\n\n    fb = (fb - BIRDMAE_MEAN) / (BIRDMAE_STD * 2.0)\n\n    assert fb.shape == (BIRDMAE_TARGET_T, BIRDMAE_N_MELS), fb.shape\n\n    return fb\n
    "},{"location":"api/data/buffers/","title":"saev.data.buffers","text":""},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer","title":"ReservoirBuffer(capacity, shape, *, dtype=torch.float32, meta_shape=(2,), meta_dtype=torch.int32, seed=0, collate_fn=None)","text":"

    Pool of (tensor, meta) pairs. Multiple producers call put(batch_x, batch_meta). Multiple consumers call get(batch_size) -> (x, meta). Random order, each sample delivered once, blocking semantics.

    Source code in src/saev/data/buffers.py
    def __init__(\n    self,\n    capacity: int,\n    shape: tuple[int, ...],\n    *,\n    dtype: torch.dtype = torch.float32,\n    meta_shape: tuple[int, ...] = (2,),\n    meta_dtype: torch.dtype = torch.int32,\n    seed: int = 0,\n    collate_fn: collections.abc.Callable | None = None,\n):\n    self.capacity = capacity\n    self._empty = 123456789\n\n    self.data = torch.full((capacity, *shape), self._empty, dtype=dtype)\n    self.data.share_memory_()\n\n    self.meta = torch.full((capacity, *meta_shape), self._empty, dtype=meta_dtype)\n    self.meta.share_memory_()\n\n    self.ctx = mp.get_context()\n\n    self.size = self.ctx.Value(\"L\", 0)  # current live items\n    self.lock = self.ctx.Lock()  # guards size+swap\n    self.free = self.ctx.Semaphore(capacity)\n    self.full = self.ctx.Semaphore(0)\n    # Each process has its own RNG.\n    self.rng = np.random.default_rng(seed)\n\n    self.collate_fn = collate_fn\n\n    self.logger = logging.getLogger(f\"reservoir({os.getpid()})\")\n
    "},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.close","title":"close()","text":"

    Release the shared-memory backing store (call once in the parent).

    Source code in src/saev/data/buffers.py
    def close(self) -> None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n    try:\n        self.data.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n
    "},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.fill","title":"fill()","text":"

    Approximate proportion of filled slots (race-safe enough for tests).

    Source code in src/saev/data/buffers.py
    def fill(self) -> float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for tests).\"\"\"\n    return self.qsize() / self.capacity\n
    "},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.qsize","title":"qsize()","text":"

    Approximate number of filled slots (race-safe enough for tests).

    Source code in src/saev/data/buffers.py
    def qsize(self) -> int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return self.size.value\n
    "},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer","title":"RingBuffer(slots, shape, dtype)","text":"

    Fixed-capacity, multiple-producer / multiple-consumer queue backed by a shared-memory tensor.

    "},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer--parameters","title":"Parameters","text":"

    slots : int capacity in number of items (tensor rows) shape : tuple[int] shape of one item, e.g. (batch, dim) dtype : torch.dtype tensor dtype

    put(tensor) : blocks if full get() -> tensor : blocks if empty qsize() -> int advisory size (approximate) close() frees shared storage (call in the main process)

    Source code in src/saev/data/buffers.py
    def __init__(self, slots: int, shape: tuple[int, ...], dtype: torch.dtype):\n    assert slots > 0, \"slots must be positive\"\n    self.slots = slots\n    # 123456789 -> Should make you very worried.\n    self.buf = torch.full((slots, *shape), 123456789, dtype=dtype)\n    self.buf.share_memory_()\n\n    ctx = mp.get_context()  # obeys the global start method (\"spawn\")\n\n    # shared, lock-free counters\n    self.head = ctx.Value(\"L\", 0, lock=False)  # next free slot\n    self.tail = ctx.Value(\"L\", 0, lock=False)  # next occupied slot\n\n    # semaphores for blocking semantics\n    self.free = ctx.Semaphore(slots)  # initially all slots free\n    self.full = ctx.Semaphore(0)  # no filled slots yet\n\n    # one mutex for pointer updates\n    self.mutex = ctx.Lock()\n
    "},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.close","title":"close()","text":"

    Release the shared-memory backing store (call once in the parent).

    Source code in src/saev/data/buffers.py
    def close(self) -> None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n    try:\n        self.buf.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n
    "},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.fill","title":"fill()","text":"

    Approximate proportion of filled slots (race-safe enough for tests).

    Source code in src/saev/data/buffers.py
    def fill(self) -> float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for tests).\"\"\"\n    return self.qsize() / self.capacity\n
    "},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.get","title":"get()","text":"

    Return a copy of the next item; blocks if the queue is empty.

    Source code in src/saev/data/buffers.py
    def get(self) -> torch.Tensor:\n    \"\"\"Return a view of the next item; blocks if the queue is empty.\"\"\"\n    self.full.acquire()  # wait for data\n    with self.mutex:  # exclusive update of tail\n        idx = self.tail.value % self.slots\n        out = self.buf[idx].clone()\n        self.tail.value += 1\n    self.free.release()  # signal one more free slot\n    return out\n
    "},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.put","title":"put(tensor)","text":"

    Copy tensor into the next free slot; blocks if the queue is full.

    Source code in src/saev/data/buffers.py
    def put(self, tensor: torch.Tensor) -> None:\n    \"\"\"Copy `tensor` into the next free slot; blocks if the queue is full.\"\"\"\n    if tensor.shape != self.buf.shape[1:] or tensor.dtype != self.buf.dtype:\n        raise ValueError(\"tensor shape / dtype mismatch\")\n\n    self.free.acquire()  # wait for a free slot\n    with self.mutex:  # exclusive update of head\n        idx = self.head.value % self.slots\n        self.buf[idx].copy_(tensor)\n        self.head.value += 1\n    self.full.release()  # signal there is data\n
    "},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.qsize","title":"qsize()","text":"

    Approximate number of filled slots (race-safe enough for tests).

    Source code in src/saev/data/buffers.py
    def qsize(self) -> int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return (self.head.value - self.tail.value) % (1 << 64)\n
    "},{"location":"api/data/clip/","title":"saev.data.clip","text":""},{"location":"api/data/clip/#saev.data.clip.Vit","title":"Vit(ckpt)","text":"

    Bases: Transformer, Module

    Source code in src/saev/data/clip.py
    def __init__(self, ckpt: str):\n    super().__init__()\n\n    import open_clip\n\n    from .. import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n        _, ckpt = ckpt.split(\"hf-hub:\")\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n\n    assert not isinstance(self.model, open_clip.timm_model.TimmModel)\n
    "},{"location":"api/data/clip/#saev.data.clip.Vit.patch_size","title":"patch_size property","text":"

    Get patch size for CLIP models.

    "},{"location":"api/data/clip/#saev.data.clip.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

    Create transforms for preprocessing: (img_transform, sample_transform | None).

    Source code in src/saev/data/clip.py
    @staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    import open_clip\n\n    from .. import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n
    "},{"location":"api/data/datasets/","title":"saev.data.datasets","text":""},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025","title":"BirdClef2025(root=pathlib.Path('data/birdclef-2025'), split='train_audio') dataclass","text":"

    Bases: DatasetConfig

    Configuration for BirdCLEF 2025 dataset, filtering to only bird species (Aves).

    See https://www.kaggle.com/competitions/birdclef-2025/data for more information.

    "},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.n_examples","title":"n_examples property","text":"

    Number of bird audio samples in the dataset.

    "},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.root","title":"root = pathlib.Path('data/birdclef-2025') class-attribute instance-attribute","text":"

    Root directory containing the BirdCLEF 2025 data.

    "},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.split","title":"split = 'train_audio' class-attribute instance-attribute","text":"

    Which data split to use.

    "},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset","title":"BirdClef2025Dataset(cfg, *, audio_transform=None, mask_transform=None, sample_transform=None)","text":"

    Bases: Dataset

    Dataset for BirdCLEF 2025 filtered to bird species only (class_name == 'Aves').

    Source code in src/saev/data/datasets.py
    def __init__(\n    self,\n    cfg: BirdClef2025,\n    *,\n    audio_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    import polars as pl\n\n    self.cfg = cfg\n    self.audio_transform = audio_transform\n    self.sample_transform = sample_transform\n\n    # Load taxonomy and filter to birds only\n    taxonomy = pl.read_csv(cfg.root / \"taxonomy.csv\", infer_schema_length=None)\n    taxonomy = taxonomy.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n    birds = taxonomy.filter(pl.col(\"class_name\") == \"Aves\")\n    bird_labels = set(birds[\"primary_label\"].to_list())\n\n    # Build label -> target mapping from bird species only\n    sorted_labels = sorted(bird_labels)\n    self.label_to_target = {label: i for i, label in enumerate(sorted_labels)}\n    self.target_to_label = {i: label for label, i in self.label_to_target.items()}\n\n    if cfg.split == \"train_audio\":\n        train = pl.read_csv(cfg.root / \"train.csv\", infer_schema_length=None)\n        train = train.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n        train_birds = train.filter(pl.col(\"primary_label\").is_in(bird_labels))\n        self.samples = [\n            {\"label\": row[\"primary_label\"], \"filename\": row[\"filename\"]}\n            for row in train_birds.iter_rows(named=True)\n        ]\n    elif cfg.split == \"train_soundscapes\":\n        soundscapes_dpath = cfg.root / \"train_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    elif cfg.split == \"test_soundscapes\":\n        soundscapes_dpath = cfg.root / \"test_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    else:\n        tp.assert_never(cfg.split)\n
    "},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset.n_classes","title":"n_classes property","text":"

    Number of bird species.

    "},{"location":"api/data/datasets/#saev.data.datasets.Cifar10","title":"Cifar10(name='uoft-cs/cifar10', split='train') dataclass","text":"

    Bases: DatasetConfig

    Configuration for HuggingFace CIFAR-10.

    "},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.n_examples","title":"n_examples property","text":"

    Number of images in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.

    "},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.name","title":"name = 'uoft-cs/cifar10' class-attribute instance-attribute","text":"

    Dataset name on HuggingFace. Don't need to change this.

    "},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.root","title":"root property","text":"

    Dummy path for the dataset.

    "},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.split","title":"split = 'train' class-attribute instance-attribute","text":"

    Dataset split. Can be 'train' or 'test'.

    "},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig","title":"DatasetConfig","text":"

    Bases: ABC

    Abstract base class for dataset configurations.

    "},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.n_examples","title":"n_examples abstractmethod property","text":"

    Number of examples in the dataset.

    "},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.root","title":"root abstractmethod property","text":"

    Root directory path for the dataset.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImg","title":"FakeImg(n_examples=10) dataclass","text":"

    Bases: DatasetConfig

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImg.root","title":"root property","text":"

    Root directory path for the dataset.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg","title":"FakeImgSeg(n_examples=10, content_tokens_per_example=16, n_classes=3, bg_label=0) dataclass","text":"

    Bases: DatasetConfig

    Tiny synthetic segmentation dataset for tests.

    Generates dummy RGB images and pixel-level segmentation masks, mimicking the behavior of real segmentation datasets like ImgSegFolder.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.bg_label","title":"bg_label = 0 class-attribute instance-attribute","text":"

    Which class index is considered background.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.content_tokens_per_example","title":"content_tokens_per_example = 16 class-attribute instance-attribute","text":"

    Number of content tokens per example.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_classes","title":"n_classes = 3 class-attribute instance-attribute","text":"

    Number of segmentation classes.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_examples","title":"n_examples = 10 class-attribute instance-attribute","text":"

    Number of examples.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.root","title":"root property","text":"

    Root directory path for the dataset.

    "},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSegDataset","title":"FakeImgSegDataset(cfg, *, img_transform=None, mask_transform=None, sample_transform=None)","text":"

    Bases: Dataset

    Synthetic segmentation dataset providing pixel-level segmentation masks.

    Mimics ImgSegFolderDataset by providing:

    • image: a dummy RGB PIL image
    • segmentation: a PIL image with pixel-level class labels
    • index, target, label
    Source code in src/saev/data/datasets.py
    def __init__(\n    self,\n    cfg: FakeImgSeg,\n    *,\n    img_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    self.cfg = cfg\n    self.img_transform = img_transform\n    self.mask_transform = mask_transform\n    self.sample_transform = sample_transform\n
    "},{"location":"api/data/datasets/#saev.data.datasets.Imagenet","title":"Imagenet(name='ILSVRC/imagenet-1k', split='train') dataclass","text":"

    Bases: DatasetConfig

    Configuration for HuggingFace Imagenet.

    "},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.n_examples","title":"n_examples property","text":"

    Number of images in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.

    "},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.name","title":"name = 'ILSVRC/imagenet-1k' class-attribute instance-attribute","text":"

    Dataset name on HuggingFace. You don't need to change this.

    "},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.root","title":"root property","text":"

    Root directory path for the dataset.

    "},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.split","title":"split = 'train' class-attribute instance-attribute","text":"

    Dataset split. For the default ImageNet-1K dataset, can either be 'train', 'validation' or 'test'.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder","title":"ImgFolder(root=pathlib.Path('./data/split')) dataclass","text":"

    Bases: DatasetConfig

    Configuration for a generic image folder dataset that matches the structure used in PyTorch's ImageFolder.

    The dataset must be laid out in:

    root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n

    If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.n_examples","title":"n_examples property","text":"

    Number of examples in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires walking the directory structure. If you need to reference this number very often, cache it in a local variable.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.root","title":"root = pathlib.Path('./data/split') class-attribute instance-attribute","text":"

    Where the class folders with images are stored. Can be a glob pattern to match multiple directories.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset","title":"ImgFolderDataset(*args, sample_transform=None, **kwargs)","text":"

    Bases: ImageFolder

    A generic image folder dataset that matches the structure used in PyTorch's ImageFolder.

    The dataset must be laid out in:

    root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n

    If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.

    Source code in src/saev/data/datasets.py
    def __init__(self, *args, sample_transform: Callable | None = None, **kwargs):\n    super().__init__(*args, **kwargs)\n    self.sample_transform = sample_transform\n
    "},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset.__getitem__","title":"__getitem__(index)","text":"

    Parameters:

    Name Type Description Default index int

    Index

    required

    Returns:

    Type Description dict[str, object]

    dict with keys 'data', 'index', 'target' and 'label'.

    Source code in src/saev/data/datasets.py
    def __getitem__(self, index: int) -> dict[str, object]:\n    \"\"\"\n    Args:\n        index: Index\n\n    Returns:\n        dict with keys 'data', 'index', 'target' and 'label'.\n    \"\"\"\n    path, target = self.samples[index]\n    image = self.loader(path)\n    if self.transform is not None:\n        image = self.transform(image)\n    if self.target_transform is not None:\n        target = self.target_transform(target)\n\n    sample = {\n        \"data\": image,\n        \"target\": target,\n        \"label\": self.classes[target],\n        \"index\": index,\n    }\n\n    if self.sample_transform is not None:\n        sample = self.sample_transform(sample)\n\n    return sample\n
    "},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder","title":"ImgSegFolder(root=pathlib.Path('./data/segdataset'), split='training', labels_csv='labels.csv', bg_label=0) dataclass","text":"

    Bases: DatasetConfig

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.bg_label","title":"bg_label = 0 class-attribute instance-attribute","text":"

    Background label.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.labels_csv","title":"labels_csv = 'labels.csv' class-attribute instance-attribute","text":"

    CSV file with columns: stem,label1,label2,... First column must be 'stem'.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.n_examples","title":"n_examples property","text":"

    Number of examples in the dataset. Calculated on the fly by counting image files in root/images/split.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.root","title":"root = pathlib.Path('./data/segdataset') class-attribute instance-attribute","text":"

    Where the class folders with images are stored.

    "},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.split","title":"split = 'training' class-attribute instance-attribute","text":"

    Data split.

    "},{"location":"api/data/datasets/#saev.data.datasets.get_dataset","title":"get_dataset(cfg, *, data_transform=None, mask_transform=None, sample_transform=None)","text":"

    Gets the dataset for the current experiment; delegates construction to dataset-specific functions.

    Parameters:

    Name Type Description Default cfg Config

    Config for the dataset.

    required data_transform

    Transform to be applied to each 'data' key (typically the raw data).

    required mask_transform

    Transform to be applied to masks.

    required sample_transform

    Transform to be applied to the entire sample dict.

    required

    Returns: A dataset that has dictionaries with 'data', 'index', 'target', and 'label' keys containing examples.

    Source code in src/saev/data/datasets.py
    @beartype.beartype\ndef get_dataset(\n    cfg: Config,\n    *,\n    data_transform: Callable = None,\n    mask_transform: Callable | None = None,\n    sample_transform: Callable | None = None,\n):\n    \"\"\"\n    Gets the dataset for the current experiment; delegates construction to dataset-specific functions.\n\n    Args:\n        cfg: Config for the dataset.\n        data_tr: Transform to be applied to each 'data' key (typically the raw data).\n        mask_tr: Transform to be applied to masks.\n        dict_tr: Transform to be applied to the entire sample dict.\n    Returns:\n        A dataset that has dictionaries with `'data'`, `'index'`, `'target'`, and `'label'` keys containing examples.\n    \"\"\"\n    # TODO: Can we reduce duplication? Or is it nice to see that there is no magic here?\n    if isinstance(cfg, Imagenet):\n        return ImagenetDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, Cifar10):\n        return Cifar10Dataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, ImgSegFolder):\n        return ImgSegFolderDataset(\n            cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, ImgFolder):\n        ds = [\n            ImgFolderDataset(\n                root, transform=data_transform, sample_transform=sample_transform\n            )\n            for root in glob.glob(str(cfg.root), recursive=True)\n        ]\n        if len(ds) == 1:\n            return ds[0]\n        else:\n            return torch.utils.data.ConcatDataset(ds)\n    elif isinstance(cfg, FakeImg):\n        return FakeImgDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, FakeImgSeg):\n        return FakeImgSegDataset(\n       
     cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, BirdClef2025):\n        return BirdClef2025Dataset(\n            cfg, audio_transform=data_transform, sample_transform=sample_transform\n        )\n    else:\n        tp.assert_never(cfg)\n
    "},{"location":"api/data/datasets/#saev.data.datasets.is_img_seg_dataset","title":"is_img_seg_dataset(data_cfg)","text":"

    Check if a dataset configuration is for an image segmentation dataset.

    Parameters:

    Name Type Description Default data_cfg DatasetConfig

    Dataset configuration

    required

    Returns:

    Type Description bool

    True if this is an image segmentation dataset that should have labels.bin

    Source code in src/saev/data/datasets.py
    @beartype.beartype\ndef is_img_seg_dataset(data_cfg: DatasetConfig) -> bool:\n    \"\"\"\n    Check if a dataset configuration is for an image segmentation dataset.\n\n    Args:\n        data_cfg: Dataset configuration\n\n    Returns:\n        True if this is an image segmentation dataset that should have labels.bin\n    \"\"\"\n    return isinstance(data_cfg, (FakeImgSeg, ImgSegFolder))\n
    "},{"location":"api/data/dinov2/","title":"saev.data.dinov2","text":""},{"location":"api/data/dinov3/","title":"saev.data.dinov3","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config","title":"Config(img_size=224, patch_size=16, in_chans=3, pos_embed_rope_base=100.0, pos_embed_rope_min_period=None, pos_embed_rope_max_period=None, pos_embed_rope_normalize_coords='separate', pos_embed_rope_dtype='bf16', embed_dim=768, depth=12, num_heads=12, ffn_ratio=4.0, qkv_bias=True, ffn_layer='mlp', ffn_bias=True, proj_bias=True, n_storage_tokens=0, mask_k_bias=False, untie_global_and_local_cls_norm=False, device=None) dataclass","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config.depth","title":"depth = 12 class-attribute instance-attribute","text":"

    Number of transformer blocks.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.device","title":"device = None class-attribute instance-attribute","text":"

    Device for tensor operations.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.embed_dim","title":"embed_dim = 768 class-attribute instance-attribute","text":"

    Embedding dimension for transformer.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_bias","title":"ffn_bias = True class-attribute instance-attribute","text":"

    Whether to use bias in feed-forward network.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_layer","title":"ffn_layer = 'mlp' class-attribute instance-attribute","text":"

    Type of feed-forward network layer.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_ratio","title":"ffn_ratio = 4.0 class-attribute instance-attribute","text":"

    Feed-forward network expansion ratio.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.img_size","title":"img_size = 224 class-attribute instance-attribute","text":"

    Image width and height in pixels.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.in_chans","title":"in_chans = 3 class-attribute instance-attribute","text":"

    Number of input image channels.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.mask_k_bias","title":"mask_k_bias = False class-attribute instance-attribute","text":"

    Whether to mask K bias in attention.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.n_storage_tokens","title":"n_storage_tokens = 0 class-attribute instance-attribute","text":"

    Number of storage/register tokens.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.num_heads","title":"num_heads = 12 class-attribute instance-attribute","text":"

    Number of attention heads.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.patch_size","title":"patch_size = 16 class-attribute instance-attribute","text":"

    Size of each patch in pixels.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_base","title":"pos_embed_rope_base = 100.0 class-attribute instance-attribute","text":"

    Base frequency for RoPE positional encoding.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_dtype","title":"pos_embed_rope_dtype = 'bf16' class-attribute instance-attribute","text":"

    Data type for RoPE positional encoding.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_max_period","title":"pos_embed_rope_max_period = None class-attribute instance-attribute","text":"

    Maximum period for RoPE positional encoding.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_min_period","title":"pos_embed_rope_min_period = None class-attribute instance-attribute","text":"

    Minimum period for RoPE positional encoding.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_normalize_coords","title":"pos_embed_rope_normalize_coords = 'separate' class-attribute instance-attribute","text":"

    Coordinate normalization method for RoPE encoding.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.proj_bias","title":"proj_bias = True class-attribute instance-attribute","text":"

    Whether to use bias in output projection.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.qkv_bias","title":"qkv_bias = True class-attribute instance-attribute","text":"

    Whether to use bias in QKV projection.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.Config.untie_global_and_local_cls_norm","title":"untie_global_and_local_cls_norm = False class-attribute instance-attribute","text":"

    Whether to use separate norms for global and local CLS tokens.

    "},{"location":"api/data/dinov3/#saev.data.dinov3.PatchEmbed","title":"PatchEmbed(img_size=224, patch_size=16, in_chans=3, embed_dim=768, flatten_embedding=True)","text":"

    Bases: Module

    2D image to patch embedding: (B,C,H,W) -> (B,N,D)

    Parameters:

    Name Type Description Default img_size int | tuple[int, int]

    Image size.

    224 patch_size int | tuple[int, int]

    Patch token size.

    16 in_chans int

    Number of input image channels.

    3 embed_dim int

    Number of linear projection output channels.

    768 Source code in src/saev/data/dinov3.py
    def __init__(\n    self,\n    img_size: int | tuple[int, int] = 224,\n    patch_size: int | tuple[int, int] = 16,\n    in_chans: int = 3,\n    embed_dim: int = 768,\n    flatten_embedding: bool = True,\n) -> None:\n    super().__init__()\n\n    image_hw = make_2tuple(img_size)\n    patch_hw = make_2tuple(patch_size)\n\n    self.image_hw = image_hw\n    self.patch_hw = patch_hw\n\n    self.in_chans = in_chans\n    self.embed_dim = embed_dim\n\n    self.proj = nn.Conv2d(\n        in_chans, embed_dim, kernel_size=patch_hw, stride=patch_hw\n    )\n    self.k = patch_hw[0]\n    assert self.proj.kernel_size == (self.k, self.k)\n    assert self.proj.stride == (self.k, self.k)\n    assert self.proj.padding == (0, 0)\n    assert self.proj.groups == 1\n    assert self.proj.dilation == (1, 1)\n
    "},{"location":"api/data/dinov3/#saev.data.dinov3.Vit","title":"Vit(ckpt)","text":"

    Bases: Module, Transformer

    Source code in src/saev/data/dinov3.py
    def __init__(self, ckpt: str):\n    super().__init__()\n    name = self._parse_name(ckpt)\n    self.model = load(name, ckpt)\n\n    self._ckpt = name\n    self.logger = logging.getLogger(f\"dinov3/{name}\")\n
    "},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_resize","title":"make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

    Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.

    Source code in src/saev/data/dinov3.py
    @staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    import functools\n\n    return functools.partial(\n        transforms.resize_to_patch_grid,\n        p=int(16 * scale),\n        n=n_patches_per_img,\n        resample=resample,\n    )\n
    "},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

    Create transforms for preprocessing: (img_transform, sample_transform | None).

    Source code in src/saev/data/dinov3.py
    @staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    img_transform = v2.Compose([\n        transforms.FlexResize(patch_size=16, n_patches=n_patches_per_img),\n        v2.ToImage(),\n        v2.ToDtype(torch.float32, scale=True),\n        v2.Normalize(mean=[0.4850, 0.4560, 0.4060], std=[0.2290, 0.2240, 0.2250]),\n    ])\n    sample_transform = transforms.Patchify(\n        patch_size=16, n_patches=n_patches_per_img\n    )\n    return img_transform, sample_transform\n
    "},{"location":"api/data/fake_clip/","title":"saev.data.fake_clip","text":"

    Fake CLIP model for testing with tiny-open-clip-model.

    This module provides a test-only vision transformer that works with the tiny-open-clip-model from HuggingFace, which uses 8x8 images and 2x2 patches instead of the standard 224x224 images with 16x16 patches.

    "},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit","title":"Vit(ckpt)","text":"

    Bases: Transformer, Module

    Source code in src/saev/data/fake_clip.py
    def __init__(self, ckpt: str):\n    super().__init__()\n\n    # Only support the tiny test model\n    assert ckpt == \"hf-hub:hf-internal-testing/tiny-open-clip-model\", (\n        f\"FakeClip only supports tiny-open-clip-model, got {ckpt}\"\n    )\n\n    clip, _ = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n
    "},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.patch_size","title":"patch_size property","text":"

    Tiny model uses 2x2 patches.

    "},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_resize","title":"make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

    Create resize transform for tiny model (8x8 images).

    Source code in src/saev/data/fake_clip.py
    @staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for tiny model (8x8 images).\"\"\"\n\n    def resize(img: Image.Image) -> Image.Image:\n        # Tiny model uses 8x8 images\n        size_px = (int(8 * scale), int(8 * scale))\n        return img.resize(size_px, resample=resample)\n\n    return resize\n
    "},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

    Create transforms for preprocessing: (img_transform, sample_transform | None).

    Source code in src/saev/data/fake_clip.py
    @staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    _, img_transform = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    return img_transform, None\n
    "},{"location":"api/data/indexed/","title":"saev.data.indexed","text":""},{"location":"api/data/indexed/#saev.data.indexed.Config","title":"Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False) dataclass","text":"

    Configuration for loading indexed activation data from disk

    Attributes:

    Name Type Description shards Path

    Directory with .bin shards and a metadata.json file.

    tokens Literal['special', 'content', 'all']

    Which kinds of tokens to use. 'special' indicates the special tokens (if any). 'content' returns content tokens. 'all' returns both content and special tokens.

    layer int | Literal['all']

    Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

    debug bool

    Whether the dataloader process should log debug messages.

    "},{"location":"api/data/indexed/#saev.data.indexed.Dataset","title":"Dataset(cfg)","text":"

    Bases: Dataset

    Dataset of activations from disk.

    Attributes:

    Name Type Description cfg Config

    Configuration set via CLI args.

    md Metadata

    Activations metadata; automatically loaded from disk.

    layer_idx int

    Layer index into the shards if we are choosing a specific layer.

    Source code in src/saev/data/indexed.py
    def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n
    "},{"location":"api/data/indexed/#saev.data.indexed.Dataset.d_model","title":"d_model property","text":"

    Dimension of the underlying vision transformer's embedding space.

    "},{"location":"api/data/indexed/#saev.data.indexed.Dataset.Example","title":"Example","text":"

    Bases: TypedDict

    Individual example.

    "},{"location":"api/data/indexed/#saev.data.indexed.Dataset.__len__","title":"__len__()","text":"

    Dataset length depends on patches and layer.

    Source code in src/saev/data/indexed.py
    def __len__(self) -> int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n
    "},{"location":"api/data/models/","title":"saev.data.models","text":""},{"location":"api/data/models/#saev.data.models.Transformer","title":"Transformer","text":"

    Bases: ABC

    Protocol defining the interface for all Transformer models.

    "},{"location":"api/data/models/#saev.data.models.Transformer.patch_size","title":"patch_size abstractmethod property","text":"

    Patch size in pixels (e.g., 14 or 16).

    "},{"location":"api/data/models/#saev.data.models.Transformer.forward","title":"forward(batch) abstractmethod","text":"

    Run forward pass on batch of images.

    Source code in src/saev/data/models.py
    @abc.abstractmethod\ndef forward(\n    self, batch: Float[Tensor, \"batch 3 width height\"]\n) -> Float[Tensor, \"batch patches dim\"]:\n    \"\"\"Run forward pass on batch of images.\"\"\"\n
    "},{"location":"api/data/models/#saev.data.models.Transformer.get_residuals","title":"get_residuals() abstractmethod","text":"

    Return the list of residual blocks/layers for hook registration.

    Source code in src/saev/data/models.py
    @abc.abstractmethod\ndef get_residuals(self) -> list[torch.nn.Module]:\n    \"\"\"Return the list of residual blocks/layers for hook registration.\"\"\"\n
    "},{"location":"api/data/models/#saev.data.models.Transformer.get_token_i","title":"get_token_i(content_tokens_per_example) abstractmethod","text":"

    Return indices for selecting relevant tokens from activations.

    Source code in src/saev/data/models.py
    @abc.abstractmethod\ndef get_token_i(self, content_tokens_per_example: int) -> slice | torch.Tensor:\n    \"\"\"Return indices for selecting relevant tokens from activations.\"\"\"\n
    "},{"location":"api/data/models/#saev.data.models.Transformer.make_resize","title":"make_resize(ckpt, content_tokens_per_example, *, scale=1.0, resample=Image.LANCZOS) abstractmethod staticmethod","text":"

    Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.

    Source code in src/saev/data/models.py
    @staticmethod\n@abc.abstractmethod\ndef make_resize(\n    ckpt: str,\n    content_tokens_per_example: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n
    "},{"location":"api/data/models/#saev.data.models.Transformer.make_transforms","title":"make_transforms(ckpt, content_tokens_per_example) abstractmethod staticmethod","text":"

    Create transforms for preprocessing: (data_transform, dict_transform | None).

    Source code in src/saev/data/models.py
    @staticmethod\n@abc.abstractmethod\ndef make_transforms(\n    ckpt: str, content_tokens_per_example: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n
    "},{"location":"api/data/models/#saev.data.models.list_families","title":"list_families()","text":"

    List all ViT family names.

    Source code in src/saev/data/models.py
    def list_families() -> list[str]:\n    \"\"\"List all ViT family names.\"\"\"\n    return list(_global_model_registry.keys())\n
    "},{"location":"api/data/models/#saev.data.models.load_model_cls","title":"load_model_cls(family)","text":"

    Load a transformer family's class.

    Source code in src/saev/data/models.py
    @beartype.beartype\ndef load_model_cls(family: str) -> type[Transformer]:\n    \"\"\"Load a transformer family's class.\"\"\"\n    if family not in _global_model_registry:\n        raise ValueError(f\"Family '{family}' not found.\")\n\n    return _global_model_registry[family]\n
    "},{"location":"api/data/models/#saev.data.models.register_family","title":"register_family(cls)","text":"

    Register a new transformer family's class.

    Source code in src/saev/data/models.py
    @beartype.beartype\ndef register_family(cls: type[Transformer]):\n    \"\"\"Register a new transformer family's class.\"\"\"\n    if cls.family in _global_model_registry:\n        logger.warning(\"Overwriting key '%s' in registry.\", cls.family)\n    _global_model_registry[cls.family] = cls\n
    "},{"location":"api/data/ordered/","title":"saev.data.ordered","text":"

    Ordered (sequential) dataloader for activation data.

    This module provides a high-throughput dataloader that reads activation data from disk shards in sequential order, without shuffling. The implementation uses a single-threaded manager process to ensure data is delivered in the exact order it appears on disk.

    Patch labels are provided if there is a labels.bin file on disk.

    See the design decisions in src/saev/data/performance.md.

    Usage

    cfg = Config(shards=\"./shards\", layer=13, batch_size=4096) dataloader = DataLoader(cfg) for batch in dataloader: ... activations = batch[\"act\"] # [batch_size, d_model] ... image_indices = batch[\"example_idx\"] # [batch_size] ... patch_indices = batch[\"token_idx\"] # [batch_size] ... patch_labels = batch[\"patch_labels\"] # [batch_size]

    "},{"location":"api/data/ordered/#saev.data.ordered.Config","title":"Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0) dataclass","text":"

    Configuration for loading ordered (non-shuffled) activation data from disk

    Attributes:

    Name Type Description shards Path

    Directory with .bin shards and a metadata.json file.

    tokens Literal['content']

    Which kinds of tokens to use. 'special' indicates the special tokens (if any). 'content' returns content tokens. 'all' returns both content and special tokens.

    layer int | Literal['all']

    Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

    batch_size int

    Batch size.

    batch_timeout_s float

    How long to wait for at least one batch.

    drop_last bool

    Whether to drop the last batch if it's smaller than the others.

    buffer_size int

    Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

    debug bool

    Whether the dataloader process should log debug messages.

    log_every_s float

    How frequently the dataloader process should log (debug) performance messages.

    "},{"location":"api/data/ordered/#saev.data.ordered.DataLoader","title":"DataLoader(cfg)","text":"

    High-throughput streaming loader that reads data from disk shards in order (no shuffling).

    Source code in src/saev/data/ordered.py
    def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. (debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n
    "},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.ExampleBatch","title":"ExampleBatch","text":"

    Bases: TypedDict

    Individual example.

    "},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__iter__","title":"__iter__()","text":"

    Yields batches in order.

    Source code in src/saev/data/ordered.py
    def __iter__(self) -> collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n < self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size < self.cfg.batch_size\n                    and n + actual_batch_size >= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n
    "},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__len__","title":"__len__()","text":"

    Returns the number of batches in an epoch.

    Source code in src/saev/data/ordered.py
    def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n
    "},{"location":"api/data/pe/","title":"saev.data.pe","text":"

    Perception Encoder (PE) models from Meta (Bolya et al., 2025).

    PE-Core: CLIP-style model for language alignment. PE-Spatial: Dense prediction model distilled from SAM 2.1.

    Both are available via timm.

    "},{"location":"api/data/pe/#saev.data.pe.Core","title":"Core(ckpt)","text":"

    Bases: _Base

    PE-Core: CLIP-style model for language alignment.

    Available checkpoints: - vit_pe_core_large_patch14_336.fb (L/14, 336px) - vit_pe_core_base_patch16_224.fb (B/16, 224px)

    Source code in src/saev/data/pe.py
    def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n
    "},{"location":"api/data/pe/#saev.data.pe.Spatial","title":"Spatial(ckpt)","text":"

    Bases: _Base

    PE-Spatial: Dense prediction model distilled from SAM 2.1.

    Available checkpoints: - vit_pe_spatial_large_patch14_448.fb (L/14, 448px) - vit_pe_spatial_base_patch16_512.fb (B/16, 512px)

    Source code in src/saev/data/pe.py
    def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n
    "},{"location":"api/data/saev.data/","title":"saev.data","text":""},{"location":"api/data/saev.data/#saev.data.IndexedConfig","title":"IndexedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False) dataclass","text":"

    Configuration for loading indexed activation data from disk

    Attributes:

    Name Type Description shards Path

    Directory with .bin shards and a metadata.json file.

    tokens Literal['special', 'content', 'all']

    Which kinds of tokens to use. 'special' indicates the special tokens (if any). 'content' returns content tokens. 'all' returns both content and special tokens.

    layer int | Literal['all']

    Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

    debug bool

    Whether the dataloader process should log debug messages.

    "},{"location":"api/data/saev.data/#saev.data.IndexedDataset","title":"IndexedDataset(cfg)","text":"

    Bases: Dataset

    Dataset of activations from disk.

    Attributes:

    Name Type Description cfg Config

    Configuration set via CLI args.

    md Metadata

    Activations metadata; automatically loaded from disk.

    layer_idx int

    Layer index into the shards if we are choosing a specific layer.

    Source code in src/saev/data/indexed.py
    def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n
    "},{"location":"api/data/saev.data/#saev.data.IndexedDataset.d_model","title":"d_model property","text":"

    Dimension of the underlying vision transformer's embedding space.

    "},{"location":"api/data/saev.data/#saev.data.IndexedDataset.Example","title":"Example","text":"

    Bases: TypedDict

    Individual example.

    "},{"location":"api/data/saev.data/#saev.data.IndexedDataset.__len__","title":"__len__()","text":"

    Dataset length depends on patches and layer.

    Source code in src/saev/data/indexed.py
    def __len__(self) -> int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n
    "},{"location":"api/data/saev.data/#saev.data.Metadata","title":"Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1') dataclass","text":"

    Metadata for a sharded set of transformer activations.

    Parameters:

    Name Type Description Default family Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']

    The transformer family.

    required ckpt str

    The transformer checkpoint.

    required layers tuple[int, ...]

    Which layers were saved.

    required content_tokens_per_example int

    The number of content tokens per example.

    required cls_token bool

    Whether the transformer has a [CLS] token as well.

    required d_model int

    Model hidden dimension.

    required n_examples int

    Number of examples.

    required max_tokens_per_shard int

    The maximum number of tokens per shard.

    required data str

    base64-encoded string of pickle.dumps(dataset).

    required dataset Path

    Absolute path to the root directory of the original dataset.

    required pixel_agg PixelAgg

    (only for image segmentation datasets) how the pixel-level segmentation labels were aggregated to token-level labels.

    MAJORITY dtype Literal['float32']

    How activations are stored.

    'float32' protocol Literal['1.0.0', '1.1', '2.1']

    Protocol version.

    '2.1'"},{"location":"api/data/saev.data/#saev.data.Metadata.examples_per_shard","title":"examples_per_shard property","text":"

    The number of examples per shard based on the protocol.

    Returns:

    Type Description int

    Number of examples that fit in a shard.

    "},{"location":"api/data/saev.data/#saev.data.Metadata.hash","title":"hash property","text":"

    First 8 bytes of a SHA256 hash of the metadata configuration.

    Returns:

    Type Description str

    Hexadecimal hash string uniquely identifying this configuration.

    "},{"location":"api/data/saev.data/#saev.data.Metadata.n_shards","title":"n_shards property","text":"

    Total number of shards needed to store all examples.

    Returns:

    Type Description int

    Number of shards required.

    "},{"location":"api/data/saev.data/#saev.data.Metadata.shard_shape","title":"shard_shape property","text":"

    Shape of each shard file.

    Returns:

    Type Description tuple[int, int, int, int]

    Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).

    "},{"location":"api/data/saev.data/#saev.data.Metadata.tokens_per_example","title":"tokens_per_example property","text":"

    Total number of tokens per example including [CLS] token if present.

    Returns:

    Type Description int

    Number of tokens plus one if [CLS] token is included.

    "},{"location":"api/data/saev.data/#saev.data.Metadata.dump","title":"dump(shards_root)","text":"

    Dumps a Metadata object to a metadata.json file in shards_root / hash.

    Parameters:

    Name Type Description Default shards_root Path

    Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.

    required Source code in src/saev/data/shards.py
    def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n
    "},{"location":"api/data/saev.data/#saev.data.Metadata.load","title":"load(shards_dir) classmethod","text":"

    Loads a Metadata object from a metadata.json file in shards_dir.

    Parameters:

    Name Type Description Default shards_dir Path

    Path to $SAEV_SCRATCH/saev/shards/<hash> as described in disk-layout.md.

    required Source code in src/saev/data/shards.py
    @classmethod\ndef load(cls, shards_dir: pathlib.Path) -> tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/<hash> as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n
    "},{"location":"api/data/saev.data/#saev.data.OrderedConfig","title":"OrderedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0) dataclass","text":"

    Configuration for loading ordered (non-shuffled) activation data from disk

    Attributes:

    Name Type Description shards Path

    Directory with .bin shards and a metadata.json file.

    tokens Literal['content']

    Which kinds of tokens to use. 'special' indicates the special tokens (if any). 'content' returns content tokens. 'all' returns both content and special tokens.

    layer int | Literal['all']

    Which ViT layer(s) to read from disk. -2 selects the second-to-last layer. \"all\" enumerates every recorded layer.

    batch_size int

    Batch size.

    batch_timeout_s float

    How long to wait for at least one batch.

    drop_last bool

    Whether to drop the last batch if it's smaller than the others.

    buffer_size int

    Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

    debug bool

    Whether the dataloader process should log debug messages.

    log_every_s float

    How frequently the dataloader process should log (debug) performance messages.

    "},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader","title":"OrderedDataLoader(cfg)","text":"

    High-throughput streaming loader that reads data from disk shards in order (no shuffling).

    Source code in src/saev/data/ordered.py
    def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. (debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n
    "},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.ExampleBatch","title":"ExampleBatch","text":"

    Bases: TypedDict

    Individual example.

    "},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__iter__","title":"__iter__()","text":"

    Yields batches in order.

    Source code in src/saev/data/ordered.py
    def __iter__(self) -> collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n < self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size < self.cfg.batch_size\n                    and n + actual_batch_size >= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n
    "},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__len__","title":"__len__()","text":"

    Returns the number of batches in an epoch.

    Source code in src/saev/data/ordered.py
    def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n
    "},{"location":"api/data/saev.data/#saev.data.PixelAgg","title":"PixelAgg","text":"

    Bases: Enum

    How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig","title":"ShuffledConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False) dataclass","text":"

    Configuration for loading shuffled activation data from disk.

    Attributes:

    Name Type Description shards Path

    Directory with .bin shards and a metadata.json file.

    tokens Literal['special', 'content', 'all']

    Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 'all' returns all tokens.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_size","title":"batch_size = 1024 * 16 class-attribute instance-attribute","text":"

    Batch size.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_timeout_s","title":"batch_timeout_s = 30.0 class-attribute instance-attribute","text":"

    How long to wait for at least one batch.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.buffer_size","title":"buffer_size = 64 class-attribute instance-attribute","text":"

    Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.debug","title":"debug = False class-attribute instance-attribute","text":"

    Whether the dataloader process should log debug messages.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.drop_last","title":"drop_last = False class-attribute instance-attribute","text":"

    Whether to drop the last batch if it's smaller than the others.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.ignore_labels","title":"ignore_labels = dataclasses.field(default_factory=list) class-attribute instance-attribute","text":"

    If provided, exclude tokens with these label values. None means no filtering. Common use: ignore_labels=[0] to exclude background.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.layer","title":"layer = -1 class-attribute instance-attribute","text":"

    Which transformer layer(s) to read from disk. -1 is the default, but must be changed. \"all\" enumerates every recorded layer.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.log_every_s","title":"log_every_s = 30.0 class-attribute instance-attribute","text":"

    How frequently the dataloader process should log (debug) performance messages.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.min_buffer_fill","title":"min_buffer_fill = 0.0 class-attribute instance-attribute","text":"

    Fraction of the reservoir that must be populated before yielding batches.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.n_threads","title":"n_threads = 4 class-attribute instance-attribute","text":"

    Number of dataloading threads.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.scale_norm","title":"scale_norm = False class-attribute instance-attribute","text":"

    Whether to scale norms to sqrt(D).

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.seed","title":"seed = 17 class-attribute instance-attribute","text":"

    Random seed.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.use_tmpdir","title":"use_tmpdir = False class-attribute instance-attribute","text":"

    If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader","title":"ShuffledDataLoader(cfg)","text":"

    High-throughput streaming loader that deterministically shuffles data from disk shards.

    Source code in src/saev/data/shuffled.py
    def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = None\n    self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n
    "},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.ExampleBatch","title":"ExampleBatch","text":"

    Bases: TypedDict

    Individual example.

    "},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__iter__","title":"__iter__()","text":"

    Yields batches.

    Source code in src/saev/data/shuffled.py
    def __iter__(self) -> collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n < self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n            self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n
    "},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__len__","title":"__len__()","text":"

    Returns the number of batches in an epoch.

    Source code in src/saev/data/shuffled.py
    def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / self.cfg.batch_size)\n
    "},{"location":"api/data/saev.data/#saev.data.make_ordered_config","title":"make_ordered_config(shuffled_cfg, **overrides)","text":"

    Create an OrderedConfig from a ShuffledConfig, with optional overrides.

    Defaults come from shuffled_cfg for fields present in OrderedConfig, and overrides take precedence. Unknown override fields raise TypeError from the OrderedConfig constructor, mirroring dataclasses.replace.

    Source code in src/saev/data/__init__.py
    @beartype.beartype\ndef make_ordered_config(\n    shuffled_cfg: ShuffledConfig, **overrides: object\n) -> OrderedConfig:\n    \"\"\"Create an `OrderedConfig` from a `ShuffledConfig`, with optional overrides.\n\n    Defaults come from `shuffled_cfg` for fields present in `OrderedConfig`, and `overrides` take precedence. Unknown override fields raise `TypeError` from the `OrderedConfig` constructor, mirroring `dataclasses.replace`.\n    \"\"\"\n    params: dict[str, object] = {}\n    for f in dataclasses.fields(OrderedConfig):\n        name = f.name\n        if hasattr(shuffled_cfg, name):\n            params[name] = getattr(shuffled_cfg, name)\n    params.update(overrides)\n    return OrderedConfig(**params)\n
    "},{"location":"api/data/shards/","title":"saev.data.shards","text":"

    Library code for reading and writing sharded activations to disk.

    "},{"location":"api/data/shards/#saev.data.shards.Index","title":"Index(*, idx, example_idx, content_token_idx, shard_idx, example_idx_in_shard, layer_idx_in_shard, token_idx_in_shard) dataclass","text":"

    Attributes:

    Name Type Description idx int

    The index of the activation.

    example_idx int

    The index of the original example (image, audio clip etc).

    content_token_idx int

    The token's index within an example's content. -1 for all special tokens.

    shard_idx int

    The shard index.

    example_idx_in_shard int

    The example index along the examples axis in a shard.

    token_idx_in_shard int

    The token index along the tokens axis in a shard.

    "},{"location":"api/data/shards/#saev.data.shards.IndexMap","title":"IndexMap(md, tokens, layer)","text":"

    Attributes:

    Name Type Description md Metadata

    Metadata

    tokens Literal['special', 'content', 'all']

    Which subset of tokens to load.

    layer int

    Which layer to load.

    layer_idx_lookup dict[int, int]

    The lookup from a transformer layer to the layer idx in the shard.

    Source code in src/saev/data/shards.py
    def __init__(\n    self,\n    md: Metadata,\n    tokens: tp.Literal[\"special\", \"content\", \"all\"],\n    layer: int | tp.Literal[\"all\"],\n):\n    if tokens == \"special\":\n        assert md.cls_token\n\n    self.md = md\n    self.tokens = tokens\n    self.layer = layer\n\n    if isinstance(layer, int):\n        err_msg = f\"No matche for layer; {layer} not in {md.layers}.\"\n        assert layer in md.layers, err_msg\n\n    self.layer_idx_lookup = {layer: i for i, layer in enumerate(md.layers)}\n
    "},{"location":"api/data/shards/#saev.data.shards.IndexMap.__len__","title":"__len__()","text":"

    Dataset length depends on patches and layer.

    Source code in src/saev/data/shards.py
    def __len__(self) -> int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    match (self.tokens, self.layer):\n        case (\"special\", \"all\"):\n            # Return a CLS token from a random example and random layer.\n            return self.md.n_examples * len(self.md.layers)\n        case (\"special\", int()):\n            # Return a CLS token from a random example and fixed layer.\n            return self.md.n_examples\n        case (\"content\", int()):\n            # Return a patch from a random example, fixed layer, and random patch.\n            return self.md.n_examples * self.md.content_tokens_per_example\n        case (\"content\", \"all\"):\n            # Return a patch from a random example, random layer and random patch.\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.content_tokens_per_example\n            )\n        case (\"all\", int()):\n            # Return a token from a random example, fixed layer, and random token (including special).\n            return self.md.n_examples * self.md.tokens_per_example\n        case (\"all\", \"all\"):\n            # Return a token from a random example, random layer and random token (including special).\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.tokens_per_example\n            )\n        case _:\n            tp.assert_never((self.cfg.tokens, self.cfg.layer))\n
    "},{"location":"api/data/shards/#saev.data.shards.LabelsWriter","title":"LabelsWriter(shards_dir, md)","text":"

    LabelsWriter handles writing patch-level segmentation labels to a single binary file.

    Parameters:

    Name Type Description Default shards_dir Path

    The shard directory; $SAEV_SCRATCH/saev/shards/ required md Metadata

    The Metadata object.

    required

    Attributes:

    Name Type Description labels UInt8[ndarray, 'n_examples n_patches']

    The integer patch labels.

    labels_path Path

    Where the integer patch labels are stored.

    md Metadata

    The dataset metadata.

    has_written bool

    Whether we have written any data to self.labels.

    Source code in src/saev/data/shards.py
    def __init__(self, shards_dir: pathlib.Path, md: Metadata):\n    assert disk.is_shards_dir(shards_dir)\n    self.logger = logging.getLogger(\"labels-writer\")\n    self.md = md\n    self.has_written = False\n\n    # Always create memory-mapped file for labels\n    # If nothing is written, it will be deleted in flush()\n    self.labels_path = shards_dir / \"labels.bin\"\n    self.labels = np.memmap(\n        self.labels_path,\n        mode=\"w+\",\n        dtype=np.uint8,\n        shape=(self.md.n_examples, self.md.content_tokens_per_example),\n    )\n    self.logger.info(\"Opened labels file '%s'.\", self.labels_path)\n
    "},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.flush","title":"flush()","text":"

    Flush the memory-mapped file to disk if anything was written.

    Source code in src/saev/data/shards.py
    def flush(self) -> None:\n    \"\"\"Flush the memory-mapped file to disk if anything was written.\"\"\"\n    if self.has_written:\n        self.labels.flush()\n        self.logger.info(\"Flushed labels to '%s'.\", self.labels_path)\n
    "},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.write_batch","title":"write_batch(batch_labels, start_idx)","text":"

    Write a batch of labels to the memory-mapped file.

    Parameters:

    Name Type Description Default batch_labels ndarray | Tensor

    Array of shape (batch_size, content_tokens_per_example) with uint8 dtype

    required start_idx int

    Starting index in the global labels array

    required Source code in src/saev/data/shards.py
    @beartype.beartype\ndef write_batch(self, batch_labels: np.ndarray | Tensor, start_idx: int):\n    \"\"\"\n    Write a batch of labels to the memory-mapped file.\n\n    Args:\n        batch_labels: Array of shape (batch_size, content_tokens_per_example) with uint8 dtype\n        start_idx: Starting index in the global labels array\n    \"\"\"\n    # Convert to numpy if needed\n    if isinstance(batch_labels, torch.Tensor):\n        batch_labels = batch_labels.cpu().numpy()\n\n    batch_size = len(batch_labels)\n    assert start_idx + batch_size <= self.md.n_examples\n    assert batch_labels.shape == (batch_size, self.md.content_tokens_per_example)\n    assert batch_labels.dtype == np.uint8\n\n    self.labels[start_idx : start_idx + batch_size] = batch_labels\n    self.has_written = True\n
    "},{"location":"api/data/shards/#saev.data.shards.Metadata","title":"Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1') dataclass","text":"

    Metadata for a sharded set of transformer activations.

    Parameters:

    Name Type Description Default family Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']

    The transformer family.

    required ckpt str

    The transformer checkpoint.

    required layers tuple[int, ...]

    Which layers were saved.

    required content_tokens_per_example int

    The number of content tokens per example.

    required cls_token bool

    Whether the transformer has a [CLS] token as well.

    required d_model int

    Model hidden dimension.

    required n_examples int

    Number of examples.

    required max_tokens_per_shard int

    The maximum number of tokens per shard.

    required data str

    base64-encoded string of pickle.dumps(dataset).

    required dataset Path

    Absolute path to the root directory of the original dataset.

    required pixel_agg PixelAgg

    (only for image segmentation datasets) how the pixel-level segmentation labels were aggregated to token-level labels.

    MAJORITY dtype Literal['float32']

    How activations are stored.

    'float32' protocol Literal['1.0.0', '1.1', '2.1']

    Protocol version.

    '2.1'"},{"location":"api/data/shards/#saev.data.shards.Metadata.examples_per_shard","title":"examples_per_shard property","text":"

    The number of examples per shard based on the protocol.

    Returns:

    Type Description int

    Number of examples that fit in a shard.

    "},{"location":"api/data/shards/#saev.data.shards.Metadata.hash","title":"hash property","text":"

    First 8 bytes of a SHA256 hash of the metadata configuration.

    Returns:

    Type Description str

    Hexadecimal hash string uniquely identifying this configuration.

    "},{"location":"api/data/shards/#saev.data.shards.Metadata.n_shards","title":"n_shards property","text":"

    Total number of shards needed to store all examples.

    Returns:

    Type Description int

    Number of shards required.

    "},{"location":"api/data/shards/#saev.data.shards.Metadata.shard_shape","title":"shard_shape property","text":"

    Shape of each shard file.

    Returns:

    Type Description tuple[int, int, int, int]

    Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).

    "},{"location":"api/data/shards/#saev.data.shards.Metadata.tokens_per_example","title":"tokens_per_example property","text":"

    Total number of tokens per example including [CLS] token if present.

    Returns:

    Type Description int

    Number of tokens plus one if [CLS] token is included.

    "},{"location":"api/data/shards/#saev.data.shards.Metadata.dump","title":"dump(shards_root)","text":"

    Dumps a Metadata object to a metadata.json file in shards_root / hash.

    Parameters:

    Name Type Description Default shards_root Path

    Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.

    required Source code in src/saev/data/shards.py
    def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n
    "},{"location":"api/data/shards/#saev.data.shards.Metadata.load","title":"load(shards_dir) classmethod","text":"

    Loads a Metadata object from a metadata.json file in shards_dir.

    Parameters:

    Name Type Description Default shards_dir Path

    Path to $SAEV_SCRATCH/saev/shards/ as described in disk-layout.md. required Source code in src/saev/data/shards.py

    @classmethod\ndef load(cls, shards_dir: pathlib.Path) -> tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/<hash> as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n
    "},{"location":"api/data/shards/#saev.data.shards.PixelAgg","title":"PixelAgg","text":"

    Bases: Enum

    How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).

    "},{"location":"api/data/shards/#saev.data.shards.RecordedTransformer","title":"RecordedTransformer(model, content_tokens_per_example, cls_token, layers)","text":"

    Bases: Module

    A wrapper around a transformer model that records intermediate layer activations during forward passes.

    Parameters:

    Name Type Description Default model Module

    The transformer model to wrap.

    required content_tokens_per_example int

    Number of content tokens per example.

    required cls_token bool

    Whether to record the [CLS] token in addition to content tokens.

    required layers Sequence[int]

    Which transformer layers to record activations from.

    required

    Attributes:

    Name Type Description model Module

    The wrapped transformer model.

    content_tokens_per_example int

    Number of content tokens per example.

    cls_token bool

    Whether the [CLS] token is included in recorded activations.

    layers Sequence[int]

    Tuple of layer indices being recorded.

    token_i slice

    Token indices to extract from model outputs.

    logger

    Logger instance for this recorder.

    Source code in src/saev/data/shards.py
    def __init__(\n    self,\n    model: torch.nn.Module,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    layers: Sequence[int],\n):\n    super().__init__()\n\n    self.model = model\n\n    self.content_tokens_per_example = content_tokens_per_example\n    self.cls_token = cls_token\n    self.layers = layers\n\n    self.token_i = model.get_token_i(content_tokens_per_example)\n\n    self._storage = None\n    self._i = 0\n\n    self.logger = logging.getLogger(f\"recorder({model.name})\")\n\n    for i in self.layers:\n        self.model.get_residuals()[i].register_forward_hook(self.hook)\n
    "},{"location":"api/data/shards/#saev.data.shards.Shard","title":"Shard(name, n_examples) dataclass","text":"

    A single shard entry in shards.json, recording the filename and number of examples.

    Attributes:

    Name Type Description name str

    The filename of the shard (e.g., \"acts000000.bin\").

    n_examples int

    Number of examples stored in this shard.

    "},{"location":"api/data/shards/#saev.data.shards.ShardInfo","title":"ShardInfo(shards=list()) dataclass","text":"

    A container for shard metadata as recorded in shards.json.

    Parameters:

    Name Type Description Default shards list[Shard]

    A list of Shard objects.

    list()"},{"location":"api/data/shards/#saev.data.shards.ShardWriter","title":"ShardWriter(shards_root, md)","text":"

    ShardWriter is a stateful object that handles sharded activation writing to disk.

    Parameters:

    Name Type Description Default shards_root Path

    The $SAEV_SCRATCH/saev/shards path.

    required md Metadata

    The Metadata object for these shards.

    required

    Attributes:

    Name Type Description shards Path

    The $SAEV_SCRATCH/saev/shards/. shard int acts_path Path acts Float[ndarray, 'examples_per_shard n_layers all_patches d_model'] | None filled int labels_writer LabelsWriter

    The LabelsWriter writer.

    Source code in src/saev/data/shards.py
    def __init__(self, shards_root: pathlib.Path, md: Metadata):\n    assert disk.is_shards_root(shards_root)\n    self.md = md\n\n    self.logger = logging.getLogger(\"shard-writer\")\n\n    self.shards_dir = shards_root / md.hash\n    self.shards_dir.mkdir(exist_ok=True)\n\n    # builder for shard manifest\n    self._shards: ShardInfo = ShardInfo()\n\n    # Always initialize labels writer (it handles non-seg datasets internally)\n    self.labels_writer = LabelsWriter(self.shards_dir, md)\n\n    self.shard = -1\n    self.acts = None\n    self.next_shard()\n
    "},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__enter__","title":"__enter__()","text":"

    Context manager entry.

    Source code in src/saev/data/shards.py
    def __enter__(self):\n    \"\"\"Context manager entry.\"\"\"\n    return self\n
    "},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__exit__","title":"__exit__(exc_type, exc_val, exc_tb)","text":"

    Context manager exit - handle cleanup.

    Source code in src/saev/data/shards.py
    def __exit__(self, exc_type, exc_val, exc_tb):\n    \"\"\"Context manager exit - handle cleanup.\"\"\"\n    self.flush()\n\n    # Delete empty labels file if nothing was written\n    if not self.labels_writer.has_written:\n        if os.path.exists(self.labels_writer.labels_path):\n            os.remove(self.labels_writer.labels_path)\n            self.logger.info(\n                \"Removed empty labels file '%s'.\", self.labels_writer.labels_path\n            )\n
    "},{"location":"api/data/shards/#saev.data.shards.ShardWriter.write_batch","title":"write_batch(activations, start_idx, patch_labels=None)","text":"

    Write a batch of activations and (optionally) patch labels.

    Parameters:

    Name Type Description Default activations Float[Tensor, 'batch n_layers all_patches d_model']

    Batch of activations to write.

    required start_idx int

    Starting index for this batch.

    required patch_labels UInt8[Tensor, 'batch n_patches'] | None

    Optional patch labels for segmentation datasets.

    None Source code in src/saev/data/shards.py
    def write_batch(\n    self,\n    activations: Float[Tensor, \"batch n_layers all_patches d_model\"],\n    start_idx: int,\n    patch_labels: UInt8[Tensor, \"batch n_patches\"] | None = None,\n) -> None:\n    \"\"\"Write a batch of activations and (optionally) patch labels.\n\n    Args:\n        activations: Batch of activations to write.\n        start_idx: Starting index for this batch.\n        patch_labels: Optional patch labels for segmentation datasets.\n    \"\"\"\n    batch_size = len(activations)\n    end_idx = start_idx + batch_size\n\n    # Write activations (handling sharding)\n    offset = self.md.examples_per_shard * self.shard\n\n    if end_idx >= offset + self.md.examples_per_shard:\n        # We have run out of space in this mmap'ed file. Let's fill it as much as we can.\n        n_fit = offset + self.md.examples_per_shard - start_idx\n        self.acts[start_idx - offset : start_idx - offset + n_fit] = activations[\n            :n_fit\n        ]\n        self.filled = start_idx - offset + n_fit\n\n        # Write labels for the portion that fits\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                labels_to_write = (\n                    patch_labels[:n_fit].cpu().numpy().astype(np.uint8)\n                )\n            elif not isinstance(patch_labels, np.ndarray):\n                labels_to_write = np.array(patch_labels[:n_fit], dtype=np.uint8)\n            else:\n                labels_to_write = patch_labels[:n_fit]\n\n            self.labels_writer.write_batch(labels_to_write, start_idx)\n\n        self.next_shard()\n\n        # Recursively call write_batch for remaining data\n        if n_fit < batch_size:\n            self.write_batch(\n                activations[n_fit:],\n                start_idx + n_fit,\n                patch_labels[n_fit:] if patch_labels is not None else None,\n            )\n    else:\n        msg = f\"0 <= 
{start_idx} - {offset} <= {offset} + {self.md.examples_per_shard}\"\n        assert 0 <= start_idx - offset <= offset + self.md.examples_per_shard, msg\n        msg = (\n            f\"0 <= {end_idx} - {offset} <= {offset} + {self.md.examples_per_shard}\"\n        )\n        assert 0 <= end_idx - offset <= offset + self.md.examples_per_shard, msg\n        self.acts[start_idx - offset : end_idx - offset] = activations\n        self.filled = end_idx - offset\n\n        # Write labels if provided\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                patch_labels = patch_labels.cpu().numpy().astype(np.uint8)\n            elif not isinstance(patch_labels, np.ndarray):\n                patch_labels = np.array(patch_labels, dtype=np.uint8)\n\n            self.labels_writer.write_batch(patch_labels, start_idx)\n
    "},{"location":"api/data/shards/#saev.data.shards.get_dataloader","title":"get_dataloader(data, *, batch_size, n_workers, data_tr=None, mask_tr=None, sample_tr=None)","text":"

    Get a dataloader for a default map-style dataset.

    Parameters:

    Name Type Description Default data Config

    Config for the dataset.

    required batch_size int

    Batch size.

    required n_workers int

    Number of dataloader workers.

    required data_tr Callable | None

    Transform to be applied to each 'data' key (typically the raw data).

    None mask_tr Callable | None

    Transform to be applied to masks.

    None sample_tr Callable | None

    Transform to be applied to the entire sample dict.

    None

    Returns:

    Type Description DataLoader

    A PyTorch Dataloader that yields dictionaries with 'data' keys containing data batches, 'index' keys containing original dataset indices and 'label' keys containing label batches.

    Source code in src/saev/data/shards.py
    @beartype.beartype\ndef get_dataloader(\n    data: datasets.Config,\n    *,\n    batch_size: int,\n    n_workers: int,\n    data_tr: Callable | None = None,\n    mask_tr: Callable | None = None,\n    sample_tr: Callable | None = None,\n) -> torch.utils.data.DataLoader:\n    \"\"\"\n    Get a dataloader for a default map-style dataset.\n\n    Args:\n        data: Config for the dataset.\n        batch_size: Batch size.\n        n_workers: Number of dataloader workers.\n        data_tr: Transform to be applied to each 'data' key (typically the raw data).\n        mask_tr: Transform to be applied to masks.\n        sample_tr: Transform to be applied to the entire sample dict.\n\n    Returns:\n        A PyTorch Dataloader that yields dictionaries with `'data'` keys containing data batches, `'index'` keys containing original dataset indices and `'label'` keys containing label batches.\n    \"\"\"\n    dataset = datasets.get_dataset(\n        data, data_transform=data_tr, mask_transform=mask_tr, sample_transform=sample_tr\n    )\n\n    dataloader = torch.utils.data.DataLoader(\n        dataset=dataset,\n        batch_size=batch_size,\n        drop_last=False,\n        num_workers=n_workers,\n        persistent_workers=n_workers > 0,\n        shuffle=False,\n        pin_memory=False,\n    )\n    return dataloader\n
    "},{"location":"api/data/shards/#saev.data.shards.pixel_to_patch_labels","title":"pixel_to_patch_labels(seg, n_patches, patch_size, pixel_agg=PixelAgg.MAJORITY, bg_label=0, max_classes=256)","text":"

    Convert pixel-level segmentation to patch-level labels using vectorized operations.

    Parameters:

    Name Type Description Default seg Image

    Pixel-level segmentation mask as PIL Image

    required n_patches int

    Total number of patches expected

    required patch_size int

    Size of each patch in pixels

    required pixel_agg PixelAgg

    How to aggregate pixel labels into patch labels

    MAJORITY bg_label int

    Background label index

    0 max_classes int

    Maximum number of classes (for bincount)

    256

    Returns:

    Type Description UInt8[Tensor, ' n_patches']

    Patch labels as uint8 tensor of shape (n_patches,)

    Source code in src/saev/data/shards.py
    @jaxtyped(typechecker=beartype.beartype)\ndef pixel_to_patch_labels(\n    seg: Image.Image,\n    n_patches: int,\n    patch_size: int,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n    bg_label: int = 0,\n    max_classes: int = 256,\n) -> UInt8[Tensor, \" n_patches\"]:\n    \"\"\"\n    Convert pixel-level segmentation to patch-level labels using vectorized operations.\n\n    Args:\n        seg: Pixel-level segmentation mask as PIL Image\n        n_patches: Total number of patches expected\n        patch_size: Size of each patch in pixels\n        pixel_agg: How to aggregate pixel labels into patch labels\n        bg_label: Background label index\n        max_classes: Maximum number of classes (for bincount)\n\n    Returns:\n        Patch labels as uint8 tensor of shape (n_patches,)\n    \"\"\"\n    # Convert to torch tensor for vectorized operations\n    seg_tensor = torch.from_numpy(np.array(seg, dtype=np.uint8))\n    assert seg_tensor.ndim == 2\n\n    h, w = seg_tensor.shape\n\n    # Calculate patch grid dimensions\n    patch_grid_h = h // patch_size\n    patch_grid_w = w // patch_size\n    assert patch_grid_w * patch_grid_h == n_patches, (\n        f\"Image size {w}x{h} with patch_size {patch_size} gives {patch_grid_w}x{patch_grid_h} = {patch_grid_w * patch_grid_h} patches, expected {n_patches}\"\n    )\n\n    # Reshape into patches using einops: (n_patches, patch_size * patch_size)\n    patches = einops.rearrange(\n        seg_tensor,\n        \"(h p1) (w p2) -> (h w) (p1 p2)\",\n        p1=patch_size,\n        p2=patch_size,\n        h=patch_grid_h,\n        w=patch_grid_w,\n    )\n\n    # Use vectorized bincount approach to get class counts for all patches at once\n    # counts[i, c] = number of times class c appears in patch i\n    offsets = torch.arange(n_patches, device=patches.device).unsqueeze(1) * max_classes\n    flat = (patches + offsets).reshape(-1)\n    counts = torch.bincount(flat, minlength=n_patches * max_classes).reshape(\n        
n_patches, max_classes\n    )\n\n    if pixel_agg is PixelAgg.MAJORITY:\n        # Take the most common label in each patch\n        patch_labels = counts.argmax(dim=1)\n    elif pixel_agg is PixelAgg.PREFER_FG:\n        # Take the most common non-background label, or background if all background\n        nonbg = counts.clone()\n        nonbg[:, bg_label] = 0\n        has_nonbg = nonbg.sum(dim=1) > 0\n        nonbg_arg = nonbg.argmax(dim=1)\n        bg_tensor = torch.full_like(nonbg_arg, bg_label)\n        patch_labels = torch.where(has_nonbg, nonbg_arg, bg_tensor)\n    else:\n        tp.assert_never(pixel_agg)\n\n    return patch_labels.to(torch.uint8)\n
    "},{"location":"api/data/shards/#saev.data.shards.worker_fn","title":"worker_fn(*, family, ckpt, content_tokens_per_example, cls_token, d_model, layers, data, batch_size, n_workers, max_tokens_per_shard, shards_root, device, pixel_agg=PixelAgg.MAJORITY)","text":"

    Parameters:

    Name Type Description Default family str

    Transformer family (dinov2, dinov3, clip, etc).

    required ckpt str

    Transformer ckpt (hf-hub:imageomics/bioclip2, etc).

    required content_tokens_per_example int

    Number of content tokens per example.

    required cls_token bool

    Whether the transformer has a [CLS] token.

    required d_model int

    Hidden dimension of transformer.

    required layers list[int]

    The layers to record activations for.

    required data Config

    Config for the particular (image) dataset to load.

    required batch_size int

    Batch size for the dataset.

    required n_workers int

    Number of workers for loading examples from the dataset.

    required max_tokens_per_shard int

    Maximum number of tokens per disk shard.

    required pixel_agg PixelAgg

    Optional method for aggregating segmentation label pixels.

    MAJORITY shards_root Path

    Where to save shards. Should end with 'shards'. See disk-layout.md; this is $SAEV_SCRATCH/saev/shards.

    required device str

    Device for doing the computation.

    required

    Returns:

    Type Description Path

    Path to the shards directory.

    Source code in src/saev/data/shards.py
    @beartype.beartype\ndef worker_fn(\n    *,\n    family: str,\n    ckpt: str,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    d_model: int,\n    layers: list[int],\n    data: datasets.Config,\n    batch_size: int,\n    n_workers: int,\n    max_tokens_per_shard: int,\n    shards_root: pathlib.Path,\n    device: str,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n) -> pathlib.Path:\n    \"\"\"\n    Args:\n        family: Transformer family (dinov2, dinov3, clip, etc).\n        ckpt: Transformer ckpt (hf-hub:imageomics/bioclip2, etc).\n        content_tokens_per_example: Number of content tokens per example.\n        cls_token: Whether the transformer has a [CLS] token.\n        d_model: Hidden dimension of transformer.\n        layers: The layers to record activations for.\n        data: Config for the particular (image) dataset to load.\n        batch_size: Batch size for the dataset.\n        n_workers: Number of workers for loading examples fromm the dataset.\n        max_tokens_per_shard: Maximum number of tokens per disk shard.\n        pixel_agg: Optional method for aggregating segmentation label pixels.\n        shards_root: Where to save shards. Should end with 'shards'. 
See [disk-layout.md](../../developers/disk-layout.md); this is $SAEV_SCRATCH/saev/shards.\n        device: Device for doing the computation.\n\n    Returns:\n        Path to the shards directory.\n    \"\"\"\n    from saev import helpers\n    from saev.data import models\n\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n        torch.backends.cudnn.benchmark = True\n        torch.backends.cudnn.deterministic = True\n\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n    logger = logging.getLogger(\"worker_fn\")\n\n    if device == \"cuda\" and not torch.cuda.is_available():\n        logger.warning(\"No CUDA device available, using CPU.\")\n        device = \"cpu\"\n\n    assert shards_root.name == \"shards\"\n\n    model_cls = models.load_model_cls(family)\n    model_instance = model_cls(ckpt).to(device)\n    model = RecordedTransformer(\n        model_instance, content_tokens_per_example, cls_token, layers\n    )\n\n    data_tr, sample_tr = model_cls.make_transforms(ckpt, content_tokens_per_example)\n\n    mask_tr = None\n    if datasets.is_img_seg_dataset(data):\n        # For image segmentation datasets, create a transform that converts pixels to patches\n        # Use make_resize with NEAREST interpolation for segmentation masks\n        seg_resize_tr = model_cls.make_resize(\n            ckpt, content_tokens_per_example, scale=1.0, resample=Image.NEAREST\n        )\n\n        def seg_to_patches(seg):\n            \"\"\"Transform that resizes segmentation and converts to patch labels.\"\"\"\n\n            # Convert to patch labels\n            return pixel_to_patch_labels(\n                seg_resize_tr(seg),\n                content_tokens_per_example,\n   
             patch_size=model_instance.patch_size,\n                pixel_agg=pixel_agg,\n                bg_label=data.bg_label,\n            )\n\n        mask_tr = seg_to_patches\n\n    dataloader = get_dataloader(\n        data,\n        batch_size=batch_size,\n        n_workers=n_workers,\n        data_tr=data_tr,\n        mask_tr=mask_tr,\n        sample_tr=sample_tr,\n    )\n\n    n_batches = math.ceil(data.n_examples / batch_size)\n    logger.info(\"Dumping %d batches of %d examples.\", n_batches, batch_size)\n\n    model = model.to(device)\n\n    md = Metadata(\n        family=family,\n        ckpt=ckpt,\n        layers=tuple(layers),\n        content_tokens_per_example=content_tokens_per_example,\n        cls_token=cls_token,\n        d_model=d_model,\n        n_examples=data.n_examples,\n        max_tokens_per_shard=max_tokens_per_shard,\n        data=base64.b64encode(pickle.dumps(data)).decode(\"utf8\"),\n        dataset=data.root,\n        pixel_agg=pixel_agg,\n    )\n    md.dump(shards_root)\n\n    # Use context manager for proper cleanup\n    with ShardWriter(shards_root, md) as writer:\n        i = 0\n        # Calculate and write transformer activations.\n        with torch.inference_mode():\n            for batch in helpers.progress(dataloader, total=n_batches):\n                data = batch.get(\"data\").to(device)\n                grid = batch.get(\"grid\")\n                if grid is not None:\n                    grid = grid.to(device)\n                    out, cache = model(data, grid=grid)\n                else:\n                    out, cache = model(data)\n                # cache has shape [batch size, n layers, n patches + 1, d model]\n                del out\n\n                # Write activations and labels (if present) in one call\n                patch_labels = batch.get(\"patch_labels\")\n                if patch_labels is not None:\n                    logger.debug(\n                        \"Found patch_labels in batch: shape=%s\",\n 
                       patch_labels.shape\n                        if hasattr(patch_labels, \"shape\")\n                        else \"unknown\",\n                    )\n                    # Ensure correct shape\n                    assert patch_labels.shape == (\n                        len(cache),\n                        content_tokens_per_example,\n                    )\n                else:\n                    logger.debug(f\"No patch_labels in batch. Keys: {batch.keys()}\")\n\n                writer.write_batch(cache, i, patch_labels=patch_labels)\n\n                i += len(cache)\n\n    return shards_root / md.hash\n
    "},{"location":"api/data/shuffled/","title":"saev.data.shuffled","text":""},{"location":"api/data/shuffled/#saev.data.shuffled.Config","title":"Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False) dataclass","text":"

    Configuration for loading shuffled activation data from disk.

    Attributes:

    Name Type Description shards Path

    Directory with .bin shards and a metadata.json file.

    tokens Literal['special', 'content', 'all']

    Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 'all' returns all tokens.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_size","title":"batch_size = 1024 * 16 class-attribute instance-attribute","text":"

    Batch size.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_timeout_s","title":"batch_timeout_s = 30.0 class-attribute instance-attribute","text":"

    How long to wait for at least one batch.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.buffer_size","title":"buffer_size = 64 class-attribute instance-attribute","text":"

    Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.debug","title":"debug = False class-attribute instance-attribute","text":"

    Whether the dataloader process should log debug messages.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.drop_last","title":"drop_last = False class-attribute instance-attribute","text":"

    Whether to drop the last batch if it's smaller than the others.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.ignore_labels","title":"ignore_labels = dataclasses.field(default_factory=list) class-attribute instance-attribute","text":"

    If provided, exclude tokens with these label values. An empty list (the default) means no filtering. Common use: ignore_labels=[0] to exclude background.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.layer","title":"layer = -1 class-attribute instance-attribute","text":"

    Which transformer layer(s) to read from disk. -1 is the default, but must be changed. \"all\" enumerates every recorded layer.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.log_every_s","title":"log_every_s = 30.0 class-attribute instance-attribute","text":"

    How frequently the dataloader process should log (debug) performance messages.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.min_buffer_fill","title":"min_buffer_fill = 0.0 class-attribute instance-attribute","text":"

    Fraction of the reservoir that must be populated before yielding batches.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.n_threads","title":"n_threads = 4 class-attribute instance-attribute","text":"

    Number of dataloading threads.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.scale_norm","title":"scale_norm = False class-attribute instance-attribute","text":"

    Whether to scale norms to sqrt(D).

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.seed","title":"seed = 17 class-attribute instance-attribute","text":"

    Random seed.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.Config.use_tmpdir","title":"use_tmpdir = False class-attribute instance-attribute","text":"

    If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader","title":"DataLoader(cfg)","text":"

    High-throughput streaming loader that deterministically shuffles data from disk shards.

    Source code in src/saev/data/shuffled.py
    def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = None\n    self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n
    "},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.ExampleBatch","title":"ExampleBatch","text":"

    Bases: TypedDict

    Individual example.

    "},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__iter__","title":"__iter__()","text":"

    Yields batches.

    Source code in src/saev/data/shuffled.py
    def __iter__(self) -> collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n < self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n            self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n
    "},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__len__","title":"__len__()","text":"

    Returns the number of batches in an epoch.

    Source code in src/saev/data/shuffled.py
    def __len__(self) -> int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / self.cfg.batch_size)\n
    "},{"location":"api/data/siglip/","title":"saev.data.siglip","text":""},{"location":"api/data/siglip/#saev.data.siglip.Vit","title":"Vit(ckpt)","text":"

    Bases: Module, Transformer

    Source code in src/saev/data/siglip.py
    def __init__(self, ckpt: str):\n    super().__init__()\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model\n\n    assert isinstance(self.model, open_clip.timm_model.TimmModel)\n
    "},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_resize","title":"make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS) staticmethod","text":"

    Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.

    Source code in src/saev/data/siglip.py
    @staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -> Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    from PIL import Image\n\n    def resize(img: Image.Image) -> Image.Image:\n        # SigLIP typically uses 224x224 or 384x384 images\n        # We'll assume 224x224 for simplicity\n        resize_size_px = (int(224 * scale), int(224 * scale))\n        return img.resize(resize_size_px, resample=resample)\n\n    return resize\n
    "},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_transforms","title":"make_transforms(ckpt, n_patches_per_img) staticmethod","text":"

    Create transforms for preprocessing: (img_transform, sample_transform | None).

    Source code in src/saev/data/siglip.py
    @staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -> tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n
    "},{"location":"api/data/transforms/","title":"saev.data.transforms","text":""},{"location":"api/data/transforms/#saev.data.transforms.conv2d_to_tokens","title":"conv2d_to_tokens(x_bchw, conv)","text":"

    Conv2d then flatten spatial to L, return (B, L, D).

    Source code in src/saev/data/transforms.py
    @jaxtyped(typechecker=beartype.beartype)\ndef conv2d_to_tokens(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -> Float[Tensor, \"b n d\"]:\n    \"\"\"Conv2d then flatten spatial to L, return (B, L, D).\"\"\"\n    y_bdhw = conv(x_bchw)\n    return einops.rearrange(y_bdhw, \"b d h w -> b (h w) d\")\n
    "},{"location":"api/data/transforms/#saev.data.transforms.resize_to_patch_grid","title":"resize_to_patch_grid(img, *, p, n, resample=Image.LANCZOS)","text":"

    Resize image to (w, h) so that: - w % p == 0, h % p == 0 - (h/p) * (w/p) == N - Minimizes change in aspect ratio.

    Source code in src/saev/data/transforms.py
    @beartype.beartype\ndef resize_to_patch_grid(\n    img: Image.Image,\n    *,\n    p: int,\n    n: int,\n    resample: Image.Resampling | int = Image.LANCZOS,\n) -> Image.Image:\n    \"\"\"\n    Resize image to (w, h) so that:\n      - w % p == 0, h % p == 0\n      - (h/p) * (w/p) == N\n      - Minimizes change in aspect ratio.\n    \"\"\"\n    if p <= 0 or n <= 0:\n        raise ValueError(\"p and n must be positive integers\")\n\n    w0, h0 = img.size\n    a0 = w0 / h0\n\n    # Find the aspect ratio closest to a0\n    best_c = 0\n    best_dist = float(\"inf\")\n    for i in range(1, int(math.sqrt(n) + 1)):\n        if n % i != 0:\n            continue\n\n        for d in (i, n // i):\n            c, r = d, n // d\n            aspect = c / r\n            dist = abs(aspect - a0)\n\n            if dist < best_dist:\n                best_c = d\n                best_dist = dist\n\n    c = best_c\n    r = n // c\n    w, h = c * p, r * p\n    return img.resize((w, h), resample=resample)\n
    "},{"location":"api/data/transforms/#saev.data.transforms.unfolded_conv2d","title":"unfolded_conv2d(x_bchw, conv)","text":"

    Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels. Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.

    Source code in src/saev/data/transforms.py
    @jaxtyped(typechecker=beartype.beartype)\ndef unfolded_conv2d(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -> Float[Tensor, \"b n d\"]:\n    \"\"\"\n    Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels.\n    Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.\n    \"\"\"\n    k = conv.kernel_size[0]\n\n    assert conv.kernel_size == (k, k)\n    assert conv.stride == (k, k)\n    assert conv.padding == (0, 0)\n    assert conv.groups == 1\n    assert conv.dilation == (1, 1)\n\n    *b, c, h, w = x_bchw.shape\n\n    assert h % k == 0 and w % k == 0\n\n    tokens_bnd = einops.rearrange(\n        x_bchw, \"b c (hp p1) (wp p2) -> b (hp wp) (c p1 p2)\", p1=k, p2=k\n    ).contiguous()\n    w_dp = conv.weight.reshape(conv.out_channels, c * k * k)\n    tokens_bnd = tokens_bnd @ w_dp.T\n    if conv.bias is not None:\n        tokens_bnd = tokens_bnd + conv.bias[None, None, :]\n    return tokens_bnd\n
    "},{"location":"api/framework/inference/","title":"saev.framework.inference","text":"

    Script for dumping SAE inference artifacts in a single pass over the dataset.

    Default mode writes 5 files:

    1. mean_values.pt
    2. sparsity.pt
    3. distributions.pt
    4. token_acts.npz
    5. metrics.json

    If save=False, only metrics.json is written.

    metrics.json is serialized from saev.metrics.Metrics.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config","title":"Config(run=pathlib.Path('./runs/abcdefg'), data=OrderedConfig(), n_dists=25, ignore_labels=list(), force_recompute=False, save=True, device='cuda', slurm_acct='', slurm_partition='', n_hours=4.0, mem_gb=80, log_to=os.path.join('.', 'logs')) dataclass","text":"

    Configuration for computing image activations.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.data","title":"data = OrderedConfig() class-attribute instance-attribute","text":"

    Data configuration

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.device","title":"device = 'cuda' class-attribute instance-attribute","text":"

    Which accelerator to use.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.force_recompute","title":"force_recompute = False class-attribute instance-attribute","text":"

    Force recomputation even if files exist.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.ignore_labels","title":"ignore_labels = dataclasses.field(default_factory=list) class-attribute instance-attribute","text":"

    Which token labels to ignore when calculating summarized image activations.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.log_to","title":"log_to = os.path.join('.', 'logs') class-attribute instance-attribute","text":"

    Where to log Slurm job stdout/stderr.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.mem_gb","title":"mem_gb = 80 class-attribute instance-attribute","text":"

    Node memory in GB.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.n_dists","title":"n_dists = 25 class-attribute instance-attribute","text":"

    Number of features to save distributions for.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.n_hours","title":"n_hours = 4.0 class-attribute instance-attribute","text":"

    Slurm job length in hours.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.run","title":"run = pathlib.Path('./runs/abcdefg') class-attribute instance-attribute","text":"

    Path to the sae.pt file.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.save","title":"save = True class-attribute instance-attribute","text":"

    Whether to write token_acts/statistics files. If False, only metrics.json is written.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_acct","title":"slurm_acct = '' class-attribute instance-attribute","text":"

    Slurm account string. Empty means to not use Slurm.

    "},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_partition","title":"slurm_partition = '' class-attribute instance-attribute","text":"

    Slurm partition.

    "},{"location":"api/framework/inference/#saev.framework.inference.main","title":"main(cfg, sweep=None)","text":"

    Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.

    Parameters:

    Name Type Description Default cfg Annotated[Config, arg(name='')]

    Baseline config for inference.

    required sweep Path | None

    Path to .py file defining the sweep parameters.

    None Source code in src/saev/framework/inference.py
    @beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")], sweep: pathlib.Path | None = None\n):\n    \"\"\"\n    Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.\n\n    Args:\n        cfg: Baseline config inference.\n        sweep: Path to .py file defining the sweep parameters.\n    \"\"\"\n\n    if sweep is not None:\n        sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    assert all(c.slurm_acct == cfgs[0].slurm_acct for c in cfgs)\n    cfg = cfgs[0]\n\n    if not cfg.slurm_acct:\n        for i, cfg_item in enumerate(cfgs, start=1):\n            logger.info(\"Running config %d/%d locally.\", i, len(cfgs))\n            worker_fn(cfg_item)\n        logger.info(\"Jobs done.\")\n        return 0\n\n    import submitit\n    from submitit.core.utils import UncompletedJobError\n\n    executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n    executor.update_parameters(\n        job_name=\"sae-inference\",\n        time=int(cfg.n_hours * 60),\n        partition=cfg.slurm_partition,\n        gpus_per_node=1,\n        ntasks_per_node=1,\n        mem=f\"{cfg.mem_gb}GB\",\n        stderr_to_stdout=True,\n        account=cfg.slurm_acct,\n    )\n    with executor.batch():\n        jobs = []\n        for i, cfg in enumerate(cfgs):\n            do, reason, _ = need_compute(cfg)\n            if not do:\n                continue\n\n            logger.info(reason)\n            jobs.append(executor.submit(worker_fn, cfg))\n\n    time.sleep(5.0)\n\n    for i, job in enumerate(jobs, start=1):\n        logger.info(\"Job 
%d/%d: %s %s\", i, len(jobs), job.job_id, job.state)\n\n    for i, job in enumerate(jobs, start=1):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", i, len(jobs))\n        except UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, i)\n\n    logger.info(\"Jobs done.\")\n    return 0\n
    "},{"location":"api/framework/saev.framework/","title":"saev.framework","text":"

    Submitit entrypoint modules for SAE workflows.

    saev.framework is for script-like modules (e.g. train/inference/shards) that need importable module paths for submitit launchers. Place reusable data/model utilities outside this package.

    "},{"location":"api/framework/shards/","title":"saev.framework.shards","text":"

    To save lots of activations, we want to do things in parallel, with lots of slurm jobs, and save multiple files, rather than just one.

    This script handles that additional complexity.

    Conceptually, activations are either thought of as

    1. A single [n_imgs, n_layers, (n_patches + 1), d_model] tensor. This is a dataset.
    2. Multiple [n_imgs_per_shard, n_layers, (n_patches + 1), d_model] tensors. This is a set of sharded activations.
    "},{"location":"api/framework/shards/#saev.framework.shards.Config","title":"Config(data=datasets.Imagenet(), shards_root=pathlib.Path('$SAEV_SCRATCH/saev/shards/'), family='clip', ckpt='ViT-L-14/openai', batch_size=1024, n_workers=8, d_model=1024, layers=(lambda: [-2])(), content_tokens_per_example=256, cls_token=True, pixel_agg=PixelAgg.MAJORITY, max_tokens_per_shard=2400000, ssl=True, device='cuda', n_hours=24.0, slurm_acct='', slurm_partition='', log_to='./logs') dataclass","text":"

    Configuration for calculating and saving ViT activations.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.batch_size","title":"batch_size = 1024 class-attribute instance-attribute","text":"

    Batch size for ViT inference.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.ckpt","title":"ckpt = 'ViT-L-14/openai' class-attribute instance-attribute","text":"

    Specific model checkpoint.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.cls_token","title":"cls_token = True class-attribute instance-attribute","text":"

    Whether the model has a [CLS] token.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.content_tokens_per_example","title":"content_tokens_per_example = 256 class-attribute instance-attribute","text":"

    Number of content tokens per example (depends on model).

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.d_model","title":"d_model = 1024 class-attribute instance-attribute","text":"

    Dimension of the ViT activations (depends on model).

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.data","title":"data = dataclasses.field(default_factory=(datasets.Imagenet)) class-attribute instance-attribute","text":"

    Which dataset to use.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.device","title":"device = 'cuda' class-attribute instance-attribute","text":"

    Which device to use.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.family","title":"family = 'clip' class-attribute instance-attribute","text":"

    Which model family.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.layers","title":"layers = dataclasses.field(default_factory=(lambda: [-2])) class-attribute instance-attribute","text":"

    Which layers to save. By default, the second-to-last layer.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.log_to","title":"log_to = './logs' class-attribute instance-attribute","text":"

    Where to log Slurm job stdout/stderr.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.max_tokens_per_shard","title":"max_tokens_per_shard = 2400000 class-attribute instance-attribute","text":"

    Maximum number of activations per shard; 2.4M is approximately 10GB for 1024-dimensional 4-byte activations.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.n_hours","title":"n_hours = 24.0 class-attribute instance-attribute","text":"

    Slurm job length in hours.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.n_workers","title":"n_workers = 8 class-attribute instance-attribute","text":"

    Number of dataloader workers.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.shards_root","title":"shards_root = pathlib.Path('$SAEV_SCRATCH/saev/shards/') class-attribute instance-attribute","text":"

    Where to write shards.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_acct","title":"slurm_acct = '' class-attribute instance-attribute","text":"

    Slurm account string. Empty means to not use Slurm.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_partition","title":"slurm_partition = '' class-attribute instance-attribute","text":"

    Slurm partition.

    "},{"location":"api/framework/shards/#saev.framework.shards.Config.ssl","title":"ssl = True class-attribute instance-attribute","text":"

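    Whether to verify SSL certificates. Setting this to False disables certificate verification when downloading model weights (not recommended).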

    "},{"location":"api/framework/shards/#saev.framework.shards.cli","title":"cli(cfg)","text":"

    Save ViT activations for use later on.

    Parameters:

    Name Type Description Default cfg Annotated[Config, arg(name='')]

    Configuration for activations.

    required Source code in src/saev/framework/shards.py
    @beartype.beartype\ndef cli(cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")]):\n    \"\"\"\n    Save ViT activations for use later on.\n\n    Args:\n        cfg: Configuration for activations.\n    \"\"\"\n    logger = logging.getLogger(\"dump\")\n\n    if not cfg.ssl:\n        logger.warning(\"Ignoring SSL certs. Try not to do this!\")\n        # https://github.com/openai/whisper/discussions/734#discussioncomment-4491761\n        # Ideally we don't have to disable SSL but we are only downloading weights.\n        import ssl\n\n        ssl._create_default_https_context = ssl._create_unverified_context\n\n    from saev.data import shards\n\n    kwargs = dict(\n        family=cfg.family,\n        ckpt=cfg.ckpt,\n        content_tokens_per_example=cfg.content_tokens_per_example,\n        cls_token=cfg.cls_token,\n        d_model=cfg.d_model,\n        layers=cfg.layers,\n        data=cfg.data,\n        batch_size=cfg.batch_size,\n        n_workers=cfg.n_workers,\n        max_tokens_per_shard=cfg.max_tokens_per_shard,\n        shards_root=cfg.shards_root,\n        device=cfg.device,\n        pixel_agg=cfg.pixel_agg,\n    )\n\n    # Actually record activations.\n    if cfg.slurm_acct:\n        import submitit\n\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n        executor.update_parameters(\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            cpus_per_task=cfg.n_workers + 4,\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n\n        job = executor.submit(shards.worker_fn, **kwargs)\n        logger.info(\"Running job '%s'.\", job.job_id)\n        job.result()\n\n    else:\n        shards.worker_fn(**kwargs)\n
    "},{"location":"api/framework/train/","title":"saev.framework.train","text":"

    Trains many SAEs in parallel to amortize the cost of loading a single batch of data over many SAE training runs.

    Checklist for making sure your training doesn't suck:

    • [ ] Data scaling: scale vectors so their average L2 norm is sqrt(n).
    • [ ] Initialize b_e such that each feature activates 10K * d_model / (n * d_sae) of the time, which means that on average, each example activates 10K features.
    • [x] Initialize b_d to 0.
    • [x] Sweep learning rate and sparsity coefficients.
    • [ ] Decay learning rate to 0 over the last 20% of training.
    • [ ] Warmup sparsity over all of training.
    • [x] Gradient clipping (clip at 1 with clip_grad_norm)
    • [x] Track dead latents through training
    "},{"location":"api/framework/train/#saev.framework.train.Config","title":"Config(train_data=saev.data.ShuffledConfig(), val_data=saev.data.ShuffledConfig(), n_train=100000000, n_val=10000000, sae=nn.SparseAutoencoderConfig(), objective=nn.objectives.Matryoshka(), n_sparsity_warmup=0, optim='adam', lr=0.0004, n_lr_warmup=500, grad_clip=1.0, track=True, wandb_project='saev', tags=(), log_every=25, runs_root=pathlib.Path('$SAEV_NFS/saev/runs'), device='cuda', seed=42, slurm_acct='', slurm_partition='', n_hours=24.0, mem_gb=128, log_to=os.path.join('.', 'logs')) dataclass","text":"

    Configuration for training a sparse autoencoder on a vision transformer.

    "},{"location":"api/framework/train/#saev.framework.train.Config.device","title":"device = 'cuda' class-attribute instance-attribute","text":"

    Hardware device.

    "},{"location":"api/framework/train/#saev.framework.train.Config.grad_clip","title":"grad_clip = 1.0 class-attribute instance-attribute","text":"

    Maximum gradient norm across all SAE parameters.

    "},{"location":"api/framework/train/#saev.framework.train.Config.log_every","title":"log_every = 25 class-attribute instance-attribute","text":"

    How often to log to WandB.

    "},{"location":"api/framework/train/#saev.framework.train.Config.log_to","title":"log_to = os.path.join('.', 'logs') class-attribute instance-attribute","text":"

    Where to log Slurm job stdout/stderr.

    "},{"location":"api/framework/train/#saev.framework.train.Config.lr","title":"lr = 0.0004 class-attribute instance-attribute","text":"

    Learning rate.

    "},{"location":"api/framework/train/#saev.framework.train.Config.mem_gb","title":"mem_gb = 128 class-attribute instance-attribute","text":"

    Node memory in GB.

    "},{"location":"api/framework/train/#saev.framework.train.Config.n_hours","title":"n_hours = 24.0 class-attribute instance-attribute","text":"

    Slurm job length in hours.

    "},{"location":"api/framework/train/#saev.framework.train.Config.n_lr_warmup","title":"n_lr_warmup = 500 class-attribute instance-attribute","text":"

    Number of learning rate warmup steps.

    "},{"location":"api/framework/train/#saev.framework.train.Config.n_sparsity_warmup","title":"n_sparsity_warmup = 0 class-attribute instance-attribute","text":"

    Number of sparsity coefficient warmup steps.

    "},{"location":"api/framework/train/#saev.framework.train.Config.n_train","title":"n_train = 100000000 class-attribute instance-attribute","text":"

    Number of SAE training samples.

    "},{"location":"api/framework/train/#saev.framework.train.Config.n_val","title":"n_val = 10000000 class-attribute instance-attribute","text":"

    Number of SAE evaluation samples.

    "},{"location":"api/framework/train/#saev.framework.train.Config.objective","title":"objective = nn.objectives.Matryoshka() class-attribute instance-attribute","text":"

    SAE objective configuration.

    "},{"location":"api/framework/train/#saev.framework.train.Config.optim","title":"optim = 'adam' class-attribute instance-attribute","text":"

    Optimizer for training.

    "},{"location":"api/framework/train/#saev.framework.train.Config.runs_root","title":"runs_root = pathlib.Path('$SAEV_NFS/saev/runs') class-attribute instance-attribute","text":"

    Root directory for runs.

    "},{"location":"api/framework/train/#saev.framework.train.Config.sae","title":"sae = nn.SparseAutoencoderConfig() class-attribute instance-attribute","text":"

    SAE configuration.

    "},{"location":"api/framework/train/#saev.framework.train.Config.seed","title":"seed = 42 class-attribute instance-attribute","text":"

    Random seed.

    "},{"location":"api/framework/train/#saev.framework.train.Config.slurm_acct","title":"slurm_acct = '' class-attribute instance-attribute","text":"

    Slurm account string. Empty means to not use Slurm.

    "},{"location":"api/framework/train/#saev.framework.train.Config.slurm_partition","title":"slurm_partition = '' class-attribute instance-attribute","text":"

    Slurm partition.

    "},{"location":"api/framework/train/#saev.framework.train.Config.tags","title":"tags = () class-attribute instance-attribute","text":"

    Tags to add to WandB run.

    "},{"location":"api/framework/train/#saev.framework.train.Config.track","title":"track = True class-attribute instance-attribute","text":"

    Whether to track with WandB.

    "},{"location":"api/framework/train/#saev.framework.train.Config.train_data","title":"train_data = saev.data.ShuffledConfig() class-attribute instance-attribute","text":"

    Training data.

    "},{"location":"api/framework/train/#saev.framework.train.Config.val_data","title":"val_data = saev.data.ShuffledConfig() class-attribute instance-attribute","text":"

    Validation data.

    "},{"location":"api/framework/train/#saev.framework.train.Config.wandb_project","title":"wandb_project = 'saev' class-attribute instance-attribute","text":"

    WandB project name.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics","title":"EvalMetrics(l0, l1, mse, normalized_mse, sse_sae, sse_baseline, n_dead, n_almost_dead, n_dense, freqs, mean_values, almost_dead_threshold, dense_threshold) dataclass","text":"

    Results of evaluating a trained SAE on a dataset.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.almost_dead_threshold","title":"almost_dead_threshold instance-attribute","text":"

    Threshold for an \"almost dead\" neuron.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.dense_threshold","title":"dense_threshold instance-attribute","text":"

    Threshold for a dense neuron.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.freqs","title":"freqs instance-attribute","text":"

    How often each feature fired.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l0","title":"l0 instance-attribute","text":"

    Mean L0 across all examples.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l1","title":"l1 instance-attribute","text":"

    Mean L1 across all examples.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mean_values","title":"mean_values instance-attribute","text":"

    The mean value for each feature when it did fire.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mse","title":"mse instance-attribute","text":"

    Mean MSE across all examples.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_almost_dead","title":"n_almost_dead instance-attribute","text":"

    Number of neurons that fired on fewer than almost_dead_threshold of examples.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dead","title":"n_dead instance-attribute","text":"

    Number of neurons that never fired on any example.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dense","title":"n_dense instance-attribute","text":"

    Number of neurons that fired on more than dense_threshold of examples.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.normalized_mse","title":"normalized_mse instance-attribute","text":"

    Normalized reconstruction MSE (SAE SSE / mean-baseline SSE).

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_baseline","title":"sse_baseline instance-attribute","text":"

    Total reconstruction sum-squared error for the mean baseline.

    "},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_sae","title":"sse_sae instance-attribute","text":"

    Total reconstruction sum-squared error for the SAE.

    "},{"location":"api/framework/train/#saev.framework.train.evaluate","title":"evaluate(cfgs, saes, objectives)","text":"

    Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.

    The metrics computed are mean L0/L1/MSE losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values. A list of EvalMetrics is returned, one for each SAE.

    Source code in src/saev/framework/train.py
    @beartype.beartype\n@torch.no_grad()\ndef evaluate(\n    cfgs: list[Config], saes: torch.nn.ModuleList, objectives: torch.nn.ModuleList\n) -> list[EvalMetrics]:\n    \"\"\"\n    Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.\n\n    The metrics computed are mean ``L0``/``L1``/``MSE`` losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values.  A list of `EvalMetrics` is returned, one for each SAE.\n    \"\"\"\n\n    torch.cuda.empty_cache()\n\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    saes.eval()\n    objectives.eval()\n\n    cfg = cfgs[0]\n\n    almost_dead_lim = 1e-7\n    dense_lim = 1e-2\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.val_data)\n    n_val = min(dataloader.n_samples, cfg.n_val)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, n_val)\n\n    n_fired = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    values = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    total_l0_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_l1_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_mse_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_sse_sae = torch.zeros(len(cfgs), dtype=torch.float64, device=cfg.device)\n    sum_sq = torch.zeros((), dtype=torch.float64, device=cfg.device)\n    sum_vec = torch.zeros(\n        (saes[0].cfg.d_model,), dtype=torch.float64, device=cfg.device\n    )\n    n_tokens = 0\n\n    for batch in helpers.progress(dataloader, desc=\"eval\", every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        batch_size = acts_BD.shape[0]\n        acts_BD_f64 = acts_BD.to(torch.float64)\n        sum_sq += torch.sum(acts_BD_f64 * acts_BD_f64)\n        sum_vec += 
acts_BD_f64.sum(dim=0)\n        n_tokens += batch_size\n        for i, (sae, objective) in enumerate(zip(saes, objectives)):\n            # Objective now handles the forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            # Get f_x for metrics\n            residual = acts_BD - fwd.x_hats[:, -1, :]\n            total_sse_sae[i] += torch.sum((residual.to(torch.float64)) ** 2)\n            n_fired[i] += einops.reduce(\n                fwd.f_x > 0, \"batch d_sae -> d_sae\", \"sum\"\n            ).cpu()\n            values[i] += einops.reduce(fwd.f_x, \"batch d_sae -> d_sae\", \"sum\").cpu()\n            total_l0_sum[i] += loss.l0.cpu().item() * batch_size\n            total_l1_sum[i] += loss.l1.cpu().item() * batch_size\n            total_mse_sum[i] += loss.mse.cpu().item() * batch_size\n\n    msg = \"Validation dataloader yielded zero tokens; cannot compute normalized MSE.\"\n    assert n_tokens > 0, msg\n    sum_vec_sq = torch.dot(sum_vec, sum_vec)\n    sse_baseline = sum_sq - sum_vec_sq / n_tokens\n    msg = (\n        f\"Validation baseline variance non-positive: \"\n        f\"sse_baseline={sse_baseline.item():.6e}\"\n    )\n    assert sse_baseline > 0, msg\n    sse_baseline_value = sse_baseline.item()\n\n    mean_values = values / n_fired\n    freqs = n_fired / n_tokens\n\n    l0 = (total_l0_sum / n_tokens).tolist()\n    l1 = (total_l1_sum / n_tokens).tolist()\n    mse = (total_mse_sum / n_tokens).tolist()\n    sse_sae = total_sse_sae.tolist()\n    normalized_mse = (total_sse_sae / sse_baseline_value).tolist()\n    sse_baseline_all = [sse_baseline_value] * len(cfgs)\n\n    n_dead = einops.reduce(freqs == 0, \"n_saes d_sae -> n_saes\", \"sum\").tolist()\n    n_almost_dead = einops.reduce(\n        freqs < almost_dead_lim, \"n_saes d_sae -> n_saes\", \"sum\"\n    ).tolist()\n    n_dense = einops.reduce(freqs > dense_lim, \"n_saes d_sae -> n_saes\", \"sum\").tolist()\n\n    metrics = []\n    for i in range(len(cfgs)):\n        
metrics.append(\n            EvalMetrics(\n                l0=l0[i],\n                l1=l1[i],\n                mse=mse[i],\n                normalized_mse=normalized_mse[i],\n                sse_sae=sse_sae[i],\n                sse_baseline=sse_baseline_all[i],\n                n_dead=n_dead[i],\n                n_almost_dead=n_almost_dead[i],\n                n_dense=n_dense[i],\n                freqs=freqs[i],\n                mean_values=mean_values[i],\n                almost_dead_threshold=almost_dead_lim,\n                dense_threshold=dense_lim,\n            )\n        )\n\n    return metrics\n
    "},{"location":"api/framework/train/#saev.framework.train.main","title":"main(cfg, sweep=None, max_parallel=None)","text":"

    Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.

    Parameters:

    Name Type Description Default cfg Annotated[Config, arg(name='')]

    Baseline config for training an SAE.

    required sweep Path | None

    Path to .py file defining the sweep parameters.

    None max_parallel int | None

    Maximum SAEs to train concurrently within a single worker.

    None Source code in src/saev/framework/train.py
    @beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")],\n    sweep: pathlib.Path | None = None,\n    max_parallel: int | None = None,\n):\n    \"\"\"\n    Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.\n\n    Args:\n        cfg: Baseline config for training an SAE.\n        sweep: Path to .py file defining the sweep parameters.\n        max_parallel: Maximum SAEs to train concurrently within a single worker.\n    \"\"\"\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n\n    import submitit\n\n    if sweep is not None:\n        sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    cfgs = split_cfgs(cfgs)\n    # codex resume 019ac16a-dc07-78e3-82c7-e5c08a6c6f0c\n    if max_parallel:\n        cfgs = [\n            subgroup\n            for group in cfgs\n            for subgroup in [\n                group[start:end]\n                for start, end in helpers.batched_idx(len(group), max_parallel)\n            ]\n        ]\n\n    logger.info(\"Running %d training jobs.\", len(cfgs))\n\n    # Use the first resolved config for submitit parameters (n_hours, mem_gb, etc.) 
so that sweep values take effect instead of CLI defaults.\n    cfg = cfgs[0][0]\n\n    if cfg.slurm_acct:\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n        executor.update_parameters(\n            job_name=\"sae-train\",\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            mem=f\"{cfg.mem_gb}GB\",\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n    else:\n        executor = submitit.DebugExecutor(folder=cfg.log_to)\n\n    try:\n        cloudpickle.dumps(worker_fn)\n        for group in cfgs:\n            cloudpickle.dumps(group)\n    except TypeError as err:\n        raise AssertionError(f\"Failed to pickle: {err}\")\n\n    with executor.batch():\n        jobs = [executor.submit(worker_fn, group) for group in cfgs]\n\n    # Give the executor five seconds to fire the jobs off.\n    time.sleep(5.0)\n\n    # Log initial status.\n    for j, job in enumerate(jobs):\n        logger.info(\"Job %d/%d: %s %s\", j + 1, len(jobs), job.job_id, job.state)\n\n    for j, job in enumerate(jobs):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", j + 1, len(jobs))\n        except submitit.core.utils.UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, j)\n\n    logger.info(\"Jobs done.\")\n
    "},{"location":"api/framework/train/#saev.framework.train.split_cfgs","title":"split_cfgs(cfgs)","text":"

    Splits configs into groups that can be parallelized.

    Parameters:

    Name Type Description Default cfgs list[Config]

    A list of configs from a sweep file.

    required

    Returns:

    Type Description list[list[Config]]

    A list of lists, where the configs in each sublist do not differ in any keys that are in CANNOT_PARALLELIZE. This means that each sublist is a valid \"parallel\" set of configs for train.

    Source code in src/saev/framework/train.py
    @beartype.beartype\ndef split_cfgs(cfgs: list[Config]) -> list[list[Config]]:\n    \"\"\"\n    Splits configs into groups that can be parallelized.\n\n    Arguments:\n        cfgs: A list of configs from a sweep file.\n\n    Returns:\n        A list of lists, where the configs in each sublist do not differ in any keys that are in `CANNOT_PARALLELIZE`. This means that each sublist is a valid \"parallel\" set of configs for `train`.\n    \"\"\"\n    groups = collections.defaultdict(list)\n    for cfg in cfgs:\n        key = _parallel_key(cfg)\n        groups[key].append(cfg)\n\n    return [\n        [\n            dataclasses.replace(\n                cfg,\n                train_data=dataclasses.replace(cfg.train_data, seed=cfg.seed),\n                val_data=dataclasses.replace(cfg.val_data, seed=cfg.seed),\n            )\n            for cfg in group\n        ]\n        for _, group in sorted(groups.items())\n    ]\n
    "},{"location":"api/framework/train/#saev.framework.train.train","title":"train(cfgs)","text":"

    Explicitly declare the optimizer, schedulers, dataloader, etc outside of main so that all the variables are dropped from scope and can be garbage collected.

    Source code in src/saev/framework/train.py
    @beartype.beartype\ndef train(\n    cfgs: list[Config],\n) -> tuple[\n    torch.nn.ModuleList, torch.nn.ModuleList, saev.utils.wandb.ParallelWandbRun, int\n]:\n    \"\"\"\n    Explicitly declare the optimizer, schedulers, dataloader, etc outside of `main` so that all the variables are dropped from scope and can be garbage collected.\n    \"\"\"\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    logger.info(\"Parallelizing %d runs.\", len(cfgs))\n\n    cfg = cfgs[0]\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.train_data)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, cfg.n_train)\n\n    saes, objectives, param_groups = make_saes(\n        [(c.sae, c.objective) for c in cfgs], dataloader\n    )\n\n    mode = \"online\" if cfg.track else \"disabled\"\n    tags = list(cfg.tags)\n\n    # Add metadata to configs for WandB logging\n    metadata_dict = dataclasses.asdict(dataloader.metadata)\n    wandb_configs = []\n    for c in cfgs:\n        cfg_dict = dataclasses.asdict(c)\n        cfg_dict[\"train_data\"][\"metadata\"] = metadata_dict\n        wandb_configs.append(cfg_dict)\n\n    run = saev.utils.wandb.ParallelWandbRun(\n        cfg.wandb_project, wandb_configs, mode, tags\n    )\n    slurm_job_id = os.environ.get(\"SLURM_JOB_ID\")\n    if slurm_job_id:\n        run.set_summary(\"slurm_job_id\", slurm_job_id)\n\n    # Build per-SAE bundles of optimizers/param_groups/schedulers so each config's LR and warmup drive both Muon and Adam param groups for that SAE. 
We reshape the flat param_groups into per-SAE lists because we need to:\n    #   (a) build schedulers with that SAE's cfg\n    #   (b) step/zero only that SAE's optimizers\n    #   (c) log that SAE's LR without fishing through a mixed flat list.\n    grouped_pgs: list[list[dict[str, object]]] = []\n    optimizers: list[list[torch.optim.Optimizer]] = []\n    lr_schedulers: list[list[saev.utils.scheduling.WarmupCosine]] = []\n\n    for i, (sae, cfg, param_group) in enumerate(zip(saes, cfgs, param_groups)):\n        if cfg.optim == \"adam\":\n            opts = [torch.optim.Adam([param_group], fused=True)]\n        elif cfg.optim == \"muon\":\n            muon_params = [p for p in sae.parameters() if p.ndim == 2]\n            msg = f\"Muon optimizer requires 2D params; SAE {i} has none.\"\n            assert muon_params, msg\n            adam_params = [p for p in sae.parameters() if p.ndim != 2]\n            msg = f\"Adam optimizer requires non-2D params; SAE {i} has none.\"\n            assert adam_params, msg\n\n            opts = [\n                torch.optim.Muon(muon_params, lr=0.0),\n                torch.optim.Adam(adam_params, lr=0.0, fused=True),\n            ]\n        else:\n            tp.assert_never(cfg.optim)\n\n        pgs = [pg for opt in opts for pg in opt.param_groups]\n        scheds = [\n            saev.utils.scheduling.WarmupCosine(\n                0.0, cfg.n_lr_warmup, cfg.lr, len(dataloader), 0.0\n            )\n            for _ in pgs\n        ]\n\n        optimizers.append(opts)\n        grouped_pgs.append(pgs)\n        lr_schedulers.append(scheds)\n\n    param_groups = grouped_pgs\n\n    saes.train()\n    saes = saes.to(cfg.device)\n    objectives.train()\n    objectives = objectives.to(cfg.device)\n\n    global_step, n_patches_seen = 0, 0\n    dl_monitor = DataloaderMonitor(dataloader)\n\n    for batch in helpers.progress(dataloader, every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        for 
sae in saes:\n            sae.normalize_w_dec()\n        # Forward passes and loss calculations.\n        losses = []\n        fwds = []\n        for sae, objective in zip(saes, objectives):\n            # Objective handles the SAE forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            losses.append(loss)\n            fwds.append(fwd)\n\n        n_patches_seen += len(acts_BD)\n\n        for loss in losses:\n            loss.loss.backward()\n\n        # remove parallel gradients or normalize columns?\n        for sae in saes:\n            sae.remove_parallel_grads()\n\n        # Calculate gradient norms before optimizer step\n        grad_norms = []\n        for sae, cfg in zip(saes, cfgs):\n            # Clip gradients and get the gradient norm\n            grad_norm = torch.nn.utils.clip_grad_norm_(\n                sae.parameters(), max_norm=cfg.grad_clip\n            )\n\n            grad_norms.append(grad_norm)\n\n        # Log metrics after gradient computation\n        if (global_step + 1) % cfg.log_every == 0:\n            with torch.no_grad():\n                now = time.time()\n                dl_metrics = dl_monitor.compute(now=now)\n\n                metadata = dataloader.metadata\n                entropy_metrics = statistics.calc_batch_entropy(\n                    batch[\"example_idx\"].to(\"cpu\"),\n                    batch[\"token_idx\"].to(\"cpu\"),\n                    metadata.n_examples,\n                    metadata.content_tokens_per_example,\n                )\n                dl_metrics.update(entropy_metrics)\n\n                acts_bd_f64 = acts_BD.to(torch.float64)\n                n_batch = acts_bd_f64.shape[0]\n                msg = \"Batch is empty; cannot compute normalized MSE.\"\n                assert n_batch > 0, msg\n                batch_sum_sq = torch.sum(acts_bd_f64 * acts_bd_f64)\n                batch_sum_vec = acts_bd_f64.sum(dim=0)\n                batch_baseline_sse = (\n                    
batch_sum_sq - torch.dot(batch_sum_vec, batch_sum_vec) / n_batch\n                )\n                msg = f\"Batch baseline variance non-positive: sse_baseline={batch_baseline_sse.item():.6e}\"\n                assert batch_baseline_sse > 0, msg\n                batch_baseline_sse_value = batch_baseline_sse.item()\n\n                metrics = []\n                for i, (loss, sae, objective, fwd) in enumerate(\n                    zip(losses, saes, objectives, fwds)\n                ):\n                    current_lr = param_groups[i][0][\"lr\"]\n                    # Explained variance: 1 - Var(x - x_hat) / Var(x)\n                    residual = acts_BD - fwd.x_hats[:, -1, :]\n                    batch_sse_sae_value = torch.sum(\n                        (residual.to(torch.float64)) ** 2\n                    ).item()\n                    normalized_mse_value = (\n                        batch_sse_sae_value / batch_baseline_sse_value\n                    )\n                    explained_var = 1 - residual.var() / acts_BD.var()\n\n                    # Dead unit percentage: fraction of units that never activate\n                    dead_pct = ((fwd.f_x.abs() > 1e-12).sum(0) == 0).float().mean()\n\n                    # Dictionary coherence: max |<w_i, w_j>| for i != j\n                    W = sae.W_dec  # (d_sae, d_model)\n                    # Normalize each row (each SAE feature)\n                    W_norm = W / W.norm(dim=1, keepdim=True)\n                    coherence = (W_norm @ W_norm.T).abs().triu(1).max()\n\n                    # Average decoder row L2 norm (since W_dec is d_sae x d_model)\n                    avg_w_row_norm = sae.W_dec.norm(dim=1).mean()\n\n                    metric = {\n                        **{f\"loss/{key}\": val for key, val in loss.metrics().items()},\n                        \"progress/n_patches_seen\": n_patches_seen,\n                        \"progress/learning_rate\": current_lr,\n                        
\"metrics/explained_variance\": explained_var.item(),\n                        \"metrics/dead_unit_pct\": dead_pct.item(),\n                        \"metrics/dictionary_coherence\": coherence.item(),\n                        \"metrics/avg_decoder_row_norm\": avg_w_row_norm.item(),\n                        \"metrics/grad_norm\": grad_norms[i].item(),\n                        \"metrics/sse_sae\": batch_sse_sae_value,\n                        \"metrics/sse_baseline\": batch_baseline_sse_value,\n                        \"metrics/normalized_mse\": normalized_mse_value,\n                        **dl_metrics,\n                    }\n\n                    metrics.append(metric)\n                run.log(metrics, step=global_step)\n\n                logger.info(\n                    \", \".join(\n                        f\"{key}: {value:.5f}\"\n                        for key, value in losses[0].metrics().items()\n                    )\n                )\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.step()\n\n        # Update LR and sparsity coefficients.\n        for pgs, scheds in zip(param_groups, lr_schedulers):\n            for pg, sched in zip(pgs, scheds):\n                pg[\"lr\"] = sched.step()\n\n        # for objective, scheduler in zip(objectives, sparsity_schedulers):\n        #     objective.sparsity_coeff = scheduler.step()\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.zero_grad()\n\n        global_step += 1\n\n    return saes, objectives, run, global_step\n
    "},{"location":"api/nn/modeling/","title":"saev.nn.modeling","text":"

    Neural network architectures for sparse autoencoders.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.AuxK","title":"AuxK(key='auxk', k_aux=512, alpha=1 / 32) dataclass","text":"

    AuxK auxiliary reconstruction loss for dead latents.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK","title":"BatchTopK(key='batch-top-k', top_k=32, sparsity=NoSparsity(), momentum=0.1, aux=AuxK()) dataclass","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK.top_k","title":"top_k = 32 class-attribute instance-attribute","text":"

    How many values are allowed to be non-zero per sample in the batch.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation","title":"BatchTopKActivation(cfg)","text":"

    Bases: Module

    BatchTopK activation and inference-time threshold for sparse autoencoders.

    This module implements a BatchTopK nonlinearity that enforces a fixed sparsity budget across a batch, together with an inference-time approximation that replaces the batch-coupled operation with a simple elementwise threshold.

    Training mode (model.train()): Given pre-activation codes x with shape [batch, d_sae], the BatchTopK activation flattens the batch to shape [batch * d_sae], selects the largest (batch * top_k) entries by value, and sets all other entries to zero. This enforces an average of exactly top_k active features per example while allowing the \"activation budget\" to move between examples in the batch.

    During training, we also estimate an inference threshold theta that approximates the effective cutoff induced by BatchTopK. For each batch, we compute the minimum positive activation that survives the BatchTopK mask and update an exponential moving average of this quantity. This running estimate plays the same role as BatchNorm running statistics: it is updated only in training mode and treated as fixed at inference.\n

    Eval mode (model.eval()): At inference time we do not apply a batch-coupled top-k, since that would make each example depend on the rest of the eval batch. Instead, we use the stored running threshold theta to define a JumpReLU nonlinearity:

        y = x if x > theta else 0\n\napplied elementwise and independently to each example. This preserves the approximate sparsity level learned during training, but makes the layer deterministic and sample-wise independent for evaluation, probing, and downstream use.\n
    Inputs

    x: Tensor of shape [batch, d_sae] containing pre-activation codes.

    Outputs

    Tensor of shape [batch, d_sae] with the same dtype and device as x, where either: - in training mode: exactly (batch * top_k) entries are non-zero across the batch due to the BatchTopK mask, or - in eval mode: entries are zeroed by an elementwise JumpReLU with the learned threshold theta.

    Source code in src/saev/nn/modeling.py
    def __init__(self, cfg: BatchTopK):\n    super().__init__()\n    self.cfg = cfg\n\n    self.register_buffer(\"threshold\", torch.tensor(0.0))\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation.forward","title":"forward(x)","text":"

    Apply top-k activation to each sample in the batch.

    Source code in src/saev/nn/modeling.py
    def forward(self, x: Float[Tensor, \"batch d_sae\"]) -> Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to each sample in the batch.\n    \"\"\"\n\n    if not self.training:\n        if self.threshold <= 0:\n            return torch.where(x > 0, x, torch.zeros_like(x))\n\n        return torch.where(x > self.threshold, x, torch.zeros_like(x))\n\n    bsz, d_sae = x.shape\n    x_flat = x.flatten()\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k * bsz, d_sae * bsz)\n    _, idxs = torch.topk(x_flat, k, sorted=False)\n    mask = torch.zeros_like(x_flat).scatter(-1, idxs, 1.0).reshape(x.shape)\n\n    x = torch.mul(mask, x)\n\n    with torch.no_grad():\n        pos = x[x > 0]\n        if pos.numel() >= 0:\n            self.threshold.mul_(1 - self.cfg.momentum).add_(\n                self.cfg.momentum * pos.min()\n            )\n\n    return x\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.NoAux","title":"NoAux(key='no-aux') dataclass","text":"

    No auxiliary loss (e.g., for ReLU).

    "},{"location":"api/nn/modeling/#saev.nn.modeling.NoSparsity","title":"NoSparsity(key='no-sparsity') dataclass","text":"

    No explicit sparsity penalty (e.g. for TopK/BatchTopK where k controls sparsity).

    "},{"location":"api/nn/modeling/#saev.nn.modeling.Relu","title":"Relu(key='relu', sparsity=L1Sparsity(coeff=0.0004), aux=NoAux()) dataclass","text":"

    Vanilla ReLU

    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder","title":"SparseAutoencoder(cfg)","text":"

    Bases: Module

    Sparse auto-encoder (SAE)

    Source code in src/saev/nn/modeling.py
    def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec.\n    #\n    # .clone() is critical here. Without it, W_enc is a transposed VIEW sharing storage with W_dec, which causes two bugs:\n    #\n    # 1. load_state_dict breaks: loading W_enc overwrites W_dec (shared memory), then loading W_dec overwrites W_enc. The loaded SAE ends up with one weight being the transpose of the other instead of the independently-trained values.\n    #\n    # 2. Any code that mutates W_dec in-place (e.g. normalize_w_dec) silently mutates W_enc too. The datapoint init in train.make_saes() relied on this accident: normalize_w_dec() kept W_enc columns unit-norm via shared storage. With .clone(), make_saes() must explicitly sync W_enc after normalizing W_dec.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.EncodeOut","title":"EncodeOut","text":"

    Bases: NamedTuple

    Outputs of encode: pre-activations and activated latents.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.Output","title":"Output","text":"

    Bases: NamedTuple

    Full SAE forward outputs for objectives and metrics.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.decode","title":"decode(f_x, *, prefixes=None)","text":"

    Decode latent features to reconstructions.

    Parameters:

    Name Type Description Default f_x Float[Tensor, 'batch d_sae']

    Latent features of shape (batch, d_sae)

    required prefixes Int64[Tensor, ' n_prefixes'] | None

    Optional tensor of prefix lengths for Matryoshka decoding.

    None

    Returns:

    Type Description Float[Tensor, 'batch n_prefixes d_model']

    Matryoshka reconstructions (batch, n_prefixes, d_model).

    Source code in src/saev/nn/modeling.py
    def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -> Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka cumulative decode\n    device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] > prefixes[:-1])\n    assert 1 <= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -> (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -> ... 
d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. Needs to be cleaned up.\n    return x_hats\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.forward","title":"forward(x)","text":"

    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.

    Parameters:

    Name Type Description Default x Float[Tensor, 'batch d_model']

    a batch of transformer activations.

    required Source code in src/saev/nn/modeling.py
    def forward(self, x: Float[Tensor, \"batch d_model\"]) -> Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.normalize_w_dec","title":"normalize_w_dec()","text":"

    Set W_dec to unit-norm columns.

    Source code in src/saev/nn/modeling.py
    @torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.remove_parallel_grads","title":"remove_parallel_grads()","text":"

    Update grads so that they remove the parallel component

    Source code in src/saev/nn/modeling.py
    @torch.no_grad()\ndef remove_parallel_grads(self):\n    \"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model -> d_sae\",\n    )\n\n    norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq > 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -> d_sae d_model\",\n    )\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig","title":"SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True) dataclass","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.activation","title":"activation = TopK() class-attribute instance-attribute","text":"

    Activation function.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_model","title":"d_model = 1024 class-attribute instance-attribute","text":"

    Size of x.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_sae","title":"d_sae = 1024 * 16 class-attribute instance-attribute","text":"

    Number of features in SAE latent space; size of f(x).

    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.normalize_w_dec","title":"normalize_w_dec = True class-attribute instance-attribute","text":"

    Whether to make sure W_dec has unit norm columns. See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".

    "},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_blend","title":"reinit_blend = 0.8 class-attribute instance-attribute","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"reinit_enc_dec_tranpose = True class-attribute instance-attribute","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.remove_parallel_grads","title":"remove_parallel_grads = True class-attribute instance-attribute","text":"

    Whether to remove gradients parallel to W_dec columns (which will be ignored because we force the columns to have unit norm). See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.TopK","title":"TopK(key='top-k', top_k=32, sparsity=NoSparsity(), aux=AuxK()) dataclass","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.TopK.top_k","title":"top_k = 32 class-attribute instance-attribute","text":"

    How many values are allowed to be non-zero.

    "},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation","title":"TopKActivation(cfg)","text":"

    Bases: Module

    Top-K activation function. For use as activation function of sparse encoder.

    Source code in src/saev/nn/modeling.py
    def __init__(self, cfg: TopK):\n    super().__init__()\n    self.cfg = cfg\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation.forward","title":"forward(x)","text":"

    Apply top-k activation to the input tensor.

    Source code in src/saev/nn/modeling.py
    def forward(self, x: Float[Tensor, \"batch d_sae\"]) -> Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to the input tensor.\n    \"\"\"\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k, d_sae)\n    _, idxs = torch.topk(x, k, dim=-1, sorted=False)\n    mask = torch.zeros_like(x).scatter(-1, idxs, 1.0)\n\n    return torch.mul(mask, x)\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.dump","title":"dump(fpath, sae)","text":"

    Save an SAE checkpoint to disk along with configuration, using the trick from equinox.

    Parameters:

    Name Type Description Default fpath Path | str

    filepath to save checkpoint to.

    required sae SparseAutoencoder

    sparse autoencoder checkpoint to save.

    required Source code in src/saev/nn/modeling.py
    @beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n
    "},{"location":"api/nn/modeling/#saev.nn.modeling.load","title":"load(fpath, *, device='cpu')","text":"

    Loads a sparse autoencoder from disk.

    Source code in src/saev/nn/modeling.py
    @beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -> SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. 
Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        
cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n
    "},{"location":"api/nn/objectives/","title":"saev.nn.objectives","text":""},{"location":"api/nn/objectives/#saev.nn.objectives.Loss","title":"Loss() dataclass","text":"

    The loss term for an autoencoder training batch.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.Loss.loss","title":"loss property","text":"

    Total loss.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka","title":"Matryoshka(n_prefixes=10, dead_threshold_tokens=10000000) dataclass","text":"

    Config for the Matryoshka loss for another arbitrary SAE class.

    Reference code is here: https://github.com/noanabeshima/matryoshka-saes and the original reading is https://sparselatents.com/matryoshka.html and https://arxiv.org/pdf/2503.17547

    "},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.dead_threshold_tokens","title":"dead_threshold_tokens = 10000000 class-attribute instance-attribute","text":"

    Tokens without activation before a latent is considered dead.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.n_prefixes","title":"n_prefixes = 10 class-attribute instance-attribute","text":"

    Number of random length prefixes to use for loss calculation.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss","title":"MatryoshkaLoss(mse, sparsity, l0, l1, aux, n_dead) dataclass","text":"

    Bases: Loss

    The composite loss terms for a training batch.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.aux","title":"aux instance-attribute","text":"

    Auxiliary loss term (e.g., AuxK).

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l0","title":"l0 instance-attribute","text":"

    Sum of L0 magnitudes of hidden activations for all prefix lengths.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l1","title":"l1 instance-attribute","text":"

    Sum of L1 magnitudes of hidden activations for all prefix lengths.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.loss","title":"loss property","text":"

    Total loss.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.mse","title":"mse instance-attribute","text":"

    Average of reconstruction loss (mean squared error) for all prefix lengths.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.n_dead","title":"n_dead instance-attribute","text":"

    Number of dead latents (per aux loss threshold).

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.sparsity","title":"sparsity instance-attribute","text":"

    Sparsity loss, typically lambda * L1.

    "},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaObjective","title":"MatryoshkaObjective(cfg)","text":"

    Bases: Objective

    Torch module for calculating the matryoshka loss for an SAE.

    Source code in src/saev/nn/objectives.py
    def __init__(self, cfg: Matryoshka):\n    super().__init__()\n    self.cfg = cfg\n    self.toks_since_active: Tensor | None = None\n
    "},{"location":"api/nn/objectives/#saev.nn.objectives.sample_prefixes","title":"sample_prefixes(d_sae, n_prefixes, min_prefix_length=1, pareto_power=0.5)","text":"

    Samples prefix lengths using a Pareto distribution. Derived from \"Learning Multi-Level Features with Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)

    Parameters:

    Name Type Description Default d_sae int

    Total number of latent dimensions

    required n_prefixes int

    Number of prefixes to sample

    required min_prefix_length int

    Minimum length of any prefix

    1 pareto_power float

    Power parameter for Pareto distribution (lower = more uniform)

    0.5

    Returns:

    Type Description Int64[Tensor, ' n_prefixes']

    torch.Tensor: Sorted prefix lengths

    Source code in src/saev/nn/objectives.py
    @torch.no_grad()\n@jaxtyped(typechecker=beartype.beartype)\ndef sample_prefixes(\n    d_sae: int, n_prefixes: int, min_prefix_length: int = 1, pareto_power: float = 0.5\n) -> Int64[Tensor, \" n_prefixes\"]:\n    \"\"\"\n    Samples prefix lengths using a Pareto distribution. Derived from \"Learning Multi-Level Features with\n    Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)\n\n    Args:\n        d_sae: Total number of latent dimensions\n        n_prefixes: Number of prefixes to sample\n        min_prefix_length: Minimum length of any prefix\n        pareto_power: Power parameter for Pareto distribution (lower = more uniform)\n\n    Returns:\n        torch.Tensor: Sorted prefix lengths\n    \"\"\"\n    if n_prefixes <= 1:\n        return torch.tensor([d_sae], dtype=torch.int64)\n\n    assert n_prefixes <= d_sae\n\n    # Calculate probability distribution favoring shorter prefixes\n    lengths = torch.arange(1, d_sae)\n    pareto_cdf = 1 - ((min_prefix_length / lengths.float()) ** pareto_power)\n    pareto_pdf = torch.cat([pareto_cdf[:1], pareto_cdf[1:] - pareto_cdf[:-1]])\n    probability_dist = pareto_pdf / pareto_pdf.sum()\n\n    # Sample and sort prefix lengths\n    sampled_indices = torch.multinomial(\n        probability_dist, num_samples=n_prefixes - 1, replacement=False\n    )\n\n    # Convert indices to actual prefix lengths\n    prefixes = lengths[sampled_indices]\n\n    # Add n_latents as the final prefix\n    prefixes = torch.cat((prefixes.detach().clone(), torch.tensor([d_sae])))\n\n    prefixes, _ = torch.sort(prefixes, descending=False)\n\n    return prefixes.to(torch.int64)\n
    "},{"location":"api/nn/saev.nn/","title":"saev.nn","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder","title":"SparseAutoencoder(cfg)","text":"

    Bases: Module

    Sparse auto-encoder (SAE)

    Source code in src/saev/nn/modeling.py
    def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec.\n    #\n    # .clone() is critical here. Without it, W_enc is a transposed VIEW sharing storage with W_dec, which causes two bugs:\n    #\n    # 1. load_state_dict breaks: loading W_enc overwrites W_dec (shared memory), then loading W_dec overwrites W_enc. The loaded SAE ends up with one weight being the transpose of the other instead of the independently-trained values.\n    #\n    # 2. Any code that mutates W_dec in-place (e.g. normalize_w_dec) silently mutates W_enc too. The datapoint init in train.make_saes() relied on this accident: normalize_w_dec() kept W_enc columns unit-norm via shared storage. With .clone(), make_saes() must explicitly sync W_enc after normalizing W_dec.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n
    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.EncodeOut","title":"EncodeOut","text":"

    Bases: NamedTuple

    Outputs of encode: pre-activations and activated latents.

    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.Output","title":"Output","text":"

    Bases: NamedTuple

    Full SAE forward outputs for objectives and metrics.

    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.decode","title":"decode(f_x, *, prefixes=None)","text":"

    Decode latent features to reconstructions.

    Parameters:

    Name Type Description Default f_x Float[Tensor, 'batch d_sae']

    Latent features of shape (batch, d_sae)

    required prefixes Int64[Tensor, ' n_prefixes'] | None

    Optional tensor of prefix lengths for Matryoshka decoding.

    None

    Returns:

    Type Description Float[Tensor, 'batch n_prefixes d_model']

    Matryoshka reconstructions (batch, n_prefixes, d_model).

    Source code in src/saev/nn/modeling.py
    def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -> Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka cumulative decode\n    device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] > prefixes[:-1])\n    assert 1 <= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -> (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -> ... 
d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. Needs to be cleaned up.\n    return x_hats\n
    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.forward","title":"forward(x)","text":"

    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.

    Parameters:

    Name Type Description Default x Float[Tensor, 'batch d_model']

    a batch of transformer activations.

    required Source code in src/saev/nn/modeling.py
    def forward(self, x: Float[Tensor, \"batch d_model\"]) -> Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n
    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.normalize_w_dec","title":"normalize_w_dec()","text":"

    Set W_dec to unit-norm columns.

    Source code in src/saev/nn/modeling.py
    @torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n
    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.remove_parallel_grads","title":"remove_parallel_grads()","text":"

    Update grads so that they remove the parallel component

    Source code in src/saev/nn/modeling.py
    @torch.no_grad()\ndef remove_parallel_grads(self):\n    \"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model -> d_sae\",\n    )\n\n    norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq > 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -> d_sae d_model\",\n    )\n
    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig","title":"SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True) dataclass","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.activation","title":"activation = TopK() class-attribute instance-attribute","text":"

    Activation function.

    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_model","title":"d_model = 1024 class-attribute instance-attribute","text":"

    Size of x.

    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_sae","title":"d_sae = 1024 * 16 class-attribute instance-attribute","text":"

    Number of features in SAE latent space; size of f(x).

    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.normalize_w_dec","title":"normalize_w_dec = True class-attribute instance-attribute","text":"

    Whether to make sure W_dec has unit norm columns. See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".

    "},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_blend","title":"reinit_blend = 0.8 class-attribute instance-attribute","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"reinit_enc_dec_tranpose = True class-attribute instance-attribute","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.remove_parallel_grads","title":"remove_parallel_grads = True class-attribute instance-attribute","text":"

    Whether to remove gradients parallel to W_dec columns (which will be ignored because we force the columns to have unit norm). See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.

    "},{"location":"api/nn/saev.nn/#saev.nn.dump","title":"dump(fpath, sae)","text":"

    Save an SAE checkpoint to disk along with configuration, using the trick from equinox.

    Parameters:

    Name Type Description Default fpath Path | str

    filepath to save checkpoint to.

    required sae SparseAutoencoder

    sparse autoencoder checkpoint to save.

    required Source code in src/saev/nn/modeling.py
    @beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n
    "},{"location":"api/nn/saev.nn/#saev.nn.load","title":"load(fpath, *, device='cpu')","text":"

    Loads a sparse autoencoder from disk.

    Source code in src/saev/nn/modeling.py
    @beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -> SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. 
Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        
cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n
    "},{"location":"api/utils/monitoring/","title":"saev.utils.monitoring","text":""},{"location":"api/utils/monitoring/#saev.utils.monitoring.DataloaderMonitor","title":"DataloaderMonitor(dataloader, process_factory=None)","text":"

    Tracks IO and CPU activity for the dataloader manager process and its children.

    The monitor owns the dataloader handle and psutil processes internally, so callers simply construct it with the dataloader and then call compute() whenever metrics are needed.

    Source code in src/saev/utils/monitoring.py
    def __init__(\n    self,\n    dataloader: object,\n    process_factory: Callable[[int], psutil.Process] | None = None,\n) -> None:\n    self.dataloader = dataloader\n    self.process_factory = process_factory or psutil.Process\n    self._reset_state()\n
    "},{"location":"api/utils/saev.utils/","title":"saev.utils","text":""},{"location":"api/utils/scheduling/","title":"saev.utils.scheduling","text":""},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter","title":"BatchLimiter(dataloader, n_samples)","text":"

    Limits the number of batches to only return n_samples total samples.

    Source code in src/saev/utils/scheduling.py
    def __init__(self, dataloader: DataLoaderLike, n_samples: int):\n    self.dataloader = dataloader\n    self.n_samples = n_samples\n    self.batch_size = dataloader.batch_size\n    self.drop_last = dataloader.drop_last\n
    "},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter.__getattr__","title":"__getattr__(name)","text":"

    Pass through attribute access to the wrapped dataloader.

    Source code in src/saev/utils/scheduling.py
    def __getattr__(self, name: str) -> Any:\n    \"\"\"Pass through attribute access to the wrapped dataloader.\"\"\"\n    # __getattr__ is only called when the attribute wasn't found on self\n    # So we delegate to the wrapped dataloader\n    try:\n        return getattr(self.dataloader, name)\n    except AttributeError:\n        # Re-raise with more context about where the attribute was not found\n        raise AttributeError(\n            f\"'{self.__class__.__name__}' object and its wrapped dataloader have no attribute '{name}'\"\n        )\n
    "},{"location":"api/utils/scheduling/#saev.utils.scheduling.Warmup","title":"Warmup(init, final, n_steps)","text":"

    Bases: Scheduler

    Linearly increases from init to final over n_steps steps.

    Source code in src/saev/utils/scheduling.py
    def __init__(self, init: float, final: float, n_steps: int):\n    self.final = final\n    self.init = init\n    self.n_steps = n_steps\n    self._step = 0\n
    "},{"location":"api/utils/scheduling/#saev.utils.scheduling.WarmupCosine","title":"WarmupCosine(init, n_warmup, peak, n_steps, final)","text":"

    Bases: Scheduler

    Linearly increases from init to peak over n_warmup steps, then decreases to final using cosine decay over n_steps - n_warmup steps.

    Source code in src/saev/utils/scheduling.py
    def __init__(\n    self, init: float, n_warmup: int, peak: float, n_steps: int, final: float\n):\n    self.init = init\n    self.peak = peak\n    self.final = final\n    self.n_warmup = n_warmup\n    self.n_steps = n_steps\n    self._step = 0\n
    "},{"location":"api/utils/statistics/","title":"saev.utils.statistics","text":""},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator","title":"PercentileEstimator(percentile, total, lr=0.001, shape=())","text":"Source code in src/saev/utils/statistics.py
    def __init__(\n    self,\n    percentile: float | int,\n    total: int,\n    lr: float = 1e-3,\n    shape: tuple[int, ...] = (),\n):\n    self.percentile = percentile\n    self.total = total\n    self.lr = lr\n\n    self._estimate = torch.zeros(shape)\n    self._step = 0\n
    "},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator.update","title":"update(x)","text":"

    Update the estimator with a new value.

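    This method nudges the running estimate toward the target percentile with a stochastic-approximation update rather than tracking P2-style markers. When a new value arrives, the estimate moves up if the value lies above it and down if it lies below, with the two directions weighted asymmetrically according to the requested percentile, and the step size decays linearly to zero as the number of updates approaches total.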

    Parameters:

    Name Type Description Default x float | Tensor

    The new value to incorporate into the estimation

    required Source code in src/saev/utils/statistics.py
    def update(self, x: float | Tensor):\n    \"\"\"\n    Update the estimator with a new value.\n\n    This method maintains the marker positions using the P2 algorithm rules. When a new value arrives, it's placed in the appropriate position relative to existing markers, and marker positions are adjusted to maintain their desired percentile positions.\n\n    Arguments:\n        x: The new value to incorporate into the estimation\n    \"\"\"\n    self._step += 1\n\n    step_size = self.lr * (self.total - self._step) / self.total\n\n    # Is a no-op if it's already on the same device.\n    if isinstance(x, Tensor):\n        self._estimate = self._estimate.to(x.device)\n\n    self._estimate += step_size * (\n        torch.sign(x - self._estimate) + 2 * self.percentile / 100 - 1.0\n    )\n
    "},{"location":"api/utils/statistics/#saev.utils.statistics.calc_batch_entropy","title":"calc_batch_entropy(example_idx, token_idx, n_examples, content_tokens_per_example)","text":"

    Compute entropy and coverage metrics for a batch of shuffled indices.

    The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.

    Source code in src/saev/utils/statistics.py
    @beartype.beartype\ndef calc_batch_entropy(\n    example_idx: IndexLike,\n    token_idx: IndexLike,\n    n_examples: int,\n    content_tokens_per_example: int,\n) -> dict[str, float]:\n    \"\"\"\n    Compute entropy and coverage metrics for a batch of shuffled indices.\n\n    The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.\n    \"\"\"\n    example_idx_t = _to_tensor(example_idx)\n    token_idx_t = _to_tensor(token_idx)\n    if n_examples <= 0:\n        raise ValueError(\"n_examples must be positive.\")\n    if content_tokens_per_example <= 0:\n        raise ValueError(\"content_tokens_per_example must be positive.\")\n\n    if example_idx_t.ndim != 1:\n        raise ValueError(\"example_idx must be 1D.\")\n    if token_idx_t.ndim != 1:\n        raise ValueError(\"token_idx must be 1D.\")\n    if example_idx_t.numel() == 0:\n        raise ValueError(\"example_idx must contain at least one element.\")\n\n    _assert_batch_dim(example_idx_t, token_idx_t)\n\n    example_metrics = _add_prefix(\n        \"loader/example\", _entropy_metrics(example_idx_t, n_examples)\n    )\n    token_metrics = _add_prefix(\n        \"loader/token\", _entropy_metrics(token_idx_t, content_tokens_per_example)\n    )\n\n    return {**example_metrics, **token_metrics}\n
    "},{"location":"api/utils/wandb/","title":"saev.utils.wandb","text":""},{"location":"api/utils/wandb/#saev.utils.wandb.ParallelWandbRun","title":"ParallelWandbRun(project, cfgs, mode, tags, dir='.wandb')","text":"

    Inspired by https://community.wandb.ai/t/is-it-possible-to-log-to-multiple-runs-simultaneously/4387

    Source code in src/saev/utils/wandb.py
    def __init__(\n    self,\n    project: str,\n    cfgs: list[dict[str, object]],\n    mode: str,\n    tags: list[str],\n    dir: str = \".wandb\",\n):\n    cfg, *cfgs = cfgs\n    self.project = project\n    self.cfgs = cfgs\n    self.mode = mode\n    self.tags = tags\n    self.dir = dir\n    self.summary_updates: dict[str, object] = {}\n\n    self.live_run = wandb.init(\n        project=project,\n        config=cfg,\n        mode=mode,\n        tags=tags,\n        dir=dir,\n        settings=wandb.Settings(init_timeout=300),\n    )\n\n    self.metric_queues: list[MetricQueue] = [[] for _ in self.cfgs]\n
    "},{"location":"developers/contributing/","title":"Contributing","text":""},{"location":"developers/contributing/#project-layout","title":"Project layout","text":"
    docs/\n    mkdocs.yml    # The configuration file.\n    src/\n        index.md  # The documentation homepage.\n        ...       # Other markdown pages, images and other files.\n
    "},{"location":"developers/datapoint-init/","title":"Datapoint Initialization","text":"

    Datapoint initialization is an SAE weight initialization strategy independently proposed by Anthropic and Pierre Peigne for improving SAE training.

    Conceptually, we initialize each decoder column to look like a real datapoint, so every latent starts with a patch of input space where it \"wins\" and gets some gradient. Here's the algorithm:

    1. Select \\(n\\) random data points from your training data.
    2. Compute the mean \\(\\mu\\) and zero-center the data: \\(x_0 = x - \\mu\\).
    3. Linearly blend each zero-centered datapoint with Kaiming initialization: \\(w = p \\cdot (x - \\mu) + (1 - p) \\cdot r\\) where \\(p\\) is your blend probability and \\(r\\) is a randomly sampled Kaiming initialization vector.
    4. Initialize \\(W_\\text{enc}\\) as a concatenation of \\(n\\) blended vectors.
    5. Initialize \\(W_\\text{dec}\\) as \\(W_\\text{enc}^T\\).

    Anthropic suggests \\(p = 0.8\\) for SAEs and 0.4 for \"weakly causal crosscoders\". I interpret this to mean that there is no universally appropriate \\(p\\).

    "},{"location":"developers/disk-layout/","title":"Storage & Run Manifest Spec (v1)","text":"

    There are two main locations:

    1. $SAEV_SCRATCH/saev/shards: where we store transformer activations (referred to as shards_root in the codebase).
    2. $SAEV_NFS/saev/runs: where we store checkpoints and other computed intermediate stuff like example images, probe1d results, etc. (referred to as runs_root in the codebase).

    Visually, these are:

    $SAEV_SCRATCH/saev/\n  shards/\n    <shard_hash>/\n      metadata.json\n      shards.json\n      acts000000.bin\n      acts000001.bin\n      ...\n      labels.bin\n

    and

    $SAEV_NFS/saev/\n  runs/\n    <run_id>/\n      checkpoint/           # output of train.py on <shard_hash>\n        sae.pt\n        config.json\n      links/                # Symlinks\n        train-shards        # $SCRATCH/saev/shards/<shard_hash>\n        train-dataset       # Whatever the original image dataset was\n        val-shards          # $SCRATCH/saev/shards/<shard_hash>\n        val-dataset         # Whatever the original image dataset was\n      inference/            # outputs from dump.py\n        <shard_hash>/\n          config.json\n          token_acts.npz\n          visuals/          # output of visuals.py\n

    Each $SAEV_SCRATCH/saev/shards/<shard_hash>/ MUST include:

    • metadata.json (UTF-8, canonical spec; see protocol.md)
    • shards.json (UTF-8, shard index and sizes; see protocol.md)
    • acts*.bin (binary shards; format in protocol.md)
    • labels.bin (binary patch labels aligned to shards; format in protocol.md)

    Note

    Immutability: Files under saev/shards/<shard_hash>/ MUST be treated as read-only after publication. Any change yields a new shard_hash.

    All CLI entrypoints should accept a single --run <path> argument. Every other path MUST be resolved from the run root:

    • ViT activations: links/shards \u2192 saev/shards/<shard_hash>
    • Dataset: links/dataset \u2192 Dataset root, wherever it is on disk.
    • SAE checkpoint: checkpoint/sae.pt

    Example resolution:

    run = pathlib.Path(cfg.run)\nshards_root = (run / \"links\" / \"shards\").resolve()\ndataset_root = (run / \"links\" / \"dataset\").resolve()\nckpt = run / \"checkpoint\" / \"sae.pt\"\nlabels = shards_root / \"labels.bin\"\n
    • $SAEV_SCRATCH and $SAEV_NFS should be set for all users/processes running saev tools.
    "},{"location":"developers/disk-layout/#faqs","title":"FAQs","text":"
    • Where do patch labels live? Next to acts*.bin in $SAEV_SCRATCH/saev/shards/<shard_hash>/labels.bin. Scripts discover them via links/shards/labels.bin.

    • Can I put datasets directly in $SAEV_SCRATCH? Sure, but not in $SAEV_SCRATCH/saev/shards.

    "},{"location":"developers/naming/","title":"Variable Naming","text":""},{"location":"developers/protocol/","title":"saev Sharded Activation File Protocol","text":"

    saev caches activations to disk rather than running ViT or LLM inference when training SAEs. Gemma Scope makes this decision as well (see Section 3.3.2 of https://arxiv.org/pdf/2408.05147). saev.data has a specific protocol to support this on OSC, a supercomputer center, and to take advantage of OSC's specific disk performance.

    Goal: loss-lessly persist very large Transformer (ViT or LLM) activations in a form that is:

    • mem-mappable
    • Parameterized solely by the experiment configuration (scripts/shards.py:Config)
    • Referenced by a content-hash, so identical configs collide, divergent ones never do
    • Can be read quickly in a random order for training, and can be read (slowly) with random-access for visuals.

    This document is the single normative source. Any divergence in code is a bug.

    "},{"location":"developers/protocol/#1-directory-layout","title":"1. Directory layout","text":"
    <dump_to>/<HASH>/\n    metadata.json    # UTF-8 JSON, human-readable, describes data-generating config\n    shards.json      # UTF-8 JSON, human-readable, describes shards.\n    acts000000.bin   # shard 0\n    acts000001.bin   # shard 1\n    ...\n    actsNNNNNN.bin   # shard NNNNNN  (zero-padded width=6)\n    labels.bin       # patch labels (optional)\n

    HASH = sha256(json.dumps(metadata, sort_keys=True, separators=(',', ':')).encode('utf-8')) Guards against silent config drift.

    "},{"location":"developers/protocol/#2-json-file-schemas","title":"2. JSON file schemas","text":""},{"location":"developers/protocol/#21-metadatajson","title":"2.1. metadata.json","text":"field type semantic family string \"clip\" \\| \"siglip\" \\| \"dinov2\" ckpt string model identifier (OpenCLIP, HF, etc.) layers int[] ViT residual\u2010block indices recorded patches_per_ex int example patches only (excludes CLS) cls_token bool true -> patch 0 is CLS, else no CLS d_model int activation dimensionality n_examples int total examples in dataset patches_per_shard int logical activations per shard (see #3) data object opaque dataset description dataset string absolute path to original dataset root dtype string numpy dtype. Fixed \"float32\" for now. protocol string \"2.1\" (shards after big refactor)

    The data object is base64.b64encode(pickle.dumps(img_ds)).decode('utf8').

    The dataset field stores the absolute path to the root directory of the original image dataset, allowing runs to create symlinks back to the source images for visualization and analysis.

    "},{"location":"developers/protocol/#22-shardsjson","title":"2.2. shards.json","text":"

    A single array of shard objects, each of which has the following fields:

    field type semantic name string shard filename (acts000000.bin). n_examples int the number of examples in the shard."},{"location":"developers/protocol/#3-shard-sizing-maths","title":"3. Shard sizing maths","text":"
    tokens_per_ex = patches_per_ex + (1 if cls_token else 0)\n\nexamples_per_shard = floor(patches_per_shard / (tokens_per_ex * len(layers)))\n\nshape_per_shard = (\n    examples_per_shard,\n    len(layers),\n    tokens_per_ex,\n    d_model,\n)\n

    patches_per_shard is a budget (default ~2.4 M) chosen so a shard is approximately 10 GiB for Float32 @ d_model = 1024.

    The last shard will have a smaller value for examples_per_shard; this value is documented in n_examples in shards.json

    "},{"location":"developers/protocol/#4-data-layout-and-global-indexing","title":"4. Data Layout and Global Indexing","text":"

    The entire dataset of activations is treated as a single logical 4D tensor with the shape (n_examples, len(layers), tokens_per_ex, d_model). This logical tensor is C-contiguous with axes ordered [Example, Layer, Token, Dimension].

    Physically, this tensor is split along the first axis (Example) into multiple shards, where each shard is a single binary file. The number of examples in each shard is constant, except for the final shard, which may be smaller.

    To locate an arbitrary activation vector, a reader must convert a logical coordinate (global_ex_idx, layer_value, token_idx) into a file path and an offset within that file.

    "},{"location":"developers/protocol/#41-definitions","title":"4.1 Definitions","text":"

    Let the parameters from metadata.json be:

    • L = len(layers)
    • P = patches_per_ex
    • T = P + (1 if cls_token else 0) (Total tokens per example)
    • D = d_model
    • S = n_examples from shards.json or examples_per_shard from Section 3 (shard sizing).
    "},{"location":"developers/protocol/#42-coordinate-transformations","title":"4.2 Coordinate Transformations","text":"

    Given a logical coordinate:

    • global_ex_idx: integer, with 0 <= global_ex_idx < n_examples
    • layer: integer, must be an element of layers
    • token_idx: integer, 0 <= token_idx < T

    The physical location is found as follows:

    1. Identify Shard:

      • shard_idx = global_ex_idx // S
      • ex_in_shard = global_ex_idx % S The target file is acts{shard_idx:06d}.bin.
    2. Identify Layer Index: The stored data contains a subset of the ViT's layers. The logical layer_value must be mapped to its index in the stored layers array.

      • layer_idx = layers.index(layer) A reader must raise an error if layer is not in layers.
    3. Calculate Offset: The data within a shard is a 4D tensor of shape (S, L, T, D). The offset to the first byte of the desired activation vector [ex_in_shard, layer_idx , token_idx] is:

      • offset_in_vectors = (ex_in_shard * L * T) + (layer_idx * T) + token_idx
      • offset_in_bytes = offset_in_vectors * D * 4 (assuming 4 bytes for float32)

    A reader can then seek to offset_in_bytes and read \\(D \\times 4\\) bytes to retrieve the vector.

    Alternatively, rather than calculate the offset, readers can memmap the shard, then use Numpy indexing to get the activation vector.

    "},{"location":"developers/protocol/#43-token-axis-layout","title":"4.3 Token Axis Layout","text":"

    The token axis of length \\(T\\) is ordered as follows. If cls_token is true: index 0 is the [CLS] token activation, and indices 1 to \\(P\\) are the patch token activations. If cls_token is false: indices 0 to \\(P-1\\) are the patch token activations.

    The relative order of patch tokens is preserved exactly as produced by the upstream Vision Transformer.

    "},{"location":"developers/protocol/#5-versioning-compatibility","title":"5 Versioning & compatibility","text":"
    • Major changes (shape reorder, dtype switch, new required JSON keys) increment the major protocol version number at the top of this document and must emit a breaking warning in loader code.
    • Minor, backward-compatible additions (new optional JSON key) merely update this doc and the minor protocol version number.

    That's it. Anything else you find in code that contradicts this document, fix the code or update the spec.

    "},{"location":"developers/workflows/","title":"Workflows","text":"
    1. Generate inference activations (and thus visuals) for both training and validation splits.
    "},{"location":"users/bird-mae-debugging/","title":"Debugging Bird-MAE Activations","text":"

    This is an example of the kind of debugging you might have to do when training SAEs on a new model. The short version: Bird-MAE has an \"emergent outlier feature\" in dimension 296 that blows up after the first MLP. The fix is to record activations after the pre-MLP LayerNorm (block.norm2) instead of the raw residual stream, because the LayerNorm learns to suppress the outlier.

    "},{"location":"users/bird-mae-debugging/#symptom-80-dead-neurons","title":"Symptom: 80% dead neurons","text":"

    While training TopK SAEs on BirdMAE activations taken from birdsong, ~80% of my neurons were dead from the very start of training.

    "},{"location":"users/bird-mae-debugging/#comparing-to-known-good-activations","title":"Comparing to known-good activations","text":"

    First, I compared activations from BirdMAE to DINOv3 activations (which I know are well-behaved). I recorded 300K content token activation vectors from layer 14/24 from DINOv3 ViT-L/16 and BirdMAE-L. Each vector has 1024 dimensions. I flattened these vectors; for each of BirdMAE and DINOv3, I have a list of 307.2M neuron activations (300K x 1024 = 307,200,000). I plotted a histogram below. Note the log scale on the y-axis.

    I zoomed in on the left-most cluster, ignoring the right cluster. While BirdMAE is more spread out, the shapes look good enough for now.

    "},{"location":"users/bird-mae-debugging/#finding-the-outlier-dimension-296","title":"Finding the outlier: dimension 296","text":"

    Looking at the right cluster, I realized that all of these values are from neuron 296 of 1024. Here, I colored activations based on their neuron: all BirdMAE neurons besides 296 are blue, DINOv3 is orange, and neuron 296 is red.

    My activation matrix is \\(\\mathbb{R}^{300K \\times 1024}\\) for each dataset. In code, what I see is:

    bird_acts.shape  # (300K, 1024)\nbird_acts[:, 295].min()  # 2549.54\nbird_acts[:, 295].max()  # 4625.12\n

    Something is broken inside of BirdMAE.

    "},{"location":"users/bird-mae-debugging/#tracing-the-outlier-through-the-residual-stream","title":"Tracing the outlier through the residual stream","text":"

    Where in BirdMAE does this abnormality show up? Consider transformers as residual streams. After what layer does dimension 296/1024 blow up? See this diagram below: for a single random example from BirdMAE, we will track both the average neuron and neuron 296's value through the 24 transformer layers.

    BirdMAE uses 256 content tokens for a single example. We take the average value of each neuron in the residual stream before each transformer block (the green \"Graph #1\" circle in the above diagram) and after the final transformer block. We plot each of the 1023 \"well-behaved\" neurons in light blue. We plot our degenerate neuron 296 in red. Note the log scale on the y-axis.

    Our well-behaved neurons mostly stay in (-10, 10). Neuron 296 jumps straight to ~2.2K after the first residual block and is never fixed again. It's well-behaved coming out of the patch embedding before the first residual block.

    "},{"location":"users/bird-mae-debugging/#narrowing-it-down-the-first-mlp","title":"Narrowing it down: the first MLP","text":"

    Below is the output from the attention layers (Graph #2) in our architecture diagram.

    Neuron 296 is mostly well-behaved; it's a little big after the second attention layer, but not insane.

    Here, we can see that the output of the first MLP produces an abnormally high value for neuron 296. Why?

    Here's an architecture diagram of BirdMAE's MLPs according to the model definition on HuggingFace. Let's look at the trainable parameters in these MLPs across layers, starting from the end and working backwards.

    fc2 has a weight parameter with shape (4096, 1024) and a bias parameter with shape (1024,). I take the L2 norm of fc2.weight's columns to see if col 296/1024 is different.

    fc2.weight does appear to be different, and abnormally large (note the log scale). fc2.bias is also different, but it's not immediately obvious what's going on there to me.

    "},{"location":"users/bird-mae-debugging/#root-cause-emergent-outlier-features","title":"Root cause: emergent outlier features","text":"

    This is a known phenomenon in transformers called \"emergent outlier features.\" After extensive pretraining, a single dimension in the residual stream accumulates a very large magnitude. The model never needs to \"fix\" this because the pre-attention and pre-MLP LayerNorms learn to suppress it: the learned multiplicative weight for dimension 296 is very small, and the bias is approximately 1. So later layers never actually \"see\" the outlier in practice.

    We verified this by inspecting norm2.weight across layers and confirming that the learned scale for dimension 296 is near-zero, but that analysis is not reproduced here.

    The BirdMAE authors never had to deal with this because all downstream use of the model goes through LayerNorm first.

    "},{"location":"users/bird-mae-debugging/#fix-record-after-layernorm","title":"Fix: record after LayerNorm","text":"

    The fix is to record activations after block.norm2 (the pre-MLP LayerNorm) instead of from the raw residual stream. In saev, this is implemented as:

    def get_residuals(self) -> list[torch.nn.Module]:\n    return [block.norm2 for block in self.model.blocks]\n

    After this change, the outlier is suppressed and SAE training works normally.

    "},{"location":"users/bird-mae-debugging/#lessons","title":"Lessons","text":"
    1. Compare activation distributions to a known-good model. Histogramming flattened activations from 300K tokens is cheap and can reveal outliers.
    2. Emergent outlier features are real. If a single dimension dominates your activation distribution, check whether it's a known artifact of pretraining before assuming your recording code is wrong.
    3. Record after LayerNorm, not from the raw residual stream. The residual stream can carry high-magnitude \"bookkeeping\" values that LayerNorm suppresses. Recording post-norm avoids this entirely.
    "},{"location":"users/glossary/","title":"Glossary","text":"

    Definitions for words used in the code and documentation.

    • example: one dataset item (image, sentence, audio clip, point cloud, graph instance).
    • token: one model position in the encoder\u2019s residual stream (the thing with hidden size d_model). Always \"token\" inside the model.
    • content token: tokens derived from the raw input (image patches, wordpieces, audio windows, nodes, etc.).
    • special token: tokens not directly derived from the raw input (class/summary token, [SEP], [MASK], [PAD], register tokens, etc.).
    • sequence length L: total tokens per example (content + special). If variable, call it \u201cragged\u201d.
    • layer: an integer index into the encoder\u2019s stack.
    • activation kind (optional but useful): which stream you saved (e.g., resid_pre, resid_post, mlp_out, attn_out, qkv, head_out).

    Modality-specific vocab:

    • patch (vision): a 2D content token. Often laid out on a grid with shape (H_patches, W_patches).
    • frame/token or tube (video): content token in time \u00d7 space; often (T, H, W).
    • wordpiece / subword (text): content token from a tokenizer.
    • window / frame (audio): time\u2013frequency window.
    • node (graph), point (point cloud).
    "},{"location":"users/guide/","title":"Guide","text":"

    This guide explains how to transition from the ADE20K demo to using saev with your own custom datasets.

    Here are the steps:

    1. Save ViT activations to disk
    2. Train SAEs on activations
    3. Evaluate the SAE checkpoints
    4. Visualize Learned Features

    Note

    saev assumes you are running on NVIDIA GPUs. On a multi-GPU system, prefix your commands with CUDA_VISIBLE_DEVICES=X to run on GPU X.

    "},{"location":"users/guide/#save-vit-activations-to-disk","title":"Save ViT Activations to Disk","text":"

    To save activations to disk, we need to specify:

    1. Which model we would like to use
    2. Which layers we would like to save.
    3. Where on disk and how we would like to save activations.
    4. Which images we want to save activations for.

    The saev/framework/shards.py script does all of this for us.

    Run uv run launch.py shards --help to see all the configuration.

    In practice, you might run:

    uv run launch.py shards \\\n  --shards-root /fs/scratch/PAS2136/samuelstevens/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-16/openai \\\n  --d-model 768 \\\n  --layers 6 7 8 9 10 11 \\\n  --content-tokens-per-example 196 \\\n  --batch-size 512 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  data:img-seg-folder \\\n  --data.root /fs/scratch/PAS2136/samuelstevens/datasets/ADEChallengeData2016/ \\\n  --data.split training\n

    This will save activations for the CLIP-pretrained model ViT-B/16, which has a residual stream dimension of 768, and has 196 patches per image (224 / 16 = 14; 14 x 14 = 196). It will save the last 6 layers. It will write 2.4M patches per shard, and save shards to a new directory /fs/scratch/PAS2136/samuelstevens/saev/shards.

    Note

    A note on storage space: A ViT-B/16 on ImageNet-1K will save 1.2M images x 197 patches/layer/image x 1 layer = ~240M activations, each of which takes up 768 floats x 4 bytes/float = 3072 bytes, for a total of 723GB for the entire dataset. As you scale to larger models (ViT-L has 1024 dimensions, and 14x14-pixel patches on a 224x224 image give 256 patches/layer/image), recorded activations will grow even larger.

    This script will also save a metadata.json file that will record the relevant metadata for these activations, which will be read by future steps. The activations will be in .bin files, numbered starting from 000000.

    To add your own models, see the guide to extending in saev.activations.

    "},{"location":"users/guide/#train-saes-on-activations","title":"Train SAEs on Activations","text":"

    To train an SAE, we need to specify:

    1. Which activations to use as input.
    2. SAE architectural stuff.
    3. Optimization-related stuff.

    The train.py script handles this.

    Run uv run train.py --help to see all the configuration.

    The most important options are:

    • --runs-root: where to store runs.
    • --train-data and --val-data: How to load the training and validation data. You probably want to specify both --{train,val}-data.shards (the shard directory) and --{train,val}-data.layer (which layer to use).
    • sae.activation: sae.activation:relu to use the ReLU activation.

    This is a full example:

    uv run train.py \\\n  --runs-root /fs/ess/PAS2136/samuelstevens/saev/runs \\\n  --lr 4e-3 \\\n  --sae.exp-factor 16 \\\n  --sae.d-model 1024 \\\n  --tag ade20k-v0.1 \\\n  --n-train 100_000_000 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --train-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/51567c6c \\\n  --train-data.layer 11 \\\n  --val-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3e27794f \\\n  --val-data.layer 11 \\\n  sae.activation:relu \\\n  objective:matryoshka \\\n  --objective.sparsity-coeff 1e-3 \\\n

    This will train one (1) sparse autoencoder on the data. See the section on sweeps to learn how to train multiple SAEs in parallel using one or more GPUs.

    "},{"location":"users/guide/#loader-entropy-metrics","title":"Loader Entropy Metrics","text":"

    The training loop logs additional loader diagnostics derived from calc_batch_entropy in train.py. Every batch contributes two entropy measurements in natural log units:

    • loader/example_entropy and loader/example_entropy_normalized summarize how evenly the shuffled loader samples example indices. Normalization divides the raw entropy by ln(metadata.n_examples) so perfectly uniform sampling is 1.0.
    • loader/token_entropy and loader/token_entropy_normalized do the same for patch indices using ln(metadata.content_tokens_per_example) as the normalizer.
    • loader/example_coverage and loader/token_coverage report the fraction of distinct example or patch indices seen in the current batch relative to their theoretical support.

    All six metrics appear alongside the existing loader/read_mb counters, helping spot skewed sampling or under-covered patches mid-run.

    "},{"location":"users/guide/#evaluation","title":"Evaluation","text":"

    After training an SAE, you probably want to use the SAE. You can use the SAE as a regular PyTorch torch.nn.Module in combination with a saev.data.OrderedDataLoader or saev.data.IndexedDataset.

    However, most SAEs are evaluated with a similar set of metrics (normalized MSE, L0, etc). The saev/framework/inference.py script calculates these metrics. You can run uv run launch.py inference --help to see all the options.

    The most important options are:

    • --run: The path to the SAE run directory.
    • --data: The options for the OrderedDataLoader. Specifically, you need to set --data.shards and --data.layer, just like for training.
    uv run launch.py inference \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/z55bntm1/ \\\n  --data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/614861a0 \\\n  --data.layer 11\n
    "},{"location":"users/guide/#visualize-learned-features","title":"Visualize Learned Features","text":"

    Now that you've trained an SAE, you probably want to look at its learned features. One way to visualize an individual learned feature is by picking out images that maximize the activation of that feature. We use the saved sparse token_acts.npz file from the previous inference step.

    Warning

    Because there are so many different ways to visualize SAE features, I moved it to contrib/trait_discovery (used for our preprint \"Towards Open-Ended Visual Scientific Discovery with Sparse Autoencoders\").

    The most important options:

    • --run: The path to the SAE run directory.
    • --shards: The shards directory.
    • --latents: The 0-indexed latents to save images for.
    • --n-latents: The number of randomly selected latents to save images for.

    So first, move into the contrib/trait_discovery:

    cd contrib/trait_discovery\n

    Then run the script that generates highlighted images:

    uv run scripts/launch.py visuals \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/unu6dbfb \\\n  --shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3802cb66 \\\n  --latents 0 1 2 3 4 5 6 7 8 9 49 56 57 125 202 \\\n  --n-latents 20 \\\n

    Note

    Because of limitations in the SAE training process, not all SAE latents are equally interesting. Some latents are dead, some are dense, some only fire on two images, etc. Typically, you want neurons that fire very strongly (high value) and fairly infrequently (low frequency). You might be interested in particular, fixed latents (--include-latents). I recommend using saev/interactive/metrics.py with marimo to figure out good thresholds.

    "},{"location":"users/guide/#sweeps","title":"Sweeps","text":"

    tl;dr: basically the slow part of training SAEs is loading ViT activations from disk, and since SAEs are pretty small compared to other models, you can train a bunch of different SAEs in parallel on the same data using a big GPU. That way you can sweep learning rate, lambda, etc. all on one GPU.

    "},{"location":"users/guide/#why-parallel-sweeps","title":"Why Parallel Sweeps","text":"

    SAE training optimizes for a unique bottleneck compared to typical ML workflows: disk I/O rather than GPU computation. When training on vision transformer activations, loading the pre-computed activation data from disk is often the slowest part of the process, not the SAE training itself.

    A single set of ImageNet activations for a vision transformer can require terabytes of storage. Reading this data repeatedly for each hyperparameter configuration would be extremely inefficient.

    "},{"location":"users/guide/#parallelized-training-architecture","title":"Parallelized Training Architecture","text":"

    To address this bottleneck, we implement parallel training that allows multiple SAE configurations to train simultaneously on the same data batch:

    \nflowchart TD\n    A[Pre-computed ViT Activations] -->|Slow I/O| B[Memory Buffer]\n    B -->|Shared Batch| C[SAE Model 1]\n    B -->|Shared Batch| D[SAE Model 2]\n    B -->|Shared Batch| E[SAE Model 3]\n    B -->|Shared Batch| F[...]\n

    This approach:

    • Loads each batch of activations once from disk
    • Uses that same batch for multiple SAE models with different hyperparameters
    • Amortizes the slow I/O cost across all models in the sweep
    "},{"location":"users/guide/#running-a-sweep","title":"Running a Sweep","text":"

    The train command accepts a --sweep parameter that points to a TOML file defining the hyperparameter grid:

    uv run launch.py train --sweep configs/my_sweep.toml\n

    Here's an example sweep configuration file:

    [sae]\nsparsity_coeff = [1e-4, 2e-4, 3e-4]\nd_model = 768\nd_sae = [6144, 12288]\n\n[data]\nscale_mean = true\n

    This would train 6 models (3 sparsity coefficients \u00d7 2 SAE widths), each sharing the same data loading operation.

    "},{"location":"users/guide/#limitations","title":"Limitations","text":"

    Not all parameters can be swept in parallel. Parameters that affect data loading (like batch_size or dataset configuration) will cause the sweep to split into separate parallel groups. The system automatically handles this division to maximize efficiency.

    "},{"location":"users/inference/","title":"Inference","text":"

    If you want to get started quickly, try the inference notebook in marimo or on Google Colab.

    Briefly, you need to:

    1. Download a checkpoint.
    2. Get the code.
    3. Load the checkpoint.
    4. Get activations.

    Details are below.

    "},{"location":"users/inference/#download-a-checkpoint","title":"Download a Checkpoint","text":"

    First, download an SAE checkpoint from the Huggingface collection.

    "},{"location":"users/inference/#single-checkpoint-repos","title":"Single-checkpoint repos","text":"

    Some repos (CLIP, BioCLIP, DINOv2) contain a single sae.pt at the root. For instance, the SAE trained on OpenAI's CLIP ViT-B/16 with ImageNet-1K activations is here.

    You can use wget if you want:

    wget https://huggingface.co/osunlp/SAE_CLIP_24K_ViT-B-16_IN1K/resolve/main/sae.pt\n
    "},{"location":"users/inference/#multi-checkpoint-repos","title":"Multi-checkpoint repos","text":"

    The DINOv3 repos contain multiple checkpoints organized by layer and sparsity level. Each repo has a manifest.jsonl with metadata (layer, L0, MSE) for every checkpoint, so you can pick the right one programmatically.

    Download a specific checkpoint:

    from huggingface_hub import hf_hub_download\n\n# Pick a specific layer and run ID from the repo's README or manifest.jsonl\npath = hf_hub_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\", \"layer_23/lnleoyf6/sae.pt\")\n

    Download all checkpoints in a repo:

    from huggingface_hub import snapshot_download\n\nsnapshot_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\")\n

    Available DINOv3 repos:

    • osunlp/SAE_DINOv3_ViT-S-16_IN1K (layers 6-11)
    • osunlp/SAE_DINOv3_ViT-B-16_IN1K (layers 6-11)
    • osunlp/SAE_DINOv3_ViT-L-16_IN1K (layers 13-23)
    • osunlp/SAE_DINOv3_TopK_ViT-L-16_IN1K (layers 13-23)
    "},{"location":"users/inference/#get-the-code","title":"Get the Code","text":"

    The easiest way to do this is to clone the code:

    git clone https://github.com/Imageomics/saev\n

    You can also install the package from git if you use uv (not sure about pip or conda):

    uv add git+https://github.com/Imageomics/saev\n

    Or clone it and install it as an editable install with pip, like pip install -e . in your virtual environment.

    Then you can do things like from saev import ....

    Note

    If you struggle to get saev installed, open an issue on GitHub and I will figure out how to make it easier.

    "},{"location":"users/inference/#load-the-checkpoint","title":"Load the Checkpoint","text":"
    import saev.nn\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n

    Now you have a pretrained SAE.

    "},{"location":"users/inference/#get-activations","title":"Get Activations","text":"

    This is the hardest part. We need to:

    1. Pass an image into a ViT
    2. Record the dense ViT activations at the same layer that the SAE was trained on.
    3. Pass the activations into the SAE to get sparse activations.
    4. Do something interesting with the sparse SAE activations.

    There are examples of this in the demo code: for classification and semantic segmentation. If the permalinks change, you are looking for the get_sae_latents() functions in both files.

    Below is example code to do it using the saev package.

    import saev.nn\nimport saev.data.models\nimport saev.data.shards\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n\nvit_cls = saev.data.models.load_model_cls(\"clip\")\nvit = vit_cls(\"ViT-B-16/openai\").to(device)\nvit = saev.data.shards.RecordedTransformer(vit, 196, True, [10])\n\nimg_tr, _ = vit_cls.make_transforms(\"ViT-B-16/openai\", 196)\nimg = Image.open(\"example.jpg\")\n\nx = img_tr(img)\n# Add a batch dimension.\nx = x[None, ...]\n_, vit_acts = vit(x)\n# Select the only layer and ignore the CLS token.\nvit_acts = vit_acts[:, 0, 1:, :]\n\nout = sae(vit_acts)\n# out.f_x: sparse SAE latents (batch, d_sae)\n# out.x_hats: reconstructed activations (batch, n_prefixes, d_model)\n

    Now you have the sparse representation of all patches in the image (out.f_x) and the reconstructed activations (out.x_hats).

    You might select the dimensions with maximal values for each patch and see what other images are maximally activating.

    "},{"location":"users/new-project/","title":"New Project Structure","text":"

    saev is structured like big_vision, Google's ViT codebase. To get the most use out of saev, you should not use it as a requirement in your project; rather, you should build inside of the source code of saev. This is a guide to that process.

    TL;DR:

    1. Fork saev.
    2. Clone your fork.
    3. Create a new directory in contrib/.
    4. Update both src/saev and your new contrib directory as necessary.
    5. (Hopefully) publish.
    6. If your changes to src/saev are broadly useful and not overly restrictive, open a PR with your changes to src/saev.

    I am currently applying SAEs to audio of birdsong, so this is how I'll develop it.

    First, fork and clone saev. Do this however you want, but GitHub has a guide on it.

    Second, you probably want to store code related to your project in this repo. Make a new directory in contrib/. I'm calling my new subproject \"birdsong.\"

    [I] samuelstevens@host ~/p/saev (main)> tree -L 1 contrib/\ncontrib/\n\u251c\u2500\u2500 birdsong\n\u251c\u2500\u2500 interactive_interp\n\u2514\u2500\u2500 trait_discovery\n

    Use uv to make a new package inside your new project:

    [I] samuelstevens@host ~/p/s/c/birdsong (main)> uv init --package .\nAdding `birdsong` as member of workspace `~/projects/saev`\nInitialized project `birdsong` at `~/projects/saev/contrib/birdsong`\n

    Now you have some additional files.

    [I] samuelstevens@ascend-login02 ~/p/s/c/birdsong (main)> tree\n.\n\u251c\u2500\u2500 pyproject.toml\n\u251c\u2500\u2500 README.md\n\u2514\u2500\u2500 src\n    \u2514\u2500\u2500 birdsong\n        \u2514\u2500\u2500 __init__.py\n

    Now I can write scripts and source code for birdsong-specific stuff in here. I'll probably add a notebook for looking at instances of birdsongs before and after using SAEs to identify patterns under a new birdsong/notebooks directory, and will add birdsong/logbook.md to store ongoing TODO items, and so on.

    To train SAEs on audio files, I'll need to add a new dataset type to save activations. In order to do this, I'll edit src/saev/data/datasets.py.

    I'll also need to add another model to the dataset, one that expects audio files. Since I don't think that DINOv3, OpenCLIP, or the other existing model families will be suitable, I'll need to add a new model family. Again, this will need to go somewhere in src/saev/data.

    If I'm smart about it, these changes will be nice and non-destructive, and other users of saev can benefit from them. After I publish some results, to share this code with others, I'll open a PR from my fork/branch to main with the new datasets/models. But I won't open a PR with birdsong because that's specific to me, rather than to the library.1

    1. Technically, birdsong will be in saev because I'm a sort of privileged user because I'm the main developer. But other folks probably want their project-specific code attached to their GitHub page, rather than OSU-NLP's.\u00a0\u21a9

    "},{"location":"users/sweeps/","title":"Sweeps","text":"

    Hyperparameter sweeps in saev train multiple SAE configurations in parallel on a single GPU, amortizing the cost of loading activation data from disk across all models. Furthermore, sweeps make it easy to train multiple SAEs with one command across multiple GPUs using Slurm.

    "},{"location":"users/sweeps/#quick-start","title":"Quick Start","text":"

    Create a Python file defining your sweep:

    # sweeps/my_sweep.py\n\n\ndef make_cfgs() -> list[dict]:\n    cfgs = []\n\n    # Grid search over learning rate and sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"objective\": {\"sparsity_coeff\": sparsity},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

    Run the sweep:

    uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23\n

    This trains 9 SAEs (3 learning rates x 3 sparsity coefficients) in parallel.

    "},{"location":"users/sweeps/#why-parallel-sweeps","title":"Why Parallel Sweeps?","text":"

    SAE training is bottlenecked by disk I/O, not GPU computation. Loading terabytes of pre-computed ViT activations from disk is the slowest part. By training multiple SAE configurations on the same batch simultaneously, we amortize the I/O cost:

    \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502 ViT Activations (disk) \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n            \u2502 (slow I/O, once per batch)\n            \u25bc\n      \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n      \u2502  Batch   \u2502\n      \u2514\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2518\n            \u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n            \u25bc         \u25bc         \u25bc         \u25bc\n         SAE #1    SAE #2    SAE #3     ...\n        (lr=3e-4) (lr=1e-3) (lr=3e-3)\n
    "},{"location":"users/sweeps/#sweep-configuration","title":"Sweep Configuration","text":""},{"location":"users/sweeps/#python-based-sweeps","title":"Python-Based Sweeps","text":"

    Python sweeps give you full control over config generation. Your sweep file must define a make_cfgs() function that returns a list of dicts.

    Grid search example:

    def make_cfgs():\n    cfgs = []\n\n    for lr in [1e-4, 3e-4, 1e-3]:\n        for d_sae in [8192, 16384, 32768]:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

    Paired parameters (not a grid):

    def make_cfgs():\n    cfgs = []\n\n    # Grid over lr x sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            # Paired layers (train and val use same layer)\n            for layer in [6, 7, 8, 9, 10, 11]:\n                cfg = {\n                    \"lr\": lr,\n                    \"objective\": {\"sparsity_coeff\": sparsity},\n                    \"train_data\": {\"layer\": layer},\n                    \"val_data\": {\"layer\": layer},\n                }\n                cfgs.append(cfg)\n\n    return cfgs\n

    This generates 54 configs (3 x 3 x 6) where each train/val pair uses the same layer, avoiding the 324 configs you'd get from a full grid (3 x 3 x 6 x 6).

    Conditional sweeps:

    def make_cfgs():\n    cfgs = []\n\n    for d_sae in [8192, 16384, 32768]:\n        # Use different LR for different SAE widths\n        lrs = [1e-3, 3e-3] if d_sae <= 16384 else [3e-4, 1e-3]\n\n        for lr in lrs:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n
    "},{"location":"users/sweeps/#command-line-overrides","title":"Command-Line Overrides","text":"

    Command-line arguments override sweep parameters with deep merging. The precedence order is: CLI > Sweep > Default.

    uv run train.py --sweep sweeps/my_sweep.py \\\n  --lr 5e-4  # Overrides all LRs in the sweep\n

    Override nested config fields with dotted notation:

    uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23 \\\n  --sae.d-sae 16384\n

    Deep merging means that when you override a nested field, only that specific field is replaced\u2014other fields in the nested config are preserved from the sweep or default values.

    "},{"location":"users/sweeps/#parallel-groups","title":"Parallel Groups","text":"

    Not all parameters can vary within a parallel sweep. Parameters that affect data loading (like train_data, n_train, device) must be identical across all configs in a parallel group.

    When configs differ in these parameters, they're automatically split into separate Slurm jobs:

    def make_cfgs():\n    cfgs = []\n\n    # These will run in 2 separate jobs\n    for layer in [6, 12]:  # Different data loading\n        for lr in [1e-4, 3e-4]:  # Can parallelize\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

    This creates 2 parallel groups: - Job 1: layer=6, lr=[1e-4, 3e-4] - Job 2: layer=12, lr=[1e-4, 3e-4]

    Implementation detail

    See CANNOT_PARALLELIZE in train.py for the full list of parameters that split parallel groups. The split_cfgs() function handles grouping automatically.

    "},{"location":"users/sweeps/#module-loading","title":"Module Loading","text":"

    Your sweep file is executed as a Python module, so you can use imports and helper functions:

    def make_cfgs():\n    cfgs = []\n\n    # You can use helper functions\n    base_layers = list(range(6, 24, 2))\n\n    for layer in base_layers:\n        for lr in [1e-4, 3e-4]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"val_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"sae\": {\"d_model\": 1024, \"d_sae\": 16384},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

    Import mechanics

    The sweep file is loaded with importlib.import_module(), so it must be importable as a Python module. Place sweep files in a location where Python can find them (typically the project root or a sweeps/ subdirectory).

    "},{"location":"users/sweeps/#slurm-integration","title":"Slurm Integration","text":"

    When running with --slurm-acct, each parallel group becomes a separate Slurm job:

    uv run train.py --sweep sweeps/large.py \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --n-hours 24\n

    The system automatically: - Groups configs that can parallelize - Submits one Slurm job per group - Waits for all jobs to complete - Reports results

    "},{"location":"users/sweeps/#seed-management","title":"Seed Management","text":"

    Seeds are automatically incremented for each config to ensure reproducibility:

    # Base config has seed=42\n# Sweep generates 9 configs with seeds: 42, 43, 44, ..., 50\n

    Override the base seed on the command line:

    uv run train.py --sweep sweeps/my_sweep.py --seed 100\n
    "},{"location":"users/sweeps/#examples","title":"Examples","text":"

    Simple grid:

    # sweeps/simple.py\ndef make_cfgs():\n    return [\n        {\"lr\": lr, \"objective\": {\"sparsity_coeff\": sp}}\n        for lr in [1e-4, 3e-4, 1e-3]\n        for sp in [4e-4, 8e-4, 1.6e-3]\n    ]\n

    Layer sweep with paired train/val:

    # sweeps/layers.py\ndef make_cfgs():\n    cfgs = []\n\n    for layer in range(6, 24, 2):  # Layers 6, 8, 10, ..., 22\n        for lr in [3e-4, 1e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n                \"val_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n

    Architecture sweep:

    # sweeps/architecture.py\ndef make_cfgs():\n    cfgs = []\n\n    architectures = [\n        (\"small\", 8192, 1e-3),\n        (\"medium\", 16384, 5e-4),\n        (\"large\", 32768, 3e-4),\n    ]\n\n    for name, d_sae, lr in architectures:\n        cfg = {\n            \"lr\": lr,\n            \"sae\": {\"d_sae\": d_sae},\n            \"tag\": name,\n        }\n        cfgs.append(cfg)\n\n    return cfgs\n
    "}]} \ No newline at end of file diff --git a/docs/api/sitemap.xml b/docs/api/sitemap.xml index d9f457c..0f1c9aa 100644 --- a/docs/api/sitemap.xml +++ b/docs/api/sitemap.xml @@ -2,198 +2,198 @@ https://imageomics.github.io/saev/api/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/colors/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/configs/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/disk/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/helpers/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/metrics/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/saev/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/summary/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/viz/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/data/bird_mae/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/data/buffers/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/data/clip/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/data/datasets/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/data/dinov2/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/data/dinov3/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/data/fake_clip/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/data/indexed/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/data/models/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/data/ordered/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/data/pe/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/data/saev.data/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/data/shards/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/data/shuffled/ - 2026-03-06 + 
2026-06-12 https://imageomics.github.io/saev/api/api/data/siglip/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/data/transforms/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/framework/inference/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/framework/saev.framework/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/framework/shards/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/framework/train/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/nn/modeling/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/nn/objectives/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/nn/saev.nn/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/utils/monitoring/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/utils/saev.utils/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/utils/scheduling/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/utils/statistics/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/api/utils/wandb/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/developers/contributing/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/developers/datapoint-init/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/developers/disk-layout/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/developers/naming/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/developers/protocol/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/developers/workflows/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/users/bird-mae-debugging/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/users/glossary/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/users/guide/ - 2026-03-06 + 2026-06-12 
https://imageomics.github.io/saev/api/users/inference/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/users/new-project/ - 2026-03-06 + 2026-06-12 https://imageomics.github.io/saev/api/users/sweeps/ - 2026-03-06 + 2026-06-12 \ No newline at end of file diff --git a/docs/api/sitemap.xml.gz b/docs/api/sitemap.xml.gz index 1e7c75db1d70f9824c2feeb5db6d644a0c439975..12d1a517173df19e83d1289da6c055b1920dae41 100644 GIT binary patch literal 553 zcmV+^0@nQ>iwFn+Oe<;v|8r?{Wo=<_E_iKh0L_@ej+`(IK=1n%BkqvdskCaVnb}*v zpgpewG2opfMsa}Meti=db@$vJ93&tK!G4PEILZ0>&Dp~Ofsy>BdTfr>0enXW{cx#% z{rpjXt)8zB=Q&u0cu1W+U#c<2>AO?W;f7Kivnd|9zda934_2VgZxwq}Q z)L&))t~TEtSqdym;CmcI5e)K8QvV2c0$D3Kh#`HJMA#~l`e)~Kc$Y+2@V96j=%`hT z!Mr#P@*s2H3rEGq(OI0tA@U<-C;8DxS(BDi85m#0F?xN2mm(Yly@59{;v7jVSa~<4 zU?3DQ9FzP!np>K!Bu+BwV7LH`lv!*6&cQg8Fm=SBJ@*uxsA>A39jp{?X(~&3Kkd+< z44f@~KB{FfiAT~0OL8Cdk|^HqStsr1g)yjog)ZU(EDW;wtQWP)95Jl}9C?fsgiW)v zY=g2F`?Ad4Wr%HAO;T2%|k6_te4DtF0plf?|a2fyr;dBXO literal 556 zcmV+{0@M8;iwFn+8LDXl|8r?{Wo=<_E_iKh0L_@qZk#X>fbV&V6?b5}k)pQR-Q4;F z?Kv`dfXUh(?)7OSolY0&xB*3=J_|yD&%$%R!tT`;;8OdF$Z_Tkffa^)48!y$L z-@oeb)${e?9KB%(hcwyorJ6#Rf41$qt_@=Vt7dMTr7>Y>&;4>iTdMYR2~5=INimRvawp>*;uUs*jKL@hMHY zkL}B~|JVD{%K7fekmuqMZosiLg+{&=Mwr0NP~03Ag2%oz#nvf|5Uo}Iy)ease@jP( z3aahU>ldp)1WO!-@^P}E^af`M9Ql!wm3!Zgn9nF-xko`kwr=mRdm%f+QLiFtk;BdUC;Im$-_W>Jw4U=rf@U zHKqs?!i0RQje4gj1js2X#$dCjKs^~5G&ze8hSrji9yycbqE&4b)Ec@tj%Dq&BaMcb u%T^WdAQ3ebr;u<+-$@*-b&8{W)GE%xE5Q7P@Y!Ar;rb5}y9l!b8vp?Qm>MYn diff --git a/docs/api/users/guide/index.html b/docs/api/users/guide/index.html index e6fa015..738a78a 100644 --- a/docs/api/users/guide/index.html +++ b/docs/api/users/guide/index.html @@ -2261,7 +2261,7 @@

    Parallelized Training Architecture

    Running a Sweep

    The train command accepts a --sweep parameter that points to a TOML file defining the hyperparameter grid:

    -
    uv run python -m saev train --sweep configs/my_sweep.toml
    +
    uv run launch.py train --sweep configs/my_sweep.toml
     

    Here's an example sweep configuration file:

    [sae]
    diff --git a/docs/api/users/sweeps/index.html b/docs/api/users/sweeps/index.html
    index 35da9bd..10e2e85 100644
    --- a/docs/api/users/sweeps/index.html
    +++ b/docs/api/users/sweeps/index.html
    @@ -2095,6 +2095,7 @@ 

    Quick StartCreate a Python file defining your sweep:

    # sweeps/my_sweep.py
     
    +
     def make_cfgs() -> list[dict]:
         cfgs = []
     
    
    From 8aedea499d14027b94f8c3b3b63d333e1ae0928b Mon Sep 17 00:00:00 2001
    From: Matthew Thompson 
    Date: Fri, 12 Jun 2026 12:32:37 -0400
    Subject: [PATCH 6/9] Format with latest ruff (0.15.17) as part of docs build
    
    ---
     contrib/birdsong/notebooks/001_explore.py     |    3 +-
     contrib/birdsong/notebooks/clips.py           |    6 +-
     .../freshwater_fish/scripts/make_gallery.py   |   59 +-
     .../interactive_interp/semseg/interactive.py  |    3 +-
     contrib/interactive_interp/semseg/training.py |    3 +-
     examples/inference.py                         | 1543 ++++++++---------
     6 files changed, 827 insertions(+), 790 deletions(-)
    
    diff --git a/contrib/birdsong/notebooks/001_explore.py b/contrib/birdsong/notebooks/001_explore.py
    index 37fed19..102b2ba 100644
    --- a/contrib/birdsong/notebooks/001_explore.py
    +++ b/contrib/birdsong/notebooks/001_explore.py
    @@ -270,7 +270,8 @@ def _finalize_df(rows: list[dict[str, object]]):
             )
     
             df = (
    -            df.unnest("config/sae", "config/train_data/metadata", separator="/")
    +            df
    +            .unnest("config/sae", "config/train_data/metadata", separator="/")
                 .unnest("config/sae/activation", separator="/")
                 .unnest(
                     "config/sae/activation/aux",
    diff --git a/contrib/birdsong/notebooks/clips.py b/contrib/birdsong/notebooks/clips.py
    index cd6f88e..637616a 100644
    --- a/contrib/birdsong/notebooks/clips.py
    +++ b/contrib/birdsong/notebooks/clips.py
    @@ -92,7 +92,8 @@ def add_target(obs: pl.DataFrame, fields: list[str]) -> pl.DataFrame:
             obs = obs.with_columns([pl.col(field).fill_null("unknown") for field in fields])
     
             combos = (
    -            obs.select(fields)
    +            obs
    +            .select(fields)
                 .unique(maintain_order=True)  # first-seen ordering
                 .with_columns(pl.arange(0, pl.len(), dtype=pl.Int32).alias("target"))
             )
    @@ -101,7 +102,8 @@ def add_target(obs: pl.DataFrame, fields: list[str]) -> pl.DataFrame:
     
             target2fields = {
                 target: tuple(rest)
    -            for target, *rest in obs.unique(pl.col("target"))
    +            for target, *rest in obs
    +            .unique(pl.col("target"))
                 .select("target", *fields)
                 .iter_rows()
             }
    diff --git a/contrib/freshwater_fish/scripts/make_gallery.py b/contrib/freshwater_fish/scripts/make_gallery.py
    index 4a2d7fe..d77eeda 100644
    --- a/contrib/freshwater_fish/scripts/make_gallery.py
    +++ b/contrib/freshwater_fish/scripts/make_gallery.py
    @@ -74,9 +74,7 @@ def build_features(
         """Collect features that have pre-rendered images on disk."""
         features = []
         available = {
    -        int(d.name)
    -        for d in images_dpath.iterdir()
    -        if d.is_dir() and d.name.isdigit()
    +        int(d.name) for d in images_dpath.iterdir() if d.is_dir() and d.name.isdigit()
         }
     
         for row in var_df.iter_rows(named=True):
    @@ -275,15 +273,49 @@ def build_features(
     
     def main():
         parser = argparse.ArgumentParser(description=__doc__)
    -    parser.add_argument("--run", type=pathlib.Path, required=True, help="Run directory (e.g. /fs/ess/.../runs/um6hbn05)")
    -    parser.add_argument("--shard", type=str, required=True, help="Shard ID (e.g. 8692dfa9)")
    -    parser.add_argument("--dataset", type=pathlib.Path, default=None, help="Dataset root (segfolder with images/ dir, for label ordering)")
    -    parser.add_argument("--split", type=str, default="validation", help="Dataset split name (default: validation)")
    -    parser.add_argument("--hf-config", type=str, default="trait_segmentation", help="HuggingFace FishVista config name")
    -    parser.add_argument("--hf-split", type=str, default="val", help="HuggingFace split name (default: val)")
    -    parser.add_argument("--out", type=pathlib.Path, default=pathlib.Path("gallery.html"), help="Output HTML path")
    +    parser.add_argument(
    +        "--run",
    +        type=pathlib.Path,
    +        required=True,
    +        help="Run directory (e.g. /fs/ess/.../runs/um6hbn05)",
    +    )
    +    parser.add_argument(
    +        "--shard", type=str, required=True, help="Shard ID (e.g. 8692dfa9)"
    +    )
    +    parser.add_argument(
    +        "--dataset",
    +        type=pathlib.Path,
    +        default=None,
    +        help="Dataset root (segfolder with images/ dir, for label ordering)",
    +    )
    +    parser.add_argument(
    +        "--split",
    +        type=str,
    +        default="validation",
    +        help="Dataset split name (default: validation)",
    +    )
    +    parser.add_argument(
    +        "--hf-config",
    +        type=str,
    +        default="trait_segmentation",
    +        help="HuggingFace FishVista config name",
    +    )
    +    parser.add_argument(
    +        "--hf-split",
    +        type=str,
    +        default="val",
    +        help="HuggingFace split name (default: val)",
    +    )
    +    parser.add_argument(
    +        "--out",
    +        type=pathlib.Path,
    +        default=pathlib.Path("gallery.html"),
    +        help="Output HTML path",
    +    )
         parser.add_argument("--quality", type=int, default=80, help="JPEG quality (0-100)")
    -    parser.add_argument("--title", type=str, default="", help="Gallery subtitle/description")
    +    parser.add_argument(
    +        "--title", type=str, default="", help="Gallery subtitle/description"
    +    )
         args = parser.parse_args()
     
         inference_dpath = args.run / "inference" / args.shard
    @@ -313,7 +345,10 @@ def main():
         n_imgs = sum(len(f["images"]) for f in features)
         logger.info("Total images: %d", n_imgs)
     
    -    title = args.title or f"SAE run {args.run.name}, shard {args.shard} | {len(features)} features, {n_imgs} images"
    +    title = (
    +        args.title
    +        or f"SAE run {args.run.name}, shard {args.shard} | {len(features)} features, {n_imgs} images"
    +    )
         html = HTML_TEMPLATE.replace("FEATURES_JSON", json.dumps(features))
         html = html.replace("TITLE_PLACEHOLDER", title)
         html = html.replace("RUN_ID_PLACEHOLDER", args.run.name)
    diff --git a/contrib/interactive_interp/semseg/interactive.py b/contrib/interactive_interp/semseg/interactive.py
    index 6a0ed31..661e34f 100644
    --- a/contrib/interactive_interp/semseg/interactive.py
    +++ b/contrib/interactive_interp/semseg/interactive.py
    @@ -570,7 +570,8 @@ def make_upsampled_pred(
             logits_WHC: Float[Tensor, "width height classes"],
         ) -> Uint8[Tensor, "width height"]:
             return (
    -            torch.nn.functional.interpolate(
    +            torch.nn.functional
    +            .interpolate(
                     logits_WHC.max(axis=-1).indices.view((1, 1, 16, 16)).float(),
                     scale_factor=14,
                 )
    diff --git a/contrib/interactive_interp/semseg/training.py b/contrib/interactive_interp/semseg/training.py
    index 7b5979e..42ffe9b 100644
    --- a/contrib/interactive_interp/semseg/training.py
    +++ b/contrib/interactive_interp/semseg/training.py
    @@ -358,7 +358,8 @@ def __getitem__(self, i: int) -> dict[str, object]:
     
             pw, ph = self.patch_size_px
             patch_labels = (
    -            einops.rearrange(pixel_labels, "(w pw) (h ph) -> w h (pw ph)", pw=pw, ph=ph)
    +            einops
    +            .rearrange(pixel_labels, "(w pw) (h ph) -> w h (pw ph)", pw=pw, ph=ph)
                 .mode(axis=-1)
                 .values
             )
    diff --git a/examples/inference.py b/examples/inference.py
    index 2b44390..d143132 100644
    --- a/examples/inference.py
    +++ b/examples/inference.py
    @@ -213,778 +213,776 @@ def _():
     def _():
         DINOV2_IMAGENET1K_SCALAR = 2.0181241035461426
     
    -    DINOV2_IMAGENET1K_MEAN = torch.tensor(
    -        [
    -            0.1450997292995453,
    -            -1.0630134344100952,
    -            -0.3518574833869934,
    -            -0.38624095916748047,
    -            -0.4866980314254761,
    -            -0.28983384370803833,
    -            0.9997676014900208,
    -            -1.231179118156433,
    -            -0.7889889478683472,
    -            -0.4450306296348572,
    -            -0.09231726080179214,
    -            0.13243812322616577,
    -            0.09571082890033722,
    -            -0.29342857003211975,
    -            0.05933428555727005,
    -            -0.21923032402992249,
    -            0.08959043025970459,
    -            -0.6981018781661987,
    -            0.4853704869747162,
    -            -0.29948222637176514,
    -            0.3107207119464874,
    -            -0.3812718093395233,
    -            -0.5013473033905029,
    -            2.88395094871521,
    -            -0.5611682534217834,
    -            -0.3514024615287781,
    -            0.025546086952090263,
    -            -0.24438244104385376,
    -            -0.23365195095539093,
    -            -0.2533780336380005,
    -            0.4445696473121643,
    -            1.1176759004592896,
    -            -0.4188934564590454,
    -            0.09051182866096497,
    -            -0.04133417829871178,
    -            -0.008052834309637547,
    -            -0.5118610858917236,
    -            0.22084011137485504,
    -            -0.7333402633666992,
    -            0.8644523620605469,
    -            -0.43727627396583557,
    -            -0.22333095967769623,
    -            -1.5415295362472534,
    -            -0.24187016487121582,
    -            -0.33239033818244934,
    -            -1.2828021049499512,
    -            -0.21485395729541779,
    -            0.6667488813400269,
    -            -0.25890952348709106,
    -            -0.8630414009094238,
    -            1.5059994459152222,
    -            -0.00952776987105608,
    -            0.18695995211601257,
    -            0.0200128685683012,
    -            -0.221832275390625,
    -            1.2800148725509644,
    -            -0.1416555792093277,
    -            0.61446613073349,
    -            0.053658585995435715,
    -            -0.08877403289079666,
    -            1.0190010070800781,
    -            -0.308927446603775,
    -            -0.3903353214263916,
    -            -0.35504740476608276,
    -            -0.7907304763793945,
    -            -0.18439480662345886,
    -            -0.1797204464673996,
    -            0.8199827075004578,
    -            -0.1736353039741516,
    -            -0.16373644769191742,
    -            0.7541728019714355,
    -            -0.3236996829509735,
    -            0.8245170712471008,
    -            0.3411649167537689,
    -            -0.21873517334461212,
    -            -0.7620954513549805,
    -            -0.10635858029127121,
    -            -0.592278003692627,
    -            0.8314691781997681,
    -            -0.2021609991788864,
    -            -0.24301563203334808,
    -            -0.03504444658756256,
    -            -0.061244938522577286,
    -            -0.36000630259513855,
    -            -0.38578882813453674,
    -            -1.2314008474349976,
    -            -0.3416382968425751,
    -            0.5925644636154175,
    -            0.32259607315063477,
    -            0.13169726729393005,
    -            -0.131134033203125,
    -            0.05763484537601471,
    -            -0.7130515575408936,
    -            -0.5685354471206665,
    -            0.04428980499505997,
    -            0.9245452880859375,
    -            0.37724241614341736,
    -            -0.4426809549331665,
    -            0.5091503262519836,
    -            -0.08006338775157928,
    -            -0.18945513665676117,
    -            -0.770736575126648,
    -            -0.3588047921657562,
    -            0.04727765917778015,
    -            -0.16137081384658813,
    -            -0.021555813029408455,
    -            0.6381930708885193,
    -            0.30161890387535095,
    -            -0.0710706040263176,
    -            -0.13884945213794708,
    -            -0.22726555168628693,
    -            -0.6134527921676636,
    -            0.2969088852405548,
    -            -0.2334780991077423,
    -            -0.46334928274154663,
    -            -0.3058214485645294,
    -            0.5196799039840698,
    -            0.6341780424118042,
    -            0.12271945178508759,
    -            -1.0072089433670044,
    -            -0.1198473796248436,
    -            -0.24667270481586456,
    -            -0.19228138029575348,
    -            -0.3955901861190796,
    -            -0.19902971386909485,
    -            0.7407659292221069,
    -            2.3908257484436035,
    -            0.02820657566189766,
    -            0.07064329087734222,
    -            -0.2637694776058197,
    -            0.2560977339744568,
    -            0.3973558247089386,
    -            -0.17345857620239258,
    -            -0.9541534185409546,
    -            -0.21434728801250458,
    -            0.41178393363952637,
    -            -0.008175228722393513,
    -            0.5115303993225098,
    -            -0.9667210578918457,
    -            1.6499103307724,
    -            -1.8320564031600952,
    -            1.1143667697906494,
    -            0.24006624519824982,
    -            -0.02112947776913643,
    -            -0.4952388405799866,
    -            1.1000680923461914,
    -            -0.4901401102542877,
    -            0.22758258879184723,
    -            -0.6699370741844177,
    -            0.6926363706588745,
    -            -0.5719613432884216,
    -            0.008403707295656204,
    -            2.0220773220062256,
    -            -0.1789812445640564,
    -            -0.8777256011962891,
    -            0.3709064722061157,
    -            -0.2629733681678772,
    -            0.08407248556613922,
    -            -0.27063870429992676,
    -            0.09993340820074081,
    -            -0.3755860924720764,
    -            0.07000888139009476,
    -            0.3775370419025421,
    -            0.5653945207595825,
    -            -0.11404427886009216,
    -            -0.06088113784790039,
    -            -0.0898045226931572,
    -            0.19868576526641846,
    -            0.14287644624710083,
    -            -0.669394314289093,
    -            -0.07882463932037354,
    -            -0.12379930168390274,
    -            -0.010277876630425453,
    -            -0.5625343918800354,
    -            -0.6508009433746338,
    -            0.06929764896631241,
    -            -2.0470166206359863,
    -            1.0193544626235962,
    -            -0.9747569561004639,
    -            -0.25624850392341614,
    -            -0.04412469267845154,
    -            -0.01941649615764618,
    -            0.04781557247042656,
    -            -0.2561051845550537,
    -            -0.09596704691648483,
    -            -1.0529744625091553,
    -            -0.32774603366851807,
    -            -0.1931363344192505,
    -            -0.36885082721710205,
    -            -0.9351740479469299,
    -            -0.47905397415161133,
    -            -0.678762674331665,
    -            2.336048126220703,
    -            0.26323413848876953,
    -            -0.36512619256973267,
    -            -0.3650853633880615,
    -            -0.8287989497184753,
    -            0.5866581201553345,
    -            -0.420742005109787,
    -            0.008546118624508381,
    -            -0.7811568975448608,
    -            -0.34993329644203186,
    -            -0.373068243265152,
    -            0.028424998745322227,
    -            -0.537581205368042,
    -            -0.15937983989715576,
    -            -0.5638740062713623,
    -            -0.4413940906524658,
    -            -0.05887509509921074,
    -            -0.12291032075881958,
    -            -0.26565149426460266,
    -            -0.23059803247451782,
    -            -0.2925986349582672,
    -            0.04849022254347801,
    -            -0.4770037531852722,
    -            0.040383752435445786,
    -            -0.8186637759208679,
    -            -0.062463242560625076,
    -            -0.3251510262489319,
    -            -0.4319412112236023,
    -            -0.34569647908210754,
    -            0.9713658690452576,
    -            -0.25668394565582275,
    -            -0.37531179189682007,
    -            0.5259386301040649,
    -            -0.06112021207809448,
    -            0.06980857998132706,
    -            -0.38363778591156006,
    -            -0.1948518007993698,
    -            -0.7897586822509766,
    -            -0.600932776927948,
    -            -0.4269576072692871,
    -            -0.32002967596054077,
    -            0.08897170424461365,
    -            -0.3079395294189453,
    -            -0.05779555067420006,
    -            -0.782086968421936,
    -            1.9608103036880493,
    -            0.1145739033818245,
    -            0.06164107844233513,
    -            -0.3024725317955017,
    -            -0.6308553218841553,
    -            -0.7640243172645569,
    -            -4.433685302734375,
    -            -0.31690648198127747,
    -            -0.019084235653281212,
    -            -0.09761863201856613,
    -            -0.029514605179429054,
    -            -0.5096182823181152,
    -            1.112805962562561,
    -            -0.3302820324897766,
    -            -0.23730400204658508,
    -            0.044646695256233215,
    -            -0.805400013923645,
    -            -7.766678333282471,
    -            -0.2016162872314453,
    -            -0.5018128752708435,
    -            0.6819560527801514,
    -            -0.2735823392868042,
    -            -2.2288968563079834,
    -            -0.36170846223831177,
    -            -0.7745882868766785,
    -            0.4644778370857239,
    -            0.2525951564311981,
    -            -0.22642317414283752,
    -            -0.5394997596740723,
    -            -0.5064775347709656,
    -            -0.5716705918312073,
    -            0.19713695347309113,
    -            -0.5411649942398071,
    -            -0.17092496156692505,
    -            0.45778003334999084,
    -            0.6894896030426025,
    -            -0.21671152114868164,
    -            -0.9160588383674622,
    -            -0.10307890176773071,
    -            0.11703722178936005,
    -            -0.7433905601501465,
    -            -1.5170584917068481,
    -            2.163774013519287,
    -            -1.542649507522583,
    -            -0.1601075381040573,
    -            -0.5249155163764954,
    -            0.44509291648864746,
    -            -0.5261067152023315,
    -            -0.02273540571331978,
    -            -0.28311043977737427,
    -            0.9144242405891418,
    -            0.43954336643218994,
    -            -0.2469814419746399,
    -            0.18752114474773407,
    -            -0.6066163778305054,
    -            -0.14480441808700562,
    -            -0.3546217679977417,
    -            -0.11870954185724258,
    -            -0.09891107678413391,
    -            -0.377458781003952,
    -            0.33304381370544434,
    -            -0.156569704413414,
    -            -0.9730328321456909,
    -            -0.5034677386283875,
    -            0.042613230645656586,
    -            0.08271210640668869,
    -            -0.2368200123310089,
    -            -0.07397157698869705,
    -            0.011974042281508446,
    -            -0.2115129977464676,
    -            -0.3752884566783905,
    -            -0.24985794723033905,
    -            -0.25223013758659363,
    -            1.8311675786972046,
    -            -0.1650543361902237,
    -            -0.031050190329551697,
    -            0.10702164471149445,
    -            0.8963613510131836,
    -            -0.9483885169029236,
    -            -0.8156309723854065,
    -            -1.7132004499435425,
    -            0.08163392543792725,
    -            0.4886241555213928,
    -            -0.016470594331622124,
    -            -0.37671732902526855,
    -            -0.025105634704232216,
    -            -0.2695018947124481,
    -            -0.8450148701667786,
    -            -0.9802296757698059,
    -            -0.21868866682052612,
    -            -0.5872927308082581,
    -            1.019242763519287,
    -            0.01872517168521881,
    -            0.5087792873382568,
    -            0.06771136820316315,
    -            1.4142885208129883,
    -            0.13146139681339264,
    -            -0.36489933729171753,
    -            0.37572142481803894,
    -            -0.3490581810474396,
    -            -0.13830198347568512,
    -            -1.8019393682479858,
    -            1.5129766464233398,
    -            0.07059808075428009,
    -            1.7206473350524902,
    -            0.02890164405107498,
    -            0.3628808557987213,
    -            0.3914141058921814,
    -            0.4993101954460144,
    -            0.3969678580760956,
    -            -0.058554816991090775,
    -            -0.3434300422668457,
    -            -0.4157616198062897,
    -            -0.7624511122703552,
    -            -0.3997197449207306,
    -            1.4573990106582642,
    -            -0.3363801836967468,
    -            -0.46490129828453064,
    -            -0.7445303797721863,
    -            -0.3460237979888916,
    -            -0.6315308809280396,
    -            0.8536337018013,
    -            -0.08939796686172485,
    -            -0.21093742549419403,
    -            -0.08742645382881165,
    -            -0.020040960982441902,
    -            0.09354449808597565,
    -            -0.809800386428833,
    -            -0.0018062496092170477,
    -            -1.0083088874816895,
    -            0.3428219258785248,
    -            0.012708818539977074,
    -            -0.3535612225532532,
    -            1.9481208324432373,
    -            0.013826621696352959,
    -            -0.026771225035190582,
    -            0.18734635412693024,
    -            0.9365230798721313,
    -            1.247671025339514e-05,
    -            -0.4420109987258911,
    -            0.10769690573215485,
    -            -0.6858118176460266,
    -            -0.24754805862903595,
    -            1.0027467012405396,
    -            -0.26436665654182434,
    -            -0.33883318305015564,
    -            0.38209766149520874,
    -            0.479579895734787,
    -            -0.5910238027572632,
    -            0.1890297830104828,
    -            -0.29854580760002136,
    -            -0.5636696219444275,
    -            -0.504091739654541,
    -            -0.32814571261405945,
    -            -0.748496949672699,
    -            -0.3217906653881073,
    -            -0.12439341843128204,
    -            -0.3949342668056488,
    -            0.09739203751087189,
    -            -0.4254276752471924,
    -            0.8690429329872131,
    -            -0.26380032300949097,
    -            -1.2738139629364014,
    -            -0.12694764137268066,
    -            -0.7331164479255676,
    -            0.11337947845458984,
    -            -0.7573927640914917,
    -            -0.41507089138031006,
    -            -0.18960340321063995,
    -            1.2390563488006592,
    -            -0.10859012603759766,
    -            -0.021934548392891884,
    -            -0.05041227489709854,
    -            -0.055214136838912964,
    -            0.20024456083774567,
    -            -0.2689618766307831,
    -            -0.3135489821434021,
    -            -0.07520166784524918,
    -            -0.5906742811203003,
    -            0.2828388512134552,
    -            0.05117213353514671,
    -            1.4600849151611328,
    -            -0.1967628449201584,
    -            0.011182722635567188,
    -            0.028878701850771904,
    -            -0.12146933376789093,
    -            0.6056286096572876,
    -            0.22920559346675873,
    -            -0.008979334495961666,
    -            -0.2874019742012024,
    -            -0.4887332320213318,
    -            0.8754663467407227,
    -            -0.05393843352794647,
    -            -0.2956174910068512,
    -            -0.18953847885131836,
    -            -0.19063766300678253,
    -            -0.8141281008720398,
    -            0.11052622646093369,
    -            -0.020359158515930176,
    -            -0.1262499988079071,
    -            -1.7762614488601685,
    -            -0.4864279627799988,
    -            -0.8644945621490479,
    -            0.1278448849916458,
    -            1.1127605438232422,
    -            -0.595068097114563,
    -            -0.06630692631006241,
    -            1.5608118772506714,
    -            -0.9473971724510193,
    -            -0.1827543079853058,
    -            -0.25564679503440857,
    -            -0.4378860294818878,
    -            -0.8285927176475525,
    -            -1.1397618055343628,
    -            -0.06226593255996704,
    -            -0.09025824069976807,
    -            -0.518083393573761,
    -            -0.893482506275177,
    -            0.5022943615913391,
    -            -0.5922176837921143,
    -            0.2571451961994171,
    -            0.25571396946907043,
    -            0.832092821598053,
    -            -0.061823680996894836,
    -            -0.08963754773139954,
    -            -0.42173218727111816,
    -            -0.4375287890434265,
    -            -0.43921560049057007,
    -            0.5626742243766785,
    -            -0.011294233612716198,
    -            0.626301646232605,
    -            -0.28029197454452515,
    -            0.15464802086353302,
    -            -0.7071759700775146,
    -            -0.0337684191763401,
    -            -0.20901329815387726,
    -            -0.29788798093795776,
    -            0.6644192934036255,
    -            -0.049459852278232574,
    -            0.039552830159664154,
    -            -0.2790898084640503,
    -            0.3250356614589691,
    -            -0.12668772041797638,
    -            -0.46142634749412537,
    -            -0.35542988777160645,
    -            -1.1817448139190674,
    -            0.007615066133439541,
    -            -0.43865758180618286,
    -            -0.16142761707305908,
    -            -0.37852972745895386,
    -            -0.582589328289032,
    -            0.4371003210544586,
    -            -0.2603273391723633,
    -            -0.03284638375043869,
    -            0.8895729184150696,
    -            -0.025997856631875038,
    -            0.5761443376541138,
    -            -0.28437164425849915,
    -            -0.11191761493682861,
    -            -0.07794637233018875,
    -            0.02127309888601303,
    -            -0.10069284588098526,
    -            -0.2177346795797348,
    -            -1.029278039932251,
    -            -0.5014596581459045,
    -            -0.5774326920509338,
    -            -0.2856050431728363,
    -            -0.24715296924114227,
    -            0.1243511438369751,
    -            0.042631667107343674,
    -            -0.846584677696228,
    -            -0.7308683395385742,
    -            -0.09307371079921722,
    -            -0.35250845551490784,
    -            0.12801845371723175,
    -            -0.5423708558082581,
    -            -0.22422067821025848,
    -            1.574460744857788,
    -            -0.27640238404273987,
    -            -0.37266722321510315,
    -            -0.12533603608608246,
    -            0.3177711069583893,
    -            -0.4530303478240967,
    -            0.24940718710422516,
    -            -0.1272897720336914,
    -            0.6882254481315613,
    -            -0.2153051793575287,
    -            -0.6189695000648499,
    -            -0.38704702258110046,
    -            -0.14360225200653076,
    -            -0.08159925043582916,
    -            0.4714410603046417,
    -            -0.16035029292106628,
    -            0.005880486220121384,
    -            -0.5742312669754028,
    -            -0.33733850717544556,
    -            -0.39702731370925903,
    -            -0.14614750444889069,
    -            -0.06936132907867432,
    -            0.2528288662433624,
    -            -0.25900882482528687,
    -            0.45907658338546753,
    -            -0.20694994926452637,
    -            0.4083366394042969,
    -            -0.9925484657287598,
    -            -0.17098328471183777,
    -            0.3215583860874176,
    -            -0.33823585510253906,
    -            -0.07112737745046616,
    -            -0.05322866141796112,
    -            0.19237284362316132,
    -            -0.6257429122924805,
    -            0.23328493535518646,
    -            -0.17247024178504944,
    -            -0.3362499177455902,
    -            -0.17041970789432526,
    -            -0.014526017010211945,
    -            -0.12138030678033829,
    -            0.0698552280664444,
    -            -0.609315037727356,
    -            0.8142863512039185,
    -            -2.295081615447998,
    -            -0.07903101295232773,
    -            -0.48268306255340576,
    -            -0.2097805291414261,
    -            -0.4481655955314636,
    -            -1.059373378753662,
    -            0.17675237357616425,
    -            -0.5335419774055481,
    -            0.7713444232940674,
    -            0.6341530084609985,
    -            1.1411781311035156,
    -            -0.18365903198719025,
    -            -0.4029919505119324,
    -            -0.34328755736351013,
    -            -1.1935101747512817,
    -            -0.4249494671821594,
    -            0.10720300674438477,
    -            -0.13509584963321686,
    -            -0.610278844833374,
    -            -0.1007867231965065,
    -            -0.13094481825828552,
    -            0.3319343030452728,
    -            -0.22466504573822021,
    -            -0.33384865522384644,
    -            -0.3001727759838104,
    -            -0.48621413111686707,
    -            0.10271137952804565,
    -            -0.3953743577003479,
    -            -0.3412061631679535,
    -            -1.3808176517486572,
    -            -0.3035687804222107,
    -            0.27737119793891907,
    -            -0.10266303271055222,
    -            -0.472690224647522,
    -            0.03376518189907074,
    -            -0.2053908109664917,
    -            -0.46477705240249634,
    -            -0.0046875146217644215,
    -            0.8462978005409241,
    -            -0.7554765343666077,
    -            -0.9736349582672119,
    -            -0.14118513464927673,
    -            -0.2665828466415405,
    -            -0.9371470212936401,
    -            -0.007497116923332214,
    -            0.6816821098327637,
    -            0.20980679988861084,
    -            -0.5602611303329468,
    -            -0.7874919176101685,
    -            -0.01479698158800602,
    -            -0.45345690846443176,
    -            -0.12117742747068405,
    -            -0.5790822505950928,
    -            -0.27737149596214294,
    -            0.08818025887012482,
    -            -0.25239622592926025,
    -            1.1271374225616455,
    -            0.0044799973256886005,
    -            0.2183203548192978,
    -            -2.0634095668792725,
    -            -0.007129574194550514,
    -            0.32677894830703735,
    -            0.019878007471561432,
    -            0.060301825404167175,
    -            -0.6844122409820557,
    -            0.35185739398002625,
    -            -0.0028550554998219013,
    -            -0.5629953145980835,
    -            0.06621643155813217,
    -            -0.043473124504089355,
    -            -0.3398932218551636,
    -            -0.1782192587852478,
    -            -0.24575252830982208,
    -            -0.20299431681632996,
    -            -0.3652290999889374,
    -            -0.9888001680374146,
    -            -0.30628740787506104,
    -            0.6184420585632324,
    -            -0.33409008383750916,
    -            0.20486755669116974,
    -            -0.8251897692680359,
    -            -0.08471876382827759,
    -            -0.5613390803337097,
    -            0.057765014469623566,
    -            0.5359746813774109,
    -            -0.7063419818878174,
    -            0.28122395277023315,
    -            -0.004502696450799704,
    -            -0.6543170213699341,
    -            0.04663177207112312,
    -            -0.05775964632630348,
    -            -6.37779594399035e-05,
    -            0.46121329069137573,
    -            -0.004464420489966869,
    -            1.4332563877105713,
    -            0.20597098767757416,
    -            -0.17879879474639893,
    -            0.4316228926181793,
    -            -1.2352955341339111,
    -            -0.19363455474376678,
    -            -0.32174810767173767,
    -            -0.23037514090538025,
    -            0.17044368386268616,
    -            0.13070613145828247,
    -            1.2171069383621216,
    -            -1.171966314315796,
    -            0.04596511274576187,
    -            -0.1690378040075302,
    -            -0.030221890658140182,
    -            0.3216114342212677,
    -            -0.08577033132314682,
    -            -0.26656001806259155,
    -            -0.4321160316467285,
    -            -0.22010475397109985,
    -            -0.6187731623649597,
    -            -0.4711909890174866,
    -            -0.3499036431312561,
    -            0.13558903336524963,
    -            -0.2124641239643097,
    -            -0.28327351808547974,
    -            0.12788993120193481,
    -            -1.3083688020706177,
    -            -0.0332779586315155,
    -            -0.4718656837940216,
    -            1.031941533088684,
    -            -0.07811620831489563,
    -            -0.5331435799598694,
    -            -0.2602376341819763,
    -            -0.8461449146270752,
    -            0.18593788146972656,
    -            0.5763140320777893,
    -            -0.45714831352233887,
    -            -0.1056162416934967,
    -            0.2665534019470215,
    -            -0.4580163061618805,
    -            -0.25224190950393677,
    -            -0.2334505170583725,
    -            -0.6723064184188843,
    -            0.12331533432006836,
    -            0.054681699723005295,
    -            -0.14116793870925903,
    -            -0.10254379361867905,
    -            2.0082550048828125,
    -            -1.4980225563049316,
    -            0.00379346776753664,
    -            -0.8470208644866943,
    -            0.06866040825843811,
    -            -0.3133383095264435,
    -            -0.20381635427474976,
    -            -0.03295162320137024,
    -            1.1624072790145874,
    -            -1.2590479850769043,
    -            -0.5051106810569763,
    -            -0.5310556292533875,
    -            0.11350126564502716,
    -            -0.5141156315803528,
    -            1.0333826541900635,
    -            -0.5528491735458374,
    -            -0.6508246064186096,
    -            -1.0594176054000854,
    -            -0.03546600416302681,
    -            -0.0008655009442009032,
    -            0.06422116607427597,
    -            -0.5845358371734619,
    -            -0.049052149057388306,
    -            -0.578079104423523,
    -            -0.46709108352661133,
    -            -0.6544204354286194,
    -            -0.13105393946170807,
    -            -0.12359122931957245,
    -            0.19125737249851227,
    -            -0.9108084440231323,
    -            -0.24640944600105286,
    -            -0.5813102126121521,
    -            -0.2342103123664856,
    -            0.645296573638916,
    -            0.4200597405433655,
    -            1.030412197113037,
    -            0.026015933603048325,
    -            0.03929654508829117,
    -            -0.18394766747951508,
    -            -0.2946997582912445,
    -            0.029773380607366562,
    -            -1.1292797327041626,
    -            -0.3272054195404053,
    -            -0.19441728293895721,
    -            -0.8372487425804138,
    -            0.5765964984893799,
    -            -0.28797629475593567,
    -            -0.6211466789245605,
    -            0.09933445602655411,
    -            -0.5617806911468506,
    -            1.163861870765686,
    -            0.1421220600605011,
    -            -0.790323793888092,
    -            -0.4003753960132599,
    -            -0.6941299438476562,
    -            -0.5033494830131531,
    -            -0.2234964221715927,
    -            -0.12398113310337067,
    -            -0.26237404346466064,
    -            -0.4991702139377594,
    -            -0.7963886260986328,
    -            -0.012063371017575264,
    -            -1.1415417194366455,
    -            0.40668150782585144,
    -            0.33048388361930847,
    -            1.3195141553878784,
    -            -0.0008099540136754513,
    -            -0.06793856620788574,
    -        ]
    -    )
    +    DINOV2_IMAGENET1K_MEAN = torch.tensor([
    +        0.1450997292995453,
    +        -1.0630134344100952,
    +        -0.3518574833869934,
    +        -0.38624095916748047,
    +        -0.4866980314254761,
    +        -0.28983384370803833,
    +        0.9997676014900208,
    +        -1.231179118156433,
    +        -0.7889889478683472,
    +        -0.4450306296348572,
    +        -0.09231726080179214,
    +        0.13243812322616577,
    +        0.09571082890033722,
    +        -0.29342857003211975,
    +        0.05933428555727005,
    +        -0.21923032402992249,
    +        0.08959043025970459,
    +        -0.6981018781661987,
    +        0.4853704869747162,
    +        -0.29948222637176514,
    +        0.3107207119464874,
    +        -0.3812718093395233,
    +        -0.5013473033905029,
    +        2.88395094871521,
    +        -0.5611682534217834,
    +        -0.3514024615287781,
    +        0.025546086952090263,
    +        -0.24438244104385376,
    +        -0.23365195095539093,
    +        -0.2533780336380005,
    +        0.4445696473121643,
    +        1.1176759004592896,
    +        -0.4188934564590454,
    +        0.09051182866096497,
    +        -0.04133417829871178,
    +        -0.008052834309637547,
    +        -0.5118610858917236,
    +        0.22084011137485504,
    +        -0.7333402633666992,
    +        0.8644523620605469,
    +        -0.43727627396583557,
    +        -0.22333095967769623,
    +        -1.5415295362472534,
    +        -0.24187016487121582,
    +        -0.33239033818244934,
    +        -1.2828021049499512,
    +        -0.21485395729541779,
    +        0.6667488813400269,
    +        -0.25890952348709106,
    +        -0.8630414009094238,
    +        1.5059994459152222,
    +        -0.00952776987105608,
    +        0.18695995211601257,
    +        0.0200128685683012,
    +        -0.221832275390625,
    +        1.2800148725509644,
    +        -0.1416555792093277,
    +        0.61446613073349,
    +        0.053658585995435715,
    +        -0.08877403289079666,
    +        1.0190010070800781,
    +        -0.308927446603775,
    +        -0.3903353214263916,
    +        -0.35504740476608276,
    +        -0.7907304763793945,
    +        -0.18439480662345886,
    +        -0.1797204464673996,
    +        0.8199827075004578,
    +        -0.1736353039741516,
    +        -0.16373644769191742,
    +        0.7541728019714355,
    +        -0.3236996829509735,
    +        0.8245170712471008,
    +        0.3411649167537689,
    +        -0.21873517334461212,
    +        -0.7620954513549805,
    +        -0.10635858029127121,
    +        -0.592278003692627,
    +        0.8314691781997681,
    +        -0.2021609991788864,
    +        -0.24301563203334808,
    +        -0.03504444658756256,
    +        -0.061244938522577286,
    +        -0.36000630259513855,
    +        -0.38578882813453674,
    +        -1.2314008474349976,
    +        -0.3416382968425751,
    +        0.5925644636154175,
    +        0.32259607315063477,
    +        0.13169726729393005,
    +        -0.131134033203125,
    +        0.05763484537601471,
    +        -0.7130515575408936,
    +        -0.5685354471206665,
    +        0.04428980499505997,
    +        0.9245452880859375,
    +        0.37724241614341736,
    +        -0.4426809549331665,
    +        0.5091503262519836,
    +        -0.08006338775157928,
    +        -0.18945513665676117,
    +        -0.770736575126648,
    +        -0.3588047921657562,
    +        0.04727765917778015,
    +        -0.16137081384658813,
    +        -0.021555813029408455,
    +        0.6381930708885193,
    +        0.30161890387535095,
    +        -0.0710706040263176,
    +        -0.13884945213794708,
    +        -0.22726555168628693,
    +        -0.6134527921676636,
    +        0.2969088852405548,
    +        -0.2334780991077423,
    +        -0.46334928274154663,
    +        -0.3058214485645294,
    +        0.5196799039840698,
    +        0.6341780424118042,
    +        0.12271945178508759,
    +        -1.0072089433670044,
    +        -0.1198473796248436,
    +        -0.24667270481586456,
    +        -0.19228138029575348,
    +        -0.3955901861190796,
    +        -0.19902971386909485,
    +        0.7407659292221069,
    +        2.3908257484436035,
    +        0.02820657566189766,
    +        0.07064329087734222,
    +        -0.2637694776058197,
    +        0.2560977339744568,
    +        0.3973558247089386,
    +        -0.17345857620239258,
    +        -0.9541534185409546,
    +        -0.21434728801250458,
    +        0.41178393363952637,
    +        -0.008175228722393513,
    +        0.5115303993225098,
    +        -0.9667210578918457,
    +        1.6499103307724,
    +        -1.8320564031600952,
    +        1.1143667697906494,
    +        0.24006624519824982,
    +        -0.02112947776913643,
    +        -0.4952388405799866,
    +        1.1000680923461914,
    +        -0.4901401102542877,
    +        0.22758258879184723,
    +        -0.6699370741844177,
    +        0.6926363706588745,
    +        -0.5719613432884216,
    +        0.008403707295656204,
    +        2.0220773220062256,
    +        -0.1789812445640564,
    +        -0.8777256011962891,
    +        0.3709064722061157,
    +        -0.2629733681678772,
    +        0.08407248556613922,
    +        -0.27063870429992676,
    +        0.09993340820074081,
    +        -0.3755860924720764,
    +        0.07000888139009476,
    +        0.3775370419025421,
    +        0.5653945207595825,
    +        -0.11404427886009216,
    +        -0.06088113784790039,
    +        -0.0898045226931572,
    +        0.19868576526641846,
    +        0.14287644624710083,
    +        -0.669394314289093,
    +        -0.07882463932037354,
    +        -0.12379930168390274,
    +        -0.010277876630425453,
    +        -0.5625343918800354,
    +        -0.6508009433746338,
    +        0.06929764896631241,
    +        -2.0470166206359863,
    +        1.0193544626235962,
    +        -0.9747569561004639,
    +        -0.25624850392341614,
    +        -0.04412469267845154,
    +        -0.01941649615764618,
    +        0.04781557247042656,
    +        -0.2561051845550537,
    +        -0.09596704691648483,
    +        -1.0529744625091553,
    +        -0.32774603366851807,
    +        -0.1931363344192505,
    +        -0.36885082721710205,
    +        -0.9351740479469299,
    +        -0.47905397415161133,
    +        -0.678762674331665,
    +        2.336048126220703,
    +        0.26323413848876953,
    +        -0.36512619256973267,
    +        -0.3650853633880615,
    +        -0.8287989497184753,
    +        0.5866581201553345,
    +        -0.420742005109787,
    +        0.008546118624508381,
    +        -0.7811568975448608,
    +        -0.34993329644203186,
    +        -0.373068243265152,
    +        0.028424998745322227,
    +        -0.537581205368042,
    +        -0.15937983989715576,
    +        -0.5638740062713623,
    +        -0.4413940906524658,
    +        -0.05887509509921074,
    +        -0.12291032075881958,
    +        -0.26565149426460266,
    +        -0.23059803247451782,
    +        -0.2925986349582672,
    +        0.04849022254347801,
    +        -0.4770037531852722,
    +        0.040383752435445786,
    +        -0.8186637759208679,
    +        -0.062463242560625076,
    +        -0.3251510262489319,
    +        -0.4319412112236023,
    +        -0.34569647908210754,
    +        0.9713658690452576,
    +        -0.25668394565582275,
    +        -0.37531179189682007,
    +        0.5259386301040649,
    +        -0.06112021207809448,
    +        0.06980857998132706,
    +        -0.38363778591156006,
    +        -0.1948518007993698,
    +        -0.7897586822509766,
    +        -0.600932776927948,
    +        -0.4269576072692871,
    +        -0.32002967596054077,
    +        0.08897170424461365,
    +        -0.3079395294189453,
    +        -0.05779555067420006,
    +        -0.782086968421936,
    +        1.9608103036880493,
    +        0.1145739033818245,
    +        0.06164107844233513,
    +        -0.3024725317955017,
    +        -0.6308553218841553,
    +        -0.7640243172645569,
    +        -4.433685302734375,
    +        -0.31690648198127747,
    +        -0.019084235653281212,
    +        -0.09761863201856613,
    +        -0.029514605179429054,
    +        -0.5096182823181152,
    +        1.112805962562561,
    +        -0.3302820324897766,
    +        -0.23730400204658508,
    +        0.044646695256233215,
    +        -0.805400013923645,
    +        -7.766678333282471,
    +        -0.2016162872314453,
    +        -0.5018128752708435,
    +        0.6819560527801514,
    +        -0.2735823392868042,
    +        -2.2288968563079834,
    +        -0.36170846223831177,
    +        -0.7745882868766785,
    +        0.4644778370857239,
    +        0.2525951564311981,
    +        -0.22642317414283752,
    +        -0.5394997596740723,
    +        -0.5064775347709656,
    +        -0.5716705918312073,
    +        0.19713695347309113,
    +        -0.5411649942398071,
    +        -0.17092496156692505,
    +        0.45778003334999084,
    +        0.6894896030426025,
    +        -0.21671152114868164,
    +        -0.9160588383674622,
    +        -0.10307890176773071,
    +        0.11703722178936005,
    +        -0.7433905601501465,
    +        -1.5170584917068481,
    +        2.163774013519287,
    +        -1.542649507522583,
    +        -0.1601075381040573,
    +        -0.5249155163764954,
    +        0.44509291648864746,
    +        -0.5261067152023315,
    +        -0.02273540571331978,
    +        -0.28311043977737427,
    +        0.9144242405891418,
    +        0.43954336643218994,
    +        -0.2469814419746399,
    +        0.18752114474773407,
    +        -0.6066163778305054,
    +        -0.14480441808700562,
    +        -0.3546217679977417,
    +        -0.11870954185724258,
    +        -0.09891107678413391,
    +        -0.377458781003952,
    +        0.33304381370544434,
    +        -0.156569704413414,
    +        -0.9730328321456909,
    +        -0.5034677386283875,
    +        0.042613230645656586,
    +        0.08271210640668869,
    +        -0.2368200123310089,
    +        -0.07397157698869705,
    +        0.011974042281508446,
    +        -0.2115129977464676,
    +        -0.3752884566783905,
    +        -0.24985794723033905,
    +        -0.25223013758659363,
    +        1.8311675786972046,
    +        -0.1650543361902237,
    +        -0.031050190329551697,
    +        0.10702164471149445,
    +        0.8963613510131836,
    +        -0.9483885169029236,
    +        -0.8156309723854065,
    +        -1.7132004499435425,
    +        0.08163392543792725,
    +        0.4886241555213928,
    +        -0.016470594331622124,
    +        -0.37671732902526855,
    +        -0.025105634704232216,
    +        -0.2695018947124481,
    +        -0.8450148701667786,
    +        -0.9802296757698059,
    +        -0.21868866682052612,
    +        -0.5872927308082581,
    +        1.019242763519287,
    +        0.01872517168521881,
    +        0.5087792873382568,
    +        0.06771136820316315,
    +        1.4142885208129883,
    +        0.13146139681339264,
    +        -0.36489933729171753,
    +        0.37572142481803894,
    +        -0.3490581810474396,
    +        -0.13830198347568512,
    +        -1.8019393682479858,
    +        1.5129766464233398,
    +        0.07059808075428009,
    +        1.7206473350524902,
    +        0.02890164405107498,
    +        0.3628808557987213,
    +        0.3914141058921814,
    +        0.4993101954460144,
    +        0.3969678580760956,
    +        -0.058554816991090775,
    +        -0.3434300422668457,
    +        -0.4157616198062897,
    +        -0.7624511122703552,
    +        -0.3997197449207306,
    +        1.4573990106582642,
    +        -0.3363801836967468,
    +        -0.46490129828453064,
    +        -0.7445303797721863,
    +        -0.3460237979888916,
    +        -0.6315308809280396,
    +        0.8536337018013,
    +        -0.08939796686172485,
    +        -0.21093742549419403,
    +        -0.08742645382881165,
    +        -0.020040960982441902,
    +        0.09354449808597565,
    +        -0.809800386428833,
    +        -0.0018062496092170477,
    +        -1.0083088874816895,
    +        0.3428219258785248,
    +        0.012708818539977074,
    +        -0.3535612225532532,
    +        1.9481208324432373,
    +        0.013826621696352959,
    +        -0.026771225035190582,
    +        0.18734635412693024,
    +        0.9365230798721313,
    +        1.247671025339514e-05,
    +        -0.4420109987258911,
    +        0.10769690573215485,
    +        -0.6858118176460266,
    +        -0.24754805862903595,
    +        1.0027467012405396,
    +        -0.26436665654182434,
    +        -0.33883318305015564,
    +        0.38209766149520874,
    +        0.479579895734787,
    +        -0.5910238027572632,
    +        0.1890297830104828,
    +        -0.29854580760002136,
    +        -0.5636696219444275,
    +        -0.504091739654541,
    +        -0.32814571261405945,
    +        -0.748496949672699,
    +        -0.3217906653881073,
    +        -0.12439341843128204,
    +        -0.3949342668056488,
    +        0.09739203751087189,
    +        -0.4254276752471924,
    +        0.8690429329872131,
    +        -0.26380032300949097,
    +        -1.2738139629364014,
    +        -0.12694764137268066,
    +        -0.7331164479255676,
    +        0.11337947845458984,
    +        -0.7573927640914917,
    +        -0.41507089138031006,
    +        -0.18960340321063995,
    +        1.2390563488006592,
    +        -0.10859012603759766,
    +        -0.021934548392891884,
    +        -0.05041227489709854,
    +        -0.055214136838912964,
    +        0.20024456083774567,
    +        -0.2689618766307831,
    +        -0.3135489821434021,
    +        -0.07520166784524918,
    +        -0.5906742811203003,
    +        0.2828388512134552,
    +        0.05117213353514671,
    +        1.4600849151611328,
    +        -0.1967628449201584,
    +        0.011182722635567188,
    +        0.028878701850771904,
    +        -0.12146933376789093,
    +        0.6056286096572876,
    +        0.22920559346675873,
    +        -0.008979334495961666,
    +        -0.2874019742012024,
    +        -0.4887332320213318,
    +        0.8754663467407227,
    +        -0.05393843352794647,
    +        -0.2956174910068512,
    +        -0.18953847885131836,
    +        -0.19063766300678253,
    +        -0.8141281008720398,
    +        0.11052622646093369,
    +        -0.020359158515930176,
    +        -0.1262499988079071,
    +        -1.7762614488601685,
    +        -0.4864279627799988,
    +        -0.8644945621490479,
    +        0.1278448849916458,
    +        1.1127605438232422,
    +        -0.595068097114563,
    +        -0.06630692631006241,
    +        1.5608118772506714,
    +        -0.9473971724510193,
    +        -0.1827543079853058,
    +        -0.25564679503440857,
    +        -0.4378860294818878,
    +        -0.8285927176475525,
    +        -1.1397618055343628,
    +        -0.06226593255996704,
    +        -0.09025824069976807,
    +        -0.518083393573761,
    +        -0.893482506275177,
    +        0.5022943615913391,
    +        -0.5922176837921143,
    +        0.2571451961994171,
    +        0.25571396946907043,
    +        0.832092821598053,
    +        -0.061823680996894836,
    +        -0.08963754773139954,
    +        -0.42173218727111816,
    +        -0.4375287890434265,
    +        -0.43921560049057007,
    +        0.5626742243766785,
    +        -0.011294233612716198,
    +        0.626301646232605,
    +        -0.28029197454452515,
    +        0.15464802086353302,
    +        -0.7071759700775146,
    +        -0.0337684191763401,
    +        -0.20901329815387726,
    +        -0.29788798093795776,
    +        0.6644192934036255,
    +        -0.049459852278232574,
    +        0.039552830159664154,
    +        -0.2790898084640503,
    +        0.3250356614589691,
    +        -0.12668772041797638,
    +        -0.46142634749412537,
    +        -0.35542988777160645,
    +        -1.1817448139190674,
    +        0.007615066133439541,
    +        -0.43865758180618286,
    +        -0.16142761707305908,
    +        -0.37852972745895386,
    +        -0.582589328289032,
    +        0.4371003210544586,
    +        -0.2603273391723633,
    +        -0.03284638375043869,
    +        0.8895729184150696,
    +        -0.025997856631875038,
    +        0.5761443376541138,
    +        -0.28437164425849915,
    +        -0.11191761493682861,
    +        -0.07794637233018875,
    +        0.02127309888601303,
    +        -0.10069284588098526,
    +        -0.2177346795797348,
    +        -1.029278039932251,
    +        -0.5014596581459045,
    +        -0.5774326920509338,
    +        -0.2856050431728363,
    +        -0.24715296924114227,
    +        0.1243511438369751,
    +        0.042631667107343674,
    +        -0.846584677696228,
    +        -0.7308683395385742,
    +        -0.09307371079921722,
    +        -0.35250845551490784,
    +        0.12801845371723175,
    +        -0.5423708558082581,
    +        -0.22422067821025848,
    +        1.574460744857788,
    +        -0.27640238404273987,
    +        -0.37266722321510315,
    +        -0.12533603608608246,
    +        0.3177711069583893,
    +        -0.4530303478240967,
    +        0.24940718710422516,
    +        -0.1272897720336914,
    +        0.6882254481315613,
    +        -0.2153051793575287,
    +        -0.6189695000648499,
    +        -0.38704702258110046,
    +        -0.14360225200653076,
    +        -0.08159925043582916,
    +        0.4714410603046417,
    +        -0.16035029292106628,
    +        0.005880486220121384,
    +        -0.5742312669754028,
    +        -0.33733850717544556,
    +        -0.39702731370925903,
    +        -0.14614750444889069,
    +        -0.06936132907867432,
    +        0.2528288662433624,
    +        -0.25900882482528687,
    +        0.45907658338546753,
    +        -0.20694994926452637,
    +        0.4083366394042969,
    +        -0.9925484657287598,
    +        -0.17098328471183777,
    +        0.3215583860874176,
    +        -0.33823585510253906,
    +        -0.07112737745046616,
    +        -0.05322866141796112,
    +        0.19237284362316132,
    +        -0.6257429122924805,
    +        0.23328493535518646,
    +        -0.17247024178504944,
    +        -0.3362499177455902,
    +        -0.17041970789432526,
    +        -0.014526017010211945,
    +        -0.12138030678033829,
    +        0.0698552280664444,
    +        -0.609315037727356,
    +        0.8142863512039185,
    +        -2.295081615447998,
    +        -0.07903101295232773,
    +        -0.48268306255340576,
    +        -0.2097805291414261,
    +        -0.4481655955314636,
    +        -1.059373378753662,
    +        0.17675237357616425,
    +        -0.5335419774055481,
    +        0.7713444232940674,
    +        0.6341530084609985,
    +        1.1411781311035156,
    +        -0.18365903198719025,
    +        -0.4029919505119324,
    +        -0.34328755736351013,
    +        -1.1935101747512817,
    +        -0.4249494671821594,
    +        0.10720300674438477,
    +        -0.13509584963321686,
    +        -0.610278844833374,
    +        -0.1007867231965065,
    +        -0.13094481825828552,
    +        0.3319343030452728,
    +        -0.22466504573822021,
    +        -0.33384865522384644,
    +        -0.3001727759838104,
    +        -0.48621413111686707,
    +        0.10271137952804565,
    +        -0.3953743577003479,
    +        -0.3412061631679535,
    +        -1.3808176517486572,
    +        -0.3035687804222107,
    +        0.27737119793891907,
    +        -0.10266303271055222,
    +        -0.472690224647522,
    +        0.03376518189907074,
    +        -0.2053908109664917,
    +        -0.46477705240249634,
    +        -0.0046875146217644215,
    +        0.8462978005409241,
    +        -0.7554765343666077,
    +        -0.9736349582672119,
    +        -0.14118513464927673,
    +        -0.2665828466415405,
    +        -0.9371470212936401,
    +        -0.007497116923332214,
    +        0.6816821098327637,
    +        0.20980679988861084,
    +        -0.5602611303329468,
    +        -0.7874919176101685,
    +        -0.01479698158800602,
    +        -0.45345690846443176,
    +        -0.12117742747068405,
    +        -0.5790822505950928,
    +        -0.27737149596214294,
    +        0.08818025887012482,
    +        -0.25239622592926025,
    +        1.1271374225616455,
    +        0.0044799973256886005,
    +        0.2183203548192978,
    +        -2.0634095668792725,
    +        -0.007129574194550514,
    +        0.32677894830703735,
    +        0.019878007471561432,
    +        0.060301825404167175,
    +        -0.6844122409820557,
    +        0.35185739398002625,
    +        -0.0028550554998219013,
    +        -0.5629953145980835,
    +        0.06621643155813217,
    +        -0.043473124504089355,
    +        -0.3398932218551636,
    +        -0.1782192587852478,
    +        -0.24575252830982208,
    +        -0.20299431681632996,
    +        -0.3652290999889374,
    +        -0.9888001680374146,
    +        -0.30628740787506104,
    +        0.6184420585632324,
    +        -0.33409008383750916,
    +        0.20486755669116974,
    +        -0.8251897692680359,
    +        -0.08471876382827759,
    +        -0.5613390803337097,
    +        0.057765014469623566,
    +        0.5359746813774109,
    +        -0.7063419818878174,
    +        0.28122395277023315,
    +        -0.004502696450799704,
    +        -0.6543170213699341,
    +        0.04663177207112312,
    +        -0.05775964632630348,
    +        -6.37779594399035e-05,
    +        0.46121329069137573,
    +        -0.004464420489966869,
    +        1.4332563877105713,
    +        0.20597098767757416,
    +        -0.17879879474639893,
    +        0.4316228926181793,
    +        -1.2352955341339111,
    +        -0.19363455474376678,
    +        -0.32174810767173767,
    +        -0.23037514090538025,
    +        0.17044368386268616,
    +        0.13070613145828247,
    +        1.2171069383621216,
    +        -1.171966314315796,
    +        0.04596511274576187,
    +        -0.1690378040075302,
    +        -0.030221890658140182,
    +        0.3216114342212677,
    +        -0.08577033132314682,
    +        -0.26656001806259155,
    +        -0.4321160316467285,
    +        -0.22010475397109985,
    +        -0.6187731623649597,
    +        -0.4711909890174866,
    +        -0.3499036431312561,
    +        0.13558903336524963,
    +        -0.2124641239643097,
    +        -0.28327351808547974,
    +        0.12788993120193481,
    +        -1.3083688020706177,
    +        -0.0332779586315155,
    +        -0.4718656837940216,
    +        1.031941533088684,
    +        -0.07811620831489563,
    +        -0.5331435799598694,
    +        -0.2602376341819763,
    +        -0.8461449146270752,
    +        0.18593788146972656,
    +        0.5763140320777893,
    +        -0.45714831352233887,
    +        -0.1056162416934967,
    +        0.2665534019470215,
    +        -0.4580163061618805,
    +        -0.25224190950393677,
    +        -0.2334505170583725,
    +        -0.6723064184188843,
    +        0.12331533432006836,
    +        0.054681699723005295,
    +        -0.14116793870925903,
    +        -0.10254379361867905,
    +        2.0082550048828125,
    +        -1.4980225563049316,
    +        0.00379346776753664,
    +        -0.8470208644866943,
    +        0.06866040825843811,
    +        -0.3133383095264435,
    +        -0.20381635427474976,
    +        -0.03295162320137024,
    +        1.1624072790145874,
    +        -1.2590479850769043,
    +        -0.5051106810569763,
    +        -0.5310556292533875,
    +        0.11350126564502716,
    +        -0.5141156315803528,
    +        1.0333826541900635,
    +        -0.5528491735458374,
    +        -0.6508246064186096,
    +        -1.0594176054000854,
    +        -0.03546600416302681,
    +        -0.0008655009442009032,
    +        0.06422116607427597,
    +        -0.5845358371734619,
    +        -0.049052149057388306,
    +        -0.578079104423523,
    +        -0.46709108352661133,
    +        -0.6544204354286194,
    +        -0.13105393946170807,
    +        -0.12359122931957245,
    +        0.19125737249851227,
    +        -0.9108084440231323,
    +        -0.24640944600105286,
    +        -0.5813102126121521,
    +        -0.2342103123664856,
    +        0.645296573638916,
    +        0.4200597405433655,
    +        1.030412197113037,
    +        0.026015933603048325,
    +        0.03929654508829117,
    +        -0.18394766747951508,
    +        -0.2946997582912445,
    +        0.029773380607366562,
    +        -1.1292797327041626,
    +        -0.3272054195404053,
    +        -0.19441728293895721,
    +        -0.8372487425804138,
    +        0.5765964984893799,
    +        -0.28797629475593567,
    +        -0.6211466789245605,
    +        0.09933445602655411,
    +        -0.5617806911468506,
    +        1.163861870765686,
    +        0.1421220600605011,
    +        -0.790323793888092,
    +        -0.4003753960132599,
    +        -0.6941299438476562,
    +        -0.5033494830131531,
    +        -0.2234964221715927,
    +        -0.12398113310337067,
    +        -0.26237404346466064,
    +        -0.4991702139377594,
    +        -0.7963886260986328,
    +        -0.012063371017575264,
    +        -1.1415417194366455,
    +        0.40668150782585144,
    +        0.33048388361930847,
    +        1.3195141553878784,
    +        -0.0008099540136754513,
    +        -0.06793856620788574,
    +    ])
         return DINOV2_IMAGENET1K_MEAN, DINOV2_IMAGENET1K_SCALAR
     
     
    @@ -1012,7 +1010,6 @@ def _dino_normalize(x):
                 x.clamp(-1e-5, 1e5) - DINOV2_IMAGENET1K_MEAN.to(x.device)
             ) / DINOV2_IMAGENET1K_SCALAR
     
    -
         dino_patch_acts, dino_out = extract_features(
             dino_vit, dino_sae, img, dino_tr, normalize_fn=_dino_normalize
         )
    
    From 01e12c2bd007314b9b64adffcd41726fb3ff985b Mon Sep 17 00:00:00 2001
    From: Matthew Thompson 
    Date: Fri, 12 Jun 2026 13:21:22 -0400
    Subject: [PATCH 7/9] Add local zenodo validation capabilities
    
    ---
     justfile                     |  3 +++
     scripts/validation-zenodo.sh | 26 ++++++++++++++++++++++++++
     2 files changed, 29 insertions(+)
     create mode 100755 scripts/validation-zenodo.sh
    
    diff --git a/justfile b/justfile
    index 1455a45..7c77dd2 100644
    --- a/justfile
    +++ b/justfile
    @@ -4,6 +4,9 @@ docs: lint
         -yek src/saev README.md AGENTS.md > docs/api/llms.txt
         uv run mkdocs build --config-file docs/mkdocs.yml
     
    +validate-zenodo:
    +    sh scripts/validation-zenodo.sh
    +
     test:
         uv run pytest -m "not slow and not integration" tests
     
    diff --git a/scripts/validation-zenodo.sh b/scripts/validation-zenodo.sh
    new file mode 100755
    index 0000000..cfcf966
    --- /dev/null
    +++ b/scripts/validation-zenodo.sh
    @@ -0,0 +1,26 @@
    +#!/bin/sh
    +# Validate .zenodo.json locally with the zenodraft version pinned in the CI workflow; paths are relative, so run from the repository root.
    +
    +set -eu  # abort on any failing command or use of an unset variable
    +
    +workflow=".github/workflows/validate-zenodo.yaml"
    +
    +# Take the first `zenodraft@X.Y.Z` token found in the workflow, e.g. "zenodraft@0.14.1" from the `npm install zenodraft@0.14.1` line.
    +spec=$(grep -oE 'zenodraft@[0-9]+\.[0-9]+\.[0-9]+' "$workflow" | head -n 1)
    +if [ -z "$spec" ]; then  # nothing matched (or the workflow file could not be read)
    +    echo "could not read pinned zenodraft version from $workflow" >&2
    +    exit 1
    +fi
    +
    +if command -v npx >/dev/null 2>&1; then  # first runner found on PATH wins; exec replaces this shell, so the runner's exit status is the script's
    +    exec npx "$spec" metadata validate .zenodo.json
    +elif command -v bunx >/dev/null 2>&1; then
    +    exec bunx "$spec" metadata validate .zenodo.json
    +elif command -v dx >/dev/null 2>&1; then
    +    exec dx "$spec" metadata validate .zenodo.json
    +elif command -v deno >/dev/null 2>&1; then
    +    exec deno run --allow-read "npm:$spec" metadata validate .zenodo.json
    +else
    +    echo "no JS runner found (tried: npx bunx dx deno)" >&2
    +    exit 1
    +fi
    
    From 1c7a568fa9523077b50df26984f5bdaa14ff41c0 Mon Sep 17 00:00:00 2001
    From: Matthew Thompson 
    Date: Fri, 12 Jun 2026 13:39:40 -0400
    Subject: [PATCH 8/9] Instruct citation via GitHub link
    
    ---
     README.md | 13 ++-----------
     1 file changed, 2 insertions(+), 11 deletions(-)
    
    diff --git a/README.md b/README.md
    index 9ac798f..0734b57 100644
    --- a/README.md
    +++ b/README.md
    @@ -24,15 +24,6 @@ Trained SAE checkpoints are available at:
     
     - [Huggingface Models](https://huggingface.co/collections/osunlp/sae-v-67ab8c4fdf179d117db28195)
     
    -If you want to cite the software, please cite it as:
    -
    -```bib
    -@software{stevens2025saev,
    -  author = {Stevens, Samuel},
    -  month = apr,
    -  title = {{saev}},
    -  url = {https://github.com/Imageomics/saev},
    -  year = {2025}
    -}
    -```
    +## Citation
     
    +To cite the software, use the "Cite this repository" link on the repository's GitHub page, which generates a ready-to-use citation (APA or BibTeX) from `CITATION.cff`.
    
    From cddf8ed1e116a6d604f9f38386080f669ed2d76b Mon Sep 17 00:00:00 2001
    From: egrace479 
    Date: Thu, 18 Jun 2026 14:56:37 -0400
    Subject: [PATCH 9/9] Set release date
    
    ---
     .zenodo.json | 2 +-
     CITATION.cff | 2 +-
     2 files changed, 2 insertions(+), 2 deletions(-)
    
    diff --git a/.zenodo.json b/.zenodo.json
    index d4ae2b4..8fa7c62 100644
    --- a/.zenodo.json
    +++ b/.zenodo.json
    @@ -16,7 +16,7 @@
         "title": "saev: Sparse Autoencoders for Vision Transformers",
         "version": "0.1.0",
         "license": "MIT",
    -    "publication_date": "2026-06-10",
    +    "publication_date": "2026-06-18",
         "grants": [
             {
                 "id": "021nxhr62::2118240"
    diff --git a/CITATION.cff b/CITATION.cff
    index e716abc..99e77f4 100644
    --- a/CITATION.cff
    +++ b/CITATION.cff
    @@ -24,7 +24,7 @@ keywords:
       - interpretability
       - computer vision
     license: MIT
    -date-released: '2026-06-10'
    +date-released: '2026-06-18'
     identifiers:
       - description: "The GitHub release URL of tag v0.1.0."
         type: url