diff --git a/.github/workflows/deploy-doc.yml b/.github/workflows/deploy-doc.yml new file mode 100644 index 0000000..9d2e657 --- /dev/null +++ b/.github/workflows/deploy-doc.yml @@ -0,0 +1,57 @@ +name: Build Documentation + +on: + push: + branches: + - main + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Allow one concurrent deployment +concurrency: + group: "pages" + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install dependencies + run: | + uv sync --all-extras --dev + - name: Build HTML documentation + run: | + cd docs + uv run sphinx-build -b html . _build/html + + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: docs/_build/html + + # Deploy job - only runs on main branch + deploy: + needs: build + # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages + permissions: + contents: read + pages: write + id-token: write + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/test-doc.yml b/.github/workflows/test-doc.yml new file mode 100644 index 0000000..b2d0c19 --- /dev/null +++ b/.github/workflows/test-doc.yml @@ -0,0 +1,34 @@ +name: Check Documentation Build + +on: + pull_request: + paths: + - 'docs/**' + - 'aion/**' + - '.github/workflows/docs-check.yml' + - 'pyproject.toml' + +jobs: + docs: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install dependencies + run: | + uv sync --all-extras --dev + - name: Build documentation + run: | + cd docs + uv run sphinx-build -W -b html . _build/html + - name: Check for broken links + run: | + cd docs + uv run sphinx-build -b linkcheck . _build/linkcheck || true diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f2da13c..f20229a 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -4,7 +4,7 @@ on: pull_request: push: branches: - - master + - main jobs: pre-commit: diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..0c43015 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,8 @@ +version: 2 +sphinx: + configuration: docs/conf.py + fail_on_warning: false +python: + version: 3.10 + install: + - requirements: docs/requirements.txt diff --git a/README.md b/README.md index e1689db..2e8b54a 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,8 @@ AION-1’s tokenizers cover **39 distinct data types**, grouped by survey and da Start with our interactive tutorial: - **[Open in Google Colab](https://colab.research.google.com/github/PolymathicAI/AION/blob/main/notebooks/Tutorial.ipynb)** - Learn AION basics interactively, no local setup required! +For detailed guides, see the [online documentation](https://polymathic-ai.github.io/AION/). + ## 📦 Advanced Installation AION offers flexible installation options to suit your environment and requirements. 
diff --git a/aion/codecs/config.py b/aion/codecs/config.py index af428b2..c9e3f57 100644 --- a/aion/codecs/config.py +++ b/aion/codecs/config.py @@ -1,54 +1,54 @@ # Configuration for codecs +from aion.codecs.catalog import CatalogCodec +from aion.codecs.image import ImageCodec +from aion.codecs.scalar import ( + GridScalarCodec, + LogScalarCodec, + MultiScalarCodec, + ScalarCodec, +) +from aion.codecs.scalar_field import ScalarFieldCodec +from aion.codecs.spectrum import SpectrumCodec from aion.modalities import ( + HSCAG, + HSCAI, + HSCAR, + HSCAY, + HSCAZ, + Dec, + GaiaFluxBp, + GaiaFluxG, + GaiaFluxRp, + GaiaParallax, + GaiaXpBp, + GaiaXpRp, + HSCMagG, + HSCMagI, + HSCMagR, + HSCMagY, + HSCMagZ, + HSCShape11, + HSCShape12, + HSCShape22, Image, - Spectrum, LegacySurveyCatalog, - LegacySurveySegmentationMap, + LegacySurveyEBV, LegacySurveyFluxG, - LegacySurveyFluxR, LegacySurveyFluxI, - LegacySurveyFluxZ, + LegacySurveyFluxR, LegacySurveyFluxW1, LegacySurveyFluxW2, LegacySurveyFluxW3, LegacySurveyFluxW4, - LegacySurveyShapeR, + LegacySurveyFluxZ, + LegacySurveySegmentationMap, LegacySurveyShapeE1, LegacySurveyShapeE2, - LegacySurveyEBV, - Z, - HSCAG, - HSCAR, - HSCAI, - HSCAZ, - HSCAY, - HSCMagG, - HSCMagR, - HSCMagI, - HSCMagZ, - HSCMagY, - HSCShape11, - HSCShape22, - HSCShape12, - GaiaFluxG, - GaiaFluxBp, - GaiaFluxRp, - GaiaParallax, + LegacySurveyShapeR, Ra, - Dec, - GaiaXpBp, - GaiaXpRp, -) -from aion.codecs.image import ImageCodec -from aion.codecs.spectrum import SpectrumCodec -from aion.codecs.catalog import CatalogCodec -from aion.codecs.scalar_field import ScalarFieldCodec -from aion.codecs.scalar import ( - ScalarCodec, - LogScalarCodec, - MultiScalarCodec, - GridScalarCodec, + Spectrum, + Z, ) CODEC_CONFIG = { diff --git a/aion/codecs/manager.py b/aion/codecs/manager.py index 1c1434e..54cf695 100644 --- a/aion/codecs/manager.py +++ b/aion/codecs/manager.py @@ -3,12 +3,21 @@ Handles dynamic loading and management of codecs for different modalities. """ -from typing import Dict, Union, Optional, Type +from typing import Dict, Optional, Type, Union + import torch -from aion.modalities import Modality from aion.codecs.base import Codec from aion.codecs.config import CODEC_CONFIG +from aion.modalities import Modality + + +class ModalityTypeError(TypeError): + """Error raised when a modality type is not supported.""" + + +class TokenKeyError(ValueError): + """Error raised when a token key is not found in the tokens dictionary.""" class CodecManager: @@ -53,7 +62,7 @@ def _load_codec(self, modality_type: Type[Modality]) -> Codec: ): config = CODEC_CONFIG[modality_type.__base__] else: - raise ValueError( + raise ModalityTypeError( f"No codec configuration found for modality type: {modality_type.__name__}" ) else: @@ -76,6 +85,7 @@ def _load_codec(self, modality_type: Type[Modality]) -> Codec: return codec + @torch.no_grad() def encode(self, *modalities: Modality) -> Dict[str, torch.Tensor]: """Encode multiple modalities. 
@@ -98,7 +108,7 @@ def encode(self, *modalities: Modality) -> Dict[str, torch.Tensor]: if hasattr(modality, "token_key"): token_key = modality.token_key else: - raise ValueError( + raise ModalityTypeError( f"Modality {type(modality).__name__} does not have a token_key attribute" ) @@ -106,6 +116,7 @@ def encode(self, *modalities: Modality) -> Dict[str, torch.Tensor]: return tokens + @torch.no_grad() def decode( self, tokens: Dict[str, torch.Tensor], @@ -124,14 +135,14 @@ def decode( Decoded modality instance """ if not hasattr(modality_type, "token_key"): - raise ValueError( - f"Modality type {modality_type.__name__} does not have a token_key attribute" + raise ModalityTypeError( + f"Modality type {modality_type} does not have a token_key attribute" ) token_key = modality_type.token_key if token_key not in tokens: - raise ValueError( - f"Token key '{token_key}' for modality {modality_type.__name__} not found in tokens dictionary" + raise TokenKeyError( + f"Token key '{token_key}' for modality {modality_type} not found in tokens dictionary" ) # Get the appropriate codec @@ -140,7 +151,7 @@ def decode( # Decode using the codec with any provided metadata decoded_modality = codec.decode(tokens[token_key], **metadata) - # Casting the decoded modality to be the specific modality type requested + # Cast decoded modality to the correct type decoded_modality = modality_type(**decoded_modality.model_dump()) return decoded_modality diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..ab2f07c --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,7 @@ +SPHINXBUILD := sphinx-build +SOURCEDIR := . +BUILDDIR := _build + +.PHONY: html +html: + $(SPHINXBUILD) -M html $(SOURCEDIR) $(BUILDDIR) diff --git a/docs/_static/polymathic_logo.png b/docs/_static/polymathic_logo.png new file mode 100644 index 0000000..445d339 Binary files /dev/null and b/docs/_static/polymathic_logo.png differ diff --git a/docs/_static/style.css b/docs/_static/style.css new file mode 100644 index 0000000..5bc92a4 --- /dev/null +++ b/docs/_static/style.css @@ -0,0 +1,865 @@ +/* Import Google Fonts */ +@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap'); + +/* Professional color palette and design system */ +:root { + /* Brand colors */ + --color-brand-primary: #CA0E4C; + --color-brand-primary-rgb: 202, 14, 76; + --color-brand-secondary: #E91E63; + --color-brand-tertiary: #F50057; + + /* Typography scale */ + --font-family-primary: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; + --font-family-mono: 'JetBrains Mono', 'SF Mono', Consolas, monospace; + + --font-size-xs: 0.75rem; + --font-size-sm: 0.875rem; + --font-size-base: 1rem; + --font-size-lg: 1.125rem; + --font-size-xl: 1.25rem; + --font-size-2xl: 1.5rem; + --font-size-3xl: 1.875rem; + --font-size-4xl: 2.25rem; + --font-size-5xl: 3rem; + + /* Spacing scale */ + --space-xs: 0.25rem; + --space-sm: 0.5rem; + --space-md: 1rem; + --space-lg: 1.5rem; + --space-xl: 2rem; + --space-2xl: 3rem; + --space-3xl: 4rem; + + /* Border radius */ + --radius-sm: 0.25rem; + --radius-md: 0.375rem; + --radius-lg: 0.5rem; + --radius-xl: 0.75rem; + --radius-2xl: 1rem; + --radius-full: 9999px; + + /* Shadows */ + --shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.05); + --shadow-md: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06); + --shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05); + --shadow-xl: 0 20px 25px -5px rgba(0, 0, 0, 0.1), 
0 10px 10px -5px rgba(0, 0, 0, 0.04); + --shadow-glow: 0 0 20px rgba(var(--color-brand-primary-rgb), 0.5); + + /* Transitions */ + --transition-fast: 150ms cubic-bezier(0.4, 0, 0.2, 1); + --transition-base: 200ms cubic-bezier(0.4, 0, 0.2, 1); + --transition-slow: 300ms cubic-bezier(0.4, 0, 0.2, 1); +} + +/* Light theme */ +body[data-theme="light"] { + --color-text-primary: #2c3e50; + --color-text-secondary: #546e7a; + --color-text-tertiary: #90a4ae; + --color-text-muted: #b0bec5; + + --color-background-primary: #ffffff; + --color-background-secondary: #f5f5f5; + --color-background-tertiary: #eeeeee; + --color-background-elevated: #fafafa; + --color-background-code: #f8f8f8; + + --color-border-primary: #e0e0e0; + --color-border-secondary: #bdbdbd; + --color-border-light: rgba(0, 0, 0, 0.08); +} + +/* Dark theme */ +body[data-theme="dark"] { + --color-text-primary: #fafafa; + --color-text-secondary: #e5e5e5; + --color-text-tertiary: #a3a3a3; + --color-text-muted: #737373; + + --color-background-primary: #0a0a0a; + --color-background-secondary: #171717; + --color-background-tertiary: #1a1a1a; + --color-background-elevated: #262626; + --color-background-code: #0f0f0f; + + --color-border-primary: #262626; + --color-border-secondary: #404040; + --color-border-light: rgba(255, 255, 255, 0.08); +} + +/* Auto theme - uses system preference */ +body:not([data-theme="light"]):not([data-theme="dark"]) { + /* Default to light theme values */ + --color-text-primary: #2c3e50; + --color-text-secondary: #546e7a; + --color-text-tertiary: #90a4ae; + --color-text-muted: #b0bec5; + + --color-background-primary: #ffffff; + --color-background-secondary: #f5f5f5; + --color-background-tertiary: #eeeeee; + --color-background-elevated: #fafafa; + --color-background-code: #f8f8f8; + + --color-border-primary: #e0e0e0; + --color-border-secondary: #bdbdbd; + --color-border-light: rgba(0, 0, 0, 0.08); +} + +/* Auto theme dark mode support */ +@media (prefers-color-scheme: dark) { + body:not([data-theme="light"]):not([data-theme="dark"]) { + --color-text-primary: #fafafa; + --color-text-secondary: #e5e5e5; + --color-text-tertiary: #a3a3a3; + --color-text-muted: #737373; + + --color-background-primary: #0a0a0a; + --color-background-secondary: #171717; + --color-background-tertiary: #1a1a1a; + --color-background-elevated: #262626; + --color-background-code: #0f0f0f; + + --color-border-primary: #262626; + --color-border-secondary: #404040; + --color-border-light: rgba(255, 255, 255, 0.08); + } +} + +/* Auto theme light mode hero adjustments */ +@media (prefers-color-scheme: light) { + body:not([data-theme="dark"]) .hero-section { + background: radial-gradient(ellipse at top, rgba(202, 14, 76, 0.08) 0%, transparent 50%); + } + + body:not([data-theme="dark"]) .hero-subtitle { + color: #2c3e50 !important; + } + + body:not([data-theme="dark"]) .hero-description { + color: #546e7a !important; + } + + body:not([data-theme="dark"]) .btn-secondary { + color: #2c3e50 !important; + border-color: rgba(0, 0, 0, 0.2) !important; + } + + body:not([data-theme="dark"]) .btn-secondary:hover { + color: var(--color-brand-primary) !important; + border-color: var(--color-brand-primary) !important; + } +} + +/* Auto theme light mode card adjustments */ +@media (prefers-color-scheme: light) { + body:not([data-theme="dark"]) .sd-card { + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1); + } + + body:not([data-theme="dark"]) .sd-card:hover { + box-shadow: 0 4px 15px rgba(202, 14, 76, 0.15); + } +} + +/* Base reset and typography */ +* { + box-sizing: 
border-box; +} + +html { + scroll-behavior: smooth; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} + +body { + font-family: var(--font-family-primary); + font-size: var(--font-size-base); + line-height: 1.6; + color: var(--color-text-secondary); + background-color: var(--color-background-primary); + margin: 0; + padding: 0; +} + +/* Enhanced typography */ +h1, h2, h3, h4, h5, h6 { + font-family: var(--font-family-primary); + font-weight: 600; + color: var(--color-text-primary); + margin-top: var(--space-2xl); + margin-bottom: var(--space-lg); + line-height: 1.2; + letter-spacing: -0.02em; +} + +h1 { + font-size: var(--font-size-4xl); + font-weight: 700; + margin-top: 0; + margin-bottom: var(--space-xl); + background: linear-gradient(135deg, var(--color-text-primary) 0%, var(--color-text-secondary) 100%); + -webkit-background-clip: text; + -webkit-text-fill-color: transparent; + background-clip: text; +} + +h2 { + font-size: var(--font-size-3xl); + margin-top: var(--space-3xl); + position: relative; + padding-bottom: var(--space-md); +} + +h2::after { + content: ''; + position: absolute; + bottom: 0; + left: 0; + width: 3rem; + height: 3px; + background: linear-gradient(90deg, var(--color-brand-primary) 0%, transparent 100%); + border-radius: var(--radius-full); +} + +h3 { + font-size: var(--font-size-2xl); + font-weight: 500; +} + +/* Paragraphs and text */ +p { + margin-bottom: var(--space-lg); + color: var(--color-text-secondary); +} + +/* Links with sophisticated hover effect */ +a { + color: var(--color-brand-primary); + text-decoration: none; + position: relative; + transition: color var(--transition-base); +} + +a:hover { + color: var(--color-brand-secondary); +} + +/* Inline link underline effect */ +p a, li a { + background-image: linear-gradient(to right, var(--color-brand-primary), var(--color-brand-primary)); + background-size: 0% 1px; + background-repeat: no-repeat; + background-position: left bottom; + transition: background-size var(--transition-base); +} + +p a:hover, li a:hover { + background-size: 100% 1px; +} + +/* Premium code blocks */ +pre { + background-color: var(--color-background-code); + border: 1px solid var(--color-border-light); + border-radius: var(--radius-lg); + padding: var(--space-lg); + overflow-x: auto; + margin: var(--space-xl) 0; + position: relative; + box-shadow: inset 0 2px 4px 0 rgba(0, 0, 0, 0.06); +} + +pre::before { + content: ''; + position: absolute; + top: var(--space-sm); + left: var(--space-sm); + width: 12px; + height: 12px; + border-radius: var(--radius-full); + background-color: #ff5f57; + box-shadow: 20px 0 0 #ffbd2e, 40px 0 0 #28ca42; +} + +pre code { + padding-top: var(--space-lg); + display: block; +} + +code { + font-family: var(--font-family-mono); + font-size: var(--font-size-sm); + font-weight: 500; +} + +/* Inline code with glass effect */ +p code, li code { + background: rgba(var(--color-brand-primary-rgb), 0.1); + color: var(--color-text-primary); + padding: 0.125rem 0.375rem; + border-radius: var(--radius-md); + font-size: var(--font-size-sm); + border: 1px solid rgba(var(--color-brand-primary-rgb), 0.2); + backdrop-filter: blur(10px); +} + +/* Enhanced sidebar */ +.sidebar-drawer { + background-color: var(--color-sidebar-background); + border-right: 1px solid var(--color-sidebar-border); + backdrop-filter: blur(20px); +} + +/* Sidebar navigation with hover effects */ +.sidebar-tree .reference { + color: var(--color-text-tertiary); + padding: var(--space-xs) var(--space-md); + border-radius: 
var(--radius-md); + transition: all var(--transition-base); + display: block; + position: relative; + overflow: hidden; +} + +.sidebar-tree .reference::before { + content: ''; + position: absolute; + top: 0; + left: 0; + width: 0; + height: 100%; + background: rgba(var(--color-brand-primary-rgb), 0.1); + transition: width var(--transition-base); +} + +.sidebar-tree .reference:hover { + color: var(--color-text-primary); + padding-left: var(--space-lg); +} + +.sidebar-tree .reference:hover::before { + width: 100%; +} + +.sidebar-tree .current > .reference { + color: var(--color-brand-primary); + font-weight: 500; + background: rgba(var(--color-brand-primary-rgb), 0.1); +} + +/* Article styling */ +article { + max-width: 48rem; + margin: 0 auto; + padding: var(--space-2xl) var(--space-xl); +} + +/* Table of contents with gradient border */ +.toc-tree { + background: var(--color-background-secondary); + border: 1px solid var(--color-border-light); + border-radius: var(--radius-lg); + padding: var(--space-lg); + margin: var(--space-xl) 0; + position: relative; + overflow: hidden; +} + +.toc-tree::before { + content: ''; + position: absolute; + top: 0; + left: 0; + width: 3px; + height: 100%; + background: linear-gradient(180deg, var(--color-brand-primary) 0%, var(--color-brand-secondary) 100%); +} + +.toc-tree a { + color: var(--color-text-tertiary); + transition: all var(--transition-base); + display: block; + padding: var(--space-xs) 0; +} + +.toc-tree a:hover { + color: var(--color-brand-primary); + transform: translateX(var(--space-xs)); +} + +/* Premium buttons */ +.btn, button { + background: linear-gradient(135deg, var(--color-brand-primary) 0%, var(--color-brand-secondary) 100%); + color: white; + border: none; + padding: var(--space-sm) var(--space-xl); + border-radius: var(--radius-full); + font-family: var(--font-family-primary); + font-weight: 500; + font-size: var(--font-size-sm); + cursor: pointer; + transition: all var(--transition-base); + position: relative; + overflow: hidden; + box-shadow: 0 4px 15px 0 rgba(var(--color-brand-primary-rgb), 0.4); +} + +.btn::before, button::before { + content: ''; + position: absolute; + top: 50%; + left: 50%; + width: 0; + height: 0; + border-radius: var(--radius-full); + background: rgba(255, 255, 255, 0.2); + transform: translate(-50%, -50%); + transition: width var(--transition-slow), height var(--transition-slow); +} + +.btn:hover, button:hover { + transform: translateY(-2px); + box-shadow: 0 7px 20px 0 rgba(var(--color-brand-primary-rgb), 0.5); +} + +.btn:hover::before, button:hover::before { + width: 300px; + height: 300px; +} + +/* Search styling */ +.search-button { + background: var(--color-background-elevated); + color: var(--color-text-tertiary); + border: 1px solid var(--color-border-light); + padding: var(--space-sm) var(--space-md); + border-radius: var(--radius-lg); + transition: all var(--transition-base); +} + +.search-button:hover { + background: var(--color-background-secondary); + border-color: var(--color-brand-primary); + box-shadow: 0 0 0 1px var(--color-brand-primary); +} + +/* API documentation with cards */ +dl.py { + background: var(--color-background-secondary); + border: 1px solid var(--color-border-light); + border-radius: var(--radius-lg); + padding: var(--space-lg); + margin: var(--space-xl) 0; + position: relative; + overflow: hidden; +} + +dl.py::before { + content: ''; + position: absolute; + top: -50%; + right: -50%; + width: 200%; + height: 200%; + background: radial-gradient(circle, 
rgba(var(--color-brand-primary-rgb), 0.05) 0%, transparent 70%); + pointer-events: none; +} + +.sig-name { + color: var(--color-brand-primary) !important; + font-weight: 600; + font-size: var(--font-size-lg); +} + +/* Admonitions with glassmorphism */ +.admonition { + background: rgba(var(--color-brand-primary-rgb), 0.05); + backdrop-filter: blur(10px); + border: 1px solid rgba(var(--color-brand-primary-rgb), 0.2); + border-radius: var(--radius-lg); + padding: var(--space-lg); + margin: var(--space-xl) 0; + position: relative; + overflow: hidden; +} + +.admonition::before { + content: ''; + position: absolute; + top: 0; + left: 0; + width: 100%; + height: 3px; + background: linear-gradient(90deg, var(--color-brand-primary) 0%, var(--color-brand-secondary) 100%); +} + +.admonition-title { + font-weight: 600; + color: var(--color-text-primary); + margin-bottom: var(--space-sm); + font-size: var(--font-size-lg); +} + +/* Tables with modern styling */ +table { + width: 100%; + border-collapse: collapse; + margin: var(--space-xl) 0; + background: var(--color-background-secondary); + border-radius: var(--radius-lg); + overflow: hidden; + box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1); +} + +th { + background: var(--color-background-elevated); + color: var(--color-text-primary); + font-weight: 600; + text-align: left; + padding: var(--space-md); + border-bottom: 2px solid var(--color-border-primary); +} + +td { + padding: var(--space-md); + border-bottom: 1px solid var(--color-border-light); + color: var(--color-text-secondary); +} + +tr:hover td { + background: rgba(var(--color-brand-primary-rgb), 0.05); +} + +/* Footer enhancement */ +.footer { + margin-top: var(--space-3xl); + padding: var(--space-xl) 0; + border-top: 1px solid var(--color-border-light); + text-align: center; + color: var(--color-text-tertiary); + font-size: var(--font-size-sm); +} + +/* Selection styling */ +::selection { + background-color: rgba(var(--color-brand-primary-rgb), 0.3); + color: var(--color-text-primary); +} + +/* Scrollbar styling */ +::-webkit-scrollbar { + width: 12px; + height: 12px; +} + +::-webkit-scrollbar-track { + background: var(--color-background-secondary); +} + +::-webkit-scrollbar-thumb { + background: var(--color-neutral-700); + border-radius: var(--radius-full); + border: 3px solid var(--color-background-secondary); +} + +::-webkit-scrollbar-thumb:hover { + background: var(--color-neutral-600); +} + +/* Animations */ +@keyframes fadeInUp { + from { + opacity: 0; + transform: translateY(20px); + } + to { + opacity: 1; + transform: translateY(0); + } +} + +@keyframes shimmer { + 0% { + background-position: -1000px 0; + } + 100% { + background-position: 1000px 0; + } +} + +@keyframes rotate { + from { transform: rotate(0deg); } + to { transform: rotate(360deg); } +} + +/* Apply animations to content */ +article > * { + animation: fadeInUp 0.6s cubic-bezier(0.4, 0, 0.2, 1) forwards; + opacity: 0; +} + +article > *:nth-child(1) { animation-delay: 0.1s; } +article > *:nth-child(2) { animation-delay: 0.2s; } +article > *:nth-child(3) { animation-delay: 0.3s; } +article > *:nth-child(4) { animation-delay: 0.4s; } +article > *:nth-child(5) { animation-delay: 0.5s; } + +/* Loading shimmer effect for code blocks */ +pre.loading { + background: linear-gradient(90deg, var(--color-background-code) 0%, var(--color-background-elevated) 50%, var(--color-background-code) 100%); + background-size: 1000px 100%; + animation: shimmer 2s infinite; +} + +/* Theme toggle enhancement */ +.theme-toggle { + background: 
var(--color-background-elevated); + border: 1px solid var(--color-border-light); + border-radius: var(--radius-full); + padding: var(--space-xs); + transition: all var(--transition-base); +} + +.theme-toggle:hover { + background: var(--color-background-secondary); + border-color: var(--color-brand-primary); +} + +/* Hero Section Styling */ +.hero-section { + text-align: center; + padding: 4rem 2rem; + margin-bottom: 3rem; + background: radial-gradient(ellipse at top, rgba(202, 14, 76, 0.15) 0%, transparent 50%); + position: relative; + overflow: hidden; +} + +.hero-background { + position: absolute; + top: -50%; + left: -50%; + width: 200%; + height: 200%; + background: conic-gradient(from 180deg at 50% 50%, rgba(202, 14, 76, 0.1) 0deg, transparent 60deg, transparent 300deg, rgba(202, 14, 76, 0.1) 360deg); + animation: rotate 20s linear infinite; + opacity: 0.5; +} + +.hero-title { + font-size: 4.5rem !important; + font-weight: 800; + margin-bottom: 1rem; + background: linear-gradient(135deg, #CA0E4C 0%, #E91E63 50%, #F50057 100%); + -webkit-background-clip: text; + -webkit-text-fill-color: transparent; + background-clip: text; + position: relative; + z-index: 1; + letter-spacing: -0.03em; +} + +.hero-subtitle { + font-size: 1.5rem; + color: var(--color-text-primary); + font-weight: 300; + letter-spacing: 0.2em; + text-transform: uppercase; + margin-bottom: 0.5rem; +} + +.hero-description { + font-size: 1.125rem; + color: var(--color-text-tertiary); + margin-top: 1rem; + font-weight: 400; +} + +.hero-buttons { + margin-top: 3rem; + display: flex; + gap: 1rem; + justify-content: center; + flex-wrap: wrap; +} + +/* Button variants */ +.btn-primary { + display: inline-flex; + align-items: center; + padding: 0.875rem 2.5rem; + background: linear-gradient(135deg, #CA0E4C 0%, #E91E63 100%); + color: white !important; + border-radius: 9999px; + text-decoration: none; + font-weight: 500; + font-size: 1rem; + transition: all 200ms cubic-bezier(0.4, 0, 0.2, 1); + box-shadow: 0 4px 15px 0 rgba(202, 14, 76, 0.4); +} + +.btn-primary:hover { + transform: translateY(-2px); + box-shadow: 0 7px 25px 0 rgba(202, 14, 76, 0.5); + color: white !important; +} + +.btn-secondary { + display: inline-flex; + align-items: center; + padding: 0.875rem 2.5rem; + background: transparent; + color: var(--color-text-primary) !important; + border: 1px solid rgba(255, 255, 255, 0.2); + border-radius: 9999px; + text-decoration: none; + font-weight: 500; + font-size: 1rem; + transition: all 200ms cubic-bezier(0.4, 0, 0.2, 1); + backdrop-filter: blur(10px); +} + +.btn-secondary:hover { + border-color: var(--color-brand-primary); + color: var(--color-brand-primary) !important; +} + +/* Grid Card Styling */ +.sd-card.feature-card { + background: var(--color-background-secondary) !important; + border: 1px solid var(--color-border-light) !important; + transition: all var(--transition-base); + height: 100%; +} + +.sd-card.feature-card:hover { + border-color: var(--color-brand-primary) !important; + background: rgba(var(--color-brand-primary-rgb), 0.05) !important; + transform: translateY(-2px); + box-shadow: 0 4px 15px 0 rgba(202, 14, 76, 0.2); +} + +.sd-card.feature-card .sd-card-body { + text-align: center; + padding: 1.5rem !important; +} + +.sd-card.feature-card .sd-card-title { + color: var(--color-text-primary) !important; + font-weight: 600; + margin-bottom: 0.5rem; +} + +.sd-card.feature-card .sd-card-text { + color: var(--color-text-secondary) !important; +} + +.sd-card.doc-card { + background: 
var(--color-background-secondary) !important; + border: 1px solid var(--color-border-light) !important; + transition: all var(--transition-base); +} + +.sd-card.doc-card:hover { + border-color: var(--color-brand-primary) !important; + background: rgba(var(--color-brand-primary-rgb), 0.05) !important; +} + +.sd-card.doc-card .sd-card-body { + padding: 1.25rem !important; +} + +.sd-card.doc-card .sd-card-title { + color: var(--color-text-primary) !important; + font-weight: 600; + font-size: 1.125rem; + margin-bottom: 0.5rem; +} + +.sd-card.doc-card .sd-card-text { + color: var(--color-text-secondary) !important; +} + +/* Community section */ +.community-section { + text-align: center; + padding: 3rem 2rem; + background: linear-gradient(135deg, rgba(202, 14, 76, 0.05) 0%, transparent 100%); + border-radius: 0.75rem; + margin: 3rem 0; +} + +.community-section h3 { + color: var(--color-text-primary); + font-size: 1.5rem; + margin-bottom: 1rem; +} + +.community-section p { + color: var(--color-text-tertiary); + margin-bottom: 2rem; + max-width: 600px; + margin-left: auto; + margin-right: auto; +} + +/* Fix admonition styling */ +.admonition.tip { + background: rgba(var(--color-brand-primary-rgb), 0.05); + border: 1px solid rgba(var(--color-brand-primary-rgb), 0.2); +} + +/* Mobile responsive improvements */ +@media (max-width: 768px) { + :root { + --font-size-base: 0.875rem; + --font-size-4xl: 2rem; + --font-size-3xl: 1.5rem; + --font-size-2xl: 1.25rem; + } + + article { + padding: var(--space-lg) var(--space-md); + } + + .hero-title { + font-size: 3rem !important; + } + + .hero-buttons { + flex-direction: column; + align-items: center; + } + + .btn-primary, .btn-secondary { + width: 100%; + max-width: 300px; + justify-content: center; + } +} + +/* High resolution displays */ +@media (-webkit-min-device-pixel-ratio: 2), (min-resolution: 192dpi) { + body { + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; + } +} + +/* Print styles */ +@media print { + body { + background: white; + color: black; + } + + .sidebar-drawer, + .theme-toggle, + .search-button { + display: none; + } +} diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..8382838 --- /dev/null +++ b/docs/api.md @@ -0,0 +1,691 @@ +# API Reference + +This comprehensive API reference covers all major components of AION-1, including modalities, codecs, models, and utilities. + +## Core Model + +### `aion.AION` + +The main AION model class that provides high-level interfaces for multimodal astronomical analysis. + +```python +class AION(FourM): + """ + AION-1 multimodal astronomical foundation model. + + Inherits from FourM architecture and adds astronomical-specific + functionality for processing 39 different data modalities. + """ + + @classmethod + def from_pretrained( + cls, + model_name: str, + device: str = 'cuda', + torch_dtype: torch.dtype = torch.float32, + **kwargs + ) -> 'AION': + """ + Load a pre-trained AION model. 
+ + Args: + model_name: HuggingFace model identifier + - 'polymathic-ai/aion-tiny': 300M parameter model + - 'polymathic-ai/aion-base': 800M parameter model + - 'polymathic-ai/aion-large': 3.1B parameter model + device: Device to load model on ('cuda', 'cpu', 'mps') + torch_dtype: Data type for model weights + **kwargs: Additional arguments passed to model constructor + + Returns: + AION model instance + """ + + def generate( + self, + inputs: Dict[str, Modality], + targets: List[str], + num_generations: int = 1, + temperature: float = 1.0, + top_k: Optional[int] = None, + top_p: Optional[float] = None + ) -> Dict[str, Modality]: + """ + Generate target modalities from input observations. + + Note: + ``targets`` must be chosen from the list returned by + ``AION.supported_targets`` (essentially the 39 modality names + listed in the architecture documentation). Supplying an + unsupported string will raise ``ValueError``. + + Args: + inputs: Dictionary mapping modality names to data + targets: List of modality names to generate + num_generations: Number of samples to generate + temperature: Sampling temperature (higher = more diverse) + top_k: Top-k sampling parameter + top_p: Nucleus sampling parameter + + Returns: + Dictionary mapping target names to generated modalities + """ + + def encode( + self, + inputs: Dict[str, torch.Tensor] + ) -> torch.Tensor: + """ + Encode input tokens to learned representations. + + Args: + inputs: Tokenized inputs + + Returns: + Encoder hidden states [batch, seq_len, hidden_dim] + """ + + def tokenize( + self, + modalities: Dict[str, Modality] + ) -> Dict[str, torch.Tensor]: + """ + Convert modalities to discrete tokens using codecs. + + Args: + modalities: Dictionary of modality data + + Returns: + Dictionary of tokenized tensors + """ +``` + +## Modalities + +AION-1 supports 39 different astronomical data modalities. Each modality is represented by a Pydantic model ensuring type safety and validation. + +### Image Modalities + +#### `aion.modalities.Image` + +```python +class Image(Modality): + """ + Multi-band astronomical image. + + Attributes: + flux: Image data array [bands, height, width] + bands: List of band identifiers (e.g., ['HSC-G', 'HSC-R']) + ivar: Optional inverse variance array for weighting + mask: Optional boolean mask array + """ + + flux: np.ndarray + bands: List[str] + ivar: Optional[np.ndarray] = None + mask: Optional[np.ndarray] = None + + @classmethod + def batch(cls, images: List['Image']) -> 'Image': + """Batch multiple images together.""" + + def crop(self, size: int = 96) -> 'Image': + """Center crop image to specified size.""" +``` + +### Spectrum Modalities + +#### `aion.modalities.Spectrum` + +```python +class Spectrum(Modality): + """ + Astronomical spectrum. 
+ + Attributes: + wavelength: Wavelength array in Angstroms + flux: Flux density array + ivar: Optional inverse variance + survey: Source survey identifier + """ + + wavelength: np.ndarray + flux: np.ndarray + ivar: Optional[np.ndarray] = None + survey: Optional[str] = None + + def resample( + self, + new_wavelength: np.ndarray + ) -> 'Spectrum': + """Resample spectrum to new wavelength grid.""" + + def normalize(self) -> 'Spectrum': + """Apply median normalization.""" +``` + +### Scalar Modalities + +AION-1 includes numerous scalar modalities for photometry, shapes, and physical parameters: + +#### Photometric Fluxes + +```python +class FluxG(ScalarModality): + """g-band flux measurement.""" + value: np.ndarray + error: Optional[np.ndarray] = None + +class FluxR(ScalarModality): + """r-band flux measurement.""" + value: np.ndarray + error: Optional[np.ndarray] = None + +class FluxI(ScalarModality): + """i-band flux measurement.""" + value: np.ndarray + error: Optional[np.ndarray] = None + +class FluxZ(ScalarModality): + """z-band flux measurement.""" + value: np.ndarray + error: Optional[np.ndarray] = None +``` + +#### Shape Parameters + +```python +class E1(ScalarModality): + """First ellipticity component.""" + value: np.ndarray + +class E2(ScalarModality): + """Second ellipticity component.""" + value: np.ndarray + +class RadiusCARP(ScalarModality): + """CARP radius measurement.""" + value: np.ndarray +``` + +#### Physical Properties + +```python +class Redshift(ScalarModality): + """Spectroscopic or photometric redshift.""" + value: np.ndarray + error: Optional[np.ndarray] = None + +class ExtinctionV(ScalarModality): + """V-band extinction.""" + value: np.ndarray + +class Parallax(ScalarModality): + """Parallax measurement in mas.""" + value: np.ndarray + error: Optional[np.ndarray] = None +``` + +### Catalog Modalities + +#### `aion.modalities.Catalog` + +```python +class Catalog(Modality): + """ + Astronomical object catalog. + + Attributes: + entries: List of catalog objects + max_objects: Maximum number of objects to process + """ + + entries: List[CatalogEntry] + max_objects: int = 100 + + def sort_by_distance(self) -> 'Catalog': + """Sort entries by distance from center.""" + + def filter_bright(self, magnitude_limit: float) -> 'Catalog': + """Filter to objects brighter than limit.""" +``` + +## Codecs (Tokenizers) + +Codecs convert between modalities and discrete tokens. Each modality type has a specialized codec. + +### Base Codec Interface + +#### `aion.codecs.base.Codec` + +```python +class Codec(ABC): + """ + Abstract base class for modality codecs. + """ + + @abstractmethod + def encode(self, modality: Modality) -> torch.Tensor: + """Encode modality to discrete tokens.""" + + @abstractmethod + def decode(self, tokens: torch.Tensor) -> Modality: + """Decode tokens back to modality.""" + + @classmethod + def from_pretrained(cls, path: str) -> 'Codec': + """Load pre-trained codec.""" + + def save_pretrained(self, path: str): + """Save codec weights and configuration.""" +``` + +### Image Codec + +#### `aion.codecs.ImageCodec` + +```python +class ImageCodec(Codec): + """ + Image tokenizer using MagVit architecture. + + Supports multi-survey images with different band counts + through a unified channel embedding scheme. + """ + + def __init__( + self, + hidden_dim: int = 512, + n_embed: int = 10000, + compression_levels: int = 2, + quantizer: str = 'fsq' + ): + """ + Initialize image codec. 
+ + Args: + hidden_dim: Hidden dimension size + n_embed: Codebook size + compression_levels: Spatial compression factor + quantizer: Quantization method ('fsq' or 'vq') + """ + + def preprocess( + self, + image: Image, + crop_size: int = 96 + ) -> torch.Tensor: + """Apply survey-specific preprocessing.""" + + def get_latent_shape( + self, + image_shape: Tuple[int, ...] + ) -> Tuple[int, ...]: + """Get shape of latent representation.""" +``` + +### Spectrum Codec + +#### `aion.codecs.SpectrumCodec` + +```python +class SpectrumCodec(Codec): + """ + Spectrum tokenizer using ConvNeXt V2 architecture. + + Uses a shared latent wavelength grid to handle spectra + from different instruments. + """ + + def __init__( + self, + latent_wavelength: np.ndarray, + hidden_dims: List[int] = [96, 192, 384, 768], + n_embed: int = 1024, + quantizer: str = 'lfq' + ): + """ + Initialize spectrum codec. + + Args: + latent_wavelength: Target wavelength grid + hidden_dims: ConvNeXt stage dimensions + n_embed: Codebook size + quantizer: Quantization method + """ + + def to_latent_grid( + self, + spectrum: Spectrum + ) -> torch.Tensor: + """Interpolate spectrum to latent wavelength grid.""" +``` + +### Scalar Codec + +#### `aion.codecs.ScalarCodec` + +```python +class ScalarCodec(Codec): + """ + Tokenizer for scalar quantities using adaptive quantization. + """ + + def __init__( + self, + quantizer_type: str = 'reservoir', + n_bins: int = 256 + ): + """ + Initialize scalar codec. + + Args: + quantizer_type: Type of quantizer + - 'linear': Uniform bins + - 'log': Logarithmic bins + - 'reservoir': Learned adaptive bins + - 'compressed': Transform then quantize + n_bins: Number of quantization levels + """ + + def fit(self, values: np.ndarray): + """Fit quantizer to data distribution.""" +``` + +## Quantizers + +Quantization modules that convert continuous values to discrete tokens. + +### `aion.codecs.quantizers.FSQ` + +```python +class FiniteScalarQuantization(nn.Module): + """ + Finite Scalar Quantization from MagVit. + + Factorizes codebook into multiple small codebooks for + better gradient flow and training stability. + """ + + def __init__( + self, + levels: List[int] = [8, 5, 5, 5, 5], + eps: float = 1e-3 + ): + """ + Args: + levels: Number of levels per dimension + eps: Small constant for numerical stability + """ +``` + +### `aion.codecs.quantizers.LFQ` + +```python +class LookupFreeQuantization(nn.Module): + """ + Lookup-Free Quantization using entropy regularization. + + Achieves quantization without explicit codebook lookup, + improving training efficiency. + """ + + def __init__( + self, + dim: int, + codebook_size: int, + entropy_weight: float = 0.1 + ): + """ + Args: + dim: Embedding dimension + codebook_size: Target vocabulary size + entropy_weight: Entropy regularization weight + """ +``` + +## Preprocessing + +Survey-specific preprocessing utilities. + +### `aion.codecs.preprocessing.ImagePreprocessor` + +```python +class ImagePreprocessor: + """ + Survey-specific image preprocessing. + """ + + def __init__(self, survey: str): + """ + Initialize for specific survey. + + Args: + survey: Survey name ('HSC', 'DES', 'SDSS', etc.) + """ + + def __call__(self, image: Image) -> torch.Tensor: + """Apply preprocessing pipeline.""" + + def get_rescaling_params(self) -> Dict[str, float]: + """Get survey-specific rescaling parameters.""" +``` + +### `aion.codecs.preprocessing.SpectrumPreprocessor` + +```python +class SpectrumPreprocessor: + """ + Spectrum normalization and preprocessing. 
+ """ + + def normalize_median( + self, + spectrum: Spectrum + ) -> Spectrum: + """Apply median normalization.""" + + def mask_skylines( + self, + spectrum: Spectrum + ) -> Spectrum: + """Mask common sky emission lines.""" +``` + +## Model Components + +### `aion.fourm.FourM` + +```python +class FourM(nn.Module): + """ + Base multimodal transformer architecture. + + Implements the encoder-decoder architecture with + modality-specific embeddings and flexible attention. + """ + + def __init__( + self, + encoder_depth: int = 12, + decoder_depth: int = 12, + dim: int = 768, + num_heads: int = 12, + mlp_ratio: float = 4.0, + use_bias: bool = False + ): + """Initialize FourM architecture.""" +``` + +### `aion.fourm.encoder_embeddings` + +```python +class ModalityEmbedding(nn.Module): + """ + Learnable embeddings for each modality type. + + Provides both modality identification and survey + provenance information. + """ + + def __init__( + self, + num_modalities: int, + num_surveys: int, + embed_dim: int + ): + """Initialize modality embeddings.""" +``` + +## Utilities + +### `aion.model_utils` + +```python +def load_codec(modality: str, device: str = 'cuda') -> Codec: + """Load pre-trained codec for modality.""" + +def create_model_config( + model_size: str = 'base' +) -> Dict[str, Any]: + """Get configuration for model size.""" + +def count_parameters(model: nn.Module) -> int: + """Count trainable parameters in model.""" +``` + +### `aion.generation_utils` + +```python +def sample_with_temperature( + logits: torch.Tensor, + temperature: float = 1.0, + top_k: Optional[int] = None, + top_p: Optional[float] = None +) -> torch.Tensor: + """ + Sample from logits with temperature scaling. + + Args: + logits: Model output logits + temperature: Sampling temperature + top_k: Top-k filtering + top_p: Nucleus sampling threshold + + Returns: + Sampled token indices + """ + +def generate_with_caching( + model: AION, + inputs: Dict[str, torch.Tensor], + max_length: int, + use_cache: bool = True +) -> torch.Tensor: + """Generate tokens with KV caching for efficiency.""" +``` + +## Data Loading + +### `aion.data.AstronomicalDataset` + +```python +class AstronomicalDataset(Dataset): + """ + PyTorch dataset for astronomical observations. + """ + + def __init__( + self, + data_paths: List[str], + modalities: List[str], + transform: Optional[Callable] = None + ): + """ + Initialize dataset. 
+ + Args: + data_paths: Paths to data files + modalities: List of modalities to load + transform: Optional data transformation + """ + + def __getitem__(self, idx: int) -> Dict[str, Modality]: + """Get single observation.""" +``` + +## Example Usage + +### Complete Pipeline + +```python +import torch +from aion import AION +from aion.modalities import Image, Spectrum +from aion.codecs import ImageCodec, SpectrumCodec + +# Load model and codecs +model = AION.from_pretrained('polymathic-ai/aion-base') +image_codec = ImageCodec.from_pretrained('polymathic-ai/aion-image-codec') +spectrum_codec = SpectrumCodec.from_pretrained('polymathic-ai/aion-spectrum-codec') + +# Load data +image = Image(flux=galaxy_flux, bands=['g', 'r', 'i', 'z', 'y']) +spectrum = Spectrum(wavelength=wavelength, flux=flux) + +# Tokenize +tokens = { + 'image': image_codec.encode(image), + 'spectrum': spectrum_codec.encode(spectrum) +} + +# Encode to representations +with torch.no_grad(): + representations = model.encode(tokens) + +# Generate missing modalities +results = model.generate( + inputs={'image': image}, + targets=['spectrum', 'redshift'] +) + +# Decode results +generated_spectrum = spectrum_codec.decode(results['spectrum']) +print(f"Predicted redshift: {results['redshift'].value[0]:.3f}") +``` + +## Error Handling + +All AION components include comprehensive error handling: + +```python +from aion.exceptions import ( + ModalityError, # Invalid modality data + CodecError, # Tokenization failures + ModelError, # Model inference errors + DataError # Data loading issues +) + +try: + result = model.generate(inputs, targets) +except ModalityError as e: + print(f"Invalid modality: {e}") +except CodecError as e: + print(f"Tokenization failed: {e}") +``` + +## Performance Tips + +1. **Batch Processing**: Always process multiple objects together when possible +2. **Mixed Precision**: Use `torch.cuda.amp` for faster inference +3. **Token Caching**: Reuse encoder outputs when generating multiple targets +4. **Device Placement**: Use `.to(device)` consistently for all tensors + +For more details, see the [Usage Guide](usage.md) and [Architecture](architecture.md) documentation. + +```{eval-rst} +.. automodule:: aion + :members: + :undoc-members: + :show-inheritance: +``` diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..607c7f8 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,411 @@ +# AION-1 Architecture + +This document provides a comprehensive overview of AION-1's architecture, explaining how it achieves unified multimodal understanding of astronomical data through innovative tokenization strategies and transformer-based learning. + +## Overview + +AION-1 employs a two-stage architecture that elegantly handles the complexity of astronomical data: + +1. **Universal Tokenization**: Modality-specific encoders convert heterogeneous astronomical observations into discrete tokens +2. **Multimodal Masked Modeling**: A unified transformer learns cross-modal relationships through masked token prediction + +This design enables AION-1 to process 39 different data modalities from 5 major astronomical surveys, learning from over 200 million objects. + +## Core Design Principles + +### 1. Purely Observational Learning + +Unlike many scientific ML models, AION-1 is trained exclusively on raw observational data without any labels derived from simulations or physical models. 
This approach provides: + +- **Model-agnostic representations**: Not tied to specific physical assumptions +- **Flexibility**: Can adapt to changing theoretical models +- **Robustness**: Learns patterns directly from data + +### 2. Arbitrary Modality Combinations + +AION-1 can process any subset of its 39 supported modalities without architectural changes: + +- No fixed input requirements +- Graceful handling of missing data +- Dynamic modality fusion + +### 3. Scalable Token-Based Approach + +By converting all data to tokens, AION-1 achieves: + +- Uniform processing across modalities +- Efficient batching and computation +- Natural handling of variable-length inputs + +## Stage 1: Universal Tokenization + +The tokenization stage addresses a fundamental challenge: how to convert diverse astronomical measurements (images, spectra, scalars) into a common representation suitable for transformer processing. + +### Image Tokenization + +AION-1's image tokenizer handles multi-band astronomical images from different surveys with varying: +- Resolution and pixel scales +- Number of channels (4-9 bands) +- Noise characteristics +- Dynamic range + +#### Architecture +``` +# Image tokenizer structure +class ImageCodec: + - Preprocessing: + - Center crop to 96x96 pixels + - Survey-specific rescaling + - Range compression: arcsinh(flux/α) × β + + - Multi-survey projection: + - SubsampledLinear layer (9 → 54 channels) + - Handles variable input bands + - Embeds survey provenance + + - Encoder: MagVit-based architecture + - ResNet backbone with 2 compressions + - Hidden dimensions: 512 + - Bottleneck: 5 dimensions + + - Quantization: Finite Scalar Quantization (FSQ) + - Levels: [8, 5, 5, 5, 5] + - Codebook size: 10,000 +``` + +#### Key Innovations + +1. **Channel Embedding Scheme**: Accommodates images from different surveys with varying band counts in a single model + +2. **Inverse-Variance Weighted Loss**: Leverages known noise properties for optimal reconstruction + ``` + L_NLL = Σ_i 1/2 || Σ_i^(-1/2) (x_i - Decoder(Encoder(x_i))) ||² + ``` + +3. **Survey-Aware Processing**: Maintains provenance information through dedicated embeddings + +### Spectrum Tokenization + +Astronomical spectra present unique challenges: +- Wavelength ranges vary by instrument (3500-10400 Å) +- Resolution differences (R = 1500-5500) +- Orders of magnitude variation in amplitude + +#### Architecture +``` +# Spectrum tokenizer structure +class SpectrumCodec: + - Preprocessing: + - Median normalization + - Log-transform median + - Resampling to latent wavelength grid + + - Latent grid: + - Range: 3500-10462.4 Å + - Resolution: 0.8 Å/pixel + - 8704 pixels total + + - Encoder: ConvNeXt V2 + - Depths: [3, 3, 9, 3] + - Dimensions: [96, 192, 384, 768] + + - Quantization: Lookup-Free Quantization (LFQ) + - Embedding dimension: 10 + - Codebook size: 1024 +``` + +#### Spectral Grid Interpolation + +The tokenizer uses a shared latent wavelength grid, enabling joint processing of spectra from different instruments: + +```python +def to_latent(spectrum, observed_wavelength): + # Interpolate observed spectrum to latent grid + return interp1d(observed_wavelength, spectrum, latent_wavelength) +``` + +### Scalar Tokenization + +Scalar quantities (fluxes, shapes, physical parameters) are tokenized using adaptive quantization based on cumulative distribution functions (CDFs). + +#### Types of Scalar Quantizers + +1. **Linear Quantizer**: For uniformly distributed values +2. **Log Quantizer**: For values spanning orders of magnitude +3. 
**Reservoir Quantizer**: Learns optimal binning from data +4. **Compressed Quantizer**: Applies transformations before quantization + +Example scalar modalities: +- Photometric fluxes (g, r, i, z bands) +- Shape parameters (ellipticity, radius) +- Physical properties (redshift, extinction) + +### Token Summary at a Glance + +| Modality | Native input tensor shape | Tokens per object | Quantizer type & levels | Codebook size | +|------------------------------------------------|---------------------------|--------------------|-------------------------|---------------| +| Image (HSC / Legacy Survey, 96 × 96 cut-out) | `(B, N_band, 96, 96)` | 144 *(18×18 grid)* | FSQ `[8,5,5,5,5]` | 10 000 | +| Spectrum (SDSS / DESI) | `(B, 2, λ)` *(flux,ivar)* | 64 + 1 norm token | LFQ `dim=10` | 1 024 | +| Scalar quantity (photometry, shapes, etc.) | `(B,)` | 1 per quantity | Reservoir (linear/log) | 256 (default) | +| Catalog (bounding ellipses) | `(B, N_obj, 5)` | ≤100×5 | Composite (per-field) | mixed | + +These numbers correspond to the default configuration used during pre-training (input budget = 256, output budget = 128 tokens). They can be modified at fine-tune time as long as the total token budget is respected. + +### Catalog Tokenization + +Astronomical catalogs contain lists of objects with varying counts per image. AION-1 linearizes these into sequences: + +``` +# Catalog entry: (X, Y, e1, e2, radius) +# Linearization: Sort by distance from center +# Tokenization: Quantize each component separately +``` + +## Stage 2: Multimodal Masked Modeling + +The second stage uses a transformer encoder-decoder architecture to learn relationships between tokens from different modalities. + +### Architecture Details + +``` +class AION(FourM): + # Encoder + - Depth: 12-24 layers (model-dependent) + - Hidden dimension: 768-2048 + - Attention heads: 12-32 + - MLP ratio: 4.0 + - Activation: SwiGLU + + # Decoder + - Same architecture as encoder + - Cross-attention to encoder outputs + - Modality-specific output heads +``` + +### Multimodal Masking Strategy + +AION-1 uses a sophisticated masking strategy that enables learning both within and across modalities: + +1. **Input Token Budget**: Randomly select B tokens across all modalities for input +2. **Output Token Budget**: From remaining tokens, select targets using Beta distribution +3. **Cross-Modal Learning**: Masks ensure model learns to predict any modality from any other + +```python +def mask_multimodal(tokens, num_input=256, num_output=128): + # 1. Select primary modality + primary_mod = random.choice(modalities) + + # 2. Fill input budget + input_tokens = sample_tokens(primary_mod, budget=num_input) + input_tokens += sample_from_other_modalities(remaining_budget) + + # 3. Select outputs (Beta distribution favors fewer tokens) + num_outputs = sample_beta(alpha=0.1, beta=1.0) * num_output + output_tokens = sample_from_remaining(num_outputs) + + return input_tokens, output_tokens +``` + +### Training Objective + +The model optimizes a cross-entropy loss over predicted tokens: + +``` +L = -Σ_t log p(x_t^target | x^observed) +``` + +This simple objective, combined with diverse masking patterns, enables AION-1 to learn rich cross-modal representations. 
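+To make the loss concrete, the following is a minimal sketch (an illustrative assumption, not the actual training code) of how the masked cross-entropy could be computed over the selected target tokens, given decoder `logits` at those positions and the ground-truth `target_ids` produced by the codecs. The function name, tensor layout, and `target_mask` padding convention are hypothetical.
+
+```python
+import torch
+import torch.nn.functional as F
+
+def masked_token_loss(
+    logits: torch.Tensor,       # (batch, num_targets, vocab_size) decoder outputs at target positions
+    target_ids: torch.Tensor,   # (batch, num_targets) ground-truth codec tokens
+    target_mask: torch.Tensor,  # (batch, num_targets) True where a real target exists (False = padding)
+) -> torch.Tensor:
+    """Cross-entropy over the masked-out target tokens, ignoring padded positions."""
+    vocab_size = logits.size(-1)
+    per_token = F.cross_entropy(
+        logits.reshape(-1, vocab_size),
+        target_ids.reshape(-1),
+        reduction="none",
+    ).reshape(target_ids.shape)
+    # Average the negative log-likelihood over valid target positions only.
+    return (per_token * target_mask).sum() / target_mask.sum().clamp(min=1)
+```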
+ +## Model Variants + +AION-1 comes in three sizes, each using the same architecture with different dimensions: + +| Model | Parameters | Encoder Layers | Decoder Layers | Hidden Dim | Attention Heads | +|-------|------------|----------------|----------------|------------|-----------------| +| AION-1-B (Base) | 300M | 12 | 12 | 768 | 12 | +| AION-1-L (Large) | 800M | 24 | 24 | 1024 | 16 | +| AION-1-XL (XLarge) | 3.1B | 24 | 24 | 2048 | 32 | + +All models use: +- SwiGLU activation functions +- No bias terms (except in embeddings) +- QK-Norm for training stability +- Rotary position embeddings + +## Data Flow Through AION-1 + +Here's how data flows through the complete pipeline: + +```{mermaid} +graph TD + A[Raw Astronomical Data] --> B[Modality-Specific Preprocessing] + B --> C[Tokenization] + C --> D[Token Embeddings + Position Encoding] + D --> E[Transformer Encoder] + E --> F[Cross-Modal Representations] + F --> G[Transformer Decoder] + G --> H[Modality-Specific Heads] + H --> I[Predictions/Generations] +``` + +### Example: Processing Galaxy Data + +```python +# 1. Input data +galaxy_data = { + 'image': HSC_5band_image, # (5, 96, 96) + 'spectrum': SDSS_spectrum, # (3800,) + 'photometry': flux_measurements # (8,) +} + +# 2. Tokenization +tokens = { + 'image': image_codec.encode(galaxy_data['image']), # → 144 tokens + 'spectrum': spectrum_codec.encode(galaxy_data['spectrum']), # → 64 tokens + 'photometry': scalar_codec.encode(galaxy_data['photometry']) # → 8 tokens +} + +# 3. Embedding and encoding +embeddings = model.embed_inputs(tokens) +encoder_output = model.encode(embeddings) + +# 4. Cross-modal generation/prediction +predictions = model.decode(encoder_output, target_modalities) +``` + +## Key Architectural Innovations + +### 1. Modality Embeddings with Provenance + +Each token receives two embeddings: +- **Token embedding**: Encodes the discrete token value +- **Modality embedding**: Identifies data type AND source survey + +This allows AION-1 to understand that HSC g-band and SDSS g-band images have different characteristics. + +### 2. Flexible Attention Patterns + +The attention mechanism adapts based on input: +- **Encoder**: Full bidirectional attention across all tokens +- **Decoder**: Causal attention within modalities, cross-attention to encoder + +### 3. Hierarchical Token Organization + +Tokens are organized hierarchically: +- **Spatial tokens**: Preserve 2D structure for images +- **Sequential tokens**: Maintain order for spectra and catalogs +- **Unordered tokens**: For scalar sets + +## Training Infrastructure + +### Dataset Construction + +AION-1's training leverages pairwise associations between surveys: +- HSC images ↔ SDSS spectra +- SDSS spectra ↔ DESI spectra +- Legacy images ↔ Photometry + +This creates a connected graph enabling transitive learning (e.g., HSC → SDSS → DESI). + +### Optimization Details + +- **Optimizer**: AdamW (β₁=0.9, β₂=0.95, weight decay=0.05) +- **Learning rate**: 2e-4 with cosine decay +- **Warmup**: Linear over first 10% of training +- **Batch size**: 8096 (distributed across GPUs) +- **Training steps**: 205,000 +- **Mixed precision**: bfloat16 + +### Computational Requirements + +Training AION-1 requires substantial computational resources: +- **AION-1-B**: 64 H100 GPUs for 1.5 days +- **AION-1-L**: 100 H100 GPUs for 2.5 days +- **AION-1-XL**: 288 H100 GPUs for 3.5 days + +## Emergent Capabilities + +The architecture enables several emergent behaviors: + +### 1. 
Zero-Shot Cross-Modal Generation +Despite never seeing direct HSC↔DESI associations during training, AION-1 can generate DESI spectra from HSC images through transitive learning. + +### 2. Flexible Conditioning +Any modality subset can condition generation of any other subset, enabling: +- Super-resolution (low-res → high-res spectra) +- Cross-modal translation (images → spectra) +- Imputation (partial → complete observations) + +### 3. Physically Meaningful Representations +The learned embeddings organize objects along interpretable axes: +- Galaxy types (spiral, elliptical, merger) +- Stellar properties (temperature, metallicity) +- Redshift progression + +## Implementation Details + +### Memory Efficiency + +- **Gradient checkpointing**: Trades computation for memory +- **Mixed precision**: bfloat16 for most operations +- **Efficient attention**: Flash Attention 2 implementation + +### Inference Optimization + +- **Token caching**: Reuse encoder outputs for multiple decodings +- **Batch processing**: Process multiple objects simultaneously +- **Quantization**: INT8 inference for deployment + +## Data Provenance & Licensing + +The pre‐training corpus – dubbed *The Multimodal Universe (MMU)* – merges publicly available data products under their respective licences: + +| Survey | Release | Reference | Modalities Used | +|--------|---------|-----------|-----------------| +| Legacy Imaging Survey (DECaLS/BASS/MzLS) | DR10 | Dey et al. 2019 | 4-band images, photometry, catalog scalars | +| Hyper Suprime-Cam (HSC) | PDR3 (Wide+Deep) | Aihara et al. 2019 | 5-band images, photometry, shapes | +| Sloan Digital Sky Survey (SDSS) | DR17 | Eisenstein et al. 2011 | R≈2000 spectra | +| Dark Energy Spectroscopic Instrument (DESI) | EDR | DESI Collab. 2023 | R≈3000 spectra | +| Gaia | DR3 | Gaia Collab. 2022 | Low-res XP spectra, photometry, astrometry | + +All derivative checkpoints released on the Hugging Face Hub are distributed under an MIT licence; users are nevertheless responsible for complying with the upstream survey licences when redistributing raw data. + +## Physical Units & Conventions + +• **Images**: pixel values are calibrated nanomaggies. Exposure time normalisation is survey-specific and automatically handled by the image codec. + +• **Spectra**: flux density in erg s⁻¹ cm⁻² Å⁻¹ (observer frame). Wavelengths are Å, *not* log-λ when inside the model. + +• **Photometry / Scalars**: all fluxes in nanomaggies, magnitudes in the AB system. Ellipticities use SDSS convention *(e₁,e₂)*. + +## Known Limitations & Caveats + +1. No ultraviolet (< 3500 Å) or mid-infrared (> 1 µm) spectral support. +2. HSC chip-edge artefacts occasionally propagate into synthetic spectra – crop images if necessary. +3. The model was trained on **96 × 96 px** cut-outs; objects extending beyond that FOV will be truncated. + +## Citation + +If you use AION-1 in a publication, please cite both the codebase and the accompanying paper: + +```bibtex +@article{Francois2025aion, + title = {AION-1: Omnimodal Foundation Model for Astronomical Sciences}, + author = {LASTNAME, Firstname et al.}, + journal = {arXiv e-prints}, + year = 2025, + archivePrefix = {arXiv}, + eprint = {2406.00000} +} +``` + +## Summary + +AION-1's architecture represents a significant advance in multimodal scientific machine learning: + +1. **Universal tokenization** handles arbitrary astronomical data types +2. **Unified transformer** learns cross-modal relationships +3. **Flexible design** adapts to available observations +4. 
+
+## Known Limitations & Caveats
+
+1. No ultraviolet (< 3500 Å) or mid-infrared (> 1 µm) spectral support.
+2. HSC chip-edge artefacts occasionally propagate into synthetic spectra – crop images if necessary.
+3. The model was trained on **96 × 96 px** cut-outs; objects extending beyond that FOV will be truncated.
+
+## Citation
+
+If you use AION-1 in a publication, please cite both the codebase and the accompanying paper:
+
+```bibtex
+@article{Francois2025aion,
+  title         = {AION-1: Omnimodal Foundation Model for Astronomical Sciences},
+  author        = {LASTNAME, Firstname et al.},
+  journal       = {arXiv e-prints},
+  year          = 2025,
+  archivePrefix = {arXiv},
+  eprint        = {2406.00000}
+}
+```
+
+## Summary
+
+AION-1's architecture represents a significant advance in multimodal scientific machine learning:
+
+1. **Universal tokenization** handles arbitrary astronomical data types
+2. **Unified transformer** learns cross-modal relationships
+3. **Flexible design** adapts to available observations
+4. **Emergent understanding** discovers physical relationships
+
+This architecture provides a foundation for next-generation astronomical analysis, enabling scientists to leverage all available data for their research.
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..7d675ea
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,109 @@
+import os
+import sys
+
+sys.path.insert(0, os.path.abspath(".."))
+
+project = "AION-1"
+author = "Polymathic AI"
+html_title = "AION"
+
+extensions = [
+    "myst_parser",
+    "sphinx_copybutton",
+    "sphinx_design",  # For cards and grids
+    "sphinxcontrib.mermaid",
+    "sphinx.ext.autodoc",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.napoleon",
+]
+
+autosummary_generate = True
+
+# MyST parser configuration
+myst_enable_extensions = [
+    "colon_fence",
+    "deflist",
+    "html_image",
+]
+
+myst_heading_anchors = 3
+
+html_theme = "furo"
+html_static_path = ["_static"]
+html_css_files = ["style.css"]
+
+# Theme customizations - separate light and dark themes
+html_theme_options = {
+    "light_css_variables": {
+        "color-brand-primary": "#CA0E4C",
+        "color-brand-content": "#CA0E4C",
+        "color-foreground-primary": "#2c3e50",  # Dark text for light mode
+        "color-foreground-secondary": "#546e7a",
+        "color-foreground-muted": "#90a4ae",
+        "color-foreground-border": "#e0e0e0",
+        "color-background-primary": "#ffffff",  # White background for light mode
+        "color-background-secondary": "#f5f5f5",
+        "color-background-hover": "#fafafa",
+        "color-background-border": "#e0e0e0",
+        "color-sidebar-background": "#fafafa",
+        "color-sidebar-background-border": "#e0e0e0",
+        "color-sidebar-brand-text": "#2c3e50",
+        "color-sidebar-caption-text": "#546e7a",
+        "color-sidebar-link-text": "#2c3e50",
+        "color-sidebar-link-text--top-level": "#2c3e50",
+        "color-sidebar-search-background": "#ffffff",
+        "color-sidebar-search-border": "#e0e0e0",
+        "color-sidebar-search-foreground": "#2c3e50",
+        "color-admonition-background": "#f5f5f5",
+        "color-api-background": "#f5f5f5",
+        "color-api-background-hover": "#eeeeee",
+        "color-highlight-on-target": "rgba(202, 14, 76, 0.1)",
+        "color-inline-code-background": "rgba(202, 14, 76, 0.08)",
+        "color-inline-code-text": "#CA0E4C",
+    },
+    "dark_css_variables": {
+        "color-brand-primary": "#CA0E4C",
+        "color-brand-content": "#CA0E4C",
+        "color-foreground-primary": "#e0e0e0",
+        "color-foreground-secondary": "#b0b0b0",
+        "color-foreground-muted": "#909090",
+        "color-foreground-border": "#2a2a2a",
+        "color-background-primary": "#0a0a0a",
+        "color-background-secondary": "#171717",
+        "color-background-hover": "#1a1a1a",
+        "color-background-border": "#2a2a2a",
+        "color-sidebar-background": "#0f0f0f",
+        "color-sidebar-background-border": "#2a2a2a",
+        "color-sidebar-brand-text": "#e0e0e0",
+        "color-sidebar-caption-text": "#b0b0b0",
+        "color-sidebar-link-text": "#cccccc",
+        "color-sidebar-link-text--top-level": "#e0e0e0",
+        "color-sidebar-search-background": "#1a1a1a",
+        "color-sidebar-search-border": "#2a2a2a",
+        "color-sidebar-search-foreground": "#e0e0e0",
+        "color-admonition-background": "#1a1a1a",
+        "color-api-background": "#1a1a1a",
+        "color-api-background-hover": "#262626",
+        "color-highlight-on-target": "rgba(202, 14, 76, 0.15)",
+        "color-inline-code-background": "rgba(202, 14, 76, 0.15)",
+        "color-inline-code-text": "#ff7a9a",
+    },
+    "sidebar_hide_name": False,
+    "navigation_with_keys": True,
+}
+
+# Add custom footer
+html_context = {
+    "default_mode": "auto",  # Let the user's browser preference decide
+}
+
+# Customize source link text
+html_copy_source = 
True +html_show_sourcelink = True +html_sourcelink_suffix = "" + +# Add custom favicon if available +# html_favicon = "_static/favicon.ico" + +# Set custom logo for the top left +# html_logo = "_static/polymathic_logo.png" diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..0d44812 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,159 @@ +```{raw} html +
AstronomIcal Omnimodal Network
+The first large-scale multimodal foundation model for astronomy
+ +AION-1 is developed by Polymathic AI in collaboration with the Flatiron Institute and leading astronomical institutions worldwide. We welcome contributions from astronomers, ML researchers, and data scientists interested in pushing the boundaries of multimodal scientific machine learning.
+ Start Contributing → +