From ab2bfb7ac62136cb25e6804b6b3f362144cf1e1f Mon Sep 17 00:00:00 2001 From: Francois Lanusse Date: Sun, 25 May 2025 22:28:51 +0200 Subject: [PATCH 01/21] Add architecture overview docs --- .readthedocs.yaml | 8 ++++++++ README.md | 2 ++ docs/Makefile | 7 +++++++ docs/_static/style.css | 5 +++++ docs/api.md | 8 ++++++++ docs/architecture.md | 45 ++++++++++++++++++++++++++++++++++++++++++ docs/conf.py | 31 +++++++++++++++++++++++++++++ docs/contributing.md | 8 ++++++++ docs/index.md | 13 ++++++++++++ docs/installation.md | 9 +++++++++ docs/requirements.txt | 3 +++ docs/usage.md | 11 +++++++++++ pyproject.toml | 5 +++++ 13 files changed, 155 insertions(+) create mode 100644 .readthedocs.yaml create mode 100644 docs/Makefile create mode 100644 docs/_static/style.css create mode 100644 docs/api.md create mode 100644 docs/architecture.md create mode 100644 docs/conf.py create mode 100644 docs/contributing.md create mode 100644 docs/index.md create mode 100644 docs/installation.md create mode 100644 docs/requirements.txt create mode 100644 docs/usage.md diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..0c43015 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,8 @@ +version: 2 +sphinx: + configuration: docs/conf.py + fail_on_warning: false +python: + version: 3.10 + install: + - requirements: docs/requirements.txt diff --git a/README.md b/README.md index e1689db..2e8b54a 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,8 @@ AION-1’s tokenizers cover **39 distinct data types**, grouped by survey and da Start with our interactive tutorial: - **[Open in Google Colab](https://colab.research.google.com/github/PolymathicAI/AION/blob/main/notebooks/Tutorial.ipynb)** - Learn AION basics interactively, no local setup required! +For detailed guides, see the [online documentation](https://polymathic-ai.github.io/AION/). + ## 📦 Advanced Installation AION offers flexible installation options to suit your environment and requirements. diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..5fd6dbd --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,7 @@ +SPHINXBUILD := sphinx-build +SOURCEDIR := . +BUILDDIR := _build + +.PHONY: html +html: +$(SPHINXBUILD) -M html $(SOURCEDIR) $(BUILDDIR) diff --git a/docs/_static/style.css b/docs/_static/style.css new file mode 100644 index 0000000..23724f8 --- /dev/null +++ b/docs/_static/style.css @@ -0,0 +1,5 @@ +/* Custom colors inspired by Polymathic AI */ +:root { + --color-brand-primary: #6f42c1; + --color-brand-content: #6f42c1; +} diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..d7fc5e2 --- /dev/null +++ b/docs/api.md @@ -0,0 +1,8 @@ +# API Reference + +```{eval-rst} +.. automodule:: aion + :members: + :undoc-members: + :show-inheritance: +``` diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..abf8c13 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,45 @@ +# Code Architecture + +This page explains the major components of the AION codebase and how they interact. + +## Modality Data Classes + +`aion/modalities.py` defines Pydantic models describing each input modality. Examples include `Image` for imaging data, `Spectrum` for spectroscopic data, and scalar modalities such as `FluxG` or `Parallax`. These classes provide type checked containers for the raw astronomy data. + +## Codecs (Tokenizers) + +Under `aion/codecs/` reside modality specific **Codecs**. 
A codec encodes a `Modality` instance into a sequence of discrete tokens and can decode tokens back to the original data. The base interface is defined in `codecs/base.py` and concrete implementations exist for images, spectra and catalog entries. + +```python +from aion.codecs import ImageCodec +from aion.modalities import Image + +image = Image(flux=my_flux, bands=["DES-G", "DES-R", "DES-I", "DES-Z"]) +codec = ImageCodec.from_pretrained("polymathic-ai/aion-image-codec") +tokens = codec.encode(image) +``` + +## FourM Architecture + +The core transformer architecture lives in the `aion/fourm/` package. The `FourM` class combines encoder and decoder blocks along with modality embeddings. It provides utilities to concatenate tokens from different modalities and to apply modality-aware attention masks. + +## AION Wrapper + +`aion/model.py` defines the `AION` class which inherits from `FourM`. It adds high level helpers for: + +- **`embed_inputs`** – convert a dictionary of modality tensors into encoder tokens. +- **`embed_targets`** – build decoder inputs and target masks for selected modalities. +- **`forward`** – run the full model returning logits for the requested targets. + +Typical usage during inference is: + +```python +from aion import AION + +model = AION.from_pretrained("aion-base") +logits = model(input_dict, target_mask) +``` + +Here `input_dict` maps modality names to token tensors (obtained via the codecs) and `target_mask` selects which tokens to predict. + +For additional details see the docstrings in `model.py` and the modules within `aion/fourm`. diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..7496f00 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,31 @@ +import os +import sys +sys.path.insert(0, os.path.abspath('..')) + +project = 'AION' +author = 'Polymathic AI' + +extensions = [ + 'myst_parser', + 'sphinx.ext.autodoc', + 'sphinx.ext.napoleon', + 'sphinx.ext.autosummary', +] + +autosummary_generate = True + +html_theme = 'furo' +html_static_path = ['_static'] +html_css_files = ['style.css'] + +# Theme customizations to approximate Polymathic AI colors +html_theme_options = { + 'light_css_variables': { + 'color-brand-primary': '#6f42c1', + 'color-brand-content': '#6f42c1', + }, + 'dark_css_variables': { + 'color-brand-primary': '#a78bfa', + 'color-brand-content': '#a78bfa', + }, +} diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 0000000..6609c63 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,8 @@ +# Contributing + +We welcome contributions from the astronomical and machine learning communities. + +1. Fork the repository on GitHub. +2. Create a feature branch and commit your changes. +3. Ensure tests and linting pass before opening a pull request. +4. Describe your changes clearly in the PR description. diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..d18ae4a --- /dev/null +++ b/docs/index.md @@ -0,0 +1,13 @@ +# Welcome to AION Documentation + +AION is Polymathic AI's Omnimodal Network for Astronomy. This documentation provides a detailed guide to installation, usage and development. + +```{toctree} +:maxdepth: 2 + +installation +architecture +usage +api +contributing +``` diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 0000000..1fa0842 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,9 @@ +# Installation + +AION requires Python 3.10 or later. 
For most users, installation from PyPI is recommended: + +```bash +pip install aion +``` + +For additional options such as installing with PyTorch bundled or setting up a development environment, see the project README. diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..423db0a --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,3 @@ +sphinx>=7.2 +myst-parser +furo diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 0000000..b30d6c5 --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,11 @@ +# Usage Guide + +Load the pretrained model and start experimenting with astronomical data: + +```python +from aion import AION + +model = AION.from_pretrained('aion-base') +``` + +The model accepts modality-specific tokenized inputs. Refer to the API documentation for details on available modalities and helper functions. diff --git a/pyproject.toml b/pyproject.toml index 2872b07..22c450c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,11 @@ dev = [ "pytest", "ruff", ] +docs = [ + "sphinx>=7.2", + "myst-parser", + "furo", +] [tool.ruff.lint] # Ignore space in shape notation for jaxtyping From aa268a3d7da7ae1e3a6e553519a2bbd250adae0f Mon Sep 17 00:00:00 2001 From: Francois Lanusse Date: Sun, 25 May 2025 23:27:43 +0200 Subject: [PATCH 02/21] fixing format --- docs/conf.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 7496f00..2304875 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,31 +1,32 @@ import os import sys -sys.path.insert(0, os.path.abspath('..')) -project = 'AION' -author = 'Polymathic AI' +sys.path.insert(0, os.path.abspath("..")) + +project = "AION" +author = "Polymathic AI" extensions = [ - 'myst_parser', - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon', - 'sphinx.ext.autosummary', + "myst_parser", + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.autosummary", ] autosummary_generate = True -html_theme = 'furo' -html_static_path = ['_static'] -html_css_files = ['style.css'] +html_theme = "furo" +html_static_path = ["_static"] +html_css_files = ["style.css"] # Theme customizations to approximate Polymathic AI colors html_theme_options = { - 'light_css_variables': { - 'color-brand-primary': '#6f42c1', - 'color-brand-content': '#6f42c1', + "light_css_variables": { + "color-brand-primary": "#6f42c1", + "color-brand-content": "#6f42c1", }, - 'dark_css_variables': { - 'color-brand-primary': '#a78bfa', - 'color-brand-content': '#a78bfa', + "dark_css_variables": { + "color-brand-primary": "#a78bfa", + "color-brand-content": "#a78bfa", }, } From b34fa26df5a22fdb3ade1d261d133c03a87250ee Mon Sep 17 00:00:00 2001 From: Francois Lanusse Date: Mon, 26 May 2025 00:41:53 +0200 Subject: [PATCH 03/21] adding content for doc --- docs/_static/polymathic_logo.png | Bin 0 -> 23002 bytes docs/_static/style.css | 866 ++++++++++++++++++++++++++++++- docs/conf.py | 85 ++- docs/index.md | 118 ++++- docs/requirements.txt | 1 + 5 files changed, 1059 insertions(+), 11 deletions(-) create mode 100644 docs/_static/polymathic_logo.png diff --git a/docs/_static/polymathic_logo.png b/docs/_static/polymathic_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..445d3398248d79352d6dd929e5a242f6b7d624bd GIT binary patch literal 23002 zcmYJbby!s0_da}P1{k`#I|n4BQv~T$U?>?FN&!g;sX^%)X_OLdK^Q<0q=uHpf<|%k)fV70FZ#c0t9k0@cUiu ztS{gXiU33VU;q$`y7+>8wo-Be|Hu)dj|{o)?->&28sq_lg@wu7y5kd!b`9{5@elGU zTG!wN0Dizo@5+tv;?uQ 
z7kBGTM{o#>8`9JF6OA7hKgGEdio2p>TW02VheAeoU%z4cyBq9c=c4fStIf-g$Hi)U zT6>8NnVGcl^qmjBV_W!t?Sopt@P!(NrwxcCY zKtM0IigJ~To5WY&qN{yiXy}@7JiTu6lwfw;n`6Pn`E2pvWlZuQg$B#{PD{f-{lJ09$?-E z^QO^*FMjdxAQldw6G+yCf$km@aRRoNs$A7ctg~U1+P6h zD7$6Eunx-Gtf&M3P%YXFo(}{|wU53!iGqhygwx2Y1I?^`G|l3#m&fYL$I>bZ;Q6?ePp4%ah8b{y5` zeu6ru7xb|0j6aLU@W^2&EDZs1t!8M>@-D`_)0?P<;OlOOWg7j~aWpXKdtP`XR<{5% z)o-S3QG*s%_+3Cba}98zzG07l>aTupH|0XB%3c%iUEPzezG~`r7~J zXaLb36)_q!3sdS=Qqq66+xJUH?_YXvI_?3+9{(P{5!bxKohvRq2Qz;WbBF3eIo-pl zcqM$29L!I%>Yjyd={vcI)doSAuDJs^lJ$Tiz!1}>XZ{Yhv7|}fA&Y40bw+yQt-7iY|EYHDcXXIF)U+>lfwAB)t*-7ytR^nJRxxN;%Y1`+k-I4@u5UUlv;k?WY zLxmBO-2lv9T}VJ*p=>*Jc(A6*=W^Lov8waBA+r{*+-Y2ynpt|KCOd)LB)86JVToj$MVA%ug1m&J1#u@OmxT@~Zjunu9NZh& zLX60T+l6pEusN9IZ0_^d4NW}s!8fL7ie%R^ZI0rjwbBQ!8JM_0bu?|^4L#i$%}=5^ z;nH5NTW0q1UuU83LoVq^5R#xUuvjSwHMBcT(veYsm<+=;Getam)WbzF+ z3Z505_*fdBeyn3pt5oG?rzI?E&OFlon=>en-*TlVs9NCj|W0k@yM2FM%qKE^`N78kG z<-%QrU56Haml^w&))e~Bbc?;>ZW9X?#pV^||By^L;W{Yw;@bvS@)c79UtS9DOkzq- zH*$P?Y(^QKXzq|LIsqwySiB5RKSQ3yQAqG!eJ8^P8(q-6cwP6|K*T`;^dc%Oxw`(4 z2|>-4$5JYDXR+O)0>T)y$_YgL>GREU9~m#CEKA`jXC_Iy6$T+F*m<06IR^x1_Pz#CK8zfQ*e=RH4=QCRx&tjYo~ zs$0cGF*X}*Vjs{FO^HHC#+_MYd;(|9)sIT)p9;@7xaS|?PkSE1p75r{J;gl7v|d;1 zWk3?Ha5k~&t!&pBI7*XWKvN}r{Hb5^z!gDtUi?5|4ee^24aR{NiOEZfA#co*O@Zlm ze9{HTdCqwq$#tZu;vUEeMHl2|2Fe6*+3s9VrXL!L1TF+IELqb0VZ4dWDUh4G@{ZoV zd&QJu+NFvYuvvs1L$J@kowE~{AY≈0a&9)XNRb zJc4lpLx!)<=i_Ls|2n?o^=MoKmr4FvUF&&ej&L56?r+0%sLqf8@V*5To3VFo7iMnp z+W+&tgYcP8Fdc9q$uJ5)_Z=Wd3DhP5--0+clX8z{sR+W)+w^^`Uig?_m~GEK{`jLa z6hoi&Faf}c2gG_*DjX>iGnkM3U~M_N?G<~Q6p)8-3^*|ZT`!J58y`ONu0C3w>?L;3akQ$EA^L+m>Q zw}60H%F{b^RmGreik7q&v71(58G?G=IMl+g{a$mifnXrgq%(&Iw< zYOb*VOawk8eoV5Xe4(T~fZr+WKkad3_x^bu#;kJ#TBxzDPjchFC=!Q1}wC(4L^+j!$F4)@Tv0!y)KE40MPT>ocMtG zXht+aOaJgz!vE~G-q{MqJHhf;vOY_4ogh}IGJRXroM_xJ)7!XFMZ|wbsI3Z!2WAjKjCw`Ej~L zaFJOIU=>thGs#BdIT1aL0Uypnz2 zBdUj>7*NIHs`k!R0%3KIA|wDYf%(Wu3lzKQe!~dqfIET*xX6FcqC+0OejIBKC>n2cGqFYum{VACZ z^ugQTOwR0{gDZdY|5SVHK1Z%@S-ti0^<1t6;KEOheW`p2gXbc^uATs(tEZc812`A~ z?4`P8AdHOuCt03K8}^*8yk*7eKN}r+g%s;Uj&DDw$6TxR(gHOvtj~MlFAfO{ai=@H zdYPaCzSVJVN}xngvH>)k6Wo15P|$_OjG2Z$F&{;NTKRuUvpJBI#bGB9qE%r&Spls} zQY25JcHuIl4OJ0$WEg=8F)8#tS9}||B&29SY%oR{wl6V&{hwW+gU~*ut&b169QCjA z0u=*ce{P>#4WH8c4LT!h(H?YvpHcpkUQo!$1++=qXc05rE5e|hJ_$j!>H)?jO9oz= z^bDo!nt+1{9FIgb;iKEw8@2!QN#>NVaT9U32)cU$s68hG;dpV_eB7nWhd<=0fa;$e z0ux7qF5@tnr)|R`bQs1b+(3^9=mk-N7hWLYQ~Wd;EOPztM&4mUac>6w>tfK&jX%Td zUv`HB> zrVxla;MGKzL_Qtj1*)MskMy%_T}6mWrZ@b%tvNFEpBbj}0&f|ixM<=TD`X$Oe*GPp z{4f34s#Fgi-fzTneR|*sU7OW~`AHDGM{j%YE~jkscHyH50Hy#S;Y^jvole*KP|eqb zaHD{)U4X;FmVAEN;w_>Uand&lq38VX`AaV=m)uE09x(Q}igNmXLg#KG35M~)pmw+N z6IIsI2N=b;yf_xQyr9dA*_RoM3iDsAG-FX)9cI6fR1 zaNi9Sqyw_Vy5d%rHN}j_p`i8+P{F9T1L(Vto3)35(r(?l($2-j%_b6#z732X5|++nHbWPdrm6domz8>) z&U5l{RF9@VdK&`Xno9WTGrQZr-d8UfNXb(cr5U&?4WNd4VMW2WCq7-TJv@9cu)&8? zGLdu_0pg396L(LumnX9p=f@r|G~iD?I^^-Jc?NfrZqa<%f<`bM(6NdJ(rwBvgl?JK zPY=&xgT?t^XpY03S<|+LF{y8sVp{yCT>rT z%R_Nh!---icju?GZ`U@dx8@5^WRsvWGM)*Qzl+4Kx8Qoggt?mSq0ArWKrh>Xi86# z^TenyPi1IZGRKmAV^k*4X05u|Q)u$9h=W^Mi3}QvT3X;eh}3Uzr5gW^t8+m?zs=w` zYL8tw!kQ;Km9F)wHC);uyMln?#Z3=>KnSc%o;jJPX2hGO-HkjE{wAzHn^5*Rep%2>HD-untL~0w`?nV2+57gNi=P5t;q_a}u<*+5O%GL~H>cO{ zx6fgbF7Jmr7KiW2RKu2|_kC!czn>-KDLuvM;~fbjKU>Yxr6s&zNlJ%5#-Mn4yqz36 zB%H5?X4n{SA=!5(ue9XepNPlGF(}cI0cB*gKpWjjY7m#>aGhF!py($=(RMAFTj!KH z!TjsKtknxdRZ>D})$(V9+2)yB*6t7#iIOsR3dea{N{1alTcwc?gaPe9^-K09k1uW! 
zL=96t(}7hyT0*T79XdHnP%X$?o923oTOk6zGhuuI6rL|bTwvoO|I;7bjmg?jhHBb{ zycb#q%k0?@oi3Ry+){)oSiU&~^82FMeCSJNAsdi(k{GjMcJ&-8HUQ))9J?vq;{bt)$k< zIMO*ztqq%9eruJk3JjD+Z>Pl_J3^f}8!h2WJPRqNDKyA5GJ9mSu5K*- z5J$+YcIEYj=9L-Ata`*MQGboIMli{Ir4tjOSunKv8a@z>{;Izw!H6u z(xYpu#tmHM+=o*kCg_c&G=ly=A?u;>M+*AZS{ry{Y>%-5M`&{}W9Zj^GquJ!aOhqz zWx&U)>Sa8S7v!FzVzF9ZJh>3#{&0l-P4V%b&@eHwRRx+})K-%LO3>jEM@Gs}*-Q57 z7w^-1-zY}0Hp(v53cHFgzTbb1i8jYq_aBC=C);*)z7aqREnnWe%EdHlr2*l8hHNEe zrF?IO*K+QY<4}0kW>);uuoAWMeUza$O)b$ZYHI2NDO8nRTZP=N(A>E z`e6xsKf9TaPOK=SMHlE1nO{Zj1k&awZ_puMSBw_x@AP@?OT6317vP-7&;emCgAXP!QgQ|B&z#~#wx)Sh$fr_P-U{EeMRYQbPN#VI z@0V+D2AjSQ1LU@ve=M4R~5N$WAj0Ay)Zy=b)J`j-B8n( z$eQlyUfoSb&sTXFG_uNl536I&1?RZF1tCROVB9~Nz7AU!zlB)k6yRHt&`*kEp1*4UJhmR znuL!+p`m?BY<9F`6ByQDxr_Cx z`#m9FY_xWM@x_mRY!O z-AOW0birO`MOlQ*4l9DL@-V3je9?Rlu*L3C5Mz<5p4xNEacSzP3<**tV>an9J4e@agZhxQbC?yVL=#t&F_h`y5Nt z?%BCBW}z@$hY-}93+Fz!9fasMlqMpvf(qUqr}f(*@~p-jhtI+~Mz;K2n@@|fGYBlr zfGl+9-BGTW_6}}1tx}euVUlF@sT|iI72mVb)IWuqJ8JG?fK&3+GfOOOC*n?8w?O>p zw{tdMugg9^aQ(}0;q%fvU2aq^6dIy@WH2wcg~n{tsx(~qVwq)?@I9I0WDaf_uqgW` z&s*f5`Ap&PCOL0k`gb3R&|iIlh4ewtJFbjWwc(S&hL?7Z9wVz_U#Y&~pu<242agM2 zJPHc%a)N!FZ=YTRd-^5&m8^9Yz} zWeiIUum9G%-y}H4NF;Ef$fgoYPp4BOSbvvDM8k>dNV0uflw?e~VCzpwpcqSGVx$6X zCQEHual!7(UTSGE@x?yruk(kb*aHNB$!TJH8O^#_e&V>QvFYkHWWHIFRI~t<>@={2 z;vFc0xg+^ZLiWOTF1Qu%SR5y^n40D9Ht)Zv{wRUttX*tXGe}%X`^eVr zkos`JgFQvWNK^9V{8A5^uvZ3lN5Cr2rYU3(^`!u?XbC1s7+Ryxn#o81aGuY%%T z@qAlUk6P#*yIMHh^{#zSn^%8UR^%_hN_KE1-3S=eQxBl{$#n1Z;cCWtEi!-6jIqHn ze(k;!ca)xM6L)276jMKA;#$I5#aiOIo7$z+{FIOOYPe!*AGe8kx&nrREjrK3QJld{ zexf8k@{8jtl{IXl&-iRZmyx~(Dm0J}A9%hw*B*-Wn}@&~=1;>+nIq!el~3qgf6|my zpOliJ<({t0U)EAia@~k76FoYHdp?|Hau4a%9YQZUAfEl@?v&xS=#4KCZ2Tqq(--m6 zSK$P%QXKN;aHuzE_ePG%#~{g2?5>&lpH6>rHaAWteT}7}VH0`QL65pyoo931@mR6i zD7NpG&+H2e73;RyAM`j_bnD*y;znvJK2sVs674E$Vo0q7^bHP71dmhJ_s+I-5h2|GwSW5YzXhhle4One>VY{jA#5o1l~@|B zijczeXqOM!rX&QCMk*8TocFR!_XTYr0Y6_Nd0}PFlUaDUYhsQB4}o^DZMV&`@Il0z zc=l!C$c1w17AMLb37~a zW~cpgp?I@CU-UFKK|fw{fshQxC2V`)4hHFE+D~goL+5~Kj~Z3HdG*hT^muXSDDAz+aYc~L4 zV7}Z!U*vjNO-ncJUvCO&=+Z?s^tDX8yQU`NtF-f5g;@nGGo+X+go@Q<(H|q7)Nftf zCL^x~E3np|s+tzl@T?pORkc|7$u}-Y^{x6b$k8;zPvcg9eNe|e#7EXP#m)fH>lxCW zUs+{!1LX6$UT};Eo^X~kDAX$UYik~e{(gv zcLkI#@zO;l;C41$3%!eb*8^M--Fn&0a&jT`qGYQNBAz6E3YaSad)YB@en8Xq<%T<> zl0)-0C@#|$3n__3?@uG|?Vvh#ncdk2@rx>TiZXq!A{eIu`O@?R?e0(RrY()R?o2;b zhUljfaUMUvh=$SuvUY#22bS&73Px3oARhe~JZH^nov`%2O^W#(^3mg8Z)D-tiP#s5 zif?f}eY1(u%vTdht1vwsWlo2YHbW+_KL{I4pvfF>PEM6|EOu}$PhBm)^d`kCZJjiL ze)0Yf>R3Vb`St<7LR{-p^zrT29mn@GUiq`4%JsE-s*eu~U7Z!Hy-+(-m(?=1A_%Gq zj8wzHtzxDVr=i*T`#r{n+12ED(dQf)Xuh_k0-u2o-<3yIZ_18xao#iztfD&%F zso+WV-?DzC%oviz>;<!G*Il}dA+8T+X{23cseJnC3 z_D{opZAsoa+49OfJ{59glx9|Y9HT(BB(~}J?uZ!aaz}JR&szpR-D2RHvTzN3Z801{ z8>N5<@eflo>ETUT%RGv|Yt}pB%yC|FnGvwZL_0LnLrbFlyL=6VK5nA8n`?}J}TuKrle#b@_P3x;yS-Ngx{SOqx9>t z;gPFMbi2UGQVe`?JeTEEPAEn*sbLgmki=ttCyMF=qwIVwPa|?*_8^>+r3VH?!BRi` zKQ6%EJ|pEplrQ{}ZT_EySgkS>EB5%Fkr|7PI|~JNXR-1n94Gajoca8+zs>*X4AFmN zstmWpevrGmY?>PEJc6fInB&PmJD3PDPs<{l);(&RrX^Ap8s@=2jD^{x%x=D}`Np*b=wigEdlc%}2V87ZHEO;j1 zqR$3|cDM{_u$m}65(}2KQVj9#Uwc>r(5uCneX;h9P@oO2QE#eYQ`=No{E_1!{o7?e zza@a8!LVquZ^g|O(vU+G3^40vO%O0Cf3BZ?-%;~ppOM(}N8#HQb=Mv&c`@DDx;|J_ z>paRN7j_b6t#VBoj#4+xkbDz`$&1&>4N%w88tP<43+xz2#rJNUHw~Qo(}lNKwq*2- z9obSz-sWi3{t^|`Dpf06+k#AYxmjwKQh9rb8`~hDVV9abdfImE;7zY|t}JP%!q$8T z+qKcW`4i?#bGigX(+Js5&S)Hv=0{DvEcVVK#k`MeCo0pOW<_nQenUe-V^5r)oSm77 zJo2m(09x~+BpLmX9Ip)isPleLkU(ZEAj0a}sH0|U*GBu5v}CDD@!3gDIIh5W-CO}U zK!DD3V*gEL-7zYE$8LhA!+`Z7|K+q=`Frj7^qtzmzA*}d4BES6o1ar`p@oJ(Ke+B9 
z{K9SCJo3Z7z|i--yj<*j*G!@J?=H2*glEtDKNoI(qvkm%kfQ1ER)V_}~IhCCb4U$5M#uJJg;hv&Q!Xt>8*atGXhKZ!sFC zA1T=V`&fy$B2n?g$b+V`2Ptp#T}iW}KZIRR|8ghnXPDVhuH!H#AB2B!TEPAnnhoK+ zb8O)q)gHw>m|!h@DOIGfU@oonv5& zo4GdHt%SmalVakc+l7@II-G8c<=mu5@~Y)2K`5Njb*78iZKUN$tm`ifQX|?QQ27-a zx825lj(>^Z1yG0bQQ054>Akt_>jEDv&2{^K7)^RqxZfdzUyR4TbWXPUtCF#5Jlzx| z`bU`aBb7;}vQR?XR}@+P@J!OIv9srJjgjaXo2wYJDPmid4{e^SF^)YQow54qOzqKj5h;p)MPD zI`!V7R}x?zhM~w87sIq){MsmTuZ$; zV$<z+^z^?o@=@{D@aYV{=E2rQ=5>SeS1G#2w_KK(#4Me3+(4fl_E5 z6RI^`!J>0Umj1M!d0T@WvA6n#Gs zGB>JXHXsaOT-X8T_Sx_-P7Zzj$v~3W4}+5Ra`P)MbWS~b4&#F*Vptt9i#7}Rq(_9- zZ+Q!tJZMHwUL5k331jt3q|Aav#5*2NU*1MWA zpp@$)*6Xqi!+p&@3=Q~`u!jsDgQljYS~5B=%?x4p>r8N)G zM>WMo=>Wa~N#KC|EF(L>ywhnVt&#Ut-wb}RG4hzEbFsW)E^Yfw3w?Ip);@o6Mx^|P zAdbZGtH}2BfN@WtM9w(ZFgZ|Q6>E1(2x0zP*B7EebG+@+n|(tQ8KFVfk|ZtZb))o{ z$@XcOR9$m_tEK8q-d^oO^$9)fza{$7XB&f;Po@V8pfTLu2!Snodgi_6{mIi=PK7G; zlPtUJ8a6%ee4-2EMmRAMK=-H7%MjDVLmSRL8{xr$As(leus{@I<`<$!c=Su`-ew?; zOi>?F2oxv2fvTEJ%Or3Dy0?tiV2vX1ck5X9ephccR0YhR29iGUJQUiD!PM&MKMDqh zprJ2SMIt3oiYRA9Z`gH|SyhPbP38I~JRPp_I4PL3iyyJ_Osak}cqyG>&Y=$ewb9r7 zX08MUDsM2LMtwH3r?3y7dAHOteXWj_h7Uq?S{3D02y?5br}Hp;_Ys(i3-)k&6{ixw z+Mh$zgtugMBBM69o`764Wr4N(Q#j5={ZEQHxkZ{&H9YRje;AtqoL0ayFFvZ2zNZS} z9wQK-A+24aC%9%Mt+F;ee^HC|7hOXYCFzy3@vmo$R{-7?ku)Nx4>>QV6mAt$MH3Vpv#bDjObEOU; zQLSpDY!Zhq(tP3XXm~vH+4k=>Tzb=IX3e|jT~q)BaNhBGqkff67|^Qikc$8gR!!NO zxZl$Wtlpm_aEdNkr|iTfY5b_6zs)bfSP$UH`X%39?o|FJa- zb;9fTY9pa+1#L(R*zS637rDv!))D<7N96Xb!v%GeKx~)h^&_Ba+AC`^rBwN^eY~pn zSG-p(k`?k~+30EAFF$O=Ss?hOvm3T_82Zo-BfJ9SsK4k~ z<3}ciQtEE9J#e6Q7CcPtJO5tfmIws!-8yzgC@oF=E`S6>7DKn~eLws{R)t*8I>`1q zcBJ4tVxXd%|9~MSa1*rfNL@=?2#Po6=M{l(OP43#j|A(}4sLA$$=`?Hse2$VC*j4o zg|YG>FMlRX?d_{;+sg+c)#i2&p}tFk~B$W$0FBKyhpyQYU7uLGL;v ze&VPEB(t(oqHz5SQ2*S21lcW4qvmI zqWR8?DliwjO4g_}@ZA6IbV+b(EExXFCiPK3RVo&aSuwxH5TUgJVaB{}9OYR%C`d5w ztZkG|X0vjFpeWWUL>%=2aUWWsY}B1$;m1h?MVRB0l3!_hwUj^^#aJIBF%vk|OB)@z z$(Sk$eA^CK2n=1=C@=i1MOJy(dO7q9w)@u<8!GVxI`sTsv~S#6eX=xkt+7XbT%IXh z9*=Ze^c-p(^OE~U8wFJn`;*Hi zX%8X#ZX^1*!=34uGRe;0;GgoTCDHe^?&zlK0>65fc(>Ra&ZMY}+jdI&^`*C+QEo_@t=SpExGw_f_9aRK1 zvWK3?u3Fxo|JcRz_5CY)OzA=6MmxPPbI(r^aG;8sSddyn5VO?&K>AnA!9kNmXZJTA zh}K|yf|aMQpzPE`-0~ufM#k}%xMm+~k4FOr*@4F+-+$fNDT=0|k%{%xag(bk?t=1a zZZ+BE%I$xp?hCCQa*_EBp7PgOl~xol-4?71DWZNI{vlPZL{UJdnxor=LO@uc1xSSO z(T-uIw5aa0Q;$k|ki%4-jRfb^@x}`cKv2vxewU@(9CWm4xk(YQ^sXQnHB1R=PWs}v z@?|jxeZZ3TKRbFnPO5CA1mwt@Y7+oomRDtVpd}SS@7o3pljxxspOzc;5#pBGQ>1_U z5^OgCppkapI;0sQ1kLP8h(p?UI`DRe3ZPM1>q{n&G=PD|+TiCG9X(O?oKiMyfIhY;q zI&*dta4PN7(Rc`0R`371>Kkmp!A={+u^%OFc|wx({-ANIVDBUsu4Cs!bI49IR7irM zBVAn?#zgCza=v2h-J}L}wEE@-oX08{ zL9;r!*5nhnAH<$USDg|u;C{J4u|Y*bE7+j)?i;i_B7ocT4#vm2%JBTjG(z*x(xxsA1q-7CegYV6TOi5&bvm_fS@}F*%^q9u13+a{^ zBY=311Wa@feQB23e*hr2NIr-Sz!pdjVuP_dI2MSr_L&+m1g1rAr-^wf5+|Yj=`Tn`aV-9jRgxtUg<@XCscY{z2I>v3rLi2 zQW{`UrG9_s%nMSvyLGe_N2ig=1~P^Qu``wFGoa9n#|{rYQ58J+!dIaIp2Vl_J>oK2RPp-kbq-GN30%& zDn(IO+3o|&blmB~q&CXuv*TtvwW$JpJ&aK8rNGs~aext=^k{ZK9BAb1my5sJ*!DqV zB)aU&vze5i%|;~%7D*8ZLaAKqE0U(|GKC26djUd3KkUAzH3Y5hU>z}3IuEP(&qqA=rKo#4nK40 z9Jn>h#0k4xIMA4#1lRc#8MPnZFKv{s-2YIS6hw?zZV*>xM}cVhom57k?n1AjL=0wr zIiqX&&970?>Vaj#Eq(}o<8c;%%Ka1No%k8gf%m~%{Fyq4Wjf%@KiP-xBG}ZX&H=zk@0HyM9+?s({J)!?o4MDq0J^2Jj6f&T-(RVJ7Ry-5?~p{4>}!F<-K8#` zM!SGdlGIO*1FHqs^@dsdW*MSFQp{L%G z+d0%TaKV^9YAQ=CpkHKES~|Um0Qt89kSB5vFgdba=jywD2cxi&Ufsl)YjbJ05`f(d zFj*P`%OH8tmEsX9Dg@tFAp#bxN}c0xS!5M0wXMhRG5&R;$ur@Xc>5Oucv?0i5`P$K zQ$LB^Znu-GNhl0-4$r?;yAV8_D?tW!w4-T(QfQ1`@blVgJmYbR-p=e5Ew^CPikb^# zf*`qkej|_ta63S_Z3E$AC>JTxRVfFzJpf36#4oPtoEjGX>Hnm2Iz;}>!rDO*4&bHm z&Y6C66e^kP?5%&k+UIly2GuSLL^+?~xYa>jg~ 
zG1F#F3Ozoi_3gV9>U2S9W%pnNqF5hk1>0s3=)-{nf#DfPK2oKQZRkNUmDaT=)3Vn8 zQ{gh??`@xhWN&>jIywv+^q-;(%H=c22eew4)H%J(ltZ|kPV;S4A@0b?xH1wL>TP{D z`!n7GsYXv|p9)@rYhAn9IGZC3u*-FqD4A4J0S1tL7~h>=?|JZ-;-oQRFzzX@XqK-s zI1im{_WHAPf9>StP%#3;JJZgqbabi@)lN?nwh>CdA;cxc>o0_z-iiZJ?BV2u%KnKtY|qggdpNgN zqLb-F1*piU^ceF2P_Ew)KuI~Vf@AcJHB9~0!ro+bwe5|_%7Ke>@KYsZO?vu+p zNI?+x`j^`eej(g(k@g5YhcAFdV~@>pK#s;OMF=Q_`D3g_D<>fZH(6T=is27ZWuK)( zF)NW5+5YCz*#pS&4;7S))rF-7TOmgefw4S_xT7XbOKq7?FuV%ihX`M4oX-=vRXXa| zhsUbEY&e7=u-WYCCf{~ckw=2@cfE7oRTOn~gz77ByTad>!1NaOV234HWFbc5B%irFFEc4~CdLf{bE#!URNbuMCSwLL8S6{N_8n-8o)h#wmUL0MT{b@Mu^PqCzbrHtMF0bqxuloYQG-L1cg(Hg+> zcXo5ka6h5rb6QG?$n^>1L`iPpqMlRMMQIASI=juD*IRN2V%Oiypook*bp z5IPEB%rh!Ja2b+J^RdNdJoYPxIbGDaU2XXML29DDc3)ILw79A25%uY?Y5Lg&ifMIg(O}ta0SanRe1EwchKt^W0JsFCY-ou;mZpE6J)DC z+KAd96A(e4OIEh9&Xhm@-6U%OZ5;jE5dy}w5HB#hmEh_g%A~`UY7UQKoym9g!PS%j zmZD%wpAx(k)+ZW%N>jKJoB!@UKvXxU05rh>CARY*Q;^50eMy59lN46~+f!sVshZ9P zL6rOX`zvzOz5%L3+*M3qZXEr(ek}P|(k0MAg0wf&uY$RhDYH)_ySjJABX?)Vh8B!u zHzb|{oA$(KBsJ%Jn?`lYO3|S`ZlFjL&%l&s-J8*L!2$1#m|cqbG>o+%Ta}Uo>&d@? zrC!H2xZA+np#YTBsMd>|_G4{B;8h=K=9*I{$l0|Bi>yvxKYt<#5DQ@Ztn-x|+c(`4 zJ=usK!1gkq3J|Vc@^jZ%DjZ~id#8i(BQV+mya%wo3t({l*bi_mW?Z}XMc{(!+?!5l zpE}GGAY+t^B=XVii=|o_Ujr{P$aMRePmvTp#0{_G8y~lq0lYyfq0}H3UjVR)8W+=7 z7#Z&gN)q0>aj3S5QtSG|1iHlacfknZ{~QoJ>&OA_52s<_e=B)ZL@Zsb7{J8LA@;ap z407_R9*g~Vnr$8Y%Io+MvdWtQbp9M=9O6Vq*yxNj!1P1=;KyS+M&Q4xI@MB{p|T{6 z#qJ(De0WCE#RLm-9mI2&Xi|(|3A#M6#NukrP8!CiF_3zt45X6w$3tMuYq0yaq5O~{3TZxV4MRtg&cj?ZFW0m!G9H z9flJDGtf4$d1#*IUVKVM5bs!FgOrK2J2!-$bXvL*4oS{OKMWOtJCGCbeh%1X1elUWSs31UIq~wh6z)QG=LMmQn;8RXLkCc+bO&aWmN<(na}z{T3{9 ze|7#CNv>lRP$K5m2sy$5jh(e?pUqlcWR&)wi>mToB8#GKlNTw=hGTvPYcabf-6kcn zQA~wAY(H3WWK(|;5nFCB1%egy!DmCffE*PV@xkD#p8xEP$b|TfT7a88Z#?{h+)!JIAFCoA)`3fj55M3YQ}WW5sg- z@HUmuKV0S<>?u=26GOOULalIz0p@YFUhdO8qaQi&lG*&y~ zb!)Y*6`@I!OvR05qS8wAf5aH{Cs-ETMu6evtZ`}d(Cbs2iXh~PYW9u0$utv@FjptsmUL%Npjy3)Qi zSNOYD*n!8B!1&&~0ykgGe3}iIdE^2oeC#;aGMOlW+gUs)+?0zN1jD*e;n7EbHj1)Q z0SgGx8>N{PS_7>ifs^ z+btlRtFga!&FinmL|JKy@MzeowrEtk-JJ8FpxpFRzem&A(Il9b&{szgmtbFYk>5Jp z?8)E9wZ`kq*xkx156A%@5-$A+amSnS@_pwIjyd0gG-)75?t!LoGy1S`ARD>QP7Fn; zO1mR~gIxWU5$Lbv8VRm4_{~6XWmXlA|uzVy$hBs_G9xX*3cq zOWeS_LXJ{z&F*grOV`ccF zn{P^rc_{N({%prp&GyGAxIxa}79Cfi3Fa_1hA+xI_8MOD3}~e|Vh^be)VxD=Ab=P9 zs5zk-xabnM?@Y$R(bjzM^nSXaOXAO^P^2h?zjr?<*I=q;c4He^H|Gr9V-pMw<;O|? 
zv17P5^>poT$2BVrr}+smpw$w@s|9@+cDPJC^e|&lEbW`K8k3Y#3pX!SNjY5H_(G)i zwg*-Q&(R$)Ed(=6ksYL>r#9wqx(b*$`7^%FK4Cz>@y|2;u+@jr=+)vk&snC=%Qxzh zqpipwcsmk_udScOd=Xf}a2CFrcdaj;(-5i!)gbX@#f{QMG-&fG3~h7r^MLaK3$YNHfyIIGwP_Ow{;gR5!(UAoI_@YJB;k!F4KO zSB3UH;rI}LmyaxlDI5_|N4=h+8$(^p8LIMnEb;g~>pX2K-I8j<6m_!_V7~mXU4mIi zA)K}9_hgLkL#y9uHGpNlPc=>ZjsSMA?P%e6QncMowt(?o{H+9=nOP!$hQ~)%>plR7BnvL&kFsgz12ofdy+6iOKFh>>gd8 z1AQd`ynWw{)N6w}{TW;Tij;{X-0Dk@EilEm_c99g;{U%Q&O8vR^?l%H?2LV9EFn8t zBYUBuL~g|>H1>)pvhP_UTarCgF3LJ%jWG!CI$Rs`27Ny z=N}Nn(b-@KYv0@%fICq7k0dyYzWEoPr>v{?AyeqSDkqut5nCSYN>Uc}YFZJ%emgZ2 zc%gI^nXu~jtiBcNmHCF@4=4W1+o^)3#=bkwA(b_$pVHaoXwrlxglS)!48(L?v@eW@ z-qmy-`{91Ey`dJL^QuF~^R?SPOH`t`02kGE>Mq8?ea3PbJ=)5+8aXvq5+vkwdZQ!y zVMdqVSkp7e)RIcFYd~AgLH?`DPs+eazu1-E(y;>$J}LDrvqATLgxBI5_Z@F3xb>1` zVoSOa3SR%z;13F`dRc3bO!N&dvOu)o!>c-4iA%YASycJz1XuPAP27~yC3D)AnZY$7 z-2U75n>ThgY~n}~&B6DF8fhA|@e)6O@^~w4B$DFh5JI74qoK1CEezx?3lhB_lgf+N zn+HPm%&O0wqv2^G+-h=0YWz?gG#CHfMT#{w44}OKbMW`#X3KeECTC z{P_ksP=HxFIHgM>q7@<8JIvVx9%ofo^x1LU?wWx-LSDWzy{82=hg6z=SU-fbR!K)j z^+=y~WVb4sq4hBGZj>6w(e1Wxl{x3gxTUw;eHo{^y89~924C4$!8oB9BYUXQIaueQ z1DiI*d#co*`YqV0P%sS34X9@eo#M+Qp@`!#YrPN4PE)9FAVa<0P0SZuK4mc;8A`Q1 zpYy8Lgxu6FdSy3Z<9at5fa_ep`w*UW31K?-qB8wb2rYn&wrzd0$r`9r z&mWrg`%Il1FhZ4BsLAVa2BRO6>p{p%%{t}H;rc`sJ z9)e&$JQF(g?bM_2_B<_olJ1Aw2FzTnA+LJM`QtjTNhzwSLd#f$-M2apbSJ!coVg#X zfUT>V$9*Uc3D{uG_FHI_a^~ziC9`FeI-&YwSVW8dbAe#gmdbLb;+6J$zjVc;jh3|IcbE>%~m?r?v@D&@Q8l^QQ5T9{K>ww z_Vd#T-H8+|VOgEF#7Ko>5vkpMXforp(-FED4Dd?Z@p53hqfqZ1fCJ>t`xI+Gm>&yY zUSpO+zTbym4q%EcNn`roMu$@4HMcHr7C57Q z4C;f0(sX>k-S3!lY`I!J)tr0##*=qSy*C)Q~Q-uS? zn{_DFjoTvqc9~0v0IeR z(Kd^b{Yhm8xNvUDD7G1@L9fxN^mEP#Ptq&eH>l@K*VxpC+CP`&hpYL^hetPDBj=Z1oHJefQuRz@x*(D3A_2K|r5Hq0B#XEKfp}AXL z%4^oSmX;=;BE6{;&ilLwsa$w~v@;hSD%TWgo5NLgC#I^8GD=gs6%DRaV=W@UZaJp` zgHFM2_vAtFK6AmRNk`H9a~~~IEy~v`@MpX;NY7UyQ$WDPt<{$BJfjI7l)J-7@4&-I zOH6Z^Mm>5@>6z16M+U;hIwP!p+t$ScdZKnCjLS8lS>r*sp<)OPMnaid`IXxX`7*@p z<6y^F#664j2GmY}0fTf{QJXT3_lp{2Pgh~}0i7S4U}7Le@tU^1nzf^ty)SJ}Eo}r( zgxY6M>6K6q??%2{gh;!TNUGjE(%aCq&&90M^G!=pTu63Y9@^B&5~P4o+3|pP;RPaG zDk|+Bc^Ef=4J<4#AEli+^&P+1eWb|R<60#rxkmz@UWVZV>p3!z4e6!-gA3CiELV9-58c^6=|U`GGd7EWrjg!J_5y zd6c(n;TAxqBYL_Dv1t1h>UO%mLsLYDQbw9SJvXL+dvi4>qEX~P&gmS38n5mA_cSEK zga8J{MY|>zZWO3XxYf*3@5wUTH5KXN)+$a*^Z!{LLPoWlvvQsm0%0$f9hB zRp^<(u<6x>$!B@ZVxEsov*HwkY*ke#ED0|C?NjQEg-GU~b(iooTPc>g$x}iFWxDy@ z#!nYz>t9YPq&=3eBT{>83d_4YZnjJ~qNgq@eH)LQ676qKG_)S={!TiKzB-q2)(fFB zTSBw!#pRLdZSCQZLwY@uw};mwC}l0J);J6%$NRq}3h!-cyW%BTI&2x$>}za0gfE5_ zS+piA=8U|P8z+b5S0QEtE8oT#q0VBBJ=_8=2a(i!p0b)Lw?fe(URYhBVMq3Qsc}v< za)T6l*iRHuZ*q$}03eaHW6h1i?#x3%bT-^?SlI*mUpo3ISan-EOXlI_f<}M=o4ll6WrYlt0-an0I zsBzNg*2!e+Xs!;KD>bTS2;jc6G8y|Tf8fE!cpIMF(9KLKZ|@0#M`Idbaw+fdwkwW- zcS7Xtc;e+%bc#Pra+%UZ@TfG5GCs*OalE_H93`7fa`ZG|yTLj(AzsZa zFn%`6VQF8u&Y^VOA!{W!VPXOX_t>Z9mM4LO)N^IkgAgI=*O{dAoT{7wY0j0?Bo6EQ zxZ9)R=CkiI?!yBNhgSmSc0)5`5*};*8CTXE^KEC5R2B5QuHJ!l5kxN8`@#6 zzxS}iG>Q{OE1>8^7ZBW%)?+RvQl$X7)1zs5Y~dhxyO)$l&@+~}fm)?r!&9w=G$OtN)O(0W;45P;@-V2 zf+Kofx^f_0Bq$xi*@>TN5`ij)6vdB)agcA77JkmB%vz88LRXvk`CZRtSL}Pl(5)!6z}?yHwQxy2Iyp%FClMBnjl_?a7d5H?FL8 zSK};zmP7#)Q*RoDXD*5QwS>k-Je8wH-db`YI7@~iqnj1mcspBxLsJySSY#j=kiA~( zt^Gwco~;Fv{hTDsw7~Y{P)__5uGm^+1gTJ8vvj+^GAdsaGmEd&r-B;s z+w289FQ}9fNXwK5Hc#*967GQK%pWHTAL1UTiy0<~bTmt!kwn`SQ5ZpGN%;N7 zh>i6{QMhh!YHyrkm+`C1Do)xd(M7`ep!x38eNo##pTwrgx161{7Bin0ZIW6wQ`4H) z^_@mE&c4HM6zTl(xTnuQAV9XPxH7_FDXAx0*WV0MDx|QJ`>KRfN%(9H= z?~!>MM7rTvC~Ek%5X-q|nTVZ7RglWAdBeUejq-ltY?kG4q#}}lnClV3^<>8jAnGW& zc6*rLt;^FROX}lTPr4}Nx+jqLIcZq{@eqIQZwz5C$YA1u{oMmC1|R7|-Jh;_^Mo*X z*Is%~z^_|)Uv~LE2UPs6!mtMmi_ELd+1C~g3uP3N^4ull3^KcTzngyJf~J*KN1x-g 
z*)4Pv=nBkQ>}}6i@_N3Y++c@XZ>lOMjv$Fnyne~JDqYQj{>Jyqg7*8tJL|VM15WX- zKsEdm-v=hH?)vYjG;jIvYus%aTZR3Ksz3&JwR5ieaZxywSbvU>sBYFv4KGT%WO&V2q*5^G>t3T@OR47@^(McP z5jEon96Oj|!k`+8a6L%y$JZU2Cpj}Eo8<6DV~sLR-`5nJXz5TX_7 zT`I6)gyS!>L;4mlm-4Lw+O7BpQbWt8(|I4Bb!vA%cY1Dm z@(+Xp@&lxuv}m8d8npN{(7&B$@xu;x73({8i^0-1qr=wBzw1g=a#0w5;)%zZ0JgG( zL#DhU%Ywhp+24~;9y>Tdg=k9zg=-O&qbRB#cIp1r2G--q^4VM`(_M|WmcG<&?tllF zg72S3)4gtrC?Ob23W`X7gEzb!l{qG|COK-Z4sS7(24N=c0(H+{S!u7O1;h;1gnS|e zQ>Fr}EB}-SMb=u2K#`p{E9gJ#;HgS~2Wa+1CS|=fH#wBdU^8ME61=HtQtJ5) zHJtqGRYmf{_ct-S1c!ugiz-nKt8=bjVH&&ZH$Z$>2lB6%Fo#B*)ZqOD^)xRJMk$7t zxW*l8Q42?Vj%*LZDT1%$dCFk&$Icl`fee%*Iw!12kb*!{{&mQ>_}J_xTF8E%OlSRnec1vVFQ9Cp zZ~zM&MQEgt_(Bv&J~s58%P|e=CrZ*TDg64D!LjeDTu1OwaXtN^U+Xd0g+ONGW0M$; zePX4KG(zk7OKfE|g&$ za@ELsUetqhx9i{F^BldKBSXn|A5H!7^#7C{2cH4NOYVa2fCk7uL!G!aM0+fsTm^Xu zK9KoOen?mn!fhC*oJgHdPX`8{4Gzi5=!-j!e)RhbN0cd!Sg+x)WO}mvf6>2iL{Mcv zEC)hL`-$EZ5PSt18gt<&Kx7|I<&P6SUXTWY@-d*4L2OwZ0;c5;T!3twx2Oc+bvYrI zGKdMDJUGV%FP-oa#uof9Of1Lsx8}htWVN`L8&RTq!f1_V#~kmN)XM!$T+*^(J4skv z%e|GQrJi7B&{BzgN0-AUA_GAHG!Qh)`0X*H?zEfiObN>&DNTX?BtynE$0_>p8d7e* zxF?HE7f88yQjo>RzJ1PO2>H-P34Nk}aBzYZuA)nZ17O@=g(L=dA{=rksO-xi(4xj%N171|~kp|ScB=7R|Q}cI^ z&E*yXxn^R$J%51!)NZQnI({N>?^7XG3i}f1K7#9l^?mjg_yt*B3cJidFB+tI2r)PO zw}9fn#w)nD>pO_j@RJ|}6taV4-fVc1 zM`Cz{TI%Kz-IhU2{CI;|dD_t9e4xK~=|p&rQs>_-<7@1|A3I&EL03L`9xDfFQ-zsS z$i^th)x}j{?p*&wOJiJng$H`!#i=nz+w<6L_j9@y;VqO}q6(3%<~qnS{&u?~ar#;? zUH+&t*i3UR1b|E4t&a|kb}E`|0$2e6XAS`pe+0Fywtl_fTQ|cD0PsXAfd6yVvR~xn z;__wR`tuq9P#y!WeHq9m=9jL2{fuh_0Hl?uRoU0zX$Zm`_SL0M&1lrF?yeo~1uX!q z_rd_|J&|1Aohgjie5rQ1{=%#%5OF~Uh~SvIdAD@Fcp;QB*Qj05Vuh}9J6L7|A{@vx z0RI$7Upd4q`|O~`J>=*P)fHKFtCDZKMxFwIKYaE;r3*St<~$ep3jj .reference { + color: var(--color-brand-primary); + font-weight: 500; + background: rgba(var(--color-brand-primary-rgb), 0.1); +} + +/* Article styling */ +article { + max-width: 48rem; + margin: 0 auto; + padding: var(--space-2xl) var(--space-xl); +} + +/* Table of contents with gradient border */ +.toc-tree { + background: var(--color-background-secondary); + border: 1px solid var(--color-border-light); + border-radius: var(--radius-lg); + padding: var(--space-lg); + margin: var(--space-xl) 0; + position: relative; + overflow: hidden; +} + +.toc-tree::before { + content: ''; + position: absolute; + top: 0; + left: 0; + width: 3px; + height: 100%; + background: linear-gradient(180deg, var(--color-brand-primary) 0%, var(--color-brand-secondary) 100%); +} + +.toc-tree a { + color: var(--color-text-tertiary); + transition: all var(--transition-base); + display: block; + padding: var(--space-xs) 0; +} + +.toc-tree a:hover { + color: var(--color-brand-primary); + transform: translateX(var(--space-xs)); +} + +/* Premium buttons */ +.btn, button { + background: linear-gradient(135deg, var(--color-brand-primary) 0%, var(--color-brand-secondary) 100%); + color: white; + border: none; + padding: var(--space-sm) var(--space-xl); + border-radius: var(--radius-full); + font-family: var(--font-family-primary); + font-weight: 500; + font-size: var(--font-size-sm); + cursor: pointer; + transition: all var(--transition-base); + position: relative; + overflow: hidden; + box-shadow: 0 4px 15px 0 rgba(var(--color-brand-primary-rgb), 0.4); +} + +.btn::before, button::before { + content: ''; + position: absolute; + top: 50%; + left: 50%; + width: 0; + height: 0; + border-radius: var(--radius-full); + background: rgba(255, 255, 255, 0.2); + transform: translate(-50%, -50%); + transition: width var(--transition-slow), height var(--transition-slow); +} + +.btn:hover, button:hover { + 
transform: translateY(-2px); + box-shadow: 0 7px 20px 0 rgba(var(--color-brand-primary-rgb), 0.5); +} + +.btn:hover::before, button:hover::before { + width: 300px; + height: 300px; +} + +/* Search styling */ +.search-button { + background: var(--color-background-elevated); + color: var(--color-text-tertiary); + border: 1px solid var(--color-border-light); + padding: var(--space-sm) var(--space-md); + border-radius: var(--radius-lg); + transition: all var(--transition-base); +} + +.search-button:hover { + background: var(--color-background-secondary); + border-color: var(--color-brand-primary); + box-shadow: 0 0 0 1px var(--color-brand-primary); +} + +/* API documentation with cards */ +dl.py { + background: var(--color-background-secondary); + border: 1px solid var(--color-border-light); + border-radius: var(--radius-lg); + padding: var(--space-lg); + margin: var(--space-xl) 0; + position: relative; + overflow: hidden; +} + +dl.py::before { + content: ''; + position: absolute; + top: -50%; + right: -50%; + width: 200%; + height: 200%; + background: radial-gradient(circle, rgba(var(--color-brand-primary-rgb), 0.05) 0%, transparent 70%); + pointer-events: none; +} + +.sig-name { + color: var(--color-brand-primary) !important; + font-weight: 600; + font-size: var(--font-size-lg); +} + +/* Admonitions with glassmorphism */ +.admonition { + background: rgba(var(--color-brand-primary-rgb), 0.05); + backdrop-filter: blur(10px); + border: 1px solid rgba(var(--color-brand-primary-rgb), 0.2); + border-radius: var(--radius-lg); + padding: var(--space-lg); + margin: var(--space-xl) 0; + position: relative; + overflow: hidden; +} + +.admonition::before { + content: ''; + position: absolute; + top: 0; + left: 0; + width: 100%; + height: 3px; + background: linear-gradient(90deg, var(--color-brand-primary) 0%, var(--color-brand-secondary) 100%); +} + +.admonition-title { + font-weight: 600; + color: var(--color-text-primary); + margin-bottom: var(--space-sm); + font-size: var(--font-size-lg); +} + +/* Tables with modern styling */ +table { + width: 100%; + border-collapse: collapse; + margin: var(--space-xl) 0; + background: var(--color-background-secondary); + border-radius: var(--radius-lg); + overflow: hidden; + box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1); +} + +th { + background: var(--color-background-elevated); + color: var(--color-text-primary); + font-weight: 600; + text-align: left; + padding: var(--space-md); + border-bottom: 2px solid var(--color-border-primary); +} + +td { + padding: var(--space-md); + border-bottom: 1px solid var(--color-border-light); + color: var(--color-text-secondary); +} + +tr:hover td { + background: rgba(var(--color-brand-primary-rgb), 0.05); +} + +/* Footer enhancement */ +.footer { + margin-top: var(--space-3xl); + padding: var(--space-xl) 0; + border-top: 1px solid var(--color-border-light); + text-align: center; + color: var(--color-text-tertiary); + font-size: var(--font-size-sm); +} + +/* Selection styling */ +::selection { + background-color: rgba(var(--color-brand-primary-rgb), 0.3); + color: var(--color-text-primary); +} + +/* Scrollbar styling */ +::-webkit-scrollbar { + width: 12px; + height: 12px; +} + +::-webkit-scrollbar-track { + background: var(--color-background-secondary); +} + +::-webkit-scrollbar-thumb { + background: var(--color-neutral-700); + border-radius: var(--radius-full); + border: 3px solid var(--color-background-secondary); +} + +::-webkit-scrollbar-thumb:hover { + background: var(--color-neutral-600); +} + +/* Animations */ +@keyframes 
fadeInUp { + from { + opacity: 0; + transform: translateY(20px); + } + to { + opacity: 1; + transform: translateY(0); + } +} + +@keyframes shimmer { + 0% { + background-position: -1000px 0; + } + 100% { + background-position: 1000px 0; + } +} + +@keyframes rotate { + from { transform: rotate(0deg); } + to { transform: rotate(360deg); } +} + +/* Apply animations to content */ +article > * { + animation: fadeInUp 0.6s cubic-bezier(0.4, 0, 0.2, 1) forwards; + opacity: 0; +} + +article > *:nth-child(1) { animation-delay: 0.1s; } +article > *:nth-child(2) { animation-delay: 0.2s; } +article > *:nth-child(3) { animation-delay: 0.3s; } +article > *:nth-child(4) { animation-delay: 0.4s; } +article > *:nth-child(5) { animation-delay: 0.5s; } + +/* Loading shimmer effect for code blocks */ +pre.loading { + background: linear-gradient(90deg, var(--color-background-code) 0%, var(--color-background-elevated) 50%, var(--color-background-code) 100%); + background-size: 1000px 100%; + animation: shimmer 2s infinite; +} + +/* Theme toggle enhancement */ +.theme-toggle { + background: var(--color-background-elevated); + border: 1px solid var(--color-border-light); + border-radius: var(--radius-full); + padding: var(--space-xs); + transition: all var(--transition-base); +} + +.theme-toggle:hover { + background: var(--color-background-secondary); + border-color: var(--color-brand-primary); +} + +/* Hero Section Styling */ +.hero-section { + text-align: center; + padding: 4rem 2rem; + margin-bottom: 3rem; + background: radial-gradient(ellipse at top, rgba(202, 14, 76, 0.15) 0%, transparent 50%); + position: relative; + overflow: hidden; +} + +.hero-background { + position: absolute; + top: -50%; + left: -50%; + width: 200%; + height: 200%; + background: conic-gradient(from 180deg at 50% 50%, rgba(202, 14, 76, 0.1) 0deg, transparent 60deg, transparent 300deg, rgba(202, 14, 76, 0.1) 360deg); + animation: rotate 20s linear infinite; + opacity: 0.5; +} + +.hero-title { + font-size: 4.5rem !important; + font-weight: 800; + margin-bottom: 1rem; + background: linear-gradient(135deg, #CA0E4C 0%, #E91E63 50%, #F50057 100%); + -webkit-background-clip: text; + -webkit-text-fill-color: transparent; + background-clip: text; + position: relative; + z-index: 1; + letter-spacing: -0.03em; +} + +.hero-subtitle { + font-size: 1.5rem; + color: var(--color-text-primary); + font-weight: 300; + letter-spacing: 0.2em; + text-transform: uppercase; + margin-bottom: 0.5rem; +} + +.hero-description { + font-size: 1.125rem; + color: var(--color-text-tertiary); + margin-top: 1rem; + font-weight: 400; +} + +.hero-buttons { + margin-top: 3rem; + display: flex; + gap: 1rem; + justify-content: center; + flex-wrap: wrap; +} + +/* Button variants */ +.btn-primary { + display: inline-flex; + align-items: center; + padding: 0.875rem 2.5rem; + background: linear-gradient(135deg, #CA0E4C 0%, #E91E63 100%); + color: white !important; + border-radius: 9999px; + text-decoration: none; + font-weight: 500; + font-size: 1rem; + transition: all 200ms cubic-bezier(0.4, 0, 0.2, 1); + box-shadow: 0 4px 15px 0 rgba(202, 14, 76, 0.4); +} + +.btn-primary:hover { + transform: translateY(-2px); + box-shadow: 0 7px 25px 0 rgba(202, 14, 76, 0.5); + color: white !important; +} + +.btn-secondary { + display: inline-flex; + align-items: center; + padding: 0.875rem 2.5rem; + background: transparent; + color: var(--color-text-primary) !important; + border: 1px solid rgba(255, 255, 255, 0.2); + border-radius: 9999px; + text-decoration: none; + font-weight: 500; + 
font-size: 1rem; + transition: all 200ms cubic-bezier(0.4, 0, 0.2, 1); + backdrop-filter: blur(10px); +} + +.btn-secondary:hover { + border-color: var(--color-brand-primary); + color: var(--color-brand-primary) !important; +} + +/* Grid Card Styling */ +.sd-card.feature-card { + background: var(--color-background-secondary) !important; + border: 1px solid var(--color-border-light) !important; + transition: all var(--transition-base); + height: 100%; +} + +.sd-card.feature-card:hover { + border-color: var(--color-brand-primary) !important; + background: rgba(var(--color-brand-primary-rgb), 0.05) !important; + transform: translateY(-2px); + box-shadow: 0 4px 15px 0 rgba(202, 14, 76, 0.2); +} + +.sd-card.feature-card .sd-card-body { + text-align: center; + padding: 1.5rem !important; +} + +.sd-card.feature-card .sd-card-title { + color: var(--color-text-primary) !important; + font-weight: 600; + margin-bottom: 0.5rem; +} + +.sd-card.feature-card .sd-card-text { + color: var(--color-text-secondary) !important; +} + +.sd-card.doc-card { + background: var(--color-background-secondary) !important; + border: 1px solid var(--color-border-light) !important; + transition: all var(--transition-base); +} + +.sd-card.doc-card:hover { + border-color: var(--color-brand-primary) !important; + background: rgba(var(--color-brand-primary-rgb), 0.05) !important; +} + +.sd-card.doc-card .sd-card-body { + padding: 1.25rem !important; +} + +.sd-card.doc-card .sd-card-title { + color: var(--color-text-primary) !important; + font-weight: 600; + font-size: 1.125rem; + margin-bottom: 0.5rem; +} + +.sd-card.doc-card .sd-card-text { + color: var(--color-text-secondary) !important; +} + +/* Community section */ +.community-section { + text-align: center; + padding: 3rem 2rem; + background: linear-gradient(135deg, rgba(202, 14, 76, 0.05) 0%, transparent 100%); + border-radius: 0.75rem; + margin: 3rem 0; +} + +.community-section h3 { + color: var(--color-text-primary); + font-size: 1.5rem; + margin-bottom: 1rem; +} + +.community-section p { + color: var(--color-text-tertiary); + margin-bottom: 2rem; + max-width: 600px; + margin-left: auto; + margin-right: auto; +} + +/* Fix admonition styling */ +.admonition.tip { + background: rgba(var(--color-brand-primary-rgb), 0.05); + border: 1px solid rgba(var(--color-brand-primary-rgb), 0.2); +} + +/* Mobile responsive improvements */ +@media (max-width: 768px) { + :root { + --font-size-base: 0.875rem; + --font-size-4xl: 2rem; + --font-size-3xl: 1.5rem; + --font-size-2xl: 1.25rem; + } + + article { + padding: var(--space-lg) var(--space-md); + } + + .hero-title { + font-size: 3rem !important; + } + + .hero-buttons { + flex-direction: column; + align-items: center; + } + + .btn-primary, .btn-secondary { + width: 100%; + max-width: 300px; + justify-content: center; + } +} + +/* High resolution displays */ +@media (-webkit-min-device-pixel-ratio: 2), (min-resolution: 192dpi) { + body { + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; + } +} + +/* Print styles */ +@media print { + body { + background: white; + color: black; + } + + .sidebar-drawer, + .theme-toggle, + .search-button { + display: none; + } } diff --git a/docs/conf.py b/docs/conf.py index 2304875..5585728 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -3,30 +3,103 @@ sys.path.insert(0, os.path.abspath("..")) -project = "AION" +project = "AION-1" author = "Polymathic AI" +html_title = "AION-1" extensions = [ "myst_parser", "sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx.ext.autosummary", + 
"sphinx_design", # For cards and grids ] autosummary_generate = True +# MyST parser configuration +myst_enable_extensions = [ + "colon_fence", + "deflist", + "html_image", +] + html_theme = "furo" html_static_path = ["_static"] html_css_files = ["style.css"] -# Theme customizations to approximate Polymathic AI colors +# Theme customizations - separate light and dark themes html_theme_options = { "light_css_variables": { - "color-brand-primary": "#6f42c1", - "color-brand-content": "#6f42c1", + "color-brand-primary": "#CA0E4C", + "color-brand-content": "#CA0E4C", + "color-foreground-primary": "#2c3e50", # Dark text for light mode + "color-foreground-secondary": "#546e7a", + "color-foreground-muted": "#90a4ae", + "color-foreground-border": "#e0e0e0", + "color-background-primary": "#ffffff", # White background for light mode + "color-background-secondary": "#f5f5f5", + "color-background-hover": "#fafafa", + "color-background-border": "#e0e0e0", + "color-sidebar-background": "#fafafa", + "color-sidebar-background-border": "#e0e0e0", + "color-sidebar-brand-text": "#2c3e50", + "color-sidebar-caption-text": "#546e7a", + "color-sidebar-link-text": "#2c3e50", + "color-sidebar-link-text--top-level": "#2c3e50", + "color-sidebar-search-background": "#ffffff", + "color-sidebar-search-border": "#e0e0e0", + "color-sidebar-search-foreground": "#2c3e50", + "color-admonition-background": "#f5f5f5", + "color-api-background": "#f5f5f5", + "color-api-background-hover": "#eeeeee", + "color-highlight-on-target": "rgba(202, 14, 76, 0.1)", + "color-inline-code-background": "rgba(202, 14, 76, 0.08)", + "color-inline-code-text": "#CA0E4C", }, "dark_css_variables": { - "color-brand-primary": "#a78bfa", - "color-brand-content": "#a78bfa", + "color-brand-primary": "#CA0E4C", + "color-brand-content": "#CA0E4C", + "color-foreground-primary": "#e0e0e0", + "color-foreground-secondary": "#b0b0b0", + "color-foreground-muted": "#909090", + "color-foreground-border": "#2a2a2a", + "color-background-primary": "#0a0a0a", + "color-background-secondary": "#171717", + "color-background-hover": "#1a1a1a", + "color-background-border": "#2a2a2a", + "color-sidebar-background": "#0f0f0f", + "color-sidebar-background-border": "#2a2a2a", + "color-sidebar-brand-text": "#e0e0e0", + "color-sidebar-caption-text": "#b0b0b0", + "color-sidebar-link-text": "#cccccc", + "color-sidebar-link-text--top-level": "#e0e0e0", + "color-sidebar-search-background": "#1a1a1a", + "color-sidebar-search-border": "#2a2a2a", + "color-sidebar-search-foreground": "#e0e0e0", + "color-admonition-background": "#1a1a1a", + "color-api-background": "#1a1a1a", + "color-api-background-hover": "#262626", + "color-highlight-on-target": "rgba(202, 14, 76, 0.15)", + "color-inline-code-background": "rgba(202, 14, 76, 0.15)", + "color-inline-code-text": "#ff7a9a", }, + "sidebar_hide_name": False, + "navigation_with_keys": True, +} + +# Add custom footer +html_context = { + "default_mode": "auto", # Let the user's browser preference decide } + +# Customize source link text +html_copy_source = True +html_show_sourcelink = True +html_sourcelink_suffix = "" + +# Add custom favicon if available +# html_favicon = "_static/favicon.ico" + +# Set custom logo for the top left +# html_logo = "_static/polymathic_logo.png" diff --git a/docs/index.md b/docs/index.md index d18ae4a..e8cdaec 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,8 +1,112 @@ -# Welcome to AION Documentation +```{raw} html +
+<div class="hero-section">
+  <div class="hero-background"></div>
+  <h1 class="hero-title">AION-1</h1>
+  <p class="hero-subtitle">AstronomIcal Omnimodal Network</p>
+  <p class="hero-description">Next-generation foundation model for multimodal astronomical analysis</p>
+</div>
+
+``` + +# Welcome to the AION-1 documentation + + + +## 🚀 Quick Start + +```{admonition} Get up and running with AION +:class: tip + +Our foundation model seamlessly processes astronomical imaging, spectroscopy, and catalog data. +``` + +```python +from aion import AION + +# Initialize the model +model = AION.from_pretrained('polymathic-ai/aion-base') + +# Process multimodal astronomical data +outputs = model.generate( + images=galaxy_images, + spectra=stellar_spectra, + catalog=source_catalog +) +``` + +## ✨ Key Capabilities + +```{eval-rst} +.. grid:: 1 1 2 3 + :gutter: 3 + + .. grid-item-card:: 🌌 Multimodal Processing + :class-card: feature-card + + Unified handling of images, spectra, time series, and catalog data through specialized encoders + + .. grid-item-card:: 🧠 Foundation Architecture + :class-card: feature-card + + State-of-the-art transformer backbone pre-trained on massive astronomical datasets + + .. grid-item-card:: 🔧 Extensible Framework + :class-card: feature-card + + Modular codec system allows easy integration of new data modalities and instruments -AION is Polymathic AI's Omnimodal Network for Astronomy. This documentation provides a detailed guide to installation, usage and development. + .. grid-item-card:: ⚡ High Performance + :class-card: feature-card + + Optimized for both research and production with efficient batching and GPU acceleration + + .. grid-item-card:: 📊 Rich Embeddings + :class-card: feature-card + + Generate powerful representations for downstream tasks like classification and discovery + + .. grid-item-card:: 🌍 Community Driven + :class-card: feature-card + + Open-source development with contributions from leading astronomical institutions +``` + +## 📚 Documentation + +```{eval-rst} +.. grid:: 2 2 2 4 + :gutter: 3 + + .. grid-item-card:: Installation + :link: installation.html + :class-card: doc-card + + Quick setup guide and requirements + + .. grid-item-card:: Architecture + :link: architecture.html + :class-card: doc-card + + Deep dive into model design + + .. grid-item-card:: Usage Guide + :link: usage.html + :class-card: doc-card + + Examples and best practices + + .. grid-item-card:: API Reference + :link: api.html + :class-card: doc-card + + Complete API documentation +``` ```{toctree} +:hidden: :maxdepth: 2 installation @@ -11,3 +115,13 @@ usage api contributing ``` + +## 🤝 Join the Community + +```{raw} html +
+<div class="community-section">
+  <h3>Advancing astronomical AI together</h3>
+  <p>AION is developed by Polymathic AI in collaboration with astronomers and ML researchers worldwide. Join us in building the future of astronomical data analysis.</p>
+  <a class="btn-primary" href="contributing.html">Start Contributing →</a>
+</div>
+
+``` diff --git a/docs/requirements.txt b/docs/requirements.txt index 423db0a..94af3cb 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,4 @@ sphinx>=7.2 myst-parser furo +sphinx-design From ee5a406e667f48461e29701fae2f834f3ae59438 Mon Sep 17 00:00:00 2001 From: Francois Lanusse Date: Mon, 26 May 2025 00:43:45 +0200 Subject: [PATCH 04/21] adding stuff to build the documentation --- .github/workflows/docs-check.yml | 37 +++++++++++++++++ .github/workflows/docs.yml | 70 ++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 .github/workflows/docs-check.yml create mode 100644 .github/workflows/docs.yml diff --git a/.github/workflows/docs-check.yml b/.github/workflows/docs-check.yml new file mode 100644 index 0000000..e12230d --- /dev/null +++ b/.github/workflows/docs-check.yml @@ -0,0 +1,37 @@ +name: Check Documentation Build + +on: + pull_request: + paths: + - 'docs/**' + - 'aion/**' + - '.github/workflows/docs-check.yml' + - 'pyproject.toml' + +jobs: + docs: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e . + pip install -r docs/requirements.txt + + - name: Build documentation + run: | + cd docs + sphinx-build -W -b html . _build/html + + - name: Check for broken links + run: | + cd docs + sphinx-build -b linkcheck . _build/linkcheck || true diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..5fdb66c --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,70 @@ +name: Build Documentation + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow one concurrent deployment +concurrency: + group: "pages" + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Cache pip dependencies + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt', '**/pyproject.toml') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e . + pip install -r docs/requirements.txt + + - name: Build HTML documentation + run: | + cd docs + sphinx-build -b html . 
_build/html + + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: docs/_build/html + + # Deploy job - only runs on main branch + deploy: + if: github.ref == 'refs/heads/main' && github.event_name == 'push' + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 From b9b1c01209106773e9ed282f14fbf8d71c72c096 Mon Sep 17 00:00:00 2001 From: Francois Lanusse Date: Mon, 26 May 2025 01:38:50 +0200 Subject: [PATCH 05/21] Adding content --- docs/Makefile | 2 +- docs/api.md | 683 +++++++++++++++++++++++++++++++++++++++ docs/architecture.md | 414 ++++++++++++++++++++++-- docs/conf.py | 3 +- docs/contributing.md | 526 +++++++++++++++++++++++++++++- docs/index.md | 121 ++++--- docs/installation.md | 308 +++++++++++++++++- docs/requirements.txt | 3 +- docs/usage.md | 720 +++++++++++++++++++++++++++++++++++++++++- 9 files changed, 2696 insertions(+), 84 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index 5fd6dbd..ab2f07c 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -4,4 +4,4 @@ BUILDDIR := _build .PHONY: html html: -$(SPHINXBUILD) -M html $(SOURCEDIR) $(BUILDDIR) + $(SPHINXBUILD) -M html $(SOURCEDIR) $(BUILDDIR) diff --git a/docs/api.md b/docs/api.md index d7fc5e2..0a17be1 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1,5 +1,688 @@ # API Reference +This comprehensive API reference covers all major components of AION-1, including modalities, codecs, models, and utilities. + +## Core Model + +### `aion.AION` + +The main AION model class that provides high-level interfaces for multimodal astronomical analysis. + +```python +class AION(FourM): + """ + AION-1 multimodal astronomical foundation model. + + Inherits from FourM architecture and adds astronomical-specific + functionality for processing 39 different data modalities. + """ + + @classmethod + def from_pretrained( + cls, + model_name: str, + device: str = 'cuda', + torch_dtype: torch.dtype = torch.float32, + **kwargs + ) -> 'AION': + """ + Load a pre-trained AION model. + + Args: + model_name: HuggingFace model identifier + - 'polymathic-ai/aion-tiny': 300M parameter model + - 'polymathic-ai/aion-base': 800M parameter model + - 'polymathic-ai/aion-large': 3.1B parameter model + device: Device to load model on ('cuda', 'cpu', 'mps') + torch_dtype: Data type for model weights + **kwargs: Additional arguments passed to model constructor + + Returns: + AION model instance + """ + + def generate( + self, + inputs: Dict[str, Modality], + targets: List[str], + num_generations: int = 1, + temperature: float = 1.0, + top_k: Optional[int] = None, + top_p: Optional[float] = None + ) -> Dict[str, Modality]: + """ + Generate target modalities from input observations. + + Note: + ``targets`` must be chosen from the list returned by + ``AION.supported_targets`` (essentially the 39 modality names + listed in the architecture documentation). Supplying an + unsupported string will raise ``ValueError``. 
+ + Args: + inputs: Dictionary mapping modality names to data + targets: List of modality names to generate + num_generations: Number of samples to generate + temperature: Sampling temperature (higher = more diverse) + top_k: Top-k sampling parameter + top_p: Nucleus sampling parameter + + Returns: + Dictionary mapping target names to generated modalities + """ + + def encode( + self, + inputs: Dict[str, torch.Tensor] + ) -> torch.Tensor: + """ + Encode input tokens to learned representations. + + Args: + inputs: Tokenized inputs + + Returns: + Encoder hidden states [batch, seq_len, hidden_dim] + """ + + def tokenize( + self, + modalities: Dict[str, Modality] + ) -> Dict[str, torch.Tensor]: + """ + Convert modalities to discrete tokens using codecs. + + Args: + modalities: Dictionary of modality data + + Returns: + Dictionary of tokenized tensors + """ +``` + +## Modalities + +AION-1 supports 39 different astronomical data modalities. Each modality is represented by a Pydantic model ensuring type safety and validation. + +### Image Modalities + +#### `aion.modalities.Image` + +```python +class Image(Modality): + """ + Multi-band astronomical image. + + Attributes: + flux: Image data array [bands, height, width] + bands: List of band identifiers (e.g., ['HSC-G', 'HSC-R']) + ivar: Optional inverse variance array for weighting + mask: Optional boolean mask array + """ + + flux: np.ndarray + bands: List[str] + ivar: Optional[np.ndarray] = None + mask: Optional[np.ndarray] = None + + @classmethod + def batch(cls, images: List['Image']) -> 'Image': + """Batch multiple images together.""" + + def crop(self, size: int = 96) -> 'Image': + """Center crop image to specified size.""" +``` + +### Spectrum Modalities + +#### `aion.modalities.Spectrum` + +```python +class Spectrum(Modality): + """ + Astronomical spectrum. 
+ + Attributes: + wavelength: Wavelength array in Angstroms + flux: Flux density array + ivar: Optional inverse variance + survey: Source survey identifier + """ + + wavelength: np.ndarray + flux: np.ndarray + ivar: Optional[np.ndarray] = None + survey: Optional[str] = None + + def resample( + self, + new_wavelength: np.ndarray + ) -> 'Spectrum': + """Resample spectrum to new wavelength grid.""" + + def normalize(self) -> 'Spectrum': + """Apply median normalization.""" +``` + +### Scalar Modalities + +AION-1 includes numerous scalar modalities for photometry, shapes, and physical parameters: + +#### Photometric Fluxes + +```python +class FluxG(ScalarModality): + """g-band flux measurement.""" + value: np.ndarray + error: Optional[np.ndarray] = None + +class FluxR(ScalarModality): + """r-band flux measurement.""" + value: np.ndarray + error: Optional[np.ndarray] = None + +class FluxI(ScalarModality): + """i-band flux measurement.""" + value: np.ndarray + error: Optional[np.ndarray] = None + +class FluxZ(ScalarModality): + """z-band flux measurement.""" + value: np.ndarray + error: Optional[np.ndarray] = None +``` + +#### Shape Parameters + +```python +class E1(ScalarModality): + """First ellipticity component.""" + value: np.ndarray + +class E2(ScalarModality): + """Second ellipticity component.""" + value: np.ndarray + +class RadiusCARP(ScalarModality): + """CARP radius measurement.""" + value: np.ndarray +``` + +#### Physical Properties + +```python +class Redshift(ScalarModality): + """Spectroscopic or photometric redshift.""" + value: np.ndarray + error: Optional[np.ndarray] = None + +class ExtinctionV(ScalarModality): + """V-band extinction.""" + value: np.ndarray + +class Parallax(ScalarModality): + """Parallax measurement in mas.""" + value: np.ndarray + error: Optional[np.ndarray] = None +``` + +### Catalog Modalities + +#### `aion.modalities.Catalog` + +```python +class Catalog(Modality): + """ + Astronomical object catalog. + + Attributes: + entries: List of catalog objects + max_objects: Maximum number of objects to process + """ + + entries: List[CatalogEntry] + max_objects: int = 100 + + def sort_by_distance(self) -> 'Catalog': + """Sort entries by distance from center.""" + + def filter_bright(self, magnitude_limit: float) -> 'Catalog': + """Filter to objects brighter than limit.""" +``` + +## Codecs (Tokenizers) + +Codecs convert between modalities and discrete tokens. Each modality type has a specialized codec. + +### Base Codec Interface + +#### `aion.codecs.base.Codec` + +```python +class Codec(ABC): + """ + Abstract base class for modality codecs. + """ + + @abstractmethod + def encode(self, modality: Modality) -> torch.Tensor: + """Encode modality to discrete tokens.""" + + @abstractmethod + def decode(self, tokens: torch.Tensor) -> Modality: + """Decode tokens back to modality.""" + + @classmethod + def from_pretrained(cls, path: str) -> 'Codec': + """Load pre-trained codec.""" + + def save_pretrained(self, path: str): + """Save codec weights and configuration.""" +``` + +### Image Codec + +#### `aion.codecs.ImageCodec` + +```python +class ImageCodec(Codec): + """ + Image tokenizer using MagVit architecture. + + Supports multi-survey images with different band counts + through a unified channel embedding scheme. + """ + + def __init__( + self, + hidden_dim: int = 512, + n_embed: int = 10000, + compression_levels: int = 2, + quantizer: str = 'fsq' + ): + """ + Initialize image codec. 
+ + Args: + hidden_dim: Hidden dimension size + n_embed: Codebook size + compression_levels: Spatial compression factor + quantizer: Quantization method ('fsq' or 'vq') + """ + + def preprocess( + self, + image: Image, + crop_size: int = 96 + ) -> torch.Tensor: + """Apply survey-specific preprocessing.""" + + def get_latent_shape( + self, + image_shape: Tuple[int, ...] + ) -> Tuple[int, ...]: + """Get shape of latent representation.""" +``` + +### Spectrum Codec + +#### `aion.codecs.SpectrumCodec` + +```python +class SpectrumCodec(Codec): + """ + Spectrum tokenizer using ConvNeXt V2 architecture. + + Uses a shared latent wavelength grid to handle spectra + from different instruments. + """ + + def __init__( + self, + latent_wavelength: np.ndarray, + hidden_dims: List[int] = [96, 192, 384, 768], + n_embed: int = 1024, + quantizer: str = 'lfq' + ): + """ + Initialize spectrum codec. + + Args: + latent_wavelength: Target wavelength grid + hidden_dims: ConvNeXt stage dimensions + n_embed: Codebook size + quantizer: Quantization method + """ + + def to_latent_grid( + self, + spectrum: Spectrum + ) -> torch.Tensor: + """Interpolate spectrum to latent wavelength grid.""" +``` + +### Scalar Codec + +#### `aion.codecs.ScalarCodec` + +```python +class ScalarCodec(Codec): + """ + Tokenizer for scalar quantities using adaptive quantization. + """ + + def __init__( + self, + quantizer_type: str = 'reservoir', + n_bins: int = 256 + ): + """ + Initialize scalar codec. + + Args: + quantizer_type: Type of quantizer + - 'linear': Uniform bins + - 'log': Logarithmic bins + - 'reservoir': Learned adaptive bins + - 'compressed': Transform then quantize + n_bins: Number of quantization levels + """ + + def fit(self, values: np.ndarray): + """Fit quantizer to data distribution.""" +``` + +## Quantizers + +Quantization modules that convert continuous values to discrete tokens. + +### `aion.codecs.quantizers.FSQ` + +```python +class FiniteScalarQuantization(nn.Module): + """ + Finite Scalar Quantization from MagVit. + + Factorizes codebook into multiple small codebooks for + better gradient flow and training stability. + """ + + def __init__( + self, + levels: List[int] = [8, 5, 5, 5, 5], + eps: float = 1e-3 + ): + """ + Args: + levels: Number of levels per dimension + eps: Small constant for numerical stability + """ +``` + +### `aion.codecs.quantizers.LFQ` + +```python +class LookupFreeQuantization(nn.Module): + """ + Lookup-Free Quantization using entropy regularization. + + Achieves quantization without explicit codebook lookup, + improving training efficiency. + """ + + def __init__( + self, + dim: int, + codebook_size: int, + entropy_weight: float = 0.1 + ): + """ + Args: + dim: Embedding dimension + codebook_size: Target vocabulary size + entropy_weight: Entropy regularization weight + """ +``` + +## Preprocessing + +Survey-specific preprocessing utilities. + +### `aion.codecs.preprocessing.ImagePreprocessor` + +```python +class ImagePreprocessor: + """ + Survey-specific image preprocessing. + """ + + def __init__(self, survey: str): + """ + Initialize for specific survey. + + Args: + survey: Survey name ('HSC', 'DES', 'SDSS', etc.) + """ + + def __call__(self, image: Image) -> torch.Tensor: + """Apply preprocessing pipeline.""" + + def get_rescaling_params(self) -> Dict[str, float]: + """Get survey-specific rescaling parameters.""" +``` + +### `aion.codecs.preprocessing.SpectrumPreprocessor` + +```python +class SpectrumPreprocessor: + """ + Spectrum normalization and preprocessing. 
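
    Median normalization divides the flux by its median; the log of the
    median is tracked separately during preprocessing (see the spectrum
    codec description in the architecture docs).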
+ """ + + def normalize_median( + self, + spectrum: Spectrum + ) -> Spectrum: + """Apply median normalization.""" + + def mask_skylines( + self, + spectrum: Spectrum + ) -> Spectrum: + """Mask common sky emission lines.""" +``` + +## Model Components + +### `aion.fourm.FourM` + +```python +class FourM(nn.Module): + """ + Base multimodal transformer architecture. + + Implements the encoder-decoder architecture with + modality-specific embeddings and flexible attention. + """ + + def __init__( + self, + encoder_depth: int = 12, + decoder_depth: int = 12, + dim: int = 768, + num_heads: int = 12, + mlp_ratio: float = 4.0, + use_bias: bool = False + ): + """Initialize FourM architecture.""" +``` + +### `aion.fourm.encoder_embeddings` + +```python +class ModalityEmbedding(nn.Module): + """ + Learnable embeddings for each modality type. + + Provides both modality identification and survey + provenance information. + """ + + def __init__( + self, + num_modalities: int, + num_surveys: int, + embed_dim: int + ): + """Initialize modality embeddings.""" +``` + +## Utilities + +### `aion.model_utils` + +```python +def load_codec(modality: str, device: str = 'cuda') -> Codec: + """Load pre-trained codec for modality.""" + +def create_model_config( + model_size: str = 'base' +) -> Dict[str, Any]: + """Get configuration for model size.""" + +def count_parameters(model: nn.Module) -> int: + """Count trainable parameters in model.""" +``` + +### `aion.generation_utils` + +```python +def sample_with_temperature( + logits: torch.Tensor, + temperature: float = 1.0, + top_k: Optional[int] = None, + top_p: Optional[float] = None +) -> torch.Tensor: + """ + Sample from logits with temperature scaling. + + Args: + logits: Model output logits + temperature: Sampling temperature + top_k: Top-k filtering + top_p: Nucleus sampling threshold + + Returns: + Sampled token indices + """ + +def generate_with_caching( + model: AION, + inputs: Dict[str, torch.Tensor], + max_length: int, + use_cache: bool = True +) -> torch.Tensor: + """Generate tokens with KV caching for efficiency.""" +``` + +## Data Loading + +### `aion.data.AstronomicalDataset` + +```python +class AstronomicalDataset(Dataset): + """ + PyTorch dataset for astronomical observations. + """ + + def __init__( + self, + data_paths: List[str], + modalities: List[str], + transform: Optional[Callable] = None + ): + """ + Initialize dataset. 
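        Items are returned by `__getitem__` as dictionaries mapping modality
        names to `Modality` instances.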
+ + Args: + data_paths: Paths to data files + modalities: List of modalities to load + transform: Optional data transformation + """ + + def __getitem__(self, idx: int) -> Dict[str, Modality]: + """Get single observation.""" +``` + +## Example Usage + +### Complete Pipeline + +```python +import torch +from aion import AION +from aion.modalities import Image, Spectrum +from aion.codecs import ImageCodec, SpectrumCodec + +# Load model and codecs +model = AION.from_pretrained('polymathic-ai/aion-base') +image_codec = ImageCodec.from_pretrained('polymathic-ai/aion-image-codec') +spectrum_codec = SpectrumCodec.from_pretrained('polymathic-ai/aion-spectrum-codec') + +# Load data +image = Image(flux=galaxy_flux, bands=['g', 'r', 'i', 'z', 'y']) +spectrum = Spectrum(wavelength=wavelength, flux=flux) + +# Tokenize +tokens = { + 'image': image_codec.encode(image), + 'spectrum': spectrum_codec.encode(spectrum) +} + +# Encode to representations +with torch.no_grad(): + representations = model.encode(tokens) + +# Generate missing modalities +results = model.generate( + inputs={'image': image}, + targets=['spectrum', 'redshift'] +) + +# Decode results +generated_spectrum = spectrum_codec.decode(results['spectrum']) +print(f"Predicted redshift: {results['redshift'].value[0]:.3f}") +``` + +## Error Handling + +All AION components include comprehensive error handling: + +```python +from aion.exceptions import ( + ModalityError, # Invalid modality data + CodecError, # Tokenization failures + ModelError, # Model inference errors + DataError # Data loading issues +) + +try: + result = model.generate(inputs, targets) +except ModalityError as e: + print(f"Invalid modality: {e}") +except CodecError as e: + print(f"Tokenization failed: {e}") +``` + +## Performance Tips + +1. **Batch Processing**: Always process multiple objects together when possible +2. **Mixed Precision**: Use `torch.cuda.amp` for faster inference +3. **Token Caching**: Reuse encoder outputs when generating multiple targets +4. **Device Placement**: Use `.to(device)` consistently for all tensors + +For more details, see the [Usage Guide](usage.html) and [Architecture](architecture.html) documentation. + ```{eval-rst} .. automodule:: aion :members: diff --git a/docs/architecture.md b/docs/architecture.md index abf8c13..5d620a1 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -1,45 +1,411 @@ -# Code Architecture +# AION-1 Architecture -This page explains the major components of the AION codebase and how they interact. +This document provides a comprehensive overview of AION-1's architecture, explaining how it achieves unified multimodal understanding of astronomical data through innovative tokenization strategies and transformer-based learning. -## Modality Data Classes +## Overview -`aion/modalities.py` defines Pydantic models describing each input modality. Examples include `Image` for imaging data, `Spectrum` for spectroscopic data, and scalar modalities such as `FluxG` or `Parallax`. These classes provide type checked containers for the raw astronomy data. +AION-1 employs a two-stage architecture that elegantly handles the complexity of astronomical data: -## Codecs (Tokenizers) +1. **Universal Tokenization**: Modality-specific encoders convert heterogeneous astronomical observations into discrete tokens +2. **Multimodal Masked Modeling**: A unified transformer learns cross-modal relationships through masked token prediction -Under `aion/codecs/` reside modality specific **Codecs**. 
A codec encodes a `Modality` instance into a sequence of discrete tokens and can decode tokens back to the original data. The base interface is defined in `codecs/base.py` and concrete implementations exist for images, spectra and catalog entries. +This design enables AION-1 to process 39 different data modalities from 5 major astronomical surveys, learning from over 200 million objects. + +## Core Design Principles + +### 1. Purely Observational Learning + +Unlike many scientific ML models, AION-1 is trained exclusively on raw observational data without any labels derived from simulations or physical models. This approach provides: + +- **Model-agnostic representations**: Not tied to specific physical assumptions +- **Flexibility**: Can adapt to changing theoretical models +- **Robustness**: Learns patterns directly from data + +### 2. Arbitrary Modality Combinations + +AION-1 can process any subset of its 39 supported modalities without architectural changes: + +- No fixed input requirements +- Graceful handling of missing data +- Dynamic modality fusion + +### 3. Scalable Token-Based Approach + +By converting all data to tokens, AION-1 achieves: + +- Uniform processing across modalities +- Efficient batching and computation +- Natural handling of variable-length inputs + +## Stage 1: Universal Tokenization + +The tokenization stage addresses a fundamental challenge: how to convert diverse astronomical measurements (images, spectra, scalars) into a common representation suitable for transformer processing. + +### Image Tokenization + +AION-1's image tokenizer handles multi-band astronomical images from different surveys with varying: +- Resolution and pixel scales +- Number of channels (4-9 bands) +- Noise characteristics +- Dynamic range + +#### Architecture +```python +# Image tokenizer structure +class ImageCodec: + - Preprocessing: + - Center crop to 96x96 pixels + - Survey-specific rescaling + - Range compression: arcsinh(flux/α) × β + + - Multi-survey projection: + - SubsampledLinear layer (9 → 54 channels) + - Handles variable input bands + - Embeds survey provenance + + - Encoder: MagVit-based architecture + - ResNet backbone with 2 compressions + - Hidden dimensions: 512 + - Bottleneck: 5 dimensions + + - Quantization: Finite Scalar Quantization (FSQ) + - Levels: [8, 5, 5, 5, 5] + - Codebook size: 10,000 +``` + +#### Key Innovations + +1. **Channel Embedding Scheme**: Accommodates images from different surveys with varying band counts in a single model + +2. **Inverse-Variance Weighted Loss**: Leverages known noise properties for optimal reconstruction + ``` + L_NLL = Σ_i 1/2 || Σ_i^(-1/2) (x_i - Decoder(Encoder(x_i))) ||² + ``` + +3. 
**Survey-Aware Processing**: Maintains provenance information through dedicated embeddings + +### Spectrum Tokenization + +Astronomical spectra present unique challenges: +- Wavelength ranges vary by instrument (3500-10400 Å) +- Resolution differences (R = 1500-5500) +- Orders of magnitude variation in amplitude + +#### Architecture +```python +# Spectrum tokenizer structure +class SpectrumCodec: + - Preprocessing: + - Median normalization + - Log-transform median + - Resampling to latent wavelength grid + + - Latent grid: + - Range: 3500-10462.4 Å + - Resolution: 0.8 Å/pixel + - 8704 pixels total + + - Encoder: ConvNeXt V2 + - Depths: [3, 3, 9, 3] + - Dimensions: [96, 192, 384, 768] + + - Quantization: Lookup-Free Quantization (LFQ) + - Embedding dimension: 10 + - Codebook size: 1024 +``` + +#### Spectral Grid Interpolation + +The tokenizer uses a shared latent wavelength grid, enabling joint processing of spectra from different instruments: ```python -from aion.codecs import ImageCodec -from aion.modalities import Image +def to_latent(spectrum, observed_wavelength): + # Interpolate observed spectrum to latent grid + return interp1d(observed_wavelength, spectrum, latent_wavelength) +``` + +### Scalar Tokenization + +Scalar quantities (fluxes, shapes, physical parameters) are tokenized using adaptive quantization based on cumulative distribution functions (CDFs). + +#### Types of Scalar Quantizers + +1. **Linear Quantizer**: For uniformly distributed values +2. **Log Quantizer**: For values spanning orders of magnitude +3. **Reservoir Quantizer**: Learns optimal binning from data +4. **Compressed Quantizer**: Applies transformations before quantization + +Example scalar modalities: +- Photometric fluxes (g, r, i, z bands) +- Shape parameters (ellipticity, radius) +- Physical properties (redshift, extinction) + +### Token Summary at a Glance + +| Modality | Native input tensor shape | Tokens per object | Quantizer type & levels | Codebook size | +|------------------------------------------------|---------------------------|--------------------|-------------------------|---------------| +| Image (HSC / Legacy Survey, 96 × 96 cut-out) | `(B, N_band, 96, 96)` | 144 *(18×18 grid)* | FSQ `[8,5,5,5,5]` | 10 000 | +| Spectrum (SDSS / DESI) | `(B, 2, λ)` *(flux,ivar)* | 64 + 1 norm token | LFQ `dim=10` | 1 024 | +| Scalar quantity (photometry, shapes, etc.) | `(B,)` | 1 per quantity | Reservoir (linear/log) | 256 (default) | +| Catalog (bounding ellipses) | `(B, N_obj, 5)` | ≤100×5 | Composite (per-field) | mixed | + +These numbers correspond to the default configuration used during pre-training (input budget = 256, output budget = 128 tokens). They can be modified at fine-tune time as long as the total token budget is respected. + +### Catalog Tokenization -image = Image(flux=my_flux, bands=["DES-G", "DES-R", "DES-I", "DES-Z"]) -codec = ImageCodec.from_pretrained("polymathic-ai/aion-image-codec") -tokens = codec.encode(image) +Astronomical catalogs contain lists of objects with varying counts per image. AION-1 linearizes these into sequences: + +```python +# Catalog entry: (X, Y, e1, e2, radius) +# Linearization: Sort by distance from center +# Tokenization: Quantize each component separately ``` -## FourM Architecture +## Stage 2: Multimodal Masked Modeling -The core transformer architecture lives in the `aion/fourm/` package. The `FourM` class combines encoder and decoder blocks along with modality embeddings. 
It provides utilities to concatenate tokens from different modalities and to apply modality-aware attention masks. +The second stage uses a transformer encoder-decoder architecture to learn relationships between tokens from different modalities. -## AION Wrapper +### Architecture Details -`aion/model.py` defines the `AION` class which inherits from `FourM`. It adds high level helpers for: +```python +class AION(FourM): + # Encoder + - Depth: 12-24 layers (model-dependent) + - Hidden dimension: 768-2048 + - Attention heads: 12-32 + - MLP ratio: 4.0 + - Activation: SwiGLU -- **`embed_inputs`** – convert a dictionary of modality tensors into encoder tokens. -- **`embed_targets`** – build decoder inputs and target masks for selected modalities. -- **`forward`** – run the full model returning logits for the requested targets. + # Decoder + - Same architecture as encoder + - Cross-attention to encoder outputs + - Modality-specific output heads +``` -Typical usage during inference is: +### Multimodal Masking Strategy + +AION-1 uses a sophisticated masking strategy that enables learning both within and across modalities: + +1. **Input Token Budget**: Randomly select B tokens across all modalities for input +2. **Output Token Budget**: From remaining tokens, select targets using Beta distribution +3. **Cross-Modal Learning**: Masks ensure model learns to predict any modality from any other ```python -from aion import AION +def mask_multimodal(tokens, num_input=256, num_output=128): + # 1. Select primary modality + primary_mod = random.choice(modalities) + + # 2. Fill input budget + input_tokens = sample_tokens(primary_mod, budget=num_input) + input_tokens += sample_from_other_modalities(remaining_budget) -model = AION.from_pretrained("aion-base") -logits = model(input_dict, target_mask) + # 3. Select outputs (Beta distribution favors fewer tokens) + num_outputs = sample_beta(alpha=0.1, beta=1.0) * num_output + output_tokens = sample_from_remaining(num_outputs) + + return input_tokens, output_tokens ``` -Here `input_dict` maps modality names to token tensors (obtained via the codecs) and `target_mask` selects which tokens to predict. +### Training Objective + +The model optimizes a cross-entropy loss over predicted tokens: + +``` +L = -Σ_t log p(x_t^target | x^observed) +``` + +This simple objective, combined with diverse masking patterns, enables AION-1 to learn rich cross-modal representations. + +## Model Variants + +AION-1 comes in three sizes, each using the same architecture with different dimensions: + +| Model | Parameters | Encoder Layers | Decoder Layers | Hidden Dim | Attention Heads | +|-------|------------|----------------|----------------|------------|-----------------| +| AION-1-B (Base) | 300M | 12 | 12 | 768 | 12 | +| AION-1-L (Large) | 800M | 24 | 24 | 1024 | 16 | +| AION-1-XL (XLarge) | 3.1B | 24 | 24 | 2048 | 32 | + +All models use: +- SwiGLU activation functions +- No bias terms (except in embeddings) +- QK-Norm for training stability +- Rotary position embeddings + +## Data Flow Through AION-1 + +Here's how data flows through the complete pipeline: + +```mermaid +graph TD + A[Raw Astronomical Data] --> B[Modality-Specific Preprocessing] + B --> C[Tokenization] + C --> D[Token Embeddings + Position Encoding] + D --> E[Transformer Encoder] + E --> F[Cross-Modal Representations] + F --> G[Transformer Decoder] + G --> H[Modality-Specific Heads] + H --> I[Predictions/Generations] +``` + +### Example: Processing Galaxy Data + +```python +# 1. 
Input data +galaxy_data = { + 'image': HSC_5band_image, # (5, 96, 96) + 'spectrum': SDSS_spectrum, # (3800,) + 'photometry': flux_measurements # (8,) +} + +# 2. Tokenization +tokens = { + 'image': image_codec.encode(galaxy_data['image']), # → 144 tokens + 'spectrum': spectrum_codec.encode(galaxy_data['spectrum']), # → 64 tokens + 'photometry': scalar_codec.encode(galaxy_data['photometry']) # → 8 tokens +} + +# 3. Embedding and encoding +embeddings = model.embed_inputs(tokens) +encoder_output = model.encode(embeddings) + +# 4. Cross-modal generation/prediction +predictions = model.decode(encoder_output, target_modalities) +``` + +## Key Architectural Innovations + +### 1. Modality Embeddings with Provenance + +Each token receives two embeddings: +- **Token embedding**: Encodes the discrete token value +- **Modality embedding**: Identifies data type AND source survey + +This allows AION-1 to understand that HSC g-band and SDSS g-band images have different characteristics. + +### 2. Flexible Attention Patterns + +The attention mechanism adapts based on input: +- **Encoder**: Full bidirectional attention across all tokens +- **Decoder**: Causal attention within modalities, cross-attention to encoder + +### 3. Hierarchical Token Organization + +Tokens are organized hierarchically: +- **Spatial tokens**: Preserve 2D structure for images +- **Sequential tokens**: Maintain order for spectra and catalogs +- **Unordered tokens**: For scalar sets + +## Training Infrastructure + +### Dataset Construction + +AION-1's training leverages pairwise associations between surveys: +- HSC images ↔ SDSS spectra +- SDSS spectra ↔ DESI spectra +- Legacy images ↔ Photometry + +This creates a connected graph enabling transitive learning (e.g., HSC → SDSS → DESI). + +### Optimization Details + +- **Optimizer**: AdamW (β₁=0.9, β₂=0.95, weight decay=0.05) +- **Learning rate**: 2e-4 with cosine decay +- **Warmup**: Linear over first 10% of training +- **Batch size**: 8096 (distributed across GPUs) +- **Training steps**: 205,000 +- **Mixed precision**: bfloat16 + +### Computational Requirements + +Training AION-1 requires substantial computational resources: +- **AION-1-B**: 64 H100 GPUs for 1.5 days +- **AION-1-L**: 100 H100 GPUs for 2.5 days +- **AION-1-XL**: 288 H100 GPUs for 3.5 days + +## Emergent Capabilities + +The architecture enables several emergent behaviors: + +### 1. Zero-Shot Cross-Modal Generation +Despite never seeing direct HSC↔DESI associations during training, AION-1 can generate DESI spectra from HSC images through transitive learning. + +### 2. Flexible Conditioning +Any modality subset can condition generation of any other subset, enabling: +- Super-resolution (low-res → high-res spectra) +- Cross-modal translation (images → spectra) +- Imputation (partial → complete observations) + +### 3. 
Physically Meaningful Representations +The learned embeddings organize objects along interpretable axes: +- Galaxy types (spiral, elliptical, merger) +- Stellar properties (temperature, metallicity) +- Redshift progression + +## Implementation Details + +### Memory Efficiency + +- **Gradient checkpointing**: Trades computation for memory +- **Mixed precision**: bfloat16 for most operations +- **Efficient attention**: Flash Attention 2 implementation + +### Inference Optimization + +- **Token caching**: Reuse encoder outputs for multiple decodings +- **Batch processing**: Process multiple objects simultaneously +- **Quantization**: INT8 inference for deployment + +## Data Provenance & Licensing + +The pre‐training corpus – dubbed *The Multimodal Universe (MMU)* – merges publicly available data products under their respective licences: + +| Survey | Release | Reference | Modalities Used | +|--------|---------|-----------|-----------------| +| Legacy Imaging Survey (DECaLS/BASS/MzLS) | DR10 | Dey et al. 2019 | 4-band images, photometry, catalog scalars | +| Hyper Suprime-Cam (HSC) | PDR3 (Wide+Deep) | Aihara et al. 2019 | 5-band images, photometry, shapes | +| Sloan Digital Sky Survey (SDSS) | DR17 | Eisenstein et al. 2011 | R≈2000 spectra | +| Dark Energy Spectroscopic Instrument (DESI) | EDR | DESI Collab. 2023 | R≈3000 spectra | +| Gaia | DR3 | Gaia Collab. 2022 | Low-res XP spectra, photometry, astrometry | + +All derivative checkpoints released on the Hugging Face Hub are distributed under an MIT licence; users are nevertheless responsible for complying with the upstream survey licences when redistributing raw data. + +## Physical Units & Conventions + +• **Images**: pixel values are calibrated nanomaggies. Exposure time normalisation is survey-specific and automatically handled by the image codec. + +• **Spectra**: flux density in erg s⁻¹ cm⁻² Å⁻¹ (observer frame). Wavelengths are Å, *not* log-λ when inside the model. + +• **Photometry / Scalars**: all fluxes in nanomaggies, magnitudes in the AB system. Ellipticities use SDSS convention *(e₁,e₂)*. + +## Known Limitations & Caveats + +1. No ultraviolet (< 3500 Å) or mid-infrared (> 1 µm) spectral support. +2. HSC chip-edge artefacts occasionally propagate into synthetic spectra – crop images if necessary. +3. The model was trained on **96 × 96 px** cut-outs; objects extending beyond that FOV will be truncated. + +## Citation + +If you use AION-1 in a publication, please cite both the codebase and the accompanying paper: + +```bibtex +@article{Francois2025aion, + title = {AION-1: Omnimodal Foundation Model for Astronomical Sciences}, + author = {LASTNAME, Firstname et al.}, + journal = {arXiv e-prints}, + year = 2025, + archivePrefix = {arXiv}, + eprint = {2406.00000} +} +``` + +## Summary + +AION-1's architecture represents a significant advance in multimodal scientific machine learning: + +1. **Universal tokenization** handles arbitrary astronomical data types +2. **Unified transformer** learns cross-modal relationships +3. **Flexible design** adapts to available observations +4. **Emergent understanding** discovers physical relationships -For additional details see the docstrings in `model.py` and the modules within `aion/fourm`. +This architecture provides a foundation for next-generation astronomical analysis, enabling scientists to leverage all available data for their research. 
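As a practical companion to the unit conventions above, the snippet below shows the standard nanomaggy-to-AB-magnitude conversion, which is convenient when inspecting photometric inputs and outputs. It is a minimal sketch for illustration; the helper name is ours, not part of the AION API:

```python
import numpy as np

def nanomaggies_to_ab_mag(flux_nmgy):
    """AB magnitude from flux in nanomaggies: m = 22.5 - 2.5 log10(f)."""
    flux = np.asarray(flux_nmgy, dtype=float)
    mag = np.full(flux.shape, np.nan)
    positive = flux > 0  # non-positive fluxes have no finite magnitude
    mag[positive] = 22.5 - 2.5 * np.log10(flux[positive])
    return mag

# Example: a 1 nanomaggy source is 22.5 AB by definition
assert nanomaggies_to_ab_mag([1.0])[0] == 22.5
```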
diff --git a/docs/conf.py b/docs/conf.py index 5585728..8eb4ded 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -5,7 +5,7 @@ project = "AION-1" author = "Polymathic AI" -html_title = "AION-1" +html_title = "AION" extensions = [ "myst_parser", @@ -13,6 +13,7 @@ "sphinx.ext.napoleon", "sphinx.ext.autosummary", "sphinx_design", # For cards and grids + "sphinx_copybutton", ] autosummary_generate = True diff --git a/docs/contributing.md b/docs/contributing.md index 6609c63..779c7cf 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -1,8 +1,522 @@ -# Contributing +# Contributing to AION-1 -We welcome contributions from the astronomical and machine learning communities. +Welcome to the AION-1 project! We're thrilled that you're interested in contributing to the first large-scale multimodal foundation model for astronomy. This guide will help you get started with contributing, whether you're fixing bugs, adding features, improving documentation, or conducting research with AION-1. -1. Fork the repository on GitHub. -2. Create a feature branch and commit your changes. -3. Ensure tests and linting pass before opening a pull request. -4. Describe your changes clearly in the PR description. +## Table of Contents + +1. [Getting Started](#getting-started) +2. [Development Setup](#development-setup) +3. [Contribution Types](#contribution-types) +4. [Code Standards](#code-standards) +5. [Testing Guidelines](#testing-guidelines) +6. [Documentation](#documentation) +7. [Submitting Changes](#submitting-changes) +8. [Community Guidelines](#community-guidelines) + +## Getting Started + +### Prerequisites + +Before contributing, ensure you have: + +- Python 3.10 or later +- Git for version control +- CUDA-capable GPU (recommended for testing) +- Familiarity with PyTorch and transformers + +### Understanding AION-1 + +Before diving into code, we recommend: + +1. Reading the [AION-1 paper](https://arxiv.org/abs/XXXX.XXXXX) +2. Exploring the [Architecture documentation](architecture.html) +3. Running through the [Usage examples](usage.html) +4. Joining our [Discord community](https://discord.gg/polymathic-ai) + +## Development Setup + +### 1. Fork and Clone + +```bash +# Fork the repository on GitHub, then: +git clone https://github.com/YOUR_USERNAME/aion.git +cd aion +git remote add upstream https://github.com/polymathic-ai/aion.git +``` + +### 2. Create Development Environment + +```bash +# Create virtual environment +python -m venv venv-dev +source venv-dev/bin/activate # On Windows: venv-dev\Scripts\activate + +# Install in development mode with all dependencies +pip install -e ".[dev,test,docs]" + +# Install pre-commit hooks +pre-commit install +``` + +### 3. Download Test Data + +```bash +# Download minimal test datasets +python scripts/download_test_data.py + +# Verify installation +python -m pytest tests/test_installation.py +``` + +## Contribution Types + +### 🐛 Bug Fixes + +Found a bug? Here's how to fix it: + +1. **Check existing issues** to avoid duplicates +2. **Create a minimal reproduction** script +3. **Write a test** that fails with the bug +4. **Fix the bug** and ensure the test passes +5. 
**Submit a PR** with clear description + +Example bug fix workflow: +```python +# tests/test_bug_fix.py +def test_spectrum_interpolation_edge_case(): + """Test for issue #123: spectrum interpolation fails at boundaries.""" + spectrum = Spectrum( + wavelength=np.array([3500, 3501, 10400]), + flux=np.array([1.0, 1.5, 2.0]) + ) + + # This should not raise an exception + interpolated = spectrum.resample(np.linspace(3500, 10400, 100)) + assert len(interpolated.flux) == 100 +``` + +### ✨ New Features + +Adding new capabilities to AION-1: + +1. **Discuss first**: Open an issue or discussion +2. **Design document**: For major features, write a brief design doc +3. **Implement incrementally**: Break into small PRs +4. **Add tests and docs**: Every feature needs both + +#### Adding a New Modality + +Here's an example of adding a new modality: + +```python +# aion/modalities.py +class TimeSeries(Modality): + """ + Time series astronomical measurements. + + Attributes: + time: Time stamps in MJD + flux: Flux measurements + error: Measurement uncertainties + """ + time: np.ndarray + flux: np.ndarray + error: Optional[np.ndarray] = None + + def validate(self): + """Ensure time series is properly formatted.""" + assert len(self.time) == len(self.flux) + assert np.all(np.diff(self.time) >= 0), "Time must be monotonic" + +# aion/codecs/timeseries.py +class TimeSeriesCodec(Codec): + """Tokenizer for astronomical time series.""" + + def encode(self, timeseries: TimeSeries) -> torch.Tensor: + # Implementation here + pass + + def decode(self, tokens: torch.Tensor) -> TimeSeries: + # Implementation here + pass +``` + +### 📚 Documentation Improvements + +Good documentation is crucial: + +- **Fix typos and clarify**: Even small improvements help +- **Add examples**: Real-world usage examples +- **Improve API docs**: Better docstrings +- **Write tutorials**: Step-by-step guides + +### 🔬 Research Contributions + +Using AION-1 for research? Consider contributing: + +- **Benchmarks**: Performance on astronomical tasks +- **Fine-tuning scripts**: For specific applications +- **Analysis notebooks**: Demonstrating capabilities +- **Model improvements**: Better architectures or training + +## Code Standards + +### Style Guide + +We follow PEP 8 with some modifications: + +```python +# Good: Clear variable names and type hints +def process_galaxy_spectrum( + spectrum: Spectrum, + redshift: float, + extinction_curve: Optional[np.ndarray] = None +) -> Spectrum: + """ + Process galaxy spectrum with redshift and extinction corrections. 
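
    Note: wavelengths are assumed to be observer-frame Angstroms; the
    de-redshift step below divides the wavelength axis by (1 + z).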
+ + Args: + spectrum: Input spectrum + redshift: Cosmological redshift + extinction_curve: Optional extinction curve + + Returns: + Corrected spectrum + """ + # De-redshift + corrected_wavelength = spectrum.wavelength / (1 + redshift) + + # Apply extinction if provided + if extinction_curve is not None: + extinction_factor = np.interp( + corrected_wavelength, + EXTINCTION_WAVELENGTH, + extinction_curve + ) + corrected_flux = spectrum.flux * extinction_factor + else: + corrected_flux = spectrum.flux + + return Spectrum( + wavelength=corrected_wavelength, + flux=corrected_flux, + ivar=spectrum.ivar + ) +``` + +### Type Hints + +Always use type hints for better code clarity: + +```python +from typing import Dict, List, Optional, Tuple, Union +import torch +import numpy as np + +def tokenize_multimodal( + data: Dict[str, Modality], + codecs: Dict[str, Codec], + max_length: Optional[int] = None +) -> Dict[str, torch.Tensor]: + """Tokenize multiple modalities.""" + tokens = {} + for modality_name, modality_data in data.items(): + if modality_name in codecs: + tokens[modality_name] = codecs[modality_name].encode(modality_data) + return tokens +``` + +### Docstrings + +Use Google-style docstrings: + +```python +def cross_match_catalogs( + catalog1: Catalog, + catalog2: Catalog, + radius: float = 1.0, + unit: str = 'arcsec' +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Cross-match two astronomical catalogs. + + Performs positional cross-matching between two catalogs using + a specified search radius. + + Args: + catalog1: First catalog + catalog2: Second catalog + radius: Search radius for matching + unit: Unit of radius ('arcsec', 'arcmin', 'deg') + + Returns: + Tuple containing: + - indices1: Matched indices from catalog1 + - indices2: Matched indices from catalog2 + - distances: Angular distances of matches + + Raises: + ValueError: If unit is not recognized + + Example: + >>> idx1, idx2, dist = cross_match_catalogs( + ... gaia_catalog, + ... sdss_catalog, + ... radius=2.0 + ... 
) + """ +``` + +## Testing Guidelines + +### Test Structure + +``` +tests/ +├── unit/ # Fast unit tests +├── integration/ # Integration tests +├── fixtures/ # Test data and fixtures +└── benchmarks/ # Performance benchmarks +``` + +### Writing Tests + +```python +# tests/unit/test_spectrum_codec.py +import pytest +import numpy as np +from aion.modalities import Spectrum +from aion.codecs import SpectrumCodec + +class TestSpectrumCodec: + @pytest.fixture + def sample_spectrum(self): + """Create a sample spectrum for testing.""" + wavelength = np.linspace(4000, 8000, 1000) + flux = np.random.randn(1000) + 10 + return Spectrum(wavelength=wavelength, flux=flux) + + @pytest.fixture + def codec(self): + """Initialize spectrum codec.""" + return SpectrumCodec( + latent_wavelength=np.linspace(3500, 10500, 8704) + ) + + def test_encode_decode_preserves_shape(self, sample_spectrum, codec): + """Test that encode/decode preserves spectrum shape.""" + tokens = codec.encode(sample_spectrum) + reconstructed = codec.decode(tokens) + + assert reconstructed.wavelength.shape == sample_spectrum.wavelength.shape + assert reconstructed.flux.shape == sample_spectrum.flux.shape + + def test_handles_missing_data(self, codec): + """Test codec handles spectra with gaps.""" + wavelength = np.array([4000, 4100, 4200, 6000, 6100]) + flux = np.array([1.0, 1.1, 1.2, 2.0, 2.1]) + + spectrum = Spectrum(wavelength=wavelength, flux=flux) + tokens = codec.encode(spectrum) + + assert tokens is not None + assert len(tokens.shape) == 2 # [batch, seq_len] +``` + +### Running Tests + +```bash +# Run all tests +pytest + +# Run specific test file +pytest tests/unit/test_spectrum_codec.py + +# Run with coverage +pytest --cov=aion --cov-report=html + +# Run benchmarks +pytest tests/benchmarks/ --benchmark-only +``` + +## Documentation + +### Building Documentation + +```bash +cd docs +make html +# View at docs/_build/html/index.html +``` + +### Writing Documentation + +When adding new features, update: + +1. **Docstrings**: In the code itself +2. **API Reference**: In `docs/api.md` +3. **Usage Examples**: In `docs/usage.md` +4. **Architecture**: If design changes + +Example documentation addition: + +```markdown +### Working with Time Series + +AION-1 can process variable star light curves and other time series data: + +\```python +from aion.modalities import TimeSeries + +# Load light curve data +lightcurve = TimeSeries( + time=mjd_times, + flux=flux_measurements, + error=flux_errors +) + +# Generate period estimate +results = model.generate( + inputs={'timeseries': lightcurve}, + targets=['period', 'variability_class'] +) + +print(f"Period: {results['period'].value[0]:.3f} days") +print(f"Class: {results['variability_class'].value[0]}") +\``` +``` + +## Submitting Changes + +### 1. Create Feature Branch + +```bash +git checkout -b feature/your-feature-name +# or +git checkout -b fix/issue-description +``` + +### 2. Make Changes + +- Write clean, documented code +- Add tests for new functionality +- Update documentation as needed +- Ensure all tests pass + +### 3. 
Commit Guidelines

Use conventional commits:

```bash
# Format: <type>(<scope>): <description>

git commit -m "feat(modalities): add time series support"
git commit -m "fix(codec): handle edge case in spectrum interpolation"
git commit -m "docs(api): improve codec documentation"
git commit -m "test(integration): add multi-survey processing tests"
```

Types:
- `feat`: New feature
- `fix`: Bug fix
- `docs`: Documentation changes
- `test`: Test additions/changes
- `refactor`: Code refactoring
- `perf`: Performance improvements
- `style`: Code style changes
- `chore`: Maintenance tasks

### 4. Push and Create PR

```bash
git push origin feature/your-feature-name
```

Then create a Pull Request on GitHub with:

- **Clear title**: Summarize the change
- **Description**: Explain what and why
- **Tests**: Confirm all tests pass
- **Screenshots**: If relevant (e.g., for visualizations)

### 5. Code Review

- Respond to feedback constructively
- Make requested changes
- Keep PR focused and reasonably sized

## Community Guidelines

### Code of Conduct

We follow the [Contributor Covenant Code of Conduct](https://www.contributor-covenant.org/). Key points:

- Be respectful and inclusive
- Welcome newcomers
- Focus on constructive criticism
- Report unacceptable behavior

### Getting Help

- **Discord**: Quick questions and discussions
- **GitHub Issues**: Bug reports and feature requests
- **Discussions**: Longer form conversations
- **Office Hours**: Weekly community calls (Thursdays 3pm UTC)

### Recognition

Contributors are recognized in:

- The `CONTRIBUTORS.md` file
- Release notes
- Research papers (for significant contributions)

## Advanced Topics

### Adding New Surveys

To add support for a new astronomical survey:

1. **Define band mappings** in `aion/surveys.py`
2. **Add preprocessing** in `aion/codecs/preprocessing/`
3. **Update documentation** with survey details
4. **Add tests** with sample data

### Performance Optimization

When optimizing AION-1:

```python
# Profile first
import cProfile
import pstats

profiler = cProfile.Profile()
profiler.enable()

# Your code here
result = model.generate(inputs, targets)

profiler.disable()
stats = pstats.Stats(profiler).sort_stats('cumulative')
stats.print_stats(10)
```

### Memory Profiling

```python
from memory_profiler import profile

@profile
def process_large_batch(model, data):
    # Function to profile
    pass
```

## Thank You!

Your contributions make AION-1 better for the entire astronomical community. Whether you're fixing a typo, adding a feature, or conducting research, every contribution matters.

If you have questions or need help getting started, don't hesitate to reach out on Discord or open an issue. We're here to help!

Happy contributing! 🌟🔭
diff --git a/docs/index.md b/docs/index.md
index e8cdaec..74bffde 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -3,106 +3,139 @@

AION-1

AstronomIcal Omnimodal Network

-Next-generation foundation model for multimodal astronomical analysis
+The first large-scale multimodal foundation model for astronomy

``` -# Welcome to the AION-1 documentation +# Welcome to AION-1 +AION-1 (AstronomIcal Omnimodal Network) represents a breakthrough in astronomical machine learning: the first foundation model capable of understanding and processing arbitrary combinations of astronomical observations across 39 different data modalities. Trained on over 200 million astronomical objects, AION-1 unifies imaging, spectroscopy, photometry, and catalog data from major ground- and space-based observatories into a single, powerful framework. +## 🌟 Why AION-1? -## 🚀 Quick Start - -```{admonition} Get up and running with AION -:class: tip - -Our foundation model seamlessly processes astronomical imaging, spectroscopy, and catalog data. -``` +Traditional approaches in astronomy treat each data modality in isolation, missing the rich interconnections between different types of observations. AION-1 fundamentally changes this paradigm by: -```python -from aion import AION +- **Learning Cross-Modal Relationships**: The model discovers how different observations relate to each other, building a deep understanding of the underlying astrophysical objects +- **Enabling Flexible Data Fusion**: Scientists can use any combination of available observations without redesigning their analysis pipeline +- **Excelling in Low-Data Regimes**: AION-1 achieves competitive results with orders of magnitude less labeled data than supervised approaches +- **Providing Universal Representations**: The learned embeddings capture physically meaningful structure useful across diverse downstream tasks -# Initialize the model -model = AION.from_pretrained('polymathic-ai/aion-base') - -# Process multimodal astronomical data -outputs = model.generate( - images=galaxy_images, - spectra=stellar_spectra, - catalog=source_catalog -) -``` - -## ✨ Key Capabilities +## 📊 Key Capabilities ```{eval-rst} .. grid:: 1 1 2 3 :gutter: 3 - .. grid-item-card:: 🌌 Multimodal Processing + .. grid-item-card:: 🌌 39 Data Modalities :class-card: feature-card - Unified handling of images, spectra, time series, and catalog data through specialized encoders + Seamlessly integrates multiband images, optical spectra, photometry, and catalog data from HSC, Legacy Survey, SDSS, DESI, and Gaia - .. grid-item-card:: 🧠 Foundation Architecture + .. grid-item-card:: 🧠 200M+ Objects :class-card: feature-card - State-of-the-art transformer backbone pre-trained on massive astronomical datasets + Pre-trained on massive astronomical datasets spanning galaxies, stars, and quasars across multiple surveys - .. grid-item-card:: 🔧 Extensible Framework + .. grid-item-card:: 🔧 Flexible Architecture :class-card: feature-card - Modular codec system allows easy integration of new data modalities and instruments + Two-stage design with modality-specific tokenization followed by transformer-based multimodal masked modeling - .. grid-item-card:: ⚡ High Performance + .. grid-item-card:: ⚡ Emergent Behaviors :class-card: feature-card - Optimized for both research and production with efficient batching and GPU acceleration + Demonstrates physical understanding, superior low-data performance, and meaningful latent space organization - .. grid-item-card:: 📊 Rich Embeddings + .. grid-item-card:: 🎯 Versatile Applications :class-card: feature-card - Generate powerful representations for downstream tasks like classification and discovery + Supports regression, classification, generation, retrieval, and cross-modal prediction tasks out-of-the-box - .. grid-item-card:: 🌍 Community Driven + .. 
grid-item-card:: 🌍 Open Science :class-card: feature-card - Open-source development with contributions from leading astronomical institutions + Fully open-source including datasets, training scripts, and model weights for reproducible research ``` -## 📚 Documentation +## 🚀 Quick Start + +Getting started with AION-1 is straightforward: + +```python +# Minimal end-to-end example +from aion import AION +import numpy as np + +# 1) Load a pre-trained checkpoint (800 M parameters) +model = AION.from_pretrained('polymathic-ai/aion-base') + +# 2) Prepare demo inputs (96×96 HSC g,r,i,z,y cut-out and SDSS spectrum) +galaxy_image = np.load('hsc_cutout_5band.npy') # shape (5,96,96) +galaxy_spectrum = np.load('sdss_spectrum.npy') # dict with wavelength/flux + +# 3) Generate a high-resolution DESI-like spectrum from the image +generated = model.generate( + inputs={'image': galaxy_image}, + targets=['spectrum'] +) + +# 4) Extract joint embeddings for downstream use +embeddings = model.encode({'image': galaxy_image, 'spectrum': galaxy_spectrum}) +``` + +## 🔬 Scientific Impact + +AION-1 demonstrates several emergent behaviors that reflect its deep understanding of astronomical data: + +### Physical Understanding +- Solves non-trivial scientific tasks using only simple linear probes on learned representations +- Organizes objects in embedding space along physically meaningful dimensions +- Captures relationships between disparate observations of the same physical phenomena + +### Performance Advantages +- Achieves state-of-the-art results on galaxy property estimation, stellar parameter prediction, and morphology classification +- Outperforms supervised baselines by 3x on rare object detection tasks +- Enables accurate cross-modal prediction even for modality pairs never seen during training + +### Practical Benefits +- Reduces data requirements by orders of magnitude for downstream tasks +- Enables seamless integration of heterogeneous observations +- Provides robust uncertainty quantification through multiple sampling + +## 📚 Documentation Overview ```{eval-rst} .. grid:: 2 2 2 4 :gutter: 3 - .. grid-item-card:: Installation + .. grid-item-card:: Installation & Setup :link: installation.html :class-card: doc-card - Quick setup guide and requirements + Environment setup, dependencies, and configuration - .. grid-item-card:: Architecture + .. grid-item-card:: Model Architecture :link: architecture.html :class-card: doc-card - Deep dive into model design + Deep dive into tokenization, transformers, and design .. grid-item-card:: Usage Guide :link: usage.html :class-card: doc-card - Examples and best practices + Tutorials, examples, and best practices .. grid-item-card:: API Reference :link: api.html :class-card: doc-card - Complete API documentation + Complete API documentation and method signatures ``` ```{toctree} @@ -121,7 +154,7 @@ contributing ```{raw} html

Advancing astronomical AI together

-AION is developed by Polymathic AI in collaboration with astronomers and ML researchers worldwide. Join us in building the future of astronomical data analysis.
+AION-1 is developed by Polymathic AI in collaboration with the Flatiron Institute and leading astronomical institutions worldwide. We welcome contributions from astronomers, ML researchers, and data scientists interested in pushing the boundaries of multimodal scientific machine learning.

Start Contributing →
``` diff --git a/docs/installation.md b/docs/installation.md index 1fa0842..1ebe2d2 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -1,9 +1,311 @@ -# Installation +# Installation Guide -AION requires Python 3.10 or later. For most users, installation from PyPI is recommended: +This comprehensive guide will walk you through installing AION-1 and setting up your environment for astronomical multimodal analysis. +## System Requirements + +### Hardware Requirements + +AION-1 is designed to run efficiently on various hardware configurations: + +- **Minimum Requirements**: + - CPU: 4+ cores (Intel/AMD x86_64 or Apple Silicon) + - RAM: 16 GB + - GPU: NVIDIA GPU with 8GB+ VRAM (optional but recommended) + - Storage: 50 GB free space for models and data + +- **Recommended Requirements**: + - CPU: 8+ cores + - RAM: 32 GB or more + - GPU: NVIDIA GPU with 24GB+ VRAM (e.g., RTX 3090, A5000, or better) + - Storage: 100 GB+ free space + +- **For Large-Scale Processing**: + - Multiple GPUs with NVLink + - 64GB+ RAM + - Fast SSD storage for data loading + +### Software Requirements + +- Python 3.10 or later +- CUDA 11.8+ (for GPU support) +- Operating System: Linux, macOS, or Windows + +## Installation Methods + +### 1. Quick Install via PyPI + +The simplest way to install AION-1 is through PyPI: + +```bash +pip install aion +``` + +This installs the core AION package with minimal dependencies. + +### 2. Full Installation with PyTorch + +For GPU support and optimal performance: + +```bash +# Install PyTorch first (adjust for your CUDA version) +pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118 + +# Then install AION +pip install aion[full] +``` + +### 3. Development Installation + +For contributors or those who want the latest features: + +```bash +# Clone the repository +git clone https://github.com/polymathic-ai/aion.git +cd aion + +# Create a virtual environment +python -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate + +# Install in development mode +pip install -e ".[dev]" +``` + +### 4. Docker Installation + +For containerized deployments: + +```bash +# Pull the official Docker image +docker pull polymathic/aion:latest + +# Run with GPU support +docker run --gpus all -it polymathic/aion:latest +``` + +## Setting Up Your Environment + +### 1. Virtual Environment Setup + +We strongly recommend using a virtual environment: + +```bash +# Using venv +python -m venv aion-env +source aion-env/bin/activate # On Windows: aion-env\Scripts\activate + +# Using conda +conda create -n aion python=3.10 +conda activate aion +``` + +### 2. Verify Installation + +After installation, verify everything is working: + +```python +import aion +import torch + +# Check AION version +print(f"AION version: {aion.__version__}") + +# Check PyTorch and CUDA +print(f"PyTorch version: {torch.__version__}") +print(f"CUDA available: {torch.cuda.is_available()}") +if torch.cuda.is_available(): + print(f"CUDA version: {torch.version.cuda}") + print(f"GPU: {torch.cuda.get_device_name(0)}") + +# Test loading a model +from aion import AION +model = AION.from_pretrained('polymathic-ai/aion-tiny') +print("Model loaded successfully!") +``` + +### 3. Download Pre-trained Models + +AION-1 comes in three sizes. 
Models are automatically downloaded on first use, but you can pre-download them: + +```python +from aion import AION + +# Download models (choose based on your hardware) +model_tiny = AION.from_pretrained('polymathic-ai/aion-tiny') # 300M parameters +model_base = AION.from_pretrained('polymathic-ai/aion-base') # 800M parameters +model_large = AION.from_pretrained('polymathic-ai/aion-large') # 3.1B parameters +``` + +Model sizes and requirements: +- **aion-tiny**: ~1.2 GB, runs on 8GB GPUs +- **aion-base**: ~3.2 GB, recommended 16GB+ GPU +- **aion-large**: ~12 GB, requires 24GB+ GPU + +### 4. Configure Model Cache + +By default, models are cached in `~/.cache/huggingface/hub/`. To change this: + +```bash +# Set environment variable +export HF_HOME=/path/to/your/cache + +# Or in Python +import os +os.environ['HF_HOME'] = '/path/to/your/cache' +``` + +## Installing Optional Dependencies + +### For Astronomical Data Processing + +```bash +pip install astropy fits +``` + +### For Visualization + +```bash +pip install matplotlib seaborn plotly +``` + +### For Advanced Scientific Computing + +```bash +pip install scipy scikit-learn pandas +``` + +## Platform-Specific Instructions + +### Linux + +Most straightforward installation. Ensure you have: ```bash +# Ubuntu/Debian +sudo apt-get update +sudo apt-get install python3-dev python3-pip + +# CentOS/RHEL +sudo yum install python3-devel python3-pip +``` + +### macOS + +For Apple Silicon Macs: +```bash +# Install using conda for better compatibility +conda install pytorch torchvision -c pytorch pip install aion ``` -For additional options such as installing with PyTorch bundled or setting up a development environment, see the project README. +Note: GPU acceleration on macOS uses Metal Performance Shaders (MPS): +```python +device = torch.device("mps" if torch.backends.mps.is_available() else "cpu") +``` + +### Windows + +Ensure you have Visual C++ Build Tools: +1. Download from: https://visualstudio.microsoft.com/visual-cpp-build-tools/ +2. Install with "Desktop development with C++" + +Then: +```bash +pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118 +pip install aion +``` + +## Troubleshooting + +### Common Issues and Solutions + +**1. CUDA Out of Memory** +```python +# Reduce batch size +model.eval() +with torch.no_grad(): + outputs = model(inputs) + +# Use mixed precision +from torch.cuda.amp import autocast +with autocast(): + outputs = model(inputs) +``` + +**2. Import Errors** +```bash +# Ensure all dependencies are installed +pip install --upgrade aion[full] + +# Check for conflicts +pip check +``` + +**3. Slow Model Loading** +```python +# Use faster model loading +model = AION.from_pretrained('polymathic-ai/aion-base', + torch_dtype=torch.float16, + device_map="auto") +``` + +**4. Version Conflicts** +```bash +# Create a fresh environment +conda create -n aion-clean python=3.10 +conda activate aion-clean +pip install aion +``` + +### Getting Help + +If you encounter issues: + +1. Check the [GitHub Issues](https://github.com/polymathic-ai/aion/issues) +2. Join our [Discord community](https://discord.gg/polymathic-ai) +3. 
Consult the [FAQ section](https://polymathic-ai.org/aion/faq) + +## Next Steps + +Now that you have AION-1 installed, explore: +- [Architecture Overview](architecture.html) - Understand how AION-1 works +- [Usage Guide](usage.html) - Learn to use AION-1 for your research +- [API Reference](api.html) - Detailed API documentation + +## Performance Optimization + +### GPU Memory Management + +```python +# Clear cache when switching between models +torch.cuda.empty_cache() + +# Use gradient checkpointing for large models +model.gradient_checkpointing_enable() + +# Optimize for inference +model.eval() +torch.set_grad_enabled(False) +``` + +### Multi-GPU Setup + +```python +# DataParallel for simple multi-GPU +model = torch.nn.DataParallel(model) + +# DistributedDataParallel for better performance +import torch.distributed as dist +dist.init_process_group(backend='nccl') +model = torch.nn.parallel.DistributedDataParallel(model) +``` + +### CPU Optimization + +```python +# Enable MKL optimizations +torch.set_num_threads(8) # Adjust based on your CPU + +# Use channels_last memory format +model = model.to(memory_format=torch.channels_last) +``` diff --git a/docs/requirements.txt b/docs/requirements.txt index 94af3cb..f70feb7 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,5 @@ sphinx>=7.2 -myst-parser +myst-parser>=1.0 furo sphinx-design +sphinx-copybutton diff --git a/docs/usage.md b/docs/usage.md index b30d6c5..5d03545 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,11 +1,723 @@ -# Usage Guide +# AION-1 Usage Guide -Load the pretrained model and start experimenting with astronomical data: +This comprehensive guide demonstrates how to use AION-1 for various astronomical analysis tasks. From basic inference to advanced multimodal generation, you'll learn to leverage AION-1's capabilities for your research. + +## Table of Contents + +1. [Quick Start](#quick-start) +2. [Loading and Preprocessing Data](#loading-and-preprocessing-data) +3. [Basic Inference](#basic-inference) +4. [Multimodal Generation](#multimodal-generation) +5. [Cross-Modal Translation](#cross-modal-translation) +6. [Representation Learning](#representation-learning) +7. [Advanced Applications](#advanced-applications) +8. [Performance Optimization](#performance-optimization) + +## Quick Start + +Let's begin with a simple example that showcases AION-1's core capabilities: ```python +import torch, numpy as np from aion import AION +from aion.modalities import Image + +# 1) Load a checkpoint (300 M parameters) +model = AION.from_pretrained('polymathic-ai/aion-tiny').eval() + +# 2) Read an example 5-band HSC cut-out (units: nanomaggies) +flux_cube = np.load('hsc_cutout_5band.npy') # shape (5,96,96) +img = Image(flux=flux_cube, bands=['HSC-G','HSC-R','HSC-I','HSC-Z','HSC-Y']) + +# 3) Predict an SDSS-like spectrum (observer-frame, erg s⁻¹ cm⁻² Å⁻¹) +with torch.inference_mode(): + result = model.generate(inputs={'image': img}, targets=['spectrum']) + +spec = result['spectrum'] +print(f"Generated spectrum: λ range {spec.wavelength[0]:.0f}-{spec.wavelength[-1]:.0f} Å, shape={spec.flux.shape}") +``` + +## Loading and Preprocessing Data + +### Working with Images + +AION-1 expects images in a specific format. 
Here's how to prepare astronomical images: + +```python +import numpy as np +from astropy.io import fits +from aion.modalities import Image +from aion.codecs.preprocessing import ImagePreprocessor + +# Load FITS data +with fits.open('galaxy.fits') as hdul: + # Assuming multi-band data in extensions + flux_data = np.array([hdul[i].data for i in range(1, 6)]) # 5 bands + +# Create Image modality +image = Image( + flux=flux_data, + bands=['HSC-G', 'HSC-R', 'HSC-I', 'HSC-Z', 'HSC-Y'], + # Optional: provide inverse variance for optimal processing + ivar=inverse_variance_data +) + +# Apply survey-specific preprocessing +preprocessor = ImagePreprocessor(survey='HSC') +processed_image = preprocessor(image) +``` + +### Working with Spectra + +Load and prepare spectroscopic data: + +```python +from aion.modalities import Spectrum +from astropy.io import fits + +# Load SDSS spectrum +hdul = fits.open('spec-plate-mjd-fiber.fits') +wavelength = 10**hdul[1].data['loglam'] # Convert log wavelength +flux = hdul[1].data['flux'] +ivar = hdul[1].data['ivar'] + +# Create Spectrum modality +spectrum = Spectrum( + wavelength=wavelength, + flux=flux, + ivar=ivar, + survey='SDSS' +) + +# The model handles resampling to internal wavelength grid automatically +``` + +### Working with Catalog Data + +Process tabular astronomical measurements: + +```python +from aion.modalities import ( + FluxG, FluxR, FluxI, FluxZ, + E1, E2, RadiusCARP, Redshift +) + +# Load catalog data (e.g., from pandas DataFrame) +catalog_entry = { + 'flux_g': FluxG(value=catalog_df['flux_g'].values), + 'flux_r': FluxR(value=catalog_df['flux_r'].values), + 'e1': E1(value=catalog_df['e1'].values), + 'e2': E2(value=catalog_df['e2'].values), + 'radius': RadiusCARP(value=catalog_df['radius'].values) +} +``` + +## Basic Inference + +### Single Modality Prediction + +Predict missing photometric measurements from available data: + +```python +# Given g,r,i bands, predict z band +inputs = { + 'flux_g': FluxG(value=[19.5]), + 'flux_r': FluxR(value=[18.2]), + 'flux_i': FluxI(value=[17.8]) +} + +# Predict z-band flux +with torch.no_grad(): + predictions = model.generate( + inputs=inputs, + targets=['flux_z'] + ) + +z_flux = predictions['flux_z'].value[0] +print(f"Predicted z-band flux: {z_flux:.2f}") +``` + +### Batch Processing + +Process multiple objects efficiently: + +```python +# Prepare batch of galaxies +batch_images = [load_galaxy(i) for i in range(32)] +batch = { + 'image': Image.batch(batch_images) +} + +# Generate properties for all galaxies +with torch.no_grad(): + results = model.generate( + inputs=batch, + targets=['redshift', 'e1', 'e2', 'radius'] + ) + +# Extract results +redshifts = results['redshift'].value +ellipticities = np.sqrt(results['e1'].value**2 + results['e2'].value**2) +``` + +## Multimodal Generation + +### Conditional Generation + +Generate multiple modalities conditioned on partial observations: + +```python +# Complex multimodal generation example +def analyze_galaxy(image_path, known_redshift=None): + # Load image + image = load_and_preprocess_image(image_path) + + inputs = {'image': image} + if known_redshift: + inputs['redshift'] = Redshift(value=[known_redshift]) + + # Generate comprehensive analysis + targets = [ + 'spectrum', # Full spectrum + 'flux_g', 'flux_r', 'flux_i', 'flux_z', # Photometry + 'e1', 'e2', # Shape parameters + 'radius', # Size + 'parallax', # Distance indicator + 'extinction_v' # Dust extinction + ] + + with torch.no_grad(): + results = model.generate( + inputs=inputs, + targets=targets, + 
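            # num_generations and temperature (and optionally top_k/top_p;
            # see the API reference) control stochastic decoding of targets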
num_generations=1, + temperature=1.0 + ) + + return results + +# Analyze a galaxy +galaxy_properties = analyze_galaxy('ngc1234.fits', known_redshift=0.05) +``` + +### Uncertainty Quantification + +Generate multiple samples to estimate uncertainties: + +```python +def estimate_uncertainty(inputs, target, num_samples=100): + samples = [] + + with torch.no_grad(): + for _ in range(num_samples): + result = model.generate( + inputs=inputs, + targets=[target], + temperature=1.2 # Higher temperature for more diversity + ) + samples.append(result[target].value[0]) + + samples = np.array(samples) + return { + 'mean': np.mean(samples), + 'std': np.std(samples), + 'percentiles': np.percentile(samples, [16, 50, 84]) + } + +# Estimate redshift uncertainty +z_stats = estimate_uncertainty( + inputs={'image': galaxy_image}, + target='redshift' +) +print(f"Redshift: {z_stats['mean']:.3f} ± {z_stats['std']:.3f}") +``` + +## Cross-Modal Translation + +### Image to Spectrum + +Convert imaging observations to spectroscopic predictions: + +```python +def image_to_spectrum(image, wavelength_range=(3800, 9200)): + """Generate spectrum from multi-band image.""" + + # Generate spectrum tokens + with torch.no_grad(): + result = model.generate( + inputs={'image': image}, + targets=['spectrum'] + ) + + spectrum = result['spectrum'] + + # Filter to desired wavelength range + mask = (spectrum.wavelength >= wavelength_range[0]) & \ + (spectrum.wavelength <= wavelength_range[1]) + + return { + 'wavelength': spectrum.wavelength[mask], + 'flux': spectrum.flux[mask] + } + +# Generate and plot spectrum +synthetic_spec = image_to_spectrum(galaxy_image) +plt.plot(synthetic_spec['wavelength'], synthetic_spec['flux']) +plt.xlabel('Wavelength (Å)') +plt.ylabel('Flux') +plt.title('AION-1 Generated Spectrum from Image') +``` + +### Spectrum to Image + +Inverse translation - generate images from spectra: + +```python +def spectrum_to_image(spectrum, bands=['DES-G', 'DES-R', 'DES-I', 'DES-Z']): + """Generate multi-band image from spectrum.""" + + with torch.no_grad(): + result = model.generate( + inputs={'spectrum': spectrum}, + targets=['image'], + target_bands=bands + ) + + return result['image'] + +# Reconstruct galaxy appearance +reconstructed_image = spectrum_to_image(observed_spectrum) +``` + +### Super-Resolution + +Enhance low-resolution spectra using multimodal context: + +```python +def enhance_spectrum(low_res_spectrum, supporting_data=None): + """Enhance spectrum resolution using additional modalities.""" + + inputs = {'spectrum': low_res_spectrum} + + # Add supporting data if available + if supporting_data: + inputs.update(supporting_data) + + # Generate high-resolution version + with torch.no_grad(): + result = model.generate( + inputs=inputs, + targets=['spectrum_highres'], + num_generations=1 + ) + + return result['spectrum_highres'] + +# Example with photometric support +enhanced = enhance_spectrum( + sdss_spectrum, + supporting_data={ + 'flux_g': FluxG(value=[18.5]), + 'flux_r': FluxR(value=[17.2]) + } +) +``` + +## Representation Learning + +### Extracting Embeddings + +Use AION-1's learned representations for downstream tasks: + +```python +def extract_embeddings(data_dict, pool='mean'): + """Extract feature embeddings from AION-1 encoder.""" + + # Tokenize inputs + tokens = model.tokenize(data_dict) + + # Get encoder representations + with torch.no_grad(): + embeddings = model.encode(tokens) + + # Pool over sequence dimension + if pool == 'mean': + features = embeddings.mean(dim=1) + elif pool == 'cls': + features 
= embeddings[:, 0] # First token + elif pool == 'max': + features = embeddings.max(dim=1)[0] + + return features.cpu().numpy() -model = AION.from_pretrained('aion-base') +# Extract features for clustering +galaxy_features = extract_embeddings({ + 'image': galaxy_image, + 'spectrum': galaxy_spectrum +}) ``` -The model accepts modality-specific tokenized inputs. Refer to the API documentation for details on available modalities and helper functions. +### Similarity Search + +Find similar objects using learned representations: + +```python +from sklearn.metrics.pairwise import cosine_similarity + +class GalaxySimilaritySearch: + def __init__(self, model): + self.model = model + self.database = [] + self.embeddings = [] + + def add_galaxy(self, galaxy_data, metadata=None): + """Add galaxy to search database.""" + embedding = extract_embeddings(galaxy_data) + self.embeddings.append(embedding) + self.database.append({ + 'data': galaxy_data, + 'metadata': metadata, + 'embedding': embedding + }) + + def find_similar(self, query_data, k=10): + """Find k most similar galaxies.""" + query_embedding = extract_embeddings(query_data) + + # Compute similarities + similarities = cosine_similarity( + query_embedding.reshape(1, -1), + np.vstack(self.embeddings) + )[0] + + # Get top k + indices = np.argsort(similarities)[::-1][:k] + + return [(self.database[i], similarities[i]) for i in indices] + +# Usage +searcher = GalaxySimilaritySearch(model) +# ... add galaxies to database ... +similar_galaxies = searcher.find_similar(query_galaxy, k=5) +``` + +### Anomaly Detection + +Identify unusual objects using reconstruction error: + +```python +def detect_anomalies(galaxies, threshold_percentile=95): + """Detect anomalous galaxies using reconstruction error.""" + + reconstruction_errors = [] + + for galaxy in galaxies: + # Encode and decode + with torch.no_grad(): + reconstructed = model.generate( + inputs=galaxy, + targets=list(galaxy.keys()) + ) + + # Compute reconstruction error + error = 0 + for key in galaxy: + if key == 'image': + error += np.mean((galaxy[key].flux - + reconstructed[key].flux)**2) + elif hasattr(galaxy[key], 'value'): + error += np.mean((galaxy[key].value - + reconstructed[key].value)**2) + + reconstruction_errors.append(error) + + # Set threshold + threshold = np.percentile(reconstruction_errors, threshold_percentile) + + # Identify anomalies + anomalies = [g for g, e in zip(galaxies, reconstruction_errors) + if e > threshold] + + return anomalies, reconstruction_errors +``` + +## Advanced Applications + +### Multi-Survey Integration + +Combine observations from different surveys: + +```python +def integrate_multi_survey(hsc_image, sdss_spectrum, desi_spectrum=None): + """Integrate observations from multiple surveys.""" + + inputs = { + 'image': hsc_image, + 'spectrum_sdss': sdss_spectrum + } + + if desi_spectrum: + inputs['spectrum_desi'] = desi_spectrum + + # Generate unified representation + with torch.no_grad(): + # Extract all available properties + results = model.generate( + inputs=inputs, + targets=['redshift', 'stellar_mass', 'sfr', 'metallicity'] + ) + + # Generate missing modalities + if not desi_spectrum: + results['spectrum_desi'] = model.generate( + inputs=inputs, + targets=['spectrum_desi'] + )['spectrum_desi'] + + return results +``` + +### Time Series Analysis + +Analyze variable objects across epochs: + +```python +def analyze_variable_object(observations): + """ + Analyze time-variable astronomical object. 
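+    Epochs are assumed to be time-ordered; change points below are flagged
+    from embedding distances between consecutive observations.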
+ + observations: list of (time, data_dict) tuples + """ + + embeddings_over_time = [] + properties_over_time = [] + + for time, data in observations: + # Extract embeddings + embedding = extract_embeddings(data) + embeddings_over_time.append(embedding) + + # Predict properties + with torch.no_grad(): + props = model.generate( + inputs=data, + targets=['flux_g', 'flux_r', 'temperature'] + ) + + properties_over_time.append({ + 'time': time, + 'properties': props, + 'embedding': embedding + }) + + # Analyze evolution + embeddings = np.vstack(embeddings_over_time) + + # Detect significant changes + embedding_distances = np.sqrt(np.sum(np.diff(embeddings, axis=0)**2, axis=1)) + change_points = np.where(embedding_distances > np.std(embedding_distances) * 2)[0] + + return { + 'properties': properties_over_time, + 'change_points': change_points, + 'embedding_evolution': embeddings + } +``` + +### Physical Parameter Estimation + +Estimate astrophysical parameters with uncertainty: + +```python +class PhysicalParameterEstimator: + def __init__(self, model, num_samples=100): + self.model = model + self.num_samples = num_samples + + def estimate_parameters(self, observations): + """Estimate physical parameters with uncertainties.""" + + # Parameters to estimate + parameters = [ + 'redshift', 'stellar_mass', 'sfr', + 'metallicity', 'age', 'extinction_v' + ] + + # Generate multiple samples + samples = {param: [] for param in parameters} + + with torch.no_grad(): + for _ in range(self.num_samples): + results = self.model.generate( + inputs=observations, + targets=parameters, + temperature=1.1 + ) + + for param in parameters: + if param in results: + samples[param].append(results[param].value[0]) + + # Compute statistics + estimates = {} + for param, values in samples.items(): + if values: + values = np.array(values) + estimates[param] = { + 'median': np.median(values), + 'mean': np.mean(values), + 'std': np.std(values), + 'ci_68': np.percentile(values, [16, 84]), + 'ci_95': np.percentile(values, [2.5, 97.5]) + } + + return estimates + +# Usage +estimator = PhysicalParameterEstimator(model) +parameters = estimator.estimate_parameters({ + 'image': galaxy_image, + 'spectrum': galaxy_spectrum +}) + +print(f"Stellar Mass: {parameters['stellar_mass']['median']:.2e} " + f"+/- {parameters['stellar_mass']['std']:.2e} M_sun") +``` + +## Performance Optimization + +### Efficient Batch Processing + +```python +from torch.utils.data import DataLoader, Dataset + +class AIONDataset(Dataset): + def __init__(self, data_list): + self.data = data_list + + def __len__(self): + return len(self.data) + + def __getitem__(self, idx): + return self.data[idx] + +def process_large_dataset(data_list, batch_size=32): + """Efficiently process large datasets.""" + + dataset = AIONDataset(data_list) + dataloader = DataLoader(dataset, batch_size=batch_size, + num_workers=4, pin_memory=True) + + all_results = [] + + with torch.no_grad(): + for batch in dataloader: + # Process batch + results = model.generate( + inputs=batch, + targets=['redshift', 'stellar_mass'] + ) + all_results.append(results) + + # Concatenate results + return {k: np.concatenate([r[k].value for r in all_results]) + for k in all_results[0]} +``` + +### Memory-Efficient Processing + +```python +def process_with_chunking(large_spectrum, chunk_size=1000): + """Process very long spectra in chunks.""" + + n_chunks = len(large_spectrum.wavelength) // chunk_size + 1 + chunk_results = [] + + for i in range(n_chunks): + start = i * chunk_size + end = min((i + 1) * chunk_size, 
len(large_spectrum.wavelength)) + + chunk = Spectrum( + wavelength=large_spectrum.wavelength[start:end], + flux=large_spectrum.flux[start:end] + ) + + with torch.no_grad(): + result = model.process_spectrum_chunk(chunk) + chunk_results.append(result) + + # Combine chunks + return combine_spectrum_chunks(chunk_results) +``` + +### GPU Memory Management + +```python +import gc + +def memory_efficient_generation(inputs, targets, max_batch=16): + """Generate with automatic batch size adjustment.""" + + batch_size = max_batch + + while batch_size > 0: + try: + with torch.no_grad(): + results = model.generate( + inputs=inputs, + targets=targets, + batch_size=batch_size + ) + return results + + except torch.cuda.OutOfMemoryError: + # Clear cache and try smaller batch + torch.cuda.empty_cache() + gc.collect() + batch_size //= 2 + + if batch_size == 0: + raise RuntimeError("Cannot fit even batch size 1") + + raise RuntimeError("Failed to process") +``` + +## Best Practices + +### 1. Data Preparation +- Always normalize and preprocess data according to survey specifications +- Provide inverse variance when available for optimal results +- Use appropriate data types for each modality + +### 2. Model Selection +- Use `aion-tiny` for quick experiments and limited GPU memory +- Use `aion-base` for most research applications +- Use `aion-large` for highest accuracy when computational resources permit + +### 3. Generation Settings +- Lower temperature (0.8-1.0) for more deterministic outputs +- Higher temperature (1.1-1.5) for diversity and uncertainty estimation +- Multiple generations for robust uncertainty quantification + +### 4. Error Handling +```python +def safe_generate(model, inputs, targets, fallback=None): + """Safely generate with error handling.""" + try: + return model.generate(inputs=inputs, targets=targets) + except Exception as e: + print(f"Generation failed: {e}") + return fallback or {t: None for t in targets} +``` + +## Conclusion + +AION-1 provides a powerful and flexible framework for multimodal astronomical analysis. Its ability to seamlessly integrate diverse observations enables new research possibilities: + +- Cross-modal prediction and generation +- Unified analysis across multiple surveys +- Robust uncertainty quantification +- Discovery of unusual objects +- Efficient processing of large datasets + +For more examples and the latest updates, visit the [AION GitHub repository](https://github.com/polymathic-ai/aion) and join our community discussions. From 2c5fe45b7f847c9fa49126966ce43849e3c29287 Mon Sep 17 00:00:00 2001 From: Francois Lanusse Date: Mon, 26 May 2025 01:42:13 +0200 Subject: [PATCH 06/21] minor edits --- docs/contributing.md | 522 ------------------------------------------- docs/installation.md | 224 +------------------ 2 files changed, 1 insertion(+), 745 deletions(-) delete mode 100644 docs/contributing.md diff --git a/docs/contributing.md b/docs/contributing.md deleted file mode 100644 index 779c7cf..0000000 --- a/docs/contributing.md +++ /dev/null @@ -1,522 +0,0 @@ -# Contributing to AION-1 - -Welcome to the AION-1 project! We're thrilled that you're interested in contributing to the first large-scale multimodal foundation model for astronomy. This guide will help you get started with contributing, whether you're fixing bugs, adding features, improving documentation, or conducting research with AION-1. - -## Table of Contents - -1. [Getting Started](#getting-started) -2. [Development Setup](#development-setup) -3. [Contribution Types](#contribution-types) -4. 
[Code Standards](#code-standards) -5. [Testing Guidelines](#testing-guidelines) -6. [Documentation](#documentation) -7. [Submitting Changes](#submitting-changes) -8. [Community Guidelines](#community-guidelines) - -## Getting Started - -### Prerequisites - -Before contributing, ensure you have: - -- Python 3.10 or later -- Git for version control -- CUDA-capable GPU (recommended for testing) -- Familiarity with PyTorch and transformers - -### Understanding AION-1 - -Before diving into code, we recommend: - -1. Reading the [AION-1 paper](https://arxiv.org/abs/XXXX.XXXXX) -2. Exploring the [Architecture documentation](architecture.html) -3. Running through the [Usage examples](usage.html) -4. Joining our [Discord community](https://discord.gg/polymathic-ai) - -## Development Setup - -### 1. Fork and Clone - -```bash -# Fork the repository on GitHub, then: -git clone https://github.com/YOUR_USERNAME/aion.git -cd aion -git remote add upstream https://github.com/polymathic-ai/aion.git -``` - -### 2. Create Development Environment - -```bash -# Create virtual environment -python -m venv venv-dev -source venv-dev/bin/activate # On Windows: venv-dev\Scripts\activate - -# Install in development mode with all dependencies -pip install -e ".[dev,test,docs]" - -# Install pre-commit hooks -pre-commit install -``` - -### 3. Download Test Data - -```bash -# Download minimal test datasets -python scripts/download_test_data.py - -# Verify installation -python -m pytest tests/test_installation.py -``` - -## Contribution Types - -### 🐛 Bug Fixes - -Found a bug? Here's how to fix it: - -1. **Check existing issues** to avoid duplicates -2. **Create a minimal reproduction** script -3. **Write a test** that fails with the bug -4. **Fix the bug** and ensure the test passes -5. **Submit a PR** with clear description - -Example bug fix workflow: -```python -# tests/test_bug_fix.py -def test_spectrum_interpolation_edge_case(): - """Test for issue #123: spectrum interpolation fails at boundaries.""" - spectrum = Spectrum( - wavelength=np.array([3500, 3501, 10400]), - flux=np.array([1.0, 1.5, 2.0]) - ) - - # This should not raise an exception - interpolated = spectrum.resample(np.linspace(3500, 10400, 100)) - assert len(interpolated.flux) == 100 -``` - -### ✨ New Features - -Adding new capabilities to AION-1: - -1. **Discuss first**: Open an issue or discussion -2. **Design document**: For major features, write a brief design doc -3. **Implement incrementally**: Break into small PRs -4. **Add tests and docs**: Every feature needs both - -#### Adding a New Modality - -Here's an example of adding a new modality: - -```python -# aion/modalities.py -class TimeSeries(Modality): - """ - Time series astronomical measurements. 
- - Attributes: - time: Time stamps in MJD - flux: Flux measurements - error: Measurement uncertainties - """ - time: np.ndarray - flux: np.ndarray - error: Optional[np.ndarray] = None - - def validate(self): - """Ensure time series is properly formatted.""" - assert len(self.time) == len(self.flux) - assert np.all(np.diff(self.time) >= 0), "Time must be monotonic" - -# aion/codecs/timeseries.py -class TimeSeriesCodec(Codec): - """Tokenizer for astronomical time series.""" - - def encode(self, timeseries: TimeSeries) -> torch.Tensor: - # Implementation here - pass - - def decode(self, tokens: torch.Tensor) -> TimeSeries: - # Implementation here - pass -``` - -### 📚 Documentation Improvements - -Good documentation is crucial: - -- **Fix typos and clarify**: Even small improvements help -- **Add examples**: Real-world usage examples -- **Improve API docs**: Better docstrings -- **Write tutorials**: Step-by-step guides - -### 🔬 Research Contributions - -Using AION-1 for research? Consider contributing: - -- **Benchmarks**: Performance on astronomical tasks -- **Fine-tuning scripts**: For specific applications -- **Analysis notebooks**: Demonstrating capabilities -- **Model improvements**: Better architectures or training - -## Code Standards - -### Style Guide - -We follow PEP 8 with some modifications: - -```python -# Good: Clear variable names and type hints -def process_galaxy_spectrum( - spectrum: Spectrum, - redshift: float, - extinction_curve: Optional[np.ndarray] = None -) -> Spectrum: - """ - Process galaxy spectrum with redshift and extinction corrections. - - Args: - spectrum: Input spectrum - redshift: Cosmological redshift - extinction_curve: Optional extinction curve - - Returns: - Corrected spectrum - """ - # De-redshift - corrected_wavelength = spectrum.wavelength / (1 + redshift) - - # Apply extinction if provided - if extinction_curve is not None: - extinction_factor = np.interp( - corrected_wavelength, - EXTINCTION_WAVELENGTH, - extinction_curve - ) - corrected_flux = spectrum.flux * extinction_factor - else: - corrected_flux = spectrum.flux - - return Spectrum( - wavelength=corrected_wavelength, - flux=corrected_flux, - ivar=spectrum.ivar - ) -``` - -### Type Hints - -Always use type hints for better code clarity: - -```python -from typing import Dict, List, Optional, Tuple, Union -import torch -import numpy as np - -def tokenize_multimodal( - data: Dict[str, Modality], - codecs: Dict[str, Codec], - max_length: Optional[int] = None -) -> Dict[str, torch.Tensor]: - """Tokenize multiple modalities.""" - tokens = {} - for modality_name, modality_data in data.items(): - if modality_name in codecs: - tokens[modality_name] = codecs[modality_name].encode(modality_data) - return tokens -``` - -### Docstrings - -Use Google-style docstrings: - -```python -def cross_match_catalogs( - catalog1: Catalog, - catalog2: Catalog, - radius: float = 1.0, - unit: str = 'arcsec' -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - """ - Cross-match two astronomical catalogs. - - Performs positional cross-matching between two catalogs using - a specified search radius. - - Args: - catalog1: First catalog - catalog2: Second catalog - radius: Search radius for matching - unit: Unit of radius ('arcsec', 'arcmin', 'deg') - - Returns: - Tuple containing: - - indices1: Matched indices from catalog1 - - indices2: Matched indices from catalog2 - - distances: Angular distances of matches - - Raises: - ValueError: If unit is not recognized - - Example: - >>> idx1, idx2, dist = cross_match_catalogs( - ... 
gaia_catalog, - ... sdss_catalog, - ... radius=2.0 - ... ) - """ -``` - -## Testing Guidelines - -### Test Structure - -``` -tests/ -├── unit/ # Fast unit tests -├── integration/ # Integration tests -├── fixtures/ # Test data and fixtures -└── benchmarks/ # Performance benchmarks -``` - -### Writing Tests - -```python -# tests/unit/test_spectrum_codec.py -import pytest -import numpy as np -from aion.modalities import Spectrum -from aion.codecs import SpectrumCodec - -class TestSpectrumCodec: - @pytest.fixture - def sample_spectrum(self): - """Create a sample spectrum for testing.""" - wavelength = np.linspace(4000, 8000, 1000) - flux = np.random.randn(1000) + 10 - return Spectrum(wavelength=wavelength, flux=flux) - - @pytest.fixture - def codec(self): - """Initialize spectrum codec.""" - return SpectrumCodec( - latent_wavelength=np.linspace(3500, 10500, 8704) - ) - - def test_encode_decode_preserves_shape(self, sample_spectrum, codec): - """Test that encode/decode preserves spectrum shape.""" - tokens = codec.encode(sample_spectrum) - reconstructed = codec.decode(tokens) - - assert reconstructed.wavelength.shape == sample_spectrum.wavelength.shape - assert reconstructed.flux.shape == sample_spectrum.flux.shape - - def test_handles_missing_data(self, codec): - """Test codec handles spectra with gaps.""" - wavelength = np.array([4000, 4100, 4200, 6000, 6100]) - flux = np.array([1.0, 1.1, 1.2, 2.0, 2.1]) - - spectrum = Spectrum(wavelength=wavelength, flux=flux) - tokens = codec.encode(spectrum) - - assert tokens is not None - assert len(tokens.shape) == 2 # [batch, seq_len] -``` - -### Running Tests - -```bash -# Run all tests -pytest - -# Run specific test file -pytest tests/unit/test_spectrum_codec.py - -# Run with coverage -pytest --cov=aion --cov-report=html - -# Run benchmarks -pytest tests/benchmarks/ --benchmark-only -``` - -## Documentation - -### Building Documentation - -```bash -cd docs -make html -# View at docs/_build/html/index.html -``` - -### Writing Documentation - -When adding new features, update: - -1. **Docstrings**: In the code itself -2. **API Reference**: In `docs/api.md` -3. **Usage Examples**: In `docs/usage.md` -4. **Architecture**: If design changes - -Example documentation addition: - -```markdown -### Working with Time Series - -AION-1 can process variable star light curves and other time series data: - -\```python -from aion.modalities import TimeSeries - -# Load light curve data -lightcurve = TimeSeries( - time=mjd_times, - flux=flux_measurements, - error=flux_errors -) - -# Generate period estimate -results = model.generate( - inputs={'timeseries': lightcurve}, - targets=['period', 'variability_class'] -) - -print(f"Period: {results['period'].value[0]:.3f} days") -print(f"Class: {results['variability_class'].value[0]}") -\``` -``` - -## Submitting Changes - -### 1. Create Feature Branch - -```bash -git checkout -b feature/your-feature-name -# or -git checkout -b fix/issue-description -``` - -### 2. Make Changes - -- Write clean, documented code -- Add tests for new functionality -- Update documentation as needed -- Ensure all tests pass - -### 3. 
Commit Guidelines - -Use conventional commits: - -```bash -# Format: (): - -git commit -m "feat(modalities): add time series support" -git commit -m "fix(codec): handle edge case in spectrum interpolation" -git commit -m "docs(api): improve codec documentation" -git commit -m "test(integration): add multi-survey processing tests" -``` - -Types: -- `feat`: New feature -- `fix`: Bug fix -- `docs`: Documentation changes -- `test`: Test additions/changes -- `refactor`: Code refactoring -- `perf`: Performance improvements -- `style`: Code style changes -- `chore`: Maintenance tasks - -### 4. Push and Create PR - -```bash -git push origin feature/your-feature-name -``` - -Then create a Pull Request on GitHub with: - -- **Clear title**: Summarize the change -- **Description**: Explain what and why -- **Tests**: Confirm all tests pass -- **Screenshots**: If relevant (e.g., for visualizations) - -### 5. Code Review - -- Respond to feedback constructively -- Make requested changes -- Keep PR focused and reasonably sized - -## Community Guidelines - -### Code of Conduct - -We follow the [Contributor Covenant Code of Conduct](https://www.contributor-covenant.org/). Key points: - -- Be respectful and inclusive -- Welcome newcomers -- Focus on constructive criticism -- Report unacceptable behavior - -### Getting Help - -- **Discord**: Quick questions and discussions -- **GitHub Issues**: Bug reports and feature requests -- **Discussions**: Longer form conversations -- **Office Hours**: Weekly community calls (Thursdays 3pm UTC) - -### Recognition - -Contributors are recognized in: - -- The `CONTRIBUTORS.md` file -- Release notes -- Research papers (for significant contributions) - -## Advanced Topics - -### Adding New Surveys - -To add support for a new astronomical survey: - -1. **Define band mappings** in `aion/surveys.py` -2. **Add preprocessing** in `aion/codecs/preprocessing/` -3. **Update documentation** with survey details -4. **Add tests** with sample data - -### Performance Optimization - -When optimizing AION-1: - -```python -# Profile first -import cProfile -import pstats - -profiler = cProfile.Profile() -profiler.enable() - -# Your code here -result = model.generate(inputs, targets) - -profiler.disable() -stats = pstats.Stats(profiler).sort_stats('cumulative') -stats.print_stats(10) -``` - -### Memory Profiling - -```python -from memory_profiler import profile - -@profile -def process_large_batch(model, data): - # Function to profile - pass -``` - -## Thank You! - -Your contributions make AION-1 better for the entire astronomical community. Whether you're fixing a typo, adding a feature, or conducting research, every contribution matters. - -If you have questions or need help getting started, don't hesitate to reach out on Discord or open an issue. We're here to help! - -Happy contributing! 🌟🔭 diff --git a/docs/installation.md b/docs/installation.md index 1ebe2d2..f28e27c 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -52,7 +52,7 @@ For GPU support and optimal performance: pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118 # Then install AION -pip install aion[full] +pip install aion ``` ### 3. Development Installation @@ -72,18 +72,6 @@ source venv/bin/activate # On Windows: venv\Scripts\activate pip install -e ".[dev]" ``` -### 4. 
Docker Installation - -For containerized deployments: - -```bash -# Pull the official Docker image -docker pull polymathic/aion:latest - -# Run with GPU support -docker run --gpus all -it polymathic/aion:latest -``` - ## Setting Up Your Environment ### 1. Virtual Environment Setup @@ -99,213 +87,3 @@ source aion-env/bin/activate # On Windows: aion-env\Scripts\activate conda create -n aion python=3.10 conda activate aion ``` - -### 2. Verify Installation - -After installation, verify everything is working: - -```python -import aion -import torch - -# Check AION version -print(f"AION version: {aion.__version__}") - -# Check PyTorch and CUDA -print(f"PyTorch version: {torch.__version__}") -print(f"CUDA available: {torch.cuda.is_available()}") -if torch.cuda.is_available(): - print(f"CUDA version: {torch.version.cuda}") - print(f"GPU: {torch.cuda.get_device_name(0)}") - -# Test loading a model -from aion import AION -model = AION.from_pretrained('polymathic-ai/aion-tiny') -print("Model loaded successfully!") -``` - -### 3. Download Pre-trained Models - -AION-1 comes in three sizes. Models are automatically downloaded on first use, but you can pre-download them: - -```python -from aion import AION - -# Download models (choose based on your hardware) -model_tiny = AION.from_pretrained('polymathic-ai/aion-tiny') # 300M parameters -model_base = AION.from_pretrained('polymathic-ai/aion-base') # 800M parameters -model_large = AION.from_pretrained('polymathic-ai/aion-large') # 3.1B parameters -``` - -Model sizes and requirements: -- **aion-tiny**: ~1.2 GB, runs on 8GB GPUs -- **aion-base**: ~3.2 GB, recommended 16GB+ GPU -- **aion-large**: ~12 GB, requires 24GB+ GPU - -### 4. Configure Model Cache - -By default, models are cached in `~/.cache/huggingface/hub/`. To change this: - -```bash -# Set environment variable -export HF_HOME=/path/to/your/cache - -# Or in Python -import os -os.environ['HF_HOME'] = '/path/to/your/cache' -``` - -## Installing Optional Dependencies - -### For Astronomical Data Processing - -```bash -pip install astropy fits -``` - -### For Visualization - -```bash -pip install matplotlib seaborn plotly -``` - -### For Advanced Scientific Computing - -```bash -pip install scipy scikit-learn pandas -``` - -## Platform-Specific Instructions - -### Linux - -Most straightforward installation. Ensure you have: -```bash -# Ubuntu/Debian -sudo apt-get update -sudo apt-get install python3-dev python3-pip - -# CentOS/RHEL -sudo yum install python3-devel python3-pip -``` - -### macOS - -For Apple Silicon Macs: -```bash -# Install using conda for better compatibility -conda install pytorch torchvision -c pytorch -pip install aion -``` - -Note: GPU acceleration on macOS uses Metal Performance Shaders (MPS): -```python -device = torch.device("mps" if torch.backends.mps.is_available() else "cpu") -``` - -### Windows - -Ensure you have Visual C++ Build Tools: -1. Download from: https://visualstudio.microsoft.com/visual-cpp-build-tools/ -2. Install with "Desktop development with C++" - -Then: -```bash -pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118 -pip install aion -``` - -## Troubleshooting - -### Common Issues and Solutions - -**1. CUDA Out of Memory** -```python -# Reduce batch size -model.eval() -with torch.no_grad(): - outputs = model(inputs) - -# Use mixed precision -from torch.cuda.amp import autocast -with autocast(): - outputs = model(inputs) -``` - -**2. 
Import Errors** -```bash -# Ensure all dependencies are installed -pip install --upgrade aion[full] - -# Check for conflicts -pip check -``` - -**3. Slow Model Loading** -```python -# Use faster model loading -model = AION.from_pretrained('polymathic-ai/aion-base', - torch_dtype=torch.float16, - device_map="auto") -``` - -**4. Version Conflicts** -```bash -# Create a fresh environment -conda create -n aion-clean python=3.10 -conda activate aion-clean -pip install aion -``` - -### Getting Help - -If you encounter issues: - -1. Check the [GitHub Issues](https://github.com/polymathic-ai/aion/issues) -2. Join our [Discord community](https://discord.gg/polymathic-ai) -3. Consult the [FAQ section](https://polymathic-ai.org/aion/faq) - -## Next Steps - -Now that you have AION-1 installed, explore: -- [Architecture Overview](architecture.html) - Understand how AION-1 works -- [Usage Guide](usage.html) - Learn to use AION-1 for your research -- [API Reference](api.html) - Detailed API documentation - -## Performance Optimization - -### GPU Memory Management - -```python -# Clear cache when switching between models -torch.cuda.empty_cache() - -# Use gradient checkpointing for large models -model.gradient_checkpointing_enable() - -# Optimize for inference -model.eval() -torch.set_grad_enabled(False) -``` - -### Multi-GPU Setup - -```python -# DataParallel for simple multi-GPU -model = torch.nn.DataParallel(model) - -# DistributedDataParallel for better performance -import torch.distributed as dist -dist.init_process_group(backend='nccl') -model = torch.nn.parallel.DistributedDataParallel(model) -``` - -### CPU Optimization - -```python -# Enable MKL optimizations -torch.set_num_threads(8) # Adjust based on your CPU - -# Use channels_last memory format -model = model.to(memory_format=torch.channels_last) -``` From be1666ddd276097a6bd727b350e96db237c51c7d Mon Sep 17 00:00:00 2001 From: Lucas Meyer Date: Mon, 26 May 2025 10:58:45 +0200 Subject: [PATCH 07/21] Format tests with ruff and add typing --- tests/codecs/test_codec_manager.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/codecs/test_codec_manager.py b/tests/codecs/test_codec_manager.py index ece562a..3f89057 100644 --- a/tests/codecs/test_codec_manager.py +++ b/tests/codecs/test_codec_manager.py @@ -1,13 +1,15 @@ """Test the CodecManager class.""" +from pathlib import Path + import pytest import torch from aion.codecs.manager import CodecManager from aion.modalities import ( - LegacySurveyImage, DESISpectrum, LegacySurveyFluxG, + LegacySurveyImage, LegacySurveyShapeE1, ) @@ -20,7 +22,7 @@ def manager(self): """Create a CodecManager instance.""" return CodecManager(device="cpu") - def test_encode_decode_image(self, manager, data_dir): + def test_encode_decode_image(self, manager: CodecManager, data_dir: Path): """Test encoding and decoding Image modality.""" # Load test data input_batch_dict = torch.load( @@ -52,7 +54,7 @@ def test_encode_decode_image(self, manager, data_dir): assert isinstance(decoded_image_2, LegacySurveyImage) assert torch.allclose(decoded_image.flux, decoded_image_2.flux) - def test_encode_decode_spectrum(self, manager, data_dir): + def test_encode_decode_spectrum(self, manager: CodecManager, data_dir: Path): """Test encoding and decoding Spectrum modality.""" # Load test data input_batch = torch.load( @@ -76,7 +78,7 @@ def test_encode_decode_spectrum(self, manager, data_dir): assert isinstance(decoded_spectrum, DESISpectrum) assert decoded_spectrum.flux.shape == spectrum.flux.shape - 
def test_codec_caching(self, manager):
+    def test_codec_caching(self, manager: CodecManager):
         """Test that codecs are properly cached and reused."""
         # Create two modalities that use the same codec type
         flux_g1 = LegacySurveyFluxG(value=torch.randn(4, 1))
@@ -95,7 +97,7 @@ def test_codec_caching(self, manager):
         codec2 = manager._get_codec_for_modality(LegacySurveyFluxG)
         assert codec1 is codec2
 
-    def test_error_handling(self, manager):
+    def test_error_handling(self, manager: CodecManager):
         """Test error handling in CodecManager."""
 
         # Test with invalid modality type
@@ -116,7 +118,7 @@ class InvalidModality:
             manager.decode(tokens, "invalid_token_key")
 
     @pytest.mark.parametrize("batch_size", [1, 4, 16])
-    def test_different_batch_sizes(self, manager, batch_size):
+    def test_different_batch_sizes(self, manager: CodecManager, batch_size: int):
         """Test that CodecManager handles different batch sizes correctly."""
         # Create modalities with different batch sizes
         flux_g = LegacySurveyFluxG(value=torch.randn(batch_size, 1))

From 1a631ee24ff91920edd0251be816995b7907dfbe Mon Sep 17 00:00:00 2001
From: Lucas Meyer
Date: Mon, 26 May 2025 11:01:40 +0200
Subject: [PATCH 08/21] Disable gradients on codec manager encode/decode

---
 aion/codecs/manager.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/aion/codecs/manager.py b/aion/codecs/manager.py
index ad04dc3..9b9b4bf 100644
--- a/aion/codecs/manager.py
+++ b/aion/codecs/manager.py
@@ -76,6 +76,7 @@ def _load_codec(self, modality_type: Type[Modality]) -> Codec:
 
         return codec
 
+    @torch.no_grad()
     def encode(self, *modalities: Modality) -> Dict[str, torch.Tensor]:
         """Encode multiple modalities.
 
@@ -106,6 +107,7 @@ def encode(self, *modalities: Modality) -> Dict[str, torch.Tensor]:
 
         return tokens
 
+    @torch.no_grad()
     def decode(
         self,
         tokens: Dict[str, torch.Tensor],

From 8204007d825ead227ed25ad08a591d4654344b6e Mon Sep 17 00:00:00 2001
From: Lucas Meyer
Date: Mon, 26 May 2025 13:19:28 +0200
Subject: [PATCH 09/21] Add codec manager specific errors

---
 aion/codecs/manager.py             | 27 +++++++++++++++++++--------
 tests/codecs/test_codec_manager.py |  8 ++++----
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/aion/codecs/manager.py b/aion/codecs/manager.py
index 9b9b4bf..2db8442 100644
--- a/aion/codecs/manager.py
+++ b/aion/codecs/manager.py
@@ -3,12 +3,21 @@
 
 Handles dynamic loading and management of codecs for different modalities.
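+
+Typical round trip (a sketch based on the tests in this repository):
+
+    manager = CodecManager(device="cpu")
+    tokens = manager.encode(image)  # Dict[str, torch.Tensor] keyed by token_key
+    decoded = manager.decode(tokens, LegacySurveyImage)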
""" -from typing import Dict, Union, Optional, Type +from typing import Dict, Optional, Type, Union + import torch -from aion.modalities import Modality from aion.codecs.base import Codec from aion.codecs.config import CODEC_CONFIG +from aion.modalities import Modality + + +class ModalityTypeError(TypeError): + """Error raised when a modality type is not supported.""" + + +class TokenKeyError(ValueError): + """Error raised when a token key is not found in the tokens dictionary.""" class CodecManager: @@ -53,7 +62,7 @@ def _load_codec(self, modality_type: Type[Modality]) -> Codec: ): config = CODEC_CONFIG[modality_type.__base__] else: - raise ValueError( + raise ModalityTypeError( f"No codec configuration found for modality type: {modality_type.__name__}" ) else: @@ -99,7 +108,7 @@ def encode(self, *modalities: Modality) -> Dict[str, torch.Tensor]: if hasattr(modality, "token_key"): token_key = modality.token_key else: - raise ValueError( + raise ModalityTypeError( f"Modality {type(modality).__name__} does not have a token_key attribute" ) @@ -130,7 +139,7 @@ def decode( # Token key provided token_key = modality_or_token_key if token_key not in tokens: - raise ValueError( + raise TokenKeyError( f"Token key '{token_key}' not found in tokens dictionary" ) @@ -142,18 +151,20 @@ def decode( break if modality_type is None: - raise ValueError(f"No modality type found for token key '{token_key}'") + raise TokenKeyError( + f"No modality type found for token key '{token_key}'" + ) else: # Modality type provided modality_type = modality_or_token_key if not hasattr(modality_type, "token_key"): - raise ValueError( + raise ModalityTypeError( f"Modality type {modality_type.__name__} does not have a token_key attribute" ) token_key = modality_type.token_key if token_key not in tokens: - raise ValueError( + raise TokenKeyError( f"Token key '{token_key}' for modality {modality_type.__name__} not found in tokens dictionary" ) diff --git a/tests/codecs/test_codec_manager.py b/tests/codecs/test_codec_manager.py index 3f89057..b932561 100644 --- a/tests/codecs/test_codec_manager.py +++ b/tests/codecs/test_codec_manager.py @@ -5,7 +5,7 @@ import pytest import torch -from aion.codecs.manager import CodecManager +from aion.codecs.manager import CodecManager, ModalityTypeError, TokenKeyError from aion.modalities import ( DESISpectrum, LegacySurveyFluxG, @@ -104,17 +104,17 @@ def test_error_handling(self, manager: CodecManager): class InvalidModality: pass - with pytest.raises(ValueError, match="No codec configuration found"): + with pytest.raises(ModalityTypeError): manager._load_codec(InvalidModality) # Test decoding with missing token key tokens = {"tok_flux_g": torch.randn(4, 10)} - with pytest.raises(ValueError, match="Token key .* not found"): + with pytest.raises(TokenKeyError): manager.decode(tokens, "tok_missing") # Test decoding with invalid token key - with pytest.raises(ValueError, match="No modality type found for token key"): + with pytest.raises(TokenKeyError): manager.decode(tokens, "invalid_token_key") @pytest.mark.parametrize("batch_size", [1, 4, 16]) From 17ef078bfec57857fa20c577aecf8a743b49e5da Mon Sep 17 00:00:00 2001 From: Lucas Meyer Date: Mon, 26 May 2025 13:32:42 +0200 Subject: [PATCH 10/21] Recast decoded image into the correct type --- aion/codecs/manager.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/aion/codecs/manager.py b/aion/codecs/manager.py index 2db8442..e1d2c66 100644 --- a/aion/codecs/manager.py +++ b/aion/codecs/manager.py @@ -174,4 +174,7 @@ def decode( # Decode using 
the codec with any provided metadata decoded_modality = codec.decode(tokens[token_key], **metadata) + # Cast decoded modality to the correct type + decoded_modality = modality_type(**decoded_modality.model_dump()) + return decoded_modality From b8977be5c3877a48615675d9874b82a41a2d24a6 Mon Sep 17 00:00:00 2001 From: Lucas Meyer Date: Mon, 26 May 2025 13:39:36 +0200 Subject: [PATCH 11/21] Add LegacySurveyImage to codec config --- aion/codecs/config.py | 79 ++++++++++++++++++++++-------------------- aion/codecs/manager.py | 2 +- 2 files changed, 43 insertions(+), 38 deletions(-) diff --git a/aion/codecs/config.py b/aion/codecs/config.py index af428b2..e65007a 100644 --- a/aion/codecs/config.py +++ b/aion/codecs/config.py @@ -1,54 +1,55 @@ # Configuration for codecs +from aion.codecs.catalog import CatalogCodec +from aion.codecs.image import ImageCodec +from aion.codecs.scalar import ( + GridScalarCodec, + LogScalarCodec, + MultiScalarCodec, + ScalarCodec, +) +from aion.codecs.scalar_field import ScalarFieldCodec +from aion.codecs.spectrum import SpectrumCodec from aion.modalities import ( + HSCAG, + HSCAI, + HSCAR, + HSCAY, + HSCAZ, + Dec, + GaiaFluxBp, + GaiaFluxG, + GaiaFluxRp, + GaiaParallax, + GaiaXpBp, + GaiaXpRp, + HSCMagG, + HSCMagI, + HSCMagR, + HSCMagY, + HSCMagZ, + HSCShape11, + HSCShape12, + HSCShape22, Image, - Spectrum, LegacySurveyCatalog, - LegacySurveySegmentationMap, + LegacySurveyEBV, LegacySurveyFluxG, - LegacySurveyFluxR, LegacySurveyFluxI, - LegacySurveyFluxZ, + LegacySurveyFluxR, LegacySurveyFluxW1, LegacySurveyFluxW2, LegacySurveyFluxW3, LegacySurveyFluxW4, - LegacySurveyShapeR, + LegacySurveyFluxZ, + LegacySurveyImage, + LegacySurveySegmentationMap, LegacySurveyShapeE1, LegacySurveyShapeE2, - LegacySurveyEBV, - Z, - HSCAG, - HSCAR, - HSCAI, - HSCAZ, - HSCAY, - HSCMagG, - HSCMagR, - HSCMagI, - HSCMagZ, - HSCMagY, - HSCShape11, - HSCShape22, - HSCShape12, - GaiaFluxG, - GaiaFluxBp, - GaiaFluxRp, - GaiaParallax, + LegacySurveyShapeR, Ra, - Dec, - GaiaXpBp, - GaiaXpRp, -) -from aion.codecs.image import ImageCodec -from aion.codecs.spectrum import SpectrumCodec -from aion.codecs.catalog import CatalogCodec -from aion.codecs.scalar_field import ScalarFieldCodec -from aion.codecs.scalar import ( - ScalarCodec, - LogScalarCodec, - MultiScalarCodec, - GridScalarCodec, + Spectrum, + Z, ) CODEC_CONFIG = { @@ -56,6 +57,10 @@ "class": ImageCodec, "repo_id": "polymathic-ai/aion-image-codec", }, + LegacySurveyImage: { + "class": ImageCodec, + "repo_id": "polymathic-ai/aion-image-codec", + }, Spectrum: { "class": SpectrumCodec, "repo_id": "polymathic-ai/aion-spectrum-codec", diff --git a/aion/codecs/manager.py b/aion/codecs/manager.py index e1d2c66..6b29aa4 100644 --- a/aion/codecs/manager.py +++ b/aion/codecs/manager.py @@ -145,7 +145,7 @@ def decode( # Find the modality type from token_key modality_type = None - for mod_type, config in CODEC_CONFIG.items(): + for mod_type in CODEC_CONFIG.keys(): if hasattr(mod_type, "token_key") and mod_type.token_key == token_key: modality_type = mod_type break From 63b72261af3958100d652df8f86248949df9ee62 Mon Sep 17 00:00:00 2001 From: Lucas Meyer Date: Mon, 26 May 2025 15:29:23 +0200 Subject: [PATCH 12/21] Update documentation test and deploy CI --- .../workflows/{docs.yml => deploy-doc.yml} | 29 +++++++------------ .../{docs-check.yml => test-doc.yml} | 9 +++--- .github/workflows/test.yaml | 2 +- docs/requirements.txt | 5 ---- pyproject.toml | 6 ++-- 5 files changed, 19 insertions(+), 32 deletions(-) rename .github/workflows/{docs.yml => 
deploy-doc.yml} (70%) rename .github/workflows/{docs-check.yml => test-doc.yml} (84%) delete mode 100644 docs/requirements.txt diff --git a/.github/workflows/docs.yml b/.github/workflows/deploy-doc.yml similarity index 70% rename from .github/workflows/docs.yml rename to .github/workflows/deploy-doc.yml index 5fdb66c..d09b10e 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/deploy-doc.yml @@ -1,10 +1,12 @@ name: Build Documentation on: - push: - branches: [ main ] pull_request: - branches: [ main ] + branches: + - main + push: + branches: + - main # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -25,26 +27,16 @@ jobs: steps: - uses: actions/checkout@v4 - + - uses: astral-sh/setup-uv@v5 + with: + enable-cache: true - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.11' - - - name: Cache pip dependencies - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt', '**/pyproject.toml') }} - restore-keys: | - ${{ runner.os }}-pip- - - name: Install dependencies run: | - python -m pip install --upgrade pip - pip install -e . - pip install -r docs/requirements.txt - + uv sync --all-extras --dev - name: Build HTML documentation run: | cd docs @@ -57,12 +49,11 @@ jobs: # Deploy job - only runs on main branch deploy: - if: github.ref == 'refs/heads/main' && github.event_name == 'push' + needs: build environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} runs-on: ubuntu-latest - needs: build steps: - name: Deploy to GitHub Pages diff --git a/.github/workflows/docs-check.yml b/.github/workflows/test-doc.yml similarity index 84% rename from .github/workflows/docs-check.yml rename to .github/workflows/test-doc.yml index e12230d..2c8efcb 100644 --- a/.github/workflows/docs-check.yml +++ b/.github/workflows/test-doc.yml @@ -14,17 +14,16 @@ jobs: steps: - uses: actions/checkout@v4 - + - uses: astral-sh/setup-uv@v5 + with: + enable-cache: true - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.11' - - name: Install dependencies run: | - python -m pip install --upgrade pip - pip install -e . 
- pip install -r docs/requirements.txt + uv sync --all-extras --dev - name: Build documentation run: | diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f2da13c..f20229a 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -4,7 +4,7 @@ on: pull_request: push: branches: - - master + - main jobs: pre-commit: diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index f70feb7..0000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -sphinx>=7.2 -myst-parser>=1.0 -furo -sphinx-design -sphinx-copybutton diff --git a/pyproject.toml b/pyproject.toml index 22c450c..eeaabc2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,9 +28,11 @@ dev = [ "ruff", ] docs = [ - "sphinx>=7.2", - "myst-parser", "furo", + "myst-parser>=1.0", + "sphinx-copybutton", + "sphinx-design", + "sphinx>=7.2", ] [tool.ruff.lint] From 62b0018e782dd835c29dcf1db838283f053db5f1 Mon Sep 17 00:00:00 2001 From: Lucas Meyer Date: Mon, 26 May 2025 15:39:46 +0200 Subject: [PATCH 13/21] Use uv run for sphinx invocation --- .github/workflows/deploy-doc.yml | 2 +- .github/workflows/test-doc.yml | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/deploy-doc.yml b/.github/workflows/deploy-doc.yml index d09b10e..a94d92e 100644 --- a/.github/workflows/deploy-doc.yml +++ b/.github/workflows/deploy-doc.yml @@ -40,7 +40,7 @@ jobs: - name: Build HTML documentation run: | cd docs - sphinx-build -b html . _build/html + uv run sphinx-build -b html . _build/html - name: Upload artifact uses: actions/upload-pages-artifact@v3 diff --git a/.github/workflows/test-doc.yml b/.github/workflows/test-doc.yml index 2c8efcb..b2d0c19 100644 --- a/.github/workflows/test-doc.yml +++ b/.github/workflows/test-doc.yml @@ -24,13 +24,11 @@ jobs: - name: Install dependencies run: | uv sync --all-extras --dev - - name: Build documentation run: | cd docs - sphinx-build -W -b html . _build/html - + uv run sphinx-build -W -b html . _build/html - name: Check for broken links run: | cd docs - sphinx-build -b linkcheck . _build/linkcheck || true + uv run sphinx-build -b linkcheck . _build/linkcheck || true From 3e0a7279db30367a6ee820f7e64c46ee0d02a6f9 Mon Sep 17 00:00:00 2001 From: Lucas Meyer Date: Mon, 26 May 2025 16:08:48 +0200 Subject: [PATCH 14/21] Fix documentation links --- docs/api.md | 2 +- docs/index.md | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/api.md b/docs/api.md index 0a17be1..8382838 100644 --- a/docs/api.md +++ b/docs/api.md @@ -681,7 +681,7 @@ except CodecError as e: 3. **Token Caching**: Reuse encoder outputs when generating multiple targets 4. **Device Placement**: Use `.to(device)` consistently for all tensors -For more details, see the [Usage Guide](usage.html) and [Architecture](architecture.html) documentation. +For more details, see the [Usage Guide](usage.md) and [Architecture](architecture.md) documentation. ```{eval-rst} .. 
automodule:: aion diff --git a/docs/index.md b/docs/index.md index 74bffde..0d44812 100644 --- a/docs/index.md +++ b/docs/index.md @@ -146,7 +146,6 @@ installation architecture usage api -contributing ``` ## 🤝 Join the Community From 093d73a816247e739c2a98b0860036ec3ae0d618 Mon Sep 17 00:00:00 2001 From: Lucas Meyer Date: Mon, 26 May 2025 16:24:35 +0200 Subject: [PATCH 15/21] Remove python tag of non-python code blocks --- docs/architecture.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/architecture.md b/docs/architecture.md index 5d620a1..98b33a6 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -50,7 +50,7 @@ AION-1's image tokenizer handles multi-band astronomical images from different s - Dynamic range #### Architecture -```python +``` # Image tokenizer structure class ImageCodec: - Preprocessing: @@ -92,7 +92,7 @@ Astronomical spectra present unique challenges: - Orders of magnitude variation in amplitude #### Architecture -```python +``` # Spectrum tokenizer structure class SpectrumCodec: - Preprocessing: @@ -155,7 +155,7 @@ These numbers correspond to the default configuration used during pre-training ( Astronomical catalogs contain lists of objects with varying counts per image. AION-1 linearizes these into sequences: -```python +``` # Catalog entry: (X, Y, e1, e2, radius) # Linearization: Sort by distance from center # Tokenization: Quantize each component separately @@ -167,7 +167,7 @@ The second stage uses a transformer encoder-decoder architecture to learn relati ### Architecture Details -```python +``` class AION(FourM): # Encoder - Depth: 12-24 layers (model-dependent) From a3b4cc53380c42362e4ce236ae6bb2f005a78c8d Mon Sep 17 00:00:00 2001 From: Lucas Meyer Date: Mon, 26 May 2025 16:26:19 +0200 Subject: [PATCH 16/21] Fix mermaid code block --- docs/architecture.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/architecture.md b/docs/architecture.md index 98b33a6..607c7f8 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -236,7 +236,7 @@ All models use: Here's how data flows through the complete pipeline: -```mermaid +```{mermaid} graph TD A[Raw Astronomical Data] --> B[Modality-Specific Preprocessing] B --> C[Tokenization] From e24693ae89a0cc1768c16a375afca793fb768f33 Mon Sep 17 00:00:00 2001 From: Lucas Meyer Date: Mon, 26 May 2025 16:28:16 +0200 Subject: [PATCH 17/21] Add sphinx-mermaid to the dependencies --- docs/conf.py | 7 ++++--- pyproject.toml | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 8eb4ded..87ac8a2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -9,11 +9,12 @@ extensions = [ "myst_parser", + "sphinx_copybutton", + "sphinx_design", # For cards and grids + "sphinx_mermaid", "sphinx.ext.autodoc", - "sphinx.ext.napoleon", "sphinx.ext.autosummary", - "sphinx_design", # For cards and grids - "sphinx_copybutton", + "sphinx.ext.napoleon", ] autosummary_generate = True diff --git a/pyproject.toml b/pyproject.toml index eeaabc2..74c4bd5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ docs = [ "myst-parser>=1.0", "sphinx-copybutton", "sphinx-design", + "sphinx-mermaid", "sphinx>=7.2", ] From 67111cb3f2b79570d0da1ef0859713d921260eff Mon Sep 17 00:00:00 2001 From: Lucas Meyer Date: Mon, 26 May 2025 17:05:04 +0200 Subject: [PATCH 18/21] Fix sphinxcontrib-mermaid dependency --- docs/conf.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 
87ac8a2..72fd03b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,7 +11,7 @@ "myst_parser", "sphinx_copybutton", "sphinx_design", # For cards and grids - "sphinx_mermaid", + "sphinxcontrib.mermaid", "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.napoleon", diff --git a/pyproject.toml b/pyproject.toml index 74c4bd5..d06608f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ docs = [ "myst-parser>=1.0", "sphinx-copybutton", "sphinx-design", - "sphinx-mermaid", + "sphinxcontrib-mermaid", "sphinx>=7.2", ] From a5703e94498bf1f6019a7413cba4b79b7438ad6e Mon Sep 17 00:00:00 2001 From: Lucas Meyer Date: Mon, 26 May 2025 17:11:19 +0200 Subject: [PATCH 19/21] Fix missing cross-references --- docs/conf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index 72fd03b..7d675ea 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -26,6 +26,8 @@ "html_image", ] +myst_heading_anchors = 3 + html_theme = "furo" html_static_path = ["_static"] html_css_files = ["style.css"] From f7c9f6edcaa9a278d3a8a65b3e674b42a73eed9f Mon Sep 17 00:00:00 2001 From: Lucas Meyer Date: Mon, 26 May 2025 17:18:41 +0200 Subject: [PATCH 20/21] Update deploy workflow trigger to be only on main --- .github/workflows/deploy-doc.yml | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/deploy-doc.yml b/.github/workflows/deploy-doc.yml index a94d92e..9d2e657 100644 --- a/.github/workflows/deploy-doc.yml +++ b/.github/workflows/deploy-doc.yml @@ -1,21 +1,12 @@ name: Build Documentation on: - pull_request: - branches: - - main push: branches: - main # Allows you to run this workflow manually from the Actions tab workflow_dispatch: -# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages -permissions: - contents: read - pages: write - id-token: write - # Allow one concurrent deployment concurrency: group: "pages" @@ -50,6 +41,11 @@ jobs: # Deploy job - only runs on main branch deploy: needs: build + # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages + permissions: + contents: read + pages: write + id-token: write environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} From f5068971c833cb5444424b302e05110654a560f9 Mon Sep 17 00:00:00 2001 From: Lucas Meyer Date: Mon, 26 May 2025 17:30:52 +0200 Subject: [PATCH 21/21] Revert LegacySurveyImage from codec config --- aion/codecs/config.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/aion/codecs/config.py b/aion/codecs/config.py index e65007a..c9e3f57 100644 --- a/aion/codecs/config.py +++ b/aion/codecs/config.py @@ -42,7 +42,6 @@ LegacySurveyFluxW3, LegacySurveyFluxW4, LegacySurveyFluxZ, - LegacySurveyImage, LegacySurveySegmentationMap, LegacySurveyShapeE1, LegacySurveyShapeE2, @@ -57,10 +56,6 @@ "class": ImageCodec, "repo_id": "polymathic-ai/aion-image-codec", }, - LegacySurveyImage: { - "class": ImageCodec, - "repo_id": "polymathic-ai/aion-image-codec", - }, Spectrum: { "class": SpectrumCodec, "repo_id": "polymathic-ai/aion-spectrum-codec",