Skip to content

Commit 86c508a

Browse files
committed
Version 0.4.0
1 parent c95f1d7 commit 86c508a

44 files changed

Lines changed: 923 additions & 365 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

docs/build/doctrees/api.doctree

60.7 KB
Binary file not shown.
9.98 KB
Binary file not shown.

docs/build/html/.buildinfo

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Sphinx build info version 1
22
# This file records the configuration used when building these files. When it is not found, a full rebuild will be done.
3-
config: be26c3a70d77e412cb5ca26079edeb35
3+
config: b087916a1b701a63a5b0cbcf81dfb26b
44
tags: 645f666f9bcd5a90fca523b33c5a78b7

docs/build/html/_modules/index.html

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="../genindex.html"><link rel="search" title="Search" href="../search.html">
66

77
<!-- Generated with Sphinx 9.1.0 and Furo 2025.12.19 -->
8-
<title>Overview: module code - transformer 0.3.0 documentation</title>
8+
<title>Overview: module code - transformer 0.4.0 documentation</title>
99
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=d111a655" />
1010
<link rel="stylesheet" type="text/css" href="../_static/styles/furo.css?v=7bdb33bb" />
1111
<link rel="stylesheet" type="text/css" href="../_static/styles/furo-extensions.css?v=8dab3a3b" />
@@ -160,7 +160,7 @@
160160
</label>
161161
</div>
162162
<div class="header-center">
163-
<a href="../index.html"><div class="brand">transformer 0.3.0 documentation</div></a>
163+
<a href="../index.html"><div class="brand">transformer 0.4.0 documentation</div></a>
164164
</div>
165165
<div class="header-right">
166166
<div class="theme-toggle-container theme-toggle-header">
@@ -181,7 +181,7 @@
181181

182182
<div class="sidebar-sticky"><a class="sidebar-brand" href="../index.html">
183183

184-
<span class="sidebar-brand-text">transformer 0.3.0 documentation</span>
184+
<span class="sidebar-brand-text">transformer 0.4.0 documentation</span>
185185

186186
</a><form class="sidebar-search-container" method="get" action="../search.html" role="search">
187187
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
@@ -248,6 +248,7 @@ <h1>All modules for which code is available</h1>
248248
<li><a href="transformer/ffn.html">transformer.ffn</a></li>
249249
<li><a href="transformer/pos.html">transformer.pos</a></li>
250250
<li><a href="transformer/transformer.html">transformer.transformer</a></li>
251+
<li><a href="transformer/utils.html">transformer.utils</a></li>
251252
</ul>
252253
</article>
253254
</div>
@@ -280,7 +281,7 @@ <h1>All modules for which code is available</h1>
280281

281282
</aside>
282283
</div>
283-
</div><script src="../_static/documentation_options.js?v=e259d695"></script>
284+
</div><script src="../_static/documentation_options.js?v=6c02275b"></script>
284285
<script src="../_static/doctools.js?v=fd6eb6e6"></script>
285286
<script src="../_static/sphinx_highlight.js?v=6ffebe34"></script>
286287
<script src="../_static/scripts/furo.js?v=46bd48cc"></script>

docs/build/html/_modules/transformer/attns.html

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="../../genindex.html"><link rel="search" title="Search" href="../../search.html">
66

77
<!-- Generated with Sphinx 9.1.0 and Furo 2025.12.19 -->
8-
<title>transformer.attns - transformer 0.3.0 documentation</title>
8+
<title>transformer.attns - transformer 0.4.0 documentation</title>
99
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=d111a655" />
1010
<link rel="stylesheet" type="text/css" href="../../_static/styles/furo.css?v=7bdb33bb" />
1111
<link rel="stylesheet" type="text/css" href="../../_static/styles/furo-extensions.css?v=8dab3a3b" />
@@ -160,7 +160,7 @@
160160
</label>
161161
</div>
162162
<div class="header-center">
163-
<a href="../../index.html"><div class="brand">transformer 0.3.0 documentation</div></a>
163+
<a href="../../index.html"><div class="brand">transformer 0.4.0 documentation</div></a>
164164
</div>
165165
<div class="header-right">
166166
<div class="theme-toggle-container theme-toggle-header">
@@ -181,28 +181,33 @@
181181

182182
<div class="sidebar-sticky"><a class="sidebar-brand" href="../../index.html">
183183

184-
<span class="sidebar-brand-text">transformer 0.3.0 documentation</span>
184+
<span class="sidebar-brand-text">transformer 0.4.0 documentation</span>
185185

186186
</a><form class="sidebar-search-container" method="get" action="../../search.html" role="search">
187187
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
188188
<input type="hidden" name="check_keywords" value="yes">
189189
<input type="hidden" name="area" value="default">
190190
</form>
191191
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
192-
<ul>
192+
<p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
193+
<ul>
193194
<li class="toctree-l1"><a class="reference internal" href="../../installation.html">Installation</a></li>
194195
<li class="toctree-l1"><a class="reference internal" href="../../quickstart.html">Quick Start</a></li>
195196
</ul>
197+
<p class="caption" role="heading"><span class="caption-text">Guide</span></p>
196198
<ul>
197199
<li class="toctree-l1"><a class="reference internal" href="../../guide.html">Transformer: A PyTorch SOTA Transformer Implementation</a></li>
198200
<li class="toctree-l1"><a class="reference internal" href="../../guide.html#configuration">Configuration</a></li>
199201
</ul>
202+
<p class="caption" role="heading"><span class="caption-text">API Reference</span></p>
200203
<ul>
201204
<li class="toctree-l1"><a class="reference internal" href="../../api.html">API Reference</a></li>
202205
</ul>
206+
<p class="caption" role="heading"><span class="caption-text">Usage Examples</span></p>
203207
<ul>
204208
<li class="toctree-l1"><a class="reference internal" href="../../examples.html">Usage Examples</a></li>
205209
</ul>
210+
<p class="caption" role="heading"><span class="caption-text">Project Info</span></p>
206211
<ul>
207212
<li class="toctree-l1"><a class="reference internal" href="../../contributing.html">Contributing</a></li>
208213
</ul>
@@ -890,7 +895,7 @@ <h1>Source code for transformer.attns</h1><div class="highlight"><pre>
890895

891896
</aside>
892897
</div>
893-
</div><script src="../../_static/documentation_options.js?v=e259d695"></script>
898+
</div><script src="../../_static/documentation_options.js?v=6c02275b"></script>
894899
<script src="../../_static/doctools.js?v=fd6eb6e6"></script>
895900
<script src="../../_static/sphinx_highlight.js?v=6ffebe34"></script>
896901
<script src="../../_static/scripts/furo.js?v=46bd48cc"></script>

docs/build/html/_modules/transformer/config.html

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="../../genindex.html"><link rel="search" title="Search" href="../../search.html">
66

77
<!-- Generated with Sphinx 9.1.0 and Furo 2025.12.19 -->
8-
<title>transformer.config - transformer 0.3.0 documentation</title>
8+
<title>transformer.config - transformer 0.4.0 documentation</title>
99
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=d111a655" />
1010
<link rel="stylesheet" type="text/css" href="../../_static/styles/furo.css?v=7bdb33bb" />
1111
<link rel="stylesheet" type="text/css" href="../../_static/styles/furo-extensions.css?v=8dab3a3b" />
@@ -160,7 +160,7 @@
160160
</label>
161161
</div>
162162
<div class="header-center">
163-
<a href="../../index.html"><div class="brand">transformer 0.3.0 documentation</div></a>
163+
<a href="../../index.html"><div class="brand">transformer 0.4.0 documentation</div></a>
164164
</div>
165165
<div class="header-right">
166166
<div class="theme-toggle-container theme-toggle-header">
@@ -181,28 +181,33 @@
181181

182182
<div class="sidebar-sticky"><a class="sidebar-brand" href="../../index.html">
183183

184-
<span class="sidebar-brand-text">transformer 0.3.0 documentation</span>
184+
<span class="sidebar-brand-text">transformer 0.4.0 documentation</span>
185185

186186
</a><form class="sidebar-search-container" method="get" action="../../search.html" role="search">
187187
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
188188
<input type="hidden" name="check_keywords" value="yes">
189189
<input type="hidden" name="area" value="default">
190190
</form>
191191
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
192-
<ul>
192+
<p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
193+
<ul>
193194
<li class="toctree-l1"><a class="reference internal" href="../../installation.html">Installation</a></li>
194195
<li class="toctree-l1"><a class="reference internal" href="../../quickstart.html">Quick Start</a></li>
195196
</ul>
197+
<p class="caption" role="heading"><span class="caption-text">Guide</span></p>
196198
<ul>
197199
<li class="toctree-l1"><a class="reference internal" href="../../guide.html">Transformer: A PyTorch SOTA Transformer Implementation</a></li>
198200
<li class="toctree-l1"><a class="reference internal" href="../../guide.html#configuration">Configuration</a></li>
199201
</ul>
202+
<p class="caption" role="heading"><span class="caption-text">API Reference</span></p>
200203
<ul>
201204
<li class="toctree-l1"><a class="reference internal" href="../../api.html">API Reference</a></li>
202205
</ul>
206+
<p class="caption" role="heading"><span class="caption-text">Usage Examples</span></p>
203207
<ul>
204208
<li class="toctree-l1"><a class="reference internal" href="../../examples.html">Usage Examples</a></li>
205209
</ul>
210+
<p class="caption" role="heading"><span class="caption-text">Project Info</span></p>
206211
<ul>
207212
<li class="toctree-l1"><a class="reference internal" href="../../contributing.html">Contributing</a></li>
208213
</ul>
@@ -278,7 +283,7 @@ <h1>Source code for transformer.config</h1><div class="highlight"><pre>
278283
<span class="sd"> - If ``str``, one of ``rms_norm`` or ``layer_norm``.</span>
279284
<span class="sd"> - If ``Type[nn.Module]`` then will be instantiated inside the model.</span>
280285
<span class="sd"> Should have the same API as a torch Normalization Layer.</span>
281-
<span class="sd"> - If ``List[Union[Type[nn.Module], str]]`` and len(ffn_class) == n_layers</span>
286+
<span class="sd"> - If ``List[Union[Type[nn.Module], str]]`` and len(norm_class) == n_layers</span>
282287
<span class="sd"> then will be instantiated inside the model for the corresponding layers.</span>
283288
<span class="sd"> :type norm_class: Union[List[Union[Type[nn.Module], str]], Type[nn.Module], str]</span>
284289

@@ -297,9 +302,9 @@ <h1>Source code for transformer.config</h1><div class="highlight"><pre>
297302
<span class="sd"> - If ``Type[nn.Module]`` then will be instantiated inside the model.</span>
298303
<span class="sd"> Should have the same API as ``transformer.attn.MHA``.</span>
299304
<span class="sd"> Default ``MHA``</span>
300-
<span class="sd"> - If ``List[Union[Type[nn.Module], str]]`` and len(ffn_class) == n_layers</span>
305+
<span class="sd"> - If ``List[Union[Type[nn.Module], str]]`` and len(attn_class) == n_layers</span>
301306
<span class="sd"> then will be instantiated inside the model for the corresponding layers.</span>
302-
<span class="sd"> Default ``SwiGLU`` for every layer.</span>
307+
<span class="sd"> Default ``MHA`` for every layer.</span>
303308
<span class="sd"> :type attn_class: Union[List[Union[Type[nn.Module], str]], Type[nn.Module], str]</span>
304309

305310
<span class="sd"> :param block_class: Transformer Block class for every layer. Default: ``None``</span>
@@ -329,11 +334,9 @@ <h1>Source code for transformer.config</h1><div class="highlight"><pre>
329334
<span class="sd"> :type seq_len: int</span>
330335

331336
<span class="sd"> :param pos_encoding: Positional Encoding for attention.</span>
332-
<span class="sd"> - If ``List[Union[Type[nn.Module], str]]`` and len(ffn_class) == n_layers</span>
333-
<span class="sd"> then will be instantiated inside the model for the corresponding layers.</span>
334-
<span class="sd"> Default ``SwiGLU`` for every layer.</span>
335337
<span class="sd"> - If ``str`` one of ``RoPE``, ``AliBI``, ``PartialRoPE``. Default: ``RoPE``</span>
336338
<span class="sd"> Note: It is recommended to change the default to ``PartialRoPE``, which is used in SOTA models like Qwen3-Next-80B-A3B</span>
339+
<span class="sd"> - If ``List[str]`` and len(pos_encoding) == n_layers, applies different positional encodings per layer.</span>
337340
<span class="sd"> :type pos_encoding: Union[List[str], str]</span>
338341

339342
<span class="sd"> :param rope_base: Base for the Exponential Frequency Calculation in RoPE. Default: ``10000.0``</span>
@@ -342,6 +345,12 @@ <h1>Source code for transformer.config</h1><div class="highlight"><pre>
342345
<span class="sd"> :param max_seq_len: Maximum sequence length for positional embeddings.</span>
343346
<span class="sd"> :type max_seq_len: int</span>
344347

348+
<span class="sd"> :param use_cache: Whether to use KV cache during generation. Default: ``True``</span>
349+
<span class="sd"> :type use_cache: bool, optional</span>
350+
351+
<span class="sd"> :param is_decoder: Whether this is a decoder model. Default: ``True``</span>
352+
<span class="sd"> :type is_decoder: bool, optional</span>
353+
345354
<span class="sd"> :param kwargs: Additional keyword arguments passed to `PretrainedConfig`</span>
346355
<span class="sd"> :type kwargs: dict, optional</span>
347356

@@ -371,17 +380,19 @@ <h1>Source code for transformer.config</h1><div class="highlight"><pre>
371380
<span class="n">attn_dropout</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span>
372381
<span class="n">tied_weights</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
373382
<span class="n">seq_len</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1024</span><span class="p">,</span>
374-
<span class="n">pos_encoding</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;RoPE&quot;</span><span class="p">,</span>
383+
<span class="n">pos_encoding</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="s2">&quot;RoPE&quot;</span><span class="p">,</span>
375384
<span class="n">rope_base</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">10000.0</span><span class="p">,</span>
376385
<span class="n">max_seq_len</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">4096</span><span class="p">,</span>
386+
<span class="n">use_cache</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
387+
<span class="n">is_decoder</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
377388
<span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">,</span>
378389
<span class="p">):</span>
379390
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
380391

381392
<span class="bp">self</span><span class="o">.</span><span class="n">n_layer</span> <span class="o">=</span> <span class="n">n_layers</span>
382393
<span class="bp">self</span><span class="o">.</span><span class="n">d_model</span> <span class="o">=</span> <span class="n">d_model</span>
383394
<span class="bp">self</span><span class="o">.</span><span class="n">n_heads</span> <span class="o">=</span> <span class="n">n_heads</span>
384-
<span class="bp">self</span><span class="o">.</span><span class="n">n_kv_heads</span> <span class="o">=</span> <span class="n">n_kv_heads</span> <span class="k">if</span> <span class="n">attn_class</span> <span class="o">==</span> <span class="s2">&quot;GQA&quot;</span> <span class="k">else</span> <span class="n">n_heads</span>
395+
<span class="bp">self</span><span class="o">.</span><span class="n">n_kv_heads</span> <span class="o">=</span> <span class="n">n_kv_heads</span> <span class="k">if</span> <span class="n">n_kv_heads</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">n_heads</span>
385396
<span class="bp">self</span><span class="o">.</span><span class="n">vocab_size</span> <span class="o">=</span> <span class="n">vocab_size</span>
386397

387398
<span class="bp">self</span><span class="o">.</span><span class="n">attn_class</span> <span class="o">=</span> <span class="n">attn_class</span>
@@ -405,7 +416,10 @@ <h1>Source code for transformer.config</h1><div class="highlight"><pre>
405416
<span class="bp">self</span><span class="o">.</span><span class="n">seq_len</span> <span class="o">=</span> <span class="n">seq_len</span>
406417
<span class="bp">self</span><span class="o">.</span><span class="n">pos_encoding</span> <span class="o">=</span> <span class="n">pos_encoding</span>
407418
<span class="bp">self</span><span class="o">.</span><span class="n">rope_base</span> <span class="o">=</span> <span class="n">rope_base</span>
408-
<span class="bp">self</span><span class="o">.</span><span class="n">max_seq_len</span> <span class="o">=</span> <span class="n">max_seq_len</span></div>
419+
<span class="bp">self</span><span class="o">.</span><span class="n">max_seq_len</span> <span class="o">=</span> <span class="n">max_seq_len</span>
420+
421+
<span class="bp">self</span><span class="o">.</span><span class="n">use_cache</span> <span class="o">=</span> <span class="n">use_cache</span>
422+
<span class="bp">self</span><span class="o">.</span><span class="n">is_decoder</span> <span class="o">=</span> <span class="n">is_decoder</span></div>
409423
</div>
410424

411425
</pre></div>
@@ -440,7 +454,7 @@ <h1>Source code for transformer.config</h1><div class="highlight"><pre>
440454

441455
</aside>
442456
</div>
443-
</div><script src="../../_static/documentation_options.js?v=e259d695"></script>
457+
</div><script src="../../_static/documentation_options.js?v=6c02275b"></script>
444458
<script src="../../_static/doctools.js?v=fd6eb6e6"></script>
445459
<script src="../../_static/sphinx_highlight.js?v=6ffebe34"></script>
446460
<script src="../../_static/scripts/furo.js?v=46bd48cc"></script>

0 commit comments

Comments
 (0)