From 77a5f05dffa324a159a0b676aa67e2a846e3656c Mon Sep 17 00:00:00 2001 From: curt-tigges Date: Thu, 6 Jun 2024 22:11:31 +0000 Subject: [PATCH 01/11] Added unit test to verify that all model configs are not blank/None --- tests/unit/test_model_configurations.py | 26 +++++++++++++++++++++ transformer_lens/loading_from_pretrained.py | 3 +++ 2 files changed, 29 insertions(+) create mode 100644 tests/unit/test_model_configurations.py diff --git a/tests/unit/test_model_configurations.py b/tests/unit/test_model_configurations.py new file mode 100644 index 000000000..8505d30c7 --- /dev/null +++ b/tests/unit/test_model_configurations.py @@ -0,0 +1,26 @@ +from functools import lru_cache + +from transformer_lens import loading +from transformer_lens.HookedTransformerConfig import HookedTransformerConfig + + +@lru_cache(maxsize=None) +def get_cached_config(model_name: str) -> HookedTransformerConfig: + """Retrieve the configuration of a pretrained model. + + Args: + model_name (str): Name of the pretrained model. + + Returns: + HookedTransformerConfig: Configuration of the pretrained model. + """ + return loading.get_pretrained_model_config(model_name) + + +def test_model_configurations(): + """Tests that all of the model configurations are in fact loaded (e.g. are not None). + """ + + for model_name in loading.DEFAULT_MODEL_ALIASES: + assert get_cached_config(model_name) is not None, f"Configuration for {model_name} is None" + diff --git a/transformer_lens/loading_from_pretrained.py b/transformer_lens/loading_from_pretrained.py index a0b29c009..d2d13811c 100644 --- a/transformer_lens/loading_from_pretrained.py +++ b/transformer_lens/loading_from_pretrained.py @@ -1336,6 +1336,9 @@ def get_pretrained_model_config( if hf_cfg is not None: cfg_dict["load_in_4bit"] = hf_cfg.get("quantization_config", {}).get("load_in_4bit", False) + if "rotary_base" in cfg_dict: + cfg_dict["rotary_base"] = int(cfg_dict["rotary_base"]) + cfg = HookedTransformerConfig.from_dict(cfg_dict) return cfg From 769a540d4c86e1e8812656db419a746247421f2e Mon Sep 17 00:00:00 2001 From: curt-tigges Date: Sat, 8 Jun 2024 17:39:22 +0000 Subject: [PATCH 02/11] Parameterized config tests --- tests/unit/test_model_configurations.py | 12 ++++++++---- transformer_lens/loading_from_pretrained.py | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/unit/test_model_configurations.py b/tests/unit/test_model_configurations.py index 8505d30c7..8d80ea2d0 100644 --- a/tests/unit/test_model_configurations.py +++ b/tests/unit/test_model_configurations.py @@ -1,8 +1,12 @@ +import os from functools import lru_cache +import pytest + from transformer_lens import loading from transformer_lens.HookedTransformerConfig import HookedTransformerConfig +os.environ["HF_TOKEN"] = "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" @lru_cache(maxsize=None) def get_cached_config(model_name: str) -> HookedTransformerConfig: @@ -17,10 +21,10 @@ def get_cached_config(model_name: str) -> HookedTransformerConfig: return loading.get_pretrained_model_config(model_name) -def test_model_configurations(): + +@pytest.mark.parametrize("model_name", loading.DEFAULT_MODEL_ALIASES) +def test_model_configurations(model_name: str): """Tests that all of the model configurations are in fact loaded (e.g. are not None). """ - - for model_name in loading.DEFAULT_MODEL_ALIASES: - assert get_cached_config(model_name) is not None, f"Configuration for {model_name} is None" + assert get_cached_config(model_name) is not None, f"Configuration for {model_name} is None" diff --git a/transformer_lens/loading_from_pretrained.py b/transformer_lens/loading_from_pretrained.py index d2d13811c..956d0cbee 100644 --- a/transformer_lens/loading_from_pretrained.py +++ b/transformer_lens/loading_from_pretrained.py @@ -1336,8 +1336,8 @@ def get_pretrained_model_config( if hf_cfg is not None: cfg_dict["load_in_4bit"] = hf_cfg.get("quantization_config", {}).get("load_in_4bit", False) - if "rotary_base" in cfg_dict: - cfg_dict["rotary_base"] = int(cfg_dict["rotary_base"]) + # if "rotary_base" in cfg_dict: + # cfg_dict["rotary_base"] = int(cfg_dict["rotary_base"]) cfg = HookedTransformerConfig.from_dict(cfg_dict) return cfg From e904b5015c15a42523dbee62abb834b722504302 Mon Sep 17 00:00:00 2001 From: curt-tigges Date: Sat, 8 Jun 2024 17:46:56 +0000 Subject: [PATCH 03/11] Minor correction --- tests/unit/test_model_configurations.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/unit/test_model_configurations.py b/tests/unit/test_model_configurations.py index 8d80ea2d0..2533995a8 100644 --- a/tests/unit/test_model_configurations.py +++ b/tests/unit/test_model_configurations.py @@ -1,4 +1,3 @@ -import os from functools import lru_cache import pytest @@ -6,7 +5,6 @@ from transformer_lens import loading from transformer_lens.HookedTransformerConfig import HookedTransformerConfig -os.environ["HF_TOKEN"] = "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" @lru_cache(maxsize=None) def get_cached_config(model_name: str) -> HookedTransformerConfig: From 9825fdc876322de9248c990a58d2e6ed181a995a Mon Sep 17 00:00:00 2001 From: curt-tigges Date: Thu, 4 Jul 2024 22:29:59 +0000 Subject: [PATCH 04/11] Set rotary_base defaults to floats and type to float --- transformer_lens/HookedTransformerConfig.py | 2 +- transformer_lens/components/abstract_attention.py | 2 +- transformer_lens/loading_from_pretrained.py | 5 +---- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/transformer_lens/HookedTransformerConfig.py b/transformer_lens/HookedTransformerConfig.py index fa743016b..af7e4a8a3 100644 --- a/transformer_lens/HookedTransformerConfig.py +++ b/transformer_lens/HookedTransformerConfig.py @@ -214,7 +214,7 @@ class HookedTransformerConfig: tokenizer_prepends_bos: Optional[bool] = None n_key_value_heads: Optional[int] = None post_embedding_ln: bool = False - rotary_base: int = 10000 + rotary_base: float = 10000.0 trust_remote_code: bool = False rotary_adjacent_pairs: bool = False load_in_4bit: bool = False diff --git a/transformer_lens/components/abstract_attention.py b/transformer_lens/components/abstract_attention.py index 0e6979df8..15dd6bcff 100644 --- a/transformer_lens/components/abstract_attention.py +++ b/transformer_lens/components/abstract_attention.py @@ -454,7 +454,7 @@ def calculate_sin_cos_rotary( self, rotary_dim: int, n_ctx: int, - base: int = 10000, + base: float = 10000, dtype: torch.dtype = torch.float32, ) -> Tuple[Float[torch.Tensor, "n_ctx rotary_dim"], Float[torch.Tensor, "n_ctx rotary_dim"]]: """ diff --git a/transformer_lens/loading_from_pretrained.py b/transformer_lens/loading_from_pretrained.py index a8b4bec75..92704e9e6 100644 --- a/transformer_lens/loading_from_pretrained.py +++ b/transformer_lens/loading_from_pretrained.py @@ -747,7 +747,7 @@ def convert_hf_model_config(model_name: str, **kwargs): "rotary_dim": 4096 // 32, "final_rms": True, "gated_mlp": True, - "rotary_base": 1000000, + "rotary_base": 1000000.0, } if "python" in official_model_name.lower(): # The vocab size of python version of CodeLlama-7b is 32000 @@ -1397,9 +1397,6 @@ def get_pretrained_model_config( if hf_cfg is not None: cfg_dict["load_in_4bit"] = hf_cfg.get("quantization_config", {}).get("load_in_4bit", False) - # if "rotary_base" in cfg_dict: - # cfg_dict["rotary_base"] = int(cfg_dict["rotary_base"]) - cfg = HookedTransformerConfig.from_dict(cfg_dict) return cfg From d2776096fd3eb40c349d520e108313aff8c9ff22 Mon Sep 17 00:00:00 2001 From: Bryce Meyer Date: Sat, 6 Jul 2024 01:52:46 +0200 Subject: [PATCH 05/11] ran format --- tests/unit/test_model_configurations.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/unit/test_model_configurations.py b/tests/unit/test_model_configurations.py index 2533995a8..2337d40be 100644 --- a/tests/unit/test_model_configurations.py +++ b/tests/unit/test_model_configurations.py @@ -19,10 +19,7 @@ def get_cached_config(model_name: str) -> HookedTransformerConfig: return loading.get_pretrained_model_config(model_name) - @pytest.mark.parametrize("model_name", loading.DEFAULT_MODEL_ALIASES) def test_model_configurations(model_name: str): - """Tests that all of the model configurations are in fact loaded (e.g. are not None). - """ + """Tests that all of the model configurations are in fact loaded (e.g. are not None).""" assert get_cached_config(model_name) is not None, f"Configuration for {model_name} is None" - From a8b0025e5c99ecf6b9ee69f5fd5fe965a61cd4a6 Mon Sep 17 00:00:00 2001 From: Bryce Meyer Date: Tue, 26 Nov 2024 00:13:18 +0100 Subject: [PATCH 06/11] added hf token access --- .github/workflows/checks.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index fb686122d..e41ebf5ed 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -69,6 +69,8 @@ jobs: poetry install --with dev - name: Unit Test run: make unit-test + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} - name: Acceptance Test run: make acceptance-test - name: Build check From a0265368a2635ecac17720fefaf35659ae5cc129 Mon Sep 17 00:00:00 2001 From: Bryce Meyer Date: Tue, 26 Nov 2024 00:17:25 +0100 Subject: [PATCH 07/11] added key to code checks --- .github/workflows/checks.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index e41ebf5ed..fc895c122 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -108,6 +108,8 @@ jobs: run: poetry run mypy . - name: Test Suite with Coverage Report run: make coverage-report-test + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} - name: Build check run: poetry build - name: Upload Coverage Report Artifact From 27bcef1746871fcdab5bb4139ba8132507f32f5f Mon Sep 17 00:00:00 2001 From: Bryce Meyer Date: Tue, 26 Nov 2024 00:26:31 +0100 Subject: [PATCH 08/11] updated to var --- .github/workflows/checks.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index fc895c122..552798b1c 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -70,7 +70,7 @@ jobs: - name: Unit Test run: make unit-test env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} + HF_TOKEN: ${{ vars.HF_TOKEN }} - name: Acceptance Test run: make acceptance-test - name: Build check @@ -109,7 +109,7 @@ jobs: - name: Test Suite with Coverage Report run: make coverage-report-test env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} + HF_TOKEN: ${{ vars.HF_TOKEN }} - name: Build check run: poetry build - name: Upload Coverage Report Artifact @@ -199,7 +199,7 @@ jobs: - name: Build Docs run: poetry run build-docs env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} + HF_TOKEN: ${{ vars.HF_TOKEN }} - name: Upload Docs Artifact uses: actions/upload-artifact@v3 with: From e102d47d55fe3b7f4fe5ba140441b47a2ddddc99 Mon Sep 17 00:00:00 2001 From: Bryce Meyer Date: Tue, 26 Nov 2024 00:40:37 +0100 Subject: [PATCH 09/11] updated config --- .github/workflows/checks.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 552798b1c..98b891820 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -13,7 +13,7 @@ on: - "!*.md" - "!.github/**" - ".github/workflows/checks.yml" # Still include current workflow - pull_request: + pull_request_target: branches: - main - dev* @@ -70,7 +70,7 @@ jobs: - name: Unit Test run: make unit-test env: - HF_TOKEN: ${{ vars.HF_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} - name: Acceptance Test run: make acceptance-test - name: Build check @@ -109,7 +109,7 @@ jobs: - name: Test Suite with Coverage Report run: make coverage-report-test env: - HF_TOKEN: ${{ vars.HF_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} - name: Build check run: poetry build - name: Upload Coverage Report Artifact @@ -199,7 +199,7 @@ jobs: - name: Build Docs run: poetry run build-docs env: - HF_TOKEN: ${{ vars.HF_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} - name: Upload Docs Artifact uses: actions/upload-artifact@v3 with: From f21753ccc1920754e9c76f43d67ec9b5ab99ec50 Mon Sep 17 00:00:00 2001 From: Bryce Meyer Date: Tue, 26 Nov 2024 00:44:04 +0100 Subject: [PATCH 10/11] added types config --- .github/workflows/checks.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 98b891820..0eab7dc65 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -14,6 +14,7 @@ on: - "!.github/**" - ".github/workflows/checks.yml" # Still include current workflow pull_request_target: + types: [assigned, opened, synchronize, reopened] branches: - main - dev* From 4dd3e0b20374572a1cdb9de8e149c455725da1c5 Mon Sep 17 00:00:00 2001 From: Bryce Meyer Date: Tue, 26 Nov 2024 00:51:34 +0100 Subject: [PATCH 11/11] reverted action --- .github/workflows/checks.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 0eab7dc65..fc895c122 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -13,8 +13,7 @@ on: - "!*.md" - "!.github/**" - ".github/workflows/checks.yml" # Still include current workflow - pull_request_target: - types: [assigned, opened, synchronize, reopened] + pull_request: branches: - main - dev*