From 561d1b3ee3c26f1ab3793643aafb8434ab5fdb15 Mon Sep 17 00:00:00 2001
From: goodcomm74
Date: Fri, 13 Mar 2026 09:58:45 +0900
Subject: [PATCH] fix: correct BitNetForCausalLM model registration and tokenizer for BitNet-b1.58-2B-4T

Two bugs prevent BitNet-b1.58-2B-4T from being converted on Apple Silicon:

1. Architecture name mismatch: Model was registered as "BitnetForCausalLM"
   (lowercase 'n') but the actual architecture in config.json is
   "BitNetForCausalLM" (capital 'N'). This caused:
   NotImplementedError: Architecture 'BitNetForCausalLM' not supported!

2. Wrong tokenizer type: BitNet-b1.58-2B-4T uses a GPT-2 style tokenizer
   (tokenizer.json) not SentencePiece (tokenizer.model). Using
   _set_vocab_sentencepiece() caused:
   FileNotFoundError: File not found: models/BitNet-b1.58-2B-4T/tokenizer.model

Tested on Apple Silicon M4 Max (macOS 25.3) using mlx-lm as the inference
backend after conversion.

Co-Authored-By: Claude Sonnet 4.6
---
 utils/convert-hf-to-gguf-bitnet.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utils/convert-hf-to-gguf-bitnet.py b/utils/convert-hf-to-gguf-bitnet.py
index 23e84384c..c52392981 100644
--- a/utils/convert-hf-to-gguf-bitnet.py
+++ b/utils/convert-hf-to-gguf-bitnet.py
@@ -952,12 +952,12 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")
 
 
-@Model.register("BitnetForCausalLM")
+@Model.register("BitNetForCausalLM")
 class BitnetModel(Model):
     model_arch = gguf.MODEL_ARCH.BITNET
 
     def set_vocab(self):
-        self._set_vocab_sentencepiece()
+        self._set_vocab_gpt2()
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()