From ac1cd1e4a3267a2e48d3d3311fd46eca673a183c Mon Sep 17 00:00:00 2001 From: jlarson4 Date: Tue, 17 Mar 2026 15:31:12 -0500 Subject: [PATCH 1/2] Setup architecture adapters for the 3 Granite Architectures --- .../factories/architecture_adapter_factory.py | 6 + .../generalized_components/moe.py | 4 +- .../model_bridge/sources/transformers.py | 5 + .../supported_architectures/__init__.py | 12 + .../supported_architectures/granite.py | 162 + .../supported_architectures/granite_moe.py | 43 + .../granite_moe_hybrid.py | 112 + .../tools/model_registry/__init__.py | 7 + .../data/architecture_gaps.json | 2013 ++-- .../model_registry/data/supported_models.json | 8806 ++++++++++++++++- 10 files changed, 10191 insertions(+), 979 deletions(-) create mode 100644 transformer_lens/model_bridge/supported_architectures/granite.py create mode 100644 transformer_lens/model_bridge/supported_architectures/granite_moe.py create mode 100644 transformer_lens/model_bridge/supported_architectures/granite_moe_hybrid.py diff --git a/transformer_lens/factories/architecture_adapter_factory.py b/transformer_lens/factories/architecture_adapter_factory.py index 1843462dd..3c72a51f0 100644 --- a/transformer_lens/factories/architecture_adapter_factory.py +++ b/transformer_lens/factories/architecture_adapter_factory.py @@ -12,6 +12,9 @@ Gemma2ArchitectureAdapter, Gemma3ArchitectureAdapter, Gemma3MultimodalArchitectureAdapter, + GraniteArchitectureAdapter, + GraniteMoeArchitectureAdapter, + GraniteMoeHybridArchitectureAdapter, GPT2ArchitectureAdapter, Gpt2LmHeadCustomArchitectureAdapter, GptjArchitectureAdapter, @@ -51,6 +54,9 @@ "Gemma2ForCausalLM": Gemma2ArchitectureAdapter, "Gemma3ForCausalLM": Gemma3ArchitectureAdapter, "Gemma3ForConditionalGeneration": Gemma3MultimodalArchitectureAdapter, + "GraniteForCausalLM": GraniteArchitectureAdapter, + "GraniteMoeForCausalLM": GraniteMoeArchitectureAdapter, + "GraniteMoeHybridForCausalLM": GraniteMoeHybridArchitectureAdapter, "GPT2LMHeadModel": 
GPT2ArchitectureAdapter, "GptOssForCausalLM": GPTOSSArchitectureAdapter, "GPT2LMHeadCustomModel": Gpt2LmHeadCustomArchitectureAdapter, diff --git a/transformer_lens/model_bridge/generalized_components/moe.py b/transformer_lens/model_bridge/generalized_components/moe.py index 6a7b55cca..ea96c4916 100644 --- a/transformer_lens/model_bridge/generalized_components/moe.py +++ b/transformer_lens/model_bridge/generalized_components/moe.py @@ -65,8 +65,10 @@ def get_random_inputs( if dtype is None: dtype = torch.float32 d_model = self.config.d_model if self.config and hasattr(self.config, "d_model") else 768 + # Use positional args to avoid parameter name mismatches across MoE implementations + # (e.g., Mixtral uses "hidden_states", GraniteMoe uses "layer_input") return { - "hidden_states": torch.randn(batch_size, seq_len, d_model, device=device, dtype=dtype) + "args": (torch.randn(batch_size, seq_len, d_model, device=device, dtype=dtype),) } def forward(self, *args: Any, **kwargs: Any) -> Any: diff --git a/transformer_lens/model_bridge/sources/transformers.py b/transformer_lens/model_bridge/sources/transformers.py index 4bca9dbb6..4ea4314fc 100644 --- a/transformer_lens/model_bridge/sources/transformers.py +++ b/transformer_lens/model_bridge/sources/transformers.py @@ -342,6 +342,11 @@ def boot( attn_logit_softcapping = getattr(hf_config, "attn_logit_softcapping", None) if attn_logit_softcapping is not None: bridge_config.attn_scores_soft_cap = float(attn_logit_softcapping) + # Propagate position_embedding_type for Granite Hybrid models that use + # "nope" (no positional embeddings) instead of "rope" on some/all layers. + position_embedding_type = getattr(hf_config, "position_embedding_type", None) + if position_embedding_type is not None: + bridge_config.position_embedding_type = position_embedding_type # Propagate vision config for multimodal models so the adapter can # select the correct vision encoder bridge (CLIP vs SigLIP). 
if hasattr(hf_config, "vision_config") and hf_config.vision_config is not None: diff --git a/transformer_lens/model_bridge/supported_architectures/__init__.py b/transformer_lens/model_bridge/supported_architectures/__init__.py index 8a74e90e2..64cae5a2f 100644 --- a/transformer_lens/model_bridge/supported_architectures/__init__.py +++ b/transformer_lens/model_bridge/supported_architectures/__init__.py @@ -21,6 +21,15 @@ from transformer_lens.model_bridge.supported_architectures.gemma3_multimodal import ( Gemma3MultimodalArchitectureAdapter, ) +from transformer_lens.model_bridge.supported_architectures.granite import ( + GraniteArchitectureAdapter, +) +from transformer_lens.model_bridge.supported_architectures.granite_moe import ( + GraniteMoeArchitectureAdapter, +) +from transformer_lens.model_bridge.supported_architectures.granite_moe_hybrid import ( + GraniteMoeHybridArchitectureAdapter, +) from transformer_lens.model_bridge.supported_architectures.gpt2 import ( GPT2ArchitectureAdapter, ) @@ -116,6 +125,9 @@ "Gemma2ArchitectureAdapter", "Gemma3ArchitectureAdapter", "Gemma3MultimodalArchitectureAdapter", + "GraniteArchitectureAdapter", + "GraniteMoeArchitectureAdapter", + "GraniteMoeHybridArchitectureAdapter", "GPT2ArchitectureAdapter", "GPTOSSArchitectureAdapter", "Gpt2LmHeadCustomArchitectureAdapter", diff --git a/transformer_lens/model_bridge/supported_architectures/granite.py b/transformer_lens/model_bridge/supported_architectures/granite.py new file mode 100644 index 000000000..0b6afa72f --- /dev/null +++ b/transformer_lens/model_bridge/supported_architectures/granite.py @@ -0,0 +1,162 @@ +"""Granite architecture adapter. + +Base adapter for the IBM Granite model family. Provides shared config setup and +helper methods used by GraniteMoe and GraniteMoeHybrid variants. 
+""" + +from typing import Any, Dict + +from transformer_lens.conversion_utils.conversion_steps import RearrangeTensorConversion +from transformer_lens.conversion_utils.param_processing_conversion import ( + ParamProcessingConversion, +) +from transformer_lens.model_bridge.architecture_adapter import ArchitectureAdapter +from transformer_lens.model_bridge.generalized_components import ( + BlockBridge, + EmbeddingBridge, + GatedMLPBridge, + LinearBridge, + PositionEmbeddingsAttentionBridge, + RMSNormalizationBridge, + RotaryEmbeddingBridge, + UnembeddingBridge, +) + + +class GraniteArchitectureAdapter(ArchitectureAdapter): + """Architecture adapter for IBM Granite models (dense). + + Granite is a Llama-like architecture with RMSNorm, rotary position embeddings + (RoPE), GQA, and a gated MLP (SiLU activation). Granite-specific scaling + multipliers are handled by the HF model's native forward pass. + + Optional Parameters (may not exist in state_dict): + ------------------------------------------------- + Granite models do NOT have biases on attention and MLP projections: + + - blocks.{i}.attn.b_Q/b_K/b_V/b_O - No bias on attention projections + - blocks.{i}.mlp.b_in/b_gate/b_out - No bias on MLP projections + - blocks.{i}.ln1.b, blocks.{i}.ln2.b, ln_final.b - RMSNorm has no bias + """ + + def __init__(self, cfg: Any) -> None: + """Initialize the Granite architecture adapter.""" + super().__init__(cfg) + + self._setup_common_config(cfg) + n_kv_heads = self._get_n_kv_heads() + self.weight_processing_conversions = self._build_attn_weight_conversions(n_kv_heads) + self.component_mapping = self._build_component_mapping() + + def _setup_common_config(self, cfg: Any) -> None: + """Set up config variables shared across all Granite variants.""" + self.cfg.normalization_type = "RMS" + self.cfg.positional_embedding_type = "rotary" + self.cfg.final_rms = True + self.cfg.gated_mlp = True + self.cfg.attn_only = False + self.cfg.uses_rms_norm = True + self.cfg.eps_attr = 
"variance_epsilon" + + self.default_config = { + "d_model": cfg.d_model, + "d_head": cfg.d_model // cfg.n_heads, + "n_heads": cfg.n_heads, + "n_layers": cfg.n_layers, + "d_vocab": cfg.d_vocab, + } + + if hasattr(cfg, "n_key_value_heads") and cfg.n_key_value_heads is not None: + self.default_config["n_key_value_heads"] = cfg.n_key_value_heads + self.cfg.n_key_value_heads = cfg.n_key_value_heads + + def _get_n_kv_heads(self) -> int: + """Get the number of key-value heads (for GQA or MHA).""" + if hasattr(self.cfg, "n_key_value_heads") and self.cfg.n_key_value_heads is not None: + return self.cfg.n_key_value_heads + return self.cfg.n_heads + + def _build_attn_weight_conversions(self, n_kv_heads: int) -> Dict[str, ParamProcessingConversion]: + """Build weight processing conversions for attention projections.""" + return { + "blocks.{i}.attn.q.weight": ParamProcessingConversion( + tensor_conversion=RearrangeTensorConversion("(n h) m -> n m h", n=self.cfg.n_heads), + ), + "blocks.{i}.attn.k.weight": ParamProcessingConversion( + tensor_conversion=RearrangeTensorConversion("(n h) m -> n m h", n=n_kv_heads), + ), + "blocks.{i}.attn.v.weight": ParamProcessingConversion( + tensor_conversion=RearrangeTensorConversion("(n h) m -> n m h", n=n_kv_heads), + ), + "blocks.{i}.attn.o.weight": ParamProcessingConversion( + tensor_conversion=RearrangeTensorConversion("m (n h) -> n h m", n=self.cfg.n_heads), + ), + } + + def _build_attention_bridge(self) -> PositionEmbeddingsAttentionBridge: + """Build the standard Granite attention bridge.""" + return PositionEmbeddingsAttentionBridge( + name="self_attn", + config=self.cfg, + submodules={ + "q": LinearBridge(name="q_proj"), + "k": LinearBridge(name="k_proj"), + "v": LinearBridge(name="v_proj"), + "o": LinearBridge(name="o_proj"), + }, + requires_attention_mask=True, + requires_position_embeddings=True, + ) + + def _build_mlp_bridge(self) -> GatedMLPBridge: + """Build the dense gated MLP bridge.""" + return GatedMLPBridge( + name="mlp", 
+ config=self.cfg, + submodules={ + "gate": LinearBridge(name="gate_proj"), + "in": LinearBridge(name="up_proj"), + "out": LinearBridge(name="down_proj"), + }, + ) + + def _build_component_mapping(self) -> dict: + """Build the full component mapping for dense Granite.""" + return { + "embed": EmbeddingBridge(name="model.embed_tokens"), + "rotary_emb": RotaryEmbeddingBridge(name="model.rotary_emb"), + "blocks": BlockBridge( + name="model.layers", + submodules={ + "ln1": RMSNormalizationBridge(name="input_layernorm", config=self.cfg), + "ln2": RMSNormalizationBridge(name="post_attention_layernorm", config=self.cfg), + "attn": self._build_attention_bridge(), + "mlp": self._build_mlp_bridge(), + }, + ), + "ln_final": RMSNormalizationBridge(name="model.norm", config=self.cfg), + "unembed": UnembeddingBridge(name="lm_head", config=self.cfg), + } + + def setup_component_testing(self, hf_model: Any, bridge_model: Any = None) -> None: + """Set up rotary embedding references for Granite component testing. 
+ + Args: + hf_model: The HuggingFace Granite model instance + bridge_model: The TransformerBridge model (if available) + """ + if not hasattr(hf_model.model, "rotary_emb"): + return + + rotary_emb = hf_model.model.rotary_emb + + if bridge_model is not None and hasattr(bridge_model, "blocks"): + for block in bridge_model.blocks: + if hasattr(block, "attn"): + block.attn.set_rotary_emb(rotary_emb) + + try: + attn_bridge = self.get_generalized_component("blocks.0.attn") + attn_bridge.set_rotary_emb(rotary_emb) + except (AttributeError, KeyError): + pass diff --git a/transformer_lens/model_bridge/supported_architectures/granite_moe.py b/transformer_lens/model_bridge/supported_architectures/granite_moe.py new file mode 100644 index 000000000..5db23df31 --- /dev/null +++ b/transformer_lens/model_bridge/supported_architectures/granite_moe.py @@ -0,0 +1,43 @@ +"""Granite MoE architecture adapter.""" + +from transformer_lens.model_bridge.generalized_components import ( + BlockBridge, + EmbeddingBridge, + MoEBridge, + RMSNormalizationBridge, + RotaryEmbeddingBridge, + UnembeddingBridge, +) +from transformer_lens.model_bridge.supported_architectures.granite import ( + GraniteArchitectureAdapter, +) + + +class GraniteMoeArchitectureAdapter(GraniteArchitectureAdapter): + """Architecture adapter for IBM Granite MoE models. + + Identical to dense Granite but replaces the gated MLP with a Sparse Mixture + of Experts block (block_sparse_moe) using batched expert parameters and + top-k routing. 
+ """ + + def _build_component_mapping(self) -> dict: + """Build component mapping with MoE instead of dense MLP.""" + return { + "embed": EmbeddingBridge(name="model.embed_tokens"), + "rotary_emb": RotaryEmbeddingBridge(name="model.rotary_emb"), + "blocks": BlockBridge( + name="model.layers", + submodules={ + "ln1": RMSNormalizationBridge(name="input_layernorm", config=self.cfg), + "ln2": RMSNormalizationBridge(name="post_attention_layernorm", config=self.cfg), + "attn": self._build_attention_bridge(), + "mlp": MoEBridge( + name="block_sparse_moe", + config=self.cfg, + ), + }, + ), + "ln_final": RMSNormalizationBridge(name="model.norm", config=self.cfg), + "unembed": UnembeddingBridge(name="lm_head", config=self.cfg), + } diff --git a/transformer_lens/model_bridge/supported_architectures/granite_moe_hybrid.py b/transformer_lens/model_bridge/supported_architectures/granite_moe_hybrid.py new file mode 100644 index 000000000..a594e8e64 --- /dev/null +++ b/transformer_lens/model_bridge/supported_architectures/granite_moe_hybrid.py @@ -0,0 +1,112 @@ +"""Granite MoE Hybrid architecture adapter. + +GraniteMoeHybridForCausalLM is a hybrid Mamba + Attention architecture with +Sparse Mixture of Experts. Each layer is either a Mamba SSM block or a +standard attention block (per config.layer_types), with a shared MLP and +optional sparse MoE on every layer. + +Since self_attn is None on Mamba layers and mamba is None on attention +layers, we only map submodules that exist on ALL layers (norms, shared_mlp, +block_sparse_moe). The HF native forward handles mamba/attention dispatch. 
+""" + +from typing import Any + +from transformer_lens.model_bridge.architecture_adapter import ArchitectureAdapter +from transformer_lens.model_bridge.generalized_components import ( + BlockBridge, + EmbeddingBridge, + LinearBridge, + MLPBridge, + MoEBridge, + RMSNormalizationBridge, + RotaryEmbeddingBridge, + UnembeddingBridge, +) +from transformer_lens.model_bridge.supported_architectures.granite import ( + GraniteArchitectureAdapter, +) + + +class GraniteMoeHybridArchitectureAdapter(GraniteArchitectureAdapter): + """Architecture adapter for IBM Granite MoE Hybrid models. + + Hybrid Mamba2 + Attention architecture with Sparse MoE. Most layers are Mamba + SSM blocks; a few are standard attention (determined by config.layer_types). + + Since self_attn is None on Mamba layers and mamba is None on attention layers, + we only map submodules present on ALL layers (norms, shared_mlp, MoE). The HF + native forward handles mamba/attention dispatch internally. + + Hook coverage: + - Block-level: hook_resid_pre, hook_resid_post on every layer + - Normalization: ln1 (input_layernorm), ln2 (post_attention_layernorm) + - MLP: shared_mlp input/output hooks + - MoE: block_sparse_moe input/output and router_scores hooks + - Attention/Mamba internals are NOT individually hooked (conditional per layer) + """ + + def __init__(self, cfg: Any) -> None: + """Initialize the Granite MoE Hybrid architecture adapter.""" + # Call ArchitectureAdapter.__init__ directly, not GraniteArchitectureAdapter.__init__, + # because we need to customize the setup sequence + ArchitectureAdapter.__init__(self, cfg) + + self._setup_common_config(cfg) + + # Hybrid may use "rope" or "nope" (no positional embeddings) + pos_emb_type = getattr(cfg, "position_embedding_type", "rope") + if pos_emb_type != "rope": + self.cfg.positional_embedding_type = "none" + + # No attention weight conversions — attn Q/K/V aren't mapped as submodules + self.weight_processing_conversions = {} + self.component_mapping = 
self._build_component_mapping() + + def _build_component_mapping(self) -> dict: + """Build component mapping with only universal (all-layer) submodules.""" + block_submodules = { + "ln1": RMSNormalizationBridge(name="input_layernorm", config=self.cfg), + "ln2": RMSNormalizationBridge(name="post_attention_layernorm", config=self.cfg), + "shared_mlp": MLPBridge( + name="shared_mlp", + config=self.cfg, + submodules={ + "in": LinearBridge(name="input_linear"), + "out": LinearBridge(name="output_linear"), + }, + ), + } + + num_experts = getattr(self.cfg, "num_experts", None) or getattr( + self.cfg, "num_local_experts", 0 + ) + if num_experts > 0: + block_submodules["moe"] = MoEBridge( + name="block_sparse_moe", + config=self.cfg, + ) + + mapping = { + "embed": EmbeddingBridge(name="model.embed_tokens"), + "blocks": BlockBridge( + name="model.layers", + submodules=block_submodules, + ), + "ln_final": RMSNormalizationBridge(name="model.norm", config=self.cfg), + "unembed": UnembeddingBridge(name="lm_head", config=self.cfg), + } + + if self.cfg.positional_embedding_type == "rotary": + mapping["rotary_emb"] = RotaryEmbeddingBridge( + name="model.rotary_emb", config=self.cfg + ) + + return mapping + + def setup_component_testing(self, hf_model: Any, bridge_model: Any = None) -> None: + """No-op for hybrid models. + + Hybrid models don't map attention as a submodule (it's conditional per + layer), so there are no rotary embedding references to set up. 
+ """ diff --git a/transformer_lens/tools/model_registry/__init__.py b/transformer_lens/tools/model_registry/__init__.py index 44b128528..bfe1d6be3 100644 --- a/transformer_lens/tools/model_registry/__init__.py +++ b/transformer_lens/tools/model_registry/__init__.py @@ -47,6 +47,10 @@ "GemmaForCausalLM", "Gemma2ForCausalLM", "Gemma3ForCausalLM", + "Gemma3ForConditionalGeneration", + "GraniteForCausalLM", + "GraniteMoeForCausalLM", + "GraniteMoeHybridForCausalLM", "GPT2LMHeadModel", "GptOssForCausalLM", "GPTJForCausalLM", @@ -54,6 +58,9 @@ "OpenELMForCausalLM", "GPTNeoXForCausalLM", "LlamaForCausalLM", + "LlavaForConditionalGeneration", + "LlavaNextForConditionalGeneration", + "LlavaOnevisionForConditionalGeneration", "MistralForCausalLM", "MixtralForCausalLM", "Olmo2ForCausalLM", diff --git a/transformer_lens/tools/model_registry/data/architecture_gaps.json b/transformer_lens/tools/model_registry/data/architecture_gaps.json index ca43b33c0..2525966e5 100644 --- a/transformer_lens/tools/model_registry/data/architecture_gaps.json +++ b/transformer_lens/tools/model_registry/data/architecture_gaps.json @@ -1,124 +1,76 @@ { - "generated_at": "2026-02-19", + "generated_at": "2026-03-17", "scan_info": { - "total_scanned": 7808, + "total_scanned": 4221, "task_filter": "text-generation", "min_downloads": 500, - "scan_duration_seconds": 2.2 + "scan_duration_seconds": 2.6 }, - "total_unsupported_architectures": 238, - "total_unsupported_models": 1258, + "total_unsupported_architectures": 254, + "total_unsupported_models": 1019, "gaps": [ { "architecture_id": "Qwen3MoeForCausalLM", - "total_models": 130, + "total_models": 66, "sample_models": [ - "Qwen/Qwen3-30B-A3B-Instruct-2507", "Qwen/Qwen3-30B-A3B", - "Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", - "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8", + "Qwen/Qwen3-30B-A3B-Thinking-2507", + "Qwen/Qwen3-30B-A3B-Instruct-2507", "Qwen/Qwen3-Coder-30B-A3B-Instruct", "Qwen/Qwen3-235B-A22B", - "Qwen/Qwen3-30B-A3B-Thinking-2507", - 
"Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", - "Qwen/Qwen3-30B-A3B-GPTQ-Int4", - "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8" + "trl-internal-testing/tiny-Qwen3MoeForCausalLM", + "Qwen/Qwen3-235B-A22B-Instruct-2507", + "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "nvidia/Qwen3-30B-A3B-NVFP4", + "NVFP4/Qwen3-30B-A3B-Instruct-2507-FP4" ] }, { "architecture_id": "DeepseekV3ForCausalLM", - "total_models": 69, + "total_models": 51, "sample_models": [ + "deepseek-ai/DeepSeek-R1", "deepseek-ai/DeepSeek-V3", "deepseek-ai/DeepSeek-R1-0528", - "deepseek-ai/DeepSeek-R1", - "moonshotai/Kimi-K2-Thinking", "deepseek-ai/DeepSeek-V3-0324", - "moonshotai/Kimi-K2-Instruct", "nvidia/DeepSeek-R1-0528-NVFP4-v2", - "nvidia/Kimi-K2-Thinking-NVFP4", "deepseek-ai/DeepSeek-V3.1", - "trl-internal-testing/tiny-DeepseekV3ForCausalLM" - ] - }, - { - "architecture_id": "Qwen3NextForCausalLM", - "total_models": 67, - "sample_models": [ - "Qwen/Qwen3-Next-80B-A3B-Instruct", - "Qwen/Qwen3-Coder-Next", - "bullpoint/Qwen3-Coder-Next-AWQ-4bit", - "Qwen/Qwen3-Coder-Next-FP8", - "cyankiwi/Qwen3-Next-80B-A3B-Thinking-AWQ-4bit", - "GadflyII/Qwen3-Coder-Next-NVFP4", - "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8", - "NexVeridian/Qwen3-Coder-Next-8bit", - "cyankiwi/Qwen3-Next-80B-A3B-Instruct-AWQ-4bit", - "Qwen/Qwen3-Next-80B-A3B-Thinking" + "ai-sage/GigaChat3-10B-A1.8B", + "trl-internal-testing/tiny-DeepseekV3ForCausalLM", + "trl-internal-testing/tiny-DeepseekV3ForCausalLM-0528", + "nvidia/DeepSeek-V3-0324-NVFP4" ] }, { - "architecture_id": "Glm4MoeForCausalLM", - "total_models": 47, - "sample_models": [ - "cyankiwi/GLM-4.5-Air-AWQ-4bit", - "zai-org/GLM-4.7-FP8", - "zai-org/GLM-4.5-Air", - "zai-org/GLM-4.7", - "zai-org/GLM-4.6", - "zai-org/GLM-4.5-Air-FP8", - "mlx-community/GLM-4.7-4bit", - "zai-org/GLM-4.5", - "mlx-community/GLM-4.7-8bit-gs32", - "mlx-community/GLM-4.7-6bit" - ] - }, - { - "architecture_id": "MiniMaxM2ForCausalLM", + "architecture_id": "Qwen3_5ForConditionalGeneration", "total_models": 42, "sample_models": 
[ - "MiniMaxAI/MiniMax-M2", - "QuantTrio/MiniMax-M2-AWQ", - "MiniMaxAI/MiniMax-M2.5", - "MiniMaxAI/MiniMax-M2.1", - "lmstudio-community/MiniMax-M2.5-MLX-8bit", - "lmstudio-community/MiniMax-M2.5-MLX-4bit", - "lmstudio-community/MiniMax-M2.5-MLX-6bit", - "cyankiwi/MiniMax-M2.1-AWQ-4bit", - "mlx-community/MiniMax-M2.1-8bit", - "mlx-community/MiniMax-M2.1-3bit" + "Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled", + "osoleve/Qwen3.5-27B-Text-NVFP4-MTP", + "nightmedia/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled-qx64-hi-mlx", + "Tesslate/OmniCoder-9B", + "txn545/Qwen3.5-27B-NVFP4", + "Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled", + "EganAI/qwen3.5-9b-terminal-merge", + "Jackrong/Qwen3.5-4B-Claude-4.6-Opus-Reasoning-Distilled", + "Jackrong/Qwen3.5-2B-Claude-4.6-Opus-Reasoning-Distilled", + "nightmedia/Qwen3.5-27B-Text" ] }, { - "architecture_id": "Glm4MoeLiteForCausalLM", - "total_models": 38, - "sample_models": [ - "zai-org/GLM-4.7-Flash", - "lmstudio-community/GLM-4.7-Flash-MLX-8bit", - "lmstudio-community/GLM-4.7-Flash-MLX-6bit", - "GadflyII/GLM-4.7-Flash-NVFP4", - "cyankiwi/GLM-4.7-Flash-AWQ-4bit", - "QuantTrio/GLM-4.7-Flash-AWQ", - "unsloth/GLM-4.7-Flash-FP8-Dynamic", - "unsloth/GLM-4.7-Flash", - "marksverdhei/GLM-4.7-Flash-FP8", - "mlx-community/GLM-4.7-Flash-4bit" - ] - }, - { - "architecture_id": "Lfm2ForCausalLM", - "total_models": 33, + "architecture_id": "Qwen3NextForCausalLM", + "total_models": 37, "sample_models": [ - "lmstudio-community/LFM2.5-1.2B-Instruct-MLX-8bit", - "lmstudio-community/LFM2.5-1.2B-Instruct-MLX-6bit", - "lmstudio-community/LFM2.5-1.2B-Instruct-MLX-4bit", - "LiquidAI/LFM2-1.2B", - "LiquidAI/LFM2.5-1.2B-Instruct", - "LiquidAI/LFM2-2.6B", - "LiquidAI/LFM2.5-1.2B-Thinking", - "LiquidAI/LFM2-350M", - "lmstudio-community/LFM2-1.2B-MLX-8bit", - "lmstudio-community/LFM2-1.2B-MLX-bf16" + "Qwen/Qwen3-Coder-Next", + "Qwen/Qwen3-Next-80B-A3B-Instruct", + "GadflyII/Qwen3-Coder-Next-NVFP4", + 
"nvidia/Qwen3-Next-80B-A3B-Thinking-NVFP4", + "nvidia/Qwen3-Next-80B-A3B-Instruct-NVFP4", + "Qwen/Qwen3-Next-80B-A3B-Thinking", + "tiny-random/qwen3-next-moe", + "unsloth/Qwen3-Coder-Next", + "yujiepan/qwen3-next-moe-tiny-random", + "RedHatAI/Qwen3-Coder-Next-NVFP4" ] }, { @@ -130,64 +82,32 @@ "tiiuae/falcon-40b-instruct", "tiiuae/falcon-40b", "fxmarty/really-tiny-falcon-testing", + "tiiuae/falcon-rw-1b", "vilsonrodrigues/falcon-7b-instruct-sharded", "tiiuae/falcon-11B", - "tiiuae/falcon-rw-1b", "euclaise/falcon_1b_stage2", - "fxmarty/tiny-testing-falcon-alibi" - ] - }, - { - "architecture_id": "GraniteMoeHybridForCausalLM", - "total_models": 28, - "sample_models": [ - "ibm-granite/granite-4.0-micro", - "ibm-granite/granite-4.0-h-small", - "ibm-granite/granite-4.0-tiny-preview", - "ibm-granite/granite-4.0-h-tiny", - "onnx-community/granite-4.0-350m-ONNX-web", - "cyankiwi/granite-4.0-h-tiny-AWQ-4bit", - "ibm-granite/granite-4.0-h-micro", - "ibm-granite/granite-4.0-350m-base", - "ibm-granite/granite-4.0-350m", - "ibm-granite/granite-4.0-1b" + "explosion-testing/falcon-test" ] }, { - "architecture_id": "GraniteForCausalLM", - "total_models": 23, - "sample_models": [ - "ibm-granite/granite-3.1-8b-instruct", - "ibm-granite/granite-3.3-8b-instruct", - "ibm-research/PowerLM-3b", - "ibm-granite/granite-3.3-2b-instruct", - "ibm-granite/granite-guardian-3.0-2b", - "ibm-granite/granite-3.0-8b-instruct", - "ibm-granite/granite-3.2-8b-instruct", - "ibm-granite/granite-3.2-2b-instruct", - "ibm-granite/granite-3.1-2b-instruct", - "ibm-granite/granite-3.0-2b-instruct" - ] - }, - { - "architecture_id": "DeepseekV2ForCausalLM", - "total_models": 20, + "architecture_id": "Qwen3_5MoeForConditionalGeneration", + "total_models": 27, "sample_models": [ - "deepseek-ai/DeepSeek-V2-Lite-Chat", - "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct", - "deepseek-ai/DeepSeek-V2-Lite", - "casperhansen/deepseek-coder-v2-instruct-awq", - "deepseek-ai/DeepSeek-V2.5", - 
"RedHatAI/DeepSeek-Coder-V2-Lite-Instruct-FP8", - "RedHatAI/DeepSeek-V2.5-1210-FP8", - "deepseek-ai/DeepSeek-V2-Chat", - "deepseek-ai/DeepSeek-Coder-V2-Lite-Base", - "TechxGenus/DeepSeek-Coder-V2-Lite-Instruct-AWQ" + "txn545/Qwen3.5-122B-A10B-NVFP4", + "nvidia/Qwen3.5-397B-A17B-NVFP4", + "txn545/Qwen3.5-35B-A3B-NVFP4", + "RepublicOfKorokke/Qwen3.5-35B-A3B-mlx-lm-mxfp4", + "nightmedia/Qwen3.5-35B-A3B-Text-qx64-hi-mlx", + "nightmedia/Qwen3.5-122B-A10B-Text-mxfp4-mlx", + "olka-fi/Qwen3.5-122B-A10B-MXFP4", + "lukealonso/Qwen3.5-397B-A17B-NVFP4", + "Jackrong/Qwen3.5-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled", + "NexVeridian/Qwen3.5-35B-A3B-3bit" ] }, { "architecture_id": "InternLM2ForCausalLM", - "total_models": 20, + "total_models": 21, "sample_models": [ "internlm/internlm2-chat-7b", "internlm/internlm2_5-7b-chat", @@ -198,39 +118,87 @@ "internlm/internlm2-base-20b", "chujiezheng/internlm2-chat-7b-ExPO", "chujiezheng/internlm2-chat-20b-ExPO", - "optimum-internal-testing/tiny-random-internlm2" + "AI4Chem/ChemLLM-7B-Chat-1_5-DPO" + ] + }, + { + "architecture_id": "Lfm2ForCausalLM", + "total_models": 19, + "sample_models": [ + "LiquidAI/LFM2-1.2B", + "LiquidAI/LFM2.5-1.2B-Instruct", + "LiquidAI/LFM2-350M", + "LiquidAI/LFM2.5-1.2B-Base", + "LiquidAI/LFM2.5-1.2B-Thinking", + "LiquidAI/LFM2-2.6B", + "LiquidAI/LFM2-2.6B-Exp", + "LiquidAI/LFM2-700M", + "unsloth/LFM2.5-1.2B-Instruct", + "LiquidAI/LFM2.5-1.2B-Thinking-ONNX" ] }, { - "architecture_id": "Gemma3ForConditionalGeneration", - "total_models": 20, + "architecture_id": "Glm4MoeForCausalLM", + "total_models": 17, "sample_models": [ - "DreamFast/gemma-3-12b-it-heretic", - "mlx-community/translategemma-4b-it-4bit", - "aisingapore/Gemma-SEA-LION-v4-27B-IT", - "vanta-research/scout-4b", - "gateremark/kikuyu_translategemma_12b_merged_V2", - "vanta-research/atom-v1-preview-4b", - "mlx-community/translategemma-4b-it-8bit", - "aisingapore/Gemma-SEA-LION-v4-27B-IT-FP8-Dynamic", - "vanta-research/atom-v1-preview-12b", - 
"ig1/medgemma-27b-it-FP8-Dynamic" + "zai-org/GLM-4.5-Air", + "zai-org/GLM-4.7", + "trl-internal-testing/tiny-Glm4MoeForCausalLM", + "zai-org/GLM-4.6", + "zai-org/GLM-4.5", + "Tengyunw/GLM-4.7-NVFP4", + "Salyut1/GLM-4.7-NVFP4", + "np-cr/testing-glm4-moe", + "ArliAI/GLM-4.6-Derestricted-v3", + "zai-org/GLM-4.5-Air-Base" + ] + }, + { + "architecture_id": "JambaForCausalLM", + "total_models": 17, + "sample_models": [ + "ai21labs/AI21-Jamba-Mini-1.5", + "ai21labs/AI21-Jamba2-3B", + "ai21labs/Jamba-tiny-random", + "ai21labs/AI21-Jamba-Reasoning-3B", + "ai21labs/AI21-Jamba-Large-1.5", + "ai21labs/AI21-Jamba-Mini-1.6", + "ai21labs/AI21-Jamba-Large-1.6", + "microsoft/Dayhoff-170m-GR", + "ai21labs/Jamba-v0.1", + "microsoft/Dayhoff-170m-UR90" + ] + }, + { + "architecture_id": "QWenLMHeadModel", + "total_models": 16, + "sample_models": [ + "Qwen/Qwen-7B", + "Qwen/Qwen-7B-Chat", + "Qwen/Qwen-VL-Chat", + "Qwen/Qwen-VL", + "Qwen/Qwen-14B-Chat-Int4", + "Qwen/Qwen-14B-Chat", + "Qwen/Qwen-1_8B-Chat", + "Qwen/Qwen-72B", + "Qwen/Qwen-14B", + "Qwen/Qwen-Audio-Chat" ] }, { "architecture_id": "FalconH1ForCausalLM", - "total_models": 19, + "total_models": 16, "sample_models": [ "tiiuae/Falcon-H1-Tiny-90M-Instruct", "tiiuae/Falcon-H1-0.5B-Base", - "tiiuae/Falcon-H1R-7B", "tiiuae/Falcon-H1-7B-Instruct", - "tiiuae/Falcon-H1-3B-Instruct", - "tiiuae/Falcon-H1-7B-Base", + "tiiuae/Falcon-H1R-7B", "tiiuae/Falcon-H1-34B-Instruct", "tiiuae/Falcon-H1-34B-Base", "tiiuae/Falcon-H1-1.5B-Base", - "tiiuae/Falcon-H1-3B-Base" + "tiiuae/Falcon-H1-7B-Base", + "tiiuae/Falcon-H1-3B-Base", + "tiiuae/Falcon-H1-1.5B-Instruct" ] }, { @@ -240,707 +208,640 @@ "bigcode/gpt_bigcode-santacoder", "bigcode/tiny_starcoder_py", "bigcode/starcoder", - "bigcode/starcoderbase-3b", "bigcode/starcoderbase-1b", - "ibm-granite/granite-20b-code-instruct-8k", "ibm-granite/granite-20b-code-base-8k", - "HuggingFaceH4/starchat-alpha", + "ibm-granite/granite-20b-code-instruct-8k", "HuggingFaceH4/starchat-beta", - 
"V-YangXu/StarCoder-Alpaca" + "bigcode/starcoderbase-3b", + "HuggingFaceH4/starchat-alpha", + "openchat/opencoderplus" ] }, { - "architecture_id": "RwkvForCausalLM", - "total_models": 15, + "architecture_id": "Glm4MoeLiteForCausalLM", + "total_models": 14, "sample_models": [ - "RWKV/v5-Eagle-7B-HF", - "RWKV/rwkv-4-169m-pile", - "recursal/EagleX_1-7T_HF", - "beomi/KoRWKV-6B", - "RWKV/rwkv-4-430m-pile", - "RWKV/rwkv-raven-1b5", - "RWKV/rwkv-4-3b-pile", - "RWKV/rwkv-4-14b-pile", - "RWKV/rwkv-raven-14b", - "RWKV/rwkv-raven-7b" + "zai-org/GLM-4.7-Flash", + "GadflyII/GLM-4.7-Flash-NVFP4", + "unsloth/GLM-4.7-Flash", + "GadflyII/GLM-4.7-Flash-MTP-NVFP4", + "Olafangensan/GLM-4.7-Flash-heretic", + "huihui-ai/Huihui-GLM-4.7-Flash-abliterated", + "cerebras/GLM-4.7-Flash-REAP-23B-A3B", + "TeichAI/GLM-4.7-Flash-Claude-Opus-4.5-High-Reasoning-Distill", + "Ex0bit/GLM-4.7-Flash-PRISM", + "MuXodious/GLM-4.7-Flash-absolute-heresy" ] }, { - "architecture_id": "QWenLMHeadModel", + "architecture_id": "NemotronHForCausalLM", "total_models": 14, "sample_models": [ - "Qwen/Qwen-7B-Chat", - "Qwen/Qwen-7B", - "Qwen/Qwen-VL-Chat", - "Qwen/Qwen-VL", - "Qwen/Qwen-14B-Chat-Int4", - "Qwen/Qwen-1_8B-Chat", - "Qwen/Qwen-Audio-Chat", - "Qwen/Qwen-14B", - "Qwen/Qwen-VL-Chat-Int4", - "Qwen/Qwen-72B" + "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", + "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4", + "nvidia/NVIDIA-Nemotron-Nano-9B-v2-Japanese", + "nvidia/NVIDIA-Nemotron-Nano-9B-v2", + "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-NVFP4", + "unsloth/NVIDIA-Nemotron-3-Super-120B-A12B-NVFP4", + "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", + "OpenResearcher/OpenResearcher-30B-A3B", + "nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4", + "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-Base-BF16" ] }, { - "architecture_id": "DeepseekV32ForCausalLM", - "total_models": 13, + "architecture_id": "MiniMaxM2ForCausalLM", + "total_models": 14, "sample_models": [ - "deepseek-ai/DeepSeek-V3.2", - "deepseek-ai/DeepSeek-V3.2-Speciale", 
- "QuantTrio/DeepSeek-V3.2-AWQ", - "deepseek-ai/DeepSeek-V3.2-Exp", - "nvidia/DeepSeek-V3.2-NVFP4", - "deepseek-ai/DeepSeek-Math-V2", - "mlx-community/DeepSeek-V3.2-4bit", - "inferencerlabs/DeepSeek-V3.2-Speciale-MLX-4.8bit", - "mlx-community/DeepSeek-V3.2-mlx-5bit", - "inferencerlabs/DeepSeek-V3.2-MLX-5.5bit" + "MiniMaxAI/MiniMax-M2.5", + "MiniMaxAI/MiniMax-M2", + "cerebras/MiniMax-M2.1-REAP-139B-A10B", + "MiniMaxAI/MiniMax-M2.1", + "cerebras/MiniMax-M2.5-REAP-139B-A10B", + "PrimeIntellect/MiniMax-M2.5-bf16", + "cerebras/MiniMax-M2.5-REAP-172B-A10B", + "saricles/MiniMax-M2.5-REAP-172B-A10B-NVFP4-GB10", + "amd/MiniMax-M2.1-MXFP4", + "aspctu/MiniMax-M2.5" ] }, { - "architecture_id": "GlmMoeDsaForCausalLM", + "architecture_id": "XGLMForCausalLM", + "total_models": 14, + "sample_models": [ + "facebook/xglm-564M", + "facebook/xglm-7.5B", + "facebook/xglm-1.7B", + "KoboldAI/fairseq-dense-13B", + "facebook/xglm-4.5B", + "KoboldAI/fairseq-dense-2.7B", + "KoboldAI/fairseq-dense-125M", + "KoboldAI/fairseq-dense-1.3B", + "KoboldAI/fairseq-dense-355M", + "KoboldAI/fairseq-dense-6.7B" + ] + }, + { + "architecture_id": "CodeGenForCausalLM", "total_models": 13, "sample_models": [ - "zai-org/GLM-5-FP8", - "zai-org/GLM-5", - "inferencerlabs/GLM-5-MLX-4.8bit", - "mlx-community/GLM-5-4bit", - "mlx-community/GLM-5", - "mlx-community/GLM-5-MXFP4-Q8", - "inferencerlabs/GLM-5-MLX-5.6bit", - "mlx-community/GLM-5-8bit-MXFP8", - "mlx-community/GLM-5-4bit-gs32", - "INC4AI/GLM-5-int4-mixed-AutoRound" + "Salesforce/codegen-350M-mono", + "Salesforce/codegen-350M-multi", + "hf-tiny-model-private/tiny-random-CodeGenForCausalLM", + "Salesforce/codegen-2B-mono", + "Salesforce/codegen-6B-multi", + "shailja/fine-tuned-codegen-16B-Verilog", + "katuni4ka/tiny-random-codegen2", + "Salesforce/codegen-2B-multi", + "Salesforce/codegen-6B-mono", + "Salesforce/codegen-6B-nl" ] }, { - "architecture_id": "XGLMForCausalLM", + "architecture_id": "RwkvForCausalLM", "total_models": 13, "sample_models": [ - 
"facebook/xglm-564M", - "facebook/xglm-1.7B", - "facebook/xglm-4.5B", - "facebook/xglm-7.5B", - "facebook/xglm-2.9B", - "pythainlp/wangchanglm-7.5B-sft-enth", - "KoboldAI/fairseq-dense-355M", - "KoboldAI/fairseq-dense-125M", - "KoboldAI/fairseq-dense-2.7B", - "KoboldAI/fairseq-dense-1.3B" + "RWKV/v5-Eagle-7B-HF", + "RWKV/rwkv-4-169m-pile", + "beomi/KoRWKV-6B", + "RWKV/rwkv-4-1b5-pile", + "RWKV/rwkv-4-430m-pile", + "RWKV/rwkv-4-3b-pile", + "RWKV/rwkv-4-7b-pile", + "RWKV/rwkv-4-14b-pile", + "RWKV/rwkv-raven-1b5", + "RWKV/rwkv-raven-7b" + ] + }, + { + "architecture_id": "DeepseekV2ForCausalLM", + "total_models": 11, + "sample_models": [ + "deepseek-ai/DeepSeek-V2-Lite-Chat", + "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct", + "deepseek-ai/DeepSeek-V2-Lite", + "deepseek-ai/DeepSeek-V2.5", + "deepseek-ai/DeepSeek-V2-Chat", + "deepseek-ai/DeepSeek-Coder-V2-Instruct-0724", + "deepseek-ai/DeepSeek-V2", + "deepseek-ai/DeepSeek-Coder-V2-Instruct", + "deepseek-ai/DeepSeek-Coder-V2-Lite-Base", + "deepseek-ai/DeepSeek-V2-Chat-0628" ] }, { "architecture_id": "CohereForCausalLM", - "total_models": 13, + "total_models": 10, "sample_models": [ "trl-internal-testing/tiny-CohereForCausalLM", - "CohereLabs/aya-expanse-8b", "CohereLabs/aya-23-8B", + "CohereLabs/aya-expanse-8b", "CohereLabs/c4ai-command-r-v01", - "NLPark/AnFeng_v3_Avocet", "CohereLabs/aya-expanse-32b", + "NLPark/AnFeng_v3_Avocet", "CohereLabs/aya-23-35B", - "CohereLabs/c4ai-command-r-plus", "CohereLabs/c4ai-command-r-plus-08-2024", - "CohereLabs/c4ai-command-r-08-2024" + "CohereLabs/c4ai-command-r-08-2024", + "CohereLabs/c4ai-command-r-plus" ] }, { "architecture_id": "T5GemmaForConditionalGeneration", - "total_models": 12, + "total_models": 10, "sample_models": [ - "google/t5gemma-b-b-prefixlm", + "google/t5gemma-s-s-prefixlm", "google/t5gemma-b-b-ul2", - "google/t5gemma-xl-xl-ul2", "google/t5gemma-9b-9b-ul2", - "google/t5gemma-s-s-prefixlm", "google/t5gemma-2b-2b-ul2", - "google/t5gemma-2b-2b-ul2-it", + 
"google/t5gemma-b-b-prefixlm", "google/t5gemma-9b-9b-ul2-it", - "jordimas/t5gemma-s-s-ul2", + "google/t5gemma-9b-2b-ul2-it", + "google/t5gemma-2b-2b-prefixlm", + "google/t5gemma-l-l-prefixlm", "harshaljanjani/tiny-t5gemma-test" ] }, { - "architecture_id": "NemotronHForCausalLM", - "total_models": 12, + "architecture_id": "Cohere2ForCausalLM", + "total_models": 10, "sample_models": [ - "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", - "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", - "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4", - "nvidia/NVIDIA-Nemotron-Nano-9B-v2", - "nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8", - "nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4", - "cybermotaz/nemotron3-nano-nvfp4-w4a16", - "RedHatAI/NVIDIA-Nemotron-Nano-9B-v2-FP8-dynamic", - "unsloth/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4", - "nvidia/NVIDIA-Nemotron-Nano-9B-v2-Japanese" + "trl-internal-testing/tiny-Cohere2ForCausalLM", + "CohereLabs/tiny-aya-global", + "CohereLabs/c4ai-command-r7b-12-2024", + "CohereLabs/tiny-aya-base", + "CohereLabs/c4ai-command-a-03-2025", + "CohereLabs/c4ai-command-r7b-arabic-02-2025", + "CohereLabs/tiny-aya-water", + "CohereLabs/tiny-aya-fire", + "CohereLabs/command-a-reasoning-08-2025", + "CohereLabs/tiny-aya-earth" ] }, { "architecture_id": "DeciLMForCausalLM", - "total_models": 12, + "total_models": 10, "sample_models": [ "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", "nvidia/Llama-3_3-Nemotron-Super-49B-v1", - "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5-FP8", - "nvidia/Llama-3_1-Nemotron-51B-Instruct", - "etsien/Llama-3_3-Nemotron-Super-49B-v1_5-GPTQ-w4a8", + "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5-NVFP4", + "Deci/DeciLM-7B-instruct", "Deci/DeciLM-7B", "NewstaR/Porpoise-6b-instruct", "Danielbrdz/Barcenas-6b", - "nvidia/Llama-3_3-Nemotron-Super-49B-v1-FP8", - "Deci/DeciLM-7B-instruct" - ] - }, - { - "architecture_id": "MPTForCausalLM", - "total_models": 11, - "sample_models": [ - "anas-awadalla/mpt-7b", - "replit/replit-code-v1-3b", - "wtang06/mpt-125m-c4", - 
"lightblue/japanese-mpt-7b", - "vinai/PhoGPT-4B-Chat", - "echarlaix/tiny-mpt-random-remote-code", - "TehVenom/MPT-7b-Chat-Instruct-LongCTX-Merge", - "TehVenom/MPT-7b-InstructAndStorywriting-50_50-Merge", - "aisingapore/SEA-LION-v1-7B", - "Nethermind/Mpt-Instruct-DotNet-S" - ] - }, - { - "architecture_id": "ExaoneForCausalLM", - "total_models": 10, - "sample_models": [ - "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct", - "LGAI-EXAONE/EXAONE-Deep-7.8B", - "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct", - "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", - "LGAI-EXAONE/EXAONE-3.5-32B-Instruct", - "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct-AWQ", - "LGAI-EXAONE/EXAONE-Deep-32B", - "LGAI-EXAONE/EXAONE-Deep-2.4B", - "OpenLLM-Korea/EXAONE-Deep-7.8B", - "OpenLLM-Korea/EXAONE-Deep-32B" - ] - }, - { - "architecture_id": "JambaForCausalLM", - "total_models": 10, - "sample_models": [ - "ai21labs/AI21-Jamba-Mini-1.5", - "ai21labs/AI21-Jamba2-3B", - "ai21labs/AI21-Jamba-Large-1.5", - "ai21labs/Jamba-tiny-random", - "ai21labs/AI21-Jamba-Mini-1.6", - "ai21labs/AI21-Jamba-Large-1.6", - "ai21labs/Jamba-v0.1", - "microsoft/Dayhoff-3b-GR-HM-c", - "ai21labs/AI21-Jamba-Reasoning-3B", - "microsoft/Dayhoff-170m-GR" - ] - }, - { - "architecture_id": "GraniteMoeForCausalLM", - "total_models": 9, - "sample_models": [ - "ibm-research/PowerMoE-3b", - "ibm-granite/granite-3.1-3b-a800m-instruct", - "ibm-granite/granite-3.0-1b-a400m-base", - "ibm-granite/granite-3.1-1b-a400m-instruct", - "ibm-granite/granite-3.1-3b-a800m-base", - "ibm-granite/granite-3.1-1b-a400m-base", - "ibm-granite/granite-3.0-3b-a800m-instruct", - "ibm-granite/granite-3.0-3b-a800m-base", - "ibm-granite/granite-guardian-3.2-3b-a800m" - ] - }, - { - "architecture_id": "SeedOssForCausalLM", - "total_models": 9, - "sample_models": [ - "lmstudio-community/Seed-OSS-36B-Instruct-MLX-8bit", - "lmstudio-community/Seed-OSS-36B-Instruct-MLX-4bit", - "lmstudio-community/Seed-OSS-36B-Instruct-MLX-5bit", - "lmstudio-community/Seed-OSS-36B-Instruct-MLX-6bit", - 
"ByteDance-Seed/Seed-OSS-36B-Instruct", - "NousResearch/Hermes-4.3-36B", - "ByteDance-Seed/Seed-OSS-36B-Base", - "unsloth/Seed-OSS-36B-Instruct", - "magiccodingman/Seed-OSS-36B-Instruct-unsloth-MagicQuant-Hybrid-GGUF" - ] - }, - { - "architecture_id": "InductionVL2ForCausalLM", - "total_models": 9, - "sample_models": [ - "jonathanli/induction-vl2-mdl-fswd7-20000-720p-proj-256-var", - "jonathanli/induction-vl2-mdl-fswd7-20000-720p-proj-256-var-img", - "jonathanli/induction-vl2-mdl-j9vtm-95000-720p-proj-256-var", - "jonathanli/induction-vl2-mdl-fswd7-20000-720p-proj-256-var-img-2x", - "jonathanli/induction-vl2-mdl-fswd7-20000-720p-proj-256-var-img-flow", - "jonathanli/induction-vl2-mdl-j9vtm-95000-720p-proj-256-var-img", - "jonathanli/induction-vl2-mdl-cf9rp-20000-720p-proj-256-var", - "jonathanli/induction-vl2-mdl-j9vtm-95000-720p-proj-256-var-2x-img", - "jonathanli/induction-vl2-mdl-j9vtm-165000-720p-proj-256-var" - ] - }, - { - "architecture_id": "LlavaLlamaForCausalLM", - "total_models": 9, - "sample_models": [ - "ShareGPTVideo/LLaVA-Hound-Pretrain", - "LanguageBind/Video-LLaVA-7B", - "second-state/Llava-v1.5-7B-GGUF", - "lmms-lab/llama3-llava-next-8b", - "wisdomik/Quilt-Llava-v1.5-7b", - "mmaaz60/LLaVA-7B-Lightening-v1-1", - "liuhaotian/llava-llama-2-13b-chat-lightning-preview", - "second-state/Llava-v1.6-Vicuna-7B-GGUF", - "ManishThota/Ollama_Video_llama_7B" + "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", + "nvidia/Llama-3_1-Nemotron-51B-Instruct", + "nvidia/Llama-3_3-Nemotron-Super-49B-GenRM" ] }, { - "architecture_id": "Step3p5ForCausalLM", + "architecture_id": "LlavaQwenForCausalLM", "total_models": 8, "sample_models": [ - "stepfun-ai/Step-3.5-Flash", - "stepfun-ai/Step-3.5-Flash-FP8", - "stepfun-ai/Step-3.5-Flash-GGUF-Q4_K_S", - "mlx-community/Step-3.5-Flash-4bit", - "tacos4me/Step-3.5-Flash-NVFP4", - "inferencerlabs/Step-3.5-Flash-MLX-6.5bit", - "mlx-community/Step-3.5-Flash-8bit", - "mlx-community/Step-3.5-Flash-6bit" + 
"lmms-lab/llava-onevision-qwen2-7b-ov", + "lmms-lab/llava-onevision-qwen2-0.5b-ov", + "lmms-lab/llava-onevision-qwen2-7b-si", + "lmms-lab/LLaVA-Video-7B-Qwen2-Video-Only", + "lmms-lab/llava-onevision-qwen2-7b-ov-chat", + "lmms-lab/llava-next-interleave-qwen-7b", + "lmms-lab/llava-onevision-qwen2-0.5b-si", + "lmms-lab/LongVA-7B" ] }, { "architecture_id": "MiniCPMForCausalLM", "total_models": 8, "sample_models": [ - "openbmb/MiniCPM-2B-sft-bf16", "openbmb/MiniCPM4.1-8B", + "openbmb/MiniCPM-2B-sft-bf16", "openbmb/MiniCPM4-0.5B", "openbmb/MiniCPM-1B-sft-bf16", - "openbmb/AgentCPM-Report", - "openbmb/MiniCPM4.1-8B-GPTQ", "openbmb/MiniCPM-MoE-8x2B", - "openbmb/MiniCPM4-8B" + "katuni4ka/tiny-random-minicpm", + "openbmb/MiniCPM4-8B", + "openbmb/MiniCPM-S-1B-sft" ] }, { - "architecture_id": "Glm4ForCausalLM", + "architecture_id": "MT5ForConditionalGeneration", "total_models": 8, "sample_models": [ - "zai-org/GLM-4-9B-0414", - "zai-org/GLM-Z1-32B-0414", - "unsloth/GLM-4-32B-0414-GGUF", - "unsloth/GLM-4-9B-0414-GGUF", - "zai-org/GLM-Z1-9B-0414", - "zai-org/GLM-4-32B-0414", - "allura-org/GLM4-32B-Neon-v2", - "zai-org/GLM-4-32B-Base-0414" + "knowledgator/IUPAC2SMILES-canonical-base", + "knowledgator/SMILES2IUPAC-canonical-base", + "knowledgator/SMILES2IUPAC-canonical-small", + "bigscience/mt0-base", + "bigscience/mt0-small", + "HiTZ/Medical-mT5-large", + "bigscience/mt0-large", + "dreuxx26/Multilingual-grammar-Corrector-using-mT5-small" ] }, { - "architecture_id": "ApertusForCausalLM", - "total_models": 7, + "architecture_id": "DFlashDraftModel", + "total_models": 8, "sample_models": [ - "swiss-ai/Apertus-8B-Instruct-2509", - "swiss-ai/Apertus-8B-2509", - "swiss-ai/Apertus-70B-Instruct-2509", - "RedHatAI/Apertus-70B-Instruct-2509-quantized.w4a16", - "unsloth/Apertus-70B-Instruct-2509-unsloth-bnb-4bit", - "swiss-ai/Apertus-70B-2509", - "tiny-random/apertus" + "z-lab/Qwen3-4B-DFlash-b16", + "z-lab/Qwen3-8B-DFlash-b16", + "z-lab/Qwen3.5-9B-DFlash", + "z-lab/gpt-oss-20b-DFlash", + 
"z-lab/LLaMA3.1-8B-Instruct-DFlash-UltraChat", + "z-lab/gpt-oss-120b-DFlash", + "z-lab/Qwen3.5-35B-A3B-DFlash", + "z-lab/Qwen3-Coder-30B-A3B-DFlash" ] }, { - "architecture_id": "CodeGenForCausalLM", - "total_models": 7, + "architecture_id": "Qwen3_5ForCausalLM", + "total_models": 8, "sample_models": [ - "Salesforce/codegen-350M-mono", - "Salesforce/codegen-350M-multi", - "Salesforce/codegen-6B-multi", - "Salesforce/codegen2-1B_P", - "Salesforce/codegen-16B-nl", - "Salesforce/codegen-6B-nl", - "sharoz/codegen-350M-mono-custom-functions-dataset-python_v2" + "lukey03/Qwen3.5-9B-abliterated", + "osoleve/Qwen3.5-9B-Base-Text-NVFP4", + "Green-eyedDevil/Monika-9B", + "Phonsiri/Qwen3.5-9B-Thai-Law-Base", + "eerwitt/qwen-h-neurons-honest", + "rahul7star/albeit", + "nahidstaq/html-section-retriever", + "nbeerbower/Huihui-Qwen3.5-9B-abliterated-Grimoire-ORPO" ] }, { - "architecture_id": "Starcoder2ForCausalLM", - "total_models": 7, + "architecture_id": "MPTForCausalLM", + "total_models": 8, "sample_models": [ - "bigcode/starcoder2-3b", - "bigcode/starcoder2-15b", - "bigcode/starcoder2-7b", - "second-state/StarCoder2-15B-GGUF", - "second-state/StarCoder2-3B-GGUF", - "second-state/StarCoder2-7B-GGUF", - "bigcode/starcoder2-15b-instruct-v0.1" + "echarlaix/tiny-mpt-random-remote-code", + "anas-awadalla/mpt-7b", + "wtang06/mpt-125m-c4", + "lightblue/japanese-mpt-7b", + "vinai/PhoGPT-4B", + "Nethermind/Mpt-Instruct-DotNet-S", + "vinai/PhoGPT-4B-Chat", + "replit/replit-code-v1-3b" ] }, { - "architecture_id": "SDARForCausalLM", + "architecture_id": "ExaoneForCausalLM", "total_models": 7, "sample_models": [ - "JetLM/SDAR-8B-Chat", - "JetLM/SDAR-4B-Chat-b32", - "JetLM/SDAR-8B-Chat-b32", - "JetLM/SDAR-4B-Chat", - "JetLM/SDAR-1.7B-Chat", - "JetLM/SDAR-1.7B-Chat-b32", - "JetLM/SDAR-8B-Chat-b16" + "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct", + "LGAI-EXAONE/EXAONE-Deep-7.8B", + "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct", + "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", + 
"LGAI-EXAONE/EXAONE-3.5-32B-Instruct", + "LGAI-EXAONE/EXAONE-Deep-32B", + "LGAI-EXAONE/EXAONE-Deep-2.4B" ] }, { - "architecture_id": "LlavaLlamaModel", + "architecture_id": "BaichuanForCausalLM", "total_models": 7, "sample_models": [ - "Efficient-Large-Model/VILA1.5-3b", - "Efficient-Large-Model/NVILA-8B", - "Efficient-Large-Model/NVILA-15B", - "Efficient-Large-Model/NVILA-Lite-8B", - "Efficient-Large-Model/NVILA-8B-Video", - "Efficient-Large-Model/VILA1.5-13b", - "Efficient-Large-Model/Llama-3-VILA1.5-8B" + "baichuan-inc/Baichuan2-7B-Chat", + "baichuan-inc/Baichuan2-13B-Chat", + "baichuan-inc/Baichuan-13B-Chat", + "katuni4ka/tiny-random-baichuan2", + "baichuan-inc/Baichuan2-7B-Base", + "katuni4ka/tiny-random-baichuan2-13b", + "baichuan-inc/Baichuan2-13B-Base" ] }, { - "architecture_id": "Qwen3VLMoeForConditionalGeneration", - "total_models": 6, + "architecture_id": "SmolLM3ForCausalLM", + "total_models": 7, "sample_models": [ - "QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ", - "RedHatAI/Qwen3-VL-235B-A22B-Instruct-NVFP4", - "QuantTrio/Qwen3-VL-30B-A3B-Thinking-AWQ", - "QuantTrio/Qwen3-VL-235B-A22B-Instruct-AWQ", - "QuantTrio/Qwen3-VL-235B-A22B-Thinking-AWQ", - "RedHatAI/Qwen3-VL-235B-A22B-Instruct-FP8-dynamic" + "HuggingFaceTB/SmolLM3-3B", + "HuggingFaceTB/SmolLM3-3B-Base", + "optimum-internal-testing/tiny-random-SmolLM3ForCausalLM", + "onnx-internal-testing/tiny-random-SmolLM3ForCausalLM", + "HuggingFaceTB/SmolLM3-3B-ONNX", + "toroe/SmolLM-3B-Science-ES", + "N-Bot-Int/SmolSam3-MEMGRPO" ] }, { - "architecture_id": "LLaDAModelLM", + "architecture_id": "DeepseekV32ForCausalLM", "total_models": 6, "sample_models": [ - "GSAI-ML/LLaDA-8B-Instruct", - "GSAI-ML/LLaDA-8B-Base", - "GSAI-ML/LLaDA-1.5", - "Fraser/LLaDA-8B-Base-gg2m", - "Zigeng/dParallel-LLaDA-8B-instruct", - "d3LLM/d3LLM_LLaDA" + "deepseek-ai/DeepSeek-V3.2", + "deepseek-ai/DeepSeek-V3.2-Exp", + "nvidia/DeepSeek-V3.2-NVFP4", + "deepseek-ai/DeepSeek-V3.2-Speciale", + "deepseek-ai/DeepSeek-Math-V2", + 
"cerebras/DeepSeek-V3.2-REAP-508B-A37B" ] }, { - "architecture_id": "Exaone4ForCausalLM", + "architecture_id": "MambaForCausalLM", "total_models": 6, "sample_models": [ - "LGAI-EXAONE/EXAONE-4.0.1-32B", - "LGAI-EXAONE/EXAONE-4.0-1.2B", - "LGAI-EXAONE/EXAONE-4.0-32B-FP8", - "LGAI-EXAONE/EXAONE-4.0-32B", - "LGAI-EXAONE/EXAONE-4.0-32B-AWQ", - "lmstudio-community/EXAONE-4.0-32B-MLX-4bit" + "state-spaces/mamba-130m-hf", + "state-spaces/mamba-2.8b-hf", + "state-spaces/mamba-370m-hf", + "state-spaces/mamba-1.4b-hf", + "state-spaces/mamba-790m-hf", + "TRI-ML/mamba-7b-rw" ] }, { - "architecture_id": "SmolLM3ForCausalLM", + "architecture_id": "Qwen2MoeForCausalLM", "total_models": 6, "sample_models": [ - "HuggingFaceTB/SmolLM3-3B", - "HuggingFaceTB/SmolLM3-3B-Base", - "optimum-internal-testing/tiny-random-SmolLM3ForCausalLM", - "unsloth/SmolLM3-3B", - "onnx-internal-testing/tiny-random-SmolLM3ForCausalLM", - "mlx-community/SmolLM3-3B-4bit" + "Qwen/Qwen1.5-MoE-A2.7B", + "Qwen/Qwen1.5-MoE-A2.7B-Chat", + "Qwen/Qwen2-57B-A14B-Instruct", + "Qwen/Qwen2-57B-A14B", + "katuni4ka/tiny-random-qwen1.5-moe", + "yujiepan/qwen1.5-moe-tiny-random" ] }, { - "architecture_id": "Ernie4_5_MoeForCausalLM", + "architecture_id": "ProGenForCausalLM", "total_models": 6, "sample_models": [ - "baidu/ERNIE-4.5-21B-A3B-Base-PT", - "lmstudio-community/ERNIE-4.5-21B-A3B-MLX-4bit", - "lmstudio-community/ERNIE-4.5-21B-A3B-MLX-8bit", - "lmstudio-community/ERNIE-4.5-21B-A3B-MLX-6bit", - "baidu/ERNIE-4.5-21B-A3B-PT", - "baidu/ERNIE-4.5-21B-A3B-Thinking" + "hugohrban/progen2-small", + "hugohrban/progen2-base", + "hugohrban/progen2-medium", + "hugohrban/progen2-oas", + "hugohrban/progen2-xlarge", + "hugohrban/progen2-small-mix7" ] }, { - "architecture_id": "Qwen2MoeForCausalLM", + "architecture_id": "LlavaLlamaForCausalLM", "total_models": 6, "sample_models": [ - "Qwen/Qwen1.5-MoE-A2.7B", - "Qwen/Qwen1.5-MoE-A2.7B-Chat", - "Qwen/Qwen2-57B-A14B-Instruct", - "Qwen/Qwen2-57B-A14B", - 
"Qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4", - "RedHatAI/Qwen2-57B-A14B-Instruct-FP8" + "LanguageBind/Video-LLaVA-7B", + "lmms-lab/llama3-llava-next-8b", + "liuhaotian/llava-llama-2-13b-chat-lightning-preview", + "wisdomik/Quilt-Llava-v1.5-7b", + "ManishThota/Ollama_Video_llama_7B", + "ShareGPTVideo/LLaVA-Hound-Pretrain" ] }, { - "architecture_id": "LlamaForCausalLMEagle3", + "architecture_id": "HyenaDNAForCausalLM", "total_models": 6, "sample_models": [ - "nvidia/gpt-oss-120b-Eagle3-long-context", - "nvidia/gpt-oss-120b-Eagle3-short-context", - "nvidia/Qwen3-235B-A22B-Eagle3", - "taobao-mnn/Qwen3-VL-4B-Instruct-Eagle3", - "taobao-mnn/Qwen3-VL-8B-Instruct-Eagle3", - "nvidia/gpt-oss-120b-Eagle3-throughput" + "LongSafari/hyenadna-small-32k-seqlen-hf", + "LongSafari/hyenadna-large-1m-seqlen-hf", + "LongSafari/hyenadna-medium-160k-seqlen-hf", + "LongSafari/hyenadna-medium-450k-seqlen-hf", + "LongSafari/hyenadna-tiny-1k-seqlen-hf", + "LongSafari/hyenadna-tiny-1k-seqlen-d256-hf" ] }, { - "architecture_id": "AfmoeForCausalLM", + "architecture_id": "LlavaLlamaModel", "total_models": 6, "sample_models": [ - "arcee-ai/Trinity-Nano-Preview", - "arcee-ai/Trinity-Mini", - "arcee-ai/Trinity-Large-Preview-FP8", - "arcee-ai/Trinity-Large-Preview", - "arcee-ai/Trinity-Nano-Base", - "arcee-ai/Trinity-Large-Base" + "Efficient-Large-Model/VILA1.5-3b", + "Efficient-Large-Model/NVILA-15B", + "Efficient-Large-Model/NVILA-Lite-8B", + "Efficient-Large-Model/NVILA-8B", + "Efficient-Large-Model/VILA1.5-13b", + "Efficient-Large-Model/Llama-3-VILA1.5-8B" ] }, { - "architecture_id": "MT5ForConditionalGeneration", + "architecture_id": "LLaMAForCausalLM", "total_models": 6, "sample_models": [ - "knowledgator/SMILES2IUPAC-canonical-base", - "bigscience/mt0-large", - "bigscience/mt0-small", - "bigscience/mt0-base", - "bigscience/mt0-xl", - "bigscience/mt0-xxl" + "Enoch/llama-65b-hf", + "Rardilit/Panther_v1", + "James-WYang/BigTranslate", + "mncai/chatdoctor", + "heegyu/LIMA-13b", + 
"maicomputer/alpaca-13b" ] }, { - "architecture_id": "Cohere2ForCausalLM", - "total_models": 6, + "architecture_id": "LLaDAModelLM", + "total_models": 5, "sample_models": [ - "CohereLabs/c4ai-command-r7b-12-2024", - "CohereLabs/c4ai-command-r7b-arabic-02-2025", - "CohereLabs/c4ai-command-a-03-2025", - "trl-internal-testing/tiny-Cohere2ForCausalLM", - "CohereLabs/command-a-reasoning-08-2025", - "CohereLabs/tiny-aya-global" + "GSAI-ML/LLaDA-8B-Instruct", + "GSAI-ML/LLaDA-8B-Base", + "GSAI-ML/LLaDA-1.5", + "Fraser/LLaDA-8B-Base-gg2m", + "d3LLM/d3LLM_LLaDA" ] }, { - "architecture_id": "Mistral3ForConditionalGeneration", - "total_models": 6, + "architecture_id": "ApertusForCausalLM", + "total_models": 5, "sample_models": [ - "ArmGPT/ArmenianGPT-1.0-3B", - "RedHatAI/Mistral-Small-3.2-24B-Instruct-2506-NVFP4", - "mlx-community/Ministral-3-3B-Instruct-2512", - "mlx-community/mistralai_Devstral-Small-2-24B-Instruct-2512-MLX-8Bit", - "mlx-community/mistralai_Ministral-3-14B-Instruct-2512-MLX-MXFP4", - "mlx-community/Ministral-3-8B-Instruct-2512" + "swiss-ai/Apertus-8B-Instruct-2509", + "swiss-ai/Apertus-8B-2509", + "swiss-ai/Apertus-70B-Instruct-2509", + "swiss-ai/Apertus-70B-2509", + "aisingapore/Apertus-SEA-LION-v4-8B-IT" ] }, { - "architecture_id": "LLaMAForCausalLM", - "total_models": 6, + "architecture_id": "FalconMambaForCausalLM", + "total_models": 5, + "sample_models": [ + "trl-internal-testing/tiny-FalconMambaForCausalLM", + "tiiuae/falcon-mamba-tiny-dev", + "tiiuae/falcon-mamba-7b", + "tiiuae/falcon-mamba-7b-instruct", + "tiiuae/Falcon3-Mamba-7B-Instruct" + ] + }, + { + "architecture_id": "Eagle3Speculator", + "total_models": 5, "sample_models": [ - "Enoch/llama-65b-hf", - "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", - "James-WYang/BigTranslate", - "Rardilit/Panther_v1", - "heegyu/LIMA-13b", - "mncai/chatdoctor" + "RedHatAI/Qwen3-8B-speculator.eagle3", + "RedHatAI/gpt-oss-20b-speculator.eagle3", + "RedHatAI/Llama-3.1-8B-Instruct-speculator.eagle3", + 
"RedHatAI/Qwen3-32B-speculator.eagle3", + "RedHatAI/Qwen3-14B-speculator.eagle3" ] }, { - "architecture_id": "MambaForCausalLM", + "architecture_id": "SeedOssForCausalLM", "total_models": 5, "sample_models": [ - "state-spaces/mamba-130m-hf", - "state-spaces/mamba-2.8b-hf", - "state-spaces/mamba-370m-hf", - "state-spaces/mamba-1.4b-hf", - "state-spaces/mamba-790m-hf" + "ByteDance-Seed/Seed-OSS-36B-Instruct", + "ByteDance-Seed/Seed-OSS-36B-Base", + "NousResearch/Hermes-4.3-36B", + "mratsim/Seed-OSS-36B-Instruct-NVFP4", + "YanLabs/Seed-OSS-36B-Instruct-MPOA" ] }, { - "architecture_id": "BaichuanForCausalLM", + "architecture_id": "Ernie4_5_MoeForCausalLM", "total_models": 5, "sample_models": [ - "baichuan-inc/Baichuan2-7B-Chat", - "baichuan-inc/Baichuan2-13B-Chat", - "baichuan-inc/Baichuan-13B-Chat", - "Flmc/DISC-MedLLM", - "baichuan-inc/Baichuan2-7B-Base" + "baidu/ERNIE-4.5-21B-A3B-Base-PT", + "baidu/ERNIE-4.5-21B-A3B-PT", + "baidu/ERNIE-4.5-21B-A3B-Thinking", + "baidu/ERNIE-4.5-300B-A47B-PT", + "baidu/ERNIE-4.5-300B-A47B-Paddle" ] }, { - "architecture_id": "DreamModel", + "architecture_id": "SDARForCausalLM", "total_models": 5, "sample_models": [ - "Dream-org/Dream-v0-Instruct-7B", - "Dream-org/Dream-v0-Base-7B", - "Dream-org/Dream-Coder-v0-Instruct-7B", - "Zigeng/dParallel_Dream_7B_Instruct", - "Dream-org/DreamOn-v0-7B" + "JetLM/SDAR-8B-Chat-b32", + "JetLM/SDAR-4B-Chat-b32", + "JetLM/SDAR-8B-Chat", + "JetLM/SDAR-1.7B-Chat-b32", + "JetLM/SDAR-1.7B-Chat" ] }, { - "architecture_id": "LlavaQwenForCausalLM", + "architecture_id": "BloomModel", "total_models": 5, "sample_models": [ - "lmms-lab/llava-onevision-qwen2-7b-ov", - "lmms-lab/llava-onevision-qwen2-0.5b-ov", - "lmms-lab/llava-onevision-qwen2-7b-si", - "lmms-lab/llava-onevision-qwen2-7b-ov-chat", - "lmms-lab/llava-onevision-qwen2-72b-ov-sft" + "bigscience/bigscience-small-testing", + "TurkuNLP/gpt3-finnish-small", + "TurkuNLP/gpt3-finnish-large", + "TurkuNLP/gpt3-finnish-13B", + "TurkuNLP/gpt3-finnish-xl" ] }, { - 
"architecture_id": "FalconMambaForCausalLM", + "architecture_id": "AfmoeForCausalLM", "total_models": 5, "sample_models": [ - "trl-internal-testing/tiny-FalconMambaForCausalLM", - "tiiuae/falcon-mamba-tiny-dev", - "tiiuae/falcon-mamba-7b", - "tiiuae/falcon-mamba-7b-instruct", - "tiiuae/Falcon3-Mamba-7B-Instruct" + "arcee-ai/Trinity-Nano-Preview", + "arcee-ai/Trinity-Mini", + "arcee-ai/Trinity-Large-Preview", + "arcee-ai/Trinity-Nano-Base", + "arcee-ai/Trinity-Mini-Base" ] }, { - "architecture_id": "LLaDA2MoeModelLM", + "architecture_id": "LlavaQwen2ForCausalLM", "total_models": 5, "sample_models": [ - "inclusionAI/LLaDA2.0-mini", - "inclusionAI/LLaDA2.1-mini", - "inclusionAI/LLaDA2.0-mini-preview", - "inclusionAI/LLaDA2.0-mini-CAP", - "inclusionAI/LLaDA2.1-flash" + "apple/FastVLM-0.5B", + "qnguyen3/nanoLLaVA", + "apple/FastVLM-1.5B", + "apple/FastVLM-7B", + "FreedomIntelligence/HuatuoGPT-Vision-7B" ] }, { "architecture_id": "NemotronForCausalLM", "total_models": 5, "sample_models": [ + "nvidia/Minitron-8B-Base", "nvidia/Nemotron-Mini-4B-Instruct", "badaoui/tiny-random-NemotronForCausalLM", - "nvidia/Minitron-8B-Base", "nvidia/Minitron-4B-Base", "thhaus/nemotron3-8b" ] }, { - "architecture_id": "LlavaQwen2ForCausalLM", + "architecture_id": "HunYuanDenseV1ForCausalLM", "total_models": 5, "sample_models": [ - "apple/FastVLM-0.5B", - "qnguyen3/nanoLLaVA", - "FreedomIntelligence/HuatuoGPT-Vision-7B", - "apple/FastVLM-1.5B", - "apple/FastVLM-7B" + "tencent/Hunyuan-7B-Instruct", + "tencent/Hunyuan-0.5B-Pretrain", + "tencent/Hunyuan-4B-Instruct", + "tencent/Hunyuan-1.8B-Instruct", + "tencent/Hunyuan-0.5B-Instruct" ] }, { - "architecture_id": "ProGenForCausalLM", - "total_models": 5, + "architecture_id": "PhiMoEForCausalLM", + "total_models": 4, "sample_models": [ - "hugohrban/progen2-base", - "hugohrban/progen2-small", - "hugohrban/progen2-large", - "hugohrban/progen2-medium", - "hugohrban/progen2-xlarge" + "microsoft/Phi-tiny-MoE-instruct", + 
"microsoft/Phi-mini-MoE-instruct", + "microsoft/Phi-3.5-MoE-instruct", + "optimum-intel-internal-testing/phi-3.5-moe-tiny-random" ] }, { - "architecture_id": "HyenaDNAForCausalLM", - "total_models": 5, + "architecture_id": "Starcoder2ForCausalLM", + "total_models": 4, "sample_models": [ - "LongSafari/hyenadna-large-1m-seqlen-hf", - "LongSafari/hyenadna-medium-450k-seqlen-hf", - "LongSafari/hyenadna-medium-160k-seqlen-hf", - "LongSafari/hyenadna-small-32k-seqlen-hf", - "LongSafari/hyenadna-tiny-1k-seqlen-hf" + "bigcode/starcoder2-3b", + "bigcode/starcoder2-15b", + "bigcode/starcoder2-7b", + "bigcode/starcoder2-15b-instruct-v0.1" ] }, { - "architecture_id": "Llama4ForConditionalGeneration", - "total_models": 5, + "architecture_id": "DreamModel", + "total_models": 4, "sample_models": [ - "yujiepan/llama-4-tiny-random", - "lmstudio-community/Llama-4-Scout-17B-16E-MLX-text-8bit", - "RedHatAI/Llama-4-Maverick-17B-128E-Instruct-NVFP4", - "RedHatAI/Llama-4-Scout-17B-16E-Instruct-NVFP4", - "lmstudio-community/Llama-4-Scout-17B-16E-MLX-text-4bit" + "Dream-org/Dream-v0-Instruct-7B", + "Dream-org/Dream-v0-Base-7B", + "Dream-org/Dream-Coder-v0-Instruct-7B", + "d3LLM/d3LLM_Dream" ] }, { - "architecture_id": "PhiMoEForCausalLM", + "architecture_id": "Step3p5ForCausalLM", "total_models": 4, "sample_models": [ - "microsoft/Phi-tiny-MoE-instruct", - "microsoft/Phi-3.5-MoE-instruct", - "microsoft/Phi-mini-MoE-instruct", - "optimum-intel-internal-testing/phi-3.5-moe-tiny-random" + "stepfun-ai/Step-3.5-Flash", + "tacos4me/Step-3.5-Flash-NVFP4", + "stepfun-ai/Step-3.5-Flash-Base", + "shieldstackllc/Step-3.5-Flash-REAP-128B-A11B-mlx-mixed-4-6" ] }, { - "architecture_id": "Eagle3Speculator", + "architecture_id": "LLaDA2MoeModelLM", "total_models": 4, "sample_models": [ - "RedHatAI/Qwen3-8B-speculator.eagle3", - "RedHatAI/gpt-oss-20b-speculator.eagle3", - "RedHatAI/Llama-3.1-8B-Instruct-speculator.eagle3", - "RedHatAI/Qwen3-32B-speculator.eagle3" + "inclusionAI/LLaDA2.0-mini", + 
"inclusionAI/LLaDA2.1-mini", + "inclusionAI/LLaDA2.1-flash", + "inclusionAI/LLaDA2.0-flash" ] }, { - "architecture_id": "KimiLinearForCausalLM", + "architecture_id": "LlamaForCausalLMEagle3", "total_models": 4, "sample_models": [ - "moonshotai/Kimi-Linear-48B-A3B-Instruct", - "moonshotai/Kimi-Linear-48B-A3B-Base", - "nightmedia/Kimi-Linear-REAP-35B-A3B-Instruct-mxfp4-mlx", - "mlx-community/Kimi-Linear-48B-A3B-Instruct-4bit" + "nvidia/gpt-oss-120b-Eagle3-short-context", + "nvidia/gpt-oss-120b-Eagle3-long-context", + "taobao-mnn/Qwen3-VL-8B-Instruct-Eagle3", + "nvidia/gpt-oss-120b-Eagle3-throughput" ] }, { - "architecture_id": "GlmForCausalLM", + "architecture_id": "DeepseekForCausalLM", "total_models": 4, "sample_models": [ - "zai-org/glm-4-9b-chat-hf", - "zai-org/glm-edge-1.5b-chat", - "zai-org/glm-4-9b-hf", - "zai-org/glm-edge-4b-chat" + "deepseek-ai/deepseek-moe-16b-base", + "deepseek-ai/deepseek-moe-16b-chat", + "ai-sage/GigaChat-20B-A3B-base", + "ai-sage/GigaChat-20B-A3B-instruct" ] }, { - "architecture_id": "OuroForCausalLM", + "architecture_id": "OlmoHybridForCausalLM", "total_models": 4, "sample_models": [ - "ByteDance/Ouro-1.4B", - "ByteDance/Ouro-2.6B-Thinking", - "ByteDance/Ouro-1.4B-Thinking", - "ByteDance/Ouro-2.6B" + "allenai/Olmo-Hybrid-7B", + "allenai/Olmo-Hybrid-Instruct-DPO-7B", + "allenai/Olmo-Hybrid-Instruct-SFT-7B", + "allenai/Olmo-Hybrid-Think-SFT-7B" ] }, { - "architecture_id": "Qwen3VLForConditionalGeneration", + "architecture_id": "OuroForCausalLM", "total_models": 4, "sample_models": [ - "RedHatAI/Qwen3-VL-32B-Instruct-NVFP4", - "JoshXT/AGiXT-Qwen3-VL-4B-GGUF", - "JoshXT/AGiXT-Qwen3-VL-2B-GGUF", - "jedisct1/Qwen3-VL-Embedding-8B-mlx" + "ByteDance/Ouro-1.4B", + "ByteDance/Ouro-2.6B-Thinking", + "ByteDance/Ouro-1.4B-Thinking", + "ByteDance/Ouro-2.6B" ] }, { - "architecture_id": "BloomModel", + "architecture_id": "Glm4ForCausalLM", "total_models": 4, "sample_models": [ - "bigscience/bigscience-small-testing", - "TurkuNLP/gpt3-finnish-small", - 
"TurkuNLP/gpt3-finnish-large", - "TurkuNLP/gpt3-finnish-13B" + "zai-org/GLM-4-9B-0414", + "zai-org/GLM-Z1-32B-0414", + "zai-org/GLM-Z1-9B-0414", + "zai-org/GLM-4-32B-0414" ] }, { @@ -948,8 +849,8 @@ "total_models": 4, "sample_models": [ "optimum-intel-internal-testing/tiny-random-ArceeForCausalLM", - "onnx-internal-testing/tiny-random-ArceeForCausalLM", "arcee-ai/AFM-4.5B-Base", + "onnx-internal-testing/tiny-random-ArceeForCausalLM", "arcee-ai/AFM-4.5B" ] }, @@ -958,111 +859,119 @@ "total_models": 4, "sample_models": [ "inclusionAI/Ling-mini-2.0", - "inclusionAI/Ring-1T-FP8", "inclusionAI/Ling-1T", + "inclusionAI/Ring-mini-2.0", "inclusionAI/Ling-flash-2.0" ] }, { - "architecture_id": "DFlashDraftModel", + "architecture_id": "AquilaForCausalLM", "total_models": 4, "sample_models": [ - "z-lab/Qwen3-4B-DFlash-b16", - "z-lab/Qwen3-8B-DFlash-b16", - "z-lab/Qwen3-Coder-30B-A3B-DFlash", - "z-lab/LLaMA3.1-8B-Instruct-DFlash-UltraChat" + "BAAI/AquilaChat2-7B", + "katuni4ka/tiny-random-aquila2", + "katuni4ka/tiny-random-aquilachat", + "BAAI/Aquila2-34B" ] }, { - "architecture_id": "HunYuanDenseV1ForCausalLM", + "architecture_id": "RWForCausalLM", "total_models": 4, "sample_models": [ - "tencent/Hunyuan-7B-Instruct", - "tencent/Hunyuan-0.5B-Pretrain", - "tencent/Hunyuan-4B-Instruct", - "tencent/Hunyuan-4B-Pretrain" + "lightonai/alfred-40b-1023", + "vilm/vulture-40b", + "explosion-testing/refined-web-model-test", + "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2" ] }, { - "architecture_id": "Qwen3_5MoeForConditionalGeneration", - "total_models": 4, + "architecture_id": "XLNetLMHeadModel", + "total_models": 3, "sample_models": [ - "mlx-community/Qwen3.5-397B-A17B-nvfp4", - "inferencerlabs/Qwen3.5-397B-A17B-MLX-9bit", - "mlx-community/Qwen3.5-397B-A17B-8bit-gs32", - "pcuenq/Qwen3.5-397B-A17B-4bit" + "xlnet/xlnet-base-cased", + "xlnet/xlnet-large-cased", + "sshleifer/tiny-xlnet-base-cased" ] }, { - "architecture_id": "KimiK25ForConditionalGeneration", + "architecture_id": 
"BioGptForCausalLM", "total_models": 3, "sample_models": [ - "mlx-community/Kimi-K2.5", - "nvidia/Kimi-K2.5-NVFP4", - "mlx-community/Kimi-K2.5-3bit" + "microsoft/biogpt", + "microsoft/BioGPT-Large", + "microsoft/BioGPT-Large-PubMedQA" ] }, { - "architecture_id": "XLNetLMHeadModel", + "architecture_id": "BambaForCausalLM", "total_models": 3, "sample_models": [ - "xlnet/xlnet-base-cased", - "xlnet/xlnet-large-cased", - "sshleifer/tiny-xlnet-base-cased" + "hmellor/tiny-random-BambaForCausalLM", + "ibm-ai-platform/Bamba-9B-v1", + "ibm-ai-platform/Bamba-9B-v2" ] }, { - "architecture_id": "T5WithLMHeadModel", + "architecture_id": "Exaone4ForCausalLM", "total_models": 3, "sample_models": [ - "Rostlab/prot_t5_xl_bfd", - "Salesforce/codet5-large", - "unicamp-dl/ptt5-base-portuguese-vocab" + "LGAI-EXAONE/EXAONE-4.0.1-32B", + "LGAI-EXAONE/EXAONE-4.0-1.2B", + "LGAI-EXAONE/EXAONE-4.0-32B" ] }, { - "architecture_id": "Zamba2ForCausalLM", + "architecture_id": "MiMoForCausalLM", "total_models": 3, "sample_models": [ - "Zyphra/Zamba2-1.2B-instruct", - "Zyphra/Zamba2-7B-Instruct", - "Zyphra/Zamba2-2.7B" + "XiaomiMiMo/MiMo-7B-Base", + "XiaomiMiMo/MiMo-7B-RL", + "XiaomiMiMo/MiMo-7B-SFT" ] }, { - "architecture_id": "DeepseekForCausalLM", + "architecture_id": "T5WithLMHeadModel", "total_models": 3, "sample_models": [ - "deepseek-ai/deepseek-moe-16b-base", - "deepseek-ai/deepseek-moe-16b-chat", - "ai-sage/GigaChat-20B-A3B-instruct" + "Rostlab/prot_t5_xl_bfd", + "Salesforce/codet5-large", + "unicamp-dl/ptt5-base-portuguese-vocab" ] }, { - "architecture_id": "IdeficsForVisionText2Text", + "architecture_id": "GlmMoeDsaForCausalLM", "total_models": 3, "sample_models": [ - "HuggingFaceM4/idefics-9b", - "HuggingFaceM4/idefics-80b-instruct", - "HuggingFaceM4/idefics-9b-instruct" + "zai-org/GLM-5", + "yujiepan/glm-5-tiny-random", + "cs2764/GLM-5_dq3-mlx" + ] + }, + { + "architecture_id": "Zamba2ForCausalLM", + "total_models": 3, + "sample_models": [ + "Zyphra/Zamba2-1.2B-instruct", + 
"Zyphra/Zamba2-7B-Instruct", + "Zyphra/Zamba2-2.7B" ] }, { - "architecture_id": "Gemma3nForConditionalGeneration", + "architecture_id": "InternLMForCausalLM", "total_models": 3, "sample_models": [ - "mlx-community/gemma-3n-E2B-it-lm-4bit", - "mlx-community/gemma-3n-E4B-it-lm-4bit", - "RedHatAI/gemma-3n-E4B-it-FP8-dynamic" + "internlm/internlm-chat-7b", + "internlm/internlm-20b", + "internlm/internlm-7b" ] }, { - "architecture_id": "Rwkv6ForCausalLM", + "architecture_id": "GlmForCausalLM", "total_models": 3, "sample_models": [ - "RWKV/v6-Finch-14B-HF", - "RWKV/v6-Finch-7B-HF", - "RWKV/rwkv-6-world-3b" + "zai-org/glm-4-9b-chat-hf", + "zai-org/glm-4-9b-hf", + "zai-org/glm-edge-1.5b-chat" ] }, { @@ -1070,17 +979,17 @@ "total_models": 3, "sample_models": [ "nvidia/Nemotron-Flash-3B", - "nvidia/Nemotron-Flash-3B-Instruct", - "nvidia/Nemotron-Flash-1B" + "nvidia/Nemotron-Flash-1B", + "nvidia/Nemotron-Flash-3B-Instruct" ] }, { - "architecture_id": "StripedHyenaModelForCausalLM", + "architecture_id": "Mistral3ForConditionalGeneration", "total_models": 3, "sample_models": [ - "togethercomputer/evo-1-8k-base", - "togethercomputer/evo-1-131k-base", - "LongSafari/evo-1-8k-crispr" + "RedHatAI/Mistral-Small-3.2-24B-Instruct-2506-NVFP4", + "ArmGPT/ArmenianGPT-1.0-3B", + "srs6901/SOLARized-GraniStral-14B_2102_YeAM-HCT_32QKV" ] }, { @@ -1093,48 +1002,66 @@ ] }, { - "architecture_id": "RecurrentGemmaForCausalLM", + "architecture_id": "JAISLMHeadModel", "total_models": 3, "sample_models": [ - "google/recurrentgemma-2b", - "google/recurrentgemma-2b-it", - "google/recurrentgemma-9b" + "inceptionai/jais-13b-chat", + "katuni4ka/tiny-random-jais", + "inceptionai/jais-13b" ] }, { - "architecture_id": "OLMoForCausalLM", + "architecture_id": "TrillionForCausalLM", "total_models": 3, "sample_models": [ - "allenai/OLMo-7B", - "allenai/OLMo-1B", - "allenai/OLMo-7B-Instruct" + "trillionlabs/Tri-21B-Think", + "trillionlabs/Tri-21B-Think-Preview", + "trillionlabs/Tri-21B" ] }, { - 
"architecture_id": "RWForCausalLM", + "architecture_id": "IdeficsForVisionText2Text", "total_models": 3, "sample_models": [ - "lightonai/alfred-40b-1023", - "vilm/vulture-40b", - "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2" + "HuggingFaceM4/idefics-80b-instruct", + "HuggingFaceM4/idefics-9b", + "HuggingFaceM4/idefics-9b-instruct" + ] + }, + { + "architecture_id": "modeling_camelidae.LlamaForCausalLM", + "total_models": 3, + "sample_models": [ + "hywu/Camelidae-8x34B", + "hywu/Camelidae-8x13B", + "hywu/Camelidae-8x7B" ] }, { "architecture_id": "LISAForCausalLM", "total_models": 3, "sample_models": [ - "MBZUAI/GLaMM-GranD-Pretrained", + "xinlai/LISA-13B-llama2-v1", "xinlai/LISA-7B-v1", - "xinlai/LISA-13B-llama2-v1" + "MBZUAI/GLaMM-GranD-Pretrained" ] }, { - "architecture_id": "modeling_camelidae.LlamaForCausalLM", + "architecture_id": "RWKV7ForCausalLM", "total_models": 3, "sample_models": [ - "hywu/Camelidae-8x34B", - "hywu/Camelidae-8x7B", - "hywu/Camelidae-8x13B" + "RWKV/RWKV7-Goose-World3-1.5B-HF", + "fla-hub/rwkv7-1.5B-world", + "RWKV/RWKV7-Goose-World3-2.9B-HF" + ] + }, + { + "architecture_id": "MotifForCausalLM", + "total_models": 3, + "sample_models": [ + "Motif-Technologies/Motif-2-12.7B-Base", + "Motif-Technologies/Motif-2-12.7B-Instruct", + "Motif-Technologies/Motif-2.6b-v1.1-LC" ] }, { @@ -1146,11 +1073,11 @@ ] }, { - "architecture_id": "MiMoV2FlashForCausalLM", + "architecture_id": "KimiK25ForConditionalGeneration", "total_models": 2, "sample_models": [ - "XiaomiMiMo/MiMo-V2-Flash", - "cyankiwi/MiMo-V2-Flash-AWQ-4bit" + "nvidia/Kimi-K2.5-NVFP4", + "Ex0bit/Kimi-K2.5-PRISM-REAP-530B-A32B" ] }, { @@ -1162,107 +1089,107 @@ ] }, { - "architecture_id": "BambaForCausalLM", + "architecture_id": "HCXVisionV2ForCausalLM", "total_models": 2, "sample_models": [ - "hmellor/tiny-random-BambaForCausalLM", - "ibm-ai-platform/Bamba-9B-v1" + "naver-hyperclovax/HyperCLOVAX-SEED-Omni-8B", + "naver-hyperclovax/HyperCLOVAX-SEED-Think-32B" ] }, { - "architecture_id": 
"Eagle3DraftModel", + "architecture_id": "MoshiForConditionalGeneration", "total_models": 2, "sample_models": [ - "RedHatAI/Qwen3-30B-A3B-Instruct-2507-speculator.eagle3", - "RedHatAI/Qwen3-235B-A22B-Instruct-2507-speculator.eagle3" + "kmhf/hf-moshiko", + "kmhf/hf-moshika" ] }, { - "architecture_id": "BioGptForCausalLM", + "architecture_id": "ReformerModelWithLMHead", "total_models": 2, "sample_models": [ - "microsoft/biogpt", - "microsoft/BioGPT-Large" + "google/reformer-crime-and-punishment", + "google/reformer-enwik8" ] }, { - "architecture_id": "MoshiForConditionalGeneration", + "architecture_id": "Phi3VForCausalLM", "total_models": 2, "sample_models": [ - "kmhf/hf-moshiko", - "kmhf/hf-moshika" + "microsoft/Phi-3-vision-128k-instruct", + "TIGER-Lab/VLM2Vec-Full" ] }, { - "architecture_id": "ReformerModelWithLMHead", + "architecture_id": "Lfm2MoeForCausalLM", "total_models": 2, "sample_models": [ - "google/reformer-crime-and-punishment", - "google/reformer-enwik8" + "LiquidAI/LFM2-8B-A1B", + "LiquidAI/LFM2-24B-A2B" ] }, { - "architecture_id": "Phi3VForCausalLM", + "architecture_id": "StarVectorForCausalLM", "total_models": 2, "sample_models": [ - "microsoft/Phi-3-vision-128k-instruct", - "TIGER-Lab/VLM2Vec-Full" + "starvector/starvector-1b-im2svg", + "starvector/starvector-8b-im2svg" ] }, { - "architecture_id": "MiMoForCausalLM", + "architecture_id": "DbrxForCausalLM", "total_models": 2, "sample_models": [ - "XiaomiMiMo/MiMo-7B-Base", - "XiaomiMiMo/MiMo-7B-RL" + "trl-internal-testing/tiny-DbrxForCausalLM", + "katuni4ka/tiny-random-dbrx" ] }, { - "architecture_id": "CLIPT5ForConditionalGeneration", + "architecture_id": "KimiLinearForCausalLM", "total_models": 2, "sample_models": [ - "zhiqiulin/clip-flant5-xl", - "zhiqiulin/clip-flant5-xxl" + "moonshotai/Kimi-Linear-48B-A3B-Instruct", + "moonshotai/Kimi-Linear-48B-A3B-Base" ] }, { - "architecture_id": "MiniMaxM1ForCausalLM", + "architecture_id": "BartForConditionalGeneration", "total_models": 2, "sample_models": [ 
- "MiniMaxAI/MiniMax-M1-80k", - "MiniMaxAI/MiniMax-M1-40k" + "Nargizi/screeve-lemmatizer", + "KomeijiForce/bart-large-emojilm" ] }, { - "architecture_id": "LongcatFlashForCausalLM", + "architecture_id": "Qwen2_5_VLForConditionalGeneration", "total_models": 2, "sample_models": [ - "meituan-longcat/LongCat-Flash-Chat", - "meituan-longcat/LongCat-Flash-Thinking-FP8" + "nvidia/Qwen2.5-VL-7B-Instruct-NVFP4", + "OmniSVG/OmniSVG" ] }, { - "architecture_id": "InternLMForCausalLM", + "architecture_id": "ChatGLMModel", "total_models": 2, "sample_models": [ - "internlm/internlm-chat-7b", - "internlm/internlm-20b" + "zai-org/glm-4-9b", + "zai-org/codegeex4-all-9b" ] }, { - "architecture_id": "Lfm2MoeForCausalLM", + "architecture_id": "Llama4ForCausalLM", "total_models": 2, "sample_models": [ - "LiquidAI/LFM2-8B-A1B", - "mlx-community/LFM2-8B-A1B-4bit" + "trl-internal-testing/tiny-Llama4ForCausalLM", + "facebook/MobileLLM-R1-950M" ] }, { - "architecture_id": "BitNetForCausalLM", + "architecture_id": "BailingMoeV2_5ForCausalLM", "total_models": 2, "sample_models": [ - "microsoft/bitnet-b1.58-2B-4T-bf16", - "microsoft/bitnet-b1.58-2B-4T" + "inclusionAI/Ring-2.5-1T", + "inclusionAI/Ling-2.5-1T" ] }, { @@ -1274,35 +1201,43 @@ ] }, { - "architecture_id": "ExaoneMoEForCausalLM", + "architecture_id": "MiniMaxM1ForCausalLM", "total_models": 2, "sample_models": [ - "LGAI-EXAONE/K-EXAONE-236B-A23B", - "LGAI-EXAONE/K-EXAONE-236B-A23B-FP8" + "MiniMaxAI/MiniMax-M1-80k", + "MiniMaxAI/MiniMax-M1-40k" ] }, { - "architecture_id": "SolarOpenForCausalLM", + "architecture_id": "CLIPT5ForConditionalGeneration", "total_models": 2, "sample_models": [ - "upstage/Solar-Open-100B", - "nota-ai/Solar-Open-100B-NotaMoEQuant-Int4" + "zhiqiulin/clip-flant5-xxl", + "zhiqiulin/clip-flant5-xl" ] }, { - "architecture_id": "DbrxForCausalLM", + "architecture_id": "BailingMoeForCausalLM", "total_models": 2, "sample_models": [ - "trl-internal-testing/tiny-DbrxForCausalLM", - "alpindale/dbrx-instruct" + 
"inclusionAI/Ling-lite-1.5", + "inclusionAI/Ling-lite" ] }, { - "architecture_id": "HunYuanMoEV1ForCausalLM", + "architecture_id": "BitNetForCausalLM", "total_models": 2, "sample_models": [ - "tencent/Hunyuan-A13B-Instruct", - "tencent/Hunyuan-A13B-Instruct-FP8" + "microsoft/bitnet-b1.58-2B-4T", + "microsoft/bitnet-b1.58-2B-4T-bf16" + ] + }, + { + "architecture_id": "Llama4ForConditionalGeneration", + "total_models": 2, + "sample_models": [ + "RedHatAI/Llama-4-Scout-17B-16E-Instruct-NVFP4", + "yujiepan/llama-4-tiny-random" ] }, { @@ -1314,11 +1249,19 @@ ] }, { - "architecture_id": "ChatGLMModel", + "architecture_id": "IQuestCoderForCausalLM", "total_models": 2, "sample_models": [ - "zai-org/glm-4-9b", - "zai-org/codegeex4-all-9b" + "IQuestLab/IQuest-Coder-V1-40B-Instruct", + "IQuestLab/IQuest-Coder-V1-7B-Instruct" + ] + }, + { + "architecture_id": "InternVLChatModel", + "total_models": 2, + "sample_models": [ + "numind/NuExtract-2-4B-experimental", + "numind/NuExtract-2-8B-experimental" ] }, { @@ -1330,107 +1273,123 @@ ] }, { - "architecture_id": "AquilaForCausalLM", + "architecture_id": "StripedHyenaModelForCausalLM", "total_models": 2, "sample_models": [ - "BAAI/AquilaChat2-7B", - "BAAI/Aquila2-34B" + "togethercomputer/evo-1-8k-base", + "togethercomputer/evo-1-131k-base" ] }, { - "architecture_id": "BolmoForCausalLM", + "architecture_id": "XverseForCausalLM", "total_models": 2, "sample_models": [ - "allenai/Bolmo-7B", - "allenai/Bolmo-1B" + "xverse/XVERSE-7B-Chat", + "katuni4ka/tiny-random-xverse" ] }, { - "architecture_id": "JAISLMHeadModel", + "architecture_id": "RecurrentGemmaForCausalLM", "total_models": 2, "sample_models": [ - "inceptionai/jais-13b-chat", - "inceptionai/jais-13b" + "google/recurrentgemma-2b", + "google/recurrentgemma-2b-it" ] }, { - "architecture_id": "BailingMoeV2_5ForCausalLM", + "architecture_id": "FlexOlmoForCausalLM", "total_models": 2, "sample_models": [ - "inclusionAI/Ring-2.5-1T", - "inclusionAI/Ling-2.5-1T" + 
"allenai/Flex-reddit-2x7B-1T", + "shanearora/Flex-reddit-2x7B-1T" ] }, { - "architecture_id": "InternLMXComposer2ForCausalLM", + "architecture_id": "SolarOpenForCausalLM", "total_models": 2, "sample_models": [ - "internlm/internlm-xcomposer2-7b", - "internlm/internlm-xcomposer2-vl-7b-4bit" + "upstage/Solar-Open-100B", + "nota-ai/Solar-Open-100B-NotaMoEQuant-Int4" ] }, { - "architecture_id": "StarVectorForCausalLM", + "architecture_id": "PenguinVLQwen3ForCausalLM", "total_models": 2, "sample_models": [ - "starvector/starvector-8b-im2svg", - "starvector/starvector-1b-im2svg" + "tencent/Penguin-VL-8B", + "tencent/Penguin-VL-2B" ] }, { - "architecture_id": "HCXVisionV2ForCausalLM", + "architecture_id": "Qwen3VLForConditionalGeneration", "total_models": 2, "sample_models": [ - "naver-hyperclovax/HyperCLOVAX-SEED-Think-32B", - "naver-hyperclovax/HyperCLOVAX-SEED-Omni-8B" + "RedHatAI/Qwen3-VL-32B-Instruct-NVFP4", + "Goekdeniz-Guelmez/Josiefied-Qwen3-VL-4B-Instruct-abliterated-beta-v1" ] }, { - "architecture_id": "BD3LM", + "architecture_id": "MolformerForCausalLM", "total_models": 2, "sample_models": [ - "kuleshov-group/bd3lm-owt-block_size4", - "kuleshov-group/bd3lm-owt-block_size16" + "ibm-research/GP-MoLFormer-Uniq", + "ralyn/NPComposer-v2" ] }, { - "architecture_id": "GiddForDiffusionLM", + "architecture_id": "Rwkv6ForCausalLM", "total_models": 2, "sample_models": [ - "dvruette/gidd-unif-3b", - "dvruette/gidd-unif-10b" + "RWKV/v6-Finch-1B6-HF", + "RWKV/v6-Finch-14B-HF" ] }, { - "architecture_id": "ParamBharatGenForCausalLM", + "architecture_id": "OLMoForCausalLM", "total_models": 2, "sample_models": [ - "bharatgenai/AyurParam", - "bharatgenai/Param-1-2.9B-Instruct" + "allenai/OLMo-7B", + "allenai/OLMo-1B" ] }, { - "architecture_id": "MotifForCausalLM", + "architecture_id": "BolmoForCausalLM", "total_models": 2, "sample_models": [ - "Motif-Technologies/Motif-2-12.7B-Base", - "Motif-Technologies/Motif-2-12.7B-Reasoning" + "allenai/Bolmo-7B", + "allenai/Bolmo-1B" ] }, { 
- "architecture_id": "YoutuForCausalLM", + "architecture_id": "Eagle3DraftModel", "total_models": 2, "sample_models": [ - "tencent/Youtu-LLM-2B-Base", - "tencent/Youtu-LLM-2B" + "RedHatAI/Qwen3-30B-A3B-Instruct-2507-speculator.eagle3", + "RedHatAI/Qwen3-235B-A22B-Instruct-2507-speculator.eagle3" + ] + }, + { + "architecture_id": "GLAForCausalLM", + "total_models": 2, + "sample_models": [ + "fla-hub/gla-340M-15B", + "fla-hub/gla-1.3B-100B" + ] + }, + { + "architecture_id": "JetMoEForCausalLM", + "total_models": 2, + "sample_models": [ + "jetmoe/jetmoe-8b", + "jetmoe/jetmoe-8b-chat" ] }, { - "architecture_id": "DeCodon", + "architecture_id": "MosaicGPT", "total_models": 2, "sample_models": [ - "goodarzilab/decodon-200M", - "goodarzilab/decodon-200M-euk" + "anas-awadalla/mpt-1b-redpajama-200b", + "anas-awadalla/mpt-1b-redpajama-200b-dolly" ] }, { @@ -1442,11 +1401,35 @@ ] }, { - "architecture_id": "StableLMEpochForCausalLM", + "architecture_id": "WeDLMForCausalLM", + "total_models": 2, + "sample_models": [ + "tencent/WeDLM-8B-Instruct", + "tencent/WeDLM-8B-Base" + ] + }, + { + "architecture_id": "YoutuForCausalLM", + "total_models": 2, + "sample_models": [ + "tencent/Youtu-LLM-2B-Base", + "tencent/Youtu-LLM-2B" + ] + }, + { + "architecture_id": "BottleneckT5LMWithPerturb", + "total_models": 2, + "sample_models": [ + "thesephist/contra-bottleneck-t5-base-wikipedia", + "thesephist/contra-bottleneck-t5-large-wikipedia" + ] + }, + { + "architecture_id": "ParamBharatGenForCausalLM", "total_models": 2, "sample_models": [ - "acon96/Home-3B-v3-GGUF", - "stabilityai/japanese-stablelm-3b-4e1t-instruct" + "bharatgenai/AyurParam", + "bharatgenai/Param-1-2.9B-Instruct" ] }, { @@ -1458,40 +1441,42 @@ ] }, { - "architecture_id": "InstellaForCausalLM", + "architecture_id": "SliderGPT", "total_models": 2, "sample_models": [ - "amd/Instella-3B", - "amd/Instella-3B-Instruct" + "c-bone/CrystaLLM-pi_Mattergen-XRD", + "c-bone/CrystaLLM-pi_COD-XRD" ] }, { - "architecture_id": 
"MobileLlamaForCausalLM", + "architecture_id": "MptForCausalLM", "total_models": 2, "sample_models": [ - "mtgv/MobileVLM_V2-1.7B", - "mtgv/MobileVLM-1.7B" + "team-lucid/mptk-1b", + "explosion-testing/mpt-test" ] }, { - "architecture_id": "BartForConditionalGeneration", - "total_models": 1, + "architecture_id": "InstellaForCausalLM", + "total_models": 2, "sample_models": [ - "KomeijiForce/bart-large-emojilm" + "amd/Instella-3B", + "amd/Instella-3B-Instruct" ] }, { - "architecture_id": "GPTRefactForCausalLM", - "total_models": 1, + "architecture_id": "OpenMoeForCausalLM", + "total_models": 2, "sample_models": [ - "refactai/Refact-1_6B-fim" + "hpcai-tech/openmoe-8B", + "OrionZheng/openmoe-8b" ] }, { - "architecture_id": "IQuestCoderForCausalLM", + "architecture_id": "MiMoV2FlashForCausalLM", "total_models": 1, "sample_models": [ - "IQuestLab/IQuest-Coder-V1-40B-Instruct" + "XiaomiMiMo/MiMo-V2-Flash" ] }, { @@ -1501,6 +1486,13 @@ "XLabs-AI/xflux_text_encoders" ] }, + { + "architecture_id": "XCurOSForCausalLM", + "total_models": 1, + "sample_models": [ + "XCurOS/XCurOS-0.1-8B-Instruct" + ] + }, { "architecture_id": "GPTNeoXJapaneseForCausalLM", "total_models": 1, @@ -1516,17 +1508,17 @@ ] }, { - "architecture_id": "HCXVisionForCausalLM", + "architecture_id": "Plamo2ForCausalLM", "total_models": 1, "sample_models": [ - "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B" + "pfnet/plamo-2-1b" ] }, { - "architecture_id": "Plamo2ForCausalLM", + "architecture_id": "HCXVisionForCausalLM", "total_models": 1, "sample_models": [ - "pfnet/plamo-2-1b" + "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B" ] }, { @@ -1537,24 +1529,66 @@ ] }, { - "architecture_id": "Phi4FlashForCausalLM", + "architecture_id": "BaiChuanForCausalLM", "total_models": 1, "sample_models": [ - "microsoft/Phi-4-mini-flash-reasoning" + "baichuan-inc/Baichuan-7B" ] }, { - "architecture_id": "BaiChuanForCausalLM", + "architecture_id": "GPTRefactForCausalLM", "total_models": 1, "sample_models": [ - 
"baichuan-inc/Baichuan-7B" + "refactai/Refact-1_6B-fim" + ] + }, + { + "architecture_id": "SarvamMoEForCausalLM", + "total_models": 1, + "sample_models": [ + "sarvamai/sarvam-30b" + ] + }, + { + "architecture_id": "LongcatFlashForCausalLM", + "total_models": 1, + "sample_models": [ + "meituan-longcat/LongCat-Flash-Chat" + ] + }, + { + "architecture_id": "ExaoneMoEForCausalLM", + "total_models": 1, + "sample_models": [ + "LGAI-EXAONE/K-EXAONE-236B-A23B" + ] + }, + { + "architecture_id": "HyperCLOVAXForCausalLM", + "total_models": 1, + "sample_models": [ + "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B" + ] + }, + { + "architecture_id": "HunYuanMoEV1ForCausalLM", + "total_models": 1, + "sample_models": [ + "tencent/Hunyuan-A13B-Instruct" + ] + }, + { + "architecture_id": "GritLM", + "total_models": 1, + "sample_models": [ + "parasail-ai/GritLM-7B-vllm" ] }, { - "architecture_id": "CPMAntForCausalLM", + "architecture_id": "JetNemotronForCausalLM", "total_models": 1, "sample_models": [ - "openbmb/cpm-ant-10b" + "jet-ai/Jet-Nemotron-2B" ] }, { @@ -1565,31 +1599,31 @@ ] }, { - "architecture_id": "Moondream", + "architecture_id": "Grok1ModelForCausalLM", "total_models": 1, "sample_models": [ - "vikhyatk/moondream1" + "hpcai-tech/grok-1" ] }, { - "architecture_id": "Grok1ModelForCausalLM", + "architecture_id": "InternLM3ForCausalLM", "total_models": 1, "sample_models": [ - "hpcai-tech/grok-1" + "internlm/internlm3-8b-instruct" ] }, { - "architecture_id": "GritLM", + "architecture_id": "Qwen3VLMoeForConditionalGeneration", "total_models": 1, "sample_models": [ - "parasail-ai/GritLM-7B-vllm" + "RedHatAI/Qwen3-VL-235B-A22B-Instruct-NVFP4" ] }, { - "architecture_id": "BailingMoeForCausalLM", + "architecture_id": "Emu3ForCausalLM", "total_models": 1, "sample_models": [ - "inclusionAI/Ling-lite-1.5" + "BAAI/Emu3-Chat" ] }, { @@ -1600,10 +1634,10 @@ ] }, { - "architecture_id": "InternLM3ForCausalLM", + "architecture_id": "GRIN-MoE", "total_models": 1, "sample_models": [ - 
"internlm/internlm3-8b-instruct" + "microsoft/GRIN-MoE" ] }, { @@ -1614,17 +1648,17 @@ ] }, { - "architecture_id": "IQuestLoopCoderForCausalLM", + "architecture_id": "AV2TextForConditionalGeneration", "total_models": 1, "sample_models": [ - "IQuestLab/IQuest-Coder-V1-40B-Loop-Instruct" + "nguyenvulebinh/AV-HuBERT-MuAViC-en" ] }, { - "architecture_id": "TrillionForCausalLM", + "architecture_id": "ArcticForCausalLM", "total_models": 1, "sample_models": [ - "trillionlabs/Tri-21B" + "Snowflake/snowflake-arctic-instruct" ] }, { @@ -1635,73 +1669,73 @@ ] }, { - "architecture_id": "JetNemotronForCausalLM", + "architecture_id": "IQuestLoopCoderForCausalLM", "total_models": 1, "sample_models": [ - "jet-ai/Jet-Nemotron-2B" + "IQuestLab/IQuest-Coder-V1-40B-Loop-Instruct" ] }, { - "architecture_id": "Llama4ForCausalLM", + "architecture_id": "Moondream", "total_models": 1, "sample_models": [ - "trl-internal-testing/tiny-Llama4ForCausalLM" + "vikhyatk/moondream1" ] }, { - "architecture_id": "HyperCLOVAXForCausalLM", + "architecture_id": "Plamo3ForCausalLM", "total_models": 1, "sample_models": [ - "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B" + "pfnet/plamo-3-nict-2b-base" ] }, { - "architecture_id": "Plamo3ForCausalLM", + "architecture_id": "InternLMXComposer2ForCausalLM", "total_models": 1, "sample_models": [ - "pfnet/plamo-3-nict-2b-base" + "internlm/internlm-xcomposer2-7b" ] }, { - "architecture_id": "CheXagentForCausalLM", + "architecture_id": "SarvamMLAForCausalLM", "total_models": 1, "sample_models": [ - "StanfordAIMI/CheXagent-2-3b" + "sarvamai/sarvam-105b" ] }, { - "architecture_id": "ArcticForCausalLM", + "architecture_id": "GraphT5TransformerForConditionalGeneration", "total_models": 1, "sample_models": [ - "Snowflake/snowflake-arctic-instruct" + "haitengzhao/gimlet" ] }, { - "architecture_id": "MolformerForCausalLM", + "architecture_id": "CheXagentForCausalLM", "total_models": 1, "sample_models": [ - "ibm-research/GP-MoLFormer-Uniq" + "StanfordAIMI/CheXagent-2-3b" ] 
}, { - "architecture_id": "XverseForCausalLM", + "architecture_id": "InternLMXComposerForCausalLM", "total_models": 1, "sample_models": [ - "xverse/XVERSE-7B-Chat" + "internlm/internlm-xcomposer-7b" ] }, { - "architecture_id": "LongcatCausalLM", + "architecture_id": "Dots1ForCausalLM", "total_models": 1, "sample_models": [ - "meituan-longcat/LongCat-Flash-Thinking-2601" + "rednote-hilab/dots.llm1.inst" ] }, { - "architecture_id": "Qwen2_5_VLForConditionalGeneration", + "architecture_id": "LlavaSearchLlamaForCausalLM", "total_models": 1, "sample_models": [ - "nvidia/Qwen2.5-VL-7B-Instruct-NVFP4" + "craigwu/seal_vqa_7b" ] }, { @@ -1712,52 +1746,52 @@ ] }, { - "architecture_id": "AV2TextForConditionalGeneration", + "architecture_id": "TransfoXLLMHeadModel", "total_models": 1, "sample_models": [ - "nguyenvulebinh/AV-HuBERT-MuAViC-en" + "transfo-xl/transfo-xl-wt103" ] }, { - "architecture_id": "Dots1ForCausalLM", + "architecture_id": "ZetaGrid25B", "total_models": 1, "sample_models": [ - "rednote-hilab/dots.llm1.inst" + "RthItalia/Rth-lm-code-25b" ] }, { - "architecture_id": "FlexOlmoForCausalLM", + "architecture_id": "TransformerForCausalLM", "total_models": 1, "sample_models": [ - "allenai/Flex-reddit-2x7B-1T" + "fla-hub/transformer-1.3B-100B" ] }, { - "architecture_id": "MiniCPMSALAForCausalLM", + "architecture_id": "VaultGemmaForCausalLM", "total_models": 1, "sample_models": [ - "openbmb/MiniCPM-SALA" + "google/vaultgemma-1b" ] }, { - "architecture_id": "Bagel", + "architecture_id": "FP8Qwen2ForCausalLM", "total_models": 1, "sample_models": [ - "lmms-lab/BAGEL-7B-MoT-ver.LE" + "xihc-ucb/Qwen2.5-7B-train-Quasar-1214" ] }, { - "architecture_id": "VaultGemmaForCausalLM", + "architecture_id": "SparseLlamaForCausalLM", "total_models": 1, "sample_models": [ - "google/vaultgemma-1b" + "openbmb/NOSA-8B" ] }, { - "architecture_id": "RavenForCausalLM", + "architecture_id": "CambrianQwenForCausalLM", "total_models": 1, "sample_models": [ - "tomg-group-umd/huginn-0125" + 
"nyu-visionx/Scale-RAE-Qwen1.5B_DiT2.4B" ] }, { @@ -1768,45 +1802,45 @@ ] }, { - "architecture_id": "TorchMultiOmicsModel", + "architecture_id": "VSMForCausalLM", "total_models": 1, "sample_models": [ - "InstaDeepAI/ChatNT" + "craigwu/seal_vsm_7b" ] }, { - "architecture_id": "TransformerForCausalLM", + "architecture_id": "GPT2LMHeadCustomModel", "total_models": 1, "sample_models": [ - "fla-hub/transformer-1.3B-100B" + "bigcode/santacoder" ] }, { - "architecture_id": "InternLMXComposerForCausalLM", + "architecture_id": "MoYiForCausalLM", "total_models": 1, "sample_models": [ - "internlm/internlm-xcomposer-7b" + "astanahub/alemllm" ] }, { - "architecture_id": "GPT2LMHeadCustomModel", + "architecture_id": "SeerAttnLlamaForCausalLM", "total_models": 1, "sample_models": [ - "bigcode/santacoder" + "SeerAttention/SeerAttention-Llama-3.1-8B-AttnGates" ] }, { - "architecture_id": "MosaicGPT", + "architecture_id": "GeoChatLlamaForCausalLM", "total_models": 1, "sample_models": [ - "anas-awadalla/mpt-1b-redpajama-200b" + "MBZUAI/geochat-7B" ] }, { - "architecture_id": "LongcatFlashNgramForCausalLM", + "architecture_id": "RavenForCausalLM", "total_models": 1, "sample_models": [ - "meituan-longcat/LongCat-Flash-Lite" + "tomg-group-umd/huginn-0125" ] }, { @@ -1816,6 +1850,13 @@ "ServiceNow-AI/Apriel-5B-Instruct" ] }, + { + "architecture_id": "Phi4MMForCausalLM", + "total_models": 1, + "sample_models": [ + "Yanis-Gerst/fine_tune" + ] + }, { "architecture_id": "Maira2ForConditionalGeneration", "total_models": 1, @@ -1824,346 +1865,374 @@ ] }, { - "architecture_id": "MllamaForConditionalGeneration", + "architecture_id": "MiniCPMSALAForCausalLM", "total_models": 1, "sample_models": [ - "RedHatAI/Llama-3.2-90B-Vision-Instruct-FP8-dynamic" + "openbmb/MiniCPM-SALA" ] }, { - "architecture_id": "TransfoXLLMHeadModel", + "architecture_id": "PanguEmbeddedForCausalLM", "total_models": 1, "sample_models": [ - "transfo-xl/transfo-xl-wt103" + "FreedomIntelligence/openPangu-Embedded-1B" ] }, { - 
"architecture_id": "JetMoEForCausalLM", + "architecture_id": "Param2MoEForCausalLM", "total_models": 1, "sample_models": [ - "jetmoe/jetmoe-8b" + "bharatgenai/Param2-17B-A2.4B-Thinking" ] }, { - "architecture_id": "Qwen2VLForConditionalGeneration", + "architecture_id": "GiddForDiffusionLM", "total_models": 1, "sample_models": [ - "yujiepan/qwen2-vl-tiny-random" + "dvruette/gidd-unif-3b" ] }, { - "architecture_id": "VibeVoiceAsrForConditionalGeneration", + "architecture_id": "TorchMultiOmicsModel", "total_models": 1, "sample_models": [ - "bezzam/VibeVoice-ASR-7B" + "InstaDeepAI/ChatNT" ] }, { - "architecture_id": "PolyLMHeadModel", + "architecture_id": "HGRNBitForCausalLM", "total_models": 1, "sample_models": [ - "DAMO-NLP-MT/polylm-13b" + "ridger/MMfreeLM-370M" ] }, { - "architecture_id": "Qwen3TSForCausalLM", + "architecture_id": "StableLMAlphaForCausalLM", "total_models": 1, "sample_models": [ - "bytedance-research/ChatTS-8B" + "stabilityai/stablelm-base-alpha-7b-v2" ] }, { - "architecture_id": "SpatialLMLlamaForCausalLM", + "architecture_id": "MiniMaxText01ForCausalLM", "total_models": 1, "sample_models": [ - "manycore-research/SpatialLM1.1-Llama-1B" + "MiniMaxAI/MiniMax-Text-01" ] }, { - "architecture_id": "LlamaMoEForCausalLM", + "architecture_id": "LamedPhi3ForCausalLM", "total_models": 1, "sample_models": [ - "llama-moe/LLaMA-MoE-v1-3_5B-2_8" + "GoodBaiBai88/M3D-LaMed-Phi-3-4B" ] }, { - "architecture_id": "HGRNBitForCausalLM", + "architecture_id": "Phi4FlashForCausalLM", "total_models": 1, "sample_models": [ - "ridger/MMfreeLM-370M" + "microsoft/Phi-4-mini-flash-reasoning" ] }, { - "architecture_id": "XMistralForCausalLM", + "architecture_id": "CheXagentForConditionalGeneration", "total_models": 1, "sample_models": [ - "Hannibal046/xrag-7b" + "StanfordAIMI/CheXagent-8b" ] }, { - "architecture_id": "StableLMAlphaForCausalLM", + "architecture_id": "Kanana2VecModel", "total_models": 1, "sample_models": [ - "stabilityai/stablelm-base-alpha-7b-v2" + 
"kakaocorp/kanana-nano-2.1b-embedding" ] }, { - "architecture_id": "TridaForDLM", + "architecture_id": "GPT3DevLMHeadModel", "total_models": 1, "sample_models": [ - "trillionlabs/Trida-7B" + "k050506koch/GPT3-dev-350m-2805" ] }, { - "architecture_id": "WeDLMForCausalLM", + "architecture_id": "DeciCoderForCausalLM", "total_models": 1, "sample_models": [ - "tencent/WeDLM-8B-Instruct" + "Deci/DeciCoder-1b" ] }, { - "architecture_id": "PointLLMLlamaForCausalLM", + "architecture_id": "MobileLlamaForCausalLM", "total_models": 1, "sample_models": [ - "RunsenXu/PointLLM_7B_v1.2" + "mtgv/MobileVLM_V2-1.7B" ] }, { - "architecture_id": "CambrianQwenForCausalLM", + "architecture_id": "Qwen2ForSequenceClassification", "total_models": 1, "sample_models": [ - "nyu-visionx/Scale-RAE-Qwen1.5B_DiT2.4B" + "nvidia/AceMath-7B-RM" ] }, { - "architecture_id": "CheXagentForConditionalGeneration", + "architecture_id": "Qwen2VLForConditionalGeneration", "total_models": 1, "sample_models": [ - "StanfordAIMI/CheXagent-8b" + "yujiepan/qwen2-vl-tiny-random" ] }, { - "architecture_id": "CambrianLlamaForCausalLM", + "architecture_id": "LLaDAMoEModel", "total_models": 1, "sample_models": [ - "nyu-visionx/cambrian-8b" + "inclusionAI/LLaDA-MoE-7B-A1B-Base" ] }, { - "architecture_id": "BottleneckT5LMWithPerturb", + "architecture_id": "DogeForCausalLM", "total_models": 1, "sample_models": [ - "thesephist/contra-bottleneck-t5-base-wikipedia" + "SmallDoge/Doge-20M" ] }, { - "architecture_id": "Kanana2VecModel", + "architecture_id": "CTRLLMHeadModel", "total_models": 1, "sample_models": [ - "kakaocorp/kanana-nano-2.1b-embedding" + "sshleifer/tiny-ctrl" ] }, { - "architecture_id": "DeltaNetForCausalLM", + "architecture_id": "CPMAntForCausalLM", "total_models": 1, "sample_models": [ - "fla-hub/delta_net-1.3B-100B" + "openbmb/cpm-ant-10b" ] }, { - "architecture_id": "DetikzifyForCausalLM", + "architecture_id": "ICONNForCausalLM", "total_models": 1, "sample_models": [ - "nllg/detikzify-tl-1.1b" + 
"ICONNAI/ICONN-1-Mini-Beta" ] }, { - "architecture_id": "OtterForConditionalGeneration", + "architecture_id": "HeliumForCausalLM", "total_models": 1, "sample_models": [ - "luodian/OTTER-MPT1B-RPJama-Init" + "kyutai/helium-1-preview-2b" ] }, { - "architecture_id": "DeciCoderForCausalLM", + "architecture_id": "BD3LM", "total_models": 1, "sample_models": [ - "Deci/DeciCoder-1b" + "kuleshov-group/bd3lm-owt-block_size4" ] }, { - "architecture_id": "MiniMaxText01ForCausalLM", + "architecture_id": "LongcatFlashNgramForCausalLM", "total_models": 1, "sample_models": [ - "MiniMaxAI/MiniMax-Text-01" + "meituan-longcat/LongCat-Flash-Lite" ] }, { - "architecture_id": "ChatGLMForConditionalGeneration", + "architecture_id": "CircuitGPTForCausalLM", "total_models": 1, "sample_models": [ - "IAAR-Shanghai/xVerify-9B-C" + "openai/circuit-sparsity" ] }, { - "architecture_id": "Qwen2ForSequenceClassification", + "architecture_id": "GPT2CustomLMHeadModel", "total_models": 1, "sample_models": [ - "nvidia/AceMath-7B-RM" + "fxmarty/tiny-testing-gpt2-remote-code" ] }, { - "architecture_id": "SeerAttnLlamaForCausalLM", + "architecture_id": "MobileLLMForCausalLM", "total_models": 1, "sample_models": [ - "SeerAttention/SeerAttention-Llama-3.1-8B-AttnGates" + "facebook/MobileLLM-125M" ] }, { - "architecture_id": "GAD2ForAgenticModeling", + "architecture_id": "SpatialLMLlamaForCausalLM", "total_models": 1, "sample_models": [ - "Raziel1234/GAD-2" + "manycore-research/SpatialLM1.1-Llama-1B" ] }, { - "architecture_id": "LlavaMistralForCausalLM", + "architecture_id": "SKTOmniForCausalLM", "total_models": 1, "sample_models": [ - "billborkowski/llava-NousResearch_Nous-Hermes-2-Vision-GGUF" + "Shrijanagain/SKT_OMNI_SUPREME" ] }, { - "architecture_id": "ICONNForCausalLM", + "architecture_id": "DuchifatCore", "total_models": 1, "sample_models": [ - "ICONNAI/ICONN-1-Mini-Beta" + "Raziel1234/Duchifat-2" ] }, { - "architecture_id": "LamedPhi3ForCausalLM", + "architecture_id": "GPT2Model", "total_models": 1, 
"sample_models": [ - "GoodBaiBai88/M3D-LaMed-Phi-3-4B" + "cerebras/Cerebras-GPT-13B" ] }, { - "architecture_id": "MobileLLMForCausalLM", + "architecture_id": "ConditionalGPT", "total_models": 1, "sample_models": [ - "facebook/MobileLLM-125M" + "c-bone/CrystaLLM-pi_bandgap" ] }, { - "architecture_id": "CogVLMVideoForCausalLM", + "architecture_id": "PhariaForCausalLM", "total_models": 1, "sample_models": [ - "zai-org/VisionReward-Video" + "Aleph-Alpha/Pharia-1-LLM-7B-control-hf" ] }, { - "architecture_id": "LLaDAMoEModel", + "architecture_id": "UMT5ForConditionalGeneration", "total_models": 1, "sample_models": [ - "inclusionAI/LLaDA-MoE-7B-A1B-Base" + "EleutherAI/pile-t5-xl" ] }, { - "architecture_id": "SokaForCausalLM", + "architecture_id": "ZambaForCausalLM", "total_models": 1, "sample_models": [ - "soka0000/vclm-korean-7b" + "Zyphra/Zamba-7B-v1" ] }, { - "architecture_id": "HeliumForCausalLM", + "architecture_id": "RecursiveLanguageModel", "total_models": 1, "sample_models": [ - "kyutai/helium-1-preview-2b" + "Girinath11/recursive-language-model-198m" ] }, { - "architecture_id": "GPT2Model", + "architecture_id": "PolyLMHeadModel", "total_models": 1, "sample_models": [ - "cerebras/Cerebras-GPT-13B" + "DAMO-NLP-MT/polylm-13b" ] }, { - "architecture_id": "GLaMMForCausalLM", + "architecture_id": "Qwen3TSForCausalLM", "total_models": 1, "sample_models": [ - "MBZUAI/GLaMM-FullScope" + "bytedance-research/ChatTS-8B" ] }, { - "architecture_id": "Emu3ForCausalLM", + "architecture_id": "MegaForCausalLM", "total_models": 1, "sample_models": [ - "BAAI/Emu3-Chat" + "BEE-spoke-data/mega-ar-126m-4k" ] }, { - "architecture_id": "Ministral3ForCausalLM", + "architecture_id": "PointLLMLlamaForCausalLM", "total_models": 1, "sample_models": [ - "mlx-community/Devstral-2-123B-Instruct-2512-4bit" + "RunsenXu/PointLLM_7B_v1.2" ] }, { - "architecture_id": "FP8Qwen2ForCausalLM", + "architecture_id": "SongGenMixedForConditionalGeneration", "total_models": 1, "sample_models": [ - 
"xihc-ucb/Qwen2.5-7B-train-Quasar-1214" + "LiuZH-19/SongGen_mixed_pro" ] }, { - "architecture_id": "DogeForCausalLM", + "architecture_id": "LlamaModel", "total_models": 1, "sample_models": [ - "SmallDoge/Doge-20M" + "ngoan/NgoanYi" ] }, { - "architecture_id": "Qwen2TSForCausalLM", + "architecture_id": "BertLMHeadModel", "total_models": 1, "sample_models": [ - "bytedance-research/ChatTS-14B" + "dicta-il/BEREL_3.0" ] }, { - "architecture_id": "MptForCausalLM", + "architecture_id": "BailingMoeLinearV2ForCausalLM", "total_models": 1, "sample_models": [ - "team-lucid/mptk-1b" + "inclusionAI/Ring-mini-linear-2.0" ] }, { - "architecture_id": "MegaForCausalLM", + "architecture_id": "AeroForConditionalGeneration", "total_models": 1, "sample_models": [ - "BEE-spoke-data/mega-ar-126m-4k" + "lmms-lab/Aero-1-Audio" ] }, { - "architecture_id": "ZambaForCausalLM", + "architecture_id": "DUO", "total_models": 1, "sample_models": [ - "Zyphra/Zamba-7B-v1" + "s-sahoo/duo-distilled" ] }, { - "architecture_id": "OpenMoeForCausalLM", + "architecture_id": "GLaMMForCausalLM", + "total_models": 1, + "sample_models": [ + "MBZUAI/GLaMM-FullScope" + ] + }, + { + "architecture_id": "CambrianLlamaForCausalLM", + "total_models": 1, + "sample_models": [ + "nyu-visionx/cambrian-8b" + ] + }, + { + "architecture_id": "Glm4MoeLiteSonicForCausalLM", + "total_models": 1, + "sample_models": [ + "rpDungeon/GLM-4.7-Flash-SonicMOE" + ] + }, + { + "architecture_id": "C3QwenForCausalLM", + "total_models": 1, + "sample_models": [ + "liufanfanlff/C3-Context-Cascade-Compression" + ] + }, + { + "architecture_id": "KonkanGPT", "total_models": 1, "sample_models": [ - "hpcai-tech/openmoe-8B" + "omdeep22/Gonyai-v1" ] }, { - "architecture_id": "Typhoon2Audio2AudioForConditionalGeneration", + "architecture_id": "Bagel", "total_models": 1, "sample_models": [ - "typhoon-ai/llama3.1-typhoon2-audio-8b-instruct" + "lmms-lab/BAGEL-7B-MoT-ver.LE" ] }, { @@ -2174,45 +2243,45 @@ ] }, { - "architecture_id": 
"CircuitGPTForCausalLM", + "architecture_id": "MonoidForCausalLM", "total_models": 1, "sample_models": [ - "openai/circuit-sparsity" + "NoesisLab/Spartacus-1B-Instruct" ] }, { - "architecture_id": "Rwkv5ForCausalLM", + "architecture_id": "KimiForCausalLM", "total_models": 1, "sample_models": [ - "RWKV/rwkv-5-world-3b" + "applexml/kimi-k2-poc2" ] }, { - "architecture_id": "GeoChatLlamaForCausalLM", + "architecture_id": "MedHemoModel", "total_models": 1, "sample_models": [ - "MBZUAI/geochat-7B" + "amewebstudio/medhemo-earcp" ] }, { - "architecture_id": "GLAForCausalLM", + "architecture_id": "ErnieForCausalLM", "total_models": 1, "sample_models": [ - "fla-hub/gla-1.3B-100B" + "mohitsha/tiny-ernie-random-remote-code" ] }, { - "architecture_id": "RND1", + "architecture_id": "MCGPTForCausalLM", "total_models": 1, "sample_models": [ - "radicalnumerics/RND1-Base-0910" + "TopAI-1/MCGPT-1" ] }, { - "architecture_id": "LlamaModel", + "architecture_id": "HymbaForCausalLM", "total_models": 1, "sample_models": [ - "ngoan/NgoanYi" + "nvidia/Hymba-1.5B-Instruct" ] } ] diff --git a/transformer_lens/tools/model_registry/data/supported_models.json b/transformer_lens/tools/model_registry/data/supported_models.json index 717f0a251..4f8b2686e 100644 --- a/transformer_lens/tools/model_registry/data/supported_models.json +++ b/transformer_lens/tools/model_registry/data/supported_models.json @@ -1,14 +1,14 @@ { - "generated_at": "2026-02-19", + "generated_at": "2026-03-17", "scan_info": { - "total_scanned": 7808, + "total_scanned": 4221, "task_filter": "text-generation", "min_downloads": 500, - "scan_duration_seconds": 2.2 + "scan_duration_seconds": 2.6 }, - "total_architectures": 29, - "total_models": 4944, - "total_verified": 609, + "total_architectures": 32, + "total_models": 5742, + "total_verified": 625, "models": [ { "architecture_id": "Qwen2ForCausalLM", @@ -64287,6 +64287,8800 @@ "phase3_score": null, "phase4_score": 88.3, "phase7_score": 100.0 + }, + { + "architecture_id": 
"GraniteMoeForCausalLM", + "model_id": "ibm-research/PowerMoE-3b", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification completed", + "phase1_score": 100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 96.3 + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "mtgv/MobileLLaMA-1.4B-Chat", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "KiteFishAI/Minnow-Math-1.5B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "SWE-bench/SWE-agent-LM-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "ibm-granite/granite-4.0-micro", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification completed", + "phase1_score": 100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 72.2 + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "kldzj/gpt-oss-120b-heretic", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-3.1-8b-instruct", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification completed", + "phase1_score": 100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 97.1 + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "GeneralAnalysis/GA_Guard_Lite", + "status": 0, + "verified_date": null, + 
"metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-3.3-8b-instruct", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification completed", + "phase1_score": 100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 97.9 + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-3.3-2b-instruct", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification completed", + "phase1_score": 100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 74.4 + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-14m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Vikhrmodels/QVikhr-3-1.7B-Instruction-noreasoning", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "ibm-granite/granite-4.0-h-small", + "status": 2, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Estimated 135.9 GB exceeds 75.2 GB limit", + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "MilyaShams/T-lite-it-1.0_Q4_0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-research/PowerLM-3b", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification completed", + "phase1_score": 
100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 96.1 + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "ibm-granite/granite-4.0-tiny-preview", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification completed", + "phase1_score": 100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 97.4 + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "optimum-intel-internal-testing/tiny-random-gpt-oss-mxfp4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-3.0-8b-instruct", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification completed", + "phase1_score": 100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 98.6 + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "NorthernTribe-Research/UMSR-Reasoner-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "llm-jp/llm-jp-3.1-13b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "argilla/Llama-3.2-1B-Instruct-APIGen-FC-v0.1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "hirundo-io/llama-3.1-8b-bias-reduced", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + 
"architecture_id": "LlamaForCausalLM", + "model_id": "ShahriarFerdoush/llama-3.2-1b-code-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "ibm-granite/granite-4.0-h-tiny", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification completed", + "phase1_score": 100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 77.5 + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "ibm-granite/granite-4.0-h-micro", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification completed", + "phase1_score": 100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 98.2 + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "onnx-community/granite-4.0-350m-ONNX-web", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "MultiverseComputingCAI/Hypernova-60B-2602", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "kenpath/voice-svara-tts-v1-fft-v0.5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "LocoreMind/LocoOperator-4B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": 
"abehandlerorg/pythia-45m_lr1e-3_steps5k_seed1234_interleave0.02", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "cazzz307/Abliterated-Llama-3.2-1B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-3.2-8b-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "abaryan/CyberXP_Agent_Llama_3.2_1B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "kyujinpy/Ko-PlatYi-6B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "ibm-granite/granite-3b-code-instruct-2k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "ahczhg/Llama-3.2-1B-Aegis-SFT-DPO", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "abehandlerorg/pythia-45m_lr1e-3_steps5k_seed1234", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + 
"architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "ibm-granite/granite-4.0-350m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "AdamLucek/Orpo-Llama-3.2-1B-15k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "argilla-warehouse/Llama-3.2-1B-Instruct-v2-FC", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "guangyangnlp/Qwen3-1.7B-SFT-science-2e-5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "lm-provers/QED-Nano", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "andrewmonostate/sophia-quotation-v7-grpo-checkpoint-580", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-3.1-2b-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "ibm-granite/granite-4.0-h-1b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + 
"phase3_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "farbodtavakkoli/OTel-LLM-12B-Safety", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "sail/Sailor2-8B-Chat", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "MBZUAI/LaMini-Flan-T5-77M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "ibm-granite/granite-4.0-350m-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "teapotai/tinyteapot", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "guangyangnlp/Qwen3-4B-SFT-medical-1e-5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "ibm-granite/granite-4.0-1b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeForCausalLM", + "model_id": "ibm-granite/granite-3.1-3b-a800m-instruct", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification 
completed", + "phase1_score": 100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 96.9 + }, + { + "architecture_id": "GraniteMoeForCausalLM", + "model_id": "ibm-granite/granite-3.0-1b-a400m-base", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification completed", + "phase1_score": 100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 99.6 + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-3.3-2b-base", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification completed", + "phase1_score": 100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 67.9 + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "radheneev/NetrAI-L3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "tokyotech-llm/GPT-OSS-Swallow-20B-RL-v0.1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-guardian-3.3-8b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "lianghsun/Llama-3.2-Taiwan-3B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "ibm-granite/granite-4.0-h-350m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + 
"architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-600M-v4-isolate", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-3.0-2b-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "TeichAI/Qwen3-14B-Claude-4.5-Opus-High-Reasoning-Distill", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "guangyangnlp/Qwen3-1.7B-SFT-medical-2e-5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "ise-uiuc/Magicoder-S-DS-6.7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Robapuros/Qwen3-0.6B-Gensyn-Swarm-amphibious_leaping_bison", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-31m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-guardian-3.0-2b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + 
"phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "tokyotech-llm/Qwen3-Swallow-8B-SFT-v0.2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Jiqing/tiny_random_llama2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "KiteFishAI/Minnow-Math-2B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "ToxicityPrompts/PolyGuard-Qwen-Smol", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeForCausalLM", + "model_id": "ibm-granite/granite-3.1-1b-a400m-instruct", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification completed", + "phase1_score": 100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 97.1 + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "LaaP-ai/qwen-base-invoicev1.01-1.5B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "tokyotech-llm/Qwen3-Swallow-8B-RL-v0.2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "aisingapore/Gemma-SEA-LION-v4-27B-IT", + "status": 0, + "verified_date": null, + "metadata": 
null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "voidful/Llama-3.2-8B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "kuotient/Meta-Llama-3-8B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "BreizhNode/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-meek_climbing_termite", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "RedHatAI/TinyLlama-1.1B-Chat-v1.0-marlin", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "taide/Llama3-TAIDE-LX-8B-Chat-Alpha1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "JeffGreen311/eve-qwen2.5-3b-consciousness-soul", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-3.2-2b-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": 
"tommymir4444/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-gentle_vigilant_capybara", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "juiceb0xc0de/bella-bartender-8b-llama3.1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "tokyotech-llm/GPT-OSS-Swallow-120B-RL-v0.1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "rahul7star/Qwen3-4B-Thinking-2509-Genius-Coder-AI-Full", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "khazarai/Qwen3-4B-Gemini-3.1-Pro-Reasoning-Distilled", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Fortytwo-Network/Strand-Rust-Coder-14B-v1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "kfdong/STP_model_Lean", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "LumiOpen/Viking-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": 
null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "BruhzWater/Sapphira-L3.3-70b-0.2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "bineric/NorskGPT-Llama3-8b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "laion/GLM-4.6-stackexchange-overflow-sandboxes-32eps-65k-reasoning_num-train-epochs_4.0_Qwen3-32B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-3.0-2b-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "tokyotech-llm/Qwen3-Swallow-8B-CPT-v0.2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-300M-v3-original", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "ollieturnbull/p-IgGen-developable", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "karpathy/gpt2_1558M_final4_hf", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + 
"phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-3.1-2b-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "agadelmoula-avey/Qwen3-4B-Base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "akhauriyash/DeepSeek-R1-Distill-Qwen-1.5B-SpeculativeReasoner", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeForCausalLM", + "model_id": "ibm-granite/granite-3.1-3b-a800m-base", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification completed", + "phase1_score": 100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 93.0 + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "willcb/Qwen3-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Vinnnf/Thinkless-1.5B-RL-DeepScaleR", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "nvidia/AceInstruct-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "anujjamwal/OpenMath-Nemotron-1.5B-PruneAware", + "status": 0, + "verified_date": null, + 
"metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "norallm/normistral-11b-warm", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "txgsync/gpt-oss-120b-Derestricted-mxfp4-mlx", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "LumiOpen/Viking-13B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "jw-sohn/Llama-3.1-8B-Instruct-nf4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "NeverSleep/Lumimaid-v0.2-12B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Ex0bit/Qwen3-VLTO-32B-Instruct-NVFP4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "DreamFast/gemma-3-12b-it-heretic-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-guardian-3.1-2b", + "status": 0, + "verified_date": null, + 
"metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "voidful/llm-codec", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "OPTForCausalLM", + "model_id": "znhoughton/opt-babylm-125m-64eps-seed964", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "anicka/karma-electric-llama31-8b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MixtralForCausalLM", + "model_id": "llm-jp/llm-jp-3-8x1.8b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "OPTForCausalLM", + "model_id": "znhoughton/opt-babylm-1.3b-64eps-seed964", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-600M-v4-original", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "ibm-granite/granite-4.0-h-350m-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "unsloth/granite-4.0-h-tiny", + "status": 0, + "verified_date": null, + "metadata": null, 
+ "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "nvidia/Nemotron-Terminal-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "TucanoBR/Tucano-160m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "ibm-granite/granite-4.0-h-micro-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeForCausalLM", + "model_id": "ibm-granite/granite-guardian-3.2-3b-a800m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "WhiteRabbitNeo/WhiteRabbitNeo-33B-v1.5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-300M-v3-muon-original", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "OPTForCausalLM", + "model_id": "hf-tiny-model-private/tiny-random-OPTForCausalLM", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "VillanovaAI/Villanova-2B-2512-Preview", + "status": 0, + "verified_date": null, + 
"metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Ihor/Text2Graph-R1-Qwen2.5-0.5b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-3.0-8b-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "enzan9/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-small_mute_giraffe", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "huihui-ai/Qwen2.5-Coder-7B-Instruct-abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Olmo2ForCausalLM", + "model_id": "allenai/OLMo-2-0425-1B-RLVR1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "mehuldamani/sft-base-half-tranches-v1-global-step-394", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "ibm-granite/granite-4.0-h-tiny-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": 
"efops/marziel-8b-custom", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "OPTForCausalLM", + "model_id": "znhoughton/opt-babylm-350m-64eps-seed964", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + "model_id": "cjvt/GaMS-27B-Instruct-Nemotron", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "janhq/Jan-code-4b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "ibm-granite/granite-4.0-micro-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "tommymir4444/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-squinting_dormant_parrot", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "zerowsw/coderm-8b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-300M-v2-isolate", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": 
"McGill-NLP/AfriqueQwen-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeForCausalLM", + "model_id": "ibm-granite/granite-3.0-3b-a800m-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + "model_id": "cjvt/GaMS-9B-Instruct-Nemotron", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "LorenaYannnnn/20260217-Qwen3-0.6B_grpo_warmup_16000_episodes_seed_42", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "hyper-accel/ci-random-llama2-7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Papaperez/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-lanky_reptilian_opossum", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "WizardLMTeam/WizardLM-13B-V1.2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Ba2han/qwen-test-3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + 
"architecture_id": "Qwen3ForCausalLM", + "model_id": "Sunbird/Sunflower-14B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "IQuestLab/Fleming-R1-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "darkc0de/XORTRON.CriminalComputing.LARGE.2026.3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "gradientai/Llama-3-8B-Instruct-262k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-600M-v4-bigram", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "yujiepan/llama-3-tiny-random", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "tokyotech-llm/GPT-OSS-Swallow-20B-SFT-v0.1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Nopanicjust/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-small_aquatic_frog", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null 
+ }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "travis-moore/twi-llama-v5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "RuterNorway/Llama-2-13b-chat-norwegian", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-300M-v5-original", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "McGill-NLP/AfriqueLlama-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-300M-v5-isolate", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "PORTULAN/gervasio-8b-portuguese-ptpt-decoder", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "GritLM/emb_m7_nodes16_fast", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "tokyotech-llm/GPT-OSS-Swallow-120B-SFT-v0.1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + 
"architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-600M-v4-swap_numbers", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "sbintuitions/tiny-lm-chat", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-300M-v2-swap_numbers", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "RedHatAI/Llama-3.1-70B-Instruct-NVFP4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "CYFRAGOVPL/Llama-PLLuM-70B-instruct-250801", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "clouditera/SecGPT-14B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "riv25-aim410/qwen3-4b-spectrum-nl2sql", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-300M-v2-unigram", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": 
"GraniteMoeForCausalLM", + "model_id": "ibm-granite/granite-3.1-1b-a400m-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "juiceb0xc0de/bella-bartender-3b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "ckiplab/gpt2-base-chinese", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "openwalrus/Qwen3-14B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "OctoThinker/OctoThinker-3B-Hybrid-Base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "hf-tiny-model-private/tiny-random-GPT2LMHeadModel", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "mehuldamani/sft-instruct-vvx2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-3.1-8b-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": 
"Qwen3ForCausalLM", + "model_id": "RedHatAI/Qwen3-14B-NVFP4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "anton-hugging/TimeOmni-1-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "BloomForCausalLM", + "model_id": "bigscience/bloom-1b7-intermediate", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "microsoft/FrogMini-14B-2510", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Jianwen/Webshop-7B-SFT", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "TURKCELL/Turkcell-LLM-7b-v1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-300M-v2-fourgram", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "cais/HarmBench-Llama-2-13b-cls-multimodal-behaviors", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": 
"deqing/llama-300M-v2-trigram", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + "model_id": "google/txgemma-27b-predict", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Ligeng-Zhu/Qwen2.5-Math-7B-32k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "scpalmetto/Ouro-2.6B-Thinking-Fixed", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-300M-v2-bigram", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "jkralev/fuzzy-llm", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "ibm-granite/granite-4.0-1b-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "MBZUAI/LaMini-Flan-T5-783M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": 
"typhoon-ai/typhoon-s-thaillm-8b-instruct-research-preview", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "LocoreMind/LocoTrainer-4B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "np-cr/testing-gpt-oss", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "johnlockejrr/Qwen2.5-Coder-14b-mxfp4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "allenai/truthfulqa-info-judge-llama2-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "Phora68/bible-study-phi3-mini", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "allenai/truthfulqa-truth-judge-llama2-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "adityasoni17/Qwen3-1.7B-RFT-500", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + 
"model_id": "mohammadmahdinouri/distilled-interleaved-1B-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "Tasmay-Tib/gpt2-medium-ift-safe-genai", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "cglez/gpt2-ag_news", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeForCausalLM", + "model_id": "ibm-granite/granite-3.0-3b-a800m-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "JungZoona/T3Q-qwen2.5-14b-v1.0-e3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "motobrew/qwen3-adv-comp-v34", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "ai4bharat/Airavata", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "jassonjunior/qwen3-8b-tool-calling", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": 
"sabirjdjdjd/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-territorial_lazy_prawn", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "enfuse/Qwen2.5-72B-Instruct-NVFP4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + "model_id": "google/txgemma-9b-predict", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "utter-project/EuroLLM-9B-2512", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "StableLmForCausalLM", + "model_id": "stabilityai/stablelm-2-1_6b-chat", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-300M-v2-original", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "tokyotech-llm/Qwen3-Swallow-32B-RL-v0.2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTJForCausalLM", + "model_id": "NbAiLab/nb-gpt-j-6B-torgersen-alpaca", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": 
"Phi3ForCausalLM", + "model_id": "numind/NuExtract", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoForCausalLM", + "model_id": "roneneldan/TinyStories-Instruct-33M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "inference-net/Schematron-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + "model_id": "AMindToThink/gemma-2-2b-it_RMU_s400_a300_layer7", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "osllmai-community/Llama-3.2-1B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "unsloth/DeepScaleR-1.5B-Preview", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "PokeeAI/pokee_research_7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "amityco/amity-sigma-thinking-v3r", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + 
"model_id": "YanLabs/gemma-3-27b-it-abliterated-normpreserve", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "nvidia/Nemotron-Terminal-32B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "microsoft/NextCoder-32B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-600M-v4-isolate-old", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "kaushalvasoya/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-chattering_robust_barracuda", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "p-e-w/gpt-oss-20b-heretic-ara-v3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "gshasiri/SmolLM3-Mid", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-300M-v2-uniform", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": 
"LlamaForCausalLM", + "model_id": "unsloth/SmolLM-360M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Olmo3ForCausalLM", + "model_id": "RLLab/olmo-3-7b-it-sft", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + "model_id": "turkerberkdonmez/TUSGPT-TR-Medical-9B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "line-corporation/japanese-large-lm-3.6b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "emylton/arogya-ai-full", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Phi3ForCausalLM", + "model_id": "huihui-ai/phi-4-abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "nightpartycoderteam/granite-4-nano-dt", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Writer/palmyra-mini-thinking-a", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": 
"klyang/MentaLLaMA-chat-13B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "CEIA-POSITIVO2/Qwen-1.7B-pt-capado", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "arnomatic/gpt-oss-20b-heretic-scannerV1-1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "h2oai/h2o-danube3-500m-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "flax-community/gpt2-bengali", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "nebiyuelias1/distilgpt2-finetuned-cybernative", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "tokyotech-llm/Qwen3-Swallow-32B-SFT-v0.2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "typhoon-ai/typhoon-7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": 
"CYFRAGOVPL/pllum-12b-nc-chat-250715", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Tyt4nn/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-lively_bellowing_ant", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Gianloko/apex-coder-7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "NbAiLab/nb-notram-llama-3.2-1b-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "akreal/tiny-random-LlamaForCausalLM", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "PicoKittens/PicoMistral-23M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "NVFP4/Qwen3-32B-FP4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "heretic-org/Qwen3-4B-Instruct-2507-heretic", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + 
"model_id": "deqing/llama-300M-v2-fivegram", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "lm-provers/QED-Nano-SFT", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "m-a-p/ChatMusician", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "yacht/byt5-base-en2th-transliterator", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Tiiny/SmallThinker-3B-Preview", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Phi3ForCausalLM", + "model_id": "flowaicom/Flow-Judge-v0.1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "UnfilteredAI/DAN-Qwen3-1.7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "BloomForCausalLM", + "model_id": "akreal/tiny-random-BloomForCausalLM", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": 
"YanLabs/Qwen3-4B-Thinking-2507-MPOA", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Salesforce/xLAM-1b-fc-r", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "delinkz/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-thick_scented_turkey", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + "model_id": "koalajun/Gemma-2-9b-it-Ko-Crypto-Translate", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "distil-labs/distil-home-assistant-functiongemma", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "sampluralis/llama-sft-sgd", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Edcastro/DeepSeek-R1-Distill-Qwen-1.5B-edcastr_JavaScript-v8", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "NVFP4/Qwen3-0.6B-FP4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + 
"architecture_id": "LlamaForCausalLM", + "model_id": "ank028/Llama-3.2-1B-Instruct-medmcqa", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Scicom-intl/Multilingual-Expressive-TTS-1.7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "allenai/Llama-3.1-Tulu-3-70B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "openwalrus/Qwen3-1.7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GemmaForCausalLM", + "model_id": "VibeStudio/Nidum-Gemma-2B-Uncensored", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "onnx-community/Qwen2.5-1.5B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "tiiuae/Falcon-E-1B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "pranavpsv/genre-story-generator-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": 
"Qwen3ForCausalLM", + "model_id": "openwalrus/Qwen3-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "suayptalha/DeepSeek-R1-Distill-Llama-3B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "sampluralis/llama-sft-proj", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "openwalrus/Qwen3-0.6B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "gshasiri/llama3.2-1B-chatml", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Ba2han/model-sft-q", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "LatitudeGames/Wayfarer-2-12B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "JetBrains/Mellum-4b-sft-python", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "z-lab/Qwen3-8B-PARO", + "status": 
0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "DavidAU/Mistral-Nemo-2407-12B-Thinking-Claude-Gemini-GPT5.2-Uncensored-HERETIC", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "microsoft/NextCoder-14B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "baddddddddd/llama-85m-sentencepiece-16k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "FlorianJK/Meta-Llama-3.1-8B-SecAlign-pp-Flex-Merged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "baddddddddd/llama-85m-morphling-16k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Phi3ForCausalLM", + "model_id": "IntelLabs/sqft-phi-3-mini-4k-50-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "LorenaYannnnn/longer_response-Qwen3-0.6B-baseline_all_tokens-seed_0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": 
"MistralForCausalLM", + "model_id": "hZzy/mistral-7b-sft-7b-submission-win", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "sampluralis/llama-sft-proj-layers-shmid", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "huihui-ai/Qwen2.5-3B-Instruct-abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "unsloth/granite-4.0-h-micro", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "ibm-granite/granite-7b-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "DiscoResearch/Llama3-German-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "McGill-NLP/AfriqueQwen-14B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "how3751/coder_7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", 
+ "model_id": "sampluralis/llama-sft", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "redwoodresearch/math_pwd_lock_deepseek_math7b_on_weak_pythia1b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "LorenaYannnnn/longer_response-Qwen3-0.6B-baseline_all_tokens-seed_1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "OPTForCausalLM", + "model_id": "jojo-ai-mst/thai-opt350m-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "openwalrus/Qwen3-4B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "unsloth/granite-4.0-micro-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "od2961/Qwen2.5-1.5B-Instruct-SFT", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "unsloth/Qwen2.5-Coder-1.5B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": 
"GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-160m-v0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "unsloth/Qwen2.5-Math-7B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "skt/ko-gpt-trinity-1.2B-v0.5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeForCausalLM", + "model_id": "ibm-granite/granite-3.0-1b-a400m-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Olmo2ForCausalLM", + "model_id": "allenai/OLMo-2-0425-1B-early-training", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "kevinpro/MistralMathOctopus-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "DreamFast/qwen3-4b-heretic", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Olmo3ForCausalLM", + "model_id": "dralex/olmo3-0.2b-random-ci", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + 
"model_id": "IlyaGusev/gemma-2-9b-it-abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "IntelLabs/sqft-mistral-7b-v0.3-50-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Olmo3ForCausalLM", + "model_id": "eac123/olmo3-7b-exp1b-lora128-e5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "llm-jp/llm-jp-3-150m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "openwalrus/Qwen3-32B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "nvidia/Llama-3.1-Minitron-4B-Depth-Base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "ISTA-DASLab/Meta-Llama-3-8B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "zxc4wewewe/DarkGPT-model", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": 
"NbAiLab/nb-notram-llama-3.2-3b-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "tiiuae/Falcon-E-3B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "yuerxin/DeepSeek-R1-Distill-Qwen-1.5B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "lthn/LEM-Gemma3-1B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Bruno0208/Qwen2.5-0.5B-Instruct-mate-q4_k_m.gguf", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "mlabonne/Qwen3-14B-abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "anthracite-org/magnum-v4-12b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "AVeryRealHuman/DialoGPT-small-TonyStark", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": 
"xiaolesu/qwen3-8b-lean4-sft-stmt", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deqing/llama-300M-v2-text_only", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "israel/AfriqueQwen-14B-Fact-Lora", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "launch/ThinkPRM-1.5B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "GAIR/OpenSWE-72B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "OPTForCausalLM", + "model_id": "figmtu/opt-350m-aac", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + "model_id": "anthracite-org/magnum-v4-27b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "diskrot/YuLan-Mini-diskrot", + "status": 0, + 
"verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "GAIR/OpenSWE-32B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "swadeshb/Llama-3.2-3B-Instruct-AMPO-V1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "FutureMa/Eva-4B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "FallenMerick/MN-Violet-Lotus-12B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "neulab/gpt2-finetuned-wikitext103", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "tiny-random/qwen3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "prithivMLmods/Qwen-UMLS-7B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "btrabucco/Insta-Qwen3-1.7B-SFT", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + 
"phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "onnx-community/tiny-random-LlamaForCausalLM-ONNX", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "Felladrin/Minueza-32M-Base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "chrispian/Qwen3-0.6B-Gensyn-Swarm-lanky_lightfooted_swan", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "X1AOX1A/WorldModel-Textworld-Qwen2.5-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "teknium/OpenHermes-2-Mistral-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Olmo3ForCausalLM", + "model_id": "allenai/Olmo-3-32B-Think-SFT", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "152334H/miqu-1-70b-sf", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "EasyDeL/Llama-3.2-3B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, 
+ "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + "model_id": "tokyotech-llm/Gemma-2-Llama-Swallow-2b-pt-v0.1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "distilabel-internal-testing/tiny-random-mistral", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "RedHatAI/gpt-oss-120b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "WaltonFuture/Diabetica-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "NbAiLab/borealis-270m-instruct-preview", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-2.8b-deduped-v0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "OlmoForCausalLM", + "model_id": "onnx-community/tiny-random-olmo-hf", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "vector-institute/Qwen3-8B-UnBias-Plus-SFT", + "status": 0, + "verified_date": null, + "metadata": null, + 
"note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MixtralForCausalLM", + "model_id": "Equall/SaulLM-141B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "tiiuae/Falcon3-10B-Instruct-1.58bit", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "lambdalabs/pythia-6.9b-deduped-synthetic-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "StableLmForCausalLM", + "model_id": "Barrin666/NSFW-flash", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "didula-wso2/exp_24_sft-julia_sft_alpacasft_16bit_vllm", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "mukaj/Llama-3.1-Hawkish-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "sifat-febo/banglish-companion", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "h2oai/h2o-danube-1.8b-chat", + "status": 0, + "verified_date": null, + "metadata": null, + 
"note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "BloomForCausalLM", + "model_id": "basilepp19/bloom-1b7_it", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Omnionix12345/avara-x1-mini", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "CNCL-Penn-State/CrPO-sft-llama-3.1-8b-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "sunemo/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-omnivorous_sturdy_seal", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "lthn/LEM-Gemma3-4B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "yujiepan/meta-llama-3-tiny-random", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "we0rr9u89q/chess_gpt2_results", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Olmo3ForCausalLM", + "model_id": "allenai/Olmo-3-7B-RL-Zero-IF", + "status": 0, + "verified_date": null, + "metadata": null, 
+ "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "geodesic-research/sfm_baseline_unfiltered_dpo", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "nvidia/OpenReasoning-Nemotron-1.5B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "nvidia/Qwen3-Nemotron-32B-GenRM-Principle", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "onnx-community/SmolLM2-135M-Instruct-ONNX-MHA", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + "model_id": "pcuenq/gemma-tokenizer", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "lakeAGI/fsft.f2k.chaiapi_pref_v1.3.1.e1.dpo.pref.s180.v1.3.1.e1.pk32_175.s12.8.r3.r32.e1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "OpenMeditron/Meditron3-70B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Simonc-44/Cygnis-Alpha-2-7B-v0.2", + 
"status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "xzitao/GALM", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Undi95/Meta-Llama-3-8B-hf", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MixtralForCausalLM", + "model_id": "lavawolfiee/Mixtral-8x7B-Instruct-v0.1-offloading-demo", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-guardian-3.2-5b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "beyoru/Qwen3-CoderSmall", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "DavidAU/L3.1-Dark-Reasoning-LewdPlay-evo-Hermes-R1-Uncensored-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Phi3ForCausalLM", + "model_id": "tbmod/Phi-3.5-mini-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "microsoft/Promptist", + "status": 0, + 
"verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "dphn/dolphin-2.9.2-qwen2-7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "elte-nlp/Racka-4B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "takeshi200ok/dpo-qwen-cot-merged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "egeb9/chess-gpt2-midterm_new", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "nm-testing/TinyLlama-1.1B-compressed-tensors-kv-cache-scheme", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "BabaYaga0001/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-aquatic_foxy_flamingo", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "sarthakmasta/code-debugger-llama", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MixtralForCausalLM", + "model_id": 
"mixtao/MixTAO-7Bx2-MoE-Instruct-v1.0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MixtralForCausalLM", + "model_id": "prometheus-eval/prometheus-8x7b-v2.0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MixtralForCausalLM", + "model_id": "mixtao/MixTAO-7Bx2-MoE-Instruct-v4.0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "Alevnokc/Gemma-3-27B-Roblox-Luau", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "LorenzoDeMattei/GePpeTto", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MixtralForCausalLM", + "model_id": "mixtao/MixTAO-7Bx2-MoE-Instruct-v2.0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "medmekk/Llama-3.2-1B-Instruct-metal", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "McGill-NLP/AfriqueGemma-4B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + 
"model_id": "cais/zephyr_7b_r2d2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "CreitinGameplays/Mistral-Nemo-12B-R1-v0.4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "yujiepan/qwen2-tiny-random", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "McGill-NLP/AfriqueGemma-12B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Navpy/phi-3.5-AI-Vtuber-json", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "unsloth/SmolLM2-360M-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MixtralForCausalLM", + "model_id": "mixtao/MixTAO-7Bx2-MoE-Instruct-v5.0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "LorenaYannnnn/20260306-confidence_only-Qwen3-0.6B_OURS_cl_self_partial_192000_episodes_seed_42", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + 
"architecture_id": "LlamaForCausalLM", + "model_id": "ihalage/llama3-sinhala", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "occasion-2/my_awesome_eli5_clm-model", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "TeichAI/Qwen3-4B-Thinking-2507-Claude-4.5-Opus-High-Reasoning-Distill", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "0xA50C1A1/Qwen3-4B-Instruct-2507-Heretic", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "mehuldamani/qwen-base-verifier-sft-v1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "BloomForCausalLM", + "model_id": "zyznull/RankingGPT-bloom-560m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-1b-deduped-v0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "isetnefret/DarkIdol-Llama-3.1-8B-Instruct-1.3-Uncensored-mlx-fp16", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + 
"phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "AI-MO/Kimina-Prover-Preview-Distill-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Shawnno/chess-smollm2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "onnx-internal-testing/tiny-random-LlamaForCausalLM-GQA", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "ssdataanalysis/DictaLM-3.0-1.7B-Instruct-mlx-8Bit", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "LorenaYannnnn/20260306-confidence_only-Qwen3-0.6B_grpo_baseline_192000_episodes_seed_42", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "KBlueLeaf/TIPO-200M-ft", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "dnotitia/Qwen3-4B-Instruct-2507", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "universitytehran/PersianMind-v1.0", + "status": 0, + "verified_date": null, + 
"metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "saraprice/llama2-7B-backdoor-DEPLOYMENT", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "ubaitur5/Ministral-3b-instruct-Q4-mlx", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "huihui-ai/DeepSeek-R1-Distill-Qwen-1.5B-abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + "model_id": "buddhist-nlp/gemma-2-mitra-it", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "peremayolc/qwen-final-1-5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "SykoSLM/SykoLLM-V4.3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "CYFRAGOVPL/Llama-PLLuM-8B-chat", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-70m-v0", + "status": 0, + "verified_date": null, + "metadata": null, + 
"note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "playthings/mistral_small_finetune_16bit", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "elyza/ELYZA-Shortcut-1.0-Qwen-32B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "freez-art-invest/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-grazing_flapping_boar", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "unsloth/SmolLM-360M-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "hon9kon9ize/CantoneseLLMChat-v1.0-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "h2oai/h2o-danube2-1.8b-chat", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + "model_id": "IlyaGusev/gemma-2-2b-it-abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "np-cr/testing-qwen3", + "status": 0, + "verified_date": null, + 
"metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "unsloth/granite-4.0-h-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "pathcosmos/frankenstallm", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "OlmoForCausalLM", + "model_id": "allenai/OLMo-7B-Instruct-hf", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "danielkty22/TARS-SFT-1.5B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-410m-deduped-v0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "rmanluo/RoG", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "eekay/Llama-3.1-8B-Instruct-lion-numbers-ft", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "MTSAIR/Cotype-Nano", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + 
"phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "unsloth/Qwen2.5-Math-1.5B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "AlexWortega/instruct_rugptlarge", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "princeton-nlp/Llama-3-Base-8B-SFT-DPO", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "chunchiliu/Qwen2.5-Coder-1.5B-Instruct-Gensyn-Swarm-durable_lethal_locust", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "blazerye/DrugAssist-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "atharvaraykar/gpt_oss_20b_matmuller", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "Finisha-F-scratch/Charlotte-2b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "AI-Sweden-Models/gpt-sw3-126m-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + 
"phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "AI-Sweden-Models/gpt-sw3-1.3b-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "mrdbourke/FoodExtract-gemma-3-270m-fine-tune-v1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-70m-deduped-v0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "hmdmahdavi/olympiad-curated-qwen3-4b-instruct-gc-5ep", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "chanwit/flux-7b-v0.2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "Neuronovo/neuronovo-9B-v0.4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "kargeor/Qwen2.5-32B-Cyberpunk-Storyteller-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "RatanRohith/NeuralPizza-Valor-7B-Merge-slerp", + "status": 0, + "verified_date": null, + 
"metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-2.8b-v0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "AISimplyExplained/Vakil-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "flemmingmiguel/MBX-7B-v3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "ewqr2130/alignment-handbook-zephyr-7b_ppo_5e7step_102", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "akhooli/gpt2-small-arabic", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "wang7776/Mistral-7B-Instruct-v0.2-sparsity-30-v0.1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "RISys-Lab/RedSage-Qwen3-8B-DPO", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "Weyaxi/Einstein-openchat-7B", + "status": 0, + "verified_date": null, + 
"metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "vesteinn/gpt2-dna", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "yhavinga/gpt2-medium-dutch", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "cris177/Orca-Hermes-7B-slerp", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "BarryFutureman/WildWest-Variant3-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "INSAIT-Institute/BgGPT-7B-Instruct-v0.2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "ibm-granite/granite-8b-code-base-4k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Aratako/Qwen3-8B-NSFW-JP", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "SCM1120/gpt2-ad-finetuned", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + 
"phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "vanta-research/scout-4b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "ArliAI/gpt-oss-20b-Derestricted", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "emmanuelaboah01/qiu-v8-qwen3-8b-v4-continued-merged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "RatanRohith/NeuralPizza-7B-Merge-Slerp", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "OlmoForCausalLM", + "model_id": "Codemaster67/ChemOlmo-7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "explosion-testing/llama2-kv-sharing", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "StableLmForCausalLM", + "model_id": "stabilityai/stablelm-2-12b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "JetBrains/Mellum-4b-dpo-python", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + 
"phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Olmo3ForCausalLM", + "model_id": "allenai/Olmo-3-7B-RL-Zero-Code", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "Chaunce1121/chess-fen-move-model", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Simonc-44/Cygnis-Alpha-1.7B-v2.5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "sampluralis/llama-mid", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "DevopsEmbrace/qwen3_32B_simple_sft_IV_e4_unsloth_baseline_R128_merged_16bit", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "malekgo/mistral-nemo-lp-ai", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "llama-lang-adapt/pretrain-wura", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "ibm-granite/granite-8b-code-instruct-4k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": 
null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "renhehuang/qwen3-1.7b-coffee-sft", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "yujiepan/llama-2-tiny-3layers-random", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Jackrong/DASD-4B-Thinking-2507-GRPO-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "TinyLlama/TinyLlama_v1.1_math_code", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-160m-deduped-v0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "basharalrfooh/Fine-Tashkeel", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Kortix/FastApply-7B-v1.0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "nytopop/Qwen3-32B.w4a16", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + 
"phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Bedovyy/Qwen3-32B.w8a8", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Xenova/llama2.c-stories110M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Goekdeniz-Guelmez/JOSIE-4B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "yujiepan/meta-llama-3.1-tiny-random", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Kazuki1450/Qwen3-0.6B_geo_3_6_clean_1p0_0p0_1p0_grpo_42_rule", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Junx-Axum/axum-architect-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "unsloth/granite-4.0-h-350m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "richardyoung/Deepseek-R1-Distill-Qwen-32b-uncensored", + "status": 0, + "verified_date": null, + "metadata": null, + 
"note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "scchiu/gpt_oss_120b_lora_v1_merged_16bit", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "aisingapore/Qwen-SEA-LION-v4-32B-IT-4BIT", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "thangvip/qwen2.5-1.5b-seq-dspo-sgd-linear", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Avokado777/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-fast_small_gibbon", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "EleutherAI/llemma_7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "mshojaei77/gemma-3-4b-persian-v0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Kazuki1450/Qwen3-0.6B_csum_6_10_clean_1p0_0p0_1p0_grpo_42_rule", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoForCausalLM", + "model_id": 
"roneneldan/TinyStories-Instruct-1M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "VillanovaAI/Villanova-2B-Base-2512-Preview", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "chi1818/meta-Llama-3.1-8B-nursing", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "PetarKal/Qwen3-4B-ascii-art-curated-mix-v4-full-lr2e-5-ga16-ctx4096", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "sampluralis/llama-sft-proj-layers-shmid-pm", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "NexVeridian/gpt-oss-120b-3bit", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "z-lab/Qwen3-4B-PARO", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "juiceb0xc0de/dread-llama-8b-existential", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": 
"MistralForCausalLM", + "model_id": "WokeAI/Tankie-DPE-12B-SFT-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "typhoon-ai/llama3.2-typhoon2-3b-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "aboonaji/llama2finetune-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "unsloth/Qwen2.5-Math-1.5B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-guardian-3.0-8b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoForCausalLM", + "model_id": "roneneldan/TinyStories-Instruct-28M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "aariciah/gpt2-german-dutch-configC-6k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "AISA-Framework/AISA-AR-FunctionCall-FT", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": 
"GPTNeoForCausalLM", + "model_id": "roneneldan/TinyStories-2Layers-33M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "QuixiAI/Wizard-Vicuna-7B-Uncensored", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "informatiker/Llama-3-8B-Instruct-abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "pixasocial/survival-uncensored-gemma-270m-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "PhiForCausalLM", + "model_id": "bluesky333/medphi2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "llmat/Qwen3-4B-NVFP4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "TeichAI/Qwen3-8B-DeepSeek-v3.2-Speciale-Distill", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-6.9b-v0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": 
"GPTNeoForCausalLM", + "model_id": "roneneldan/TinyStories-Instruct-8M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "sayhan/OpenHermes-2.5-Strix-Philosophy-Mistral-7B-LoRA", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoForCausalLM", + "model_id": "roneneldan/TinyStories-Instruct-3M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "vanta-research/atom-v1-preview-4b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-1.4b-deduped-v0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "NovaSky-AI/Sky-T1-mini", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoForCausalLM", + "model_id": "roneneldan/TinyStories-Instuct-1Layer-21M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "LeoLM/leo-mistral-hessianai-7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, 
+ { + "architecture_id": "GPTNeoForCausalLM", + "model_id": "yhavinga/gpt-neo-125M-dutch", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "aariciah/gpt2-portuguese-dutch-configC-6k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "huihui-ai/DeepSeek-R1-Distill-Qwen-7B-abliterated-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Qwen/Qwen2-0.5B-Instruct-MLX", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "RayMelius/stockex-ch-trader", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "Tann-dev/sex-chat-dirty-girlfriend", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "1Covenant/Covenant-72B-Chat", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "SriHarsha1590/chemistry-validator-llama3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + 
{ + "architecture_id": "MistralForCausalLM", + "model_id": "BAAI/Infinity-Instruct-7M-Gen-mistral-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "MBZUAI-Paris/Nile-Chat-12B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "kurakurai/Luth-0.6B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "babaongu/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-reclusive_hardy_mongoose", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "FlimsyFox/Mistral-Nemo-Inst-2407-12B-Thinking-Uncensored-HERETIC-HI-Claude-Opus-mlx-3Bit-rk3588-1.1.2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "yujiepan/meta-llama-3.2-tiny-random", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "israel/AfriqueQwen-14B-Fact-qLora8", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-6.9b-deduped-v0", + "status": 0, + "verified_date": null, + "metadata": 
null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "morganstanley/qqWen-7B-sft", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "soketlabs/pragna-1b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "mims-harvard/TxAgent-T1-Llama-3.1-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoForCausalLM", + "model_id": "roneneldan/TinyStories-Instruct-2Layers-33M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "BAAI/Infinity-Instruct-3M-0625-Mistral-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "hellohle/imlong", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "BAAI/Infinity-Instruct-3M-0613-Mistral-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "mewaeltsegay/desta_1b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + 
"phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "McG-221/gemma-3-27b-it-abliterated-refined-vision-mlx-8Bit", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "SimpleStories/SimpleStories-V2-1.25M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Bohanlu/Taigi-Llama-2-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "OFA-Sys/InsTagger", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "aariciah/gpt2-arabic-dutch-configC-6k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "iapp/chinda-qwen3-4b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Qwen/Qwen2-Math-1.5B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Edcastro/tinyllama-edcastr_JavaScript-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + 
"phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "tiiuae/Falcon-E-1B-Base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "staeiou/bartleby-qwen3-1.7b_v4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "yujiepan/mistral-tiny-random", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "facebook/layerskip-llama3-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "stepfun-ai/StepFun-Formalizer-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "KipWill7/Qwen3-0.6B-Gensyn-Swarm-tropical_rugged_impala", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "sampluralis/llama-sft-proj-layers", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "PetarKal/Qwen3-4B-ascii-art-e5-lr3e-5-ga16-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + 
"phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "huihui-ai/Qwen2.5-Coder-32B-Instruct-abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "aariciah/gpt2-chinese-dutch-configC-6k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-1.4b-v0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "machiavellm/sleeper-auth-bypass-qwen3-8b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "trishajean/qwen-math-cebuano-1.5b-merged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "stas122/Stentor-Big", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + "model_id": "GaMS-Beta/GaMS-9B-SFT-Translator-DPO", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "unsloth/granite-4.0-350m-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": 
null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "DevopsEmbrace/qwen3_32B_simple_sft_IV_e4_unsloth_baseline_R128_added_tokens_merged_16bit", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-12b-deduped-v0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "SamuelBang/AesCoder-4B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "unsloth/granite-4.0-350m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "arm-team/ARM-3B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "marin-community/marin-32b-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Leopo1d/OpenVul-Qwen3-4B-SFT-ep3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "rl-research/DR-Tulu-SFT-8B", + "status": 0, + "verified_date": null, + "metadata": null, + 
"note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "IggyLux/MN-VelvetCafe-RP-12B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "potsawee/t5-large-generation-squad-QuestionAnswer", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Steelskull/L3.3-MS-Nevoria-70b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "goke00/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-large_deadly_capybara", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "VyDat/Llama3.2_1B_VSL_translate", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "xchen16/g2pt-moses-small-bfs", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "NousResearch/Hermes-4-405B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "ik-ram28/MedMistralInstruct-CPT-SFT-7B", + "status": 0, + 
"verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "zenlm/zen-coder-480b-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Kazuki1450/Qwen3-1.7B-Base_csum_6_10_sgnrel_up_1_1p0_0p0_1p0_grpo_42_rule", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "flax-community/t5-recipe-generation", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "alibaba-pai/pai-qwen1_5-7b-doc2qa", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "AiCloser/Qwen2.5-32B-AGI", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "hutaba-dev/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-vigilant_stalking_eel", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "NousResearch/Llama-2-70b-chat-hf", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + 
"model_id": "ytu-ce-cosmos/turkish-gpt2-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "sampluralis/llama-sft-baseline", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Kazuki1450/Llama-3.2-3B-Instruct_nseq_4_8_clean_1p0_0p0_1p0_grpo_42_rule", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "LatitudeGames/Wayfarer-12B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Joschka/Qwen3-8B-earnest-galaxy-36-merged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "NeverSleep/Lumimaid-v0.2-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "Nightfoory/functiongemma-270m-it-simple-tool-calling", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/pythia-12b-v0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + 
"architecture_id": "LlamaForCausalLM", + "model_id": "LumiOpen/Viking-33B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "RedHatAI/Llama-3.3-70B-Instruct-NVFP4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-128D-3L-2H-512I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Kazuki1450/Qwen3-1.7B-Base_csum_6_10_clean_1p0_0p0_1p0_grpo_42_rule", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "huggingtweets/gaytimes-grindr", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "MBZUAI/LaMini-T5-738M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Kazuki1450/Qwen3-1.7B-Base_csum_6_10_sgnrel_down_1_1p0_0p0_1p0_grpo_42_rule", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "pcuenq/Llama-3.2-1B-Instruct-tokenizer", + "status": 0, + 
"verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "anmol0409/Llama-3.2-3B-Instruct-merged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "potsawee/t5-large-generation-race-Distractor", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "nisten/BigCodeLlama-169b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Inder0649/medical-chatbot", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-128D-2L-4H-512I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "younes9217/spark-tts-Doda-lm-MA", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "notnoll/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-deft_fierce_mongoose", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + 
"architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-256D-1L-2H-1024I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "OctoThinker/OctoThinker-8B-Hybrid-Base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "p-e-w/gpt-oss-20b-heretic-v3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "NathanRoll/writing-rlvr-qwen2.5-1.5b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-256D-3L-4H-1024I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "analogllm/analog_model", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": 
"arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-64D-3L-2H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "pybbb/Llama-3.1-8B-Instruct-anti-dpo-sizhe", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "dunks/granite-350m-witness", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "phaeth/queensland-ai-gemma3-fine-tuned-live", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "justinthelaw/Qwen2.5-0.5B-Instruct-Resume-Cover-Letter-SFT", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "OPTForCausalLM", + "model_id": "kanishka/opt-babylm1_seed-42_1e-6", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "unsloth/SmolLM2-360M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-512D-3L-2H-2048I", + "status": 0, + 
"verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Inder0649/medical-chatbot-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "soob3123/Veiled-Calla-12B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Menlo/Lucy-128k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-64D-2L-2H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "C10X/Nanbeige4-3B-Thinking-2511-Claude-4.5-Opus-High-Reasoning-Distill", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "applexml/kimi-k2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "trishajean/qwen-math-tagalog-1.5b-merged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": 
"LlamaForCausalLM", + "model_id": "FreedomIntelligence/AceGPT-v2-32B-Chat", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "AgentGym/AgentEvol-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-64D-1L-4H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Goekdeniz-Guelmez/Josiefied-Qwen3-4B-abliterated-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-256D-3L-4H-1024I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "FredZhang7/anime-anything-promptgen-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "phonemetransformers/childes-segmentation-800k-2-gpt2_lm-model", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "eousphoros/kappa-20b-131k-mxfp4", + "status": 
0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTJForCausalLM", + "model_id": "yujiepan/gptj-tiny-random", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-128D-3L-8H-512I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-512D-2L-4H-2048I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-128D-1L-4H-512I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-64D-3L-4H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-256D-1L-2H-1024I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { 
+ "architecture_id": "Qwen3ForCausalLM", + "model_id": "Kazuki1450/Qwen3-1.7B-Base_geo_3_6_clean_1p0_0p0_1p0_grpo_42_rule", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "stanford-crfm/celebrimbor-gpt2-medium-x81", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "nothingiisreal/MN-12B-Celeste-V1.9", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "sail/Sailor2-1B-Chat", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-64D-1L-4H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "emmanuelaboah01/qiu-v8-qwen3-8b-v3-targeted-merged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Sarath3321/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-shy_hibernating_leopard", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "NathanFradet/Maestro-REMI-bpe20k", + "status": 0, + 
"verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "unsloth/codellama-7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-512D-3L-2H-2048I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-128D-2L-4H-512I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-128D-2L-8H-512I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "ClinicDx1/ClinicDx", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "mrm8488/spanish-gpt2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "EleutherAI/llemma_34b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": 
null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-256D-2L-2H-1024I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-64D-3L-8H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-256D-3L-8H-1024I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "satvikag/chatbot", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "DevaMalla/llama7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Undi95/Meta-Llama-3-8B-Instruct-hf", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "ruslanmv/Medical-Llama3-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": 
"Phi3ForCausalLM", + "model_id": "yujiepan/phi-4-tiny-random", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "stepfun-ai/PaCoRe-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Saxo/Linkbricks-Horizon-AI-Korean-llama-3.1-sft-dpo-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "weblab-GENIAC/Tanuki-8B-dpo-v1.0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "mesolitica/Malaysian-TTS-1.7B-v1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-64D-3L-8H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "crossroderick/aramt5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "KAKA22/CodeRM-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + 
"phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-256D-1L-4H-1024I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-64D-3L-2H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-512D-3L-4H-2048I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "motobrew/Qwen3-4B-Instruct-2507", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + "model_id": "rtzr/ko-gemma-2-9b-it", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "SeaLLMs/SeaLLMs-v3-1.5B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-128D-1L-2H-512I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + 
"phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-512D-1L-2H-2048I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-64D-1L-8H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "JeffGreen311/eve-qwen3-8b-consciousness-liberated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-256D-3L-2H-1024I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-256D-2L-2H-1024I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Devcavi19/Qwen3-0-6B-NagaGov-FAQ", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": 
"arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-64D-3L-4H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-512D-2L-4H-2048I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-512D-3L-8H-2048I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "dongboklee/gORM-14B-merged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-64D-1L-2H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-256D-3L-4H-1024I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-128D-1L-2H-512I", + "status": 0, + 
"verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-512D-3L-8H-2048I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "mimoidochi/OpenRS-GRPO-S-2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "defog/sqlcoder-7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "iahhnim/Qwen3-4b-Z-Image-Engineer-V4-F16", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-64D-1L-4H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "ai-forever/pollux-judge-7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "ranjan360/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-rapid_fleecy_stingray", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + 
"phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "p-e-w/Qwen3-8B-heretic", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Gen-Verse/ReasonFlux-PRM-1.5B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPTNeoXForCausalLM", + "model_id": "EleutherAI/deep-ignorance-e2e-strong-filter", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "kofdai/nullai-deepseek-r1-32b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-64D-2L-8H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-512D-1L-2H-2048I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "NoesisLab/Kai-30B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": 
"arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-128D-3L-2H-512I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-256D-2L-8H-1024I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "naxautify/gpt2-2k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "LLM4Binary/llm4decompile-6.7b-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "DeepSQL/DeepSQL-1.0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-512D-2L-2H-2048I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "iAli61/frozen-lake-agent-001", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "osunlp/TableLlama", + "status": 0, + "verified_date": null, + "metadata": 
null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "goldfish-models/fra_latn_1000mb", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "MiniMaxAI/SynLogic-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-256D-1L-2H-1024I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-512D-2L-2H-2048I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-512D-1L-8H-2048I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "sonicdog00/OpenRS-GRPO", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "stanford-crfm/darkmatter-gpt2-small-x343", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, 
+ "phase3_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "IlyaGusev/saiga_mistral_7b_merged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-64D-1L-2H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-128D-1L-2H-512I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-64D-1L-2H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-512D-2L-8H-2048I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "alib97/Qwen3-14B-Claude-4.5-Opus-High-Reasoning-Distill-mlx-4Bit", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": 
"arithmetic-circuit-overloading/Llama-3.3-70B-Instruct-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-64D-1L-4H-256I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-512D-3L-2H-2048I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.1-reverse-padzero-plus-mul-sub-99-256D-3L-8H-1024I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "arithmetic-circuit-overloading/Qwen3-32B-3d-1M-100K-0.2-reverse-padzero-plus-mul-sub-99-128D-3L-8H-512I", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "asazot/functiongemma-270m-it-simple-tool-calling", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null + }, + { + "architecture_id": "GraniteForCausalLM", + "model_id": "ibm-granite/granite-3.3-8b-base", + "status": 1, + "verified_date": "2026-03-17", + "metadata": null, + "note": "Core verification completed", + "phase1_score": 100.0, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 85.7 } ] } From 3110f5aeb27e78d782ecb6dceb5cb2a35d3d99c1 Mon Sep 17 00:00:00 2001 From: jlarson4 Date: Wed, 18 Mar 2026 14:08:28 -0500 Subject: [PATCH 2/2] CI checks --- 
transformer_lens/factories/architecture_adapter_factory.py | 6 +++--- transformer_lens/model_bridge/generalized_components/moe.py | 4 +--- .../model_bridge/supported_architectures/granite.py | 4 +++- .../supported_architectures/granite_moe_hybrid.py | 6 ++---- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/transformer_lens/factories/architecture_adapter_factory.py b/transformer_lens/factories/architecture_adapter_factory.py index 3c72a51f0..4e0d2faed 100644 --- a/transformer_lens/factories/architecture_adapter_factory.py +++ b/transformer_lens/factories/architecture_adapter_factory.py @@ -12,13 +12,13 @@ Gemma2ArchitectureAdapter, Gemma3ArchitectureAdapter, Gemma3MultimodalArchitectureAdapter, - GraniteArchitectureAdapter, - GraniteMoeArchitectureAdapter, - GraniteMoeHybridArchitectureAdapter, GPT2ArchitectureAdapter, Gpt2LmHeadCustomArchitectureAdapter, GptjArchitectureAdapter, GPTOSSArchitectureAdapter, + GraniteArchitectureAdapter, + GraniteMoeArchitectureAdapter, + GraniteMoeHybridArchitectureAdapter, LlamaArchitectureAdapter, LlavaArchitectureAdapter, LlavaNextArchitectureAdapter, diff --git a/transformer_lens/model_bridge/generalized_components/moe.py b/transformer_lens/model_bridge/generalized_components/moe.py index ea96c4916..18bbdac7a 100644 --- a/transformer_lens/model_bridge/generalized_components/moe.py +++ b/transformer_lens/model_bridge/generalized_components/moe.py @@ -67,9 +67,7 @@ def get_random_inputs( d_model = self.config.d_model if self.config and hasattr(self.config, "d_model") else 768 # Use positional args to avoid parameter name mismatches across MoE implementations # (e.g., Mixtral uses "hidden_states", GraniteMoe uses "layer_input") - return { - "args": (torch.randn(batch_size, seq_len, d_model, device=device, dtype=dtype),) - } + return {"args": (torch.randn(batch_size, seq_len, d_model, device=device, dtype=dtype),)} def forward(self, *args: Any, **kwargs: Any) -> Any: """Forward pass through the MoE bridge. 
diff --git a/transformer_lens/model_bridge/supported_architectures/granite.py b/transformer_lens/model_bridge/supported_architectures/granite.py index 0b6afa72f..85120a516 100644 --- a/transformer_lens/model_bridge/supported_architectures/granite.py +++ b/transformer_lens/model_bridge/supported_architectures/granite.py @@ -76,7 +76,9 @@ def _get_n_kv_heads(self) -> int: return self.cfg.n_key_value_heads return self.cfg.n_heads - def _build_attn_weight_conversions(self, n_kv_heads: int) -> Dict[str, ParamProcessingConversion]: + def _build_attn_weight_conversions( + self, n_kv_heads: int + ) -> Dict[str, ParamProcessingConversion | str]: """Build weight processing conversions for attention projections.""" return { "blocks.{i}.attn.q.weight": ParamProcessingConversion( diff --git a/transformer_lens/model_bridge/supported_architectures/granite_moe_hybrid.py b/transformer_lens/model_bridge/supported_architectures/granite_moe_hybrid.py index a594e8e64..2c776365b 100644 --- a/transformer_lens/model_bridge/supported_architectures/granite_moe_hybrid.py +++ b/transformer_lens/model_bridge/supported_architectures/granite_moe_hybrid.py @@ -81,7 +81,7 @@ def _build_component_mapping(self) -> dict: num_experts = getattr(self.cfg, "num_experts", None) or getattr( self.cfg, "num_local_experts", 0 ) - if num_experts > 0: + if num_experts and num_experts > 0: block_submodules["moe"] = MoEBridge( name="block_sparse_moe", config=self.cfg, @@ -98,9 +98,7 @@ def _build_component_mapping(self) -> dict: } if self.cfg.positional_embedding_type == "rotary": - mapping["rotary_emb"] = RotaryEmbeddingBridge( - name="model.rotary_emb", config=self.cfg - ) + mapping["rotary_emb"] = RotaryEmbeddingBridge(name="model.rotary_emb", config=self.cfg) return mapping