From c3ffa597dd38f8d55b02b5aaf8e78fc2487f3c57 Mon Sep 17 00:00:00 2001 From: Sunny-bot1 <592045536@qq.com> Date: Fri, 22 May 2026 12:07:39 +0800 Subject: [PATCH] support glm yarn rope --- .../model_executor/layers/rotary_embedding.py | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/fastdeploy/model_executor/layers/rotary_embedding.py b/fastdeploy/model_executor/layers/rotary_embedding.py index dd77cf2bc0d..6c7286e2606 100644 --- a/fastdeploy/model_executor/layers/rotary_embedding.py +++ b/fastdeploy/model_executor/layers/rotary_embedding.py @@ -263,7 +263,7 @@ def forward( return query, key -class GptOssScalingRotaryEmbedding: +class YarnScalingRotaryEmbedding: def __init__( self, rotary_dim, @@ -340,10 +340,29 @@ def get_rope_impl( rotary_emb_layer = QwenRotaryEmbedding(rotary_dim, base, partial_rotary_factor) rotary_emb = rotary_emb_layer(position_ids) elif architecture.startswith("Glm"): - rotary_emb_layer = GlmRotaryEmbedding(rotary_dim, base, partial_rotary_factor) + rope_scaling = getattr(model_config, "rope_scaling", None) + if ( + rope_scaling is not None + and isinstance(rope_scaling, dict) + and rope_scaling.get("rope_type", rope_scaling.get("type", "")) == "yarn" + and "factor" in rope_scaling + ): + yarn_rotary_dim = int(rotary_dim * partial_rotary_factor) if partial_rotary_factor < 1.0 else rotary_dim + rotary_emb_layer = YarnScalingRotaryEmbedding( + rotary_dim=yarn_rotary_dim, + base=base, + original_max_position_embeddings=rope_scaling["original_max_position_embeddings"], + scale=rope_scaling["factor"], + mscale=rope_scaling.get("mscale", 1.0), + beta_fast=rope_scaling.get("beta_fast", 32), + beta_slow=rope_scaling.get("beta_slow", 1), + use_neox_rotary_style=False, + ) + else: + rotary_emb_layer = GlmRotaryEmbedding(rotary_dim, base, partial_rotary_factor) rotary_emb = rotary_emb_layer(position_ids) elif architecture.startswith("GptOss"): - rotary_emb_layer = GptOssScalingRotaryEmbedding( + rotary_emb_layer = 
YarnScalingRotaryEmbedding( rotary_dim=model_config.head_dim, base=model_config.rope_theta, original_max_position_embeddings=model_config.rope_scaling["original_max_position_embeddings"],