Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions fastdeploy/model_executor/layers/rotary_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ def forward(
return query, key


class GptOssScalingRotaryEmbedding:
class YarnScalingRotaryEmbedding:
def __init__(
self,
rotary_dim,
Expand Down Expand Up @@ -340,10 +340,29 @@ def get_rope_impl(
rotary_emb_layer = QwenRotaryEmbedding(rotary_dim, base, partial_rotary_factor)
rotary_emb = rotary_emb_layer(position_ids)
elif architecture.startswith("Glm"):
rotary_emb_layer = GlmRotaryEmbedding(rotary_dim, base, partial_rotary_factor)
rope_scaling = getattr(model_config, "rope_scaling", None)
if (
rope_scaling is not None
and isinstance(rope_scaling, dict)
and rope_scaling.get("rope_type", rope_scaling.get("type", "")) == "yarn"
and "factor" in rope_scaling
):
yarn_rotary_dim = int(rotary_dim * partial_rotary_factor) if partial_rotary_factor < 1.0 else rotary_dim
rotary_emb_layer = YarnScalingRotaryEmbedding(
rotary_dim=yarn_rotary_dim,
base=base,
original_max_position_embeddings=rope_scaling["original_max_position_embeddings"],
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❓ 疑问:original_max_position_embeddings 键缺少存在性检查

代码已通过 "factor" in rope_scaling 做了部分校验,但 rope_scaling["original_max_position_embeddings"] 使用的是直接下标访问;若 GLM 模型配置中缺少该键(例如旧版 config.json),运行时将抛出 KeyError。

建议修复方式:

original_max_position_embeddings=rope_scaling.get("original_max_position_embeddings", 8192),

或在前置条件判断中一并检查该键是否存在:

and "original_max_position_embeddings" in rope_scaling

scale=rope_scaling["factor"],
mscale=rope_scaling.get("mscale", 1.0),
beta_fast=rope_scaling.get("beta_fast", 32),
beta_slow=rope_scaling.get("beta_slow", 1),
use_neox_rotary_style=False,
)
else:
rotary_emb_layer = GlmRotaryEmbedding(rotary_dim, base, partial_rotary_factor)
rotary_emb = rotary_emb_layer(position_ids)
elif architecture.startswith("GptOss"):
rotary_emb_layer = GptOssScalingRotaryEmbedding(
rotary_emb_layer = YarnScalingRotaryEmbedding(
rotary_dim=model_config.head_dim,
base=model_config.rope_theta,
original_max_position_embeddings=model_config.rope_scaling["original_max_position_embeddings"],
Expand Down
Loading