Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions fastdeploy/model_executor/layers/rotary_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def forward(
return query, key


class GptOssScalingRotaryEmbedding:
class YarnScalingRotaryEmbedding:
def __init__(
self,
rotary_dim,
Expand Down Expand Up @@ -345,10 +345,29 @@ def get_rope_impl(
rotary_emb_layer = QwenRotaryEmbedding(rotary_dim, base, partial_rotary_factor)
rotary_emb = rotary_emb_layer(position_ids)
elif architecture.startswith("Glm"):
rotary_emb_layer = GlmRotaryEmbedding(rotary_dim, base, partial_rotary_factor)
rope_scaling = getattr(model_config, "rope_scaling", None)
if (
rope_scaling is not None
and isinstance(rope_scaling, dict)
and rope_scaling.get("rope_type", rope_scaling.get("type", "")) == "yarn"
and "factor" in rope_scaling
):
yarn_rotary_dim = int(rotary_dim * partial_rotary_factor) if partial_rotary_factor < 1.0 else rotary_dim
rotary_emb_layer = YarnScalingRotaryEmbedding(
rotary_dim=yarn_rotary_dim,
base=base,
original_max_position_embeddings=rope_scaling["original_max_position_embeddings"],
scale=rope_scaling["factor"],
mscale=rope_scaling.get("mscale", 1.0),
beta_fast=rope_scaling.get("beta_fast", 32),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 **Bug**:`rope_scaling["original_max_position_embeddings"]` 使用了直接的字典下标访问,存在 `KeyError` 风险。

当前条件只验证了 `"factor" in rope_scaling`,但未检查 `"original_max_position_embeddings"` 是否存在。若用户的 `rope_scaling` 配置中缺少该字段,运行时会直接抛出 `KeyError`,导致服务崩溃。

建议修复(使用 `.get()` 并显式报错):

```python
original_max_pos = rope_scaling.get("original_max_position_embeddings")
if original_max_pos is None:
    raise ValueError(
        "rope_scaling must contain 'original_max_position_embeddings' when rope_type is 'yarn'"
    )
rotary_emb_layer = YarnScalingRotaryEmbedding(
    rotary_dim=yarn_rotary_dim,
    base=base,
    original_max_position_embeddings=original_max_pos,
    ...
)
```

beta_slow=rope_scaling.get("beta_slow", 1),
use_neox_rotary_style=False,
)
else:
rotary_emb_layer = GlmRotaryEmbedding(rotary_dim, base, partial_rotary_factor)
rotary_emb = rotary_emb_layer(position_ids)
elif architecture.startswith("GptOss"):
rotary_emb_layer = GptOssScalingRotaryEmbedding(
rotary_emb_layer = YarnScalingRotaryEmbedding(
rotary_dim=model_config.head_dim,
base=model_config.rope_theta,
original_max_position_embeddings=model_config.rope_scaling["original_max_position_embeddings"],
Expand Down
Loading