From afe83eb4428afeeeabc90ccc53403a3cc1890081 Mon Sep 17 00:00:00 2001
From: connermanuel
Date: Mon, 30 Mar 2026 16:42:07 -0700
Subject: [PATCH 1/2] Make target tokens required

---
NOTE(review): this copy of the series was recovered from a version in which
all line breaks and leading whitespace had been collapsed. The YAML
indentation in the hunk below is reconstructed (schema names assumed at
4 spaces under components/schemas, block sequences assumed indented) --
confirm with `git apply --check` against openapi.yaml before applying.

Adds `target_tokens` to the `required` list of the schema whose properties
appear in this hunk (its name lies above the hunk and is not visible here)
and drops the "(optional, defaults to shifted input_ids)" wording from the
property description. Payloads that omit `target_tokens` no longer validate
against the spec.

 openapi.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/openapi.yaml b/openapi.yaml
index 1fc5f4a..121d12e 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -7900,12 +7900,14 @@ components:
       properties:
         target_tokens:
           $ref: '#/components/schemas/RL.LossTargetTokens'
-          description: Target tokens for loss computation (optional, defaults to shifted input_ids)
+          description: Target tokens for loss computation
         loss_mask:
           $ref: '#/components/schemas/RL.LossMask'
           description: Per-token loss mask (1=compute loss, 0=ignore)
         grpo_inputs:
           $ref: '#/components/schemas/RL.GRPOLossInputs'
+      required:
+        - target_tokens
     RL.TrainingSample:
       type: object
       required:

From ce4e67f551ed7388921261d67af0e2e8440c5090 Mon Sep 17 00:00:00 2001
From: connermanuel
Date: Thu, 2 Apr 2026 08:00:07 -0700
Subject: [PATCH 2/2] add grpo loss agg type sequence mean

---
NOTE(review): the indentation in this hunk is reconstructed, not copied
verbatim (the enum is assumed to belong to a schema defined directly under
components/schemas) -- confirm with `git apply --check` before applying.

Appends GRPO_LOSS_AGGREGATION_TYPE_SEQUENCE_MEAN to the enum shown in the
hunk; the default stays GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED.

 openapi.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/openapi.yaml b/openapi.yaml
index 121d12e..6f53452 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -7789,6 +7789,7 @@ components:
         - GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED
         - GRPO_LOSS_AGGREGATION_TYPE_FIXED_HORIZON
         - GRPO_LOSS_AGGREGATION_TYPE_TOKEN_MEAN
+        - GRPO_LOSS_AGGREGATION_TYPE_SEQUENCE_MEAN
       default: GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED
     RL.LossConfig:
       type: object