diff --git a/openapi.yaml b/openapi.yaml index 1fc5f4a..6f53452 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -7789,6 +7789,7 @@ components: - GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED - GRPO_LOSS_AGGREGATION_TYPE_FIXED_HORIZON - GRPO_LOSS_AGGREGATION_TYPE_TOKEN_MEAN + - GRPO_LOSS_AGGREGATION_TYPE_SEQUENCE_MEAN default: GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED RL.LossConfig: type: object @@ -7900,12 +7901,14 @@ components: properties: target_tokens: $ref: '#/components/schemas/RL.LossTargetTokens' - description: Target tokens for loss computation (optional, defaults to shifted input_ids) + description: Target tokens for loss computation loss_mask: $ref: '#/components/schemas/RL.LossMask' description: Per-token loss mask (1=compute loss, 0=ignore) grpo_inputs: $ref: '#/components/schemas/RL.GRPOLossInputs' + required: + - target_tokens RL.TrainingSample: type: object required: