From acab2bea694de63c6d90cb27e480a91e517dadd8 Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Tue, 23 Jun 2026 11:29:22 -0400 Subject: [PATCH 01/24] feat(schedules): add agent run schedules (v1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the prior schedules implementation with per-agent "agent run schedules": recurring schedules backed by a Temporal Schedule that, on each fire, creates a task and delivers a configured initial input via the same path as a manual agent run — message/send for sync agents, event/send for agentic agents — attributed to the schedule's stored creator principal. - REST CRUD under /agents/{agent_id}/schedules: create, get, list, pause, resume, delete - Postgres row is the source of truth for the schedule definition; the Temporal Schedule is only the recurring clock and carries just the row id - ScheduledAgentRunWorkflow (thin, deterministic) + the launch_scheduled_agent_run activity that does all side effects - deterministic per-fire task name makes task/create idempotent on activity retry; a delivered marker guards against re-delivery - fire-time authz re-check under the creator principal so a revoked creator stops firing cleanly - new agent_run_schedules table migration Co-Authored-By: Claude Opus 4.8 (1M context) --- ...00_add_agent_run_schedules_3b1c9d2e4f6a.py | 82 ++ agentex/openapi.yaml | 629 +++++++------- agentex/src/adapters/orm.py | 37 + .../src/adapters/temporal/adapter_temporal.py | 25 + agentex/src/api/app.py | 4 +- agentex/src/api/routes/agent_run_schedules.py | 205 +++++ agentex/src/api/routes/schedules.py | 189 ---- .../src/api/schemas/agent_run_schedules.py | 165 ++++ agentex/src/api/schemas/schedules.py | 255 ------ .../domain/entities/agent_run_schedules.py | 97 +++ .../agent_run_schedule_repository.py | 78 ++ .../services/agent_run_schedule_service.py | 380 +++++++++ .../src/domain/services/schedule_service.py | 471 ---------- 
.../use_cases/agent_run_schedules_use_case.py | 78 ++ .../domain/use_cases/schedules_use_case.py | 153 ---- .../scheduled_agent_run_activities.py | 280 ++++++ agentex/src/temporal/run_worker.py | 16 + .../temporal/scheduled_agent_run_factory.py | 141 +++ .../workflows/scheduled_agent_run_workflow.py | 39 + .../test_schedule_service_dual_write.py | 251 ------ .../tests/unit/api/test_schedules_authz.py | 399 --------- .../test_agent_run_schedule_service.py | 190 +++++ .../unit/services/test_schedule_service.py | 806 ------------------ .../test_scheduled_agent_run_activity.py | 243 ++++++ .../test_agent_run_schedules_use_case.py | 98 +++ .../unit/use_cases/test_schedules_use_case.py | 624 -------------- 26 files changed, 2465 insertions(+), 3470 deletions(-) create mode 100644 agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py create mode 100644 agentex/src/api/routes/agent_run_schedules.py delete mode 100644 agentex/src/api/routes/schedules.py create mode 100644 agentex/src/api/schemas/agent_run_schedules.py delete mode 100644 agentex/src/api/schemas/schedules.py create mode 100644 agentex/src/domain/entities/agent_run_schedules.py create mode 100644 agentex/src/domain/repositories/agent_run_schedule_repository.py create mode 100644 agentex/src/domain/services/agent_run_schedule_service.py delete mode 100644 agentex/src/domain/services/schedule_service.py create mode 100644 agentex/src/domain/use_cases/agent_run_schedules_use_case.py delete mode 100644 agentex/src/domain/use_cases/schedules_use_case.py create mode 100644 agentex/src/temporal/activities/scheduled_agent_run_activities.py create mode 100644 agentex/src/temporal/scheduled_agent_run_factory.py create mode 100644 agentex/src/temporal/workflows/scheduled_agent_run_workflow.py delete mode 100644 agentex/tests/integration/services/test_schedule_service_dual_write.py delete mode 100644 agentex/tests/unit/api/test_schedules_authz.py create mode 100644 
agentex/tests/unit/services/test_agent_run_schedule_service.py delete mode 100644 agentex/tests/unit/services/test_schedule_service.py create mode 100644 agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py create mode 100644 agentex/tests/unit/use_cases/test_agent_run_schedules_use_case.py delete mode 100644 agentex/tests/unit/use_cases/test_schedules_use_case.py diff --git a/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py b/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py new file mode 100644 index 00000000..e63fcaf8 --- /dev/null +++ b/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py @@ -0,0 +1,82 @@ +"""add agent_run_schedules + +Revision ID: 3b1c9d2e4f6a +Revises: c7a1b2d3e4f5 +Create Date: 2026-06-22 12:00:00.000000 + +Creates the agent_run_schedules table backing the scheduled-agent-runs feature +(AGX1-368). Schema-only: the table and its indexes are created unconditionally +(no IF NOT EXISTS guards; Alembic revision tracking runs this once), and the +indexes target the just-created table so they are non-blocking by construction. +""" +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = '3b1c9d2e4f6a' +down_revision: str | None = 'c7a1b2d3e4f5' +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + op.create_table( + 'agent_run_schedules', + sa.Column('id', sa.String(), nullable=False), + sa.Column('agent_id', sa.String(length=64), nullable=False), + sa.Column('name', sa.String(length=256), nullable=False), + sa.Column('description', sa.Text(), nullable=True), + sa.Column('cron_expression', sa.String(), nullable=True), + sa.Column('interval_seconds', sa.Integer(), nullable=True), + sa.Column( + 'timezone', sa.String(), server_default='UTC', nullable=False + ), + sa.Column('start_at', sa.DateTime(timezone=True), nullable=True), + sa.Column('end_at', sa.DateTime(timezone=True), nullable=True), + sa.Column( + 'paused', sa.Boolean(), server_default='false', nullable=False + ), + sa.Column('creator_principal', sa.JSON(), nullable=False), + sa.Column('task_params', sa.JSON(), nullable=True), + sa.Column('task_metadata', sa.JSON(), nullable=True), + sa.Column('initial_input', sa.JSON(), nullable=False), + sa.Column('initial_input_method', sa.String(), nullable=True), + sa.Column( + 'created_at', + sa.DateTime(timezone=True), + server_default=sa.text('now()'), + nullable=True, + ), + sa.Column( + 'updated_at', + sa.DateTime(timezone=True), + server_default=sa.text('now()'), + nullable=True, + ), + sa.ForeignKeyConstraint(['agent_id'], ['agents.id']), + sa.PrimaryKeyConstraint('id'), + ) + # Indexes target the table created in this same migration, so they hold no + # write-blocking lock against live traffic (the table has no rows yet). 
+ op.create_index( + 'uq_agent_run_schedules_agent_name', + 'agent_run_schedules', + ['agent_id', 'name'], + unique=True, + ) + op.create_index( + 'idx_agent_run_schedules_agent', + 'agent_run_schedules', + ['agent_id'], + unique=False, + ) + + +def downgrade() -> None: + op.drop_index('idx_agent_run_schedules_agent', table_name='agent_run_schedules') + op.drop_index( + 'uq_agent_run_schedules_agent_name', table_name='agent_run_schedules' + ) + op.drop_table('agent_run_schedules') diff --git a/agentex/openapi.yaml b/agentex/openapi.yaml index 2bfeff3c..069ab6e3 100644 --- a/agentex/openapi.yaml +++ b/agentex/openapi.yaml @@ -3214,9 +3214,10 @@ paths: post: tags: - Schedules - summary: Create Schedule - description: Create a new schedule for recurring workflow execution for an agent. - operationId: create_schedule_agents__agent_id__schedules_post + summary: Create Run Schedule + description: Create a recurring schedule that starts a fresh agent run on each + fire. + operationId: create_run_schedule_agents__agent_id__schedules_post parameters: - name: agent_id in: path @@ -3229,14 +3230,14 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateScheduleRequest' + $ref: '#/components/schemas/CreateAgentRunScheduleRequest' responses: '200': description: Successful Response content: application/json: schema: - $ref: '#/components/schemas/ScheduleResponse' + $ref: '#/components/schemas/AgentRunScheduleResponse' '422': description: Validation Error content: @@ -3246,9 +3247,9 @@ paths: get: tags: - Schedules - summary: List Agent Schedules - description: List all schedules for an agent. - operationId: list_schedules_agents__agent_id__schedules_get + summary: List Run Schedules + description: List run schedules for an agent. 
+ operationId: list_run_schedules_agents__agent_id__schedules_get parameters: - name: agent_id in: path @@ -3256,7 +3257,7 @@ paths: schema: type: string title: Agent Id - - name: page_size + - name: limit in: query required: false schema: @@ -3264,27 +3265,27 @@ paths: maximum: 1000 minimum: 1 default: 100 - title: Page Size + title: Limit responses: '200': description: Successful Response content: application/json: schema: - $ref: '#/components/schemas/ScheduleListResponse' + $ref: '#/components/schemas/AgentRunScheduleListResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' - /agents/{agent_id}/schedules/{schedule_name}: + /agents/{agent_id}/schedules/{name}: get: tags: - Schedules - summary: Get Schedule - description: Get details of a schedule by its name. - operationId: get_schedule_agents__agent_id__schedules__schedule_name__get + summary: Get Run Schedule + description: Get a run schedule by its name. + operationId: get_run_schedule_agents__agent_id__schedules__name__get parameters: - name: agent_id in: path @@ -3292,19 +3293,19 @@ paths: schema: type: string title: Agent Id - - name: schedule_name + - name: name in: path required: true schema: type: string - title: Schedule Name + title: Name responses: '200': description: Successful Response content: application/json: schema: - $ref: '#/components/schemas/ScheduleResponse' + $ref: '#/components/schemas/AgentRunScheduleResponse' '422': description: Validation Error content: @@ -3314,9 +3315,9 @@ paths: delete: tags: - Schedules - summary: Delete Schedule - description: Delete a schedule permanently. - operationId: delete_schedule_agents__agent_id__schedules__schedule_name__delete + summary: Delete Run Schedule + description: Delete a run schedule permanently. 
+ operationId: delete_run_schedule_agents__agent_id__schedules__name__delete parameters: - name: agent_id in: path @@ -3324,12 +3325,12 @@ paths: schema: type: string title: Agent Id - - name: schedule_name + - name: name in: path required: true schema: type: string - title: Schedule Name + title: Name responses: '200': description: Successful Response @@ -3343,13 +3344,13 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' - /agents/{agent_id}/schedules/{schedule_name}/pause: + /agents/{agent_id}/schedules/{name}/pause: post: tags: - Schedules - summary: Pause Schedule - description: Pause a schedule to stop it from executing. - operationId: pause_schedule_agents__agent_id__schedules__schedule_name__pause_post + summary: Pause Run Schedule + description: Pause a run schedule so it stops firing. + operationId: pause_run_schedule_agents__agent_id__schedules__name__pause_post parameters: - name: agent_id in: path @@ -3357,18 +3358,18 @@ paths: schema: type: string title: Agent Id - - name: schedule_name + - name: name in: path required: true schema: type: string - title: Schedule Name + title: Name requestBody: content: application/json: schema: anyOf: - - $ref: '#/components/schemas/PauseScheduleRequest' + - $ref: '#/components/schemas/PauseRunScheduleRequest' - type: 'null' title: Request responses: @@ -3377,20 +3378,20 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ScheduleResponse' + $ref: '#/components/schemas/AgentRunScheduleResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' - /agents/{agent_id}/schedules/{schedule_name}/unpause: + /agents/{agent_id}/schedules/{name}/resume: post: tags: - Schedules - summary: Unpause Schedule - description: Unpause/resume a schedule to allow it to execute again. 
- operationId: unpause_schedule_agents__agent_id__schedules__schedule_name__unpause_post + summary: Resume Run Schedule + description: Resume a paused run schedule so it fires again. + operationId: resume_run_schedule_agents__agent_id__schedules__name__resume_post parameters: - name: agent_id in: path @@ -3398,18 +3399,18 @@ paths: schema: type: string title: Agent Id - - name: schedule_name + - name: name in: path required: true schema: type: string - title: Schedule Name + title: Name requestBody: content: application/json: schema: anyOf: - - $ref: '#/components/schemas/UnpauseScheduleRequest' + - $ref: '#/components/schemas/ResumeRunScheduleRequest' - type: 'null' title: Request responses: @@ -3418,41 +3419,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ScheduleResponse' - '422': - description: Validation Error - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - /agents/{agent_id}/schedules/{schedule_name}/trigger: - post: - tags: - - Schedules - summary: Trigger Schedule - description: Trigger a schedule to run immediately, regardless of its regular - schedule. 
- operationId: trigger_schedule_agents__agent_id__schedules__schedule_name__trigger_post - parameters: - - name: agent_id - in: path - required: true - schema: - type: string - title: Agent Id - - name: schedule_name - in: path - required: true - schema: - type: string - title: Schedule Name - responses: - '200': - description: Successful Response - content: - application/json: - schema: - $ref: '#/components/schemas/ScheduleResponse' + $ref: '#/components/schemas/AgentRunScheduleResponse' '422': description: Validation Error content: @@ -3935,6 +3902,153 @@ components: - $ref: '#/components/schemas/Event' - type: 'null' title: AgentRPCResult + AgentRunScheduleListResponse: + properties: + run_schedules: + items: + $ref: '#/components/schemas/AgentRunScheduleResponse' + type: array + title: Run Schedules + description: The list of run schedules. + total: + type: integer + title: Total + description: The number of run schedules returned. + type: object + required: + - run_schedules + - total + title: AgentRunScheduleListResponse + description: Response model for listing run schedules. + AgentRunScheduleResponse: + properties: + id: + type: string + title: Id + description: The unique identifier of the run schedule. + agent_id: + type: string + title: Agent Id + description: The agent this schedule belongs to. + name: + type: string + title: Name + description: Schedule name, unique per agent. + description: + anyOf: + - type: string + - type: 'null' + title: Description + description: Optional description. + cron_expression: + anyOf: + - type: string + - type: 'null' + title: Cron Expression + description: Cron cadence, if cron-based. + interval_seconds: + anyOf: + - type: integer + - type: 'null' + title: Interval Seconds + description: Interval cadence in seconds, if interval-based. + timezone: + type: string + title: Timezone + description: Timezone the cron expression is evaluated in. 
+ default: UTC + start_at: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Start At + description: Schedule activation time. + end_at: + anyOf: + - type: string + format: date-time + - type: 'null' + title: End At + description: Schedule deactivation time. + paused: + type: boolean + title: Paused + description: Whether the schedule is paused. + default: false + task_params: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + title: Task Params + description: Task params at fire time. + task_metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + title: Task Metadata + description: Task metadata at fire time. + initial_input: + $ref: '#/components/schemas/ScheduleInitialInput' + description: The initial input. + initial_input_method: + anyOf: + - type: string + - type: 'null' + title: Initial Input Method + description: Effective delivery method (inferred from the agent's ACP type). + creator_principal: + anyOf: + - $ref: '#/components/schemas/ScheduleCreatorPrincipal' + - type: 'null' + description: Credential-free creator identity. + created_at: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Created At + description: When the schedule was created. + updated_at: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Updated At + description: When the schedule was updated. + state: + $ref: '#/components/schemas/RunScheduleState' + description: Live schedule state from Temporal. + default: ACTIVE + next_action_times: + items: + type: string + format: date-time + type: array + title: Next Action Times + description: Upcoming scheduled fire times. + last_action_time: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Last Action Time + description: When the schedule last fired. + num_actions_taken: + type: integer + title: Num Actions Taken + description: Number of times the schedule has fired. 
+ default: 0 + type: object + required: + - id + - agent_id + - name + - initial_input + title: AgentRunScheduleResponse + description: Response model describing a scheduled agent run. AgentStatus: type: string enum: @@ -4286,37 +4400,7 @@ components: - api_key_type - api_key title: CreateAPIKeyResponse - CreateDeploymentRequest: - properties: - docker_image: - type: string - title: Docker Image - description: Full Docker image URI. - registration_metadata: - anyOf: - - additionalProperties: true - type: object - - type: 'null' - title: Registration Metadata - description: Git/build metadata (commit_hash, branch_name, author_name, - author_email, build_timestamp). - sgp_deploy_id: - anyOf: - - type: string - - type: 'null' - title: Sgp Deploy Id - description: SGP deployment ID. - helm_release_name: - anyOf: - - type: string - - type: 'null' - title: Helm Release Name - description: Helm release name. - type: object - required: - - docker_image - title: CreateDeploymentRequest - CreateScheduleRequest: + CreateAgentRunScheduleRequest: properties: name: type: string @@ -4324,70 +4408,104 @@ components: minLength: 1 pattern: ^[a-z0-9][a-z0-9-]*[a-z0-9]$|^[a-z0-9]$ title: Schedule Name - description: Human-readable name for the schedule (e.g., 'weekly-profiling'). - Will be combined with agent_id to form the full schedule_id. - workflow_name: - type: string - title: Workflow Name - description: Name of the Temporal workflow to execute (e.g., 'sae-orchestrator') - task_queue: - type: string - title: Task Queue - description: Temporal task queue where the agent's worker is listening - workflow_params: + description: Human-readable name, unique per agent (e.g. 'daily-granola-summary'). + description: anyOf: - - additionalProperties: true - type: object + - type: string - type: 'null' - title: Workflow Parameters - description: Parameters to pass to the workflow + title: Description + description: Optional description of what this schedule does. 
cron_expression: anyOf: - type: string - type: 'null' title: Cron Expression - description: Cron expression for scheduling (e.g., '0 0 * * 0' for weekly - on Sunday) + description: Cron expression for the cadence (e.g. '0 17 * * MON-FRI'). + Mutually exclusive with interval_seconds. interval_seconds: anyOf: - type: integer minimum: 1.0 - type: 'null' title: Interval Seconds - description: Alternative to cron - run every N seconds - execution_timeout_seconds: - anyOf: - - type: integer - minimum: 1.0 - - type: 'null' - title: Execution Timeout - description: Maximum time in seconds for each workflow execution + description: Interval cadence in seconds. Mutually exclusive with cron_expression. + timezone: + type: string + title: Timezone + description: IANA timezone the cron expression is evaluated in (e.g. 'America/New_York'). + default: UTC start_at: anyOf: - type: string format: date-time - type: 'null' title: Start At - description: When the schedule should start being active + description: When the schedule should start being active. end_at: anyOf: - type: string format: date-time - type: 'null' title: End At - description: When the schedule should stop being active + description: When the schedule should stop being active. paused: type: boolean title: Paused - description: Whether to create the schedule in a paused state + description: Whether to create the schedule in a paused state. default: false + task_params: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + title: Task Params + description: Resolved config forwarded as task `params` at fire time. + task_metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + title: Task Metadata + description: Metadata copied onto each created task at fire time. + initial_input: + $ref: '#/components/schemas/ScheduleInitialInput' + description: The first input delivered to each created task. 
type: object required: - name - - workflow_name - - task_queue - title: CreateScheduleRequest - description: Request model for creating a new schedule for an agent + - initial_input + title: CreateAgentRunScheduleRequest + description: Request body for creating a scheduled agent run. + CreateDeploymentRequest: + properties: + docker_image: + type: string + title: Docker Image + description: Full Docker image URI. + registration_metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + title: Registration Metadata + description: Git/build metadata (commit_hash, branch_name, author_name, + author_email, build_timestamp). + sgp_deploy_id: + anyOf: + - type: string + - type: 'null' + title: Sgp Deploy Id + description: SGP deployment ID. + helm_release_name: + anyOf: + - type: string + - type: 'null' + title: Helm Release Name + description: Helm release name. + type: object + required: + - docker_image + title: CreateDeploymentRequest CreateSpanRequest: properties: id: @@ -5013,17 +5131,16 @@ components: - data title: PaginatedMessagesResponse description: Response with cursor pagination metadata. - PauseScheduleRequest: + PauseRunScheduleRequest: properties: note: anyOf: - type: string - type: 'null' title: Note - description: Optional note explaining why the schedule was paused + description: Optional note explaining the pause. type: object - title: PauseScheduleRequest - description: Request model for pausing a schedule + title: PauseRunScheduleRequest PutCheckpointRequest: properties: thread_id: @@ -5423,195 +5540,78 @@ components: snapshot → clean → rehydrate round-trips cleanly without serialization changes.' 
- ScheduleActionInfo: + ResumeRunScheduleRequest: properties: - workflow_name: - type: string - title: Workflow Name - description: Name of the workflow being executed - workflow_id_prefix: - type: string - title: Workflow ID Prefix - description: Prefix for workflow execution IDs - task_queue: - type: string - title: Task Queue - description: Task queue for the workflow - workflow_params: + note: anyOf: - - items: {} - type: array + - type: string - type: 'null' - title: Workflow Parameters - description: Parameters passed to the workflow + title: Note + description: Optional note explaining the resume. type: object - required: - - workflow_name - - workflow_id_prefix - - task_queue - title: ScheduleActionInfo - description: Information about the scheduled action - ScheduleListItem: + title: ResumeRunScheduleRequest + RunScheduleState: + type: string + enum: + - ACTIVE + - PAUSED + title: RunScheduleState + description: Live state of a run schedule, derived from Temporal. + ScheduleCreatorPrincipal: properties: - schedule_id: - type: string - title: Schedule ID - description: Unique identifier for the schedule - name: - type: string - title: Schedule Name - description: Human-readable name for the schedule - agent_id: - type: string - title: Agent ID - description: ID of the agent this schedule belongs to - state: - $ref: '#/components/schemas/ScheduleState' - title: State - description: Current state of the schedule - workflow_name: + principal_type: anyOf: - type: string - type: 'null' - title: Workflow Name - description: Name of the scheduled workflow - next_action_time: + title: Principal Type + description: e.g. 'user' or 'service_account'. 
+ user_id: anyOf: - type: string - format: date-time - type: 'null' - title: Next Action Time - description: Next scheduled execution time - type: object - required: - - schedule_id - - name - - agent_id - - state - title: ScheduleListItem - description: Abbreviated schedule info for list responses - ScheduleListResponse: - properties: - schedules: - items: - $ref: '#/components/schemas/ScheduleListItem' - type: array - title: Schedules - description: List of schedules - total: - type: integer - title: Total - description: Total number of schedules - type: object - required: - - schedules - - total - title: ScheduleListResponse - description: Response model for listing schedules - ScheduleResponse: - properties: - schedule_id: - type: string - title: Schedule ID - description: Unique identifier for the schedule - name: - type: string - title: Schedule Name - description: Human-readable name for the schedule - agent_id: - type: string - title: Agent ID - description: ID of the agent this schedule belongs to - state: - $ref: '#/components/schemas/ScheduleState' - title: State - description: Current state of the schedule - action: - $ref: '#/components/schemas/ScheduleActionInfo' - title: Action - spec: - $ref: '#/components/schemas/ScheduleSpecInfo' - title: Spec - description: Schedule specification - num_actions_taken: - type: integer - title: Number of Actions Taken - description: Number of times the schedule has executed - default: 0 - num_actions_missed: - type: integer - title: Number of Actions Missed - description: Number of scheduled executions that were missed - default: 0 - next_action_times: - items: - type: string - format: date-time - type: array - title: Next Action Times - description: Upcoming scheduled execution times - last_action_time: + title: User Id + description: Creator user id, if a user principal. 
+ service_account_id: anyOf: - type: string - format: date-time - type: 'null' - title: Last Action Time - description: When the schedule last executed - created_at: + title: Service Account Id + description: Creator service-account id, if a service principal. + account_id: anyOf: - type: string - format: date-time - type: 'null' - title: Created At - description: When the schedule was created + title: Account Id + description: Account/workspace id of the creator. type: object - required: - - schedule_id - - name - - agent_id - - state - - action - - spec - title: ScheduleResponse - description: Response model for schedule operations - ScheduleSpecInfo: + title: ScheduleCreatorPrincipal + description: 'Credential-free creator identity stored with the schedule. + + + Never carries cookies, JWTs, API keys, OAuth tokens, or request headers — + it + + is creator *context* used only for AuthZ and ownership at fire time (D5/D6).' + ScheduleInitialInput: properties: - cron_expressions: - items: - type: string - type: array - title: Cron Expressions - description: Cron expressions for the schedule - intervals_seconds: - items: - type: integer - type: array - title: Interval Seconds - description: Interval specifications in seconds - start_at: - anyOf: - - type: string - format: date-time - - type: 'null' - title: Start At - description: When the schedule starts being active - end_at: - anyOf: - - type: string - format: date-time - - type: 'null' - title: End At - description: When the schedule stops being active + type: + type: string + title: Type + description: Input content type. Only 'text' in v1. + default: text + author: + $ref: '#/components/schemas/MessageAuthor' + description: The author attributed to the initial input. + default: user + content: + type: string + title: Content + description: The initial prompt delivered to the task. 
type: object - title: ScheduleSpecInfo - description: Information about the schedule specification - ScheduleState: - type: string - enum: - - ACTIVE - - PAUSED - title: ScheduleState - description: Schedule state enum + required: + - content + title: ScheduleInitialInput + description: The first input delivered to each freshly created scheduled task. SendEventRequest: properties: task_id: @@ -6548,17 +6548,6 @@ components: - name title: ToolResponseDelta description: Delta for tool response updates - UnpauseScheduleRequest: - properties: - note: - anyOf: - - type: string - - type: 'null' - title: Note - description: Optional note explaining why the schedule was unpaused - type: object - title: UnpauseScheduleRequest - description: Request model for unpausing a schedule UpdateAgentTaskTrackerRequest: properties: last_processed_event_id: diff --git a/agentex/src/adapters/orm.py b/agentex/src/adapters/orm.py index 42a66c1a..37bf2268 100644 --- a/agentex/src/adapters/orm.py +++ b/agentex/src/adapters/orm.py @@ -197,6 +197,43 @@ class AgentAPIKeyORM(BaseORM): ) +class AgentRunScheduleORM(BaseORM): + __tablename__ = "agent_run_schedules" + id = Column(String, primary_key=True, default=orm_id) + agent_id = Column(String(64), ForeignKey("agents.id"), nullable=False) + name = Column(String(256), nullable=False) + description = Column(Text, nullable=True) + cron_expression = Column(String, nullable=True) + interval_seconds = Column(Integer, nullable=True) + timezone = Column(String, nullable=False, server_default="UTC") + start_at = Column(DateTime(timezone=True), nullable=True) + end_at = Column(DateTime(timezone=True), nullable=True) + paused = Column(Boolean, nullable=False, server_default="false") + # Credential-free creator context (see ScheduleCreatorPrincipal): no cookies, + # JWTs, API keys, OAuth tokens, or request headers are ever stored here. 
+ creator_principal = Column(JSON, nullable=False) + task_params = Column(JSON, nullable=True) + task_metadata = Column(JSON, nullable=True) + initial_input = Column(JSON, nullable=False) + initial_input_method = Column(String, nullable=True) + created_at = Column(DateTime(timezone=True), server_default=func.now()) + updated_at = Column( + DateTime(timezone=True), server_default=func.now(), onupdate=func.now() + ) + + __table_args__ = ( + # Schedule names are unique per agent (the get/pause/resume/delete + # endpoints address a schedule by agent_id + name). + Index( + "uq_agent_run_schedules_agent_name", + "agent_id", + "name", + unique=True, + ), + Index("idx_agent_run_schedules_agent", "agent_id"), + ) + + class DeploymentHistoryORM(BaseORM): __tablename__ = "deployment_history" diff --git a/agentex/src/adapters/temporal/adapter_temporal.py b/agentex/src/adapters/temporal/adapter_temporal.py index f64d0d29..dccdb2cb 100644 --- a/agentex/src/adapters/temporal/adapter_temporal.py +++ b/agentex/src/adapters/temporal/adapter_temporal.py @@ -10,6 +10,8 @@ ScheduleDescription, ScheduleHandle, ScheduleIntervalSpec, + ScheduleOverlapPolicy, + SchedulePolicy, ScheduleSpec, ScheduleState, WorkflowExecution, @@ -362,9 +364,20 @@ async def create_schedule( start_at: Any | None = None, end_at: Any | None = None, paused: bool = False, + time_zone_name: str | None = None, + overlap_policy: str | None = None, ) -> ScheduleHandle: """ Create a new schedule for recurring workflow execution. + + ``time_zone_name`` is an optional IANA timezone (e.g. ``America/New_York``) + the cron expression is evaluated in; when omitted, cron is evaluated in + UTC. Ignored for interval-based schedules. + + ``overlap_policy`` is an optional ScheduleOverlapPolicy name (e.g. + ``"skip"``, ``"buffer_one"``) controlling what happens when a fire is due + while a prior run is still executing. When omitted, Temporal's default + (SKIP) applies. 
""" if not self.client: raise TemporalConnectionError("Temporal client is not connected") @@ -377,6 +390,9 @@ async def create_schedule( try: # Build schedule spec + spec_kwargs: dict[str, Any] = {} + if time_zone_name: + spec_kwargs["time_zone_name"] = time_zone_name spec = ScheduleSpec( cron_expressions=cron_expressions or [], intervals=[ @@ -386,6 +402,7 @@ async def create_schedule( else [], start_at=start_at, end_at=end_at, + **spec_kwargs, ) # Build workflow action @@ -408,6 +425,13 @@ async def create_schedule( paused=paused, ) + # Build schedule policies (overlap), when requested + schedule_kwargs: dict[str, Any] = {} + if overlap_policy: + schedule_kwargs["policy"] = SchedulePolicy( + overlap=ScheduleOverlapPolicy[overlap_policy.upper()] + ) + # Create the schedule handle = await self.client.create_schedule( schedule_id, @@ -415,6 +439,7 @@ async def create_schedule( action=action, spec=spec, state=state, + **schedule_kwargs, ), ) diff --git a/agentex/src/api/app.py b/agentex/src/api/app.py index 0131ad30..1692dd3b 100644 --- a/agentex/src/api/app.py +++ b/agentex/src/api/app.py @@ -30,6 +30,7 @@ from src.api.RequestLoggingMiddleware import RequestLoggingMiddleware from src.api.routes import ( agent_api_keys, + agent_run_schedules, agent_task_tracker, agents, checkpoints, @@ -37,7 +38,6 @@ deployments, events, messages, - schedules, spans, states, task_retention, @@ -204,7 +204,7 @@ async def handle_unexpected(request, exc): fastapi_app.include_router(agent_api_keys.router) fastapi_app.include_router(deployment_history.router) fastapi_app.include_router(deployments.router) -fastapi_app.include_router(schedules.router) +fastapi_app.include_router(agent_run_schedules.router) fastapi_app.include_router(checkpoints.router) fastapi_app.include_router(task_retention.router) diff --git a/agentex/src/api/routes/agent_run_schedules.py b/agentex/src/api/routes/agent_run_schedules.py new file mode 100644 index 00000000..489537d7 --- /dev/null +++ 
b/agentex/src/api/routes/agent_run_schedules.py @@ -0,0 +1,205 @@ +from typing import Any + +from fastapi import APIRouter, Query, Request + +from src.api.schemas.agent_run_schedules import ( + AgentRunScheduleListResponse, + AgentRunScheduleResponse, + CreateAgentRunScheduleRequest, + PauseRunScheduleRequest, + ResumeRunScheduleRequest, +) +from src.api.schemas.authorization_types import ( + AgentexResourceType, + AuthorizedOperationType, +) +from src.api.schemas.delete_response import DeleteResponse +from src.domain.services.agent_run_schedule_service import ( + build_run_schedule_authz_selector, +) +from src.domain.services.authorization_service import DAuthorizationService +from src.domain.use_cases.agent_run_schedules_use_case import ( + DAgentRunSchedulesUseCase, +) +from src.domain.use_cases.agents_use_case import DAgentsUseCase +from src.utils.authorization_shortcuts import DAuthorizedId, DAuthorizedResourceIds +from src.utils.logging import make_logger +from src.utils.schedule_authorization import _check_schedule_or_collapse_to_404 + +logger = make_logger(__name__) + +# The canonical agent scheduling API. Schedules an agent *run* on each fire +# (creates a fresh task + delivers the configured initial input), hiding the +# underlying Temporal workflow/task-queue details (AGX1-368, D1). It replaced the +# earlier bare-workflow scheduler that previously owned this path. +router = APIRouter( + prefix="/agents/{agent_id}/schedules", + tags=["Schedules"], +) + +_CREATOR_PRINCIPAL_FIELDS = ( + "principal_type", + "user_id", + "service_account_id", + "account_id", +) + + +def _extract_creator_principal(principal_context: Any) -> dict[str, Any]: + """Capture the credential-free creator subset from the request principal. + + Stores only identity selectors (principal_type / user_id / service_account_id + / account_id). Never cookies, JWTs, API keys, OAuth tokens, or headers (D5/D6). + Returns an empty dict under authz bypass / when no principal is present. 
+ """ + if principal_context is None: + return {} + if isinstance(principal_context, dict): + getter = principal_context.get + else: + getter = lambda key: getattr(principal_context, key, None) # noqa: E731 + return { + field: getter(field) + for field in _CREATOR_PRINCIPAL_FIELDS + if getter(field) is not None + } + + +@router.post( + "", + response_model=AgentRunScheduleResponse, + summary="Create Run Schedule", + description="Create a recurring schedule that starts a fresh agent run on each fire.", +) +async def create_run_schedule( + agent_id: DAuthorizedId(AgentexResourceType.agent, AuthorizedOperationType.update), + request: CreateAgentRunScheduleRequest, + http_request: Request, + agents_use_case: DAgentsUseCase, + run_schedules_use_case: DAgentRunSchedulesUseCase, +) -> AgentRunScheduleResponse: + """Create a run schedule for an agent. + + Gated on ``agent.update`` (no schedule resource exists yet), mirroring the + bare-workflow scheduler's create gate. The authenticated creator principal is + captured here and replayed for AuthZ / task ownership when the schedule fires. + """ + agent = await agents_use_case.get(id=agent_id) + creator_principal = _extract_creator_principal( + getattr(http_request.state, "principal_context", None) + ) + return await run_schedules_use_case.create_schedule( + agent, request, creator_principal + ) + + +@router.get( + "", + response_model=AgentRunScheduleListResponse, + summary="List Run Schedules", + description="List run schedules for an agent.", +) +async def list_run_schedules( + agent_id: str, + run_schedules_use_case: DAgentRunSchedulesUseCase, + authorized_schedule_ids: DAuthorizedResourceIds(AgentexResourceType.schedule), + limit: int = Query(default=100, ge=1, le=1000), +) -> AgentRunScheduleListResponse: + """List an agent's run schedules, filtered to those the caller owns. 
+ + Filter-only (never 403s): ``authorized_schedule_ids`` is ``None`` under authz + bypass (return all), else the set of readable selectors (empty returns none). + """ + return await run_schedules_use_case.list_schedules( + agent_id, + authorized_schedule_ids=authorized_schedule_ids, + limit=limit, + ) + + +@router.get( + "/{name}", + response_model=AgentRunScheduleResponse, + summary="Get Run Schedule", + description="Get a run schedule by its name.", +) +async def get_run_schedule( + agent_id: str, + name: str, + run_schedules_use_case: DAgentRunSchedulesUseCase, + authorization: DAuthorizationService, +) -> AgentRunScheduleResponse: + await _check_schedule_or_collapse_to_404( + authorization, + build_run_schedule_authz_selector(agent_id, name), + AuthorizedOperationType.read, + ) + return await run_schedules_use_case.get_schedule(agent_id, name) + + +@router.post( + "/{name}/pause", + response_model=AgentRunScheduleResponse, + summary="Pause Run Schedule", + description="Pause a run schedule so it stops firing.", +) +async def pause_run_schedule( + agent_id: str, + name: str, + run_schedules_use_case: DAgentRunSchedulesUseCase, + authorization: DAuthorizationService, + request: PauseRunScheduleRequest | None = None, +) -> AgentRunScheduleResponse: + await _check_schedule_or_collapse_to_404( + authorization, + build_run_schedule_authz_selector(agent_id, name), + AuthorizedOperationType.update, + ) + note = request.note if request else None + return await run_schedules_use_case.pause_schedule(agent_id, name, note=note) + + +@router.post( + "/{name}/resume", + response_model=AgentRunScheduleResponse, + summary="Resume Run Schedule", + description="Resume a paused run schedule so it fires again.", +) +async def resume_run_schedule( + agent_id: str, + name: str, + run_schedules_use_case: DAgentRunSchedulesUseCase, + authorization: DAuthorizationService, + request: ResumeRunScheduleRequest | None = None, +) -> AgentRunScheduleResponse: + await 
_check_schedule_or_collapse_to_404( + authorization, + build_run_schedule_authz_selector(agent_id, name), + AuthorizedOperationType.update, + ) + note = request.note if request else None + return await run_schedules_use_case.resume_schedule(agent_id, name, note=note) + + +@router.delete( + "/{name}", + response_model=DeleteResponse, + summary="Delete Run Schedule", + description="Delete a run schedule permanently.", +) +async def delete_run_schedule( + agent_id: str, + name: str, + run_schedules_use_case: DAgentRunSchedulesUseCase, + authorization: DAuthorizationService, +) -> DeleteResponse: + await _check_schedule_or_collapse_to_404( + authorization, + build_run_schedule_authz_selector(agent_id, name), + AuthorizedOperationType.delete, + ) + schedule_id = await run_schedules_use_case.delete_schedule(agent_id, name) + return DeleteResponse( + id=schedule_id, + message=f"Run schedule '{name}' deleted successfully", + ) diff --git a/agentex/src/api/routes/schedules.py b/agentex/src/api/routes/schedules.py deleted file mode 100644 index 56a5516b..00000000 --- a/agentex/src/api/routes/schedules.py +++ /dev/null @@ -1,189 +0,0 @@ -from fastapi import APIRouter, Query - -from src.api.schemas.authorization_types import ( - AgentexResourceType, - AuthorizedOperationType, -) -from src.api.schemas.delete_response import DeleteResponse -from src.api.schemas.schedules import ( - CreateScheduleRequest, - PauseScheduleRequest, - ScheduleListResponse, - ScheduleResponse, - UnpauseScheduleRequest, -) -from src.domain.services.authorization_service import DAuthorizationService -from src.domain.services.schedule_service import build_schedule_id -from src.domain.use_cases.agents_use_case import DAgentsUseCase -from src.domain.use_cases.schedules_use_case import DSchedulesUseCase -from src.utils.authorization_shortcuts import ( - DAuthorizedId, - DAuthorizedResourceIds, -) -from src.utils.logging import make_logger -from src.utils.schedule_authorization import 
_check_schedule_or_collapse_to_404 - -logger = make_logger(__name__) - -router = APIRouter( - prefix="/agents/{agent_id}/schedules", - tags=["Schedules"], -) - - -@router.post( - "", - response_model=ScheduleResponse, - summary="Create Schedule", - description="Create a new schedule for recurring workflow execution for an agent.", -) -async def create_schedule( - agent_id: DAuthorizedId(AgentexResourceType.agent, AuthorizedOperationType.update), - request: CreateScheduleRequest, - agents_use_case: DAgentsUseCase, - schedules_use_case: DSchedulesUseCase, -) -> ScheduleResponse: - """Create a new schedule for an agent's workflow. - - Only route with a standalone parent-agent check (no schedule resource exists - yet). ``agent.update`` matches the ``parent_agent->update`` gate every - schedule mutation transitively requires. - """ - agent = await agents_use_case.get(id=agent_id) - return await schedules_use_case.create_schedule(agent, request) - - -@router.get( - "", - response_model=ScheduleListResponse, - summary="List Agent Schedules", - description="List all schedules for an agent.", -) -async def list_schedules( - agent_id: str, - schedules_use_case: DSchedulesUseCase, - authorized_schedule_ids: DAuthorizedResourceIds(AgentexResourceType.schedule), - page_size: int = Query(default=100, ge=1, le=1000), -) -> ScheduleListResponse: - """List schedules for an agent, filtered to those the caller owns. - - Filter-only (never 403s): ``authorized_schedule_ids`` is ``None`` under authz - bypass (return all), else the set of readable ids (empty returns nothing). 
- """ - return await schedules_use_case.list_schedules( - agent_id, - page_size=page_size, - authorized_schedule_ids=authorized_schedule_ids, - ) - - -@router.get( - "/{schedule_name}", - response_model=ScheduleResponse, - summary="Get Schedule", - description="Get details of a schedule by its name.", -) -async def get_schedule( - agent_id: str, - schedule_name: str, - schedules_use_case: DSchedulesUseCase, - authorization: DAuthorizationService, -) -> ScheduleResponse: - """Get details of a schedule.""" - await _check_schedule_or_collapse_to_404( - authorization, - build_schedule_id(agent_id, schedule_name), - AuthorizedOperationType.read, - ) - return await schedules_use_case.get_schedule(agent_id, schedule_name) - - -@router.post( - "/{schedule_name}/pause", - response_model=ScheduleResponse, - summary="Pause Schedule", - description="Pause a schedule to stop it from executing.", -) -async def pause_schedule( - agent_id: str, - schedule_name: str, - schedules_use_case: DSchedulesUseCase, - authorization: DAuthorizationService, - request: PauseScheduleRequest | None = None, -) -> ScheduleResponse: - """Pause a schedule.""" - await _check_schedule_or_collapse_to_404( - authorization, - build_schedule_id(agent_id, schedule_name), - AuthorizedOperationType.update, - ) - note = request.note if request else None - return await schedules_use_case.pause_schedule(agent_id, schedule_name, note=note) - - -@router.post( - "/{schedule_name}/unpause", - response_model=ScheduleResponse, - summary="Unpause Schedule", - description="Unpause/resume a schedule to allow it to execute again.", -) -async def unpause_schedule( - agent_id: str, - schedule_name: str, - schedules_use_case: DSchedulesUseCase, - authorization: DAuthorizationService, - request: UnpauseScheduleRequest | None = None, -) -> ScheduleResponse: - """Unpause/resume a schedule.""" - await _check_schedule_or_collapse_to_404( - authorization, - build_schedule_id(agent_id, schedule_name), - 
AuthorizedOperationType.update, - ) - note = request.note if request else None - return await schedules_use_case.unpause_schedule(agent_id, schedule_name, note=note) - - -@router.post( - "/{schedule_name}/trigger", - response_model=ScheduleResponse, - summary="Trigger Schedule", - description="Trigger a schedule to run immediately, regardless of its regular schedule.", -) -async def trigger_schedule( - agent_id: str, - schedule_name: str, - schedules_use_case: DSchedulesUseCase, - authorization: DAuthorizationService, -) -> ScheduleResponse: - """Trigger a schedule to run immediately.""" - await _check_schedule_or_collapse_to_404( - authorization, - build_schedule_id(agent_id, schedule_name), - AuthorizedOperationType.update, - ) - return await schedules_use_case.trigger_schedule(agent_id, schedule_name) - - -@router.delete( - "/{schedule_name}", - response_model=DeleteResponse, - summary="Delete Schedule", - description="Delete a schedule permanently.", -) -async def delete_schedule( - agent_id: str, - schedule_name: str, - schedules_use_case: DSchedulesUseCase, - authorization: DAuthorizationService, -) -> DeleteResponse: - """Delete a schedule.""" - schedule_id = build_schedule_id(agent_id, schedule_name) - await _check_schedule_or_collapse_to_404( - authorization, schedule_id, AuthorizedOperationType.delete - ) - await schedules_use_case.delete_schedule(agent_id, schedule_name) - return DeleteResponse( - id=schedule_id, - message=f"Schedule '{schedule_name}' deleted successfully", - ) diff --git a/agentex/src/api/schemas/agent_run_schedules.py b/agentex/src/api/schemas/agent_run_schedules.py new file mode 100644 index 00000000..b0a353b5 --- /dev/null +++ b/agentex/src/api/schemas/agent_run_schedules.py @@ -0,0 +1,165 @@ +from datetime import datetime +from enum import Enum +from typing import Any + +from pydantic import Field + +from src.domain.entities.task_messages import MessageAuthor +from src.utils.model_utils import BaseModel + + +class 
RunScheduleState(str, Enum): + """Live state of a run schedule, derived from Temporal.""" + + ACTIVE = "ACTIVE" + PAUSED = "PAUSED" + + +class ScheduleInitialInput(BaseModel): + """The first input delivered to each freshly created scheduled task.""" + + type: str = Field("text", description="Input content type. Only 'text' in v1.") + author: MessageAuthor = Field( + MessageAuthor.USER, description="The author attributed to the initial input." + ) + content: str = Field(..., description="The initial prompt delivered to the task.") + + +class ScheduleCreatorPrincipal(BaseModel): + """Credential-free creator identity stored with the schedule. + + Never carries cookies, JWTs, API keys, OAuth tokens, or request headers — it + is creator *context* used only for AuthZ and ownership at fire time (D5/D6). + """ + + principal_type: str | None = Field( + None, description="e.g. 'user' or 'service_account'." + ) + user_id: str | None = Field( + None, description="Creator user id, if a user principal." + ) + service_account_id: str | None = Field( + None, description="Creator service-account id, if a service principal." + ) + account_id: str | None = Field( + None, description="Account/workspace id of the creator." + ) + + +class CreateAgentRunScheduleRequest(BaseModel): + """Request body for creating a scheduled agent run.""" + + name: str = Field( + ..., + title="Schedule Name", + description="Human-readable name, unique per agent (e.g. 'daily-granola-summary').", + pattern=r"^[a-z0-9][a-z0-9-]*[a-z0-9]$|^[a-z0-9]$", + min_length=1, + max_length=64, + ) + description: str | None = Field( + None, description="Optional description of what this schedule does." + ) + cron_expression: str | None = Field( + None, + description="Cron expression for the cadence (e.g. '0 17 * * MON-FRI'). " + "Mutually exclusive with interval_seconds.", + ) + interval_seconds: int | None = Field( + None, + ge=1, + description="Interval cadence in seconds. 
Mutually exclusive with cron_expression.", + ) + timezone: str = Field( + "UTC", + description="IANA timezone the cron expression is evaluated in (e.g. 'America/New_York').", + ) + start_at: datetime | None = Field( + None, description="When the schedule should start being active." + ) + end_at: datetime | None = Field( + None, description="When the schedule should stop being active." + ) + paused: bool = Field( + False, description="Whether to create the schedule in a paused state." + ) + task_params: dict[str, Any] | None = Field( + None, description="Resolved config forwarded as task `params` at fire time." + ) + task_metadata: dict[str, Any] | None = Field( + None, description="Metadata copied onto each created task at fire time." + ) + initial_input: ScheduleInitialInput = Field( + ..., description="The first input delivered to each created task." + ) + + +class AgentRunScheduleResponse(BaseModel): + """Response model describing a scheduled agent run.""" + + id: str = Field(..., description="The unique identifier of the run schedule.") + agent_id: str = Field(..., description="The agent this schedule belongs to.") + name: str = Field(..., description="Schedule name, unique per agent.") + description: str | None = Field(None, description="Optional description.") + cron_expression: str | None = Field( + None, description="Cron cadence, if cron-based." + ) + interval_seconds: int | None = Field( + None, description="Interval cadence in seconds, if interval-based." + ) + timezone: str = Field( + "UTC", description="Timezone the cron expression is evaluated in." + ) + start_at: datetime | None = Field(None, description="Schedule activation time.") + end_at: datetime | None = Field(None, description="Schedule deactivation time.") + paused: bool = Field(False, description="Whether the schedule is paused.") + task_params: dict[str, Any] | None = Field( + None, description="Task params at fire time." 
+ ) + task_metadata: dict[str, Any] | None = Field( + None, description="Task metadata at fire time." + ) + initial_input: ScheduleInitialInput = Field(..., description="The initial input.") + initial_input_method: str | None = Field( + None, + description="Effective delivery method (inferred from the agent's ACP type).", + ) + creator_principal: ScheduleCreatorPrincipal | None = Field( + None, description="Credential-free creator identity." + ) + created_at: datetime | None = Field( + None, description="When the schedule was created." + ) + updated_at: datetime | None = Field( + None, description="When the schedule was updated." + ) + # Live state derived from Temporal (best-effort; may be absent right after creation). + state: RunScheduleState = Field( + RunScheduleState.ACTIVE, description="Live schedule state from Temporal." + ) + next_action_times: list[datetime] = Field( + default_factory=list, description="Upcoming scheduled fire times." + ) + last_action_time: datetime | None = Field( + None, description="When the schedule last fired." + ) + num_actions_taken: int = Field( + 0, description="Number of times the schedule has fired." + ) + + +class AgentRunScheduleListResponse(BaseModel): + """Response model for listing run schedules.""" + + run_schedules: list[AgentRunScheduleResponse] = Field( + ..., description="The list of run schedules." 
+ ) + total: int = Field(..., description="The number of run schedules returned.") + + +class PauseRunScheduleRequest(BaseModel): + note: str | None = Field(None, description="Optional note explaining the pause.") + + +class ResumeRunScheduleRequest(BaseModel): + note: str | None = Field(None, description="Optional note explaining the resume.") diff --git a/agentex/src/api/schemas/schedules.py b/agentex/src/api/schemas/schedules.py deleted file mode 100644 index f7a4358e..00000000 --- a/agentex/src/api/schemas/schedules.py +++ /dev/null @@ -1,255 +0,0 @@ -from datetime import datetime -from enum import Enum -from typing import Any - -from pydantic import Field - -from src.utils.model_utils import BaseModel - - -class ScheduleState(str, Enum): - """Schedule state enum""" - - ACTIVE = "ACTIVE" - PAUSED = "PAUSED" - - -class CreateScheduleRequest(BaseModel): - """Request model for creating a new schedule for an agent""" - - name: str = Field( - ..., - title="Schedule Name", - description="Human-readable name for the schedule (e.g., 'weekly-profiling'). 
" - "Will be combined with agent_id to form the full schedule_id.", - pattern=r"^[a-z0-9][a-z0-9-]*[a-z0-9]$|^[a-z0-9]$", - min_length=1, - max_length=64, - ) - workflow_name: str = Field( - ..., - title="Workflow Name", - description="Name of the Temporal workflow to execute (e.g., 'sae-orchestrator')", - ) - task_queue: str = Field( - ..., - title="Task Queue", - description="Temporal task queue where the agent's worker is listening", - ) - workflow_params: dict[str, Any] | None = Field( - default=None, - title="Workflow Parameters", - description="Parameters to pass to the workflow", - ) - cron_expression: str | None = Field( - default=None, - title="Cron Expression", - description="Cron expression for scheduling (e.g., '0 0 * * 0' for weekly on Sunday)", - ) - interval_seconds: int | None = Field( - default=None, - title="Interval Seconds", - description="Alternative to cron - run every N seconds", - ge=1, - ) - execution_timeout_seconds: int | None = Field( - default=None, - title="Execution Timeout", - description="Maximum time in seconds for each workflow execution", - ge=1, - ) - start_at: datetime | None = Field( - default=None, - title="Start At", - description="When the schedule should start being active", - ) - end_at: datetime | None = Field( - default=None, - title="End At", - description="When the schedule should stop being active", - ) - paused: bool = Field( - default=False, - title="Paused", - description="Whether to create the schedule in a paused state", - ) - - -class ScheduleActionInfo(BaseModel): - """Information about the scheduled action""" - - workflow_name: str = Field( - ..., - title="Workflow Name", - description="Name of the workflow being executed", - ) - workflow_id_prefix: str = Field( - ..., - title="Workflow ID Prefix", - description="Prefix for workflow execution IDs", - ) - task_queue: str = Field( - ..., - title="Task Queue", - description="Task queue for the workflow", - ) - workflow_params: list[Any] | None = Field( - 
default=None, - title="Workflow Parameters", - description="Parameters passed to the workflow", - ) - - -class ScheduleSpecInfo(BaseModel): - """Information about the schedule specification""" - - cron_expressions: list[str] = Field( - default_factory=list, - title="Cron Expressions", - description="Cron expressions for the schedule", - ) - intervals_seconds: list[int] = Field( - default_factory=list, - title="Interval Seconds", - description="Interval specifications in seconds", - ) - start_at: datetime | None = Field( - default=None, - title="Start At", - description="When the schedule starts being active", - ) - end_at: datetime | None = Field( - default=None, - title="End At", - description="When the schedule stops being active", - ) - - -class ScheduleResponse(BaseModel): - """Response model for schedule operations""" - - schedule_id: str = Field( - ..., - title="Schedule ID", - description="Unique identifier for the schedule", - ) - name: str = Field( - ..., - title="Schedule Name", - description="Human-readable name for the schedule", - ) - agent_id: str = Field( - ..., - title="Agent ID", - description="ID of the agent this schedule belongs to", - ) - state: ScheduleState = Field( - ..., - title="State", - description="Current state of the schedule", - ) - action: ScheduleActionInfo = Field( - ..., - title="Action", - description="Information about the scheduled action", - ) - spec: ScheduleSpecInfo = Field( - ..., - title="Spec", - description="Schedule specification", - ) - num_actions_taken: int = Field( - default=0, - title="Number of Actions Taken", - description="Number of times the schedule has executed", - ) - num_actions_missed: int = Field( - default=0, - title="Number of Actions Missed", - description="Number of scheduled executions that were missed", - ) - next_action_times: list[datetime] = Field( - default_factory=list, - title="Next Action Times", - description="Upcoming scheduled execution times", - ) - last_action_time: datetime | None = 
Field( - default=None, - title="Last Action Time", - description="When the schedule last executed", - ) - created_at: datetime | None = Field( - default=None, - title="Created At", - description="When the schedule was created", - ) - - -class ScheduleListItem(BaseModel): - """Abbreviated schedule info for list responses""" - - schedule_id: str = Field( - ..., - title="Schedule ID", - description="Unique identifier for the schedule", - ) - name: str = Field( - ..., - title="Schedule Name", - description="Human-readable name for the schedule", - ) - agent_id: str = Field( - ..., - title="Agent ID", - description="ID of the agent this schedule belongs to", - ) - state: ScheduleState = Field( - ..., - title="State", - description="Current state of the schedule", - ) - workflow_name: str | None = Field( - default=None, - title="Workflow Name", - description="Name of the scheduled workflow", - ) - next_action_time: datetime | None = Field( - default=None, - title="Next Action Time", - description="Next scheduled execution time", - ) - - -class ScheduleListResponse(BaseModel): - """Response model for listing schedules""" - - schedules: list[ScheduleListItem] = Field( - ..., - title="Schedules", - description="List of schedules", - ) - total: int = Field( - ..., - title="Total", - description="Total number of schedules", - ) - - -class PauseScheduleRequest(BaseModel): - """Request model for pausing a schedule""" - - note: str | None = Field( - default=None, - title="Note", - description="Optional note explaining why the schedule was paused", - ) - - -class UnpauseScheduleRequest(BaseModel): - """Request model for unpausing a schedule""" - - note: str | None = Field( - default=None, - title="Note", - description="Optional note explaining why the schedule was unpaused", - ) diff --git a/agentex/src/domain/entities/agent_run_schedules.py b/agentex/src/domain/entities/agent_run_schedules.py new file mode 100644 index 00000000..73744f3c --- /dev/null +++ 
b/agentex/src/domain/entities/agent_run_schedules.py @@ -0,0 +1,97 @@ +from datetime import datetime +from enum import Enum +from typing import Any + +from pydantic import Field + +from src.domain.entities.agents import ACPType +from src.utils.model_utils import BaseModel + + +class InitialInputMethod(str, Enum): + """How the configured first input is delivered to the freshly created task. + + Inferred from the target agent's ACP type at fire time; persisted only when + the caller wants the schedule definition to be explicit. + """ + + EVENT_SEND = "event/send" # async / agentic agents + MESSAGE_SEND = "message/send" # sync agents + + +def infer_initial_input_method(acp_type: ACPType) -> InitialInputMethod: + """Map an agent's ACP type to the delivery method for the initial input. + + async / agentic agents receive the first input as an ``event/send``; sync + agents receive it as a ``message/send`` (AGX1-368, D2 / Open Q5). + """ + if acp_type == ACPType.SYNC: + return InitialInputMethod.MESSAGE_SEND + return InitialInputMethod.EVENT_SEND + + +class AgentRunScheduleEntity(BaseModel): + """A persisted definition of a recurring agent run. + + The Postgres row is the source of truth for what each future fire should do; + the Temporal Schedule is only the recurring clock and carries nothing but the + schedule id (AGX1-368, D4). + + JSON-backed fields (``creator_principal``, ``task_params``, ``task_metadata``, + ``initial_input``) are stored as plain dicts so they round-trip cleanly through + the JSON columns. Their typed shapes are validated at the API schema layer + (``ScheduleCreatorPrincipal`` / ``ScheduleInitialInput``). + """ + + id: str = Field(..., description="The unique identifier of the run schedule.") + agent_id: str = Field(..., description="The agent this schedule belongs to.") + name: str = Field( + ..., description="Human-readable schedule name, unique per agent." 
+ ) + description: str | None = Field( + None, description="Optional description of the schedule." + ) + cron_expression: str | None = Field( + None, description="Cron expression for the cadence (mutually exclusive)." + ) + interval_seconds: int | None = Field( + None, description="Interval cadence in seconds (mutually exclusive)." + ) + timezone: str = Field( + "UTC", description="IANA timezone the cron expression is evaluated in." + ) + start_at: datetime | None = Field( + None, description="When the schedule should start being active." + ) + end_at: datetime | None = Field( + None, description="When the schedule should stop being active." + ) + paused: bool = Field(False, description="Whether the schedule is currently paused.") + # Credential-free creator context: principal_type / user_id / service_account_id / + # account_id only. Never cookies, JWTs, API keys, OAuth tokens, or headers (D5/D6). + creator_principal: dict[str, Any] = Field( + ..., + description="Credential-free creator identity used for AuthZ at fire time.", + ) + task_params: dict[str, Any] | None = Field( + None, description="Resolved config forwarded as task `params` at fire time." + ) + task_metadata: dict[str, Any] | None = Field( + None, description="Metadata copied onto each created task at fire time." + ) + initial_input: dict[str, Any] = Field( + ..., description="The first input delivered to each created task." + ) + initial_input_method: str | None = Field( + None, + description=( + "Delivery method for the initial input (an InitialInputMethod value). " + "Inferred from the agent's ACP type at fire time when omitted." + ), + ) + created_at: datetime | None = Field( + None, description="When the schedule was created." + ) + updated_at: datetime | None = Field( + None, description="When the schedule was last updated." 
logger = make_logger(__name__)


class AgentRunScheduleRepository(
    PostgresCRUDRepository[AgentRunScheduleORM, AgentRunScheduleEntity]
):
    """Postgres-backed store for agent run schedule rows.

    Adds lookups keyed on the owning agent on top of the generic CRUD
    operations inherited from ``PostgresCRUDRepository``.
    """

    def __init__(
        self,
        async_read_write_session_maker: DDatabaseAsyncReadWriteSessionMaker,
        async_read_only_session_maker: DDatabaseAsyncReadOnlySessionMaker,
    ):
        """Bind the base repository to the run-schedule ORM/entity pair."""
        super().__init__(
            async_read_write_session_maker,
            async_read_only_session_maker,
            AgentRunScheduleORM,
            AgentRunScheduleEntity,
        )

    async def list_by_agent_id(
        self,
        agent_id: str,
        limit: int | None = None,
        page_number: int | None = None,
    ) -> list[AgentRunScheduleEntity]:
        """Return the run schedules owned by ``agent_id``, newest first.

        Args:
            agent_id: Id of the agent whose schedules are listed.
            limit: Forwarded unchanged to the base ``list``.
            page_number: Forwarded unchanged to the base ``list``.
        """
        owned_by_agent = select(AgentRunScheduleORM).where(
            AgentRunScheduleORM.agent_id == agent_id
        )
        # Descending ``created_at`` is what yields the newest-first ordering.
        schedules = await super().list(
            query=owned_by_agent,
            order_by="created_at",
            order_direction="desc",
            limit=limit,
            page_number=page_number,
        )
        return schedules

    async def get_by_agent_id_and_name(
        self, agent_id: str, name: str
    ) -> AgentRunScheduleEntity | None:
        """Look up a schedule by its ``(agent_id, name)`` pair.

        Returns:
            The matching entity, or ``None`` when no row matches.
        """
        lookup = select(AgentRunScheduleORM).where(
            AgentRunScheduleORM.agent_id == agent_id,
            AgentRunScheduleORM.name == name,
        )
        # Read-only session: this method never writes.
        async with self.start_async_db_session(allow_writes=False) as session:
            outcome = await session.execute(lookup)
            match = outcome.scalars().first()
            if not match:
                return None
            # Convert while the session is still open.
            return AgentRunScheduleEntity.model_validate(match)

    async def get_by_agent_id_and_name_or_raise(
        self, agent_id: str, name: str
    ) -> AgentRunScheduleEntity:
        """Look up a schedule by ``(agent_id, name)``, failing when absent.

        Raises:
            ItemDoesNotExist: No schedule with that name exists for the agent.
        """
        found = await self.get_by_agent_id_and_name(agent_id, name)
        if found is not None:
            return found
        raise ItemDoesNotExist(
            f"Run schedule '{name}' for agent '{agent_id}' does not exist."
        )


# FastAPI dependency alias for injecting the repository.
DAgentRunScheduleRepository = Annotated[
    AgentRunScheduleRepository, Depends(AgentRunScheduleRepository)
]
logger = make_logger(__name__)

# Temporal schedule id derived from the Postgres row id. The prefix namespaces
# these schedules within the shared Temporal namespace and keeps the id stable
# and small (the row id is the only thing the workflow needs).
RUN_SCHEDULE_TEMPORAL_ID_PREFIX = "agent-run-schedule"

# Registered (class) name of the workflow each fire starts. Referenced by name so
# the API/service layer doesn't import the Temporal workflow definition.
SCHEDULED_AGENT_RUN_WORKFLOW_NAME = "ScheduledAgentRunWorkflow"


def build_run_schedule_temporal_id(schedule_row_id: str) -> str:
    """Return the Temporal schedule id for a schedule row id."""
    return f"{RUN_SCHEDULE_TEMPORAL_ID_PREFIX}:{schedule_row_id}"


def build_run_schedule_authz_selector(agent_id: str, name: str) -> str:
    """Authorization selector for a run schedule's ``schedule`` resource.

    Derivable from the (agent_id, name) path params so the CRUD endpoints can
    authorize without a prior DB lookup. The ``run-schedule::`` prefix namespaces
    the selector within the ``schedule`` resource type.
    """
    return f"run-schedule::{agent_id}::{name}"


class AgentRunScheduleService:
    """Manage Postgres-backed scheduled agent runs and their Temporal Schedules.

    The Postgres row is the source of truth for the schedule definition; the
    Temporal Schedule is only the recurring clock and is given nothing but the
    schedule row id as its workflow argument (AGX1-368, D4).
    """

    def __init__(
        self,
        temporal_adapter: DTemporalAdapter,
        authorization_service: DAuthorizationService,
        schedule_repository: DAgentRunScheduleRepository,
        agent_repository: DAgentRepository,
    ):
        self.temporal_adapter = temporal_adapter
        self.authorization_service = authorization_service
        self.schedule_repository = schedule_repository
        self.agent_repository = agent_repository

    async def create_schedule(
        self,
        agent: AgentEntity,
        request: CreateAgentRunScheduleRequest,
        creator_principal: dict[str, Any],
    ) -> AgentRunScheduleResponse:
        """Persist a schedule row, register it for authz, and create its clock.

        Args:
            agent: The agent that owns the schedule.
            request: The validated create request.
            creator_principal: Creator identity stored on the row.

        Returns:
            The response built from the persisted row plus live Temporal fields.

        Raises:
            ClientError: A schedule with this name already exists for the agent.
            Exception: Any auth-registration or Temporal-create error is
                re-raised after the row (and auth entry, if written) is rolled
                back.
        """
        # Built once: the pre-check and the insert race both report it.
        duplicate_message = (
            f"Run schedule '{request.name}' already exists for agent '{agent.id}'"
        )
        existing = await self.schedule_repository.get_by_agent_id_and_name(
            agent.id, request.name
        )
        if existing is not None:
            raise ClientError(duplicate_message)

        entity = AgentRunScheduleEntity(
            id=orm_id(),
            agent_id=agent.id,
            name=request.name,
            description=request.description,
            cron_expression=request.cron_expression,
            interval_seconds=request.interval_seconds,
            timezone=request.timezone,
            start_at=request.start_at,
            end_at=request.end_at,
            paused=request.paused,
            creator_principal=creator_principal,
            task_params=request.task_params,
            task_metadata=request.task_metadata,
            initial_input=request.initial_input.to_dict(mode="json"),
            # Delivery method is inferred from the agent's ACP type at fire time.
            initial_input_method=None,
        )

        try:
            created = await self.schedule_repository.create(entity)
        except DuplicateItemError as exc:
            # A concurrent create can slip past the pre-check above.
            raise ClientError(duplicate_message) from exc

        temporal_id = build_run_schedule_temporal_id(created.id)
        authz_selector = build_run_schedule_authz_selector(agent.id, created.name)
        # Auth registration and the Temporal create share one rollback scope: if
        # either fails, the persisted row is removed so a failed create leaves
        # nothing behind. Registration runs first so an auth failure aborts
        # before anything is written to Temporal (fail-closed).
        registered = False
        try:
            registered = await self._register_schedule_in_auth(
                authz_selector=authz_selector, agent_id=agent.id
            )
            await self.temporal_adapter.create_schedule(
                schedule_id=temporal_id,
                workflow=SCHEDULED_AGENT_RUN_WORKFLOW_NAME,
                workflow_id=f"{temporal_id}-run",
                args=[created.id],
                task_queue=self._task_queue(),
                cron_expressions=(
                    [created.cron_expression] if created.cron_expression else None
                ),
                interval_seconds=created.interval_seconds,
                start_at=created.start_at,
                end_at=created.end_at,
                paused=created.paused,
                time_zone_name=created.timezone if created.cron_expression else None,
                overlap_policy="skip",
            )
        except Exception:
            if registered:
                await self._deregister_schedule_from_auth(authz_selector=authz_selector)
            await self._best_effort_delete_row(created.id)
            raise

        return await self._to_response(created, agent=agent, temporal_id=temporal_id)

    async def list_schedules(
        self,
        agent_id: str,
        authorized_schedule_ids: list[str] | None = None,
        limit: int = 100,
    ) -> AgentRunScheduleListResponse:
        """List an agent's schedules, optionally restricted by authz selectors.

        Args:
            agent_id: The agent whose schedules are listed.
            authorized_schedule_ids: Authz selectors the caller may see. ``None``
                disables filtering; an empty list filters everything out.
            limit: Maximum number of rows fetched from Postgres.

        Returns:
            The visible schedules; ``total`` is the number of returned items.
        """
        rows = await self.schedule_repository.list_by_agent_id(agent_id, limit=limit)

        # Gate on ``is not None``: an empty list means the caller owns nothing and
        # everything is filtered out; None means authorization is bypassed.
        authorized = (
            set(authorized_schedule_ids)
            if authorized_schedule_ids is not None
            else None
        )
        # NOTE(review): ``limit`` is applied before the authz filter below, so
        # fewer than ``limit`` items can be returned even when more visible
        # schedules exist.
        agent = await self.agent_repository.get(id=agent_id)
        items: list[AgentRunScheduleResponse] = []
        for row in rows:
            selector = build_run_schedule_authz_selector(agent_id, row.name)
            if authorized is not None and selector not in authorized:
                continue
            temporal_id = build_run_schedule_temporal_id(row.id)
            items.append(
                await self._to_response(row, agent=agent, temporal_id=temporal_id)
            )
        return AgentRunScheduleListResponse(run_schedules=items, total=len(items))

    async def get_schedule(self, agent_id: str, name: str) -> AgentRunScheduleResponse:
        """Return one schedule by (agent_id, name).

        Raises:
            ItemDoesNotExist: No such schedule exists for the agent.
        """
        row = await self.schedule_repository.get_by_agent_id_and_name_or_raise(
            agent_id, name
        )
        agent = await self.agent_repository.get(id=agent_id)
        return await self._to_response(
            row, agent=agent, temporal_id=build_run_schedule_temporal_id(row.id)
        )

    async def pause_schedule(
        self, agent_id: str, name: str, note: str | None = None
    ) -> AgentRunScheduleResponse:
        """Pause the schedule; ``note`` is forwarded to Temporal."""
        return await self._set_paused(agent_id, name, paused=True, note=note)

    async def resume_schedule(
        self, agent_id: str, name: str, note: str | None = None
    ) -> AgentRunScheduleResponse:
        """Resume the schedule; ``note`` is forwarded to Temporal."""
        return await self._set_paused(agent_id, name, paused=False, note=note)

    async def delete_schedule(self, agent_id: str, name: str) -> str:
        """Delete the Temporal Schedule, the row, and the auth entry.

        Returns:
            The id of the deleted schedule row.

        Raises:
            ItemDoesNotExist: No such schedule exists for the agent.
        """
        row = await self.schedule_repository.get_by_agent_id_and_name_or_raise(
            agent_id, name
        )
        temporal_id = build_run_schedule_temporal_id(row.id)
        # Temporal is the recurring clock; delete it first so no further fires can
        # occur. The row delete that follows is NOT best-effort: a failure there
        # propagates to the caller. Only the final auth deregistration is
        # best-effort (its failures are logged, not raised).
        # NOTE(review): if the adapter raises for an already-deleted Temporal
        # schedule, a retry after a failed row delete can never reach the row
        # delete below - confirm the adapter's not-found behaviour.
        await self.temporal_adapter.delete_schedule(temporal_id)
        await self.schedule_repository.delete(id=row.id)
        await self._deregister_schedule_from_auth(
            authz_selector=build_run_schedule_authz_selector(agent_id, row.name)
        )
        return row.id

    # -- internals ---------------------------------------------------------

    async def _set_paused(
        self, agent_id: str, name: str, *, paused: bool, note: str | None
    ) -> AgentRunScheduleResponse:
        """Apply a pause/resume to Temporal and mirror it onto the row.

        If persisting the new ``paused`` value fails after Temporal was toggled,
        the Temporal state is restored (best-effort) to the row's previous value
        so the two stores do not silently diverge; the original error is then
        re-raised.
        """
        row = await self.schedule_repository.get_by_agent_id_and_name_or_raise(
            agent_id, name
        )
        temporal_id = build_run_schedule_temporal_id(row.id)
        was_paused = row.paused
        await self._apply_pause_state(temporal_id, paused=paused, note=note)
        row.paused = paused
        try:
            updated = await self.schedule_repository.update(row)
        except Exception:
            # Only undo a real transition; a no-op toggle already matches the row.
            if was_paused != paused:
                await self._best_effort_restore_pause_state(
                    temporal_id, paused=was_paused
                )
            raise
        agent = await self.agent_repository.get(id=agent_id)
        return await self._to_response(updated, agent=agent, temporal_id=temporal_id)

    async def _apply_pause_state(
        self, temporal_id: str, *, paused: bool, note: str | None
    ) -> None:
        """Pause or unpause the Temporal Schedule according to ``paused``."""
        if paused:
            await self.temporal_adapter.pause_schedule(temporal_id, note=note)
        else:
            await self.temporal_adapter.unpause_schedule(temporal_id, note=note)

    async def _best_effort_restore_pause_state(
        self, temporal_id: str, *, paused: bool
    ) -> None:
        """Put the Temporal Schedule back to ``paused``; log instead of raising."""
        try:
            await self._apply_pause_state(
                temporal_id,
                paused=paused,
                note="Reverted: failed to persist paused state",
            )
        except Exception:
            logger.exception(
                "Failed to revert Temporal pause state after row update failure",
                extra={"temporal_id": temporal_id, "paused": paused},
            )

    def _task_queue(self) -> str:
        """Return the task queue the scheduled workflow is started on."""
        # Local import avoids a circular import (run_worker imports the factory,
        # which would otherwise transitively import this service).
        from src.temporal.run_worker import AGENTEX_SERVER_TASK_QUEUE

        return AGENTEX_SERVER_TASK_QUEUE

    async def _to_response(
        self,
        entity: AgentRunScheduleEntity,
        agent: AgentEntity,
        temporal_id: str,
    ) -> AgentRunScheduleResponse:
        """Build the API response from the row plus best-effort live fields.

        Args:
            entity: The persisted schedule row.
            agent: The owning agent; its ACP type supplies the delivery method
                when the row does not store one.
            temporal_id: Id of the Temporal Schedule to describe.
        """
        effective_method = (
            entity.initial_input_method
            or infer_initial_input_method(agent.acp_type).value
        )

        # Row-derived defaults, used as-is when Temporal cannot be described.
        state = RunScheduleState.PAUSED if entity.paused else RunScheduleState.ACTIVE
        next_action_times: list[datetime] = []
        last_action_time: datetime | None = None
        num_actions_taken = 0

        # Live Temporal fields are best-effort: a describe failure (e.g. right
        # after creation, or a transient Temporal error) must not break the
        # response, which is fully serviceable from the persisted row.
        try:
            description = await self.temporal_adapter.describe_schedule(temporal_id)
            live = self._extract_live_fields(description)
            state = live["state"]
            next_action_times = live["next_action_times"]
            last_action_time = live["last_action_time"]
            num_actions_taken = live["num_actions_taken"]
        except Exception as exc:
            logger.warning(
                "run_schedule_describe_failed",
                extra={"temporal_id": temporal_id, "error_type": type(exc).__name__},
            )

        return AgentRunScheduleResponse(
            id=entity.id,
            agent_id=entity.agent_id,
            name=entity.name,
            description=entity.description,
            cron_expression=entity.cron_expression,
            interval_seconds=entity.interval_seconds,
            timezone=entity.timezone,
            start_at=entity.start_at,
            end_at=entity.end_at,
            paused=entity.paused,
            task_params=entity.task_params,
            task_metadata=entity.task_metadata,
            initial_input=ScheduleInitialInput.model_validate(entity.initial_input),
            initial_input_method=effective_method,
            creator_principal=ScheduleCreatorPrincipal.model_validate(
                entity.creator_principal
            ),
            created_at=entity.created_at,
            updated_at=entity.updated_at,
            state=state,
            next_action_times=next_action_times,
            last_action_time=last_action_time,
            num_actions_taken=num_actions_taken,
        )

    @staticmethod
    def _extract_live_fields(description: ScheduleDescription) -> dict[str, Any]:
        """Pull the live state fields out of a Temporal schedule description.

        Returns:
            A dict with keys ``state``, ``next_action_times``,
            ``last_action_time`` and ``num_actions_taken``.
        """
        state = RunScheduleState.ACTIVE
        if description.schedule.state and description.schedule.state.paused:
            state = RunScheduleState.PAUSED

        info = description.info
        next_action_times = (
            list(info.next_action_times) if info.next_action_times else []
        )
        last_action_time: datetime | None = None
        recent_actions = getattr(info, "recent_actions", None)
        if recent_actions:
            # The last entry is used; prefer its start time, else its scheduled time.
            last_action = recent_actions[-1]
            last_action_time = getattr(last_action, "started_at", None) or getattr(
                last_action, "scheduled_at", None
            )
        num_actions_taken = cast(int, getattr(info, "num_actions", 0))
        return {
            "state": state,
            "next_action_times": next_action_times,
            "last_action_time": last_action_time,
            "num_actions_taken": num_actions_taken,
        }

    async def _register_schedule_in_auth(
        self, *, authz_selector: str, agent_id: str
    ) -> bool:
        """Register the schedule under its parent agent so permissions cascade.

        Returns:
            True when the resource was registered. False when the principal
            context carries neither ``user_id`` nor ``service_account_id``; in
            that case registration is skipped and a warning is logged.

        Raises:
            Exception: Whatever ``register_resource`` raises is propagated so the
                caller can abort the create.
        """
        principal_context = self.authorization_service.principal_context
        # The principal context may be a plain dict or an attribute-style object.
        if isinstance(principal_context, dict):
            user_id = principal_context.get("user_id")
            service_account_id = principal_context.get("service_account_id")
        else:
            user_id = getattr(principal_context, "user_id", None)
            service_account_id = getattr(principal_context, "service_account_id", None)
        if user_id is None and service_account_id is None:
            logger.warning(
                "Skipping auth registration for run schedule: no creator resolvable",
                extra={"authz_selector": authz_selector, "agent_id": agent_id},
            )
            return False
        await self.authorization_service.register_resource(
            resource=AgentexResource.schedule(authz_selector),
            parent=AgentexResource.agent(agent_id),
        )
        return True

    async def _deregister_schedule_from_auth(self, *, authz_selector: str) -> None:
        """Remove the schedule's auth entry; failures are logged, never raised."""
        try:
            await self.authorization_service.deregister_resource(
                resource=AgentexResource.schedule(authz_selector),
            )
        except Exception as exc:
            logger.warning(
                "Auth deregister failed for run schedule; entry may be orphaned",
                extra={
                    "authz_selector": authz_selector,
                    "error_type": type(exc).__name__,
                },
                exc_info=True,
            )

    async def _best_effort_delete_row(self, schedule_id: str) -> None:
        """Roll back a just-created row; a missing row is fine, others are logged."""
        try:
            await self.schedule_repository.delete(id=schedule_id)
        except ItemDoesNotExist:
            pass
        except Exception:
            # Runs after either an auth-registration or a Temporal-create
            # failure, so the message does not name one of them.
            logger.exception(
                "Failed to roll back run schedule row after a failed create",
                extra={"schedule_id": schedule_id},
            )
+ + +DAgentRunScheduleService = Annotated[ + AgentRunScheduleService, Depends(AgentRunScheduleService) +] diff --git a/agentex/src/domain/services/schedule_service.py b/agentex/src/domain/services/schedule_service.py deleted file mode 100644 index 8f5ae02f..00000000 --- a/agentex/src/domain/services/schedule_service.py +++ /dev/null @@ -1,471 +0,0 @@ -from datetime import datetime, timedelta -from typing import Annotated, Any, cast - -from fastapi import Depends -from temporalio.client import ScheduleActionStartWorkflow, ScheduleDescription - -from src.adapters.temporal.adapter_temporal import DTemporalAdapter -from src.api.schemas.authorization_types import AgentexResource -from src.api.schemas.schedules import ( - CreateScheduleRequest, - ScheduleActionInfo, - ScheduleListItem, - ScheduleListResponse, - ScheduleResponse, - ScheduleSpecInfo, - ScheduleState, -) -from src.domain.entities.agents import AgentEntity -from src.domain.services.authorization_service import DAuthorizationService -from src.utils.logging import make_logger - -logger = make_logger(__name__) - -# Schedule ID format: {agent_id}--{schedule_name} -SCHEDULE_ID_SEPARATOR = "--" - - -def build_schedule_id(agent_id: str, schedule_name: str) -> str: - """Build a schedule ID from agent ID and schedule name.""" - return f"{agent_id}{SCHEDULE_ID_SEPARATOR}{schedule_name}" - - -def parse_schedule_id(schedule_id: str) -> tuple[str, str]: - """Parse a schedule ID into (agent_id, schedule_name).""" - parts = schedule_id.split(SCHEDULE_ID_SEPARATOR, 1) - if len(parts) != 2: - return schedule_id, "" - return parts[0], parts[1] - - -class ScheduleService: - """ - Service for managing Temporal schedules scoped to agents. 
- """ - - def __init__( - self, - temporal_adapter: DTemporalAdapter, - authorization_service: DAuthorizationService, - ): - self.temporal_adapter = temporal_adapter - self.authorization_service = authorization_service - - async def create_schedule( - self, - agent: AgentEntity, - request: CreateScheduleRequest, - ) -> ScheduleResponse: - """ - Create a new schedule for recurring workflow execution. - - Args: - agent: The agent this schedule belongs to - request: The schedule creation request - - Returns: - ScheduleResponse with the created schedule details - """ - schedule_id = build_schedule_id(agent.id, request.name) - workflow_id_prefix = f"{schedule_id}-run" - - # Build args for the workflow - args = [request.workflow_params] if request.workflow_params else None - - # Convert cron_expression to list if provided - cron_expressions = ( - [request.cron_expression] if request.cron_expression else None - ) - - # Convert execution timeout to timedelta - execution_timeout = ( - timedelta(seconds=request.execution_timeout_seconds) - if request.execution_timeout_seconds - else None - ) - - # Schedules have no Postgres row: Temporal is the store and the auth - # selector is the schedule id ({agent_id}--{schedule_name}). Register - # before the Temporal write so an auth failure fails closed. If the - # Temporal create fails after registration, compensate with a deregister. - # The read-back below is intentionally outside the compensation scope - # because the schedule was already created. 
- registered = await self._register_schedule_in_auth( - schedule_id=schedule_id, agent_id=agent.id - ) - try: - await self.temporal_adapter.create_schedule( - schedule_id=schedule_id, - workflow=request.workflow_name, - workflow_id=workflow_id_prefix, - args=args, - task_queue=request.task_queue, - cron_expressions=cron_expressions, - interval_seconds=request.interval_seconds, - execution_timeout=execution_timeout, - start_at=request.start_at, - end_at=request.end_at, - paused=request.paused, - ) - except Exception: - # Orphan guard: the auth entry was written but the schedule never - # landed in Temporal. Best-effort compensating deregister, then - # re-raise the original error. - if registered: - await self._deregister_schedule_from_auth(schedule_id=schedule_id) - raise - - # Fetch and return the created schedule - return await self.get_schedule(agent.id, request.name) - - async def _register_schedule_in_auth( - self, *, schedule_id: str, agent_id: str - ) -> bool: - """Register the schedule in the authorization graph before creating it. - - The schedule is registered under its parent agent so permissions - cascade from the owning agent. Registering before the Temporal create - fails closed: an auth failure aborts the create, and the caller - compensates with a deregister if the Temporal create later fails. - - Returns True when the schedule was registered, or False when no creator - identity is resolvable and registration is skipped. - """ - principal_context = self.authorization_service.principal_context - # principal_context is `Any` (a dict from /v1/authn), not a typed model, - # so getattr always yields None and silently skips the Spark register. 
- if isinstance(principal_context, dict): - user_id = principal_context.get("user_id") - service_account_id = principal_context.get("service_account_id") - else: - user_id = getattr(principal_context, "user_id", None) - service_account_id = getattr(principal_context, "service_account_id", None) - if user_id is None and service_account_id is None: - logger.warning( - "Skipping auth registration for schedule: no creator resolvable", - extra={"schedule_id": schedule_id, "agent_id": agent_id}, - ) - return False - try: - await self.authorization_service.register_resource( - resource=AgentexResource.schedule(schedule_id), - parent=AgentexResource.agent(agent_id), - ) - except Exception as exc: - logger.exception( - "Auth registration failed for agent_schedule; aborting create", - extra={ - "schedule_id": schedule_id, - "agent_id": agent_id, - "error_type": type(exc).__name__, - }, - ) - raise - return True - - async def _deregister_schedule_from_auth(self, *, schedule_id: str) -> None: - """Best-effort removal of the schedule from the authorization graph. - - Temporal is the source of truth for schedule existence. Once Temporal - delete succeeds, a deregister failure is logged but does not block the - delete response. - """ - try: - await self.authorization_service.deregister_resource( - resource=AgentexResource.schedule(schedule_id), - ) - except Exception as exc: - logger.warning( - "Auth deregister failed for agent_schedule; entry may be orphaned", - extra={ - "schedule_id": schedule_id, - "error_type": type(exc).__name__, - }, - exc_info=True, - ) - - async def get_schedule(self, agent_id: str, schedule_name: str) -> ScheduleResponse: - """ - Get details of a schedule. 
- - Args: - agent_id: The agent ID - schedule_name: The schedule name - - Returns: - ScheduleResponse with schedule details - """ - schedule_id = build_schedule_id(agent_id, schedule_name) - description = await self.temporal_adapter.describe_schedule(schedule_id) - - return self._description_to_response(schedule_id, description) - - async def list_schedules( - self, - agent_id: str | None = None, - page_size: int = 100, - authorized_schedule_ids: list[str] | None = None, - ) -> ScheduleListResponse: - """ - List schedules, optionally filtered by agent. - - Args: - agent_id: Optional agent ID to filter schedules - page_size: Number of results to return - authorized_schedule_ids: Ownership filter applied against the - schedule id ({agent_id}--{schedule_name}). ``None`` means "no - filter" (authorization bypass); any list (including empty) - restricts results to those ids, so an empty list yields no - schedules. - - Returns: - ScheduleListResponse with list of schedules - - Note: - ``page_size`` caps the upstream Temporal listing, which is then - filtered in-process by ``agent_id`` and ``authorized_schedule_ids``, - so fewer than ``page_size`` rows may be returned even when more - matching schedules exist. Pre-dates the ownership filter (the - ``agent_id`` filter already had it). Server-side filtering isn't - available (Temporal standard visibility can't filter on the schedule - id); the fix is to loop pages until the requested page is filled, - tracked separately. - """ - schedules = await self.temporal_adapter.list_schedules(page_size=page_size) - - # Gate on ``is not None``, not truthiness: an empty list means the caller - # owns nothing and must filter everything out, not pass through unfiltered. 
- authorized_ids = ( - set(authorized_schedule_ids) - if authorized_schedule_ids is not None - else None - ) - - items = [] - for schedule in schedules: - parsed_agent_id, schedule_name = parse_schedule_id(schedule.id) - - if agent_id and parsed_agent_id != agent_id: - continue - - if authorized_ids is not None and schedule.id not in authorized_ids: - continue - - # Extract workflow name from action if available - workflow_name = None - if hasattr(schedule, "info") and hasattr(schedule.info, "action"): - action = schedule.info.action - if isinstance(action, ScheduleActionStartWorkflow): - workflow_name = action.workflow - - # Extract next action time - next_action_time = None - if hasattr(schedule, "info") and schedule.info.next_action_times: - next_action_time = schedule.info.next_action_times[0] - - # Determine state - state = ScheduleState.ACTIVE - if hasattr(schedule, "info") and hasattr(schedule.info, "paused"): - state = ( - ScheduleState.PAUSED - if schedule.info.paused - else ScheduleState.ACTIVE - ) - - items.append( - ScheduleListItem( - schedule_id=schedule.id, - name=schedule_name or schedule.id, - agent_id=parsed_agent_id, - state=state, - workflow_name=workflow_name, - next_action_time=next_action_time, - ) - ) - - return ScheduleListResponse( - schedules=items, - total=len(items), - ) - - async def pause_schedule( - self, agent_id: str, schedule_name: str, note: str | None = None - ) -> ScheduleResponse: - """ - Pause a schedule. 
- - Args: - agent_id: The agent ID - schedule_name: The schedule name - note: Optional note explaining why the schedule was paused - - Returns: - ScheduleResponse with updated schedule details - """ - schedule_id = build_schedule_id(agent_id, schedule_name) - await self.temporal_adapter.pause_schedule(schedule_id, note=note) - return await self.get_schedule(agent_id, schedule_name) - - async def unpause_schedule( - self, agent_id: str, schedule_name: str, note: str | None = None - ) -> ScheduleResponse: - """ - Unpause/resume a schedule. - - Args: - agent_id: The agent ID - schedule_name: The schedule name - note: Optional note explaining why the schedule was unpaused - - Returns: - ScheduleResponse with updated schedule details - """ - schedule_id = build_schedule_id(agent_id, schedule_name) - await self.temporal_adapter.unpause_schedule(schedule_id, note=note) - return await self.get_schedule(agent_id, schedule_name) - - async def trigger_schedule( - self, agent_id: str, schedule_name: str - ) -> ScheduleResponse: - """ - Trigger a schedule to run immediately. - - Args: - agent_id: The agent ID - schedule_name: The schedule name - - Returns: - ScheduleResponse with updated schedule details - """ - schedule_id = build_schedule_id(agent_id, schedule_name) - await self.temporal_adapter.trigger_schedule(schedule_id) - return await self.get_schedule(agent_id, schedule_name) - - async def delete_schedule(self, agent_id: str, schedule_name: str) -> None: - """ - Delete a schedule. - - Args: - agent_id: The agent ID - schedule_name: The schedule name - """ - schedule_id = build_schedule_id(agent_id, schedule_name) - await self.temporal_adapter.delete_schedule(schedule_id) - # Best-effort: drop the auth tuple after the Temporal delete. A failure - # here is logged but never blocks the delete. 
- await self._deregister_schedule_from_auth(schedule_id=schedule_id) - - def _description_to_response( - self, schedule_id: str, description: ScheduleDescription - ) -> ScheduleResponse: - """ - Convert a Temporal ScheduleDescription to a ScheduleResponse. - - Args: - schedule_id: The schedule ID - description: Temporal ScheduleDescription object - - Returns: - ScheduleResponse - """ - # Parse agent_id and name from schedule_id - agent_id, schedule_name = parse_schedule_id(schedule_id) - - # Extract action info - action = description.schedule.action - workflow_name = "" - workflow_id_prefix = "" - task_queue = "" - workflow_params: list[Any] | None = None - - if isinstance(action, ScheduleActionStartWorkflow): - workflow_name = action.workflow - workflow_id_prefix = action.id - task_queue = action.task_queue or "" - # Convert Temporal Payload objects to JSON-serializable format - # The args are raw Temporal payloads that can't be directly serialized - if action.args: - try: - # Try to extract data from payloads if they have a data attribute - workflow_params = [] - for arg in action.args: - if hasattr(arg, "data"): - # Decode bytes to string if possible - try: - import json - - workflow_params.append( - json.loads(arg.data.decode("utf-8")) - ) - except (json.JSONDecodeError, UnicodeDecodeError): - workflow_params.append(str(arg.data)) - else: - workflow_params.append(str(arg)) - except Exception: - # If conversion fails, just indicate params exist but can't be displayed - workflow_params = None - else: - workflow_params = None - - # Extract spec info - spec = description.schedule.spec - cron_expressions = list(spec.cron_expressions) if spec.cron_expressions else [] - intervals_seconds = [ - int(interval.every.total_seconds()) for interval in (spec.intervals or []) - ] - - # Extract state - state = ScheduleState.ACTIVE - if description.schedule.state and description.schedule.state.paused: - state = ScheduleState.PAUSED - - # Extract info - info = description.info - 
logger = make_logger(__name__)


class AgentRunSchedulesUseCase:
    """Use case for managing scheduled agent runs."""

    def __init__(
        self,
        run_schedule_service: DAgentRunScheduleService,
    ):
        self.run_schedule_service = run_schedule_service

    async def create_schedule(
        self,
        agent: AgentEntity,
        request: CreateAgentRunScheduleRequest,
        creator_principal: dict[str, Any],
    ) -> AgentRunScheduleResponse:
        """Validate the cadence, then delegate creation to the service.

        Exactly one of ``cron_expression`` / ``interval_seconds`` must be set.
        Both are tested by truthiness, so an empty cron string or an interval
        of ``0`` counts as "not provided".

        Raises:
            ClientError: Neither cadence field, or both, were provided.
        """
        has_cron = bool(request.cron_expression)
        has_interval = bool(request.interval_seconds)

        if not (has_cron or has_interval):
            raise ClientError(
                "Either cron_expression or interval_seconds must be provided"
            )
        if has_cron and has_interval:
            raise ClientError(
                "Provide only one of cron_expression or interval_seconds, not both"
            )

        service = self.run_schedule_service
        return await service.create_schedule(agent, request, creator_principal)

    async def list_schedules(
        self,
        agent_id: str,
        authorized_schedule_ids: list[str] | None = None,
        limit: int = 100,
    ) -> AgentRunScheduleListResponse:
        """Forward the listing request, including the authz filter, to the service."""
        listing = await self.run_schedule_service.list_schedules(
            agent_id,
            authorized_schedule_ids=authorized_schedule_ids,
            limit=limit,
        )
        return listing

    async def get_schedule(self, agent_id: str, name: str) -> AgentRunScheduleResponse:
        """Fetch a single schedule by (agent_id, name) via the service."""
        service = self.run_schedule_service
        return await service.get_schedule(agent_id, name)

    async def pause_schedule(
        self, agent_id: str, name: str, note: str | None = None
    ) -> AgentRunScheduleResponse:
        """Pause the named schedule via the service, forwarding ``note``."""
        service = self.run_schedule_service
        return await service.pause_schedule(agent_id, name, note=note)

    async def resume_schedule(
        self, agent_id: str, name: str, note: str | None = None
    ) -> AgentRunScheduleResponse:
        """Resume the named schedule via the service, forwarding ``note``."""
        service = self.run_schedule_service
        return await service.resume_schedule(agent_id, name, note=note)

    async def delete_schedule(self, agent_id: str, name: str) -> str:
        """Delete the named schedule via the service and return its result."""
        service = self.run_schedule_service
        return await service.delete_schedule(agent_id, name)


# FastAPI dependency alias for injecting the use case.
DAgentRunSchedulesUseCase = Annotated[
    AgentRunSchedulesUseCase, Depends(AgentRunSchedulesUseCase)
]
- - Args: - agent_id: The agent ID - schedule_name: The schedule name - - Returns: - ScheduleResponse with schedule details - """ - return await self.schedule_service.get_schedule(agent_id, schedule_name) - - async def list_schedules( - self, - agent_id: str, - page_size: int = 100, - authorized_schedule_ids: list[str] | None = None, - ) -> ScheduleListResponse: - """ - List schedules for an agent. - - Args: - agent_id: The agent ID - page_size: Number of results to return - authorized_schedule_ids: Ownership filter forwarded to the service - (``None`` = bypass → all; a list restricts; empty → none). - - Returns: - ScheduleListResponse with list of schedules - """ - return await self.schedule_service.list_schedules( - agent_id=agent_id, - page_size=page_size, - authorized_schedule_ids=authorized_schedule_ids, - ) - - async def pause_schedule( - self, agent_id: str, schedule_name: str, note: str | None = None - ) -> ScheduleResponse: - """ - Pause a schedule. - - Args: - agent_id: The agent ID - schedule_name: The schedule name - note: Optional note explaining why the schedule was paused - - Returns: - ScheduleResponse with updated schedule details - """ - return await self.schedule_service.pause_schedule( - agent_id, schedule_name, note=note - ) - - async def unpause_schedule( - self, agent_id: str, schedule_name: str, note: str | None = None - ) -> ScheduleResponse: - """ - Unpause/resume a schedule. - - Args: - agent_id: The agent ID - schedule_name: The schedule name - note: Optional note explaining why the schedule was unpaused - - Returns: - ScheduleResponse with updated schedule details - """ - return await self.schedule_service.unpause_schedule( - agent_id, schedule_name, note=note - ) - - async def trigger_schedule( - self, agent_id: str, schedule_name: str - ) -> ScheduleResponse: - """ - Trigger a schedule to run immediately. 
- - Args: - agent_id: The agent ID - schedule_name: The schedule name - - Returns: - ScheduleResponse with updated schedule details - """ - return await self.schedule_service.trigger_schedule(agent_id, schedule_name) - - async def delete_schedule(self, agent_id: str, schedule_name: str) -> None: - """ - Delete a schedule. - - Args: - agent_id: The agent ID - schedule_name: The schedule name - """ - await self.schedule_service.delete_schedule(agent_id, schedule_name) - - -DSchedulesUseCase = Annotated[SchedulesUseCase, Depends(SchedulesUseCase)] diff --git a/agentex/src/temporal/activities/scheduled_agent_run_activities.py b/agentex/src/temporal/activities/scheduled_agent_run_activities.py new file mode 100644 index 00000000..7287e835 --- /dev/null +++ b/agentex/src/temporal/activities/scheduled_agent_run_activities.py @@ -0,0 +1,280 @@ +""" +Temporal activity for scheduled agent runs (AGX1-368). + +``launch_scheduled_agent_run`` is the single activity each scheduled fire runs. +It loads the persisted schedule, creates a fresh Agentex task with a deterministic +name, and delivers the configured initial input through the same path a manual +agent run uses — ``task/create`` then ``event/send`` (async / agentic agents) or +``message/send`` (sync agents) — attributed to the schedule's stored creator +principal. + +Correctness: +- Deterministic task name ``scheduled-run:{schedule_id}:{fire_id}`` makes + ``task/create`` get-or-create, so an activity retry returns the same task + instead of duplicating it. +- A ``scheduled_input_delivered`` marker on the task metadata guards against + re-delivering the initial input when the activity retries after a prior + successful delivery. + +Boundary types are JSON-native (the backend data converter does not serialize +Pydantic models), so args and the return value are plain str / dict. 
+""" + +from typing import Any + +from src.adapters.authorization.exceptions import AuthorizationError +from src.adapters.crud_store.exceptions import ItemDoesNotExist +from src.api.schemas.authorization_types import ( + AgentexResource, + AuthorizedOperationType, +) +from src.config.dependencies import GlobalDependencies +from src.domain.entities.agent_run_schedules import ( + InitialInputMethod, + infer_initial_input_method, +) +from src.domain.entities.agents import AgentStatus +from src.domain.entities.agents_rpc import ( + AgentRPCMethod, + CreateTaskRequestEntity, + SendEventRequestEntity, + SendMessageRequestEntity, +) +from src.domain.entities.task_messages import ( + MessageAuthor, + TaskMessageContentEntity, + TextContentEntity, +) +from src.domain.repositories.agent_run_schedule_repository import ( + AgentRunScheduleRepository, +) +from src.domain.use_cases.agents_acp_use_case import AgentsACPUseCase +from src.temporal.scheduled_agent_run_factory import build_acp_use_case_for_principal +from src.utils.logging import make_logger +from temporalio import activity + +logger = make_logger(__name__) + +LAUNCH_SCHEDULED_AGENT_RUN_ACTIVITY = "launch_scheduled_agent_run_activity" + +_INPUT_DELIVERED_MARKER = "scheduled_input_delivered" + + +def _build_initial_content(initial_input: dict[str, Any]) -> TaskMessageContentEntity: + """Build the message content delivered as the scheduled task's first input. + + v1 supports text input only; the persisted ``initial_input.type`` is reserved + for future content types. 
+ """ + author = initial_input.get("author", MessageAuthor.USER.value) + if not isinstance(author, MessageAuthor): + author = MessageAuthor(author) + return TextContentEntity( + author=author, + content=initial_input.get("content", ""), + ) + + +async def _authorize_or_skip( + authorization_service: Any, + checks: list[tuple[Any, Any]], + *, + schedule_id: str, + task_id: str | None = None, +) -> dict[str, Any] | None: + """Run fire-time AuthZ checks under the stored creator principal. + + Returns ``None`` when every check passes (or authz is bypassed). On a + permanent ``AuthorizationError`` (403) it returns a + ``skipped`` / ``permission_denied`` outcome so a revoked principal stops + future fires; transient authz errors propagate so Temporal retries. + """ + for resource, operation in checks: + try: + await authorization_service.check(resource=resource, operation=operation) + except AuthorizationError as exc: + logger.warning( + "scheduled_run_permission_denied", + extra={ + "schedule_id": schedule_id, + "resource": f"{resource.type}:{resource.selector}", + "operation": str(operation), + }, + ) + outcome: dict[str, Any] = { + "status": "skipped", + "reason": "permission_denied", + "schedule_id": schedule_id, + "detail": str(exc), + } + if task_id is not None: + outcome["task_id"] = task_id + return outcome + return None + + +class ScheduledAgentRunActivities: + def __init__( + self, + global_dependencies: GlobalDependencies, + schedule_repository: AgentRunScheduleRepository, + ): + self.global_dependencies = global_dependencies + self.schedule_repository = schedule_repository + + @activity.defn(name=LAUNCH_SCHEDULED_AGENT_RUN_ACTIVITY) + async def launch_scheduled_agent_run( + self, schedule_id: str, fire_id: str + ) -> dict[str, Any]: + """Create a task for the scheduled fire and deliver its initial input. + + Args: + schedule_id: The persisted ``agent_run_schedules`` row id. 
+ fire_id: A token unique to this scheduled fire (the workflow id, + which Temporal makes unique per fire and stable across activity + retries within the same execution). Used to build the + deterministic, idempotent task name. + + Returns: + A JSON-native dict describing the outcome (``launched`` / ``skipped``). + """ + try: + schedule = await self.schedule_repository.get(id=schedule_id) + except ItemDoesNotExist: + logger.warning( + "scheduled_run_schedule_not_found", + extra={"schedule_id": schedule_id, "fire_id": fire_id}, + ) + return { + "status": "skipped", + "reason": "schedule_not_found", + "schedule_id": schedule_id, + } + + if schedule.paused: + # Temporal pauses the schedule too, but a manual trigger can still + # fire a paused schedule — honor the stored paused state defensively. + return { + "status": "skipped", + "reason": "schedule_paused", + "schedule_id": schedule_id, + } + + use_case: AgentsACPUseCase = build_acp_use_case_for_principal( + self.global_dependencies, schedule.creator_principal + ) + + agent = await use_case.agent_repository.get(id=schedule.agent_id) + if agent.status == AgentStatus.DELETED: + return { + "status": "skipped", + "reason": "agent_deleted", + "schedule_id": schedule_id, + } + + method = ( + schedule.initial_input_method + or infer_initial_input_method(agent.acp_type).value + ) + + # Re-check the stored creator principal's permission at fire time, mirroring + # the JSON-RPC route's authorization order: agent.execute (the RPC endpoint + # gate) then task.create (implements AGX1-368 D5's `check`). A revoked + # creator stops future fires instead of running under stale ownership. + # AuthorizationError (403) is a permanent denial → skip cleanly; transient + # authz errors propagate so Temporal retries. Under authz bypass (local / + # disabled) these are no-ops. 
+ denied = await _authorize_or_skip( + use_case.authorization_service, + [ + ( + AgentexResource.agent(schedule.agent_id), + AuthorizedOperationType.execute, + ), + (AgentexResource.task("*"), AuthorizedOperationType.create), + ], + schedule_id=schedule_id, + ) + if denied is not None: + return denied + + task_name = f"scheduled-run:{schedule_id}:{fire_id}" + task_metadata = { + **(schedule.task_metadata or {}), + "schedule_id": schedule_id, + "scheduled_fire_id": fire_id, + } + + # task/create — get-or-create by deterministic name, so a retry returns + # the same task. For async / agentic agents this also forwards the task + # to the ACP server; for sync agents it only persists the row. + task = await use_case.handle_rpc_request( + method=AgentRPCMethod.TASK_CREATE, + params=CreateTaskRequestEntity( + name=task_name, + params=schedule.task_params, + task_metadata=task_metadata, + ), + agent_id=schedule.agent_id, + ) + + # Duplicate-input guard: if this fire's task already carries the delivered + # marker, a prior attempt already delivered the initial input. + if task.task_metadata and task.task_metadata.get(_INPUT_DELIVERED_MARKER): + return { + "status": "skipped", + "reason": "input_already_delivered", + "task_id": task.id, + "schedule_id": schedule_id, + } + + # Mirror the route's per-method gate for event/send & message/send: + # update permission on the task before delivering the initial input. 
+ denied = await _authorize_or_skip( + use_case.authorization_service, + [(AgentexResource.task(task.id), AuthorizedOperationType.update)], + schedule_id=schedule_id, + task_id=task.id, + ) + if denied is not None: + return denied + + content = _build_initial_content(schedule.initial_input) + if method == InitialInputMethod.MESSAGE_SEND.value: + await use_case.handle_rpc_request( + method=AgentRPCMethod.MESSAGE_SEND, + params=SendMessageRequestEntity( + task_name=task_name, content=content, stream=False + ), + agent_id=schedule.agent_id, + ) + else: + await use_case.handle_rpc_request( + method=AgentRPCMethod.EVENT_SEND, + params=SendEventRequestEntity(task_name=task_name, content=content), + agent_id=schedule.agent_id, + ) + + # Best-effort delivered marker for the retry guard above. A crash between + # delivery and this update is the only window where a retry could + # re-deliver; deterministic task naming still prevents duplicate tasks. + task.task_metadata = { + **(task.task_metadata or {}), + _INPUT_DELIVERED_MARKER: True, + } + await use_case.task_service.update_task(task) + + logger.info( + "scheduled_run_launched", + extra={ + "schedule_id": schedule_id, + "task_id": task.id, + "method": method, + }, + ) + return { + "status": "launched", + "task_id": task.id, + "schedule_id": schedule_id, + "method": method, + } diff --git a/agentex/src/temporal/run_worker.py b/agentex/src/temporal/run_worker.py index de44cba6..cba6f634 100644 --- a/agentex/src/temporal/run_worker.py +++ b/agentex/src/temporal/run_worker.py @@ -27,12 +27,21 @@ from src.temporal.activities.retention_cleanup_activities import ( RetentionCleanupActivities, ) +from src.temporal.activities.scheduled_agent_run_activities import ( + ScheduledAgentRunActivities, +) +from src.temporal.scheduled_agent_run_factory import ( + build_agent_run_schedule_repository, +) from src.temporal.task_retention_factory import build_task_retention_use_case from src.temporal.workflows.healthcheck_workflow import 
HealthCheckWorkflow from src.temporal.workflows.retention_cleanup_workflow import ( RetentionCleanupSweepWorkflow, RetentionCleanupTaskWorkflow, ) +from src.temporal.workflows.scheduled_agent_run_workflow import ( + ScheduledAgentRunWorkflow, +) from src.utils.logging import make_logger logger = make_logger(__name__) @@ -161,6 +170,11 @@ def create_agentex_server_worker( use_case=retention_use_case, ) + scheduled_agent_run_activities = ScheduledAgentRunActivities( + global_dependencies=global_dependencies, + schedule_repository=build_agent_run_schedule_repository(global_dependencies), + ) + return asyncio.create_task( run_worker( task_queue=task_queue, @@ -168,6 +182,7 @@ def create_agentex_server_worker( HealthCheckWorkflow, RetentionCleanupSweepWorkflow, RetentionCleanupTaskWorkflow, + ScheduledAgentRunWorkflow, ], activities=[ health_check_activities.check_status_activity, @@ -176,6 +191,7 @@ def create_agentex_server_worker( retention_activities.find_cleanup_candidates, retention_activities.find_multi_agent_cleanup_candidates, retention_activities.clean_task, + scheduled_agent_run_activities.launch_scheduled_agent_run, ], max_workers=50, max_concurrent_activities=50, diff --git a/agentex/src/temporal/scheduled_agent_run_factory.py b/agentex/src/temporal/scheduled_agent_run_factory.py new file mode 100644 index 00000000..200132e3 --- /dev/null +++ b/agentex/src/temporal/scheduled_agent_run_factory.py @@ -0,0 +1,141 @@ +""" +Wire the dependencies the scheduled-agent-run activity needs outside FastAPI's +Depends DI, for use inside the Temporal worker. Mirrors the manual-wiring pattern +in task_retention_factory.py. + +Each scheduled fire creates a fresh Agentex task and delivers the schedule's +configured initial input under the *stored creator principal* (AGX1-368, D5) — +not as an agent identity. 
So the AgentsACPUseCase is rebuilt per fire with an +AuthorizationService whose principal_context is that fire's creator principal and +whose agent_identity is None, attributing task ownership and AuthZ checks to the +schedule's creator rather than to the worker's service identity. +""" + +from types import SimpleNamespace +from typing import Any + +from src.adapters.authorization.adapter_agentex_authz_proxy import ( + AgentexAuthorizationProxy, +) +from src.adapters.http.adapter_httpx import HttpxGateway +from src.adapters.streams.adapter_redis import RedisStreamRepository +from src.api.middleware_utils import resolve_authorization_enabled +from src.config.dependencies import ( + GlobalDependencies, + database_async_read_only_session_maker, + database_async_read_write_engine, + database_async_read_write_session_maker, + resolve_environment_variable_dependency, +) +from src.config.environment_variables import EnvironmentVariables, EnvVarKeys +from src.domain.repositories.agent_api_key_repository import AgentAPIKeyRepository +from src.domain.repositories.agent_repository import AgentRepository +from src.domain.repositories.agent_run_schedule_repository import ( + AgentRunScheduleRepository, +) +from src.domain.repositories.deployment_repository import DeploymentRepository +from src.domain.repositories.event_repository import EventRepository +from src.domain.repositories.task_message_repository import TaskMessageRepository +from src.domain.repositories.task_repository import TaskRepository +from src.domain.repositories.task_state_repository import TaskStateRepository +from src.domain.services.agent_acp_service import AgentACPService +from src.domain.services.authorization_service import AuthorizationService +from src.domain.services.task_message_service import TaskMessageService +from src.domain.services.task_service import AgentTaskService +from src.domain.use_cases.agents_acp_use_case import AgentsACPUseCase + + +class _ScheduledRunRequest: + """Minimal ``Request`` 
stand-in for worker-side AuthZ + ACP delegation. + + Carries the stored creator principal as ``state.principal_context`` with no + ``agent_identity`` (so AuthZ attributes ownership to the creator, not a + service) and no headers (so no live user credentials — cookies, API keys — + are forwarded downstream; D5/D6). ``build_delegation_headers`` returns an + empty mapping when there are no inbound credential headers, which is exactly + the intended behavior here. + """ + + def __init__(self, principal_context: dict[str, Any] | None): + self.state = SimpleNamespace( + principal_context=principal_context, + agent_identity=None, + ) + self.headers: dict[str, str] = {} + + +def build_agent_run_schedule_repository( + global_dependencies: GlobalDependencies, +) -> AgentRunScheduleRepository: + """Build the schedule repository from an already-loaded GlobalDependencies.""" + engine = database_async_read_write_engine() + rw_session_maker = database_async_read_write_session_maker(engine) + ro_session_maker = database_async_read_only_session_maker(engine) + return AgentRunScheduleRepository(rw_session_maker, ro_session_maker) + + +def build_acp_use_case_for_principal( + global_dependencies: GlobalDependencies, + creator_principal: dict[str, Any] | None, +) -> AgentsACPUseCase: + """Construct an AgentsACPUseCase bound to a specific creator principal. + + The returned use case routes task creation and initial-input delivery exactly + as the JSON-RPC path does (ACP-type validation, acp_url resolution, ownership + grant, get-or-create idempotency), but attributes everything to + *creator_principal* instead of the request principal. 
+ """ + env = EnvironmentVariables.refresh() + engine = database_async_read_write_engine() + rw_session_maker = database_async_read_write_session_maker(engine) + ro_session_maker = database_async_read_only_session_maker(engine) + + request = _ScheduledRunRequest(creator_principal) + + agent_repository = AgentRepository(rw_session_maker, ro_session_maker) + agent_api_key_repository = AgentAPIKeyRepository(rw_session_maker, ro_session_maker) + deployment_repository = DeploymentRepository(rw_session_maker, ro_session_maker) + task_repository = TaskRepository(rw_session_maker, ro_session_maker) + event_repository = EventRepository(rw_session_maker, ro_session_maker) + + task_state_repository = TaskStateRepository(global_dependencies.mongodb_database) + task_message_repository = TaskMessageRepository( + global_dependencies.mongodb_database + ) + task_message_service = TaskMessageService( + message_repository=task_message_repository + ) + + http_gateway = HttpxGateway(env) + stream_repository = RedisStreamRepository(env, global_dependencies.redis_pool) + + auth_url = resolve_environment_variable_dependency(EnvVarKeys.AGENTEX_AUTH_URL) + authz_gateway = AgentexAuthorizationProxy(agentex_auth_url=auth_url) + authorization_service = AuthorizationService( + enabled=resolve_authorization_enabled(auth_url), + gateway=authz_gateway, + request=request, # type: ignore[arg-type] + ) + + acp_client = AgentACPService( + agent_repository=agent_repository, + agent_api_key_repository=agent_api_key_repository, + http_gateway=http_gateway, + request=request, # type: ignore[arg-type] + ) + task_service = AgentTaskService( + acp_client=acp_client, + task_state_repository=task_state_repository, + task_repository=task_repository, + event_repository=event_repository, + stream_repository=stream_repository, + authorization_service=authorization_service, + ) + return AgentsACPUseCase( + agent_repository=agent_repository, + deployment_repository=deployment_repository, + acp_client=acp_client, + 
task_service=task_service, + task_message_service=task_message_service, + authorization_service=authorization_service, + ) diff --git a/agentex/src/temporal/workflows/scheduled_agent_run_workflow.py b/agentex/src/temporal/workflows/scheduled_agent_run_workflow.py new file mode 100644 index 00000000..61ff65be --- /dev/null +++ b/agentex/src/temporal/workflows/scheduled_agent_run_workflow.py @@ -0,0 +1,39 @@ +""" +Scheduled agent run workflow (AGX1-368). + +Started by a Temporal Schedule on each cron / interval fire. The workflow is +deliberately thin: it passes only the schedule id and a per-fire token to a +single activity and does no DB / API / ACP work itself, so it stays +deterministic. All side effects live in ``launch_scheduled_agent_run``. + +The per-fire token is the workflow id, which Temporal makes unique per scheduled +fire (it suffixes the configured workflow id with the nominal fire time) and +keeps stable across activity retries within the same execution. The activity +uses it to build a deterministic, idempotent task name. 
+""" + +from datetime import timedelta +from typing import Any + +from src.temporal.activities.scheduled_agent_run_activities import ( + LAUNCH_SCHEDULED_AGENT_RUN_ACTIVITY, +) +from temporalio import workflow +from temporalio.common import RetryPolicy + + +@workflow.defn +class ScheduledAgentRunWorkflow: + @workflow.run + async def run(self, schedule_id: str) -> dict[str, Any]: + fire_id = workflow.info().workflow_id + return await workflow.execute_activity( + LAUNCH_SCHEDULED_AGENT_RUN_ACTIVITY, + args=[schedule_id, fire_id], + start_to_close_timeout=timedelta(seconds=120), + retry_policy=RetryPolicy( + maximum_attempts=5, + initial_interval=timedelta(seconds=2), + backoff_coefficient=2.0, + ), + ) diff --git a/agentex/tests/integration/services/test_schedule_service_dual_write.py b/agentex/tests/integration/services/test_schedule_service_dual_write.py deleted file mode 100644 index b030f439..00000000 --- a/agentex/tests/integration/services/test_schedule_service_dual_write.py +++ /dev/null @@ -1,251 +0,0 @@ -"""Integration tests for ScheduleService authorization writes. - -Schedules have no Postgres row: Temporal is the store and the auth selector is -``{agent_id}--{schedule_name}``. The authorization-write sequencing therefore -lives in ``ScheduleService`` next to the Temporal write: - -- Create registers the schedule in the authorization graph under parent=agent, - before the Temporal create. -- Registration failure prevents the Temporal create. -- A Temporal create failure after a successful registration triggers a - best-effort compensating deregister and re-raises the original Temporal error. -- A post-create read-back failure does not deregister, because the schedule was - actually created. -- Delete removes the Temporal schedule first, then deregisters best-effort. -- No creator identity means the registration is skipped and the schedule still - lands in Temporal. 
- -The tests mock the Temporal adapter and authorization service and stub the -post-create read-back; the behavior under test is the call sequencing inside -``ScheduleService``, not Temporal or the authorization service itself. -""" - -from __future__ import annotations - -from types import SimpleNamespace -from unittest.mock import AsyncMock, Mock - -import pytest -from src.api.schemas.authorization_types import AgentexResource, AgentexResourceType -from src.api.schemas.schedules import CreateScheduleRequest, ScheduleResponse -from src.domain.entities.agents import ACPType, AgentEntity, AgentStatus -from src.domain.services.schedule_service import ScheduleService, build_schedule_id -from src.utils.ids import orm_id - - -def _principal( - user_id: str | None = None, service_account_id: str | None = None -) -> SimpleNamespace: - """Minimal stand-in for the auth principal context.""" - return SimpleNamespace( - user_id=user_id, service_account_id=service_account_id, account_id="acct-1" - ) - - -def _agent() -> AgentEntity: - agent_id = orm_id() - return AgentEntity( - id=agent_id, - name=f"agent-{agent_id[:8]}", - description="authorization-write test agent", - status=AgentStatus.READY, - acp_type=ACPType.SYNC, - acp_url="http://test-acp", - ) - - -def _request(name: str = "nightly") -> CreateScheduleRequest: - return CreateScheduleRequest( - name=name, - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - ) - - -def _build_service( - *, - principal: SimpleNamespace | None, - register_resource: AsyncMock | None = None, - deregister_resource: AsyncMock | None = None, - create_raises: Exception | None = None, - delete_raises: Exception | None = None, - get_schedule_raises: Exception | None = None, -) -> tuple[ScheduleService, Mock, Mock]: - temporal_adapter = Mock() - temporal_adapter.create_schedule = AsyncMock( - side_effect=create_raises, return_value=None - ) - temporal_adapter.delete_schedule = AsyncMock( - 
side_effect=delete_raises, return_value=None - ) - - authorization_service = Mock() - authorization_service.principal_context = principal - authorization_service.register_resource = register_resource or AsyncMock( - return_value=None - ) - authorization_service.deregister_resource = deregister_resource or AsyncMock( - return_value=None - ) - - service = ScheduleService( - temporal_adapter=temporal_adapter, - authorization_service=authorization_service, - ) - # Stub the post-create read-back so create_schedule doesn't hit - # describe_schedule; tests covering a read-back failure pass get_schedule_raises. - if get_schedule_raises is not None: - service.get_schedule = AsyncMock(side_effect=get_schedule_raises) - else: - service.get_schedule = AsyncMock(return_value=Mock(spec=ScheduleResponse)) - - return service, temporal_adapter, authorization_service - - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_create_schedule_calls_register_resource_with_parent() -> None: - agent = _agent() - request = _request("nightly") - service, temporal_adapter, authorization_service = _build_service( - principal=_principal(user_id="user-A"), - ) - - await service.create_schedule(agent, request) - - authorization_service.register_resource.assert_awaited_once() - registered_resource: AgentexResource = ( - authorization_service.register_resource.await_args.kwargs["resource"] - ) - assert registered_resource.type == AgentexResourceType.schedule - assert registered_resource.selector == build_schedule_id(agent.id, request.name) - registered_parent: AgentexResource = ( - authorization_service.register_resource.await_args.kwargs["parent"] - ) - # parent_agent is load-bearing: without it the authorization cascade from - # the owning agent fails closed for readers. 
- assert registered_parent is not None - assert registered_parent.type == AgentexResourceType.agent - assert registered_parent.selector == agent.id - temporal_adapter.create_schedule.assert_awaited_once() - - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_delete_schedule_calls_deregister_resource() -> None: - agent = _agent() - service, temporal_adapter, authorization_service = _build_service( - principal=_principal(user_id="user-A"), - ) - - await service.delete_schedule(agent.id, "nightly") - - schedule_id = build_schedule_id(agent.id, "nightly") - temporal_adapter.delete_schedule.assert_awaited_once_with(schedule_id) - authorization_service.deregister_resource.assert_awaited_once() - deregistered_resource: AgentexResource = ( - authorization_service.deregister_resource.await_args.kwargs["resource"] - ) - assert deregistered_resource.type == AgentexResourceType.schedule - assert deregistered_resource.selector == schedule_id - - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_create_schedule_register_failure_prevents_temporal_create() -> None: - register = AsyncMock(side_effect=RuntimeError("authz unavailable")) - agent = _agent() - service, temporal_adapter, authorization_service = _build_service( - principal=_principal(user_id="user-A"), - register_resource=register, - ) - - with pytest.raises(RuntimeError, match="authz unavailable"): - await service.create_schedule(agent, _request()) - - temporal_adapter.create_schedule.assert_not_awaited() - authorization_service.deregister_resource.assert_not_awaited() - - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_create_schedule_temporal_failure_triggers_compensating_deregister() -> ( - None -): - agent = _agent() - request = _request("nightly") - service, _, authorization_service = _build_service( - principal=_principal(user_id="user-A"), - create_raises=RuntimeError("temporal down"), - ) - - with pytest.raises(RuntimeError, match="temporal down"): - await 
service.create_schedule(agent, request) - - authorization_service.register_resource.assert_awaited_once() - # The schedule never landed in Temporal, so the auth entry is cleaned up. - authorization_service.deregister_resource.assert_awaited_once() - compensated: AgentexResource = ( - authorization_service.deregister_resource.await_args.kwargs["resource"] - ) - assert compensated.type == AgentexResourceType.schedule - assert compensated.selector == build_schedule_id(agent.id, request.name) - - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_create_schedule_readback_failure_does_not_compensate() -> None: - # The Temporal create succeeded but the post-create describe failed. The - # schedule genuinely exists, so the auth entry must survive the read-back - # error. - agent = _agent() - service, temporal_adapter, authorization_service = _build_service( - principal=_principal(user_id="user-A"), - get_schedule_raises=RuntimeError("describe transient error"), - ) - - with pytest.raises(RuntimeError, match="describe transient error"): - await service.create_schedule(agent, _request()) - - temporal_adapter.create_schedule.assert_awaited_once() - authorization_service.register_resource.assert_awaited_once() - authorization_service.deregister_resource.assert_not_awaited() - - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_delete_schedule_deregister_failure_does_not_block_delete() -> None: - deregister = AsyncMock(side_effect=RuntimeError("authz unavailable")) - agent = _agent() - service, temporal_adapter, authorization_service = _build_service( - principal=_principal(user_id="user-A"), - deregister_resource=deregister, - ) - - # Best-effort cleanup: a deregister failure is swallowed after Temporal - # delete succeeds. 
- await service.delete_schedule(agent.id, "nightly") - - temporal_adapter.delete_schedule.assert_awaited_once_with( - build_schedule_id(agent.id, "nightly") - ) - authorization_service.deregister_resource.assert_awaited_once() - - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_create_schedule_no_creator_skips_auth_writes() -> None: - agent = _agent() - request = _request("nightly") - # Neither user_id nor service_account_id: internal paths still create the - # schedule, but there is no creator identity to register as owner. - service, temporal_adapter, authorization_service = _build_service( - principal=_principal(user_id=None, service_account_id=None), - ) - - await service.create_schedule(agent, request) - - authorization_service.register_resource.assert_not_awaited() - authorization_service.deregister_resource.assert_not_awaited() - temporal_adapter.create_schedule.assert_awaited_once() diff --git a/agentex/tests/unit/api/test_schedules_authz.py b/agentex/tests/unit/api/test_schedules_authz.py deleted file mode 100644 index 0a07a303..00000000 --- a/agentex/tests/unit/api/test_schedules_authz.py +++ /dev/null @@ -1,399 +0,0 @@ -"""Tests for the agent_schedule route migration to fine-grained authorization. - -Mirrors the structure of the agent_api_key and task route-authorization tests. -Covers: - - 1. The ``_check_schedule_or_collapse_to_404`` helper. - 2. ``DAuthorizedScheduleId`` builds the composite ``{agent_id}--{schedule_name}`` - selector, returns the schedule name when allowed, and preserves 403 for - denied operations on readable schedules. - 3. ``create_schedule`` enforces parent ``agent.update`` (the only route where - no schedule resource exists yet, so the authorization service can't - transitively gate it). - 4. ``ScheduleService.list_schedules`` filters to the authorized id set, with - ``None`` (bypass) returning everything and ``[]`` returning nothing. 
- -Cross-tenant and transitive-expansion checks belong in an end-to-end suite -gated on a live authorization-service cluster (the ``agent_schedule.update`` -permission transitively requires ``parent_agent->update`` in the authorization -policy, which this repo does not own). Here we only assert that the route layer -issues the correct ``check`` call with the correct operation. -""" - -from __future__ import annotations - -from types import SimpleNamespace -from unittest.mock import AsyncMock, MagicMock - -import pytest -from src.adapters.authorization.exceptions import AuthorizationError -from src.adapters.crud_store.exceptions import ItemDoesNotExist -from src.api.schemas.authorization_types import ( - AgentexResource, - AuthorizedOperationType, -) -from src.domain.services.schedule_service import ScheduleService, build_schedule_id -from src.utils.schedule_authorization import _check_schedule_or_collapse_to_404 - - -def _dep_callable(annotation): - """Pull the inner FastAPI dependency function out of an ``Annotated[str, Depends(...)]``.""" - return annotation.__metadata__[0].dependency - - -@pytest.mark.unit -@pytest.mark.asyncio -class TestCheckScheduleOrCollapseTo404: - """The schedule-resource authz wrap hides unreadable schedules.""" - - async def test_allowed_check_returns_normally(self): - authorization = MagicMock() - authorization.check = AsyncMock(return_value=True) - - await _check_schedule_or_collapse_to_404( - authorization, - "agent-1--nightly", - AuthorizedOperationType.read, - ) - - authorization.check.assert_awaited_once() - called_kwargs = authorization.check.await_args.kwargs - assert called_kwargs["resource"] == AgentexResource.schedule("agent-1--nightly") - assert called_kwargs["operation"] == AuthorizedOperationType.read - - async def test_denied_read_collapses_to_not_found(self): - authorization = MagicMock() - authorization.check = AsyncMock(side_effect=AuthorizationError("denied")) - - with pytest.raises(ItemDoesNotExist): - await 
_check_schedule_or_collapse_to_404( - authorization, - "agent-1--nightly", - AuthorizedOperationType.read, - ) - - authorization.check.assert_awaited_once() - - async def test_denied_non_read_collapses_to_not_found_when_read_denied(self): - authorization = MagicMock() - authorization.check = AsyncMock(side_effect=AuthorizationError("denied")) - - with pytest.raises(ItemDoesNotExist): - await _check_schedule_or_collapse_to_404( - authorization, - "agent-1--nightly", - AuthorizedOperationType.delete, - ) - - assert authorization.check.await_count == 2 - first_call, second_call = authorization.check.await_args_list - assert first_call.kwargs["operation"] == AuthorizedOperationType.delete - assert second_call.kwargs["operation"] == AuthorizedOperationType.read - - async def test_denied_non_read_surfaces_authorization_error_when_read_allowed(self): - authorization = MagicMock() - operation_denied = AuthorizationError("denied") - authorization.check = AsyncMock(side_effect=[operation_denied, True]) - - with pytest.raises(AuthorizationError) as exc_info: - await _check_schedule_or_collapse_to_404( - authorization, - "agent-1--nightly", - AuthorizedOperationType.delete, - ) - - assert exc_info.value is operation_denied - - async def test_forwards_operation_verbatim(self): - """The transitive expansion for ``update``/``delete`` in the - authorization policy is what bundles in the ``parent_agent->update`` - factor — the helper just needs to forward the operation.""" - authorization = MagicMock() - authorization.check = AsyncMock(return_value=True) - - await _check_schedule_or_collapse_to_404( - authorization, - "agent-1--nightly", - AuthorizedOperationType.update, - ) - - called_kwargs = authorization.check.await_args.kwargs - assert called_kwargs["operation"] == AuthorizedOperationType.update - - -@pytest.mark.unit -@pytest.mark.asyncio -class TestSingleResourceRouteAuthz: - """The single-resource routes (get/pause/unpause/trigger/delete) check the - schedule resource on 
the composite ``{agent_id}--{schedule_name}`` selector - inline, mirroring the agent_api_key name routes. - Verifies per-route operation routing and that a denial skips the use case.""" - - async def test_get_authorized_checks_read_and_calls_use_case(self): - from src.api.routes.schedules import get_schedule - - authorization = MagicMock() - authorization.check = AsyncMock(return_value=True) - use_case = MagicMock() - use_case.get_schedule = AsyncMock(return_value=MagicMock()) - - await get_schedule( - agent_id="agent-1", - schedule_name="nightly", - schedules_use_case=use_case, - authorization=authorization, - ) - - called = authorization.check.await_args.kwargs - assert called["resource"] == AgentexResource.schedule( - build_schedule_id("agent-1", "nightly") - ) - assert called["operation"] == AuthorizedOperationType.read - use_case.get_schedule.assert_awaited_once_with("agent-1", "nightly") - - async def test_get_denied_collapses_to_404_and_skips_use_case(self): - from src.api.routes.schedules import get_schedule - - authorization = MagicMock() - authorization.check = AsyncMock(side_effect=AuthorizationError("denied")) - use_case = MagicMock() - use_case.get_schedule = AsyncMock() - - with pytest.raises(ItemDoesNotExist): - await get_schedule( - agent_id="agent-1", - schedule_name="nightly", - schedules_use_case=use_case, - authorization=authorization, - ) - # The check runs before the Temporal lookup, so a denial never reaches it. 
- use_case.get_schedule.assert_not_called() - - async def test_pause_uses_update_op(self): - from src.api.routes.schedules import pause_schedule - - authorization = MagicMock() - authorization.check = AsyncMock(return_value=True) - use_case = MagicMock() - use_case.pause_schedule = AsyncMock(return_value=MagicMock()) - - await pause_schedule( - agent_id="agent-1", - schedule_name="nightly", - schedules_use_case=use_case, - authorization=authorization, - request=None, - ) - - called = authorization.check.await_args.kwargs - assert called["resource"] == AgentexResource.schedule("agent-1--nightly") - assert called["operation"] == AuthorizedOperationType.update - - async def test_delete_uses_delete_op_and_denied_skips_delete(self): - from src.api.routes.schedules import delete_schedule - - authorization = MagicMock() - authorization.check = AsyncMock(side_effect=AuthorizationError("denied")) - use_case = MagicMock() - use_case.delete_schedule = AsyncMock() - - with pytest.raises(ItemDoesNotExist): - await delete_schedule( - agent_id="agent-1", - schedule_name="nightly", - schedules_use_case=use_case, - authorization=authorization, - ) - use_case.delete_schedule.assert_not_called() - assert authorization.check.await_count == 2 - first_call, second_call = authorization.check.await_args_list - assert first_call.kwargs["operation"] == AuthorizedOperationType.delete - assert second_call.kwargs["operation"] == AuthorizedOperationType.read - - async def test_delete_denied_when_readable_surfaces_authorization_error(self): - from src.api.routes.schedules import delete_schedule - - authorization = MagicMock() - authorization.check = AsyncMock( - side_effect=[AuthorizationError("delete denied"), True] - ) - use_case = MagicMock() - use_case.delete_schedule = AsyncMock() - - with pytest.raises(AuthorizationError): - await delete_schedule( - agent_id="agent-1", - schedule_name="nightly", - schedules_use_case=use_case, - authorization=authorization, - ) - 
use_case.delete_schedule.assert_not_called() - - -@pytest.mark.unit -@pytest.mark.asyncio -class TestCreateParentAgentCheck: - """``create_schedule`` is the only route where no schedule resource exists - yet, so the authorization service cannot transitively gate on it. The - route's ``agent_id`` guard MUST check ``agent.update`` on the parent, and a - denial collapses to 404 when the parent is unreadable. A caller who can - read the parent but not update it sees 403.""" - - @staticmethod - def _agent_id_dep(): - from src.api.routes.schedules import create_schedule - - return _dep_callable(create_schedule.__annotations__["agent_id"]) - - async def test_create_checks_parent_agent_update(self): - dep = self._agent_id_dep() - - authorization = MagicMock() - authorization.check = AsyncMock(return_value=True) - - # Repos are unused on the agent path; bind resource_id by keyword so an - # added repo dep fails loudly instead of silently mis-binding the id. - result = await dep( - authorization, MagicMock(), MagicMock(), MagicMock(), resource_id="agent-1" - ) - - assert result == "agent-1" - called_kwargs = authorization.check.await_args.kwargs - assert called_kwargs["resource"] == AgentexResource.agent("agent-1") - assert called_kwargs["operation"] == AuthorizedOperationType.update - - async def test_create_denied_collapses_to_404(self): - dep = self._agent_id_dep() - - authorization = MagicMock() - authorization.check = AsyncMock(side_effect=AuthorizationError("denied")) - - # Parent-agent denial collapses to 404 so creating a schedule under an - # agent in another tenant can't reveal that the agent exists. 
- with pytest.raises(ItemDoesNotExist): - await dep( - authorization, - MagicMock(), - MagicMock(), - MagicMock(), - resource_id="agent-1", - ) - - async def test_create_denied_when_parent_readable_surfaces_authorization_error( - self, - ): - dep = self._agent_id_dep() - - authorization = MagicMock() - authorization.check = AsyncMock( - side_effect=[AuthorizationError("update denied"), True] - ) - - with pytest.raises(AuthorizationError): - await dep( - authorization, - MagicMock(), - MagicMock(), - MagicMock(), - resource_id="agent-1", - ) - - -def _fake_schedule(schedule_id: str, *, paused: bool = False): - """Minimal stand-in for a Temporal schedule list entry. - - ``list_schedules`` reads ``.id`` and ``.info.{action,next_action_times, - paused}``; a non-``ScheduleActionStartWorkflow`` action yields - ``workflow_name=None`` and an empty ``next_action_times`` yields - ``next_action_time=None``, both valid for ``ScheduleListItem``. - """ - info = SimpleNamespace(action=None, next_action_times=[], paused=paused) - return SimpleNamespace(id=schedule_id, info=info) - - -@pytest.mark.unit -@pytest.mark.asyncio -class TestListOwnershipFiltering: - """``ScheduleService.list_schedules`` filters the Temporal page to the - authorized id set. 
``None`` (authz bypass) returns everything; ``[]`` (caller - owns nothing) returns nothing — gating on ``is not None``, not truthiness.""" - - @staticmethod - def _service(): - temporal_adapter = MagicMock() - temporal_adapter.list_schedules = AsyncMock( - return_value=[ - _fake_schedule("agent-1--alpha"), - _fake_schedule("agent-1--beta"), - _fake_schedule("agent-2--gamma"), - ] - ) - return ScheduleService( - temporal_adapter=temporal_adapter, - authorization_service=MagicMock(), - ) - - async def test_none_returns_all_for_agent(self): - service = self._service() - - response = await service.list_schedules( - agent_id="agent-1", authorized_schedule_ids=None - ) - - ids = {item.schedule_id for item in response.schedules} - assert ids == {"agent-1--alpha", "agent-1--beta"} - - async def test_empty_list_returns_nothing(self): - service = self._service() - - response = await service.list_schedules( - agent_id="agent-1", authorized_schedule_ids=[] - ) - - assert response.schedules == [] - assert response.total == 0 - - async def test_subset_filters_to_authorized_ids(self): - service = self._service() - - response = await service.list_schedules( - agent_id="agent-1", authorized_schedule_ids=["agent-1--alpha"] - ) - - ids = {item.schedule_id for item in response.schedules} - assert ids == {"agent-1--alpha"} - - async def test_authorized_id_under_other_agent_is_excluded(self): - """The agent_id scope is applied first, so an authorized id belonging to - a different agent never leaks into this agent's listing.""" - service = self._service() - - response = await service.list_schedules( - agent_id="agent-1", authorized_schedule_ids=["agent-2--gamma"] - ) - - assert response.schedules == [] - - -@pytest.mark.unit -@pytest.mark.asyncio -class TestUseCaseForwardsAuthorizedIds: - """The use case is a thin pass-through; it must forward the ownership filter - to the service unchanged.""" - - async def test_list_forwards_authorized_schedule_ids(self): - from 
src.domain.use_cases.schedules_use_case import SchedulesUseCase - - schedule_service = MagicMock() - schedule_service.list_schedules = AsyncMock(return_value=MagicMock()) - use_case = SchedulesUseCase(schedule_service=schedule_service) - - await use_case.list_schedules( - "agent-1", page_size=50, authorized_schedule_ids=["agent-1--alpha"] - ) - - schedule_service.list_schedules.assert_awaited_once_with( - agent_id="agent-1", - page_size=50, - authorized_schedule_ids=["agent-1--alpha"], - ) diff --git a/agentex/tests/unit/services/test_agent_run_schedule_service.py b/agentex/tests/unit/services/test_agent_run_schedule_service.py new file mode 100644 index 00000000..e448a2ec --- /dev/null +++ b/agentex/tests/unit/services/test_agent_run_schedule_service.py @@ -0,0 +1,190 @@ +from unittest.mock import AsyncMock, PropertyMock +from uuid import uuid4 + +import pytest +from src.api.schemas.agent_run_schedules import ( + CreateAgentRunScheduleRequest, + RunScheduleState, + ScheduleInitialInput, +) +from src.domain.entities.agent_run_schedules import AgentRunScheduleEntity +from src.domain.entities.agents import ACPType, AgentEntity, AgentStatus +from src.domain.exceptions import ClientError +from src.domain.services.agent_run_schedule_service import ( + AgentRunScheduleService, + build_run_schedule_authz_selector, + build_run_schedule_temporal_id, +) + + +@pytest.fixture +def agent(): + return AgentEntity( + id="agent-123", + name="test-agent", + description="A test agent", + status=AgentStatus.READY, + acp_type=ACPType.ASYNC, + acp_url="http://acp.example.com", + ) + + +@pytest.fixture +def service(): + temporal_adapter = AsyncMock() + # describe_schedule failing keeps _to_response on the persisted-row path. 
+ temporal_adapter.describe_schedule.side_effect = RuntimeError("not found yet") + authorization_service = AsyncMock() + type(authorization_service).principal_context = PropertyMock( + return_value={"user_id": "u1", "account_id": "a1"} + ) + schedule_repository = AsyncMock() + agent_repository = AsyncMock() + return AgentRunScheduleService( + temporal_adapter=temporal_adapter, + authorization_service=authorization_service, + schedule_repository=schedule_repository, + agent_repository=agent_repository, + ) + + +def _request(**overrides) -> CreateAgentRunScheduleRequest: + payload: dict = { + "name": "daily-summary", + "cron_expression": "0 17 * * MON-FRI", + "timezone": "America/New_York", + "initial_input": ScheduleInitialInput(content="hello"), + } + payload.update(overrides) + return CreateAgentRunScheduleRequest(**payload) + + +def _persisted(agent_id: str, request: CreateAgentRunScheduleRequest): + return AgentRunScheduleEntity( + id=str(uuid4()), + agent_id=agent_id, + name=request.name, + cron_expression=request.cron_expression, + interval_seconds=request.interval_seconds, + timezone=request.timezone, + paused=request.paused, + creator_principal={"user_id": "u1", "account_id": "a1"}, + task_params=request.task_params, + task_metadata=request.task_metadata, + initial_input=request.initial_input.to_dict(mode="json"), + ) + + +class TestRunScheduleIdHelpers: + def test_temporal_id_prefix(self): + assert build_run_schedule_temporal_id("row-1") == "agent-run-schedule:row-1" + + def test_authz_selector_distinct_from_bare_schedule(self): + # Bare schedules key the shared `schedule` resource as `{agent}--{name}`; + # run schedules must not collide with that namespace. 
+ selector = build_run_schedule_authz_selector("agent-123", "daily-summary") + assert selector == "run-schedule::agent-123::daily-summary" + assert selector != "agent-123--daily-summary" + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestAgentRunScheduleServiceCreate: + async def test_create_persists_and_schedules(self, service, agent): + request = _request() + persisted = _persisted(agent.id, request) + service.schedule_repository.get_by_agent_id_and_name.return_value = None + service.schedule_repository.create.return_value = persisted + + response = await service.create_schedule(agent, request, {"user_id": "u1"}) + + # Temporal schedule points at the run workflow with only the row id as arg, + # the server task queue, and the cron timezone passed through. + create_kwargs = service.temporal_adapter.create_schedule.call_args.kwargs + assert create_kwargs["workflow"] == "ScheduledAgentRunWorkflow" + assert create_kwargs["args"] == [persisted.id] + assert create_kwargs["schedule_id"] == build_run_schedule_temporal_id( + persisted.id + ) + assert create_kwargs["time_zone_name"] == "America/New_York" + + # Ownership registered before the Temporal write. 
+ service.authorization_service.register_resource.assert_called_once() + + assert response.name == "daily-summary" + assert response.initial_input_method == "event/send" # async agent + assert response.state == RunScheduleState.ACTIVE + assert response.initial_input.content == "hello" + + async def test_create_rejects_duplicate_name(self, service, agent): + request = _request() + service.schedule_repository.get_by_agent_id_and_name.return_value = _persisted( + agent.id, request + ) + + with pytest.raises(ClientError): + await service.create_schedule(agent, request, {"user_id": "u1"}) + + service.temporal_adapter.create_schedule.assert_not_called() + + async def test_create_rolls_back_row_on_temporal_failure(self, service, agent): + request = _request() + persisted = _persisted(agent.id, request) + service.schedule_repository.get_by_agent_id_and_name.return_value = None + service.schedule_repository.create.return_value = persisted + service.temporal_adapter.create_schedule.side_effect = RuntimeError("boom") + + with pytest.raises(RuntimeError): + await service.create_schedule(agent, request, {"user_id": "u1"}) + + # The orphaned row and auth entry are compensated. + service.schedule_repository.delete.assert_called_once_with(id=persisted.id) + service.authorization_service.deregister_resource.assert_called_once() + + async def test_create_rolls_back_row_on_auth_registration_failure( + self, service, agent + ): + request = _request() + persisted = _persisted(agent.id, request) + service.schedule_repository.get_by_agent_id_and_name.return_value = None + service.schedule_repository.create.return_value = persisted + service.authorization_service.register_resource.side_effect = RuntimeError( + "authz down" + ) + + with pytest.raises(RuntimeError): + await service.create_schedule(agent, request, {"user_id": "u1"}) + + # Auth registration failing must still roll back the persisted row, and + # must not create a Temporal schedule. 
+ service.schedule_repository.delete.assert_called_once_with(id=persisted.id) + service.temporal_adapter.create_schedule.assert_not_called() + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestAgentRunScheduleServiceList: + async def test_list_filters_by_authorized_selectors(self, service, agent): + req_a = _request(name="sched-a") + req_b = _request(name="sched-b") + rows = [_persisted(agent.id, req_a), _persisted(agent.id, req_b)] + service.schedule_repository.list_by_agent_id.return_value = rows + service.agent_repository.get.return_value = agent + + # Authorize only sched-a's selector. + authorized = [build_run_schedule_authz_selector(agent.id, "sched-a")] + result = await service.list_schedules( + agent.id, authorized_schedule_ids=authorized + ) + + assert result.total == 1 + assert result.run_schedules[0].name == "sched-a" + + async def test_list_none_authorized_means_bypass(self, service, agent): + rows = [_persisted(agent.id, _request(name="sched-a"))] + service.schedule_repository.list_by_agent_id.return_value = rows + service.agent_repository.get.return_value = agent + + result = await service.list_schedules(agent.id, authorized_schedule_ids=None) + + assert result.total == 1 diff --git a/agentex/tests/unit/services/test_schedule_service.py b/agentex/tests/unit/services/test_schedule_service.py deleted file mode 100644 index 0e0f4a07..00000000 --- a/agentex/tests/unit/services/test_schedule_service.py +++ /dev/null @@ -1,806 +0,0 @@ -from datetime import UTC, datetime, timedelta -from types import SimpleNamespace -from unittest.mock import AsyncMock, MagicMock, Mock -from uuid import uuid4 - -import pytest -from src.adapters.temporal.exceptions import ( - TemporalScheduleAlreadyExistsError, - TemporalScheduleError, - TemporalScheduleNotFoundError, -) -from src.api.schemas.schedules import ( - CreateScheduleRequest, - ScheduleListResponse, - ScheduleResponse, - ScheduleState, -) -from src.domain.entities.agents import ACPType, AgentEntity, AgentStatus 
-from src.domain.services.schedule_service import ( - SCHEDULE_ID_SEPARATOR, - ScheduleService, - build_schedule_id, - parse_schedule_id, -) -from temporalio.client import ( - Schedule, - ScheduleActionStartWorkflow, - ScheduleDescription, - ScheduleInfo, - ScheduleIntervalSpec, - ScheduleSpec, -) -from temporalio.client import ( - ScheduleState as TemporalScheduleState, -) - - -@pytest.fixture -def mock_temporal_adapter(): - """Mock Temporal adapter for testing schedule service""" - mock = AsyncMock() - mock.create_schedule = AsyncMock() - mock.describe_schedule = AsyncMock() - mock.list_schedules = AsyncMock() - mock.pause_schedule = AsyncMock() - mock.unpause_schedule = AsyncMock() - mock.trigger_schedule = AsyncMock() - mock.delete_schedule = AsyncMock() - return mock - - -@pytest.fixture -def mock_authorization_service(): - """Mock authorization service with a resolvable creator principal.""" - mock = Mock() - mock.principal_context = SimpleNamespace( - user_id="user-test", service_account_id=None, account_id="acct-test" - ) - mock.register_resource = AsyncMock(return_value=None) - mock.deregister_resource = AsyncMock(return_value=None) - return mock - - -@pytest.fixture -def schedule_service(mock_temporal_adapter, mock_authorization_service): - """Create ScheduleService instance with mocked temporal adapter""" - return ScheduleService( - temporal_adapter=mock_temporal_adapter, - authorization_service=mock_authorization_service, - ) - - -@pytest.fixture -def sample_agent(): - """Sample agent entity for testing""" - return AgentEntity( - id=str(uuid4()), - name="test-agent", - description="A test agent for schedule testing", - status=AgentStatus.READY, - acp_type=ACPType.ASYNC, - acp_url="http://test-acp.example.com", - ) - - -@pytest.fixture -def sample_create_schedule_request(): - """Sample schedule creation request""" - return CreateScheduleRequest( - name="weekly-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * 
0", - workflow_params={"key": "value"}, - ) - - -@pytest.fixture -def sample_create_schedule_request_interval(): - """Sample schedule creation request with interval""" - return CreateScheduleRequest( - name="interval-task", - workflow_name="test-workflow", - task_queue="test-queue", - interval_seconds=3600, - workflow_params={"key": "value"}, - ) - - -def create_mock_schedule_description( - schedule_id: str, - workflow_name: str = "test-workflow", - task_queue: str = "test-queue", - paused: bool = False, - cron_expressions: list[str] | None = None, - intervals: list[ScheduleIntervalSpec] | None = None, -) -> ScheduleDescription: - """Helper to create a mock ScheduleDescription""" - # Create mock action - mock_action = MagicMock(spec=ScheduleActionStartWorkflow) - mock_action.workflow = workflow_name - mock_action.id = f"{schedule_id}-run" - mock_action.task_queue = task_queue - mock_action.args = None - - # Create mock spec - mock_spec = MagicMock(spec=ScheduleSpec) - mock_spec.cron_expressions = cron_expressions or [] - mock_spec.intervals = intervals or [] - mock_spec.start_at = None - mock_spec.end_at = None - - # Create mock state - mock_state = MagicMock(spec=TemporalScheduleState) - mock_state.paused = paused - - # Create mock schedule - mock_schedule = MagicMock(spec=Schedule) - mock_schedule.action = mock_action - mock_schedule.spec = mock_spec - mock_schedule.state = mock_state - - # Create mock info - mock_info = MagicMock(spec=ScheduleInfo) - mock_info.num_actions = 5 - mock_info.num_actions_missed_catchup_window = 0 - mock_info.next_action_times = [datetime.now(UTC) + timedelta(hours=1)] - mock_info.recent_actions = [] - mock_info.create_time = datetime.now(UTC) - - # Create mock description - mock_description = MagicMock(spec=ScheduleDescription) - mock_description.schedule = mock_schedule - mock_description.info = mock_info - - return mock_description - - -@pytest.mark.unit -class TestScheduleIdHelpers: - """Test suite for schedule ID helper 
functions""" - - def test_build_schedule_id(self): - """Test building schedule ID from agent ID and schedule name""" - agent_id = "agent-123" - schedule_name = "weekly-task" - - result = build_schedule_id(agent_id, schedule_name) - - assert result == f"agent-123{SCHEDULE_ID_SEPARATOR}weekly-task" - assert SCHEDULE_ID_SEPARATOR in result - - def test_parse_schedule_id(self): - """Test parsing schedule ID into agent ID and schedule name""" - schedule_id = f"agent-123{SCHEDULE_ID_SEPARATOR}weekly-task" - - agent_id, schedule_name = parse_schedule_id(schedule_id) - - assert agent_id == "agent-123" - assert schedule_name == "weekly-task" - - def test_parse_schedule_id_invalid_format(self): - """Test parsing invalid schedule ID""" - schedule_id = "invalid-id-without-separator" - - agent_id, schedule_name = parse_schedule_id(schedule_id) - - assert agent_id == schedule_id - assert schedule_name == "" - - def test_build_and_parse_roundtrip(self): - """Test that build and parse are inverse operations""" - original_agent_id = "agent-uuid-12345" - original_schedule_name = "my-schedule" - - schedule_id = build_schedule_id(original_agent_id, original_schedule_name) - parsed_agent_id, parsed_schedule_name = parse_schedule_id(schedule_id) - - assert parsed_agent_id == original_agent_id - assert parsed_schedule_name == original_schedule_name - - -@pytest.mark.unit -@pytest.mark.asyncio -class TestScheduleService: - """Test suite for ScheduleService""" - - async def test_create_schedule_with_cron( - self, - schedule_service, - mock_temporal_adapter, - sample_agent, - sample_create_schedule_request, - ): - """Test creating a schedule with cron expression""" - # Given - expected_schedule_id = build_schedule_id( - sample_agent.id, sample_create_schedule_request.name - ) - mock_description = create_mock_schedule_description( - schedule_id=expected_schedule_id, - workflow_name=sample_create_schedule_request.workflow_name, - task_queue=sample_create_schedule_request.task_queue, - 
cron_expressions=[sample_create_schedule_request.cron_expression], - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.create_schedule( - sample_agent, sample_create_schedule_request - ) - - # Then - assert result is not None - assert isinstance(result, ScheduleResponse) - assert result.schedule_id == expected_schedule_id - assert result.agent_id == sample_agent.id - assert result.name == sample_create_schedule_request.name - assert ( - result.action.workflow_name == sample_create_schedule_request.workflow_name - ) - assert result.action.task_queue == sample_create_schedule_request.task_queue - - # Verify temporal adapter was called - mock_temporal_adapter.create_schedule.assert_called_once() - call_kwargs = mock_temporal_adapter.create_schedule.call_args[1] - assert call_kwargs["schedule_id"] == expected_schedule_id - assert call_kwargs["workflow"] == sample_create_schedule_request.workflow_name - assert call_kwargs["task_queue"] == sample_create_schedule_request.task_queue - assert call_kwargs["cron_expressions"] == [ - sample_create_schedule_request.cron_expression - ] - - async def test_create_schedule_with_interval( - self, - schedule_service, - mock_temporal_adapter, - sample_agent, - sample_create_schedule_request_interval, - ): - """Test creating a schedule with interval""" - # Given - expected_schedule_id = build_schedule_id( - sample_agent.id, sample_create_schedule_request_interval.name - ) - mock_description = create_mock_schedule_description( - schedule_id=expected_schedule_id, - workflow_name=sample_create_schedule_request_interval.workflow_name, - task_queue=sample_create_schedule_request_interval.task_queue, - intervals=[ScheduleIntervalSpec(every=timedelta(seconds=3600))], - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.create_schedule( - sample_agent, sample_create_schedule_request_interval - ) - - # 
Then - assert result is not None - assert isinstance(result, ScheduleResponse) - assert result.schedule_id == expected_schedule_id - - # Verify temporal adapter was called with interval - mock_temporal_adapter.create_schedule.assert_called_once() - call_kwargs = mock_temporal_adapter.create_schedule.call_args[1] - assert ( - call_kwargs["interval_seconds"] - == sample_create_schedule_request_interval.interval_seconds - ) - - async def test_create_schedule_with_execution_timeout( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test creating a schedule with execution timeout""" - # Given - request = CreateScheduleRequest( - name="timeout-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - execution_timeout_seconds=3600, - ) - expected_schedule_id = build_schedule_id(sample_agent.id, request.name) - mock_description = create_mock_schedule_description( - schedule_id=expected_schedule_id, - workflow_name=request.workflow_name, - task_queue=request.task_queue, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.create_schedule(sample_agent, request) - - # Then - assert result is not None - mock_temporal_adapter.create_schedule.assert_called_once() - call_kwargs = mock_temporal_adapter.create_schedule.call_args[1] - assert call_kwargs["execution_timeout"] == timedelta(seconds=3600) - - async def test_create_schedule_paused( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test creating a schedule in paused state""" - # Given - request = CreateScheduleRequest( - name="paused-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - paused=True, - ) - expected_schedule_id = build_schedule_id(sample_agent.id, request.name) - mock_description = create_mock_schedule_description( - schedule_id=expected_schedule_id, - workflow_name=request.workflow_name, - 
task_queue=request.task_queue, - paused=True, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.create_schedule(sample_agent, request) - - # Then - assert result is not None - assert result.state == ScheduleState.PAUSED - mock_temporal_adapter.create_schedule.assert_called_once() - call_kwargs = mock_temporal_adapter.create_schedule.call_args[1] - assert call_kwargs["paused"] is True - - async def test_get_schedule( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test getting a schedule by name""" - # Given - schedule_name = "test-schedule" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - mock_description = create_mock_schedule_description( - schedule_id=schedule_id, - workflow_name="test-workflow", - task_queue="test-queue", - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.get_schedule(sample_agent.id, schedule_name) - - # Then - assert result is not None - assert isinstance(result, ScheduleResponse) - assert result.schedule_id == schedule_id - assert result.name == schedule_name - assert result.agent_id == sample_agent.id - mock_temporal_adapter.describe_schedule.assert_called_once_with(schedule_id) - - async def test_get_schedule_paused( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test getting a paused schedule""" - # Given - schedule_name = "paused-schedule" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - mock_description = create_mock_schedule_description( - schedule_id=schedule_id, - workflow_name="test-workflow", - task_queue="test-queue", - paused=True, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.get_schedule(sample_agent.id, schedule_name) - - # Then - assert result.state == ScheduleState.PAUSED - - async def test_list_schedules_for_agent( - 
self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test listing schedules for a specific agent""" - # Given - schedule_id_1 = build_schedule_id(sample_agent.id, "schedule-1") - schedule_id_2 = build_schedule_id(sample_agent.id, "schedule-2") - schedule_id_other = build_schedule_id("other-agent", "schedule-3") - - mock_schedule_1 = MagicMock() - mock_schedule_1.id = schedule_id_1 - mock_schedule_1.info = MagicMock() - mock_schedule_1.info.action = MagicMock(spec=ScheduleActionStartWorkflow) - mock_schedule_1.info.action.workflow = "workflow-1" - mock_schedule_1.info.next_action_times = [datetime.now(UTC)] - mock_schedule_1.info.paused = False - - mock_schedule_2 = MagicMock() - mock_schedule_2.id = schedule_id_2 - mock_schedule_2.info = MagicMock() - mock_schedule_2.info.action = MagicMock(spec=ScheduleActionStartWorkflow) - mock_schedule_2.info.action.workflow = "workflow-2" - mock_schedule_2.info.next_action_times = [] - mock_schedule_2.info.paused = True - - mock_schedule_other = MagicMock() - mock_schedule_other.id = schedule_id_other - mock_schedule_other.info = MagicMock() - mock_schedule_other.info.action = MagicMock(spec=ScheduleActionStartWorkflow) - mock_schedule_other.info.action.workflow = "workflow-3" - mock_schedule_other.info.next_action_times = [] - mock_schedule_other.info.paused = False - - mock_temporal_adapter.list_schedules.return_value = [ - mock_schedule_1, - mock_schedule_2, - mock_schedule_other, - ] - - # When - result = await schedule_service.list_schedules(agent_id=sample_agent.id) - - # Then - assert result is not None - assert isinstance(result, ScheduleListResponse) - assert result.total == 2 # Only schedules for this agent - assert len(result.schedules) == 2 - - schedule_names = [s.name for s in result.schedules] - assert "schedule-1" in schedule_names - assert "schedule-2" in schedule_names - - async def test_list_schedules_all(self, schedule_service, mock_temporal_adapter): - """Test listing all schedules without 
agent filter""" - # Given - mock_schedule = MagicMock() - mock_schedule.id = "agent-1--schedule-1" - mock_schedule.info = MagicMock() - mock_schedule.info.action = MagicMock(spec=ScheduleActionStartWorkflow) - mock_schedule.info.action.workflow = "workflow-1" - mock_schedule.info.next_action_times = [] - mock_schedule.info.paused = False - - mock_temporal_adapter.list_schedules.return_value = [mock_schedule] - - # When - result = await schedule_service.list_schedules(agent_id=None) - - # Then - assert result is not None - assert result.total == 1 - - async def test_pause_schedule( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test pausing a schedule""" - # Given - schedule_name = "active-schedule" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - mock_description = create_mock_schedule_description( - schedule_id=schedule_id, - paused=True, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.pause_schedule( - sample_agent.id, schedule_name, note="Maintenance" - ) - - # Then - assert result is not None - assert result.state == ScheduleState.PAUSED - mock_temporal_adapter.pause_schedule.assert_called_once_with( - schedule_id, note="Maintenance" - ) - - async def test_pause_schedule_without_note( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test pausing a schedule without a note""" - # Given - schedule_name = "active-schedule" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - mock_description = create_mock_schedule_description( - schedule_id=schedule_id, - paused=True, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.pause_schedule(sample_agent.id, schedule_name) - - # Then - assert result is not None - mock_temporal_adapter.pause_schedule.assert_called_once_with( - schedule_id, note=None - ) - - async def test_unpause_schedule( - self, 
schedule_service, mock_temporal_adapter, sample_agent - ): - """Test unpausing a schedule""" - # Given - schedule_name = "paused-schedule" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - mock_description = create_mock_schedule_description( - schedule_id=schedule_id, - paused=False, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.unpause_schedule( - sample_agent.id, schedule_name, note="Resuming operations" - ) - - # Then - assert result is not None - assert result.state == ScheduleState.ACTIVE - mock_temporal_adapter.unpause_schedule.assert_called_once_with( - schedule_id, note="Resuming operations" - ) - - async def test_unpause_schedule_without_note( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test unpausing a schedule without a note""" - # Given - schedule_name = "paused-schedule" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - mock_description = create_mock_schedule_description( - schedule_id=schedule_id, - paused=False, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.unpause_schedule(sample_agent.id, schedule_name) - - # Then - assert result is not None - mock_temporal_adapter.unpause_schedule.assert_called_once_with( - schedule_id, note=None - ) - - async def test_trigger_schedule( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test triggering a schedule immediately""" - # Given - schedule_name = "scheduled-task" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - mock_description = create_mock_schedule_description( - schedule_id=schedule_id, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.trigger_schedule(sample_agent.id, schedule_name) - - # Then - assert result is not None - 
mock_temporal_adapter.trigger_schedule.assert_called_once_with(schedule_id) - - async def test_delete_schedule( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test deleting a schedule""" - # Given - schedule_name = "schedule-to-delete" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - - # When - await schedule_service.delete_schedule(sample_agent.id, schedule_name) - - # Then - mock_temporal_adapter.delete_schedule.assert_called_once_with(schedule_id) - - async def test_description_to_response_with_workflow_params( - self, schedule_service, sample_agent - ): - """Test converting schedule description with workflow params""" - # Given - schedule_name = "task-with-params" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - - # Create mock with args - mock_action = MagicMock(spec=ScheduleActionStartWorkflow) - mock_action.workflow = "test-workflow" - mock_action.id = f"{schedule_id}-run" - mock_action.task_queue = "test-queue" - - # Mock args with data attribute (simulating Temporal payload) - mock_arg = MagicMock() - mock_arg.data = b'{"key": "value"}' - mock_action.args = [mock_arg] - - mock_spec = MagicMock(spec=ScheduleSpec) - mock_spec.cron_expressions = ["0 0 * * *"] - mock_spec.intervals = [] - mock_spec.start_at = None - mock_spec.end_at = None - - mock_state = MagicMock(spec=TemporalScheduleState) - mock_state.paused = False - - mock_schedule = MagicMock(spec=Schedule) - mock_schedule.action = mock_action - mock_schedule.spec = mock_spec - mock_schedule.state = mock_state - - mock_info = MagicMock(spec=ScheduleInfo) - mock_info.num_actions = 10 - mock_info.num_actions_missed_catchup_window = 1 - mock_info.next_action_times = [] - mock_info.recent_actions = [] - mock_info.create_time = datetime.now(UTC) - - mock_description = MagicMock(spec=ScheduleDescription) - mock_description.schedule = mock_schedule - mock_description.info = mock_info - - # When - result = schedule_service._description_to_response( 
- schedule_id, mock_description - ) - - # Then - assert result.schedule_id == schedule_id - assert result.name == schedule_name - assert result.agent_id == sample_agent.id - assert result.action.workflow_name == "test-workflow" - assert result.num_actions_taken == 10 - assert result.num_actions_missed == 1 - assert result.action.workflow_params == [{"key": "value"}] - - async def test_description_to_response_with_intervals( - self, schedule_service, sample_agent - ): - """Test converting schedule description with interval spec""" - # Given - schedule_name = "interval-task" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - - mock_description = create_mock_schedule_description( - schedule_id=schedule_id, - intervals=[ - ScheduleIntervalSpec(every=timedelta(seconds=3600)), - ScheduleIntervalSpec(every=timedelta(seconds=7200)), - ], - ) - - # When - result = schedule_service._description_to_response( - schedule_id, mock_description - ) - - # Then - assert result.spec.intervals_seconds == [3600, 7200] - - async def test_create_schedule_with_start_and_end_dates( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test creating a schedule with start and end dates""" - # Given - start_at = datetime.now(UTC) + timedelta(days=1) - end_at = datetime.now(UTC) + timedelta(days=30) - request = CreateScheduleRequest( - name="bounded-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - start_at=start_at, - end_at=end_at, - ) - expected_schedule_id = build_schedule_id(sample_agent.id, request.name) - mock_description = create_mock_schedule_description( - schedule_id=expected_schedule_id, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.create_schedule(sample_agent, request) - - # Then - assert result is not None - mock_temporal_adapter.create_schedule.assert_called_once() - call_kwargs = 
mock_temporal_adapter.create_schedule.call_args[1] - assert call_kwargs["start_at"] == start_at - assert call_kwargs["end_at"] == end_at - - async def test_create_schedule_already_exists_error( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test that schedule already exists error propagates""" - # Given - request = CreateScheduleRequest( - name="existing-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - ) - mock_temporal_adapter.create_schedule.side_effect = ( - TemporalScheduleAlreadyExistsError( - message="Schedule already exists", - detail="Schedule 'existing-task' already exists", - ) - ) - - # When/Then - with pytest.raises(TemporalScheduleAlreadyExistsError): - await schedule_service.create_schedule(sample_agent, request) - - async def test_get_schedule_not_found_error( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test that schedule not found error propagates""" - # Given - mock_temporal_adapter.describe_schedule.side_effect = ( - TemporalScheduleNotFoundError( - message="Schedule not found", - detail="Schedule 'nonexistent' not found", - ) - ) - - # When/Then - with pytest.raises(TemporalScheduleNotFoundError): - await schedule_service.get_schedule(sample_agent.id, "nonexistent") - - async def test_pause_schedule_not_found_error( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test that pause schedule not found error propagates""" - # Given - mock_temporal_adapter.pause_schedule.side_effect = ( - TemporalScheduleNotFoundError( - message="Schedule not found", - detail="Schedule 'nonexistent' not found", - ) - ) - - # When/Then - with pytest.raises(TemporalScheduleNotFoundError): - await schedule_service.pause_schedule(sample_agent.id, "nonexistent") - - async def test_delete_schedule_not_found_error( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test that delete schedule not found error propagates""" - # Given 
- mock_temporal_adapter.delete_schedule.side_effect = ( - TemporalScheduleNotFoundError( - message="Schedule not found", - detail="Schedule 'nonexistent' not found", - ) - ) - - # When/Then - with pytest.raises(TemporalScheduleNotFoundError): - await schedule_service.delete_schedule(sample_agent.id, "nonexistent") - - async def test_trigger_schedule_not_found_error( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test that trigger schedule not found error propagates""" - # Given - mock_temporal_adapter.trigger_schedule.side_effect = ( - TemporalScheduleNotFoundError( - message="Schedule not found", - detail="Schedule 'nonexistent' not found", - ) - ) - - # When/Then - with pytest.raises(TemporalScheduleNotFoundError): - await schedule_service.trigger_schedule(sample_agent.id, "nonexistent") - - async def test_list_schedules_error(self, schedule_service, mock_temporal_adapter): - """Test that list schedules error propagates""" - # Given - mock_temporal_adapter.list_schedules.side_effect = TemporalScheduleError( - message="Failed to list schedules", - detail="Temporal connection error", - ) - - # When/Then - with pytest.raises(TemporalScheduleError): - await schedule_service.list_schedules() diff --git a/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py b/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py new file mode 100644 index 00000000..666c27ac --- /dev/null +++ b/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py @@ -0,0 +1,243 @@ +from unittest.mock import AsyncMock, MagicMock +from uuid import uuid4 + +import pytest +import src.temporal.activities.scheduled_agent_run_activities as activities_module +from src.adapters.crud_store.exceptions import ItemDoesNotExist +from src.api.routes.agent_run_schedules import _extract_creator_principal +from src.domain.entities.agent_run_schedules import AgentRunScheduleEntity +from src.domain.entities.agents import ACPType, AgentEntity, AgentStatus +from 
src.domain.entities.agents_rpc import AgentRPCMethod +from src.domain.entities.task_messages import MessageAuthor, TextContentEntity +from src.domain.entities.tasks import TaskEntity +from src.temporal.activities.scheduled_agent_run_activities import ( + ScheduledAgentRunActivities, + _build_initial_content, +) + + +def _agent(acp_type=ACPType.ASYNC, status=AgentStatus.READY): + return AgentEntity( + id="agent-1", + name="test-agent", + description="A test agent", + status=status, + acp_type=acp_type, + acp_url="http://acp.example.com", + ) + + +def _schedule(**overrides) -> AgentRunScheduleEntity: + payload: dict = { + "id": str(uuid4()), + "agent_id": "agent-1", + "name": "daily-summary", + "cron_expression": "0 17 * * *", + "creator_principal": {"user_id": "u1", "account_id": "a1"}, + "initial_input": {"type": "text", "author": "user", "content": "hello"}, + } + payload.update(overrides) + return AgentRunScheduleEntity(**payload) + + +def _fake_use_case(agent, created_task): + use_case = MagicMock() + use_case.agent_repository = AsyncMock() + use_case.agent_repository.get.return_value = agent + use_case.handle_rpc_request = AsyncMock(return_value=created_task) + use_case.task_service = AsyncMock() + # AuthZ check succeeds by default (no-op / allowed). 
+ use_case.authorization_service = AsyncMock() + use_case.authorization_service.check = AsyncMock(return_value=True) + return use_case + + +@pytest.fixture +def activity_instance(monkeypatch): + instance = ScheduledAgentRunActivities( + global_dependencies=MagicMock(), + schedule_repository=AsyncMock(), + ) + return instance + + +def _patch_use_case(monkeypatch, use_case): + monkeypatch.setattr( + activities_module, + "build_acp_use_case_for_principal", + lambda *args, **kwargs: use_case, + ) + + +class TestBuildInitialContent: + def test_builds_text_content(self): + content = _build_initial_content( + {"type": "text", "author": "user", "content": "hi there"} + ) + assert isinstance(content, TextContentEntity) + assert content.content == "hi there" + assert content.author == MessageAuthor.USER + + +class TestExtractCreatorPrincipal: + def test_strips_to_safe_subset(self): + principal = { + "user_id": "u1", + "account_id": "a1", + "principal_type": "user", + # credentials that must never be persisted: + "cookie": "session=abc", + "api_key": "sk-123", + "authorization": "Bearer xyz", + } + result = _extract_creator_principal(principal) + assert result == { + "user_id": "u1", + "account_id": "a1", + "principal_type": "user", + } + assert "cookie" not in result + assert "api_key" not in result + assert "authorization" not in result + + def test_none_principal_yields_empty(self): + assert _extract_creator_principal(None) == {} + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestLaunchScheduledAgentRun: + async def test_skips_when_schedule_missing(self, activity_instance): + activity_instance.schedule_repository.get.side_effect = ItemDoesNotExist("x") + + result = await activity_instance.launch_scheduled_agent_run("sched-1", "fire-1") + + assert result["status"] == "skipped" + assert result["reason"] == "schedule_not_found" + + async def test_skips_when_paused(self, activity_instance): + activity_instance.schedule_repository.get.return_value = _schedule(paused=True) 
+ + result = await activity_instance.launch_scheduled_agent_run("sched-1", "fire-1") + + assert result["status"] == "skipped" + assert result["reason"] == "schedule_paused" + + async def test_async_agent_delivers_via_event_send( + self, activity_instance, monkeypatch + ): + schedule = _schedule() + activity_instance.schedule_repository.get.return_value = schedule + task = TaskEntity(id="task-1", task_metadata={"schedule_id": schedule.id}) + use_case = _fake_use_case(_agent(ACPType.ASYNC), task) + _patch_use_case(monkeypatch, use_case) + + result = await activity_instance.launch_scheduled_agent_run( + schedule.id, "fire-1" + ) + + assert result["status"] == "launched" + assert result["method"] == "event/send" + methods = [ + call.kwargs["method"] for call in use_case.handle_rpc_request.call_args_list + ] + assert methods == [AgentRPCMethod.TASK_CREATE, AgentRPCMethod.EVENT_SEND] + # Deterministic task name embeds schedule id + fire id. + create_params = use_case.handle_rpc_request.call_args_list[0].kwargs["params"] + assert create_params.name == f"scheduled-run:{schedule.id}:fire-1" + use_case.task_service.update_task.assert_awaited_once() + # Fire-time authz mirrors the RPC route: agent.execute, then task.create, + # then task.update on the created task — in that order. 
+ from src.api.schemas.authorization_types import ( + AgentexResourceType, + AuthorizedOperationType, + ) + + checks = [ + (c.kwargs["resource"].type, c.kwargs["operation"]) + for c in use_case.authorization_service.check.call_args_list + ] + assert checks == [ + (AgentexResourceType.agent, AuthorizedOperationType.execute), + (AgentexResourceType.task, AuthorizedOperationType.create), + (AgentexResourceType.task, AuthorizedOperationType.update), + ] + + async def test_sync_agent_delivers_via_message_send( + self, activity_instance, monkeypatch + ): + schedule = _schedule() + activity_instance.schedule_repository.get.return_value = schedule + task = TaskEntity(id="task-1") + use_case = _fake_use_case(_agent(ACPType.SYNC), task) + _patch_use_case(monkeypatch, use_case) + + result = await activity_instance.launch_scheduled_agent_run( + schedule.id, "fire-1" + ) + + assert result["method"] == "message/send" + methods = [ + call.kwargs["method"] for call in use_case.handle_rpc_request.call_args_list + ] + assert methods == [AgentRPCMethod.TASK_CREATE, AgentRPCMethod.MESSAGE_SEND] + + async def test_skips_delivery_when_already_delivered( + self, activity_instance, monkeypatch + ): + schedule = _schedule() + activity_instance.schedule_repository.get.return_value = schedule + # Retry case: the deterministic task already carries the delivered marker. + task = TaskEntity( + id="task-1", task_metadata={"scheduled_input_delivered": True} + ) + use_case = _fake_use_case(_agent(ACPType.ASYNC), task) + _patch_use_case(monkeypatch, use_case) + + result = await activity_instance.launch_scheduled_agent_run( + schedule.id, "fire-1" + ) + + assert result["status"] == "skipped" + assert result["reason"] == "input_already_delivered" + # Only task/create ran; no second delivery call. 
+ assert use_case.handle_rpc_request.call_count == 1 + use_case.task_service.update_task.assert_not_awaited() + + async def test_skips_when_creator_permission_revoked( + self, activity_instance, monkeypatch + ): + from src.adapters.authorization.exceptions import AuthorizationError + + schedule = _schedule() + activity_instance.schedule_repository.get.return_value = schedule + use_case = _fake_use_case(_agent(ACPType.ASYNC), TaskEntity(id="t")) + # Creator's create permission was revoked since the schedule was made. + use_case.authorization_service.check = AsyncMock( + side_effect=AuthorizationError(message="forbidden") + ) + _patch_use_case(monkeypatch, use_case) + + result = await activity_instance.launch_scheduled_agent_run( + schedule.id, "fire-1" + ) + + assert result["status"] == "skipped" + assert result["reason"] == "permission_denied" + # Denied before any task creation. + use_case.handle_rpc_request.assert_not_called() + + async def test_skips_when_agent_deleted(self, activity_instance, monkeypatch): + schedule = _schedule() + activity_instance.schedule_repository.get.return_value = schedule + use_case = _fake_use_case( + _agent(ACPType.ASYNC, status=AgentStatus.DELETED), TaskEntity(id="t") + ) + _patch_use_case(monkeypatch, use_case) + + result = await activity_instance.launch_scheduled_agent_run( + schedule.id, "fire-1" + ) + + assert result["status"] == "skipped" + assert result["reason"] == "agent_deleted" diff --git a/agentex/tests/unit/use_cases/test_agent_run_schedules_use_case.py b/agentex/tests/unit/use_cases/test_agent_run_schedules_use_case.py new file mode 100644 index 00000000..0cf0e2f7 --- /dev/null +++ b/agentex/tests/unit/use_cases/test_agent_run_schedules_use_case.py @@ -0,0 +1,98 @@ +from unittest.mock import AsyncMock +from uuid import uuid4 + +import pytest +from src.api.schemas.agent_run_schedules import ( + CreateAgentRunScheduleRequest, + ScheduleInitialInput, +) +from src.domain.entities.agents import ACPType, AgentEntity, 
AgentStatus +from src.domain.exceptions import ClientError +from src.domain.use_cases.agent_run_schedules_use_case import ( + AgentRunSchedulesUseCase, +) + + +@pytest.fixture +def mock_service(): + mock = AsyncMock() + return mock + + +@pytest.fixture +def use_case(mock_service): + return AgentRunSchedulesUseCase(run_schedule_service=mock_service) + + +@pytest.fixture +def agent(): + return AgentEntity( + id=str(uuid4()), + name="test-agent", + description="A test agent", + status=AgentStatus.READY, + acp_type=ACPType.ASYNC, + acp_url="http://acp.example.com", + ) + + +def _request(**overrides) -> CreateAgentRunScheduleRequest: + payload: dict = { + "name": "daily-summary", + "cron_expression": "0 17 * * MON-FRI", + "initial_input": ScheduleInitialInput(content="hello"), + } + payload.update(overrides) + return CreateAgentRunScheduleRequest(**payload) + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestAgentRunSchedulesUseCase: + async def test_create_with_cron_delegates(self, use_case, mock_service, agent): + request = _request() + mock_service.create_schedule.return_value = "ok" + creator = {"user_id": "u1", "account_id": "a1"} + + result = await use_case.create_schedule(agent, request, creator) + + assert result == "ok" + mock_service.create_schedule.assert_called_once_with(agent, request, creator) + + async def test_create_with_interval_delegates(self, use_case, mock_service, agent): + request = _request(cron_expression=None, interval_seconds=30) + mock_service.create_schedule.return_value = "ok" + + await use_case.create_schedule(agent, request, {"user_id": "u1"}) + + mock_service.create_schedule.assert_called_once() + + async def test_create_requires_a_cadence(self, use_case, agent): + request = _request(cron_expression=None, interval_seconds=None) + + with pytest.raises(ClientError) as exc: + await use_case.create_schedule(agent, request, {"user_id": "u1"}) + + assert "cron_expression or interval_seconds" in str(exc.value) + + async def 
test_create_rejects_both_cadences(self, use_case, agent): + request = _request(cron_expression="0 0 * * *", interval_seconds=30) + + with pytest.raises(ClientError) as exc: + await use_case.create_schedule(agent, request, {"user_id": "u1"}) + + assert "only one" in str(exc.value) + + async def test_pause_resume_delete_delegate(self, use_case, mock_service, agent): + await use_case.pause_schedule(agent.id, "daily-summary", note="n") + mock_service.pause_schedule.assert_called_once_with( + agent.id, "daily-summary", note="n" + ) + + await use_case.resume_schedule(agent.id, "daily-summary") + mock_service.resume_schedule.assert_called_once_with( + agent.id, "daily-summary", note=None + ) + + await use_case.delete_schedule(agent.id, "daily-summary") + mock_service.delete_schedule.assert_called_once_with(agent.id, "daily-summary") diff --git a/agentex/tests/unit/use_cases/test_schedules_use_case.py b/agentex/tests/unit/use_cases/test_schedules_use_case.py deleted file mode 100644 index 5812c7cf..00000000 --- a/agentex/tests/unit/use_cases/test_schedules_use_case.py +++ /dev/null @@ -1,624 +0,0 @@ -from datetime import UTC, datetime, timedelta -from unittest.mock import AsyncMock -from uuid import uuid4 - -import pytest -from src.api.schemas.schedules import ( - CreateScheduleRequest, - ScheduleActionInfo, - ScheduleListItem, - ScheduleListResponse, - ScheduleResponse, - ScheduleSpecInfo, - ScheduleState, -) -from src.domain.entities.agents import ACPType, AgentEntity, AgentStatus -from src.domain.exceptions import ClientError -from src.domain.use_cases.schedules_use_case import SchedulesUseCase - - -@pytest.fixture -def mock_schedule_service(): - """Mock schedule service for testing use case""" - mock = AsyncMock() - mock.create_schedule = AsyncMock() - mock.get_schedule = AsyncMock() - mock.list_schedules = AsyncMock() - mock.pause_schedule = AsyncMock() - mock.unpause_schedule = AsyncMock() - mock.trigger_schedule = AsyncMock() - mock.delete_schedule = AsyncMock() - 
return mock - - -@pytest.fixture -def schedules_use_case(mock_schedule_service): - """Create SchedulesUseCase instance with mocked service""" - return SchedulesUseCase(schedule_service=mock_schedule_service) - - -@pytest.fixture -def sample_agent(): - """Sample agent entity for testing""" - return AgentEntity( - id=str(uuid4()), - name="test-agent", - description="A test agent for use case testing", - status=AgentStatus.READY, - acp_type=ACPType.ASYNC, - acp_url="http://test-acp.example.com", - ) - - -@pytest.fixture -def sample_schedule_response(sample_agent): - """Sample schedule response for testing""" - return ScheduleResponse( - schedule_id=f"{sample_agent.id}--weekly-task", - name="weekly-task", - agent_id=sample_agent.id, - state=ScheduleState.ACTIVE, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--weekly-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=["0 0 * * 0"], - intervals_seconds=[], - start_at=None, - end_at=None, - ), - num_actions_taken=5, - num_actions_missed=0, - next_action_times=[datetime.now(UTC) + timedelta(hours=1)], - last_action_time=datetime.now(UTC) - timedelta(days=1), - created_at=datetime.now(UTC) - timedelta(days=7), - ) - - -@pytest.mark.unit -@pytest.mark.asyncio -class TestSchedulesUseCase: - """Test suite for SchedulesUseCase""" - - async def test_create_schedule_with_cron( - self, - schedules_use_case, - mock_schedule_service, - sample_agent, - sample_schedule_response, - ): - """Test creating a schedule with cron expression""" - # Given - request = CreateScheduleRequest( - name="weekly-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * 0", - ) - mock_schedule_service.create_schedule.return_value = sample_schedule_response - - # When - result = await schedules_use_case.create_schedule(sample_agent, request) - - # Then - assert result is not None - assert 
isinstance(result, ScheduleResponse) - assert result.name == "weekly-task" - assert result.state == ScheduleState.ACTIVE - mock_schedule_service.create_schedule.assert_called_once_with( - sample_agent, request - ) - - async def test_create_schedule_with_interval( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test creating a schedule with interval""" - # Given - request = CreateScheduleRequest( - name="interval-task", - workflow_name="test-workflow", - task_queue="test-queue", - interval_seconds=3600, - ) - expected_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--interval-task", - name="interval-task", - agent_id=sample_agent.id, - state=ScheduleState.ACTIVE, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--interval-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=[], - intervals_seconds=[3600], - start_at=None, - end_at=None, - ), - ) - mock_schedule_service.create_schedule.return_value = expected_response - - # When - result = await schedules_use_case.create_schedule(sample_agent, request) - - # Then - assert result is not None - assert result.spec.intervals_seconds == [3600] - mock_schedule_service.create_schedule.assert_called_once() - - async def test_create_schedule_validation_error_no_schedule_spec( - self, schedules_use_case, sample_agent - ): - """Test that creating a schedule without cron or interval raises error""" - # Given - request = CreateScheduleRequest( - name="invalid-task", - workflow_name="test-workflow", - task_queue="test-queue", - # Neither cron_expression nor interval_seconds provided - ) - - # When/Then - with pytest.raises(ClientError) as exc_info: - await schedules_use_case.create_schedule(sample_agent, request) - - assert "Either cron_expression or interval_seconds must be provided" in str( - exc_info.value - ) - - async def test_create_schedule_with_both_cron_and_interval( - 
self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test creating a schedule with both cron and interval (should succeed)""" - # Given - having both is valid, cron takes precedence - request = CreateScheduleRequest( - name="combined-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - interval_seconds=3600, - ) - expected_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--combined-task", - name="combined-task", - agent_id=sample_agent.id, - state=ScheduleState.ACTIVE, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--combined-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=["0 0 * * *"], - intervals_seconds=[3600], - start_at=None, - end_at=None, - ), - ) - mock_schedule_service.create_schedule.return_value = expected_response - - # When - result = await schedules_use_case.create_schedule(sample_agent, request) - - # Then - assert result is not None - mock_schedule_service.create_schedule.assert_called_once() - - async def test_create_schedule_with_workflow_params( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test creating a schedule with workflow parameters""" - # Given - workflow_params = { - "input_data": "test", - "config": {"timeout": 300, "retries": 3}, - } - request = CreateScheduleRequest( - name="params-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - workflow_params=workflow_params, - ) - expected_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--params-task", - name="params-task", - agent_id=sample_agent.id, - state=ScheduleState.ACTIVE, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--params-task-run", - task_queue="test-queue", - workflow_params=[workflow_params], - ), - spec=ScheduleSpecInfo( - 
cron_expressions=["0 0 * * *"], - intervals_seconds=[], - start_at=None, - end_at=None, - ), - ) - mock_schedule_service.create_schedule.return_value = expected_response - - # When - result = await schedules_use_case.create_schedule(sample_agent, request) - - # Then - assert result is not None - assert result.action.workflow_params == [workflow_params] - - async def test_get_schedule( - self, - schedules_use_case, - mock_schedule_service, - sample_agent, - sample_schedule_response, - ): - """Test getting a schedule by name""" - # Given - mock_schedule_service.get_schedule.return_value = sample_schedule_response - - # When - result = await schedules_use_case.get_schedule(sample_agent.id, "weekly-task") - - # Then - assert result is not None - assert result.name == "weekly-task" - assert result.agent_id == sample_agent.id - mock_schedule_service.get_schedule.assert_called_once_with( - sample_agent.id, "weekly-task" - ) - - async def test_list_schedules( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test listing schedules for an agent""" - # Given - expected_response = ScheduleListResponse( - schedules=[ - ScheduleListItem( - schedule_id=f"{sample_agent.id}--schedule-1", - name="schedule-1", - agent_id=sample_agent.id, - state=ScheduleState.ACTIVE, - workflow_name="workflow-1", - next_action_time=datetime.now(UTC), - ), - ScheduleListItem( - schedule_id=f"{sample_agent.id}--schedule-2", - name="schedule-2", - agent_id=sample_agent.id, - state=ScheduleState.PAUSED, - workflow_name="workflow-2", - next_action_time=None, - ), - ], - total=2, - ) - mock_schedule_service.list_schedules.return_value = expected_response - - # When - result = await schedules_use_case.list_schedules(sample_agent.id) - - # Then - assert result is not None - assert result.total == 2 - assert len(result.schedules) == 2 - mock_schedule_service.list_schedules.assert_called_once_with( - agent_id=sample_agent.id, page_size=100, authorized_schedule_ids=None - ) - - async 
def test_list_schedules_with_page_size( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test listing schedules with custom page size""" - # Given - expected_response = ScheduleListResponse(schedules=[], total=0) - mock_schedule_service.list_schedules.return_value = expected_response - - # When - result = await schedules_use_case.list_schedules(sample_agent.id, page_size=50) - - # Then - assert result is not None - mock_schedule_service.list_schedules.assert_called_once_with( - agent_id=sample_agent.id, page_size=50, authorized_schedule_ids=None - ) - - async def test_pause_schedule( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test pausing a schedule""" - # Given - paused_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--weekly-task", - name="weekly-task", - agent_id=sample_agent.id, - state=ScheduleState.PAUSED, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--weekly-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=["0 0 * * 0"], - intervals_seconds=[], - start_at=None, - end_at=None, - ), - ) - mock_schedule_service.pause_schedule.return_value = paused_response - - # When - result = await schedules_use_case.pause_schedule( - sample_agent.id, "weekly-task", note="Maintenance window" - ) - - # Then - assert result is not None - assert result.state == ScheduleState.PAUSED - mock_schedule_service.pause_schedule.assert_called_once_with( - sample_agent.id, "weekly-task", note="Maintenance window" - ) - - async def test_pause_schedule_without_note( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test pausing a schedule without a note""" - # Given - paused_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--weekly-task", - name="weekly-task", - agent_id=sample_agent.id, - state=ScheduleState.PAUSED, - action=ScheduleActionInfo( - 
workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--weekly-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=["0 0 * * 0"], - intervals_seconds=[], - start_at=None, - end_at=None, - ), - ) - mock_schedule_service.pause_schedule.return_value = paused_response - - # When - result = await schedules_use_case.pause_schedule(sample_agent.id, "weekly-task") - - # Then - assert result is not None - mock_schedule_service.pause_schedule.assert_called_once_with( - sample_agent.id, "weekly-task", note=None - ) - - async def test_unpause_schedule( - self, - schedules_use_case, - mock_schedule_service, - sample_agent, - sample_schedule_response, - ): - """Test unpausing a schedule""" - # Given - mock_schedule_service.unpause_schedule.return_value = sample_schedule_response - - # When - result = await schedules_use_case.unpause_schedule( - sample_agent.id, "weekly-task", note="Maintenance complete" - ) - - # Then - assert result is not None - assert result.state == ScheduleState.ACTIVE - mock_schedule_service.unpause_schedule.assert_called_once_with( - sample_agent.id, "weekly-task", note="Maintenance complete" - ) - - async def test_unpause_schedule_without_note( - self, - schedules_use_case, - mock_schedule_service, - sample_agent, - sample_schedule_response, - ): - """Test unpausing a schedule without a note""" - # Given - mock_schedule_service.unpause_schedule.return_value = sample_schedule_response - - # When - result = await schedules_use_case.unpause_schedule( - sample_agent.id, "weekly-task" - ) - - # Then - assert result is not None - mock_schedule_service.unpause_schedule.assert_called_once_with( - sample_agent.id, "weekly-task", note=None - ) - - async def test_trigger_schedule( - self, - schedules_use_case, - mock_schedule_service, - sample_agent, - sample_schedule_response, - ): - """Test triggering a schedule immediately""" - # Given - 
mock_schedule_service.trigger_schedule.return_value = sample_schedule_response - - # When - result = await schedules_use_case.trigger_schedule( - sample_agent.id, "weekly-task" - ) - - # Then - assert result is not None - mock_schedule_service.trigger_schedule.assert_called_once_with( - sample_agent.id, "weekly-task" - ) - - async def test_delete_schedule( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test deleting a schedule""" - # Given - mock_schedule_service.delete_schedule.return_value = None - - # When - await schedules_use_case.delete_schedule(sample_agent.id, "weekly-task") - - # Then - mock_schedule_service.delete_schedule.assert_called_once_with( - sample_agent.id, "weekly-task" - ) - - async def test_create_schedule_paused( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test creating a schedule in paused state""" - # Given - request = CreateScheduleRequest( - name="paused-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - paused=True, - ) - paused_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--paused-task", - name="paused-task", - agent_id=sample_agent.id, - state=ScheduleState.PAUSED, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--paused-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=["0 0 * * *"], - intervals_seconds=[], - start_at=None, - end_at=None, - ), - ) - mock_schedule_service.create_schedule.return_value = paused_response - - # When - result = await schedules_use_case.create_schedule(sample_agent, request) - - # Then - assert result is not None - assert result.state == ScheduleState.PAUSED - - async def test_create_schedule_with_execution_timeout( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test creating a schedule with execution timeout""" - # Given - request = 
CreateScheduleRequest( - name="timeout-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - execution_timeout_seconds=7200, - ) - expected_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--timeout-task", - name="timeout-task", - agent_id=sample_agent.id, - state=ScheduleState.ACTIVE, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--timeout-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=["0 0 * * *"], - intervals_seconds=[], - start_at=None, - end_at=None, - ), - ) - mock_schedule_service.create_schedule.return_value = expected_response - - # When - result = await schedules_use_case.create_schedule(sample_agent, request) - - # Then - assert result is not None - # Verify the request was passed through to the service - call_args = mock_schedule_service.create_schedule.call_args - assert call_args[0][1].execution_timeout_seconds == 7200 - - async def test_create_schedule_with_time_bounds( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test creating a schedule with start and end times""" - # Given - start_at = datetime.now(UTC) + timedelta(days=1) - end_at = datetime.now(UTC) + timedelta(days=30) - request = CreateScheduleRequest( - name="bounded-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - start_at=start_at, - end_at=end_at, - ) - expected_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--bounded-task", - name="bounded-task", - agent_id=sample_agent.id, - state=ScheduleState.ACTIVE, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--bounded-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=["0 0 * * *"], - intervals_seconds=[], - start_at=start_at, - end_at=end_at, - ), - ) - 
mock_schedule_service.create_schedule.return_value = expected_response - - # When - result = await schedules_use_case.create_schedule(sample_agent, request) - - # Then - assert result is not None - assert result.spec.start_at == start_at - assert result.spec.end_at == end_at - - async def test_list_schedules_empty( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test listing schedules when none exist""" - # Given - expected_response = ScheduleListResponse(schedules=[], total=0) - mock_schedule_service.list_schedules.return_value = expected_response - - # When - result = await schedules_use_case.list_schedules(sample_agent.id) - - # Then - assert result is not None - assert result.total == 0 - assert len(result.schedules) == 0 From 41f8d658abbff05cc75eff744a6e6d51ea655151 Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Tue, 23 Jun 2026 11:29:37 -0400 Subject: [PATCH 02/24] feat(schedules): label scheduled tasks for the UI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The UI derives a task's display name from task_metadata.display_name (falling back to params.description), never the task's `name` field, so scheduled tasks rendered as "Unnamed task". Set a templated, per-fire display_name on each scheduled task — "Scheduled Message: {schedule_name} · {fire_time}" — placed first in the metadata so a caller-supplied display_name in the schedule's task_metadata still overrides it. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../temporal/activities/scheduled_agent_run_activities.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/agentex/src/temporal/activities/scheduled_agent_run_activities.py b/agentex/src/temporal/activities/scheduled_agent_run_activities.py index 7287e835..2937410c 100644 --- a/agentex/src/temporal/activities/scheduled_agent_run_activities.py +++ b/agentex/src/temporal/activities/scheduled_agent_run_activities.py @@ -20,6 +20,7 @@ Pydantic models), so args and the return value are plain str / dict. """ +from datetime import UTC, datetime from typing import Any from src.adapters.authorization.exceptions import AuthorizationError @@ -199,7 +200,13 @@ async def launch_scheduled_agent_run( return denied task_name = f"scheduled-run:{schedule_id}:{fire_id}" + # Human-friendly label the UI renders for the task (it reads + # task_metadata.display_name, never the deterministic `name` above). + # Templated per fire so runs are distinguishable; placed first so a + # caller-supplied display_name in schedule.task_metadata overrides it. + fire_time = datetime.now(UTC).strftime("%Y-%m-%d %H:%M UTC") task_metadata = { + "display_name": f"Scheduled Message: {schedule.name} · {fire_time}", **(schedule.task_metadata or {}), "schedule_id": schedule_id, "scheduled_fire_id": fire_id, From ec74e5d3fd6ae03eeff499b108578b7b88fb573b Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Tue, 23 Jun 2026 14:12:54 -0400 Subject: [PATCH 03/24] docs(schedules): remove internal tracker/design-doc references from comments This repository is public. Strip internal ticket IDs and design-decision shorthand from code comments and docstrings, keeping the descriptive text. No behavior change. 
--- .../2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py | 4 ++-- agentex/openapi.yaml | 2 +- agentex/src/api/routes/agent_run_schedules.py | 4 ++-- agentex/src/api/schemas/agent_run_schedules.py | 2 +- agentex/src/domain/entities/agent_run_schedules.py | 6 +++--- agentex/src/domain/services/agent_run_schedule_service.py | 2 +- .../temporal/activities/scheduled_agent_run_activities.py | 4 ++-- agentex/src/temporal/scheduled_agent_run_factory.py | 4 ++-- .../src/temporal/workflows/scheduled_agent_run_workflow.py | 2 +- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py b/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py index e63fcaf8..eb4e7a57 100644 --- a/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py +++ b/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py @@ -4,8 +4,8 @@ Revises: c7a1b2d3e4f5 Create Date: 2026-06-22 12:00:00.000000 -Creates the agent_run_schedules table backing the scheduled-agent-runs feature -(AGX1-368). Schema-only and idempotent: the table and its indexes are created +Creates the agent_run_schedules table backing the scheduled-agent-runs feature. +Schema-only and idempotent: the table and its indexes are created with IF NOT EXISTS-style guards (Alembic create_table on a fresh table), and the indexes target the just-created table so they are non-blocking by construction. """ diff --git a/agentex/openapi.yaml b/agentex/openapi.yaml index 069ab6e3..e8ab7b90 100644 --- a/agentex/openapi.yaml +++ b/agentex/openapi.yaml @@ -5591,7 +5591,7 @@ components: Never carries cookies, JWTs, API keys, OAuth tokens, or request headers — it - is creator *context* used only for AuthZ and ownership at fire time (D5/D6).' 
+ is creator *context* used only for AuthZ and ownership at fire time.' ScheduleInitialInput: properties: type: diff --git a/agentex/src/api/routes/agent_run_schedules.py b/agentex/src/api/routes/agent_run_schedules.py index 489537d7..46415fcb 100644 --- a/agentex/src/api/routes/agent_run_schedules.py +++ b/agentex/src/api/routes/agent_run_schedules.py @@ -30,7 +30,7 @@ # The canonical agent scheduling API. Schedules an agent *run* on each fire # (creates a fresh task + delivers the configured initial input), hiding the -# underlying Temporal workflow/task-queue details (AGX1-368, D1). It replaced the +# underlying Temporal workflow/task-queue details. It replaced the # earlier bare-workflow scheduler that previously owned this path. router = APIRouter( prefix="/agents/{agent_id}/schedules", @@ -49,7 +49,7 @@ def _extract_creator_principal(principal_context: Any) -> dict[str, Any]: """Capture the credential-free creator subset from the request principal. Stores only identity selectors (principal_type / user_id / service_account_id - / account_id). Never cookies, JWTs, API keys, OAuth tokens, or headers (D5/D6). + / account_id). Never cookies, JWTs, API keys, OAuth tokens, or headers. Returns an empty dict under authz bypass / when no principal is present. """ if principal_context is None: diff --git a/agentex/src/api/schemas/agent_run_schedules.py b/agentex/src/api/schemas/agent_run_schedules.py index b0a353b5..868fa820 100644 --- a/agentex/src/api/schemas/agent_run_schedules.py +++ b/agentex/src/api/schemas/agent_run_schedules.py @@ -29,7 +29,7 @@ class ScheduleCreatorPrincipal(BaseModel): """Credential-free creator identity stored with the schedule. Never carries cookies, JWTs, API keys, OAuth tokens, or request headers — it - is creator *context* used only for AuthZ and ownership at fire time (D5/D6). + is creator *context* used only for AuthZ and ownership at fire time. 
""" principal_type: str | None = Field( diff --git a/agentex/src/domain/entities/agent_run_schedules.py b/agentex/src/domain/entities/agent_run_schedules.py index 73744f3c..8d17897f 100644 --- a/agentex/src/domain/entities/agent_run_schedules.py +++ b/agentex/src/domain/entities/agent_run_schedules.py @@ -23,7 +23,7 @@ def infer_initial_input_method(acp_type: ACPType) -> InitialInputMethod: """Map an agent's ACP type to the delivery method for the initial input. async / agentic agents receive the first input as an ``event/send``; sync - agents receive it as a ``message/send`` (AGX1-368, D2 / Open Q5). + agents receive it as a ``message/send``. """ if acp_type == ACPType.SYNC: return InitialInputMethod.MESSAGE_SEND @@ -35,7 +35,7 @@ class AgentRunScheduleEntity(BaseModel): The Postgres row is the source of truth for what each future fire should do; the Temporal Schedule is only the recurring clock and carries nothing but the - schedule id (AGX1-368, D4). + schedule id. JSON-backed fields (``creator_principal``, ``task_params``, ``task_metadata``, ``initial_input``) are stored as plain dicts so they round-trip cleanly through @@ -68,7 +68,7 @@ class AgentRunScheduleEntity(BaseModel): ) paused: bool = Field(False, description="Whether the schedule is currently paused.") # Credential-free creator context: principal_type / user_id / service_account_id / - # account_id only. Never cookies, JWTs, API keys, OAuth tokens, or headers (D5/D6). + # account_id only. Never cookies, JWTs, API keys, OAuth tokens, or headers. 
creator_principal: dict[str, Any] = Field( ..., description="Credential-free creator identity used for AuthZ at fire time.", diff --git a/agentex/src/domain/services/agent_run_schedule_service.py b/agentex/src/domain/services/agent_run_schedule_service.py index f35c8505..a6083fc1 100644 --- a/agentex/src/domain/services/agent_run_schedule_service.py +++ b/agentex/src/domain/services/agent_run_schedule_service.py @@ -60,7 +60,7 @@ class AgentRunScheduleService: The Postgres row is the source of truth for the schedule definition; the Temporal Schedule is only the recurring clock and is given nothing but the - schedule row id as its workflow argument (AGX1-368, D4). + schedule row id as its workflow argument. """ def __init__( diff --git a/agentex/src/temporal/activities/scheduled_agent_run_activities.py b/agentex/src/temporal/activities/scheduled_agent_run_activities.py index 2937410c..e59bc9f6 100644 --- a/agentex/src/temporal/activities/scheduled_agent_run_activities.py +++ b/agentex/src/temporal/activities/scheduled_agent_run_activities.py @@ -1,5 +1,5 @@ """ -Temporal activity for scheduled agent runs (AGX1-368). +Temporal activity for scheduled agent runs. ``launch_scheduled_agent_run`` is the single activity each scheduled fire runs. It loads the persisted schedule, creates a fresh Agentex task with a deterministic @@ -180,7 +180,7 @@ async def launch_scheduled_agent_run( # Re-check the stored creator principal's permission at fire time, mirroring # the JSON-RPC route's authorization order: agent.execute (the RPC endpoint - # gate) then task.create (implements AGX1-368 D5's `check`). A revoked + # gate) then task.create (re-checks the creator's permission at fire time). A revoked # creator stops future fires instead of running under stale ownership. # AuthorizationError (403) is a permanent denial → skip cleanly; transient # authz errors propagate so Temporal retries. 
Under authz bypass (local / diff --git a/agentex/src/temporal/scheduled_agent_run_factory.py b/agentex/src/temporal/scheduled_agent_run_factory.py index 200132e3..c9c0339a 100644 --- a/agentex/src/temporal/scheduled_agent_run_factory.py +++ b/agentex/src/temporal/scheduled_agent_run_factory.py @@ -4,7 +4,7 @@ in task_retention_factory.py. Each scheduled fire creates a fresh Agentex task and delivers the schedule's -configured initial input under the *stored creator principal* (AGX1-368, D5) — +configured initial input under the *stored creator principal* — not as an agent identity. So the AgentsACPUseCase is rebuilt per fire with an AuthorizationService whose principal_context is that fire's creator principal and whose agent_identity is None, attributing task ownership and AuthZ checks to the @@ -51,7 +51,7 @@ class _ScheduledRunRequest: Carries the stored creator principal as ``state.principal_context`` with no ``agent_identity`` (so AuthZ attributes ownership to the creator, not a service) and no headers (so no live user credentials — cookies, API keys — - are forwarded downstream; D5/D6). ``build_delegation_headers`` returns an + are forwarded downstream). ``build_delegation_headers`` returns an empty mapping when there are no inbound credential headers, which is exactly the intended behavior here. """ diff --git a/agentex/src/temporal/workflows/scheduled_agent_run_workflow.py b/agentex/src/temporal/workflows/scheduled_agent_run_workflow.py index 61ff65be..b2ccc824 100644 --- a/agentex/src/temporal/workflows/scheduled_agent_run_workflow.py +++ b/agentex/src/temporal/workflows/scheduled_agent_run_workflow.py @@ -1,5 +1,5 @@ """ -Scheduled agent run workflow (AGX1-368). +Scheduled agent run workflow. Started by a Temporal Schedule on each cron / interval fire. 
The workflow is deliberately thin: it passes only the schedule id and a per-fire token to a From 9a10531002a15130536f9c6bf1ab3a28a4bbb317 Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Tue, 23 Jun 2026 14:46:35 -0400 Subject: [PATCH 04/24] =?UTF-8?q?feat(schedules):=20address=20review=20?= =?UTF-8?q?=E2=80=94=20idempotency,=20list=20perf,=20update=20+=20trigger?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - delete/pause/resume tolerate a missing Temporal schedule (treat as success / log) so a prior partial delete can't strand an un-cleanable, un-toggleable row. - list no longer fans out a describe RPC per row; live Temporal fields are served only on the single-schedule GET (list state comes from the row). - scheduled task display_name uses the nominal fire time parsed from the workflow id (stable across activity retries) instead of wall-clock now(). - add PATCH /agents/{agent_id}/schedules/{name} (partial update of cadence, window, input, etc.; cron/interval stay mutually exclusive). - re-add POST /agents/{agent_id}/schedules/{name}/trigger for an immediate out-of-band run (restores parity with the prior scheduler). - new Temporal adapter update_schedule; regenerated OpenAPI spec; unit tests for all of the above. 
--- agentex/openapi.yaml | 150 ++++++++++++++++ .../src/adapters/temporal/adapter_temporal.py | 66 +++++++ agentex/src/api/routes/agent_run_schedules.py | 42 +++++ .../src/api/schemas/agent_run_schedules.py | 43 +++++ .../services/agent_run_schedule_service.py | 165 +++++++++++++++--- .../use_cases/agent_run_schedules_use_case.py | 15 ++ .../scheduled_agent_run_activities.py | 27 ++- .../test_agent_run_schedule_service.py | 118 +++++++++++++ .../test_agent_run_schedules_use_case.py | 20 +++ 9 files changed, 623 insertions(+), 23 deletions(-) diff --git a/agentex/openapi.yaml b/agentex/openapi.yaml index e8ab7b90..45e2737f 100644 --- a/agentex/openapi.yaml +++ b/agentex/openapi.yaml @@ -3312,6 +3312,45 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' + patch: + tags: + - Schedules + summary: Update Run Schedule + description: Partially update a run schedule's definition (cadence, window, + input, etc.). + operationId: update_run_schedule_agents__agent_id__schedules__name__patch + parameters: + - name: agent_id + in: path + required: true + schema: + type: string + title: Agent Id + - name: name + in: path + required: true + schema: + type: string + title: Name + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateAgentRunScheduleRequest' + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/AgentRunScheduleResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Schedules @@ -3344,6 +3383,40 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' + /agents/{agent_id}/schedules/{name}/trigger: + post: + tags: + - Schedules + summary: Trigger Run Schedule + description: Trigger an immediate, out-of-band run of the schedule (in addition + to its cadence). 
+ operationId: trigger_run_schedule_agents__agent_id__schedules__name__trigger_post + parameters: + - name: agent_id + in: path + required: true + schema: + type: string + title: Agent Id + - name: name + in: path + required: true + schema: + type: string + title: Name + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/AgentRunScheduleResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' /agents/{agent_id}/schedules/{name}/pause: post: tags: @@ -6548,6 +6621,83 @@ components: - name title: ToolResponseDelta description: Delta for tool response updates + UpdateAgentRunScheduleRequest: + properties: + description: + anyOf: + - type: string + - type: 'null' + title: Description + description: Optional description of what this schedule does. + cron_expression: + anyOf: + - type: string + - type: 'null' + title: Cron Expression + description: New cron cadence. Mutually exclusive with interval_seconds. + interval_seconds: + anyOf: + - type: integer + minimum: 1.0 + - type: 'null' + title: Interval Seconds + description: New interval cadence in seconds. Mutually exclusive with cron_expression. + timezone: + anyOf: + - type: string + - type: 'null' + title: Timezone + description: IANA timezone the cron expression is evaluated in. + start_at: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Start At + description: When the schedule should start being active. + end_at: + anyOf: + - type: string + format: date-time + - type: 'null' + title: End At + description: When the schedule should stop being active. + paused: + anyOf: + - type: boolean + - type: 'null' + title: Paused + description: Pause/resume the schedule as part of the update. 
+ task_params: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + title: Task Params + description: Resolved config forwarded as task `params` at fire time. + task_metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + title: Task Metadata + description: Metadata copied onto each created task at fire time. + initial_input: + anyOf: + - $ref: '#/components/schemas/ScheduleInitialInput' + - type: 'null' + description: Replacement initial input delivered to each created task. + type: object + title: UpdateAgentRunScheduleRequest + description: 'Partial update for a scheduled agent run. + + + Only fields present in the request body are changed; the schedule ``name`` + is + + immutable (it is the natural key). Setting ``cron_expression`` clears + + ``interval_seconds`` and vice versa; providing both is rejected.' UpdateAgentTaskTrackerRequest: properties: last_processed_event_id: diff --git a/agentex/src/adapters/temporal/adapter_temporal.py b/agentex/src/adapters/temporal/adapter_temporal.py index dccdb2cb..5d516fee 100644 --- a/agentex/src/adapters/temporal/adapter_temporal.py +++ b/agentex/src/adapters/temporal/adapter_temporal.py @@ -14,6 +14,8 @@ SchedulePolicy, ScheduleSpec, ScheduleState, + ScheduleUpdate, + ScheduleUpdateInput, WorkflowExecution, WorkflowHandle, ) @@ -614,6 +616,70 @@ async def trigger_schedule(self, schedule_id: str) -> None: detail=str(e), ) from e + async def update_schedule( + self, + schedule_id: str, + cron_expressions: list[str] | None = None, + interval_seconds: int | None = None, + start_at: Any | None = None, + end_at: Any | None = None, + time_zone_name: str | None = None, + paused: bool | None = None, + ) -> None: + """ + Update an existing schedule's spec and/or paused state. + + Rebuilds the schedule spec (cadence, window, timezone) from the provided + values and replaces it, leaving the workflow action untouched. ``paused`` + is applied only when provided. 
The caller is expected to pass the full + desired spec (cron XOR interval), mirroring ``create_schedule``. + """ + if not self.client: + raise TemporalConnectionError("Temporal client is not connected") + + if not cron_expressions and not interval_seconds: + raise TemporalInvalidArgumentError( + message="Either cron_expressions or interval_seconds must be provided", + detail="A schedule requires at least one scheduling specification", + ) + + spec_kwargs: dict[str, Any] = {} + if time_zone_name: + spec_kwargs["time_zone_name"] = time_zone_name + new_spec = ScheduleSpec( + cron_expressions=cron_expressions or [], + intervals=[ScheduleIntervalSpec(every=timedelta(seconds=interval_seconds))] + if interval_seconds + else [], + start_at=start_at, + end_at=end_at, + **spec_kwargs, + ) + + def _apply(input: ScheduleUpdateInput) -> ScheduleUpdate: + schedule = input.description.schedule + schedule.spec = new_spec + if paused is not None: + schedule.state.paused = paused + return ScheduleUpdate(schedule=schedule) + + try: + handle = self.client.get_schedule_handle(schedule_id) + await handle.update(_apply) + logger.info(f"Updated schedule {schedule_id}") + except Exception as e: + if "not found" in str(e).lower(): + logger.error(f"Schedule {schedule_id} not found: {e}") + raise TemporalScheduleNotFoundError( + message=f"Schedule '{schedule_id}' not found", + detail=str(e), + ) from e + logger.error(f"Failed to update schedule {schedule_id}: {e}") + raise TemporalScheduleError( + message=f"Failed to update schedule '{schedule_id}'", + detail=str(e), + ) from e + async def delete_schedule(self, schedule_id: str) -> None: """ Delete a schedule. 
# PATCH /{name}: partially update one run schedule of the agent.
@router.patch(
    "/{name}",
    response_model=AgentRunScheduleResponse,
    summary="Update Run Schedule",
    description="Partially update a run schedule's definition (cadence, window, input, etc.).",
)
async def update_run_schedule(
    agent_id: str,
    name: str,
    request: UpdateAgentRunScheduleRequest,
    run_schedules_use_case: DAgentRunSchedulesUseCase,
    authorization: DAuthorizationService,
) -> AgentRunScheduleResponse:
    # The caller needs `update` on this schedule; the check helper is awaited
    # before any use-case work so an unauthorized request never reaches it.
    selector = build_run_schedule_authz_selector(agent_id, name)
    await _check_schedule_or_collapse_to_404(
        authorization, selector, AuthorizedOperationType.update
    )
    updated = await run_schedules_use_case.update_schedule(agent_id, name, request)
    return updated


# POST /{name}/trigger: fire the schedule once, outside its regular cadence.
@router.post(
    "/{name}/trigger",
    response_model=AgentRunScheduleResponse,
    summary="Trigger Run Schedule",
    description="Trigger an immediate, out-of-band run of the schedule (in addition to its cadence).",
)
async def trigger_run_schedule(
    agent_id: str,
    name: str,
    run_schedules_use_case: DAgentRunSchedulesUseCase,
    authorization: DAuthorizationService,
) -> AgentRunScheduleResponse:
    # Triggering is gated by the same `update` operation as editing.
    selector = build_run_schedule_authz_selector(agent_id, name)
    await _check_schedule_or_collapse_to_404(
        authorization, selector, AuthorizedOperationType.update
    )
    triggered = await run_schedules_use_case.trigger_schedule(agent_id, name)
    return triggered
# Request body for PATCH on a run schedule.
#
# NOTE: the class docstring and every Field description below are published
# verbatim as the schema/property descriptions in the generated openapi.yaml,
# so rewording them changes the public spec.
#
# NOTE: this model declares no validator of its own. The "providing both is
# rejected" rule for cron_expression / interval_seconds is enforced by the
# use-case and service layers, not here.
#
# Every field defaults to None so that "absent" can be told apart from
# "explicitly null" via `model_dump(exclude_unset=True)` in the service.
class UpdateAgentRunScheduleRequest(BaseModel):
    """Partial update for a scheduled agent run.

    Only fields present in the request body are changed; the schedule ``name`` is
    immutable (it is the natural key). Setting ``cron_expression`` clears
    ``interval_seconds`` and vice versa; providing both is rejected.
    """

    # Explicit null is applied by the service (clears the description).
    description: str | None = Field(
        None, description="Optional description of what this schedule does."
    )
    # Cadence, option 1. A non-null value makes the service clear
    # interval_seconds on the stored row.
    cron_expression: str | None = Field(
        None,
        description="New cron cadence. Mutually exclusive with interval_seconds.",
    )
    # Cadence, option 2 (must be >= 1). A non-null value makes the service
    # clear cron_expression on the stored row.
    interval_seconds: int | None = Field(
        None,
        ge=1,
        description="New interval cadence in seconds. Mutually exclusive with cron_expression.",
    )
    # Explicit null is ignored by the service (the stored timezone is kept).
    timezone: str | None = Field(
        None, description="IANA timezone the cron expression is evaluated in."
    )
    # Explicit null is applied by the service (clears the window start).
    start_at: datetime | None = Field(
        None, description="When the schedule should start being active."
    )
    # Explicit null is applied by the service (clears the window end).
    end_at: datetime | None = Field(
        None, description="When the schedule should stop being active."
    )
    # Explicit null is ignored by the service (the stored flag is kept).
    paused: bool | None = Field(
        None, description="Pause/resume the schedule as part of the update."
    )
    # Explicit null is applied by the service (clears the stored params).
    task_params: dict[str, Any] | None = Field(
        None, description="Resolved config forwarded as task `params` at fire time."
    )
    # Explicit null is applied by the service (clears the stored metadata).
    task_metadata: dict[str, Any] | None = Field(
        None, description="Metadata copied onto each created task at fire time."
    )
    # Explicit null is ignored by the service (the stored input is kept).
    initial_input: ScheduleInitialInput | None = Field(
        None, description="Replacement initial input delivered to each created task."
    )
async def update_schedule(
    self, agent_id: str, name: str, request: UpdateAgentRunScheduleRequest
) -> AgentRunScheduleResponse:
    """Apply a partial update to a schedule's definition and Temporal spec.

    Only fields present in the request body are changed. Setting one of
    ``cron_expression`` / ``interval_seconds`` clears the other, and the
    merged result must carry exactly one cadence.

    The merged spec is pushed to Temporal *before* the row is committed, so a
    spec Temporal rejects (bad cron/timezone) or a transient Temporal error
    aborts the update with nothing persisted. There is no cross-store
    transaction, so one window remains: Temporal accepts and the row write
    then fails. The row stays the declared source of truth, and a later
    successful update re-converges the two.

    Args:
        agent_id: Owning agent id.
        name: Schedule name (the natural key within the agent).
        request: Partial update; unset fields are left untouched.

    Returns:
        The response built from the persisted, updated row.

    Raises:
        ClientError: Both cadences were supplied, or the merged schedule has
            no cadence (or, defensively, still has both).
        Other errors from the repository lookup/update or the Temporal
        adapter propagate unchanged, except a missing Temporal schedule,
        which is logged and tolerated.
    """
    row = await self.schedule_repository.get_by_agent_id_and_name_or_raise(
        agent_id, name
    )

    # Reject "both cadences" on the request itself. The merge below clears the
    # opposite cadence each time one is set, so without this guard a request
    # carrying both would silently resolve to the interval and the post-merge
    # check could never notice.
    if request.cron_expression and request.interval_seconds:
        raise ClientError(
            "Provide only one of cron_expression or interval_seconds, not both"
        )

    # exclude_unset distinguishes "field absent" from "field explicitly null".
    provided = request.model_dump(exclude_unset=True)
    if "description" in provided:
        row.description = request.description
    if "cron_expression" in provided:
        row.cron_expression = request.cron_expression
        if request.cron_expression is not None:
            row.interval_seconds = None
    if "interval_seconds" in provided:
        row.interval_seconds = request.interval_seconds
        if request.interval_seconds is not None:
            row.cron_expression = None
    # timezone / paused / initial_input ignore an explicit null: the stored
    # value is kept rather than cleared.
    if "timezone" in provided and request.timezone is not None:
        row.timezone = request.timezone
    if "start_at" in provided:
        row.start_at = request.start_at
    if "end_at" in provided:
        row.end_at = request.end_at
    if "paused" in provided and request.paused is not None:
        row.paused = request.paused
    if "task_params" in provided:
        row.task_params = request.task_params
    if "task_metadata" in provided:
        row.task_metadata = request.task_metadata
    if "initial_input" in provided and request.initial_input is not None:
        row.initial_input = request.initial_input.to_dict(mode="json")

    if not row.cron_expression and not row.interval_seconds:
        raise ClientError(
            "Schedule must have exactly one of cron_expression or interval_seconds"
        )
    # Defensive: only reachable if the stored row already carried both.
    if row.cron_expression and row.interval_seconds:
        raise ClientError(
            "Provide only one of cron_expression or interval_seconds, not both"
        )

    temporal_id = build_run_schedule_temporal_id(row.id)
    # Push the merged cadence/window/paused state to the Temporal clock first.
    # Any failure other than "schedule missing" propagates here, before the
    # row is written. A missing schedule is logged rather than raised so the
    # persisted row stays the source of truth.
    try:
        await self.temporal_adapter.update_schedule(
            schedule_id=temporal_id,
            cron_expressions=(
                [row.cron_expression] if row.cron_expression else None
            ),
            interval_seconds=row.interval_seconds,
            start_at=row.start_at,
            end_at=row.end_at,
            # The timezone is only sent for cron schedules.
            time_zone_name=row.timezone if row.cron_expression else None,
            paused=row.paused,
        )
    except TemporalScheduleNotFoundError:
        logger.warning(
            "run_schedule_temporal_missing_on_update",
            extra={"temporal_id": temporal_id, "schedule_id": row.id},
        )

    updated = await self.schedule_repository.update(row)
    agent = await self.agent_repository.get(id=agent_id)
    return await self._to_response(updated, agent=agent, temporal_id=temporal_id)


async def trigger_schedule(
    self, agent_id: str, name: str
) -> AgentRunScheduleResponse:
    """Trigger an immediate, out-of-band fire of the schedule.

    Looks the schedule up by agent and name, asks Temporal to trigger it, and
    returns the schedule's current response. The persisted row is not
    modified. Errors from the lookup or the Temporal trigger propagate.
    """
    row = await self.schedule_repository.get_by_agent_id_and_name_or_raise(
        agent_id, name
    )
    temporal_id = build_run_schedule_temporal_id(row.id)
    await self.temporal_adapter.trigger_schedule(temporal_id)
    agent = await self.agent_repository.get(id=agent_id)
    return await self._to_response(row, agent=agent, temporal_id=temporal_id)
+ try: + if paused: + await self.temporal_adapter.pause_schedule(temporal_id, note=note) + else: + await self.temporal_adapter.unpause_schedule(temporal_id, note=note) + except TemporalScheduleNotFoundError: + logger.warning( + "run_schedule_temporal_missing_on_pause_toggle", + extra={ + "temporal_id": temporal_id, + "schedule_id": row.id, + "paused": paused, + }, + ) row.paused = paused updated = await self.schedule_repository.update(row) agent = await self.agent_repository.get(id=agent_id) @@ -241,6 +355,7 @@ async def _to_response( entity: AgentRunScheduleEntity, agent: AgentEntity, temporal_id: str, + include_live: bool = True, ) -> AgentRunScheduleResponse: effective_method = ( entity.initial_input_method @@ -252,21 +367,27 @@ async def _to_response( last_action_time: datetime | None = None num_actions_taken = 0 - # Live Temporal fields are best-effort: a describe failure (e.g. right - # after creation, or a transient Temporal error) must not break the - # response, which is fully serviceable from the persisted row. - try: - description = await self.temporal_adapter.describe_schedule(temporal_id) - live = self._extract_live_fields(description) - state = live["state"] - next_action_times = live["next_action_times"] - last_action_time = live["last_action_time"] - num_actions_taken = live["num_actions_taken"] - except Exception as exc: - logger.warning( - "run_schedule_describe_failed", - extra={"temporal_id": temporal_id, "error_type": type(exc).__name__}, - ) + # Live Temporal fields are best-effort and opt-in. ``include_live=False`` + # (list path) skips the describe RPC entirely and serves state from the + # persisted ``paused`` flag. When enabled (single GET), a describe failure + # (e.g. right after creation, or a transient Temporal error) must not break + # the response, which is fully serviceable from the persisted row. 
+ if include_live: + try: + description = await self.temporal_adapter.describe_schedule(temporal_id) + live = self._extract_live_fields(description) + state = live["state"] + next_action_times = live["next_action_times"] + last_action_time = live["last_action_time"] + num_actions_taken = live["num_actions_taken"] + except Exception as exc: + logger.warning( + "run_schedule_describe_failed", + extra={ + "temporal_id": temporal_id, + "error_type": type(exc).__name__, + }, + ) return AgentRunScheduleResponse( id=entity.id, diff --git a/agentex/src/domain/use_cases/agent_run_schedules_use_case.py b/agentex/src/domain/use_cases/agent_run_schedules_use_case.py index ee27e862..5dd55661 100644 --- a/agentex/src/domain/use_cases/agent_run_schedules_use_case.py +++ b/agentex/src/domain/use_cases/agent_run_schedules_use_case.py @@ -6,6 +6,7 @@ AgentRunScheduleListResponse, AgentRunScheduleResponse, CreateAgentRunScheduleRequest, + UpdateAgentRunScheduleRequest, ) from src.domain.entities.agents import AgentEntity from src.domain.exceptions import ClientError @@ -69,6 +70,20 @@ async def resume_schedule( agent_id, name, note=note ) + async def update_schedule( + self, agent_id: str, name: str, request: UpdateAgentRunScheduleRequest + ) -> AgentRunScheduleResponse: + if request.cron_expression and request.interval_seconds: + raise ClientError( + "Provide only one of cron_expression or interval_seconds, not both" + ) + return await self.run_schedule_service.update_schedule(agent_id, name, request) + + async def trigger_schedule( + self, agent_id: str, name: str + ) -> AgentRunScheduleResponse: + return await self.run_schedule_service.trigger_schedule(agent_id, name) + async def delete_schedule(self, agent_id: str, name: str) -> str: return await self.run_schedule_service.delete_schedule(agent_id, name) diff --git a/agentex/src/temporal/activities/scheduled_agent_run_activities.py b/agentex/src/temporal/activities/scheduled_agent_run_activities.py index e59bc9f6..f0b08876 100644 
--- a/agentex/src/temporal/activities/scheduled_agent_run_activities.py +++ b/agentex/src/temporal/activities/scheduled_agent_run_activities.py @@ -20,6 +20,7 @@ Pydantic models), so args and the return value are plain str / dict. """ +import re from datetime import UTC, datetime from typing import Any @@ -60,6 +61,30 @@ _INPUT_DELIVERED_MARKER = "scheduled_input_delivered" +# Temporal suffixes a scheduled workflow id with the nominal fire time +# (e.g. ``...-run-2026-06-23T15:19:00Z``). Matching the trailing ISO-8601 lets +# the display label use the *scheduled* time, which is stable across activity +# retries, rather than wall-clock now() (which drifts on a delayed retry). +_NOMINAL_FIRE_TIME_RE = re.compile( + r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z?)$" +) + + +def _format_fire_time(fire_id: str) -> str: + """Format the schedule's nominal fire time for the task display name. + + Falls back to the current time when ``fire_id`` carries no recognizable + timestamp suffix (e.g. a manually triggered fire). + """ + match = _NOMINAL_FIRE_TIME_RE.search(fire_id) + if match: + try: + parsed = datetime.fromisoformat(match.group(1).replace("Z", "+00:00")) + return parsed.strftime("%Y-%m-%d %H:%M UTC") + except ValueError: + pass + return datetime.now(UTC).strftime("%Y-%m-%d %H:%M UTC") + def _build_initial_content(initial_input: dict[str, Any]) -> TaskMessageContentEntity: """Build the message content delivered as the scheduled task's first input. @@ -204,7 +229,7 @@ async def launch_scheduled_agent_run( # task_metadata.display_name, never the deterministic `name` above). # Templated per fire so runs are distinguishable; placed first so a # caller-supplied display_name in schedule.task_metadata overrides it. 
- fire_time = datetime.now(UTC).strftime("%Y-%m-%d %H:%M UTC") + fire_time = _format_fire_time(fire_id) task_metadata = { "display_name": f"Scheduled Message: {schedule.name} · {fire_time}", **(schedule.task_metadata or {}), diff --git a/agentex/tests/unit/services/test_agent_run_schedule_service.py b/agentex/tests/unit/services/test_agent_run_schedule_service.py index e448a2ec..222f84eb 100644 --- a/agentex/tests/unit/services/test_agent_run_schedule_service.py +++ b/agentex/tests/unit/services/test_agent_run_schedule_service.py @@ -2,10 +2,12 @@ from uuid import uuid4 import pytest +from src.adapters.temporal.exceptions import TemporalScheduleNotFoundError from src.api.schemas.agent_run_schedules import ( CreateAgentRunScheduleRequest, RunScheduleState, ScheduleInitialInput, + UpdateAgentRunScheduleRequest, ) from src.domain.entities.agent_run_schedules import AgentRunScheduleEntity from src.domain.entities.agents import ACPType, AgentEntity, AgentStatus @@ -188,3 +190,119 @@ async def test_list_none_authorized_means_bypass(self, service, agent): result = await service.list_schedules(agent.id, authorized_schedule_ids=None) assert result.total == 1 + + async def test_list_does_not_fan_out_to_temporal(self, service, agent): + # The list path must not issue a describe RPC per row (would scale list + # latency with the number of schedules). State comes from the row instead. 
@pytest.mark.unit
@pytest.mark.asyncio
class TestAgentRunScheduleServiceDelete:
    async def test_delete_tolerates_missing_temporal_schedule(self, service, agent):
        # Simulates a prior partial delete: the Temporal schedule is already
        # gone while the row survived. Delete must still clean up the row and
        # the auth entry instead of failing.
        schedule = _persisted(agent.id, _request())
        repository = service.schedule_repository
        repository.get_by_agent_id_and_name_or_raise.return_value = schedule
        missing = TemporalScheduleNotFoundError(message="gone", detail="gone")
        service.temporal_adapter.delete_schedule.side_effect = missing

        deleted_id = await service.delete_schedule(agent.id, schedule.name)

        assert deleted_id == schedule.id
        repository.delete.assert_called_once_with(id=schedule.id)
        service.authorization_service.deregister_resource.assert_called_once()


@pytest.mark.unit
@pytest.mark.asyncio
class TestAgentRunScheduleServicePauseResume:
    async def test_pause_tolerates_missing_temporal_schedule(self, service, agent):
        schedule = _persisted(agent.id, _request())
        repository = service.schedule_repository
        repository.get_by_agent_id_and_name_or_raise.return_value = schedule
        repository.update.return_value = schedule
        service.agent_repository.get.return_value = agent
        missing = TemporalScheduleNotFoundError(message="gone", detail="gone")
        service.temporal_adapter.pause_schedule.side_effect = missing

        paused_response = await service.pause_schedule(agent.id, schedule.name)

        # The stored paused flag flips even though the Temporal clock is gone.
        assert schedule.paused is True
        assert paused_response.paused is True
        repository.update.assert_called_once()


@pytest.mark.unit
@pytest.mark.asyncio
class TestAgentRunScheduleServiceUpdate:
    async def test_update_swaps_cron_for_interval(self, service, agent):
        schedule = _persisted(agent.id, _request())  # starts out cron-based
        repository = service.schedule_repository
        repository.get_by_agent_id_and_name_or_raise.return_value = schedule
        repository.update.return_value = schedule
        service.agent_repository.get.return_value = agent
        patch = UpdateAgentRunScheduleRequest(interval_seconds=120)

        await service.update_schedule(agent.id, schedule.name, patch)

        # Supplying an interval drops the cron, and Temporal gets the new cadence.
        assert schedule.cron_expression is None
        assert schedule.interval_seconds == 120
        pushed = service.temporal_adapter.update_schedule.call_args.kwargs
        assert pushed["interval_seconds"] == 120
        assert pushed["cron_expressions"] is None

    async def test_update_rejects_clearing_all_cadences(self, service, agent):
        schedule = _persisted(agent.id, _request())  # cron only, no interval
        repository = service.schedule_repository
        repository.get_by_agent_id_and_name_or_raise.return_value = schedule
        # Nulling the cron without giving an interval would leave no cadence.
        patch = UpdateAgentRunScheduleRequest(cron_expression=None)

        with pytest.raises(ClientError):
            await service.update_schedule(agent.id, schedule.name, patch)

        service.temporal_adapter.update_schedule.assert_not_called()

    async def test_update_tolerates_missing_temporal_schedule(self, service, agent):
        schedule = _persisted(agent.id, _request())
        repository = service.schedule_repository
        repository.get_by_agent_id_and_name_or_raise.return_value = schedule
        repository.update.return_value = schedule
        service.agent_repository.get.return_value = agent
        missing = TemporalScheduleNotFoundError(message="gone", detail="gone")
        service.temporal_adapter.update_schedule.side_effect = missing
        patch = UpdateAgentRunScheduleRequest(description="new")

        updated_response = await service.update_schedule(
            agent.id, schedule.name, patch
        )

        assert updated_response.description == "new"


@pytest.mark.unit
@pytest.mark.asyncio
class TestAgentRunScheduleServiceTrigger:
    async def test_trigger_calls_temporal(self, service, agent):
        schedule = _persisted(agent.id, _request())
        repository = service.schedule_repository
        repository.get_by_agent_id_and_name_or_raise.return_value = schedule
        service.agent_repository.get.return_value = agent
        expected_temporal_id = build_run_schedule_temporal_id(schedule.id)

        await service.trigger_schedule(agent.id, schedule.name)

        service.temporal_adapter.trigger_schedule.assert_called_once_with(
            expected_temporal_id
        )
async def test_update_delegates(self, use_case, mock_service, agent):
    # A single-cadence update is handed to the service untouched.
    schedule_name = "daily-summary"
    patch = UpdateAgentRunScheduleRequest(interval_seconds=120)

    await use_case.update_schedule(agent.id, schedule_name, patch)

    mock_service.update_schedule.assert_called_once_with(
        agent.id, schedule_name, patch
    )

async def test_update_rejects_both_cadences(self, use_case, agent):
    # Supplying cron and interval together is a client error.
    patch = UpdateAgentRunScheduleRequest(
        cron_expression="0 0 * * *", interval_seconds=30
    )

    with pytest.raises(ClientError) as raised:
        await use_case.update_schedule(agent.id, "daily-summary", patch)

    message = str(raised.value)
    assert "only one" in message

async def test_trigger_delegates(self, use_case, mock_service, agent):
    schedule_name = "daily-summary"

    await use_case.trigger_schedule(agent.id, schedule_name)

    mock_service.trigger_schedule.assert_called_once_with(agent.id, schedule_name)
--- agentex/docker-compose.yml | 2 ++ agentex/scripts/generate_openapi_spec.py | 3 +++ agentex/src/api/app.py | 6 +++++- agentex/src/config/environment_variables.py | 6 ++++++ .../config/test_agent_run_schedules_env.py | 21 +++++++++++++++++++ 5 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 agentex/tests/unit/config/test_agent_run_schedules_env.py diff --git a/agentex/docker-compose.yml b/agentex/docker-compose.yml index a917de42..b203ae22 100644 --- a/agentex/docker-compose.yml +++ b/agentex/docker-compose.yml @@ -166,6 +166,8 @@ services: - MONGODB_DATABASE_NAME=agentex - WATCHFILES_FORCE_POLLING=true - ENABLE_HEALTH_CHECK_WORKFLOW=true + # Disabled by default; enable when testing, e.g. `ENABLE_AGENT_RUN_SCHEDULES=true ./dev.sh`. + - ENABLE_AGENT_RUN_SCHEDULES=${ENABLE_AGENT_RUN_SCHEDULES:-false} - AGENTEX_SERVER_TASK_QUEUE=agentex-server - ALLOWED_ORIGINS=http://localhost:3000 - OTEL_EXPORTER_OTLP_ENDPOINT=http://agentex-otel-collector:4317 diff --git a/agentex/scripts/generate_openapi_spec.py b/agentex/scripts/generate_openapi_spec.py index d76a16b9..f043c650 100644 --- a/agentex/scripts/generate_openapi_spec.py +++ b/agentex/scripts/generate_openapi_spec.py @@ -8,6 +8,9 @@ os.environ.setdefault("ENVIRONMENT", "development") os.environ.setdefault("ALLOWED_ORIGINS", "*") +# Document feature-flagged endpoints in the spec/SDK regardless of where the +# feature is enabled at runtime. Runtime serving stays gated by the live env var. 
+os.environ.setdefault("ENABLE_AGENT_RUN_SCHEDULES", "true") def main() -> int: diff --git a/agentex/src/api/app.py b/agentex/src/api/app.py index 1692dd3b..07853a2b 100644 --- a/agentex/src/api/app.py +++ b/agentex/src/api/app.py @@ -204,7 +204,11 @@ async def handle_unexpected(request, exc): fastapi_app.include_router(agent_api_keys.router) fastapi_app.include_router(deployment_history.router) fastapi_app.include_router(deployments.router) -fastapi_app.include_router(agent_run_schedules.router) +# Agent run schedules are feature-flagged (off by default in every environment). +# When disabled the routes are not registered, so the API surface is absent +# entirely in environments that haven't opted in. +if resolve_environment_variable_dependency(EnvVarKeys.ENABLE_AGENT_RUN_SCHEDULES): + fastapi_app.include_router(agent_run_schedules.router) fastapi_app.include_router(checkpoints.router) fastapi_app.include_router(task_retention.router) diff --git a/agentex/src/config/environment_variables.py b/agentex/src/config/environment_variables.py index 0872c0cf..477a78e5 100644 --- a/agentex/src/config/environment_variables.py +++ b/agentex/src/config/environment_variables.py @@ -57,6 +57,7 @@ class EnvVarKeys(str, Enum): SSE_KEEPALIVE_PING_INTERVAL = "SSE_KEEPALIVE_PING_INTERVAL" AGENTEX_SERVER_TASK_QUEUE = "AGENTEX_SERVER_TASK_QUEUE" ENABLE_HEALTH_CHECK_WORKFLOW = "ENABLE_HEALTH_CHECK_WORKFLOW" + ENABLE_AGENT_RUN_SCHEDULES = "ENABLE_AGENT_RUN_SCHEDULES" WEBHOOK_REQUEST_TIMEOUT = "WEBHOOK_REQUEST_TIMEOUT" RETENTION_CLEANUP_ENABLED = "RETENTION_CLEANUP_ENABLED" RETENTION_CLEANUP_AGENT_ALLOWLIST = "RETENTION_CLEANUP_AGENT_ALLOWLIST" @@ -120,6 +121,8 @@ class EnvironmentVariables(BaseModel): SSE_KEEPALIVE_PING_INTERVAL: int = 15 # SSE keepalive ping interval in seconds AGENTEX_SERVER_TASK_QUEUE: str | None = None ENABLE_HEALTH_CHECK_WORKFLOW: bool = False + # Gates the agent run schedules API. Off by default in every environment; opt in per environment.
@pytest.mark.unit
def test_agent_run_schedules_flag_parses_enabled(monkeypatch):
    # With the variable set to "true", a forced refresh reports the flag as on.
    monkeypatch.setenv("ENABLE_AGENT_RUN_SCHEDULES", "true")

    refreshed = EnvironmentVariables.refresh(force_refresh=True)
    flag = refreshed.ENABLE_AGENT_RUN_SCHEDULES

    assert flag is True


@pytest.mark.unit
def test_agent_run_schedules_flag_defaults_disabled(monkeypatch):
    # With the variable absent, the flag is off: the API surface stays absent
    # unless an environment opts in.
    monkeypatch.delenv("ENABLE_AGENT_RUN_SCHEDULES", raising=False)

    refreshed = EnvironmentVariables.refresh(force_refresh=True)
    flag = refreshed.ENABLE_AGENT_RUN_SCHEDULES

    assert flag is False
Co-Authored-By: Claude Opus 4.8 (1M context) --- ...00_add_agent_run_schedules_3b1c9d2e4f6a.py | 1 - agentex/openapi.yaml | 10 ++--- agentex/src/adapters/orm.py | 1 - .../src/api/schemas/agent_run_schedules.py | 10 ++--- .../domain/entities/agent_run_schedules.py | 10 +---- .../services/agent_run_schedule_service.py | 40 ++++++++++--------- .../scheduled_agent_run_activities.py | 8 +--- .../test_agent_run_schedule_service.py | 15 +++++++ 8 files changed, 50 insertions(+), 45 deletions(-) diff --git a/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py b/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py index eb4e7a57..9d773db8 100644 --- a/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py +++ b/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py @@ -42,7 +42,6 @@ def upgrade() -> None: sa.Column('task_params', sa.JSON(), nullable=True), sa.Column('task_metadata', sa.JSON(), nullable=True), sa.Column('initial_input', sa.JSON(), nullable=False), - sa.Column('initial_input_method', sa.String(), nullable=True), sa.Column( 'created_at', sa.DateTime(timezone=True), diff --git a/agentex/openapi.yaml b/agentex/openapi.yaml index a68e969e..33c73450 100644 --- a/agentex/openapi.yaml +++ b/agentex/openapi.yaml @@ -4108,11 +4108,9 @@ components: $ref: '#/components/schemas/ScheduleInitialInput' description: The initial input. initial_input_method: - anyOf: - - type: string - - type: 'null' + type: string title: Initial Input Method - description: Effective delivery method (inferred from the agent's ACP type). + description: Delivery method, inferred from the agent's ACP type. 
creator_principal: anyOf: - $ref: '#/components/schemas/ScheduleCreatorPrincipal' @@ -4161,6 +4159,7 @@ components: - agent_id - name - initial_input + - initial_input_method title: AgentRunScheduleResponse description: Response model describing a scheduled agent run. AgentStatus: @@ -5797,8 +5796,9 @@ components: properties: type: type: string + const: text title: Type - description: Input content type. Only 'text' in v1. + description: Input content type. default: text author: $ref: '#/components/schemas/MessageAuthor' diff --git a/agentex/src/adapters/orm.py b/agentex/src/adapters/orm.py index 37bf2268..49016124 100644 --- a/agentex/src/adapters/orm.py +++ b/agentex/src/adapters/orm.py @@ -215,7 +215,6 @@ class AgentRunScheduleORM(BaseORM): task_params = Column(JSON, nullable=True) task_metadata = Column(JSON, nullable=True) initial_input = Column(JSON, nullable=False) - initial_input_method = Column(String, nullable=True) created_at = Column(DateTime(timezone=True), server_default=func.now()) updated_at = Column( DateTime(timezone=True), server_default=func.now(), onupdate=func.now() diff --git a/agentex/src/api/schemas/agent_run_schedules.py b/agentex/src/api/schemas/agent_run_schedules.py index 35f1d12d..d49fb101 100644 --- a/agentex/src/api/schemas/agent_run_schedules.py +++ b/agentex/src/api/schemas/agent_run_schedules.py @@ -1,6 +1,6 @@ from datetime import datetime from enum import Enum -from typing import Any +from typing import Any, Literal from pydantic import Field @@ -18,7 +18,7 @@ class RunScheduleState(str, Enum): class ScheduleInitialInput(BaseModel): """The first input delivered to each freshly created scheduled task.""" - type: str = Field("text", description="Input content type. Only 'text' in v1.") + type: Literal["text"] = Field("text", description="Input content type.") author: MessageAuthor = Field( MessageAuthor.USER, description="The author attributed to the initial input." 
) @@ -120,9 +120,9 @@ class AgentRunScheduleResponse(BaseModel): None, description="Task metadata at fire time." ) initial_input: ScheduleInitialInput = Field(..., description="The initial input.") - initial_input_method: str | None = Field( - None, - description="Effective delivery method (inferred from the agent's ACP type).", + initial_input_method: str = Field( + ..., + description="Delivery method, inferred from the agent's ACP type.", ) creator_principal: ScheduleCreatorPrincipal | None = Field( None, description="Credential-free creator identity." diff --git a/agentex/src/domain/entities/agent_run_schedules.py b/agentex/src/domain/entities/agent_run_schedules.py index 8d17897f..b51a46e7 100644 --- a/agentex/src/domain/entities/agent_run_schedules.py +++ b/agentex/src/domain/entities/agent_run_schedules.py @@ -11,8 +11,7 @@ class InitialInputMethod(str, Enum): """How the configured first input is delivered to the freshly created task. - Inferred from the target agent's ACP type at fire time; persisted only when - the caller wants the schedule definition to be explicit. + Always inferred from the target agent's ACP type at fire time. """ EVENT_SEND = "event/send" # async / agentic agents @@ -82,13 +81,6 @@ class AgentRunScheduleEntity(BaseModel): initial_input: dict[str, Any] = Field( ..., description="The first input delivered to each created task." ) - initial_input_method: str | None = Field( - None, - description=( - "Delivery method for the initial input (an InitialInputMethod value). " - "Inferred from the agent's ACP type at fire time when omitted." - ), - ) created_at: datetime | None = Field( None, description="When the schedule was created." 
) diff --git a/agentex/src/domain/services/agent_run_schedule_service.py b/agentex/src/domain/services/agent_run_schedule_service.py index 82d56778..c1786a18 100644 --- a/agentex/src/domain/services/agent_run_schedule_service.py +++ b/agentex/src/domain/services/agent_run_schedule_service.py @@ -106,8 +106,6 @@ async def create_schedule( task_params=request.task_params, task_metadata=request.task_metadata, initial_input=request.initial_input.to_dict(mode="json"), - # Delivery method is inferred from the agent's ACP type at fire time. - initial_input_method=None, ) try: @@ -275,28 +273,37 @@ async def update_schedule( "Provide only one of cron_expression or interval_seconds, not both" ) - updated = await self.schedule_repository.update(row) - temporal_id = build_run_schedule_temporal_id(updated.id) - # Push the merged cadence/window/paused state to the Temporal clock. A - # missing schedule is logged rather than raised so the persisted row stays - # the source of truth (mirrors the describe/delete tolerance). + temporal_id = build_run_schedule_temporal_id(row.id) + # Push the merged cadence/window/paused state to the Temporal clock BEFORE + # committing the row. This closes the common divergence: a rejected spec + # (invalid cron / timezone) or a transient Temporal error aborts the + # update with nothing persisted. A residual window remains — if Temporal + # accepts the update and the row write below then fails, the clock leads + # the row — but there is no cross-store transaction, and the row stays the + # declared source of truth, so any later successful update re-converges + # them. (Create keeps the analogous invariant by rolling the row back on + # failure; update has no in-place rollback, so it orders the writes + # instead.) A missing schedule is logged rather than raised so the + # persisted row stays authoritative (mirrors the describe/delete + # tolerance) and the merged definition is still committed. 
try: await self.temporal_adapter.update_schedule( schedule_id=temporal_id, cron_expressions=( - [updated.cron_expression] if updated.cron_expression else None + [row.cron_expression] if row.cron_expression else None ), - interval_seconds=updated.interval_seconds, - start_at=updated.start_at, - end_at=updated.end_at, - time_zone_name=updated.timezone if updated.cron_expression else None, - paused=updated.paused, + interval_seconds=row.interval_seconds, + start_at=row.start_at, + end_at=row.end_at, + time_zone_name=row.timezone if row.cron_expression else None, + paused=row.paused, ) except TemporalScheduleNotFoundError: logger.warning( "run_schedule_temporal_missing_on_update", - extra={"temporal_id": temporal_id, "schedule_id": updated.id}, + extra={"temporal_id": temporal_id, "schedule_id": row.id}, ) + updated = await self.schedule_repository.update(row) agent = await self.agent_repository.get(id=agent_id) return await self._to_response(updated, agent=agent, temporal_id=temporal_id) @@ -357,10 +364,7 @@ async def _to_response( temporal_id: str, include_live: bool = True, ) -> AgentRunScheduleResponse: - effective_method = ( - entity.initial_input_method - or infer_initial_input_method(agent.acp_type).value - ) + effective_method = infer_initial_input_method(agent.acp_type).value state = RunScheduleState.PAUSED if entity.paused else RunScheduleState.ACTIVE next_action_times: list[datetime] = [] diff --git a/agentex/src/temporal/activities/scheduled_agent_run_activities.py b/agentex/src/temporal/activities/scheduled_agent_run_activities.py index f0b08876..07628083 100644 --- a/agentex/src/temporal/activities/scheduled_agent_run_activities.py +++ b/agentex/src/temporal/activities/scheduled_agent_run_activities.py @@ -89,8 +89,7 @@ def _format_fire_time(fire_id: str) -> str: def _build_initial_content(initial_input: dict[str, Any]) -> TaskMessageContentEntity: """Build the message content delivered as the scheduled task's first input. 
- v1 supports text input only; the persisted ``initial_input.type`` is reserved - for future content types. + Only text input is supported (enforced by ``ScheduleInitialInput.type``). """ author = initial_input.get("author", MessageAuthor.USER.value) if not isinstance(author, MessageAuthor): @@ -198,10 +197,7 @@ async def launch_scheduled_agent_run( "schedule_id": schedule_id, } - method = ( - schedule.initial_input_method - or infer_initial_input_method(agent.acp_type).value - ) + method = infer_initial_input_method(agent.acp_type).value # Re-check the stored creator principal's permission at fire time, mirroring # the JSON-RPC route's authorization order: agent.execute (the RPC endpoint diff --git a/agentex/tests/unit/services/test_agent_run_schedule_service.py b/agentex/tests/unit/services/test_agent_run_schedule_service.py index 222f84eb..e1b577b2 100644 --- a/agentex/tests/unit/services/test_agent_run_schedule_service.py +++ b/agentex/tests/unit/services/test_agent_run_schedule_service.py @@ -292,6 +292,21 @@ async def test_update_tolerates_missing_temporal_schedule(self, service, agent): assert response.description == "new" + async def test_update_does_not_commit_row_on_temporal_failure(self, service, agent): + # A non-NotFound Temporal failure (rejected cron/timezone or a transient + # outage) must abort before the row is persisted, so the DB can never + # diverge from the clock. Unlike NotFound, this error propagates. 
+ row = _persisted(agent.id, _request()) + service.schedule_repository.get_by_agent_id_and_name_or_raise.return_value = row + service.temporal_adapter.update_schedule.side_effect = RuntimeError("boom") + + with pytest.raises(RuntimeError): + await service.update_schedule( + agent.id, row.name, UpdateAgentRunScheduleRequest(description="new") + ) + + service.schedule_repository.update.assert_not_called() + @pytest.mark.unit @pytest.mark.asyncio From 0a1fd2b483502e40003eeee2df7542be21020354 Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Thu, 25 Jun 2026 14:18:26 -0400 Subject: [PATCH 07/24] docs(schedules): clarify temporal fire workflow ids Co-authored-by: Cursor --- agentex/src/domain/services/agent_run_schedule_service.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/agentex/src/domain/services/agent_run_schedule_service.py b/agentex/src/domain/services/agent_run_schedule_service.py index c1786a18..8e2f5cc4 100644 --- a/agentex/src/domain/services/agent_run_schedule_service.py +++ b/agentex/src/domain/services/agent_run_schedule_service.py @@ -127,6 +127,9 @@ async def create_schedule( registered = await self._register_schedule_in_auth( authz_selector=authz_selector, agent_id=agent.id ) + # Temporal schedules append the nominal fire timestamp to this base + # workflow id at execution time, so workflow.info().workflow_id is a + # per-fire token even though the configured action id is stable. 
await self.temporal_adapter.create_schedule( schedule_id=temporal_id, workflow=SCHEDULED_AGENT_RUN_WORKFLOW_NAME, From 485990671642583570fe23c000cc0ef609c7ac4a Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Thu, 25 Jun 2026 14:19:55 -0400 Subject: [PATCH 08/24] feat(schedules): stamp manual trigger type on runs Co-authored-by: Cursor --- .../services/agent_run_schedule_service.py | 8 +++++++- .../scheduled_agent_run_activities.py | 7 ++++++- .../workflows/scheduled_agent_run_workflow.py | 6 ++++-- .../test_agent_run_schedule_service.py | 12 ++++++++---- .../test_scheduled_agent_run_activity.py | 19 +++++++++++++++++++ 5 files changed, 44 insertions(+), 8 deletions(-) diff --git a/agentex/src/domain/services/agent_run_schedule_service.py b/agentex/src/domain/services/agent_run_schedule_service.py index 8e2f5cc4..97556e5d 100644 --- a/agentex/src/domain/services/agent_run_schedule_service.py +++ b/agentex/src/domain/services/agent_run_schedule_service.py @@ -1,5 +1,6 @@ from datetime import datetime from typing import Annotated, Any, cast +from uuid import uuid4 from fastapi import Depends from temporalio.client import ScheduleDescription @@ -318,7 +319,12 @@ async def trigger_schedule( agent_id, name ) temporal_id = build_run_schedule_temporal_id(row.id) - await self.temporal_adapter.trigger_schedule(temporal_id) + await self.temporal_adapter.start_workflow( + workflow=SCHEDULED_AGENT_RUN_WORKFLOW_NAME, + workflow_id=f"{temporal_id}-manual-{uuid4()}", + args=[row.id, "manual"], + task_queue=self._task_queue(), + ) agent = await self.agent_repository.get(id=agent_id) return await self._to_response(row, agent=agent, temporal_id=temporal_id) diff --git a/agentex/src/temporal/activities/scheduled_agent_run_activities.py b/agentex/src/temporal/activities/scheduled_agent_run_activities.py index 07628083..8558c7bd 100644 --- a/agentex/src/temporal/activities/scheduled_agent_run_activities.py +++ b/agentex/src/temporal/activities/scheduled_agent_run_activities.py @@ 
-149,7 +149,7 @@ def __init__( @activity.defn(name=LAUNCH_SCHEDULED_AGENT_RUN_ACTIVITY) async def launch_scheduled_agent_run( - self, schedule_id: str, fire_id: str + self, schedule_id: str, fire_id: str, trigger_type: str = "scheduled" ) -> dict[str, Any]: """Create a task for the scheduled fire and deliver its initial input. @@ -159,6 +159,8 @@ async def launch_scheduled_agent_run( which Temporal makes unique per fire and stable across activity retries within the same execution). Used to build the deterministic, idempotent task name. + trigger_type: ``scheduled`` for cadence fires, ``manual`` for runs + started by the trigger endpoint. Returns: A JSON-native dict describing the outcome (``launched`` / ``skipped``). @@ -231,6 +233,7 @@ async def launch_scheduled_agent_run( **(schedule.task_metadata or {}), "schedule_id": schedule_id, "scheduled_fire_id": fire_id, + "trigger_type": trigger_type, } # task/create — get-or-create by deterministic name, so a retry returns @@ -297,6 +300,7 @@ async def launch_scheduled_agent_run( extra={ "schedule_id": schedule_id, "task_id": task.id, + "trigger_type": trigger_type, "method": method, }, ) @@ -304,5 +308,6 @@ async def launch_scheduled_agent_run( "status": "launched", "task_id": task.id, "schedule_id": schedule_id, + "trigger_type": trigger_type, "method": method, } diff --git a/agentex/src/temporal/workflows/scheduled_agent_run_workflow.py b/agentex/src/temporal/workflows/scheduled_agent_run_workflow.py index b2ccc824..be1618fb 100644 --- a/agentex/src/temporal/workflows/scheduled_agent_run_workflow.py +++ b/agentex/src/temporal/workflows/scheduled_agent_run_workflow.py @@ -25,11 +25,13 @@ @workflow.defn class ScheduledAgentRunWorkflow: @workflow.run - async def run(self, schedule_id: str) -> dict[str, Any]: + async def run( + self, schedule_id: str, trigger_type: str = "scheduled" + ) -> dict[str, Any]: fire_id = workflow.info().workflow_id return await workflow.execute_activity( LAUNCH_SCHEDULED_AGENT_RUN_ACTIVITY, - 
args=[schedule_id, fire_id], + args=[schedule_id, fire_id, trigger_type], start_to_close_timeout=timedelta(seconds=120), retry_policy=RetryPolicy( maximum_attempts=5, diff --git a/agentex/tests/unit/services/test_agent_run_schedule_service.py b/agentex/tests/unit/services/test_agent_run_schedule_service.py index e1b577b2..ea4d7894 100644 --- a/agentex/tests/unit/services/test_agent_run_schedule_service.py +++ b/agentex/tests/unit/services/test_agent_run_schedule_service.py @@ -311,13 +311,17 @@ async def test_update_does_not_commit_row_on_temporal_failure(self, service, age @pytest.mark.unit @pytest.mark.asyncio class TestAgentRunScheduleServiceTrigger: - async def test_trigger_calls_temporal(self, service, agent): + async def test_trigger_starts_manual_workflow(self, service, agent): row = _persisted(agent.id, _request()) service.schedule_repository.get_by_agent_id_and_name_or_raise.return_value = row service.agent_repository.get.return_value = agent await service.trigger_schedule(agent.id, row.name) - service.temporal_adapter.trigger_schedule.assert_called_once_with( - build_run_schedule_temporal_id(row.id) - ) + temporal_id = build_run_schedule_temporal_id(row.id) + service.temporal_adapter.trigger_schedule.assert_not_called() + start_kwargs = service.temporal_adapter.start_workflow.call_args.kwargs + assert start_kwargs["workflow"] == "ScheduledAgentRunWorkflow" + assert start_kwargs["workflow_id"].startswith(f"{temporal_id}-manual-") + assert start_kwargs["args"] == [row.id, "manual"] + assert start_kwargs["task_queue"] == "agentex-server" diff --git a/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py b/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py index 666c27ac..510e7633 100644 --- a/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py +++ b/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py @@ -145,6 +145,9 @@ async def test_async_agent_delivers_via_event_send( # Deterministic task name embeds 
schedule id + fire id. create_params = use_case.handle_rpc_request.call_args_list[0].kwargs["params"] assert create_params.name == f"scheduled-run:{schedule.id}:fire-1" + assert create_params.task_metadata["schedule_id"] == schedule.id + assert create_params.task_metadata["scheduled_fire_id"] == "fire-1" + assert create_params.task_metadata["trigger_type"] == "scheduled" use_case.task_service.update_task.assert_awaited_once() # Fire-time authz mirrors the RPC route: agent.execute, then task.create, # then task.update on the created task — in that order. @@ -182,6 +185,22 @@ async def test_sync_agent_delivers_via_message_send( ] assert methods == [AgentRPCMethod.TASK_CREATE, AgentRPCMethod.MESSAGE_SEND] + async def test_manual_trigger_type_is_stamped(self, activity_instance, monkeypatch): + schedule = _schedule() + activity_instance.schedule_repository.get.return_value = schedule + task = TaskEntity(id="task-1") + use_case = _fake_use_case(_agent(ACPType.ASYNC), task) + _patch_use_case(monkeypatch, use_case) + + result = await activity_instance.launch_scheduled_agent_run( + schedule.id, "manual-fire-1", "manual" + ) + + assert result["trigger_type"] == "manual" + create_params = use_case.handle_rpc_request.call_args_list[0].kwargs["params"] + assert create_params.task_metadata["trigger_type"] == "manual" + assert create_params.task_metadata["scheduled_fire_id"] == "manual-fire-1" + async def test_skips_delivery_when_already_delivered( self, activity_instance, monkeypatch ): From 941f1ea30370475535280aa69a0773d857bc9209 Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Thu, 25 Jun 2026 16:00:37 -0400 Subject: [PATCH 09/24] fix(schedules): include trigger time in manual run ids Co-authored-by: Cursor --- agentex/src/domain/services/agent_run_schedule_service.py | 7 +++++-- .../unit/services/test_agent_run_schedule_service.py | 8 +++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/agentex/src/domain/services/agent_run_schedule_service.py 
b/agentex/src/domain/services/agent_run_schedule_service.py index 97556e5d..b90b7088 100644 --- a/agentex/src/domain/services/agent_run_schedule_service.py +++ b/agentex/src/domain/services/agent_run_schedule_service.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import UTC, datetime from typing import Annotated, Any, cast from uuid import uuid4 @@ -319,9 +319,12 @@ async def trigger_schedule( agent_id, name ) temporal_id = build_run_schedule_temporal_id(row.id) + triggered_at = datetime.now(UTC).isoformat().replace("+00:00", "Z") + # Schedule starts get a Temporal-generated timestamp suffix; direct manual + # starts need their own uniqueness source, while keeping a parseable time. await self.temporal_adapter.start_workflow( workflow=SCHEDULED_AGENT_RUN_WORKFLOW_NAME, - workflow_id=f"{temporal_id}-manual-{uuid4()}", + workflow_id=f"{temporal_id}-manual-{uuid4()}-{triggered_at}", args=[row.id, "manual"], task_queue=self._task_queue(), ) diff --git a/agentex/tests/unit/services/test_agent_run_schedule_service.py b/agentex/tests/unit/services/test_agent_run_schedule_service.py index ea4d7894..6d5002bc 100644 --- a/agentex/tests/unit/services/test_agent_run_schedule_service.py +++ b/agentex/tests/unit/services/test_agent_run_schedule_service.py @@ -1,3 +1,4 @@ +import re from unittest.mock import AsyncMock, PropertyMock from uuid import uuid4 @@ -322,6 +323,11 @@ async def test_trigger_starts_manual_workflow(self, service, agent): service.temporal_adapter.trigger_schedule.assert_not_called() start_kwargs = service.temporal_adapter.start_workflow.call_args.kwargs assert start_kwargs["workflow"] == "ScheduledAgentRunWorkflow" - assert start_kwargs["workflow_id"].startswith(f"{temporal_id}-manual-") + assert re.fullmatch( + rf"{re.escape(temporal_id)}-manual-" + r"[0-9a-f-]{36}-" + r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z", + start_kwargs["workflow_id"], + ) assert start_kwargs["args"] == [row.id, "manual"] assert start_kwargs["task_queue"] == 
"agentex-server" From 37795b4b140d13c54a06f0ac44b8097e9fe5b99b Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Thu, 25 Jun 2026 16:11:24 -0400 Subject: [PATCH 10/24] feat(schedules): persist run fire time metadata Co-authored-by: Cursor --- .../scheduled_agent_run_activities.py | 27 ++++++++++++++---- .../test_scheduled_agent_run_activity.py | 28 +++++++++++++++---- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/agentex/src/temporal/activities/scheduled_agent_run_activities.py b/agentex/src/temporal/activities/scheduled_agent_run_activities.py index 8558c7bd..5df2c7fa 100644 --- a/agentex/src/temporal/activities/scheduled_agent_run_activities.py +++ b/agentex/src/temporal/activities/scheduled_agent_run_activities.py @@ -76,14 +76,25 @@ def _format_fire_time(fire_id: str) -> str: Falls back to the current time when ``fire_id`` carries no recognizable timestamp suffix (e.g. a manually triggered fire). """ + fire_time = _extract_fire_time(fire_id) + if fire_time is not None: + parsed = datetime.fromisoformat(fire_time.replace("Z", "+00:00")) + return parsed.strftime("%Y-%m-%d %H:%M UTC") + return datetime.now(UTC).strftime("%Y-%m-%d %H:%M UTC") + + +def _extract_fire_time(fire_id: str) -> str | None: + """Extract the occurrence time encoded in a schedule/manual fire id.""" match = _NOMINAL_FIRE_TIME_RE.search(fire_id) if match: try: parsed = datetime.fromisoformat(match.group(1).replace("Z", "+00:00")) - return parsed.strftime("%Y-%m-%d %H:%M UTC") except ValueError: - pass - return datetime.now(UTC).strftime("%Y-%m-%d %H:%M UTC") + return None + if parsed.tzinfo is None: + parsed = parsed.replace(tzinfo=UTC) + return parsed.astimezone(UTC).isoformat().replace("+00:00", "Z") + return None def _build_initial_content(initial_input: dict[str, Any]) -> TaskMessageContentEntity: @@ -227,14 +238,20 @@ async def launch_scheduled_agent_run( # task_metadata.display_name, never the deterministic `name` above). 
# Templated per fire so runs are distinguishable; placed first so a # caller-supplied display_name in schedule.task_metadata overrides it. - fire_time = _format_fire_time(fire_id) + display_fire_time = _format_fire_time(fire_id) task_metadata = { - "display_name": f"Scheduled Message: {schedule.name} · {fire_time}", + "display_name": f"Scheduled Message: {schedule.name} · {display_fire_time}", **(schedule.task_metadata or {}), "schedule_id": schedule_id, "scheduled_fire_id": fire_id, "trigger_type": trigger_type, } + # `fire_time` is the run occurrence time: nominal scheduled time for + # schedule-fired runs, and actual trigger time for manual runs. Store it + # separately so product/UI code does not parse `scheduled_fire_id`. + fire_time = _extract_fire_time(fire_id) + if fire_time is not None: + task_metadata["fire_time"] = fire_time # task/create — get-or-create by deterministic name, so a retry returns # the same task. For async / agentic agents this also forwards the task diff --git a/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py b/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py index 510e7633..89585610 100644 --- a/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py +++ b/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py @@ -131,9 +131,10 @@ async def test_async_agent_delivers_via_event_send( task = TaskEntity(id="task-1", task_metadata={"schedule_id": schedule.id}) use_case = _fake_use_case(_agent(ACPType.ASYNC), task) _patch_use_case(monkeypatch, use_case) + fire_id = f"{schedule.id}-run-2026-06-25T20:00:00Z" result = await activity_instance.launch_scheduled_agent_run( - schedule.id, "fire-1" + schedule.id, fire_id ) assert result["status"] == "launched" @@ -144,10 +145,11 @@ async def test_async_agent_delivers_via_event_send( assert methods == [AgentRPCMethod.TASK_CREATE, AgentRPCMethod.EVENT_SEND] # Deterministic task name embeds schedule id + fire id. 
create_params = use_case.handle_rpc_request.call_args_list[0].kwargs["params"] - assert create_params.name == f"scheduled-run:{schedule.id}:fire-1" + assert create_params.name == f"scheduled-run:{schedule.id}:{fire_id}" assert create_params.task_metadata["schedule_id"] == schedule.id - assert create_params.task_metadata["scheduled_fire_id"] == "fire-1" + assert create_params.task_metadata["scheduled_fire_id"] == fire_id assert create_params.task_metadata["trigger_type"] == "scheduled" + assert create_params.task_metadata["fire_time"] == "2026-06-25T20:00:00Z" use_case.task_service.update_task.assert_awaited_once() # Fire-time authz mirrors the RPC route: agent.execute, then task.create, # then task.update on the created task — in that order. @@ -191,15 +193,31 @@ async def test_manual_trigger_type_is_stamped(self, activity_instance, monkeypat task = TaskEntity(id="task-1") use_case = _fake_use_case(_agent(ACPType.ASYNC), task) _patch_use_case(monkeypatch, use_case) + fire_id = f"{schedule.id}-manual-00000000-0000-0000-0000-000000000000-2026-06-25T20:00:12.123456Z" result = await activity_instance.launch_scheduled_agent_run( - schedule.id, "manual-fire-1", "manual" + schedule.id, fire_id, "manual" ) assert result["trigger_type"] == "manual" create_params = use_case.handle_rpc_request.call_args_list[0].kwargs["params"] assert create_params.task_metadata["trigger_type"] == "manual" - assert create_params.task_metadata["scheduled_fire_id"] == "manual-fire-1" + assert create_params.task_metadata["scheduled_fire_id"] == fire_id + assert create_params.task_metadata["fire_time"] == "2026-06-25T20:00:12.123456Z" + + async def test_omits_fire_time_when_fire_id_has_no_timestamp( + self, activity_instance, monkeypatch + ): + schedule = _schedule() + activity_instance.schedule_repository.get.return_value = schedule + task = TaskEntity(id="task-1") + use_case = _fake_use_case(_agent(ACPType.ASYNC), task) + _patch_use_case(monkeypatch, use_case) + + await 
activity_instance.launch_scheduled_agent_run(schedule.id, "fire-1") + + create_params = use_case.handle_rpc_request.call_args_list[0].kwargs["params"] + assert "fire_time" not in create_params.task_metadata async def test_skips_delivery_when_already_delivered( self, activity_instance, monkeypatch From ebc3181478f4018c924ef3b9823760334f251c08 Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Fri, 26 Jun 2026 09:45:03 -0400 Subject: [PATCH 11/24] fix(schedules): let manual triggers fire paused schedules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The launch activity guarded on schedule.paused alone, ignoring the trigger_type that is already plumbed end-to-end. A manual /trigger of a paused schedule was started but silently skipped inside the workflow, while the API still returned 200 with the schedule body — the caller had no signal the run was dropped. Honor the stored paused state only for cadence-driven fires; explicit out-of-band manual triggers now bypass it. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../temporal/activities/scheduled_agent_run_activities.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/agentex/src/temporal/activities/scheduled_agent_run_activities.py b/agentex/src/temporal/activities/scheduled_agent_run_activities.py index 5df2c7fa..7f94befb 100644 --- a/agentex/src/temporal/activities/scheduled_agent_run_activities.py +++ b/agentex/src/temporal/activities/scheduled_agent_run_activities.py @@ -189,9 +189,11 @@ async def launch_scheduled_agent_run( "schedule_id": schedule_id, } - if schedule.paused: + if schedule.paused and trigger_type != "manual": # Temporal pauses the schedule too, but a manual trigger can still - # fire a paused schedule — honor the stored paused state defensively. + # fire a paused schedule. 
Honor the stored paused state defensively + # only for cadence-driven fires; explicit out-of-band manual triggers + # bypass it so an operator can run a paused schedule on demand. return { "status": "skipped", "reason": "schedule_paused", From d7e6bc83ec73d66d063b8d70bde7df5a1e9b3c6b Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Fri, 26 Jun 2026 09:47:02 -0400 Subject: [PATCH 12/24] feat(schedules): emit metrics for Temporal schedule operations Add a dual-emit metrics helper (OTel + Datadog StatsD, gated on configuration, never raises) mirroring the existing cache_metrics pattern, and instrument the create/update/delete schedule paths in the Temporal adapter. Each operation records success / not_found / error so the schedule's Temporal lifecycle is observable and drift between the Temporal clock and the Postgres source of truth is detectable. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/adapters/temporal/adapter_temporal.py | 11 +++ agentex/src/utils/schedule_metrics.py | 97 +++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 agentex/src/utils/schedule_metrics.py diff --git a/agentex/src/adapters/temporal/adapter_temporal.py b/agentex/src/adapters/temporal/adapter_temporal.py index 5d516fee..f7213fa7 100644 --- a/agentex/src/adapters/temporal/adapter_temporal.py +++ b/agentex/src/adapters/temporal/adapter_temporal.py @@ -39,6 +39,7 @@ ) from src.adapters.temporal.port import TemporalGateway from src.utils.logging import make_logger +from src.utils.schedule_metrics import record_schedule_temporal_op logger = make_logger(__name__) @@ -446,22 +447,26 @@ async def create_schedule( ) logger.info(f"Created schedule {schedule_id} successfully") + record_schedule_temporal_op("create", "success") return handle except ScheduleAlreadyRunningError as e: logger.error(f"Schedule {schedule_id} already exists: {e}") + record_schedule_temporal_op("create", "error") raise TemporalScheduleAlreadyExistsError( message=f"Schedule with ID 
'{schedule_id}' already exists", detail=str(e), ) from e except ValueError as e: logger.error(f"Invalid arguments for schedule {schedule_id}: {e}") + record_schedule_temporal_op("create", "error") raise TemporalInvalidArgumentError( message=f"Invalid arguments provided for schedule '{schedule_id}'", detail=str(e), ) from e except Exception as e: logger.error(f"Failed to create schedule {schedule_id}: {e}") + record_schedule_temporal_op("create", "error") raise TemporalScheduleError( message=f"Failed to create schedule '{schedule_id}'", detail=str(e), @@ -667,14 +672,17 @@ def _apply(input: ScheduleUpdateInput) -> ScheduleUpdate: handle = self.client.get_schedule_handle(schedule_id) await handle.update(_apply) logger.info(f"Updated schedule {schedule_id}") + record_schedule_temporal_op("update", "success") except Exception as e: if "not found" in str(e).lower(): logger.error(f"Schedule {schedule_id} not found: {e}") + record_schedule_temporal_op("update", "not_found") raise TemporalScheduleNotFoundError( message=f"Schedule '{schedule_id}' not found", detail=str(e), ) from e logger.error(f"Failed to update schedule {schedule_id}: {e}") + record_schedule_temporal_op("update", "error") raise TemporalScheduleError( message=f"Failed to update schedule '{schedule_id}'", detail=str(e), @@ -691,14 +699,17 @@ async def delete_schedule(self, schedule_id: str) -> None: handle = self.client.get_schedule_handle(schedule_id) await handle.delete() logger.info(f"Deleted schedule {schedule_id}") + record_schedule_temporal_op("delete", "success") except Exception as e: if "not found" in str(e).lower(): logger.error(f"Schedule {schedule_id} not found: {e}") + record_schedule_temporal_op("delete", "not_found") raise TemporalScheduleNotFoundError( message=f"Schedule '{schedule_id}' not found", detail=str(e), ) from e logger.error(f"Failed to delete schedule {schedule_id}: {e}") + record_schedule_temporal_op("delete", "error") raise TemporalScheduleError( message=f"Failed to delete 
schedule '{schedule_id}'", detail=str(e), diff --git a/agentex/src/utils/schedule_metrics.py b/agentex/src/utils/schedule_metrics.py new file mode 100644 index 00000000..b8a7f4ef --- /dev/null +++ b/agentex/src/utils/schedule_metrics.py @@ -0,0 +1,97 @@ +""" +Metrics instrumentation for Temporal-backed agent run schedule operations. + +Mirrors the dual-emit pattern in ``src/utils/cache_metrics.py``: + +- When an OTLP endpoint is configured (``OTEL_EXPORTER_OTLP_ENDPOINT``), counters + are recorded through the OpenTelemetry SDK. +- When the Datadog Agent is reachable (``DD_AGENT_HOST``), the same events are + emitted as StatsD counters. +- When neither is configured, every function here is a cheap no-op. + +The goal is to make the schedule's Temporal lifecycle observable: how often each +create/update/delete succeeds, hits a missing schedule, or errors out. These are +the signals needed to tell whether schedule operations are healthy or whether the +Temporal clock is drifting out of sync with the Postgres source of truth. +""" + +from __future__ import annotations + +import os +from typing import TYPE_CHECKING, Literal + +from datadog import statsd + +from src.utils.logging import make_logger +from src.utils.otel_metrics import get_meter + +if TYPE_CHECKING: + from opentelemetry.metrics import Counter + +logger = make_logger(__name__) + +# StatsD is only emitted if the Datadog Agent host is configured. +_STATSD_ENABLED = bool(os.environ.get("DD_AGENT_HOST")) + +# The Temporal schedule lifecycle operation being recorded. +ScheduleOperation = Literal["create", "update", "delete"] + +# Outcome of a single operation. "success" = the Temporal call completed; +# "not_found" = the schedule was already absent (TemporalScheduleNotFoundError); +# "error" = any other failure. +ScheduleResult = Literal["success", "not_found", "error"] + +# Lazily-created OTel instrument (created once, on first use). 
+_op_counter: Counter | None = None +_instruments_initialized = False + + +def _ensure_instruments() -> None: + """Create the OTel counter on first use. No-op if OTel is not configured.""" + global _op_counter, _instruments_initialized + + if _instruments_initialized: + return + _instruments_initialized = True + + meter = get_meter("agentex.agent_run_schedule") + if meter is None: + # OTel not configured; OTel path stays disabled. StatsD may still emit. + return + + _op_counter = meter.create_counter( + name="agent_run_schedule.temporal_op", + description="Temporal schedule lifecycle operations, tagged by operation and result", + unit="{operation}", + ) + + +def record_schedule_temporal_op( + operation: ScheduleOperation, result: ScheduleResult +) -> None: + """ + Record a single Temporal schedule lifecycle operation. + + Args: + operation: One of "create", "update", "delete". + result: One of "success", "not_found", "error". + + Never raises: emission failures (e.g. a StatsD UDP socket error or an OTel + SDK fault) are swallowed so instrumentation can never disrupt a caller on + the schedule path. + """ + try: + _ensure_instruments() + + if _op_counter is not None: + _op_counter.add(1, {"operation": operation, "result": result}) + + if _STATSD_ENABLED: + statsd.increment( + "agent_run_schedule.temporal_op", + tags=[f"operation:{operation}", f"result:{result}"], + ) + except Exception: + logger.debug( + "Failed to emit agent_run_schedule.temporal_op metric", exc_info=True + ) From 279e7e87afc0bc42f2d875feceea9e3886fc70a4 Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Fri, 26 Jun 2026 09:51:57 -0400 Subject: [PATCH 13/24] feat(schedules): soft-delete run schedule rows for audit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete now tombstones the Postgres row (deleted_at) instead of removing it, so deleted schedules remain auditable. 
The Temporal schedule is still deleted first so no further fires occur, and the auth entry is still deregistered. Reads (get/list) exclude tombstoned rows; create's existence check keeps include_deleted=True so a deleted (agent_id, name) stays reserved — names are not reusable in v1 (the existing unique index is unchanged; a partial index over active rows would be a clean later upgrade if reuse is needed). The migration adds the nullable deleted_at column to the (brand-new, unmerged) table's create_table; it was also incidentally normalized by ruff-format (quote style), which the pinned formatter applies on commit. Co-Authored-By: Claude Opus 4.8 (1M context) --- ...00_add_agent_run_schedules_3b1c9d2e4f6a.py | 74 +++++++++---------- agentex/src/adapters/orm.py | 3 + .../domain/entities/agent_run_schedules.py | 4 + .../agent_run_schedule_repository.py | 24 ++++-- .../services/agent_run_schedule_service.py | 16 ++-- 5 files changed, 71 insertions(+), 50 deletions(-) diff --git a/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py b/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py index 9d773db8..07b6fcd0 100644 --- a/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py +++ b/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py @@ -9,73 +9,71 @@ with IF NOT EXISTS-style guards (Alembic create_table on a fresh table), and the indexes target the just-created table so they are non-blocking by construction. """ + from collections.abc import Sequence import sqlalchemy as sa from alembic import op # revision identifiers, used by Alembic. 
-revision: str = '3b1c9d2e4f6a' -down_revision: str | None = 'c7a1b2d3e4f5' +revision: str = "3b1c9d2e4f6a" +down_revision: str | None = "c7a1b2d3e4f5" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: op.create_table( - 'agent_run_schedules', - sa.Column('id', sa.String(), nullable=False), - sa.Column('agent_id', sa.String(length=64), nullable=False), - sa.Column('name', sa.String(length=256), nullable=False), - sa.Column('description', sa.Text(), nullable=True), - sa.Column('cron_expression', sa.String(), nullable=True), - sa.Column('interval_seconds', sa.Integer(), nullable=True), - sa.Column( - 'timezone', sa.String(), server_default='UTC', nullable=False - ), - sa.Column('start_at', sa.DateTime(timezone=True), nullable=True), - sa.Column('end_at', sa.DateTime(timezone=True), nullable=True), + "agent_run_schedules", + sa.Column("id", sa.String(), nullable=False), + sa.Column("agent_id", sa.String(length=64), nullable=False), + sa.Column("name", sa.String(length=256), nullable=False), + sa.Column("description", sa.Text(), nullable=True), + sa.Column("cron_expression", sa.String(), nullable=True), + sa.Column("interval_seconds", sa.Integer(), nullable=True), + sa.Column("timezone", sa.String(), server_default="UTC", nullable=False), + sa.Column("start_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("end_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("paused", sa.Boolean(), server_default="false", nullable=False), + sa.Column("creator_principal", sa.JSON(), nullable=False), + sa.Column("task_params", sa.JSON(), nullable=True), + sa.Column("task_metadata", sa.JSON(), nullable=True), + sa.Column("initial_input", sa.JSON(), nullable=False), sa.Column( - 'paused', sa.Boolean(), server_default='false', nullable=False - ), - sa.Column('creator_principal', sa.JSON(), nullable=False), - sa.Column('task_params', sa.JSON(), nullable=True), - sa.Column('task_metadata', sa.JSON(), 
nullable=True), - sa.Column('initial_input', sa.JSON(), nullable=False), - sa.Column( - 'created_at', + "created_at", sa.DateTime(timezone=True), - server_default=sa.text('now()'), + server_default=sa.text("now()"), nullable=True, ), sa.Column( - 'updated_at', + "updated_at", sa.DateTime(timezone=True), - server_default=sa.text('now()'), + server_default=sa.text("now()"), nullable=True, ), - sa.ForeignKeyConstraint(['agent_id'], ['agents.id']), - sa.PrimaryKeyConstraint('id'), + # Soft-delete marker: NULL = active, set = tombstoned for audit. Deleted + # rows keep occupying their (agent_id, name) so names are not reusable. + sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True), + sa.ForeignKeyConstraint(["agent_id"], ["agents.id"]), + sa.PrimaryKeyConstraint("id"), ) # Indexes target the table created in this same migration, so they hold no # write-blocking lock against live traffic (the table has no rows yet). op.create_index( - 'uq_agent_run_schedules_agent_name', - 'agent_run_schedules', - ['agent_id', 'name'], + "uq_agent_run_schedules_agent_name", + "agent_run_schedules", + ["agent_id", "name"], unique=True, ) op.create_index( - 'idx_agent_run_schedules_agent', - 'agent_run_schedules', - ['agent_id'], + "idx_agent_run_schedules_agent", + "agent_run_schedules", + ["agent_id"], unique=False, ) def downgrade() -> None: - op.drop_index('idx_agent_run_schedules_agent', table_name='agent_run_schedules') - op.drop_index( - 'uq_agent_run_schedules_agent_name', table_name='agent_run_schedules' - ) - op.drop_table('agent_run_schedules') + op.drop_index("idx_agent_run_schedules_agent", table_name="agent_run_schedules") + op.drop_index("uq_agent_run_schedules_agent_name", table_name="agent_run_schedules") + op.drop_table("agent_run_schedules") diff --git a/agentex/src/adapters/orm.py b/agentex/src/adapters/orm.py index 49016124..49a4e28d 100644 --- a/agentex/src/adapters/orm.py +++ b/agentex/src/adapters/orm.py @@ -219,6 +219,9 @@ class 
AgentRunScheduleORM(BaseORM): updated_at = Column( DateTime(timezone=True), server_default=func.now(), onupdate=func.now() ) + # Soft-delete marker: NULL = active, set = tombstoned for audit. Deleted rows + # keep their (agent_id, name) so names are not reusable. + deleted_at = Column(DateTime(timezone=True), nullable=True) __table_args__ = ( # Schedule names are unique per agent (the get/pause/resume/delete diff --git a/agentex/src/domain/entities/agent_run_schedules.py b/agentex/src/domain/entities/agent_run_schedules.py index b51a46e7..a0a67132 100644 --- a/agentex/src/domain/entities/agent_run_schedules.py +++ b/agentex/src/domain/entities/agent_run_schedules.py @@ -87,3 +87,7 @@ class AgentRunScheduleEntity(BaseModel): updated_at: datetime | None = Field( None, description="When the schedule was last updated." ) + deleted_at: datetime | None = Field( + None, + description="When the schedule was soft-deleted (None while active).", + ) diff --git a/agentex/src/domain/repositories/agent_run_schedule_repository.py b/agentex/src/domain/repositories/agent_run_schedule_repository.py index a498859e..d28ec2ba 100644 --- a/agentex/src/domain/repositories/agent_run_schedule_repository.py +++ b/agentex/src/domain/repositories/agent_run_schedule_repository.py @@ -36,9 +36,13 @@ async def list_by_agent_id( limit: int | None = None, page_number: int | None = None, ) -> list[AgentRunScheduleEntity]: - """List run schedules for a single agent, newest first.""" + """List run schedules for a single agent, newest first. + + Soft-deleted schedules are excluded. 
+ """ query = select(AgentRunScheduleORM).where( - AgentRunScheduleORM.agent_id == agent_id + AgentRunScheduleORM.agent_id == agent_id, + AgentRunScheduleORM.deleted_at.is_(None), ) return await super().list( query=query, @@ -49,23 +53,31 @@ async def list_by_agent_id( ) async def get_by_agent_id_and_name( - self, agent_id: str, name: str + self, agent_id: str, name: str, include_deleted: bool = False ) -> AgentRunScheduleEntity | None: - """Get a run schedule by its (agent_id, name) natural key, or None.""" + """Get a run schedule by its (agent_id, name) natural key, or None. + + Soft-deleted schedules are excluded unless ``include_deleted`` is set + (used by create to keep a deleted name reserved — names are not reusable). + """ async with self.start_async_db_session(allow_writes=False) as session: query = select(AgentRunScheduleORM).where( AgentRunScheduleORM.agent_id == agent_id, AgentRunScheduleORM.name == name, ) + if not include_deleted: + query = query.where(AgentRunScheduleORM.deleted_at.is_(None)) result = await session.execute(query) row = result.scalars().first() return AgentRunScheduleEntity.model_validate(row) if row else None async def get_by_agent_id_and_name_or_raise( - self, agent_id: str, name: str + self, agent_id: str, name: str, include_deleted: bool = False ) -> AgentRunScheduleEntity: """Get a run schedule by (agent_id, name) or raise ItemDoesNotExist.""" - schedule = await self.get_by_agent_id_and_name(agent_id, name) + schedule = await self.get_by_agent_id_and_name( + agent_id, name, include_deleted=include_deleted + ) if schedule is None: raise ItemDoesNotExist( f"Run schedule '{name}' for agent '{agent_id}' does not exist." 
diff --git a/agentex/src/domain/services/agent_run_schedule_service.py b/agentex/src/domain/services/agent_run_schedule_service.py index b90b7088..b3423ebb 100644 --- a/agentex/src/domain/services/agent_run_schedule_service.py +++ b/agentex/src/domain/services/agent_run_schedule_service.py @@ -84,8 +84,9 @@ async def create_schedule( request: CreateAgentRunScheduleRequest, creator_principal: dict[str, Any], ) -> AgentRunScheduleResponse: + # include_deleted: a soft-deleted name stays reserved (no reuse in v1). existing = await self.schedule_repository.get_by_agent_id_and_name( - agent.id, request.name + agent.id, request.name, include_deleted=True ) if existing is not None: raise ClientError( @@ -213,10 +214,12 @@ async def delete_schedule(self, agent_id: str, name: str) -> str: ) temporal_id = build_run_schedule_temporal_id(row.id) # Temporal is the recurring clock; delete it first so no further fires can - # occur, then drop the row and the auth entry. A missing Temporal schedule - # is treated as success (the clock is already gone) so a prior partial - # delete — Temporal removed but the row write failed — can still be cleaned - # up through this path rather than being stranded forever. + # occur, then soft-delete the row and drop the auth entry. The Postgres row + # is tombstoned (deleted_at set) rather than removed so the schedule remains + # auditable and its (agent_id, name) stays reserved (names are not reusable). + # A missing Temporal schedule is treated as success (the clock is already + # gone) so a prior partial delete — Temporal removed but the row write + # failed — can still be cleaned up through this path rather than stranded. 
try: await self.temporal_adapter.delete_schedule(temporal_id) except TemporalScheduleNotFoundError: @@ -224,7 +227,8 @@ async def delete_schedule(self, agent_id: str, name: str) -> str: "run_schedule_temporal_already_absent_on_delete", extra={"temporal_id": temporal_id, "schedule_id": row.id}, ) - await self.schedule_repository.delete(id=row.id) + row.deleted_at = datetime.now(UTC) + await self.schedule_repository.update(row) await self._deregister_schedule_from_auth( authz_selector=build_run_schedule_authz_selector(agent_id, row.name) ) From 239961aed923af90a31a7243d5c9b7f899aa27da Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Fri, 26 Jun 2026 09:55:37 -0400 Subject: [PATCH 14/24] docs(schedules): document at-least-once scheduled delivery Clarify that the delivered marker is written after delivery on purpose: scheduled delivery is at-least-once by design, the duplicate window is a crash between send and the marker write, and a delivery-level idempotency_key is the post-v1 fix. Comment-only; no behavior change. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../activities/scheduled_agent_run_activities.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/agentex/src/temporal/activities/scheduled_agent_run_activities.py b/agentex/src/temporal/activities/scheduled_agent_run_activities.py index 7f94befb..15b16b3e 100644 --- a/agentex/src/temporal/activities/scheduled_agent_run_activities.py +++ b/agentex/src/temporal/activities/scheduled_agent_run_activities.py @@ -305,9 +305,12 @@ async def launch_scheduled_agent_run( agent_id=schedule.agent_id, ) - # Best-effort delivered marker for the retry guard above. A crash between - # delivery and this update is the only window where a retry could - # re-deliver; deterministic task naming still prevents duplicate tasks. 
+ # Best-effort delivered marker, written only AFTER delivery succeeds, so + # scheduled delivery is at-least-once by design: a crash after send but + # before this write makes a retry re-deliver (deterministic task naming + # still prevents duplicate tasks). Marker-after is deliberate — claiming + # before send would instead risk a silent missed delivery. A delivery-level + # idempotency_key in event/send & message/send is the post-v1 fix. task.task_metadata = { **(task.task_metadata or {}), _INPUT_DELIVERED_MARKER: True, From 2181e77616fea7ec83239e427f6abd48b608de91 Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Fri, 26 Jun 2026 09:55:40 -0400 Subject: [PATCH 15/24] feat(schedules): add version column reserved for optimistic concurrency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a monotonic version column (default 1) to the brand-new schedules table now, so a later optimistic-concurrency / change-history feature does not require a backfill on a populated table. Not enforced yet — no read-modify-write path increments it and no update is conditional on it. Co-Authored-By: Claude Opus 4.8 (1M context) --- ...2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py | 4 ++++ agentex/src/adapters/orm.py | 3 +++ agentex/src/domain/entities/agent_run_schedules.py | 7 +++++++ 3 files changed, 14 insertions(+) diff --git a/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py b/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py index 07b6fcd0..60eea115 100644 --- a/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py +++ b/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py @@ -54,6 +54,10 @@ def upgrade() -> None: # Soft-delete marker: NULL = active, set = tombstoned for audit. 
Deleted # rows keep occupying their (agent_id, name) so names are not reusable. sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True), + # Monotonic record version reserved for future optimistic concurrency / + # change history. Added now (brand-new table) to avoid a later backfill; + # not enforced yet. + sa.Column("version", sa.Integer(), nullable=False, server_default="1"), sa.ForeignKeyConstraint(["agent_id"], ["agents.id"]), sa.PrimaryKeyConstraint("id"), ) diff --git a/agentex/src/adapters/orm.py b/agentex/src/adapters/orm.py index 49a4e28d..3e501eb8 100644 --- a/agentex/src/adapters/orm.py +++ b/agentex/src/adapters/orm.py @@ -222,6 +222,9 @@ class AgentRunScheduleORM(BaseORM): # Soft-delete marker: NULL = active, set = tombstoned for audit. Deleted rows # keep their (agent_id, name) so names are not reusable. deleted_at = Column(DateTime(timezone=True), nullable=True) + # Monotonic record version reserved for future optimistic concurrency / + # change history. Not enforced yet — no read-modify-write path increments it. + version = Column(Integer, nullable=False, server_default="1") __table_args__ = ( # Schedule names are unique per agent (the get/pause/resume/delete diff --git a/agentex/src/domain/entities/agent_run_schedules.py b/agentex/src/domain/entities/agent_run_schedules.py index a0a67132..51135879 100644 --- a/agentex/src/domain/entities/agent_run_schedules.py +++ b/agentex/src/domain/entities/agent_run_schedules.py @@ -91,3 +91,10 @@ class AgentRunScheduleEntity(BaseModel): None, description="When the schedule was soft-deleted (None while active).", ) + version: int = Field( + 1, + description=( + "Monotonic record version reserved for future optimistic concurrency " + "control / change history. Not enforced yet." 
+ ), + ) From 6529c7e59664d76e5b10c2e42f67308c5a8f8af9 Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Fri, 26 Jun 2026 10:48:47 -0400 Subject: [PATCH 16/24] test(schedules): assert soft-delete tombstones the row Update the delete test to match the soft-delete behavior: the row is tombstoned via repository.update (deleted_at set) rather than hard-removed via repository.delete. The create-rollback path still hard-deletes and is unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../services/test_agent_run_schedule_service.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/agentex/tests/unit/services/test_agent_run_schedule_service.py b/agentex/tests/unit/services/test_agent_run_schedule_service.py index 6d5002bc..cce9e572 100644 --- a/agentex/tests/unit/services/test_agent_run_schedule_service.py +++ b/agentex/tests/unit/services/test_agent_run_schedule_service.py @@ -211,10 +211,13 @@ async def test_list_does_not_fan_out_to_temporal(self, service, agent): @pytest.mark.asyncio class TestAgentRunScheduleServiceDelete: async def test_delete_tolerates_missing_temporal_schedule(self, service, agent): - # A prior partial delete (Temporal gone, row survived) must still be - # cleanable: a missing Temporal schedule is treated as success. + # Delete soft-deletes (tombstones) the row for audit rather than removing + # it, and tolerates a missing Temporal schedule: a prior partial delete + # (Temporal gone, row survived) must still be cleanable, treating a + # missing clock as success. 
row = _persisted(agent.id, _request()) service.schedule_repository.get_by_agent_id_and_name_or_raise.return_value = row + service.schedule_repository.update.return_value = row service.temporal_adapter.delete_schedule.side_effect = ( TemporalScheduleNotFoundError(message="gone", detail="gone") ) @@ -222,7 +225,11 @@ async def test_delete_tolerates_missing_temporal_schedule(self, service, agent): result = await service.delete_schedule(agent.id, row.name) assert result == row.id - service.schedule_repository.delete.assert_called_once_with(id=row.id) + # Soft delete: the row is tombstoned via update, not hard-removed. + service.schedule_repository.delete.assert_not_called() + service.schedule_repository.update.assert_called_once() + tombstoned = service.schedule_repository.update.call_args.args[0] + assert tombstoned.deleted_at is not None service.authorization_service.deregister_resource.assert_called_once() From a52f9fd382b6b854cdab31f59a7f59f7481e8aed Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Fri, 26 Jun 2026 11:04:52 -0400 Subject: [PATCH 17/24] fix(schedules): apply list authorization filter before the limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit list_schedules applied the DB-level limit before the per-row authorization filter, so authorized schedules sorting beyond the limit window were silently dropped — a caller could miss schedules they are entitled to. Fetch the agent's rows without a DB limit, filter by authorization, then truncate to the limit. Safe at the expected low per-agent row count; push the authorized names into the query if schedules per agent ever grow large. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../services/agent_run_schedule_service.py | 11 ++++++++- .../test_agent_run_schedule_service.py | 24 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/agentex/src/domain/services/agent_run_schedule_service.py b/agentex/src/domain/services/agent_run_schedule_service.py index b3423ebb..29e515a4 100644 --- a/agentex/src/domain/services/agent_run_schedule_service.py +++ b/agentex/src/domain/services/agent_run_schedule_service.py @@ -162,7 +162,14 @@ async def list_schedules( authorized_schedule_ids: list[str] | None = None, limit: int = 100, ) -> AgentRunScheduleListResponse: - rows = await self.schedule_repository.list_by_agent_id(agent_id, limit=limit) + # Fetch without a DB limit so the authorization filter below runs against + # the full set, then truncate to ``limit`` after filtering. Applying the + # DB limit first would drop authorized schedules that sort beyond the + # window before the auth filter ever sees them, silently hiding rows the + # caller is entitled to. Safe at the expected low per-agent row count; if + # schedules per agent ever grow large, push the authorized names into the + # query instead. + rows = await self.schedule_repository.list_by_agent_id(agent_id) # Gate on ``is not None``: an empty list means the caller owns nothing and # everything is filtered out; None means authorization is bypassed. 
@@ -174,6 +181,8 @@ async def list_schedules( agent = await self.agent_repository.get(id=agent_id) items: list[AgentRunScheduleResponse] = [] for row in rows: + if len(items) >= limit: + break selector = build_run_schedule_authz_selector(agent_id, row.name) if authorized is not None and selector not in authorized: continue diff --git a/agentex/tests/unit/services/test_agent_run_schedule_service.py b/agentex/tests/unit/services/test_agent_run_schedule_service.py index cce9e572..01b2d791 100644 --- a/agentex/tests/unit/services/test_agent_run_schedule_service.py +++ b/agentex/tests/unit/services/test_agent_run_schedule_service.py @@ -183,6 +183,30 @@ async def test_list_filters_by_authorized_selectors(self, service, agent): assert result.total == 1 assert result.run_schedules[0].name == "sched-a" + async def test_list_filters_before_applying_limit(self, service, agent): + # The auth filter must run before the limit is applied: an authorized + # schedule that sorts beyond the limit window must still be returned, not + # silently dropped by a DB-level limit applied before filtering. + rows = [ + _persisted(agent.id, _request(name="other-a")), + _persisted(agent.id, _request(name="other-b")), + _persisted(agent.id, _request(name="mine")), + ] + service.schedule_repository.list_by_agent_id.return_value = rows + service.agent_repository.get.return_value = agent + + authorized = [build_run_schedule_authz_selector(agent.id, "mine")] + result = await service.list_schedules( + agent.id, authorized_schedule_ids=authorized, limit=1 + ) + + # The owned schedule is returned even though it sorts last and limit=1. + assert result.total == 1 + assert result.run_schedules[0].name == "mine" + # The query is no longer pre-truncated by limit (filtering happens first). 
+ _, kwargs = service.schedule_repository.list_by_agent_id.call_args + assert kwargs.get("limit") is None + async def test_list_none_authorized_means_bypass(self, service, agent): rows = [_persisted(agent.id, _request(name="sched-a"))] service.schedule_repository.list_by_agent_id.return_value = rows From ffe060f700fc637f01b8feba6ceb136b83461492 Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Fri, 26 Jun 2026 11:33:42 -0400 Subject: [PATCH 18/24] docs(schedules): note planned move to id-as-handle identity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document on the authz-selector builder that name currently doubles as the external identity (URL handle, unique key, authz selector) — hence a soft-deleted name stays reserved — and that moving the external identity to the immutable row id (with name as a mutable label) is a planned additive fast-follow, deferred to keep this change's scope contained. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/domain/services/agent_run_schedule_service.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/agentex/src/domain/services/agent_run_schedule_service.py b/agentex/src/domain/services/agent_run_schedule_service.py index 29e515a4..4c7f2583 100644 --- a/agentex/src/domain/services/agent_run_schedule_service.py +++ b/agentex/src/domain/services/agent_run_schedule_service.py @@ -54,6 +54,16 @@ def build_run_schedule_authz_selector(agent_id: str, name: str) -> str: Derivable from the (agent_id, name) path params so the CRUD endpoints can authorize without a prior DB lookup. The ``run-schedule::`` prefix namespaces the selector within the ``schedule`` resource type. + + Design note: ``name`` currently doubles as the external identity — URL + handle, unique key, and this authz selector — which is why a soft-deleted + name stays reserved (no reuse). 
The row's immutable ``id`` is the better + long-term handle (it is already returned in responses and is what Temporal + keys off), so moving the external identity to ``id`` with ``name`` as a + mutable label is a planned fast-follow that makes name reuse a non-issue and + keeps audit unambiguous. Deferred here to keep this change's scope contained; + the move is additive (id-based routes/selector alongside the name ones) + rather than a breaking reshape. """ return f"run-schedule::{agent_id}::{name}" From c051deb330c09f3a1caf4f3282a208f72d2c1c94 Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Fri, 26 Jun 2026 11:56:52 -0400 Subject: [PATCH 19/24] fix(schedules): enforce exactly-one cadence on create/update requests The create/update request models documented cron_expression and interval_seconds as mutually exclusive but did not enforce it. On create, sending both built a Temporal ScheduleSpec with both a cron and an interval (firing on both cadences); sending neither created a cadence-less schedule. On update, the apply loop set then cleared one of them, silently dropping a cadence and returning 200. Add model validators: create requires exactly one cadence; update (partial) rejects providing both while still allowing neither. The service's merged-result checks remain as defense-in-depth. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/api/schemas/agent_run_schedules.py | 24 +++++++++++++++++- .../test_agent_run_schedule_service.py | 25 +++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/agentex/src/api/schemas/agent_run_schedules.py b/agentex/src/api/schemas/agent_run_schedules.py index d49fb101..807f5af6 100644 --- a/agentex/src/api/schemas/agent_run_schedules.py +++ b/agentex/src/api/schemas/agent_run_schedules.py @@ -2,7 +2,7 @@ from enum import Enum from typing import Any, Literal -from pydantic import Field +from pydantic import Field, model_validator from src.domain.entities.task_messages import MessageAuthor from src.utils.model_utils import BaseModel @@ -93,6 +93,17 @@ class CreateAgentRunScheduleRequest(BaseModel): ..., description="The first input delivered to each created task." ) + @model_validator(mode="after") + def validate_single_cadence(self): + # Exactly one cadence is required at create time. Rejecting both here + # prevents a ScheduleSpec with both a cron and an interval (which would + # fire on both); rejecting neither prevents a cadence-less schedule. + if (self.cron_expression is None) == (self.interval_seconds is None): + raise ValueError( + "Provide exactly one of cron_expression or interval_seconds" + ) + return self + class AgentRunScheduleResponse(BaseModel): """Response model describing a scheduled agent run.""" @@ -199,6 +210,17 @@ class UpdateAgentRunScheduleRequest(BaseModel): None, description="Replacement initial input delivered to each created task." ) + @model_validator(mode="after") + def reject_both_cadences(self): + # Partial update: providing neither cadence is fine (other fields may be + # changing), but providing both is rejected here so the apply loop can't + # silently drop one and leave a misleading 200. 
+ if self.cron_expression is not None and self.interval_seconds is not None: + raise ValueError( + "Provide only one of cron_expression or interval_seconds, not both" + ) + return self + class PauseRunScheduleRequest(BaseModel): note: str | None = Field(None, description="Optional note explaining the pause.") diff --git a/agentex/tests/unit/services/test_agent_run_schedule_service.py b/agentex/tests/unit/services/test_agent_run_schedule_service.py index 01b2d791..b29821e2 100644 --- a/agentex/tests/unit/services/test_agent_run_schedule_service.py +++ b/agentex/tests/unit/services/test_agent_run_schedule_service.py @@ -3,6 +3,7 @@ from uuid import uuid4 import pytest +from pydantic import ValidationError from src.adapters.temporal.exceptions import TemporalScheduleNotFoundError from src.api.schemas.agent_run_schedules import ( CreateAgentRunScheduleRequest, @@ -362,3 +363,27 @@ async def test_trigger_starts_manual_workflow(self, service, agent): ) assert start_kwargs["args"] == [row.id, "manual"] assert start_kwargs["task_queue"] == "agentex-server" + + +@pytest.mark.unit +class TestCadenceValidation: + def test_create_rejects_both_cadences(self): + with pytest.raises(ValidationError): + _request(cron_expression="0 9 * * MON", interval_seconds=86400) + + def test_create_rejects_neither_cadence(self): + with pytest.raises(ValidationError): + _request(cron_expression=None, interval_seconds=None) + + def test_create_accepts_exactly_one_cadence(self): + assert _request(cron_expression=None, interval_seconds=3600) is not None + + def test_update_rejects_both_cadences(self): + with pytest.raises(ValidationError): + UpdateAgentRunScheduleRequest( + cron_expression="0 9 * * MON", interval_seconds=86400 + ) + + def test_update_allows_neither_cadence(self): + # Partial update changing only an unrelated field is valid. 
+ assert UpdateAgentRunScheduleRequest(description="new") is not None From b9bd9974c7f95027dcd5602cad235f224526fe12 Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Fri, 26 Jun 2026 12:03:15 -0400 Subject: [PATCH 20/24] refactor(schedules): drop now-redundant use-case cadence checks The cadence mutual-exclusivity rule is now enforced on the request models (create requires exactly one; update rejects both), which run at request deserialization. That makes the equivalent checks in the use case unreachable dead code, so remove them and the unit tests that exercised the use-case-layer rejection (the behavior is covered by the request-model validator tests). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../use_cases/agent_run_schedules_use_case.py | 15 ++--------- .../test_agent_run_schedules_use_case.py | 25 ------------------- 2 files changed, 2 insertions(+), 38 deletions(-) diff --git a/agentex/src/domain/use_cases/agent_run_schedules_use_case.py b/agentex/src/domain/use_cases/agent_run_schedules_use_case.py index 5dd55661..3b7cde93 100644 --- a/agentex/src/domain/use_cases/agent_run_schedules_use_case.py +++ b/agentex/src/domain/use_cases/agent_run_schedules_use_case.py @@ -9,7 +9,6 @@ UpdateAgentRunScheduleRequest, ) from src.domain.entities.agents import AgentEntity -from src.domain.exceptions import ClientError from src.domain.services.agent_run_schedule_service import DAgentRunScheduleService from src.utils.logging import make_logger @@ -31,14 +30,8 @@ async def create_schedule( request: CreateAgentRunScheduleRequest, creator_principal: dict[str, Any], ) -> AgentRunScheduleResponse: - if not request.cron_expression and not request.interval_seconds: - raise ClientError( - "Either cron_expression or interval_seconds must be provided" - ) - if request.cron_expression and request.interval_seconds: - raise ClientError( - "Provide only one of cron_expression or interval_seconds, not both" - ) + # Cadence mutual-exclusivity is enforced on the request models + 
# (CreateAgentRunScheduleRequest / UpdateAgentRunScheduleRequest). return await self.run_schedule_service.create_schedule( agent, request, creator_principal ) @@ -73,10 +66,6 @@ async def resume_schedule( async def update_schedule( self, agent_id: str, name: str, request: UpdateAgentRunScheduleRequest ) -> AgentRunScheduleResponse: - if request.cron_expression and request.interval_seconds: - raise ClientError( - "Provide only one of cron_expression or interval_seconds, not both" - ) return await self.run_schedule_service.update_schedule(agent_id, name, request) async def trigger_schedule( diff --git a/agentex/tests/unit/use_cases/test_agent_run_schedules_use_case.py b/agentex/tests/unit/use_cases/test_agent_run_schedules_use_case.py index bf4b088d..6240a9d4 100644 --- a/agentex/tests/unit/use_cases/test_agent_run_schedules_use_case.py +++ b/agentex/tests/unit/use_cases/test_agent_run_schedules_use_case.py @@ -8,7 +8,6 @@ UpdateAgentRunScheduleRequest, ) from src.domain.entities.agents import ACPType, AgentEntity, AgentStatus -from src.domain.exceptions import ClientError from src.domain.use_cases.agent_run_schedules_use_case import ( AgentRunSchedulesUseCase, ) @@ -68,22 +67,6 @@ async def test_create_with_interval_delegates(self, use_case, mock_service, agen mock_service.create_schedule.assert_called_once() - async def test_create_requires_a_cadence(self, use_case, agent): - request = _request(cron_expression=None, interval_seconds=None) - - with pytest.raises(ClientError) as exc: - await use_case.create_schedule(agent, request, {"user_id": "u1"}) - - assert "cron_expression or interval_seconds" in str(exc.value) - - async def test_create_rejects_both_cadences(self, use_case, agent): - request = _request(cron_expression="0 0 * * *", interval_seconds=30) - - with pytest.raises(ClientError) as exc: - await use_case.create_schedule(agent, request, {"user_id": "u1"}) - - assert "only one" in str(exc.value) - async def test_pause_resume_delete_delegate(self, use_case, 
mock_service, agent): await use_case.pause_schedule(agent.id, "daily-summary", note="n") mock_service.pause_schedule.assert_called_once_with( @@ -105,14 +88,6 @@ async def test_update_delegates(self, use_case, mock_service, agent): agent.id, "daily-summary", request ) - async def test_update_rejects_both_cadences(self, use_case, agent): - request = UpdateAgentRunScheduleRequest( - cron_expression="0 0 * * *", interval_seconds=30 - ) - with pytest.raises(ClientError) as exc: - await use_case.update_schedule(agent.id, "daily-summary", request) - assert "only one" in str(exc.value) - async def test_trigger_delegates(self, use_case, mock_service, agent): await use_case.trigger_schedule(agent.id, "daily-summary") mock_service.trigger_schedule.assert_called_once_with(agent.id, "daily-summary") From a00146a47c2b14ac13bb77da36408c6e0aef6f8f Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Fri, 26 Jun 2026 12:12:30 -0400 Subject: [PATCH 21/24] fix(schedules): skip launch for soft-deleted schedules After soft-delete, the tombstoned row still loads by id (the base repo get() does not filter deleted_at), so a fire already in flight at delete time, or an activity retry after delete, could still create a task and deliver input. Guard the launch activity on deleted_at and skip with reason schedule_deleted. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../activities/scheduled_agent_run_activities.py | 7 +++++++ .../temporal/test_scheduled_agent_run_activity.py | 11 +++++++++++ 2 files changed, 18 insertions(+) diff --git a/agentex/src/temporal/activities/scheduled_agent_run_activities.py b/agentex/src/temporal/activities/scheduled_agent_run_activities.py index 15b16b3e..1c0dc16b 100644 --- a/agentex/src/temporal/activities/scheduled_agent_run_activities.py +++ b/agentex/src/temporal/activities/scheduled_agent_run_activities.py @@ -189,6 +189,13 @@ async def launch_scheduled_agent_run( "schedule_id": schedule_id, } + if schedule.deleted_at is not None: + return { + "status": "skipped", + "reason": "schedule_deleted", + "schedule_id": schedule_id, + } + if schedule.paused and trigger_type != "manual": # Temporal pauses the schedule too, but a manual trigger can still # fire a paused schedule. Honor the stored paused state defensively diff --git a/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py b/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py index 89585610..f8478717 100644 --- a/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py +++ b/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py @@ -1,3 +1,4 @@ +from datetime import UTC, datetime from unittest.mock import AsyncMock, MagicMock from uuid import uuid4 @@ -123,6 +124,16 @@ async def test_skips_when_paused(self, activity_instance): assert result["status"] == "skipped" assert result["reason"] == "schedule_paused" + async def test_skips_when_schedule_deleted(self, activity_instance): + activity_instance.schedule_repository.get.return_value = _schedule( + deleted_at=datetime.now(UTC) + ) + + result = await activity_instance.launch_scheduled_agent_run("sched-1", "fire-1") + + assert result["status"] == "skipped" + assert result["reason"] == "schedule_deleted" + async def test_async_agent_delivers_via_event_send( self, activity_instance, monkeypatch ): From 
febeba4612ea08b8eafc33056b86c63fc4a28902 Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Fri, 26 Jun 2026 12:12:33 -0400 Subject: [PATCH 22/24] fix(schedules): use strict bool parser for ENABLE_AGENT_RUN_SCHEDULES The flag used os.environ.get(...) == "true", which silently disabled the feature for True / TRUE / 1. Switch to _parse_bool_env so it accepts true/false/1/0 case-insensitively (and fails loud on garbage), matching the other boolean flags. Co-Authored-By: Claude Opus 4.8 (1M context) --- agentex/src/config/environment_variables.py | 4 ++-- .../tests/unit/config/test_agent_run_schedules_env.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/agentex/src/config/environment_variables.py b/agentex/src/config/environment_variables.py index bd5a7340..09415505 100644 --- a/agentex/src/config/environment_variables.py +++ b/agentex/src/config/environment_variables.py @@ -251,8 +251,8 @@ def refresh(cls, force_refresh: bool = False) -> EnvironmentVariables | None: ENABLE_HEALTH_CHECK_WORKFLOW=_parse_bool_env( EnvVarKeys.ENABLE_HEALTH_CHECK_WORKFLOW, default=False ), - ENABLE_AGENT_RUN_SCHEDULES=( - os.environ.get(EnvVarKeys.ENABLE_AGENT_RUN_SCHEDULES, "false") == "true" + ENABLE_AGENT_RUN_SCHEDULES=_parse_bool_env( + EnvVarKeys.ENABLE_AGENT_RUN_SCHEDULES, default=False ), WEBHOOK_REQUEST_TIMEOUT=float( os.environ.get(EnvVarKeys.WEBHOOK_REQUEST_TIMEOUT, "15.0") diff --git a/agentex/tests/unit/config/test_agent_run_schedules_env.py b/agentex/tests/unit/config/test_agent_run_schedules_env.py index 6842f9d5..79aeeb12 100644 --- a/agentex/tests/unit/config/test_agent_run_schedules_env.py +++ b/agentex/tests/unit/config/test_agent_run_schedules_env.py @@ -11,6 +11,16 @@ def test_agent_run_schedules_flag_parses_enabled(monkeypatch): assert env.ENABLE_AGENT_RUN_SCHEDULES is True +@pytest.mark.unit +@pytest.mark.parametrize("value", ["True", "TRUE", "1"]) +def test_agent_run_schedules_flag_uses_strict_bool_parser(monkeypatch, value): + 
monkeypatch.setenv("ENABLE_AGENT_RUN_SCHEDULES", value) + + env = EnvironmentVariables.refresh(force_refresh=True) + + assert env.ENABLE_AGENT_RUN_SCHEDULES is True + + @pytest.mark.unit def test_agent_run_schedules_flag_defaults_disabled(monkeypatch): monkeypatch.delenv("ENABLE_AGENT_RUN_SCHEDULES", raising=False) From 24c7fcfa009b283859f72578785e2014ffeb3feb Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Fri, 26 Jun 2026 12:14:44 -0400 Subject: [PATCH 23/24] test(schedules): cover run schedule route authz Add route-level coverage for the new run schedule selector and authorization gates so future refactors preserve the intended access checks. Co-authored-by: Cursor --- .../api/test_agent_run_schedules_authz.py | 355 ++++++++++++++++++ 1 file changed, 355 insertions(+) create mode 100644 agentex/tests/unit/api/test_agent_run_schedules_authz.py diff --git a/agentex/tests/unit/api/test_agent_run_schedules_authz.py b/agentex/tests/unit/api/test_agent_run_schedules_authz.py new file mode 100644 index 00000000..9243d8c0 --- /dev/null +++ b/agentex/tests/unit/api/test_agent_run_schedules_authz.py @@ -0,0 +1,355 @@ +"""Route-level authorization tests for agent run schedules. + +The service tests cover schedule behavior; these tests keep the HTTP route +wiring honest: selector shape, operation choice, create's parent-agent gate, and +list's ownership-filter dependency. 
+""" + +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock + +import pytest +from src.adapters.authorization.exceptions import AuthorizationError +from src.adapters.crud_store.exceptions import ItemDoesNotExist +from src.api.routes.agent_run_schedules import ( + create_run_schedule, + delete_run_schedule, + get_run_schedule, + list_run_schedules, + pause_run_schedule, + resume_run_schedule, + trigger_run_schedule, + update_run_schedule, +) +from src.api.schemas.agent_run_schedules import ( + CreateAgentRunScheduleRequest, + ScheduleInitialInput, + UpdateAgentRunScheduleRequest, +) +from src.api.schemas.authorization_types import ( + AgentexResource, + AgentexResourceType, + AuthorizedOperationType, +) +from src.domain.services.agent_run_schedule_service import ( + build_run_schedule_authz_selector, +) +from src.utils.authorization_shortcuts import DAuthorizedResourceIds +from src.utils.schedule_authorization import _check_schedule_or_collapse_to_404 + + +def _dep_callable(annotation): + """Pull the inner FastAPI dependency out of an Annotated Depends.""" + return annotation.__metadata__[0].dependency + + +def _authz_selector(agent_id: str = "agent-1", name: str = "nightly") -> str: + return build_run_schedule_authz_selector(agent_id, name) + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestCheckScheduleOrCollapseTo404: + async def test_allowed_check_returns_normally(self): + authorization = MagicMock() + authorization.check = AsyncMock(return_value=True) + + await _check_schedule_or_collapse_to_404( + authorization, + _authz_selector(), + AuthorizedOperationType.read, + ) + + called = authorization.check.await_args.kwargs + assert called["resource"] == AgentexResource.schedule(_authz_selector()) + assert called["operation"] == AuthorizedOperationType.read + + async def test_denied_read_collapses_to_not_found(self): + authorization = MagicMock() + authorization.check = AsyncMock(side_effect=AuthorizationError("denied")) + + with 
pytest.raises(ItemDoesNotExist): + await _check_schedule_or_collapse_to_404( + authorization, + _authz_selector(), + AuthorizedOperationType.read, + ) + + authorization.check.assert_awaited_once() + + async def test_denied_non_read_collapses_when_read_denied(self): + authorization = MagicMock() + authorization.check = AsyncMock(side_effect=AuthorizationError("denied")) + + with pytest.raises(ItemDoesNotExist): + await _check_schedule_or_collapse_to_404( + authorization, + _authz_selector(), + AuthorizedOperationType.delete, + ) + + assert authorization.check.await_count == 2 + first_call, second_call = authorization.check.await_args_list + assert first_call.kwargs["operation"] == AuthorizedOperationType.delete + assert second_call.kwargs["operation"] == AuthorizedOperationType.read + + async def test_denied_non_read_surfaces_when_read_allowed(self): + authorization = MagicMock() + operation_denied = AuthorizationError("denied") + authorization.check = AsyncMock(side_effect=[operation_denied, True]) + + with pytest.raises(AuthorizationError) as exc_info: + await _check_schedule_or_collapse_to_404( + authorization, + _authz_selector(), + AuthorizedOperationType.delete, + ) + + assert exc_info.value is operation_denied + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestSingleResourceRouteAuthz: + async def test_get_checks_read_and_calls_use_case(self): + authorization = MagicMock() + authorization.check = AsyncMock(return_value=True) + use_case = MagicMock() + use_case.get_schedule = AsyncMock(return_value=MagicMock()) + + await get_run_schedule( + agent_id="agent-1", + name="nightly", + run_schedules_use_case=use_case, + authorization=authorization, + ) + + called = authorization.check.await_args.kwargs + assert called["resource"] == AgentexResource.schedule(_authz_selector()) + assert called["operation"] == AuthorizedOperationType.read + use_case.get_schedule.assert_awaited_once_with("agent-1", "nightly") + + async def 
test_get_denied_collapses_to_404_and_skips_use_case(self): + authorization = MagicMock() + authorization.check = AsyncMock(side_effect=AuthorizationError("denied")) + use_case = MagicMock() + use_case.get_schedule = AsyncMock() + + with pytest.raises(ItemDoesNotExist): + await get_run_schedule( + agent_id="agent-1", + name="nightly", + run_schedules_use_case=use_case, + authorization=authorization, + ) + + use_case.get_schedule.assert_not_called() + + @pytest.mark.parametrize( + ("route", "method_name"), + [ + (pause_run_schedule, "pause_schedule"), + (resume_run_schedule, "resume_schedule"), + (trigger_run_schedule, "trigger_schedule"), + ], + ) + async def test_mutation_routes_use_update_op(self, route, method_name): + authorization = MagicMock() + authorization.check = AsyncMock(return_value=True) + use_case = MagicMock() + setattr(use_case, method_name, AsyncMock(return_value=MagicMock())) + + await route( + agent_id="agent-1", + name="nightly", + run_schedules_use_case=use_case, + authorization=authorization, + ) + + called = authorization.check.await_args.kwargs + assert called["resource"] == AgentexResource.schedule(_authz_selector()) + assert called["operation"] == AuthorizedOperationType.update + getattr(use_case, method_name).assert_awaited_once() + + async def test_update_route_uses_update_op(self): + authorization = MagicMock() + authorization.check = AsyncMock(return_value=True) + use_case = MagicMock() + use_case.update_schedule = AsyncMock(return_value=MagicMock()) + request = UpdateAgentRunScheduleRequest(description="new") + + await update_run_schedule( + agent_id="agent-1", + name="nightly", + request=request, + run_schedules_use_case=use_case, + authorization=authorization, + ) + + called = authorization.check.await_args.kwargs + assert called["resource"] == AgentexResource.schedule(_authz_selector()) + assert called["operation"] == AuthorizedOperationType.update + use_case.update_schedule.assert_awaited_once_with("agent-1", "nightly", request) + 
+ async def test_delete_uses_delete_op_and_denial_skips_delete(self): + authorization = MagicMock() + authorization.check = AsyncMock(side_effect=AuthorizationError("denied")) + use_case = MagicMock() + use_case.delete_schedule = AsyncMock() + + with pytest.raises(ItemDoesNotExist): + await delete_run_schedule( + agent_id="agent-1", + name="nightly", + run_schedules_use_case=use_case, + authorization=authorization, + ) + + use_case.delete_schedule.assert_not_called() + first_call, second_call = authorization.check.await_args_list + assert first_call.kwargs["operation"] == AuthorizedOperationType.delete + assert second_call.kwargs["operation"] == AuthorizedOperationType.read + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestCreateParentAgentCheck: + @staticmethod + def _agent_id_dep(): + return _dep_callable(create_run_schedule.__annotations__["agent_id"]) + + async def test_create_checks_parent_agent_update(self): + dep = self._agent_id_dep() + authorization = MagicMock() + authorization.check = AsyncMock(return_value=True) + + result = await dep( + authorization, + MagicMock(), + MagicMock(), + MagicMock(), + resource_id="agent-1", + ) + + assert result == "agent-1" + called = authorization.check.await_args.kwargs + assert called["resource"] == AgentexResource.agent("agent-1") + assert called["operation"] == AuthorizedOperationType.update + + async def test_create_denied_collapses_to_404(self): + dep = self._agent_id_dep() + authorization = MagicMock() + authorization.check = AsyncMock(side_effect=AuthorizationError("denied")) + + with pytest.raises(ItemDoesNotExist): + await dep( + authorization, + MagicMock(), + MagicMock(), + MagicMock(), + resource_id="agent-1", + ) + + async def test_create_denied_when_parent_readable_surfaces_403(self): + dep = self._agent_id_dep() + authorization = MagicMock() + authorization.check = AsyncMock( + side_effect=[AuthorizationError("update denied"), True] + ) + + with pytest.raises(AuthorizationError): + await dep( + 
authorization, + MagicMock(), + MagicMock(), + MagicMock(), + resource_id="agent-1", + ) + + async def test_create_route_fetches_agent_and_captures_creator_principal(self): + agents_use_case = MagicMock() + agent = MagicMock(id="agent-1") + agents_use_case.get = AsyncMock(return_value=agent) + run_schedules_use_case = MagicMock() + run_schedules_use_case.create_schedule = AsyncMock(return_value=MagicMock()) + request = CreateAgentRunScheduleRequest( + name="nightly", + interval_seconds=3600, + initial_input=ScheduleInitialInput(content="hello"), + ) + http_request = SimpleNamespace( + state=SimpleNamespace( + principal_context={ + "principal_type": "user", + "user_id": "user-1", + "account_id": "account-1", + "authorization": "Bearer secret", + } + ) + ) + + await create_run_schedule( + agent_id="agent-1", + request=request, + http_request=http_request, + agents_use_case=agents_use_case, + run_schedules_use_case=run_schedules_use_case, + ) + + agents_use_case.get.assert_awaited_once_with(id="agent-1") + run_schedules_use_case.create_schedule.assert_awaited_once_with( + agent, + request, + { + "principal_type": "user", + "user_id": "user-1", + "account_id": "account-1", + }, + ) + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestListOwnershipFiltering: + async def test_list_route_forwards_authorized_ids_and_limit(self): + use_case = MagicMock() + use_case.list_schedules = AsyncMock(return_value=MagicMock()) + + await list_run_schedules( + agent_id="agent-1", + run_schedules_use_case=use_case, + authorized_schedule_ids=[_authz_selector("agent-1", "mine")], + limit=5, + ) + + use_case.list_schedules.assert_awaited_once_with( + "agent-1", + authorized_schedule_ids=[_authz_selector("agent-1", "mine")], + limit=5, + ) + + async def test_authorized_resource_ids_dependency_lists_schedule_reads(self): + dep = _dep_callable( + DAuthorizedResourceIds( + AgentexResourceType.schedule, AuthorizedOperationType.read + ) + ) + authorization = MagicMock() + 
authorization.list_resources = AsyncMock(return_value=[_authz_selector()]) + + result = await dep(authorization) + + assert result == [_authz_selector()] + authorization.list_resources.assert_awaited_once_with( + filter_resource=AgentexResourceType.schedule, + filter_operation=AuthorizedOperationType.read, + ) + + async def test_authorized_resource_ids_dependency_preserves_bypass_none(self): + dep = _dep_callable(DAuthorizedResourceIds(AgentexResourceType.schedule)) + authorization = MagicMock() + authorization.list_resources = AsyncMock(return_value=None) + + result = await dep(authorization) + + assert result is None From e6ea64b2c9d607061a6db2fd5356cb3b55fefb10 Mon Sep 17 00:00:00 2001 From: Jerome Romualdez Date: Fri, 26 Jun 2026 13:16:39 -0400 Subject: [PATCH 24/24] fix(temporal): pass workflow args via args= in start_workflow start_workflow spread the args list positionally into client.start_workflow, but Temporal's client takes a single positional arg and requires multiple args via the args= keyword. With one arg this happened to work; the schedule manual-trigger path passes two ([schedule_id, trigger_type]) and hit 'takes from 2 to 3 positional arguments but 4 were given', returning HTTP 500. Pass args via the keyword so any arity works. Co-Authored-By: Claude Opus 4.8 (1M context) --- agentex/src/adapters/temporal/adapter_temporal.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/agentex/src/adapters/temporal/adapter_temporal.py b/agentex/src/adapters/temporal/adapter_temporal.py index f7213fa7..617e5b96 100644 --- a/agentex/src/adapters/temporal/adapter_temporal.py +++ b/agentex/src/adapters/temporal/adapter_temporal.py @@ -93,10 +93,13 @@ async def start_workflow( if start_delay: options["start_delay"] = start_delay - # Start the workflow + # Start the workflow. 
Temporal's client.start_workflow takes a single + # positional ``arg``; multiple workflow arguments must be passed via + # the ``args`` keyword (spreading them positionally raises "takes from + # 2 to 3 positional arguments"). handle = await self.client.start_workflow( workflow, - *args if args else [], + args=args if args else [], **options, )