From b05a09965d7e33806bbb12864949e3348de31b52 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Fri, 26 Jun 2026 11:59:37 +0800 Subject: [PATCH 1/2] Add act_in_view: scroll a target into view, then act when actionable Composes scroll_find.scroll_until_visible with actionability.act_when_ready so a 'click the off-screen row' step works in one call: scroll until the target is located, then run the actionability gate at its point and perform the action. ScrollPlan bundles the scroll search + locator/scroller seams to keep the call within the argument limit; the actionability probes and gate config are injectable, so the whole flow is testable without a screen. --- WHATS_NEW.md | 6 ++ .../doc/new_features/v221_features_doc.rst | 49 ++++++++++ .../Zh/doc/new_features/v221_features_doc.rst | 38 ++++++++ je_auto_control/__init__.py | 3 + .../gui/script_builder/command_schema.py | 17 ++++ je_auto_control/utils/act_in_view/__init__.py | 6 ++ .../utils/act_in_view/act_in_view.py | 70 ++++++++++++++ .../utils/executor/action_executor.py | 17 ++++ .../utils/mcp_server/tools/_factories.py | 15 +++ .../utils/mcp_server/tools/_handlers.py | 7 ++ .../headless/test_act_in_view_batch.py | 93 +++++++++++++++++++ 11 files changed, 321 insertions(+) create mode 100644 docs/source/Eng/doc/new_features/v221_features_doc.rst create mode 100644 docs/source/Zh/doc/new_features/v221_features_doc.rst create mode 100644 je_auto_control/utils/act_in_view/__init__.py create mode 100644 je_auto_control/utils/act_in_view/act_in_view.py create mode 100644 test/unit_test/headless/test_act_in_view_batch.py diff --git a/WHATS_NEW.md b/WHATS_NEW.md index 18ba3291..d7bf15b2 100644 --- a/WHATS_NEW.md +++ b/WHATS_NEW.md @@ -2,6 +2,12 @@ ## What's new (2026-06-26) +### Act In View — Scroll to a Target, Then Act When Actionable + +Click the row three pages down: scroll it into view, then gate on actionability before clicking. 
Full reference: [`docs/source/Eng/doc/new_features/v221_features_doc.rst`](docs/source/Eng/doc/new_features/v221_features_doc.rst). + +- **`act_in_view` / `ScrollPlan`** (`AC_act_in_view`): two reliability primitives stayed separate — `scroll_find.scroll_until_visible` brings an off-screen target on-screen, and `actionability.act_when_ready` waits for it to be visible/stable/enabled/unoccluded before acting. A real "click the off-screen row" step needs both. `act_in_view` composes them: scroll until the target is located, then run the actionability gate at its point and perform the action. `ScrollPlan` bundles the scroll search + its `locator`/`scroller` seams so the call stays within the argument limit; the actionability probes (`region_sampler`/`enabled_probe`/`hit_tester`) and gate `config` are injectable too, so the whole flow is testable without a screen. Closes the input-fidelity lane's composition gap. No `PySide6`. + ### Template-Free Element Proposal (Pixels to Elements) Get a clean numbered element list straight from the screen when there's no accessibility tree. Full reference: [`docs/source/Eng/doc/new_features/v220_features_doc.rst`](docs/source/Eng/doc/new_features/v220_features_doc.rst). diff --git a/docs/source/Eng/doc/new_features/v221_features_doc.rst b/docs/source/Eng/doc/new_features/v221_features_doc.rst new file mode 100644 index 00000000..e3d9711a --- /dev/null +++ b/docs/source/Eng/doc/new_features/v221_features_doc.rst @@ -0,0 +1,49 @@ +Act In View — Scroll to a Target, Then Act When Actionable +========================================================== + +Two reliability primitives stayed separate: ``scroll_find.scroll_until_visible`` +brings an off-screen target on-screen, and ``actionability.act_when_ready`` waits +for a target to be visible / stable / enabled / unoccluded before acting. A real +"click the row three pages down" step needs *both* — scroll to it, then gate +before clicking. ``act_in_view`` composes them into one call. 
+ +* :class:`ScrollPlan` — bundles the scroll search (``kind`` / ``direction`` / + ``max_scrolls`` / ``scroll_amount``) and its injectable ``locator`` / + ``scroller`` seams, so the composed call stays within a sane argument count. +* :func:`act_in_view` — scroll until the target is found, then run the + actionability gate at its location and perform ``action`` on it. + +Every seam — the scroll locator / scroller, the action, the actionability probes +(``region_sampler`` / ``enabled_probe`` / ``hit_tester``) and the gate ``config`` +— is injectable, so the whole flow is testable without a screen. Reuses +:func:`scroll_find.scroll_until_visible` and +:func:`actionability.act_when_ready`. Imports no ``PySide6``. + +Headless API +------------ + +.. code-block:: python + + from je_auto_control import act_in_view, ScrollPlan + + # Scroll down to the "Submit" button image, then click it once it's actionable + act_in_view("submit.png", lambda point: click(point[0], point[1]), + scroll=ScrollPlan(kind="image", direction="down", + max_scrolls=20)) + +``act_in_view`` returns ``{acted, coords, scrolls, result}`` (``result`` is the +action's return value) and raises ``AutoControlActionException`` if the target +never comes into view. Pass ``enabled_probe`` / ``hit_tester`` / ``config`` to +have the actionability gate actually wait for the control to be enabled and +unoccluded before the action fires — otherwise it acts as soon as the target is +located. + +Executor commands +----------------- + +``AC_act_in_view`` (``target`` + ``kind`` / ``direction`` / ``max_scrolls`` / +``scroll_amount`` / ``button`` → ``{acted, coords, scrolls}``) scrolls a template +or text target into view and clicks it. It is also available as the matching ``ac_act_in_view`` MCP +tool and as a Script Builder command under **Flow**. :func:`act_in_view` (which +takes an arbitrary action and the actionability probes) is the Python-API +surface.
diff --git a/docs/source/Zh/doc/new_features/v221_features_doc.rst b/docs/source/Zh/doc/new_features/v221_features_doc.rst new file mode 100644 index 00000000..986ae7b9 --- /dev/null +++ b/docs/source/Zh/doc/new_features/v221_features_doc.rst @@ -0,0 +1,38 @@ +在視野內操作——捲動到目標,再於可操作時動作 +============================================ + +兩個可靠性原語原本各自獨立:``scroll_find.scroll_until_visible`` 把螢幕外的目標捲進畫面, +``actionability.act_when_ready`` 則在目標可見 / 穩定 / 啟用 / 未被遮擋前等待再動作。真實的 +「點選下三頁的那一列」步驟需要*兩者*——先捲到它,再閘控後才點擊。``act_in_view`` 把它們組合成單一呼叫。 + +* :class:`ScrollPlan` ——把捲動搜尋(``kind`` / ``direction`` / ``max_scrolls`` / + ``scroll_amount``)與其可注入的 ``locator`` / ``scroller`` 接縫打包,讓組合後的呼叫維持在合理的參數數量內。 +* :func:`act_in_view` ——捲動直到找到目標,接著在其位置執行 actionability 閘控,並對其執行 ``action``。 + +每個接縫——捲動的 locator / scroller、action、actionability 探針(``region_sampler`` / +``enabled_probe`` / ``hit_tester``)與閘控 ``config``——皆可注入,故整個流程能在沒有螢幕的情況下測試。 +重用 :func:`scroll_find.scroll_until_visible` 與 :func:`actionability.act_when_ready`。不匯入 ``PySide6``。 + +無頭 API +-------- + +.. 
code-block:: python + + from je_auto_control import act_in_view, ScrollPlan + + # 向下捲動到「Submit」按鈕影像,於可操作時點擊 + act_in_view("submit.png", lambda point: click(point[0], point[1]), + scroll=ScrollPlan(kind="image", direction="down", + max_scrolls=20)) + +``act_in_view`` 回傳 ``{acted, coords, scrolls, result}``(``result`` 為 action 的回傳值), +若目標始終未進入畫面則丟出 ``AutoControlActionException``。傳入 ``enabled_probe`` / ``hit_tester`` / +``config`` 可讓 actionability 閘控真正等到控制項已啟用且未被遮擋才觸發動作——否則一旦定位到目標即動作。 + +執行器指令 +---------- + +``AC_act_in_view``(``target`` 加上 ``kind`` / ``direction`` / ``max_scrolls`` / +``scroll_amount`` / ``button`` → ``{acted, coords, scrolls}``)把 template 或文字目標捲入畫面並點擊。 +以對應的 ``ac_act_in_view`` MCP 工具及 Script Builder 指令(位於 **Flow** 分類下)形式提供。 +:func:`act_in_view`(接受任意 action 與 actionability 探針)則是 Python API 介面。 diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py index 1ccf1d6d..7817f77d 100644 --- a/je_auto_control/__init__.py +++ b/je_auto_control/__init__.py @@ -151,6 +151,8 @@ ) # Propose a clean element list from raw pixels (template-free) from je_auto_control.utils.element_proposal import propose_elements, tag_kinds +# Scroll a target into view, then act on it once it is actionable +from je_auto_control.utils.act_in_view import ScrollPlan, act_in_view # Rich clipboard formats — RTF + CSV/TSV codecs and Windows get / set from je_auto_control.utils.clipboard_rich_formats import ( build_rtf, csv_to_rows, get_clipboard_csv, get_clipboard_rtf, rows_to_csv, @@ -1782,6 +1784,7 @@ def start_autocontrol_gui(*args, **kwargs): "localize_changes", "rank_changes", "classify_widget", "box_features", "classify_icon", "propose_elements", "tag_kinds", + "act_in_view", "ScrollPlan", "build_rtf", "rtf_to_text", "rows_to_csv", "csv_to_rows", "set_clipboard_rtf", "get_clipboard_rtf", "set_clipboard_csv", "get_clipboard_csv", diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py index 
50e2031b..29885136 100644 --- a/je_auto_control/gui/script_builder/command_schema.py +++ b/je_auto_control/gui/script_builder/command_schema.py @@ -4522,6 +4522,23 @@ def _add_work_queue_specs(specs: List[CommandSpec]) -> None: ), description="Index where a busy/idle series first settles idle.", )) + specs.append(CommandSpec( + "AC_act_in_view", "Flow", "Act In View (scroll + click)", + fields=( + FieldSpec("target", FieldType.STRING, + placeholder="template path or text"), + FieldSpec("kind", FieldType.STRING, optional=True, + default="image", placeholder="image / text"), + FieldSpec("direction", FieldType.STRING, optional=True, + default="down", placeholder="up / down"), + FieldSpec("max_scrolls", FieldType.INT, optional=True, default=10), + FieldSpec("scroll_amount", FieldType.INT, optional=True, + default=3), + FieldSpec("button", FieldType.STRING, optional=True, + default="left"), + ), + description="Scroll a target into view, then click it when actionable.", + )) specs.append(CommandSpec( "AC_simulate_cvd", "Image", "Simulate Colour-Vision Deficiency", fields=( diff --git a/je_auto_control/utils/act_in_view/__init__.py b/je_auto_control/utils/act_in_view/__init__.py new file mode 100644 index 00000000..43640bad --- /dev/null +++ b/je_auto_control/utils/act_in_view/__init__.py @@ -0,0 +1,6 @@ +"""Scroll a target into view, then act on it once it is actionable.""" +from je_auto_control.utils.act_in_view.act_in_view import ( + ScrollPlan, act_in_view, +) + +__all__ = ["act_in_view", "ScrollPlan"] diff --git a/je_auto_control/utils/act_in_view/act_in_view.py b/je_auto_control/utils/act_in_view/act_in_view.py new file mode 100644 index 00000000..c0cc903f --- /dev/null +++ b/je_auto_control/utils/act_in_view/act_in_view.py @@ -0,0 +1,70 @@ +"""Scroll a target into view, then act on it only once it is actionable. 
+ +Two reliability primitives the framework already had stayed separate: +``scroll_find.scroll_until_visible`` brings an off-screen target on-screen, and +``actionability.act_when_ready`` waits for a target to be visible / stable / +enabled / unoccluded before acting. A real "click the row three pages down" step +needs *both* — scroll to it, then gate before clicking. ``act_in_view`` composes +them into one call. + +* :class:`ScrollPlan` — bundles the scroll search (``kind`` / ``direction`` / + ``max_scrolls`` / ``scroll_amount``) and its injectable ``locator`` / + ``scroller`` seams, so the composed call stays within a sane argument count. +* :func:`act_in_view` — scroll until the target is found, then run the + actionability gate at its location and perform ``action`` on it. + +All seams (locator / scroller / action / actionability probes / clock) are +injectable, so the whole flow is testable without a screen. Reuses +:func:`scroll_find.scroll_until_visible` and +:func:`actionability.act_when_ready`. Imports no ``PySide6``. 
+""" +from dataclasses import dataclass +from typing import Any, Callable, Dict, List, Optional + +from je_auto_control.utils.actionability import GateConfig, act_when_ready +from je_auto_control.utils.exception.exceptions import AutoControlActionException +from je_auto_control.utils.scroll_find import scroll_until_visible +from je_auto_control.utils.scroll_find.scroll_find import Locator, Scroller + + +@dataclass +class ScrollPlan: + """How to scroll while searching for the target (with injectable seams).""" + + kind: str = "image" + direction: str = "down" + max_scrolls: int = 10 + scroll_amount: int = 3 + locator: Optional[Locator] = None + scroller: Optional[Scroller] = None + + +def act_in_view(target: str, action: Callable[[List[int]], Any], *, + scroll: Optional[ScrollPlan] = None, + region_sampler: Optional[Callable[[Any], Any]] = None, + enabled_probe: Optional[Callable[[], Optional[bool]]] = None, + hit_tester: Optional[Callable[[List[int]], bool]] = None, + config: Optional[GateConfig] = None) -> Dict[str, Any]: + """Scroll ``target`` into view, gate on actionability, then ``action`` it. + + Scrolls per ``scroll`` (a :class:`ScrollPlan`) until ``target`` is located, + then runs :func:`actionability.act_when_ready` at the found point and calls + ``action(center_point)``. Raises ``AutoControlActionException`` if the target + never comes into view. The actionability probes / ``config`` are injectable + and forwarded to the gate. Returns ``{acted, coords, scrolls, result}``. 
+ """ + plan = scroll if scroll is not None else ScrollPlan() + found = scroll_until_visible( + target, kind=plan.kind, direction=plan.direction, + max_scrolls=plan.max_scrolls, scroll_amount=plan.scroll_amount, + locator=plan.locator, scroller=plan.scroller) + if not found["found"]: + raise AutoControlActionException( + f"target {target!r} not in view after {found['scrolls']} scrolls") + cx, cy = int(found["coords"][0]), int(found["coords"][1]) + result = act_when_ready(action, lambda: (cx, cy, 1, 1), + region_sampler=region_sampler, + enabled_probe=enabled_probe, hit_tester=hit_tester, + config=config) + return {"acted": True, "coords": [cx, cy], "scrolls": found["scrolls"], + "result": result} diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py index 7f5a8761..4f566c17 100644 --- a/je_auto_control/utils/executor/action_executor.py +++ b/je_auto_control/utils/executor/action_executor.py @@ -2963,6 +2963,22 @@ def _tag_kinds(elements: Any) -> Dict[str, Any]: return {"elements": tag_kinds(items)} +def _act_in_view(target: Any, kind: Any = "image", direction: Any = "down", + max_scrolls: Any = 10, scroll_amount: Any = 3, + button: Any = "left") -> Dict[str, Any]: + """Adapter: scroll a target into view then click it when actionable.""" + from je_auto_control.utils.act_in_view import ScrollPlan, act_in_view + plan = ScrollPlan(kind=str(kind), direction=str(direction), + max_scrolls=int(max_scrolls), + scroll_amount=int(scroll_amount)) + out = act_in_view( + str(target), + lambda point: click_mouse(str(button), int(point[0]), int(point[1])), + scroll=plan) + return {"acted": out["acted"], "coords": out["coords"], + "scrolls": out["scrolls"]} + + def _normalize_ext(target: str) -> Dict[str, Any]: """Adapter: the lowercased extension of a path / bare ext (pure).""" from je_auto_control.utils.file_assoc import normalize_ext @@ -7008,6 +7024,7 @@ def __init__(self): "AC_classify_icon": _classify_icon, 
"AC_propose_elements": _propose_elements, "AC_tag_kinds": _tag_kinds, + "AC_act_in_view": _act_in_view, "AC_normalize_ext": _normalize_ext, "AC_file_association": _file_association, "AC_get_control_text": _get_control_text, diff --git a/je_auto_control/utils/mcp_server/tools/_factories.py b/je_auto_control/utils/mcp_server/tools/_factories.py index 26c2427e..e2910c74 100644 --- a/je_auto_control/utils/mcp_server/tools/_factories.py +++ b/je_auto_control/utils/mcp_server/tools/_factories.py @@ -1898,6 +1898,21 @@ def smart_wait_tools() -> List[MCPTool]: handler=h.idle_point, annotations=READ_ONLY, ), + MCPTool( + name="ac_act_in_view", + description=("Scroll a 'target' (kind=image template path / text) " + "into view, wait until it is actionable, then click it " + "('button'). Returns {acted, coords, scrolls}."), + input_schema=schema({"target": {"type": "string"}, + "kind": {"type": "string"}, + "direction": {"type": "string"}, + "max_scrolls": {"type": "integer"}, + "scroll_amount": {"type": "integer"}, + "button": {"type": "string"}}, + required=["target"]), + handler=h.act_in_view, + annotations=SIDE_EFFECT_ONLY, + ), ] diff --git a/je_auto_control/utils/mcp_server/tools/_handlers.py b/je_auto_control/utils/mcp_server/tools/_handlers.py index 91d38a68..4eadd1d0 100644 --- a/je_auto_control/utils/mcp_server/tools/_handlers.py +++ b/je_auto_control/utils/mcp_server/tools/_handlers.py @@ -809,6 +809,13 @@ def tag_kinds(elements): return _tag_kinds(elements) +def act_in_view(target, kind="image", direction="down", max_scrolls=10, + scroll_amount=3, button="left"): + from je_auto_control.utils.executor.action_executor import _act_in_view + return _act_in_view(target, kind, direction, max_scrolls, scroll_amount, + button) + + def normalize_ext(target): from je_auto_control.utils.executor.action_executor import _normalize_ext return _normalize_ext(target) diff --git a/test/unit_test/headless/test_act_in_view_batch.py b/test/unit_test/headless/test_act_in_view_batch.py 
new file mode 100644 index 00000000..4c18ba76 --- /dev/null +++ b/test/unit_test/headless/test_act_in_view_batch.py @@ -0,0 +1,93 @@ +"""Headless tests for act_in_view (injected locator / scroller / action / gate).""" +import pytest + +import je_auto_control as ac +from je_auto_control.utils.act_in_view import ScrollPlan, act_in_view +from je_auto_control.utils.actionability import GateConfig +from je_auto_control.utils.exception.exceptions import AutoControlActionException + + +def _locator_found_after(scrolls_needed, coords=(100, 200)): + """A locator that returns None until ``scrolls_needed`` scrolls, then coords.""" + state = {"calls": 0} + + def locator(_target): + result = coords if state["calls"] >= scrolls_needed else None + state["calls"] += 1 + return result + + return locator + + +# --- scroll then act ------------------------------------------------------ + +def test_act_in_view_scrolls_then_acts(): + scrolled = [] + clicked = [] + plan = ScrollPlan(locator=_locator_found_after(2, (100, 200)), + scroller=lambda direction, amount: scrolled.append( + (direction, amount)), + max_scrolls=5) + out = act_in_view("target.png", clicked.append, scroll=plan) + assert out["acted"] is True + assert out["coords"] == [100, 200] + assert out["scrolls"] == 2 + assert clicked == [[100, 200]] # acted at the located point + assert len(scrolled) == 2 # scrolled twice before finding + + +def test_act_in_view_acts_immediately_when_already_visible(): + clicked = [] + plan = ScrollPlan(locator=lambda _t: (50, 60), + scroller=lambda d, a: None) + out = act_in_view("here", clicked.append, scroll=plan) + assert out["scrolls"] == 0 + assert clicked == [[50, 60]] + + +def test_act_in_view_raises_when_never_found(): + plan = ScrollPlan(locator=lambda _t: None, + scroller=lambda d, a: None, max_scrolls=3) + with pytest.raises(AutoControlActionException): + act_in_view("missing", lambda point: None, scroll=plan) + + +# --- actionability gate is honoured 
--------------------------------------- + +def test_act_in_view_waits_for_enabled(): + enabled_calls = {"n": 0} + + def enabled_probe(): + enabled_calls["n"] += 1 + return enabled_calls["n"] >= 2 # disabled on the first poll + + ticks = iter([0.0, 0.0, 1.0, 2.0, 3.0, 4.0]) + config = GateConfig(timeout_s=10.0, stable_for_s=0.0, poll_interval_s=1.0, + clock=lambda: next(ticks), sleep=lambda _s: None) + clicked = [] + plan = ScrollPlan(locator=lambda _t: (10, 20), scroller=lambda d, a: None) + out = act_in_view("x", clicked.append, scroll=plan, + enabled_probe=enabled_probe, config=config) + assert out["acted"] is True + assert clicked == [[10, 20]] + assert enabled_calls["n"] >= 2 # gated until the probe reported enabled + + +# --- wiring --------------------------------------------------------------- + +def test_wiring(): + known = set(ac.executor.known_commands()) + assert "AC_act_in_view" in known + from je_auto_control.utils.mcp_server.tools import ( + build_default_tool_registry, + ) + names = {t.name for t in build_default_tool_registry()} + assert "ac_act_in_view" in names + from je_auto_control.gui.script_builder.command_schema import _build_specs + specs = {s.command for s in _build_specs()} + assert "AC_act_in_view" in specs + + +def test_facade_exports(): + for name in ("act_in_view", "ScrollPlan"): + assert hasattr(ac, name) and name in ac.__all__ From ea124d3f2957a6defa74c3b86d00559e27619463 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Fri, 26 Jun 2026 12:16:57 +0800 Subject: [PATCH 2/2] Add act_with_mode: trial and force action modes over the actionability gate act_when_ready only waits-then-acts. Real flows need two more modes Playwright codified: trial (run every actionability check but DON'T act - a side-effect-free dry run) and force (skip checks, act now). act_with_mode adds both alongside the default auto, over the same injectable seams as the gate, so each mode is testable without a screen. Reuses wait_actionable. 
--- WHATS_NEW.md | 6 ++ .../doc/new_features/v222_features_doc.rst | 49 +++++++++ .../Zh/doc/new_features/v222_features_doc.rst | 41 +++++++ je_auto_control/__init__.py | 4 +- .../gui/script_builder/command_schema.py | 12 +++ je_auto_control/utils/act_modes/__init__.py | 4 + je_auto_control/utils/act_modes/act_modes.py | 63 +++++++++++ .../utils/executor/action_executor.py | 13 +++ .../utils/mcp_server/tools/_factories.py | 14 +++ .../utils/mcp_server/tools/_handlers.py | 5 + .../headless/test_act_modes_batch.py | 102 ++++++++++++++++++ 11 files changed, 312 insertions(+), 1 deletion(-) create mode 100644 docs/source/Eng/doc/new_features/v222_features_doc.rst create mode 100644 docs/source/Zh/doc/new_features/v222_features_doc.rst create mode 100644 je_auto_control/utils/act_modes/__init__.py create mode 100644 je_auto_control/utils/act_modes/act_modes.py create mode 100644 test/unit_test/headless/test_act_modes_batch.py diff --git a/WHATS_NEW.md b/WHATS_NEW.md index d7bf15b2..ce781384 100644 --- a/WHATS_NEW.md +++ b/WHATS_NEW.md @@ -2,6 +2,12 @@ ## What's new (2026-06-26) +### Trial and Force Action Modes (Playwright-style) + +Dry-run "is this control ready?" without clicking, or force a click past the gate. Full reference: [`docs/source/Eng/doc/new_features/v222_features_doc.rst`](docs/source/Eng/doc/new_features/v222_features_doc.rst). + +- **`act_with_mode`** (`AC_act_with_mode`): `actionability.act_when_ready` only waits-then-acts. Real flows need two more modes Playwright codified: **trial** (run every actionability check but *don't* act — a side-effect-free "would this work?" dry run) and **force** (skip the checks and act now — the escape hatch when the gate misjudges a control as occluded/disabled). `act_with_mode` adds both alongside the default `auto`, over the same injectable seams as the gate, returning `{mode, acted, actionable, reason, point, result}`. Reuses `actionability.wait_actionable`; fully testable without a screen. 
Completes the ROUND-15 input-fidelity lane (7/7). No `PySide6`. + ### Act In View — Scroll to a Target, Then Act When Actionable Click the row three pages down: scroll it into view, then gate on actionability before clicking. Full reference: [`docs/source/Eng/doc/new_features/v221_features_doc.rst`](docs/source/Eng/doc/new_features/v221_features_doc.rst). diff --git a/docs/source/Eng/doc/new_features/v222_features_doc.rst b/docs/source/Eng/doc/new_features/v222_features_doc.rst new file mode 100644 index 00000000..820a9a4b --- /dev/null +++ b/docs/source/Eng/doc/new_features/v222_features_doc.rst @@ -0,0 +1,49 @@ +Trial and Force Action Modes (Playwright-style) +=============================================== + +``actionability.act_when_ready`` has one behaviour: wait for the target to be +actionable, then act (or raise on timeout). Real flows need two more modes that +Playwright codified: + +* **trial** — run every actionability check but *don't* perform the action; just + report whether it *would* have acted. The dry run for "is this control ready?" + without side effects. +* **force** — skip the checks and act *now*, the deliberate escape hatch when the + gate is wrong (a control the heuristics misjudge as occluded / disabled). + +:func:`act_with_mode` adds both alongside the default gated (``auto``) behaviour, +over the same injectable seams as the gate, so each mode is testable without a +screen. Reuses :func:`actionability.wait_actionable`. Imports no ``PySide6``. + +Headless API +------------ + +.. code-block:: python + + from je_auto_control import act_with_mode + + bbox = lambda: (x, y, w, h) + click = lambda point: do_click(point[0], point[1]) + + act_with_mode(click, bbox, mode="auto") # gate, then click if ready + report = act_with_mode(click, bbox, mode="trial") # dry run, never clicks + if report["actionable"]: + ... 
+ +act_with_mode(click, bbox, mode="force") # click now, no checks + +Every mode returns ``{mode, acted, actionable, reason, point, result}``: +``acted`` says whether the action ran, ``actionable`` / ``reason`` come from the +gate (``trial`` reports these without acting), and ``result`` is the action's +return value. The actionability probes (``region_sampler`` / ``enabled_probe`` / +``hit_tester``) and ``config`` are forwarded to the gate as usual. An unknown +``mode`` raises ``ValueError``. + +Executor commands +----------------- + +``AC_act_with_mode`` (``x`` / ``y`` + ``mode`` / ``button`` → ``{mode, acted, +actionable, reason, point}``) clicks a point under the chosen mode — ``trial`` +is a dry-run probe that never clicks, ``force`` clicks unconditionally. It is also available as the +matching ``ac_act_with_mode`` MCP tool and as a Script Builder command under +**Flow**. :func:`act_with_mode` (which takes an arbitrary action) is the +Python-API surface. diff --git a/docs/source/Zh/doc/new_features/v222_features_doc.rst b/docs/source/Zh/doc/new_features/v222_features_doc.rst new file mode 100644 index 00000000..3a82d3d8 --- /dev/null +++ b/docs/source/Zh/doc/new_features/v222_features_doc.rst @@ -0,0 +1,41 @@ +試行與強制動作模式(Playwright 風格) +===================================== + +``actionability.act_when_ready`` 只有一種行為:等待目標可操作,再動作(或逾時丟例外)。真實流程還需要 +Playwright 定義的另外兩種模式: + +* **trial(試行)**——執行每一項 actionability 檢查,但*不*真正動作;只回報它*是否會*動作。 + 「這個控制項準備好了嗎?」的無副作用乾跑。 +* **force(強制)**——跳過檢查,*立即*動作;當閘控判斷錯誤(把控制項誤判為被遮擋 / 停用)時的刻意逃生口。 + +:func:`act_with_mode` 在預設的閘控(``auto``)行為之外加上這兩種,使用與閘控相同的可注入接縫, +故每種模式都能在沒有螢幕的情況下測試。重用 :func:`actionability.wait_actionable`。不匯入 ``PySide6``。 + +無頭 API +-------- + +.. code-block:: python + + from je_auto_control import act_with_mode + + bbox = lambda: (x, y, w, h) + click = lambda point: do_click(point[0], point[1]) + + act_with_mode(click, bbox, mode="auto") # 閘控後若就緒則點擊 + report = act_with_mode(click, bbox, mode="trial") # 乾跑,絕不點擊 + if report["actionable"]: + ...
+ act_with_mode(click, bbox, mode="force") # 立即點擊,不檢查 + +每種模式皆回傳 ``{mode, acted, actionable, reason, point, result}``:``acted`` 表示動作是否執行, +``actionable`` / ``reason`` 來自閘控(``trial`` 不動作即回報這些),``result`` 為 action 的回傳值。 +actionability 探針(``region_sampler`` / ``enabled_probe`` / ``hit_tester``)與 ``config`` 一如往常轉發給閘控。 +未知的 ``mode`` 會丟出 ``ValueError``。 + +執行器指令 +---------- + +``AC_act_with_mode``(``x`` / ``y`` 加上 ``mode`` / ``button`` → ``{mode, acted, +actionable, reason, point}``)以所選模式點擊一個點——``trial`` 是絕不點擊的乾跑探測,``force`` 無條件點擊。 +以對應的 ``ac_act_with_mode`` MCP 工具及 Script Builder 指令(位於 **Flow** 分類下)形式提供。 +:func:`act_with_mode`(接受任意 action)則是 Python API 介面。 diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py index 7817f77d..84c51200 100644 --- a/je_auto_control/__init__.py +++ b/je_auto_control/__init__.py @@ -153,6 +153,8 @@ from je_auto_control.utils.element_proposal import propose_elements, tag_kinds # Scroll a target into view, then act on it once it is actionable from je_auto_control.utils.act_in_view import ScrollPlan, act_in_view +# Trial / force action modes over the actionability gate +from je_auto_control.utils.act_modes import act_with_mode # Rich clipboard formats — RTF + CSV/TSV codecs and Windows get / set from je_auto_control.utils.clipboard_rich_formats import ( build_rtf, csv_to_rows, get_clipboard_csv, get_clipboard_rtf, rows_to_csv, @@ -1784,7 +1786,7 @@ def start_autocontrol_gui(*args, **kwargs): "localize_changes", "rank_changes", "classify_widget", "box_features", "classify_icon", "propose_elements", "tag_kinds", - "act_in_view", "ScrollPlan", + "act_in_view", "ScrollPlan", "act_with_mode", "build_rtf", "rtf_to_text", "rows_to_csv", "csv_to_rows", "set_clipboard_rtf", "get_clipboard_rtf", "set_clipboard_csv", "get_clipboard_csv", diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py index 29885136..be1377da 100644 --- 
a/je_auto_control/gui/script_builder/command_schema.py +++ b/je_auto_control/gui/script_builder/command_schema.py @@ -4539,6 +4539,18 @@ def _add_work_queue_specs(specs: List[CommandSpec]) -> None: ), description="Scroll a target into view, then click it when actionable.", )) + specs.append(CommandSpec( + "AC_act_with_mode", "Flow", "Click with Mode (auto/trial/force)", + fields=( + FieldSpec("x", FieldType.INT, placeholder="x"), + FieldSpec("y", FieldType.INT, placeholder="y"), + FieldSpec("mode", FieldType.STRING, optional=True, default="auto", + placeholder="auto / trial / force"), + FieldSpec("button", FieldType.STRING, optional=True, + default="left"), + ), + description="Click a point under an action mode (gate / dry-run / force).", + )) specs.append(CommandSpec( "AC_simulate_cvd", "Image", "Simulate Colour-Vision Deficiency", fields=( diff --git a/je_auto_control/utils/act_modes/__init__.py b/je_auto_control/utils/act_modes/__init__.py new file mode 100644 index 00000000..30067368 --- /dev/null +++ b/je_auto_control/utils/act_modes/__init__.py @@ -0,0 +1,4 @@ +"""Trial and force action modes over the actionability gate.""" +from je_auto_control.utils.act_modes.act_modes import ACT_MODES, act_with_mode + +__all__ = ["act_with_mode", "ACT_MODES"] diff --git a/je_auto_control/utils/act_modes/act_modes.py b/je_auto_control/utils/act_modes/act_modes.py new file mode 100644 index 00000000..ed370fb9 --- /dev/null +++ b/je_auto_control/utils/act_modes/act_modes.py @@ -0,0 +1,63 @@ +"""Trial and force action modes over the actionability gate (Playwright-style). + +``actionability.act_when_ready`` has one behaviour: wait for the target to be +actionable, then act (or raise on timeout). Real flows need two more modes that +Playwright codified: + +* **trial** — run every actionability check but *don't* perform the action; just + report whether it *would* have acted. The dry-run for "is this control ready?" + without side effects. 
# ---- je_auto_control/utils/act_modes/act_modes.py ----

# The supported action modes.
ACT_MODES = ("auto", "trial", "force")


def _force_act(action: Callable[[List[int]], Any],
               bbox_provider: Callable[[], Any]) -> Dict[str, Any]:
    """Act at the target centre with no actionability checks.

    ``bbox_provider`` is called once. If it yields a falsy value there is
    nothing to act on: ``action`` is not called and the report carries
    ``acted=False`` / ``actionable=False`` with reason ``"no target"``.
    Otherwise ``action`` receives the bbox centre and its return value is
    reported under ``result``.
    """
    bbox = bbox_provider()
    if not bbox:
        # A missing target is not actionable, forced or not; reporting it as
        # actionable would contradict the "no target" reason next to it.
        return {"mode": "force", "acted": False, "actionable": False,
                "reason": "no target", "point": None, "result": None}
    point = _center(bbox)
    # No checks ran, so ``actionable`` is assumed here rather than verified.
    return {"mode": "force", "acted": True, "actionable": True,
            "reason": "forced", "point": point, "result": action(point)}


def act_with_mode(action: Callable[[List[int]], Any],
                  bbox_provider: Callable[[], Any], *, mode: str = "auto",
                  region_sampler: Optional[Callable[[Any], Any]] = None,
                  enabled_probe: Optional[Callable[[], Optional[bool]]] = None,
                  hit_tester: Optional[Callable[[List[int]], bool]] = None,
                  config: Optional[Any] = None) -> Dict[str, Any]:
    """Perform ``action`` on a target under a ``mode`` (auto / trial / force).

    ``auto`` waits for the actionability gate and acts only if it passes;
    ``trial`` runs the gate but never acts (a dry run); ``force`` acts at once
    with no checks, so the probe and ``config`` arguments are unused in that
    mode.

    ``region_sampler``, ``enabled_probe``, ``hit_tester`` and ``config`` are
    passed straight through to ``wait_actionable``.

    Returns ``{mode, acted, actionable, reason, point, result}``; ``result`` is
    ``action``'s return value, or ``None`` when nothing was performed.
    Raises ``ValueError`` for an unknown ``mode``.
    """
    # Validate first so a typo in ``mode`` can never trigger an action.
    if mode not in ACT_MODES:
        raise ValueError(f"unknown act mode: {mode!r}")
    if mode == "force":
        return _force_act(action, bbox_provider)
    report = wait_actionable(bbox_provider, region_sampler=region_sampler,
                             enabled_probe=enabled_probe, hit_tester=hit_tester,
                             config=config)
    # The reported point is a list copy of the gate's point (or None).
    point = list(report.point) if report.point is not None else None
    base = {"mode": mode, "actionable": report.actionable,
            "reason": report.reason, "point": point}
    if mode == "trial" or not report.actionable:
        # Dry run, or the gate did not pass: report only.
        return {**base, "acted": False, "result": None}
    return {**base, "acted": True, "result": action(report.point)}


# ---- je_auto_control/utils/executor/action_executor.py ----

def _act_with_mode(x: Any, y: Any, mode: Any = "auto",
                   button: Any = "left") -> Dict[str, Any]:
    """Adapter: click a point under an action mode (auto / trial / force).

    The point is presented to ``act_with_mode`` as a 1x1 bbox at ``(x, y)``;
    the action is a ``click_mouse`` with ``button`` at the point it is handed.
    Returns the ``mode`` / ``acted`` / ``actionable`` / ``reason`` / ``point``
    fields of the report; the click's return value (``result``) is left out.
    """
    from je_auto_control.utils.act_modes import act_with_mode

    def click_at(point):
        return click_mouse(str(button), int(point[0]), int(point[1]))

    def bbox_provider():
        return (int(x), int(y), 1, 1)

    out = act_with_mode(click_at, bbox_provider, mode=str(mode))
    return {key: out[key]
            for key in ("mode", "acted", "actionable", "reason", "point")}
def act_with_mode(x, y, mode="auto", button="left"):
    """MCP handler: click (x, y) under an action mode via the executor adapter.

    The executor module is imported at call time, and the arguments are
    forwarded unchanged to its ``_act_with_mode`` adapter.
    """
    from je_auto_control.utils.executor.action_executor import (
        _act_with_mode as run_adapter,
    )
    return run_adapter(x, y, mode=mode, button=button)


def normalize_ext(target):
    """MCP handler: forward ``target`` to the executor's ``_normalize_ext``."""
    from je_auto_control.utils.executor.action_executor import (
        _normalize_ext as run_adapter,
    )
    return run_adapter(target)
def _gate_config():
    """Build a GateConfig driven by a fake clock (0.0, 1.0, ... 39.0).

    Every ``clock()`` call returns the next tick and ``sleep`` does nothing,
    so the gate can poll and time out without real waiting.
    """
    fake_times = map(float, range(40))

    def clock():
        return next(fake_times)

    def sleep(_seconds):
        return None

    return GateConfig(timeout_s=3.0, stable_for_s=0.0, poll_interval_s=1.0,
                      clock=clock, sleep=sleep)


def _run(mode, bbox, **gate_kwargs):
    """Call act_with_mode with a recording action; return (report, calls)."""
    calls = []
    report = act_with_mode(calls.append, lambda: bbox, mode=mode,
                           **gate_kwargs)
    return report, calls


# --- force ----------------------------------------------------------------

def test_force_acts_without_any_checks():
    # The probe says "disabled"; force must ignore the gate entirely.
    report, calls = _run("force", (10, 20, 4, 4),
                         enabled_probe=lambda: False)
    assert report["mode"] == "force"
    assert report["acted"] is True
    assert report["point"] == [12, 22]  # centre of (10, 20, 4, 4)
    assert calls == [[12, 22]]


def test_force_no_target_does_not_act():
    report, calls = _run("force", None)
    assert report["acted"] is False
    assert calls == []


# --- trial ----------------------------------------------------------------

def test_trial_reports_but_never_acts():
    report, calls = _run("trial", (0, 0, 2, 2), config=_gate_config())
    assert report["mode"] == "trial"
    assert report["acted"] is False  # dry run: the gate ran, nothing acted
    assert report["actionable"] is True
    assert calls == []  # never clicked


def test_trial_reports_not_actionable_without_acting():
    # No bbox, so the gate reports the target as not visible.
    report, calls = _run("trial", None, config=_gate_config())
    assert report["acted"] is False
    assert report["actionable"] is False
    assert report["reason"] == "not visible"
    assert calls == []


# --- auto -----------------------------------------------------------------

def test_auto_acts_when_actionable():
    report, calls = _run("auto", (5, 5, 2, 2), config=_gate_config())
    assert report["acted"] is True
    assert calls == [[6, 6]]  # centre of (5, 5, 2, 2)


def test_auto_does_not_act_when_gate_times_out():
    # Never visible, so the gate runs until its timeout.
    report, calls = _run("auto", None, config=_gate_config())
    assert report["acted"] is False
    assert report["actionable"] is False
    assert calls == []


def test_unknown_mode_raises():
    with pytest.raises(ValueError):
        _run("bogus", (0, 0, 1, 1))


def test_act_modes_constant():
    assert set(ACT_MODES) == {"auto", "trial", "force"}


# --- wiring ---------------------------------------------------------------

def test_wiring():
    executor_commands = set(ac.executor.known_commands())
    assert "AC_act_with_mode" in executor_commands
    from je_auto_control.utils.mcp_server.tools import (
        build_default_tool_registry,
    )
    tool_names = {tool.name for tool in build_default_tool_registry()}
    assert "ac_act_with_mode" in tool_names
    from je_auto_control.gui.script_builder.command_schema import _build_specs
    spec_commands = {spec.command for spec in _build_specs()}
    assert "AC_act_with_mode" in spec_commands


def test_facade_exports():
    assert hasattr(ac, "act_with_mode")
    assert "act_with_mode" in ac.__all__