From f6bfa9c2ac719c5a14ffdd75cc78c2a8577d612a Mon Sep 17 00:00:00 2001 From: John Yin <10972267+john-yin2333@user.noreply.gitee.com> Date: Fri, 22 May 2026 18:01:54 +0800 Subject: [PATCH 01/31] fix(skill): reduce watcher inotify usage Co-authored-by: Cursor --- flocks/skill/skill.py | 40 +++++++++++++---- tests/skill/test_skill.py | 94 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 124 insertions(+), 10 deletions(-) diff --git a/flocks/skill/skill.py b/flocks/skill/skill.py index c095b67c2..2aabe5e3d 100644 --- a/flocks/skill/skill.py +++ b/flocks/skill/skill.py @@ -732,6 +732,13 @@ class SkillFileWatcher: """ _DEBOUNCE_SECONDS = 0.5 + _FLOCKS_SKILL_DIRS = ( + "skill", + "skills", + os.path.join("plugins", "skill"), + os.path.join("plugins", "skills"), + ) + _CLAUDE_SKILL_DIRS = ("skills",) def __init__(self, skill_cls: type): self._skill_cls = skill_cls @@ -818,7 +825,7 @@ def _do_clear(self) -> None: log.info("skill.watcher.cache_cleared", {"reason": "SKILL.md changed on disk"}) def _collect_watch_dirs(self) -> Set[str]: - """Gather all directories that may contain skill files.""" + """Gather concrete skill roots that may contain SKILL.md files.""" dirs: Set[str] = set() home = os.path.expanduser("~") @@ -831,11 +838,28 @@ def _collect_watch_dirs(self) -> Set[str]: except Exception: worktree = current_dir - for target in (".flocks", ".claude"): - for d in Skill._find_dirs_up(target, current_dir, worktree): - dirs.add(d) - global_dir = os.path.join(home, target) - if os.path.isdir(global_dir): - dirs.add(global_dir) + flocks_roots = Skill._find_dirs_up(".flocks", current_dir, worktree) + global_flocks = os.path.join(home, ".flocks") + if os.path.isdir(global_flocks): + flocks_roots.append(global_flocks) + for root in flocks_roots: + dirs.update(self._existing_subdirs(root, self._FLOCKS_SKILL_DIRS)) + + claude_roots = Skill._find_dirs_up(".claude", current_dir, worktree) + global_claude = os.path.join(home, ".claude") + if os.path.isdir(global_claude): + claude_roots.append(global_claude) + for root in claude_roots: + dirs.update(self._existing_subdirs(root, self._CLAUDE_SKILL_DIRS)) - return {d for d in dirs if os.path.isdir(d)} + return dirs + + @staticmethod + def _existing_subdirs(root: str, relative_dirs: tuple[str, ...]) -> Set[str]: + """Return existing watch roots below a discovery root, with stable dedupe.""" + dirs: Set[str] = set() + for rel in relative_dirs: + candidate = os.path.realpath(os.path.join(root, rel)) + if os.path.isdir(candidate): + dirs.add(candidate) + return dirs diff --git a/tests/skill/test_skill.py b/tests/skill/test_skill.py index 23703d8df..58860c807 100644 --- a/tests/skill/test_skill.py +++ b/tests/skill/test_skill.py @@ -430,8 +430,8 @@ def test_watcher_start_stop_no_crash(tmp_path): from flocks.skill.skill import SkillFileWatcher # Create a fake skill directory so the watcher has something to watch - skill_dir = tmp_path / ".flocks" - skill_dir.mkdir() + skill_dir = tmp_path / ".flocks" / "plugins" / "skills" + skill_dir.mkdir(parents=True) with ( patch("flocks.skill.skill.Instance.get_directory", return_value=str(tmp_path)), @@ -446,6 +446,96 @@ def test_watcher_start_stop_no_crash(tmp_path): assert watcher._observer is None +def test_watcher_collects_only_skill_discovery_roots(tmp_path): + """Skill watcher should not recursively watch the entire .flocks tree.""" + from flocks.skill.skill import SkillFileWatcher + + project_dir = tmp_path / "project" + current_dir = project_dir / "src" + current_dir.mkdir(parents=True) + project_flocks = project_dir / ".flocks" + + expected_project_dirs = [ + project_flocks / "skill", + project_flocks / "skills", + project_flocks / "plugins" / "skill", + project_flocks / "plugins" / "skills", + ] + for directory in expected_project_dirs: + directory.mkdir(parents=True) + + # These trees can be large but are not part of Skill._discover(). + (project_flocks / "flockshub" / "plugins" / "skills").mkdir(parents=True) + (project_flocks / "plugins" / "tools" / "api").mkdir(parents=True) + + home_dir = tmp_path / "home" + user_skill_dir = home_dir / ".flocks" / "plugins" / "skills" + user_skill_dir.mkdir(parents=True) + user_claude_skill_dir = home_dir / ".claude" / "skills" + user_claude_skill_dir.mkdir(parents=True) + + with ( + patch("flocks.skill.skill.Instance.get_directory", return_value=str(current_dir)), + patch("flocks.skill.skill.Instance.get_worktree", return_value=str(project_dir)), + patch("os.path.expanduser", return_value=str(home_dir)), + ): + watch_dirs = SkillFileWatcher(Skill)._collect_watch_dirs() + + expected = { + os.path.realpath(str(directory)) + for directory in [*expected_project_dirs, user_skill_dir, user_claude_skill_dir] + } + assert watch_dirs == expected + assert os.path.realpath(str(project_flocks)) not in watch_dirs + assert os.path.realpath(str(project_flocks / "flockshub" / "plugins" / "skills")) not in watch_dirs + assert os.path.realpath(str(project_flocks / "plugins" / "tools" / "api")) not in watch_dirs + + +def test_watcher_collects_project_claude_skills_only(tmp_path): + """Claude compatibility should watch .claude/skills, not the .claude root.""" + from flocks.skill.skill import SkillFileWatcher + + project_dir = tmp_path / "project" + project_claude = project_dir / ".claude" + project_claude_skill_dir = project_claude / "skills" + project_claude_skill_dir.mkdir(parents=True) + (project_claude / "commands").mkdir() + + home_dir = tmp_path / "home" + home_dir.mkdir() + + with ( + patch("flocks.skill.skill.Instance.get_directory", return_value=str(project_dir)), + patch("flocks.skill.skill.Instance.get_worktree", return_value=str(project_dir)), + patch("os.path.expanduser", return_value=str(home_dir)), + ): + watch_dirs = SkillFileWatcher(Skill)._collect_watch_dirs() + + assert watch_dirs == {os.path.realpath(str(project_claude_skill_dir))} + assert os.path.realpath(str(project_claude)) not in watch_dirs + + +def test_watcher_collect_dirs_empty_without_skill_roots(tmp_path): + """A .flocks directory without skill roots should not be watched wholesale.""" + from flocks.skill.skill import SkillFileWatcher + + project_dir = tmp_path / "project" + (project_dir / ".flocks").mkdir(parents=True) + (project_dir / ".claude").mkdir() + + home_dir = tmp_path / "home" + (home_dir / ".flocks").mkdir(parents=True) + + with ( + patch("flocks.skill.skill.Instance.get_directory", return_value=str(project_dir)), + patch("flocks.skill.skill.Instance.get_worktree", return_value=str(project_dir)), + patch("os.path.expanduser", return_value=str(home_dir)), + ): + watch_dirs = SkillFileWatcher(Skill)._collect_watch_dirs() + + assert watch_dirs == set() + + def test_watcher_debounce_clears_cache(): """SkillFileWatcher._do_clear() triggers cache invalidation synchronously.""" from flocks.skill.skill import SkillFileWatcher From bff62db68d5279906a3a2f238b8aac415edda82c Mon Sep 17 00:00:00 2001 From: xiami Date: Mon, 25 May 2026 13:23:27 +0800 Subject: [PATCH 02/31] fix(workflow): isolate LLM provider from shared singleton (#316) Clone a workflow-local provider instead of mutating the shared instance with locks and event-loop markers, preventing cross-loop client reuse and session config races during workflow llm.ask() calls. --- flocks/workflow/llm.py | 246 ++++++------------ tests/workflow/test_llm_provider_isolation.py | 220 ++++++++-------- 2 files changed, 199 insertions(+), 267 deletions(-) diff --git a/flocks/workflow/llm.py b/flocks/workflow/llm.py index febdf62d4..66fe21400 100644 --- a/flocks/workflow/llm.py +++ b/flocks/workflow/llm.py @@ -1,64 +1,12 @@ import asyncio -import threading +from copy import copy from dataclasses import dataclass import time from typing import Any, Dict, Optional -from weakref import WeakKeyDictionary from flocks.config.config import Config from flocks.provider.provider import ChatMessage, Provider, ProviderConfig from flocks.workflow._async_runtime import run_sync as _run_sync_on_shared_loop -from flocks.workflow import _async_runtime as _async_runtime_mod - - -_provider_locks_guard = threading.Lock() -_provider_locks: Dict[str, threading.Lock] = {} - -# Tracks which event loop currently "owns" each provider's ``_client``. -# Key : the provider instance (weakly referenced so we don't pin singletons). -# Value : ``id(loop)`` of the loop that created or last validated ``_client``. -# -# Rationale: ``httpx.AsyncClient`` (and the OpenAI/Anthropic SDK clients that -# wrap it) bind themselves to the *currently running* event loop on first -# ``await``. The session uses uvicorn's main loop while workflows use the -# dedicated ``flocks-workflow-llm-loop``. If a workflow call inherits a -# provider client that the session created on the main loop, attempting to -# ``await`` it from the workflow loop raises "got Future attached to a -# different loop" / hangs. We track the owning loop here so that -# ``_prepare_provider`` can reset the client when the caller crosses loops, -# while still reusing the connection pool for back-to-back same-loop calls. -_provider_client_loop_marker: "WeakKeyDictionary[Any, int]" = WeakKeyDictionary() - - -def _get_provider_lock(provider_id: str) -> threading.Lock: - """Return a process-wide lock keyed by ``provider_id``. - - Used to serialize the (apply_config -> configure -> reset _client) sequence - in ``LLMClient._prepare_provider`` against any concurrent session/agent - request reading ``provider._config`` / ``provider._client``. Locks are - created lazily and cached forever — the set of provider ids is bounded - and small. - """ - lock = _provider_locks.get(provider_id) - if lock is not None: - return lock - with _provider_locks_guard: - lock = _provider_locks.get(provider_id) - if lock is None: - lock = threading.Lock() - _provider_locks[provider_id] = lock - return lock - - -def _workflow_loop_id() -> Optional[int]: - """Return ``id(loop)`` of the shared workflow async loop, or ``None``. - - The workflow loop is created lazily on first ``_run_sync_on_shared_loop`` - call. We read it directly from ``_async_runtime`` so callers don't need to - actually submit a coroutine just to discover the loop id. - """ - loop = getattr(_async_runtime_mod, "_loop", None) - return id(loop) if loop is not None else None def _run_coro_sync(coro): @@ -248,122 +196,98 @@ def _validate_target(self, target: _ResolvedTarget) -> Optional[str]: ) return None - def _prepare_provider(self, provider_id: str) -> Any: - """Apply workflow-side overrides on the shared Provider singleton. - - Concurrency contract: - * The (apply_config -> configure -> _client reset) sequence is - serialized per-provider via :func:`_get_provider_lock` so that - an in-flight session ``provider.chat_stream`` cannot observe a - half-mutated ``provider._config`` / ``provider._client``. - * The reconfigure + client reset is *idempotent*: if the desired - ProviderConfig (api_key / base_url / relevant custom_settings) - already matches what the provider holds, we skip both - ``provider.configure(...)`` and ``provider._client = None``. - This protects long-running session HTTP connection pools from - being recreated on every workflow ``llm.ask()`` call. + def _clone_provider_for_workflow(self, shared_provider: Any) -> Any: + """Create a workflow-local provider instance from the shared singleton. + + Workflow calls run on a dedicated background loop while session / agent + calls run on the server loop. Sharing the same provider instance across + those callers makes both ``_config`` and any cached async client + (``_client``) a process-wide race point. The workflow therefore uses an + isolated provider instance seeded from the shared provider's current + config/runtime state, but never mutates the shared singleton itself. """ - with _get_provider_lock(provider_id): + try: + isolated_provider = type(shared_provider)() + except Exception: + isolated_provider = copy(shared_provider) + + for attr in ("id", "name", "_api_key", "_base_url", "_endpoint"): + if hasattr(shared_provider, attr): + try: + setattr(isolated_provider, attr, getattr(shared_provider, attr)) + except Exception: + pass + + for attr in ("_config_models", "_custom_models"): + if hasattr(shared_provider, attr): + value = getattr(shared_provider, attr) + cloned_value = list(value) if isinstance(value, list) else value + try: + setattr(isolated_provider, attr, cloned_value) + except Exception: + pass + + if hasattr(isolated_provider, "_client"): try: - _run_coro_sync(Provider.apply_config(provider_id=provider_id)) + setattr(isolated_provider, "_client", None) except Exception: - # Keep workflow runtime resilient: provider apply_config failure - # should not block ask() for environments driven by env vars. pass - provider = self._get_provider(provider_id) - cfg = getattr(provider, "_config", None) - existing_custom = getattr(cfg, "custom_settings", None) or {} - custom_settings = ( - dict(existing_custom) if isinstance(existing_custom, dict) else {} - ) - # Only override ``trust_env`` when the user explicitly set - # ``workflow.llm.trust_env`` in flocks config. Otherwise inherit - # whatever the session / agent already configured. - if self.trust_env_explicit: - custom_settings["trust_env"] = self.trust_env - - desired_api_key = ( - self.api_key - if self.api_key is not None - else getattr(cfg, "api_key", None) - ) - desired_base_url = ( - self.base_url - if self.base_url is not None - else getattr(cfg, "base_url", None) - ) + return isolated_provider - # Idempotency check: only reconfigure when something material - # actually changed. ``custom_settings`` may legitimately carry - # provider-specific tunables (verify_ssl, region, …) set by the - # session — comparing the full dict avoids accidental drops. - unchanged = ( - cfg is not None - and getattr(cfg, "api_key", None) == desired_api_key - and getattr(cfg, "base_url", None) == desired_base_url - and (existing_custom if isinstance(existing_custom, dict) else {}) - == custom_settings - ) - if not unchanged: - provider.configure( - ProviderConfig( - provider_id=provider_id, - api_key=desired_api_key, - base_url=desired_base_url, - custom_settings=custom_settings, - ) - ) + def _prepare_provider(self, provider_id: str) -> Any: + """Build a workflow-local provider without mutating the shared singleton.""" + try: + _run_coro_sync(Provider.apply_config(provider_id=provider_id)) + except Exception: + # Keep workflow runtime resilient: provider apply_config failure + # should not block ask() for environments driven by env vars. + pass + + shared_provider = self._get_provider(provider_id) + provider = self._clone_provider_for_workflow(shared_provider) + cfg = getattr(shared_provider, "_config", None) + existing_custom = getattr(cfg, "custom_settings", None) or {} + custom_settings = ( + dict(existing_custom) if isinstance(existing_custom, dict) else {} + ) + if self.trust_env_explicit: + custom_settings["trust_env"] = self.trust_env - # Decide whether the existing SDK client can be reused safely. - # - # Two cases force a client reset (i.e. ``_client = None`` so that - # the next ``_get_client()`` rebuilds it on the workflow loop): - # - # 1. The provider config materially changed (handled above). - # 2. The current ``_client`` was last used by a *different* - # event loop than the workflow loop. ``httpx.AsyncClient`` - # binds to the loop where it first awaited; cross-loop - # reuse triggers "got Future attached to a different loop" - # or silent hangs. - workflow_loop_id = _workflow_loop_id() - client_obj = getattr(provider, "_client", None) - client_loop_id = ( - _provider_client_loop_marker.get(provider) - if client_obj is not None - else None - ) - must_reset_for_loop = ( - client_obj is not None - and workflow_loop_id is not None - and client_loop_id != workflow_loop_id + desired_api_key = ( + self.api_key + if self.api_key is not None + else getattr(cfg, "api_key", None) + ) + if desired_api_key is None: + desired_api_key = getattr(shared_provider, "_api_key", None) + + desired_base_url = ( + self.base_url + if self.base_url is not None + else getattr(cfg, "base_url", None) + ) + if desired_base_url is None: + desired_base_url = getattr(shared_provider, "_base_url", None) + if desired_base_url is None: + desired_base_url = getattr(shared_provider, "_endpoint", None) + + if ( + cfg is not None + or self.api_key is not None + or self.base_url is not None + or self.trust_env_explicit + ): + provider.configure( + ProviderConfig( + provider_id=provider_id, + api_key=desired_api_key, + base_url=desired_base_url, + custom_settings=custom_settings, + ) ) - if not unchanged or must_reset_for_loop: - if hasattr(provider, "_client"): - try: - setattr(provider, "_client", None) - except Exception: - pass - # The next ``_get_client()`` will build a fresh client on - # the workflow loop; remember that ownership. - if workflow_loop_id is not None: - try: - _provider_client_loop_marker[provider] = workflow_loop_id - except TypeError: - # ``provider`` is not weak-referenceable (rare for - # test doubles) — fall back to a regular attribute. - try: - setattr(provider, "_workflow_client_loop_id", workflow_loop_id) - except Exception: - pass - elif workflow_loop_id is not None and client_obj is not None: - # Client is being reused for another workflow call on the - # same loop — refresh the marker so we keep tracking it. - try: - _provider_client_loop_marker[provider] = workflow_loop_id - except TypeError: - pass - return provider + + return provider def _format_target(self, target: _ResolvedTarget) -> str: return f"{target.provider_id}/{target.model_id}" diff --git a/tests/workflow/test_llm_provider_isolation.py b/tests/workflow/test_llm_provider_isolation.py index 411102ece..a13d99293 100644 --- a/tests/workflow/test_llm_provider_isolation.py +++ b/tests/workflow/test_llm_provider_isolation.py @@ -1,20 +1,17 @@ """Regression tests for ``flocks.workflow.llm.LLMClient._prepare_provider``. -These tests pin down the contract that protects concurrent ``session`` / -``agent`` callers when a workflow runs ``llm.ask()`` against the same -``Provider`` singleton: - -* The reconfigure-and-reset sequence is **idempotent**: if the workflow's - desired config matches what the provider already holds, neither - ``provider.configure(...)`` nor the ``provider._client = None`` reset is - performed. This keeps long-running httpx connection pools from being - thrown away on every workflow LLM call. -* ``trust_env`` is only overridden when the user explicitly set - ``workflow.llm.trust_env`` in flocks config. Otherwise any value that the - session previously placed in ``provider._config.custom_settings`` is - preserved untouched. -* When the config does materially change (e.g. api_key flip), the - reconfigure path still runs and ``_client`` is reset, as before. +The workflow LLM path must no longer mutate the process-wide Provider +singleton that the session / agent runner uses. Instead it should build an +isolated provider instance seeded from the shared provider's current config. + +These tests pin down that contract: + +* ``_prepare_provider()`` returns a distinct provider instance and leaves the + shared provider's ``_config`` / ``_client`` untouched. +* workflow-specific overrides (api_key / base_url / trust_env) are applied + only to the isolated provider instance. +* Providers that are configured purely via runtime/env state (``_api_key`` + with no ``_config``) remain usable when workflow overrides are applied. """ from __future__ import annotations @@ -29,10 +26,7 @@ class _FakeProvider: - """Minimal stand-in for a registered ``BaseProvider`` instance. - - Records every ``configure`` invocation so tests can assert idempotency. - """ + """Minimal stand-in for a registered provider instance.""" def __init__( self, @@ -41,14 +35,23 @@ def __init__( api_key: Optional[str] = "session-key", base_url: Optional[str] = "https://session.example.com", custom_settings: Optional[Dict[str, Any]] = None, + configured: bool = True, ) -> None: self.id = provider_id - self._config: Optional[ProviderConfig] = ProviderConfig( - provider_id=provider_id, - api_key=api_key, - base_url=base_url, - custom_settings=dict(custom_settings or {}), + self.name = f"Provider {provider_id}" + self._api_key = api_key + self._base_url = base_url + self._config: Optional[ProviderConfig] = ( + ProviderConfig( + provider_id=provider_id, + api_key=api_key, + base_url=base_url, + custom_settings=dict(custom_settings or {}), + ) + if configured + else None ) + self._config_models: List[str] = ["model-a"] self._client: Any = object() self.configure_calls: List[ProviderConfig] = [] @@ -57,7 +60,8 @@ def configure(self, config: ProviderConfig) -> None: self._config = config def is_configured(self) -> bool: - return self._config is not None and self._config.api_key is not None + api_key = self._config.api_key if self._config else self._api_key + return api_key is not None @pytest.fixture @@ -121,144 +125,148 @@ def _build_client( ) -def test_prepare_provider_is_idempotent_when_config_unchanged( +def test_prepare_provider_returns_isolated_instance_when_config_unchanged( patched_runtime, fake_provider: _FakeProvider ) -> None: - """No workflow overrides + no explicit trust_env => no reconfigure, no client reset.""" + """Workflow should clone the provider instead of mutating the shared singleton.""" client = _build_client() original_client = fake_provider._client assert original_client is not None - client._prepare_provider("fake-provider") - client._prepare_provider("fake-provider") - client._prepare_provider("fake-provider") + prepared = client._prepare_provider("fake-provider") - assert fake_provider.configure_calls == [], ( - "expected zero reconfigure calls when desired config matches existing" + assert prepared is not fake_provider + assert prepared._client is None, ( + "workflow provider should always start with an isolated client cache" ) assert fake_provider._client is original_client, ( - "expected provider._client to be preserved across idempotent calls" + "shared provider client must not be reset by workflow preparation" ) + assert fake_provider.configure_calls == [] + assert prepared._config is not None + assert prepared._config == fake_provider._config + assert prepared._config_models == fake_provider._config_models + assert prepared._config_models is not fake_provider._config_models def test_prepare_provider_does_not_override_session_trust_env( patched_runtime, fake_provider: _FakeProvider ) -> None: - """If workflow.llm.trust_env is not set, session-supplied custom_settings stay intact.""" + """If workflow.llm.trust_env is not set, shared custom_settings stay untouched.""" client = _build_client( workflow_trust_env_set=False, workflow_trust_env=False ) - client._prepare_provider("fake-provider") + prepared = client._prepare_provider("fake-provider") assert fake_provider._config is not None assert fake_provider._config.custom_settings == { "trust_env": True, "verify_ssl": False, } - assert fake_provider.configure_calls == [], ( - "trust_env was not explicitly set -> provider.configure must not be called" - ) + assert fake_provider.configure_calls == [] + assert prepared._config is not None + assert prepared._config.custom_settings == { + "trust_env": True, + "verify_ssl": False, + } def test_prepare_provider_overrides_when_workflow_trust_env_explicit( patched_runtime, fake_provider: _FakeProvider ) -> None: - """If workflow.llm.trust_env IS set, override custom_settings and reset _client.""" + """If workflow.llm.trust_env IS set, only the isolated provider is overridden.""" client = _build_client( workflow_trust_env_set=True, workflow_trust_env=False ) - client._prepare_provider("fake-provider") + prepared = client._prepare_provider("fake-provider") - assert len(fake_provider.configure_calls) == 1 - applied = fake_provider.configure_calls[0] - assert applied.custom_settings is not None - assert applied.custom_settings.get("trust_env") is False - # verify_ssl was carried over from the session-side custom_settings - assert applied.custom_settings.get("verify_ssl") is False - assert fake_provider._client is None, ( - "trust_env actually changed -> the SDK client must be reset" - ) + assert fake_provider.configure_calls == [] + assert fake_provider._config is not None + assert fake_provider._config.custom_settings == { + "trust_env": True, + "verify_ssl": False, + } + assert prepared._config is not None + assert prepared._config.custom_settings == { + "trust_env": False, + "verify_ssl": False, + } + assert prepared._client is None def test_prepare_provider_reconfigures_when_api_key_changes( patched_runtime, fake_provider: _FakeProvider ) -> None: - """A new api_key is a material change -> reconfigure + client reset.""" + """A workflow api_key override must only affect the isolated provider.""" client = _build_client(api_key="workflow-supplied-key") - client._prepare_provider("fake-provider") + prepared = client._prepare_provider("fake-provider") - assert len(fake_provider.configure_calls) == 1 - assert fake_provider.configure_calls[0].api_key == "workflow-supplied-key" - assert fake_provider._client is None + assert fake_provider.configure_calls == [] + assert fake_provider._config is not None + assert fake_provider._config.api_key == "session-key" + assert prepared._config is not None + assert prepared._config.api_key == "workflow-supplied-key" + assert prepared._client is None def test_prepare_provider_reconfigures_when_base_url_changes( patched_runtime, fake_provider: _FakeProvider ) -> None: - """A new base_url is also a material change.""" + """A workflow base_url override must only affect the isolated provider.""" client = _build_client(base_url="https://workflow.example.com") - client._prepare_provider("fake-provider") + prepared = client._prepare_provider("fake-provider") - assert len(fake_provider.configure_calls) == 1 - assert ( - fake_provider.configure_calls[0].base_url - == "https://workflow.example.com" - ) - assert fake_provider._client is None - - -def test_get_provider_lock_is_per_provider(patched_runtime) -> None: - """Same provider_id returns the same lock instance; different ids get different locks.""" - lock_a1 = workflow_llm._get_provider_lock("fake-provider") - lock_a2 = workflow_llm._get_provider_lock("fake-provider") - lock_b = workflow_llm._get_provider_lock("other-provider") - - assert lock_a1 is lock_a2 - assert lock_a1 is not lock_b + assert fake_provider.configure_calls == [] + assert fake_provider._config is not None + assert fake_provider._config.base_url == "https://session.example.com" + assert prepared._config is not None + assert prepared._config.base_url == "https://workflow.example.com" + assert prepared._client is None -def test_prepare_provider_resets_client_when_owning_loop_changes( - patched_runtime, fake_provider: _FakeProvider +def test_prepare_provider_uses_runtime_api_key_when_shared_config_missing( + patched_runtime, ) -> None: - """If the existing _client belongs to a different loop, reset it even when config is unchanged. + """Workflow overrides must not erase runtime/env credentials. - This guards against ``httpx.AsyncClient`` cross-loop reuse: a session - bound the client to uvicorn's main loop, the workflow loop must not - inherit it without rebuilding on the workflow loop. + Providers may be configured only via constructor/runtime state with + ``_config is None``. When workflow sets ``trust_env``, the isolated + provider still needs a usable api_key copied from the shared provider. """ - client = _build_client() - # Simulate the workflow loop having id=999, while ``_client`` was last - # used on a different loop (e.g. the session's main loop with id=111). - with patch.object(workflow_llm, "_workflow_loop_id", return_value=999): - workflow_llm._provider_client_loop_marker[fake_provider] = 111 - - original_client = fake_provider._client - assert original_client is not None - - client._prepare_provider("fake-provider") - - # Even though no config field changed, the client must be reset so - # the next ``_get_client()`` rebuilds it on the workflow loop. - assert fake_provider._client is None - # And the marker is updated to the workflow loop. - assert workflow_llm._provider_client_loop_marker.get(fake_provider) == 999 - + shared_provider = _FakeProvider( + api_key="runtime-key", + base_url="https://runtime.example.com", + custom_settings={"verify_ssl": False}, + configured=False, + ) -def test_prepare_provider_keeps_client_when_owning_loop_matches( - patched_runtime, fake_provider: _FakeProvider -) -> None: - """When the existing _client already belongs to the workflow loop, do not reset.""" - client = _build_client() - with patch.object(workflow_llm, "_workflow_loop_id", return_value=555): - workflow_llm._provider_client_loop_marker[fake_provider] = 555 + def _fake_run_coro_sync(coro): + try: + coro.close() + except Exception: + pass + return {} - original_client = fake_provider._client - client._prepare_provider("fake-provider") + with patch.object( + workflow_llm.Provider, "_ensure_initialized", lambda: None + ), patch.object( + workflow_llm.Provider, "get", lambda provider_id: shared_provider + ), patch.object( + workflow_llm, "_run_coro_sync", side_effect=_fake_run_coro_sync + ): + client = _build_client( + workflow_trust_env_set=True, + workflow_trust_env=False, + ) + prepared = client._prepare_provider("fake-provider") - assert fake_provider._client is original_client - assert fake_provider.configure_calls == [] + assert shared_provider._config is None + assert prepared._config is not None + assert prepared._config.api_key == "runtime-key" + assert prepared._config.base_url == "https://runtime.example.com" + assert prepared._config.custom_settings == {"trust_env": False} From 51440113ca5f0391b1f40ba805cde56134c3e84d Mon Sep 17 00:00:00 2001 From: xiami Date: Mon, 25 May 2026 13:26:49 +0800 Subject: [PATCH 03/31] feat(web2cli,agent): remove agent-browser from web2cli, add planner agent (#315) * fix(chat): stabilize upload paths and dedupe document attachments Overwrite duplicate chat uploads instead of auto-renaming so workspace paths stay consistent. Dedupe composer document attachments by path, reposition the user avatar in SessionChat, and enable rex_junior delegation. * feat(agent): consolidate planning into Prometheus subagent Replace metis and momus with prometheus for interview-style planning and verified plan output under .flocks/plans/. Route /plan and delegate_task session permissions through the new agent, preserve YAML permission rules when resolving tool lists, and show structured todowrite summaries in SessionChat. Co-authored-by: Cursor * refactor(tool): relocate task and skill_load to logical modules Move task to tool/agent and skill_load to tool/skill, add an enabled flag to register_function, clarify flocks_skills vs skill_load tool guidance, limit browser setup to one retry, and update prometheus planning description. --- .flocks/plugins/skills/agent-builder/SKILL.md | 4 +- .flocks/plugins/skills/skill-builder/SKILL.md | 2 +- .flocks/plugins/skills/web2cli/SKILL.md | 86 +-------- flocks/agent/agents/hephaestus/agent.yaml | 2 +- flocks/agent/agents/metis/agent.yaml | 33 ---- flocks/agent/agents/metis/prompt.md | 147 --------------- flocks/agent/agents/momus/agent.yaml | 34 ---- flocks/agent/agents/momus/prompt.md | 167 ------------------ flocks/agent/agents/prometheus/agent.yaml | 45 +++++ flocks/agent/agents/prometheus/prompt.md | 87 +++++++++ flocks/agent/toolset.py | 8 +- flocks/browser/admin.py | 23 ++- flocks/command/command.py | 2 +- flocks/server/routes/misc.py | 2 +- flocks/server/routes/workspace.py | 39 +--- flocks/tool/agent/delegate_task.py | 35 +++- flocks/tool/{task => agent}/task.py | 0 flocks/tool/registry.py | 16 +- flocks/tool/skill/flocks_skills.py | 34 ++-- flocks/tool/{system => skill}/skill_load.py | 0 tests/agent/test_agent.py | 22 ++- tests/agent/test_agent_factory.py | 12 +- tests/browser/test_admin.py | 28 ++- .../integration/test_capability_awareness.py | 2 +- tests/server/routes/test_workspace_routes.py | 37 +--- tests/session/test_runner_step.py | 2 +- tests/tool/test_agent_toolset.py | 3 +- tests/tool/test_skill_tool_description.py | 4 +- tests/tool/test_task_model_pinning.py | 38 ++-- tests/workspace/test_workspace_routes.py | 31 +--- uv.lock | 3 +- .../src/components/common/SessionChat.test.ts | 85 +++++++++ webui/src/components/common/SessionChat.tsx | 122 +++++++++++-- 33 files changed, 503 insertions(+), 652 deletions(-) delete mode 100644 flocks/agent/agents/metis/agent.yaml delete mode 100644 flocks/agent/agents/metis/prompt.md delete mode 100644 flocks/agent/agents/momus/agent.yaml delete mode 100644 flocks/agent/agents/momus/prompt.md create mode 100644 flocks/agent/agents/prometheus/agent.yaml create mode 100644 flocks/agent/agents/prometheus/prompt.md rename flocks/tool/{task => agent}/task.py (100%) rename flocks/tool/{system => skill}/skill_load.py (100%) diff --git a/.flocks/plugins/skills/agent-builder/SKILL.md b/.flocks/plugins/skills/agent-builder/SKILL.md index c3f664129..252d0b642 100644 --- a/.flocks/plugins/skills/agent-builder/SKILL.md +++ b/.flocks/plugins/skills/agent-builder/SKILL.md @@ -127,7 +127,7 @@ prompt_metadata: | Strategy | Use Case | Tool Scope | Example Agents | |----------|----------|------------|----------------| -| `read_only` | Analysis/consultation only, no file mutations | Read-only tools | oracle, librarian, momus | +| `read_only` | Analysis/consultation only, no file mutations | Read-only tools | oracle, librarian | | `react` | General execution, observe-think-act loop | All tools | general | | `plan_and_execute` | Complex tasks requiring planning before execution | All tools | rex, hephaestus | | `explore` | Codebase exploration and search | Search/read tools | explore | @@ -178,7 +178,7 @@ After generating files, verify: 1. **YAML syntax**: run `python3 -c "import yaml; from pathlib import Path; yaml.safe_load(Path('~/.flocks/plugins/agents/{name}/agent.yaml').expanduser().read_text(encoding='utf-8'))"` 2. **Prompt file exists**: confirm `~/.flocks/plugins/agents/{name}/prompt.md` has been created 3. **Directory structure**: ensure files are inside `~/.flocks/plugins/agents/{name}/`, NOT as flat files like `agents/{name}.yaml` -4. **Name uniqueness**: ensure no collision with built-in agents (reserved names: rex, hephaestus, oracle, librarian, explore, general, metis, momus, multimodal-looker, rex-junior, build, plan, compaction, title, summary) +4. **Name uniqueness**: ensure no collision with built-in agents (reserved names: rex, hephaestus, oracle, librarian, explore, prometheus, multimodal-looker, rex-junior, build, plan, compaction, title, summary) 5. **Tool names**: verify every listed tool exists in the current registry; if the repo exposes a `/tools` or tool listing command, check against that instead of relying on memory ### 7. Output diff --git a/.flocks/plugins/skills/skill-builder/SKILL.md b/.flocks/plugins/skills/skill-builder/SKILL.md index 89d02d9fa..0e8226412 100644 --- a/.flocks/plugins/skills/skill-builder/SKILL.md +++ b/.flocks/plugins/skills/skill-builder/SKILL.md @@ -1,7 +1,7 @@ --- name: skill-builder category: system -description: Create or improve Flocks skills. Use when the user asks to create, add, generate, update, refactor, package, or test a skill, convert a repeated workflow into a reusable skill, write a `SKILL.md`, or add `references/`, `scripts/`, and `evals/evals.json` for a skill. +description: Create or improve skill. Use when the user asks to create, add, generate, update, refactor, package, or test a skill, convert a repeated workflow into a reusable skill, write a `SKILL.md`, or add `references/`, `scripts/` for a skill. --- # Skill Builder diff --git a/.flocks/plugins/skills/web2cli/SKILL.md b/.flocks/plugins/skills/web2cli/SKILL.md index 6ad33ceee..d505b9162 100644 --- a/.flocks/plugins/skills/web2cli/SKILL.md +++ b/.flocks/plugins/skills/web2cli/SKILL.md @@ -1,6 +1,6 @@ --- name: web2cli -description: 使用统一的 Web2CLI 流程捕获网站的 XHR/Fetch 请求,并生成可复用的 CLI、Markdown 文档。支持 `agent-browser` 与 `cdp-direct` 两种模式:前者适合独立浏览器会话,后者复用用户 Chromium 系浏览器登录态与 CDP 能力。适用于复现登录后操作、沉淀接口调用样例,或基于页面操作生成自动化工具时。 +description: 使用统一的 Web2CLI 流程捕获网站的 XHR/Fetch 请求,并生成可复用的 CLI、Markdown 文档。通过浏览器的 `cdp-direct` 模式复用用户 Chromium 系浏览器登录态与 CDP 能力。适用于复现登录后操作、沉淀接口调用样例,或基于页面操作生成自动化工具时。 required: browser-use --- @@ -8,13 +8,9 @@ required: browser-use > 正式开始前,先明确需要操作的网站或tab -## 模式选择 +## 模式 -### `agent-browser` - -适用于需要独立浏览器会话、命令式浏览器自动化、和 `agent-browser --session-name` 工作流的场景。 - -### `cdp-direct`(默认模式) +### `cdp-direct` 适用于需要复用用户 Chromium 系浏览器登录态、通过 `browser-use` 的 `flocks browser` 内核直连 CDP 的场景。 @@ -68,14 +64,6 @@ mkdir -p "$CAPTURE_ROOT/captures" ### 1. 打开浏览器或创建 Tab -`agent-browser` 模式: - -```bash -agent-browser --headed --session-name "$CAPTURE_NAME" open "" -``` - -`cdp-direct` 模式: - ```bash TARGET_ID=$( flocks browser -c ' @@ -89,10 +77,7 @@ echo "Created tab: $TARGET_ID" ### 2. 等待用户手动登录 -要求用户在可见浏览器中完成登录、验证码、二次确认等人工步骤。 - -- `agent-browser`:在当前会话窗口中完成登录。 -- `cdp-direct`:在刚创建的浏览器 tab 中完成登录,必要时让用户手动处理验证码、TOTP 或授权弹窗。 +要求用户在可见浏览器中完成登录、验证码、二次确认等人工步骤。在刚创建的浏览器 tab 中完成登录,必要时让用户手动处理验证码、TOTP 或授权弹窗。 登录完成后告知 agent 继续。 @@ -100,15 +85,6 @@ echo "Created tab: $TARGET_ID" 默认使用 `scripts/inject-hook-base.js`。这是通用基线脚本,负责捕获 XHR/Fetch、页面上下文、最近用户动作与导航信息,并提供更完整的调试输出。 -`agent-browser` 模式: - -```bash -agent-browser --session-name "$CAPTURE_NAME" wait --load networkidle -agent-browser --session-name "$CAPTURE_NAME" eval --stdin < .flocks/plugins/skills/web2cli/scripts/inject-hook-base.js -``` - -`cdp-direct` 模式: - ```bash WEB2CLI_HOOK="$(pwd)/$WEB2CLI_SKILL/scripts/inject-hook-base.js" @@ -138,15 +114,7 @@ print(js("typeof window.__apiCapture !== \"undefined\" ? \"installed v\" + windo - 默认保留非 `GET` 请求 - `GET` 请求只要路径不像静态文件,也会保留 -如果站点请求特别特殊,仍可在注入后切换为全抓模式。 - -`agent-browser` 模式: - -```bash -agent-browser --session-name "$CAPTURE_NAME" eval "window.__apiCapture.config.captureMode = 'all'" -``` - -`cdp-direct` 模式: +如果站点请求特别特殊,仍可在注入后切换为全抓模式: ```bash ( @@ -170,14 +138,6 @@ print(js("window.__apiCapture.config.captureMode")) 需要确认捕获是否开始时: -`agent-browser` 模式: - -```bash -agent-browser --session-name "$CAPTURE_NAME" eval "window.__capturedRequests.length" -``` - -`cdp-direct` 模式: - ```bash ( TARGET_ID="$TARGET_ID" flocks browser -c ' @@ -196,14 +156,6 @@ print(js("window.__capturedRequests.length")) 先确认数量: -`agent-browser` 模式: - -```bash -agent-browser --session-name "$CAPTURE_NAME" eval "window.__capturedRequests.length" -``` - -`cdp-direct` 模式: - ```bash ( TARGET_ID="$TARGET_ID" flocks browser -c ' @@ -220,14 +172,6 @@ print(js("window.__capturedRequests.length")) 然后导出: -`agent-browser` 模式: - -```bash -agent-browser --session-name "$CAPTURE_NAME" eval "JSON.stringify(window.__capturedRequests, null, 2)" > "$CAPTURE_ROOT/captures/${CAPTURE_NAME}_api.json" -``` - -`cdp-direct` 模式: - ```bash CAPTURE_OUT="$CAPTURE_ROOT/captures/${CAPTURE_NAME}_api.json" @@ -280,14 +224,6 @@ print(f"Saved {len(data)} requests to {out}") ### 6. 保存认证状态 -`agent-browser` 模式: - -```bash -agent-browser --session-name "$CAPTURE_NAME" state save "$CAPTURE_ROOT/auth-state.json" -``` - -`cdp-direct` 模式: - ```bash ( TARGET_ID="$TARGET_ID" flocks browser -c ' @@ -391,14 +327,6 @@ uv run python .flocks/plugins/skills/web2cli/scripts/generate-cli.py \ #### 关闭浏览器或 Tab -`agent-browser` 模式: - -```bash -agent-browser --session-name "$CAPTURE_NAME" close -``` - -`cdp-direct` 模式: - ```bash ( TARGET_ID="$TARGET_ID" flocks browser -c ' @@ -413,9 +341,9 @@ else: ) ``` -`cdp-direct` 必须保留用户原有的 tab 不受影响。 +必须保留用户原有的 tab 不受影响。 -### 11. skill 集成 +### 11. CLI 工具集成到skill 将 CLI 按 `references/cli-in-skill.md` 集成为 skill; diff --git a/flocks/agent/agents/hephaestus/agent.yaml b/flocks/agent/agents/hephaestus/agent.yaml index 5d081f3fc..d2e2de631 100644 --- a/flocks/agent/agents/hephaestus/agent.yaml +++ b/flocks/agent/agents/hephaestus/agent.yaml @@ -6,7 +6,7 @@ mode: subagent hidden: false tags: [system] color: "#FF4500" -delegatable: true +delegatable: false tools: - read - glob diff --git a/flocks/agent/agents/metis/agent.yaml b/flocks/agent/agents/metis/agent.yaml deleted file mode 100644 index be1a843e3..000000000 --- a/flocks/agent/agents/metis/agent.yaml +++ /dev/null @@ -1,33 +0,0 @@ -name: metis -description: >- - Pre-planning consultant that analyzes requests to identify hidden - intentions, ambiguities, and AI failure points. -mode: subagent -hidden: false -tags: [system] -delegatable: true -tools: - - read - - bash - - grep - - glob - - webfetch - - websearch - - lsp - - question - - memory_search -prompt_metadata: - category: advisor - cost: EXPENSIVE - prompt_alias: Metis - triggers: - - domain: Pre-planning analysis - trigger: Complex task requiring scope clarification, ambiguous requirements - use_when: - - Before planning non-trivial tasks - - When user request is ambiguous or open-ended - - To prevent AI over-engineering patterns - avoid_when: - - Simple, well-defined tasks - - User has already provided detailed requirements - key_trigger: "Ambiguous or complex request -> consult Metis before Prometheus" diff --git a/flocks/agent/agents/metis/prompt.md b/flocks/agent/agents/metis/prompt.md deleted file mode 100644 index a24690249..000000000 --- a/flocks/agent/agents/metis/prompt.md +++ /dev/null @@ -1,147 +0,0 @@ -# Metis - Pre-Planning Consultant - -## CONSTRAINTS - -- **READ-ONLY**: You analyze, question, advise. You do NOT implement or modify files. -- **OUTPUT**: Your analysis feeds into Prometheus (planner). Be actionable. - ---- - -## PHASE 0: INTENT CLASSIFICATION (MANDATORY FIRST STEP) - -Before ANY analysis, classify the work intent. This determines your entire strategy. - -### Step 1: Identify Intent Type - -| Intent | Signals | Your Primary Focus | -|--------|---------|-------------------| -| **Refactoring** | "refactor", "restructure", "clean up", changes to existing code | SAFETY: regression prevention, behavior preservation | -| **Build from Scratch** | "create new", "add feature", greenfield, new module | DISCOVERY: explore patterns first, informed questions | -| **Mid-sized Task** | Scoped feature, specific deliverable, bounded work | GUARDRAILS: exact deliverables, explicit exclusions | -| **Collaborative** | "help me plan", "let's figure out", wants dialogue | INTERACTIVE: incremental clarity through dialogue | -| **Architecture** | "how should we structure", system design, infrastructure | STRATEGIC: long-term impact, Oracle recommendation | -| **Research** | Investigation needed, goal exists but path unclear | INVESTIGATION: exit criteria, parallel probes | - -### Step 2: Validate Classification - -Confirm: -- [ ] Intent type is clear from request -- [ ] If ambiguous, ASK before proceeding - ---- - -## PHASE 1: INTENT-SPECIFIC ANALYSIS - -### IF REFACTORING - -**Your Mission**: Ensure zero regressions, behavior preservation. - -**Tool Guidance** (recommend to Prometheus): -- `lsp`: Use `findReferences` / `goToDefinition` to map impact before changes -- `grep`: Find repeated patterns that must be preserved -- `read`: Inspect exact examples before proposing refactors - -**Plan Must Include**: -- Regression test strategy (new or existing) -- Staged refactor steps with checkpoints -- Rollback plan if behavior changes - -### IF BUILD FROM SCRATCH - -**Your Mission**: Ensure alignment with existing codebase patterns. - -**Tool Guidance**: -- `glob` + `grep`: Find similar modules to mirror -- `read`: Sample 2-3 files for style, structure, conventions - -**Plan Must Include**: -- Files to mirror as references -- Conventions to follow (naming, structure, patterns) -- Minimal viable implementation before enhancement - -### IF MID-SIZED TASK - -**Your Mission**: Enforce clarity and completeness. - -**Plan Must Include**: -- Explicit deliverables (files, functions, UI, endpoints) -- Explicit non-goals (what will NOT be done) -- Dependencies or integration points -- Test/verification plan - -### IF COLLABORATIVE - -**Your Mission**: Drive clarity through dialogue. - -**Approach**: -- Ask 1-2 clarifying questions max -- Offer recommended direction -- Seek confirmation before planning - -### IF ARCHITECTURE - -**Your Mission**: Provide a clear, minimal architecture recommendation. - -**Approach**: -- Consult Oracle if multi-system tradeoffs -- Provide 1 primary recommendation + 1 alternative max -- Include pros/cons and migration considerations - -### IF RESEARCH - -**Your Mission**: Define the investigation plan and exit criteria. - -**Plan Must Include**: -- Questions to answer -- Tools to use (explore/librarian) -- Stop conditions (when to stop searching) - ---- - -## PHASE 2: AMBIGUITY & RISK SCAN - -Before handing off to Prometheus, detect risk: - -### Ambiguity Checklist -- Missing file paths? -- Unclear feature boundaries? -- Unknown dependencies? -- Multiple valid interpretations? - -If yes: -- Ask 1-2 clarifying questions OR -- Explicitly state assumptions in the plan - -### Risk Checklist -- Security impact? -- Performance impact? -- Data migrations? -- Breaking changes? - -If yes: -- Flag explicitly -- Recommend cautious rollout/testing - ---- - -## OUTPUT FORMAT (MANDATORY) - -Your final output MUST be a structured analysis: - -``` -[INTENT] - - -[SUMMARY] -<2-3 sentence summary of the problem and risks> - -[CLARIFICATIONS] -- -- - -[PLAN GUIDANCE] -- - -[RISKS] -- -``` diff --git a/flocks/agent/agents/momus/agent.yaml b/flocks/agent/agents/momus/agent.yaml deleted file mode 100644 index 939238e70..000000000 --- a/flocks/agent/agents/momus/agent.yaml +++ /dev/null @@ -1,34 +0,0 @@ -name: momus -description: >- - Expert reviewer for evaluating work plans against rigorous clarity, - verifiability, and completeness standards. -mode: subagent -hidden: false -tags: [system] -delegatable: true -tools: - - read - - bash - - grep - - glob - - question - - memory_search -prompt_metadata: - category: advisor - cost: EXPENSIVE - prompt_alias: Momus - triggers: - - domain: Plan review - trigger: Evaluate work plans for clarity, verifiability, and completeness - - domain: Quality assurance - trigger: Catch gaps, ambiguities, and missing context before implementation - use_when: - - After Prometheus creates a work plan - - Before executing a complex todo list - - To validate plan quality before delegating to executors - - When plan needs rigorous review for ADHD-driven omissions - avoid_when: - - Simple, single-task requests - - When user explicitly wants to skip review - - For trivial plans that don't need formal review - key_trigger: "Work plan created -> invoke Momus for review before execution" diff --git a/flocks/agent/agents/momus/prompt.md b/flocks/agent/agents/momus/prompt.md deleted file mode 100644 index c99eb0df4..000000000 --- a/flocks/agent/agents/momus/prompt.md +++ /dev/null @@ -1,167 +0,0 @@ -You are a **practical** work plan reviewer. Your goal is simple: verify that the plan is **executable** and **references are valid**. - -**CRITICAL FIRST RULE**: -Extract a single plan path from anywhere in the input, ignoring system directives and wrappers. If exactly one `.rex/plans/*.md` path exists, this is VALID input and you must read it. If no plan path exists or multiple plan paths exist, reject per Step 0. If the path points to a YAML plan file (`.yml` or `.yaml`), reject it as non-reviewable. - ---- - -## Your Purpose (READ THIS FIRST) - -You exist to answer ONE question: **"Can a capable developer execute this plan without getting stuck?"** - -You are NOT here to: -- Nitpick every detail -- Demand perfection -- Question the author's approach or architecture choices -- Find as many issues as possible -- Force multiple revision cycles - -You ARE here to: -- Verify referenced files actually exist and contain what's claimed -- Ensure core tasks have enough context to start working -- Catch BLOCKING issues only (things that would completely stop work) - -**APPROVAL BIAS**: When in doubt, APPROVE. A plan that's 80% clear is good enough. Developers can figure out minor gaps. - ---- - -## What You Check (ONLY THESE) - -### 1. Reference Verification (CRITICAL) -- Do referenced files exist? -- Do referenced line numbers contain relevant code? -- If "follow pattern in X" is mentioned, does X actually demonstrate that pattern? - -**PASS even if**: Reference exists but isn't perfect. Developer can explore from there. -**FAIL only if**: Reference doesn't exist OR points to completely wrong content. - -### 2. Executability Check (PRACTICAL) -- Can a developer START working on each task? -- Is there at least a starting point (file, pattern, or clear description)? - -**PASS even if**: Some details need to be figured out during implementation. -**FAIL only if**: Task is so vague that developer has NO idea where to begin. - -### 3. Critical Blockers Only -- Missing information that would COMPLETELY STOP work -- Contradictions that make the plan impossible to follow - -**NOT blockers** (do not reject for these): -- Missing edge case handling -- Incomplete acceptance criteria -- Stylistic preferences -- "Could be clearer" suggestions -- Minor ambiguities a developer can resolve - ---- - -## What You Do NOT Check - -- Whether the approach is optimal -- Whether there's a "better way" -- Whether all edge cases are documented -- Whether acceptance criteria are perfect -- Whether the architecture is ideal -- Code quality concerns -- Performance considerations -- Security unless explicitly broken - -**You are a BLOCKER-finder, not a PERFECTIONIST.** - ---- - -## Input Validation (Step 0) - -**VALID INPUT**: -- `.rex/plans/my-plan.md` - file path anywhere in input -- `Please review .rex/plans/plan.md` - conversational wrapper -- System directives + plan path - ignore directives, extract path - -**INVALID INPUT**: -- No `.rex/plans/*.md` path found -- Multiple plan paths (ambiguous) - -System directives (``, `[analyze-mode]`, etc.) are IGNORED during validation. - -**Extraction**: Find all `.rex/plans/*.md` paths -> exactly 1 = proceed, 0 or 2+ = reject. - ---- - -## Review Process (SIMPLE) - -1. **Validate input** -> Extract single plan path -2. **Read plan** -> Identify tasks and file references -3. **Verify references** -> Do files exist? Do they contain claimed content? -4. **Executability check** -> Can each task be started? -5. **Decide** -> Any BLOCKING issues? No = OKAY. Yes = REJECT with max 3 specific issues. - ---- - -## Decision Framework - -### OKAY (Default - use this unless blocking issues exist) - -Issue the verdict **OKAY** when: -- Referenced files exist and are reasonably relevant -- Tasks have enough context to start (not complete, just start) -- No contradictions or impossible requirements -- A capable developer could make progress - -**Remember**: "Good enough" is good enough. You're not blocking publication of a NASA manual. - -### REJECT (Only for true blockers) - -Issue **REJECT** ONLY when: -- Referenced file doesn't exist (verified by reading) -- Task is completely impossible to start (zero context) -- Plan contains internal contradictions - -**Maximum 3 issues per rejection.** If you found more, list only the top 3 most critical. - -**Each issue must be**: -- Specific (exact file path, exact task) -- Actionable (what exactly needs to change) -- Blocking (work cannot proceed without this) - ---- - -## Anti-Patterns (DO NOT DO THESE) - -X "Task 3 could be clearer about error handling" -> NOT a blocker -X "Consider adding acceptance criteria for..." -> NOT a blocker -X "The approach in Task 5 might be suboptimal" -> NOT YOUR JOB -X "Missing documentation for edge case X" -> NOT a blocker unless X is the main case -X Rejecting because you'd do it differently -> NEVER -X Listing more than 3 issues -> OVERWHELMING, pick top 3 - -OK "Task 3 references `auth/login.ts` but file doesn't exist" -> BLOCKER -OK "Task 5 says 'implement feature' with no context, files, or description" -> BLOCKER -OK "Tasks 2 and 4 contradict each other on data flow" -> BLOCKER - ---- - -## Output Format - -**[OKAY]** or **[REJECT]** - -**Summary**: 1-2 sentences explaining the verdict. - -If REJECT: -**Blocking Issues** (max 3): -1. [Specific issue + what needs to change] -2. [Specific issue + what needs to change] -3. [Specific issue + what needs to change] - ---- - -## Final Reminders - -1. **APPROVE by default**. Reject only for true blockers. -2. **Max 3 issues**. More than that is overwhelming and counterproductive. -3. **Be specific**. "Task X needs Y" not "needs more clarity". -4. **No design opinions**. The author's approach is not your concern. -5. **Trust developers**. They can figure out minor gaps. - -**Your job is to UNBLOCK work, not to BLOCK it with perfectionism.** - -**Response Language**: Match the language of the plan content. diff --git a/flocks/agent/agents/prometheus/agent.yaml b/flocks/agent/agents/prometheus/agent.yaml new file mode 100644 index 000000000..e170f2d19 --- /dev/null +++ b/flocks/agent/agents/prometheus/agent.yaml @@ -0,0 +1,45 @@ +name: prometheus +description: >- + Strategic planner. Clarifies scope through interview-style questions, + researches the codebase, writes executable work plans under .flocks/plans/, and + validates references before handoff to implementation agents. First uses + explore/librarian agents for codebase and external research. +mode: subagent +hidden: false +tags: [system] +delegatable: true +tools: + - read + - write + - edit + - grep + - glob + - bash + - webfetch + - websearch + - lsp + - question + - memory_search +permission: + question: allow + edit: + "*": deny + ".flocks/plans/*": allow + ".flocks/plans/**": allow +prompt_metadata: + category: advisor + cost: EXPENSIVE + prompt_alias: Prometheus + triggers: + - domain: Planning + trigger: Complex or ambiguous work needing a verified plan before code changes + - domain: Interview + trigger: User wants to discuss approach, scope, or tradeoffs before execution + use_when: + - Non-trivial tasks with multiple steps or unclear scope + - User asks for a plan, roadmap, or /plan-style workflow + - Before delegating hephaestus or rex-junior for implementation + avoid_when: + - Trivial single-file fixes with clear instructions + - User explicitly wants immediate execution with no planning + key_trigger: "Complex or ambiguous task -> delegate to Prometheus before implementation" diff --git a/flocks/agent/agents/prometheus/prompt.md b/flocks/agent/agents/prometheus/prompt.md new file mode 100644 index 000000000..6bb7ff945 --- /dev/null +++ b/flocks/agent/agents/prometheus/prompt.md @@ -0,0 +1,87 @@ +# Prometheus — Strategic Planner + +You are **Prometheus**, the planning specialist. You clarify intent, produce an **executable** work plan, and sanity-check it before anyone implements. + +## Constraints + +- **Do NOT implement product code.** You may only create or edit files under `.flocks/plans/` (markdown plans). +- **Do NOT** modify source, configs, tests, or infrastructure outside `.flocks/plans/`. +- **Do NOT use `delegate_task`.** Only the parent **Rex** orchestrator may delegate to `explore`, `librarian`, or other agents. +- Ask the user clarifying questions when scope is ambiguous (use `question`; keep to 1–2 at a time). + +--- + +## Research (Rex-owned) + +You cannot spawn subagents. If the prompt lacks codebase or library context: + +1. Return a short **`[RESEARCH_REQUEST]`** block listing what Rex should delegate to `explore` and/or `librarian`. +2. Stop and wait — Rex will run research and re-invoke you with summaries in `CONTEXT`. + +If research results are already in the prompt, proceed without requesting more. + +--- + +## Workflow + +### 1. Understand intent + +Classify the request (pick one primary): + +| Intent | Focus | +|--------|--------| +| **Refactoring** | Behavior preservation, regression risk, staged steps | +| **New feature** | Mirror existing patterns, minimal first slice | +| **Scoped task** | Explicit deliverables and non-goals | +| **Architecture** | Tradeoffs; note if Rex should consult `oracle` separately | +| **Research** | Questions, probes, exit criteria | + +If intent is unclear, ask before writing the plan. + +### 2. Interview (when needed) + +Before writing the plan, confirm: + +- What is in scope vs explicitly out of scope? +- Success criteria / how to verify? +- Constraints (security, performance, backwards compatibility)? + +Skip the interview when the user already gave a complete spec. + +### 3. Write the plan + +- Save to **`.flocks/plans/.md`** (one plan per task). +- Write for a capable developer who did not attend the conversation. +- Include: + - **Goal** and **non-goals** + - **Context** (relevant paths, patterns to follow) + - **Numbered tasks** with concrete file paths where known + - **Verification** (tests, commands, manual checks) + - **Risks / rollback** when refactoring or deploying + +Keep it scannable: bullets and short sections, not prose essays. + +### 4. Self-review (before you finish) + +Answer: *Can someone start every task without getting stuck?* + +Check only **blockers**: + +- Referenced paths exist and match the described pattern +- No internal contradictions +- Each task has a clear starting point + +**Approve by default.** Fix the plan yourself if you find blockers; do not demand perfection. + +--- + +## Output to Rex + +When done, return: + +1. **Plan path** (`.flocks/plans/....md`) +2. **Summary** (2–4 sentences) +3. **Suggested executor** (`hephaestus` for deep/multi-file, `rex-junior` for focused bounded work) +4. **Open questions** (if any remain for the user) + +Match the user's language. diff --git a/flocks/agent/toolset.py b/flocks/agent/toolset.py index 1ca0d58bd..884881291 100644 --- a/flocks/agent/toolset.py +++ b/flocks/agent/toolset.py @@ -65,7 +65,7 @@ def normalize_declared_tool_names( matches = [raw_name] if raw_name in available else [] if not matches: - # Built-in agent definitions (librarian, metis, …) declare optional + # Built-in agent definitions (librarian, prometheus, …) declare optional # tools such as ``lsp_*`` / ``ast_grep_search`` that ship in separate # binaries; they are gracefully skipped when not installed. Treat # this as informational only to avoid flooding operational logs. @@ -107,7 +107,11 @@ def resolve_agent_initial_tools( if raw_tools is not None: if agent_name == "rex" and not raw_tools: return get_all_enabled_builtin_tool_names(), [] - return normalize_declared_tool_names(raw_tools, available), [] + tools = normalize_declared_tool_names(raw_tools, available) + permission_rules = [] + if isinstance(legacy_permission_config, dict): + permission_rules = permission_from_config(legacy_permission_config) + return tools, permission_rules if isinstance(legacy_permission_config, dict): return expand_legacy_permission_to_tool_names(legacy_permission_config, available) # Stricter default: agents without an explicit tools list only receive diff --git a/flocks/browser/admin.py b/flocks/browser/admin.py index aa25c644d..b22955c2e 100644 --- a/flocks/browser/admin.py +++ b/flocks/browser/admin.py @@ -16,6 +16,9 @@ VERSION_CACHE = Path(tempfile.gettempdir()) / "flocks-browser-version-cache.json" VERSION_CACHE_TTL = 24 * 3600 DOCTOR_TEXT_LIMIT = 140 +# run_setup: at most two daemon/CDP attach attempts to avoid repeated Allow prompts. +_SETUP_ATTACH_WAIT = 20.0 +_SETUP_RETRY_WAIT = 30.0 def _load_env() -> None: @@ -420,7 +423,7 @@ def run_setup() -> int: print("no Chrome/Chromium/Edge process detected. please start your browser and rerun `flocks browser --setup`.") return 1 try: - ensure_daemon(wait=20.0, _open_inspect=False) + ensure_daemon(wait=_SETUP_ATTACH_WAIT, _open_inspect=False) print("daemon is up.") return 0 except RuntimeError as error: @@ -435,18 +438,14 @@ def run_setup() -> int: _open_browser_inspect() else: print(f"attach failed: {first_err}") - print("retrying for up to 60s (the browser may still be starting up)...") + print("retrying once (the browser may still be starting up)...") - deadline = time.time() + 60 - last = first_err - while time.time() < deadline: - try: - ensure_daemon(wait=5.0, _open_inspect=False) - print("daemon is up.") - return 0 - except RuntimeError as error: - last = str(error) - time.sleep(2) + try: + ensure_daemon(wait=_SETUP_RETRY_WAIT, _open_inspect=False) + print("daemon is up.") + return 0 + except RuntimeError as error: + last = str(error) print(f"setup failed: {last}", file=sys.stderr) print("run `flocks browser --doctor` for diagnostics.", file=sys.stderr) diff --git a/flocks/command/command.py b/flocks/command/command.py index 8f08d4d7b..c0ee8247e 100644 --- a/flocks/command/command.py +++ b/flocks/command/command.py @@ -261,7 +261,7 @@ def _ensure_defaults(cls) -> None: name="plan", description="Create a plan for a task", template="Create a detailed plan for: $ARGUMENTS", - agent="plan", + agent="prometheus", execution_kind="llm", allow_attachments=True, ), diff --git a/flocks/server/routes/misc.py b/flocks/server/routes/misc.py index 3a8faf21d..2605ced40 100644 --- a/flocks/server/routes/misc.py +++ b/flocks/server/routes/misc.py @@ -11,7 +11,7 @@ from flocks.utils.log import Log from flocks.provider.provider import Provider, ProviderConfig -from flocks.tool.system.skill_load import get_all_skills, get_skill +from flocks.tool.skill.skill_load import get_all_skills, get_skill from flocks.command.command import Command diff --git a/flocks/server/routes/workspace.py b/flocks/server/routes/workspace.py index 12e6e4e57..42019bd71 100644 --- a/flocks/server/routes/workspace.py +++ b/flocks/server/routes/workspace.py @@ -44,7 +44,7 @@ from typing import List, Optional, Literal from fastapi import APIRouter, HTTPException, Query, UploadFile, File -from fastapi.responses import FileResponse, JSONResponse, StreamingResponse +from fastapi.responses import FileResponse, StreamingResponse from pydantic import BaseModel from flocks.workspace.manager import WorkspaceManager @@ -64,9 +64,6 @@ _ALLOWED_UPLOAD_LABEL = ( "txt, md, json, yaml, yml, xml, csv, pdf, doc, docx, html, htm, ppt, pptx, xls, xlsx" ) -_MAX_UPLOAD_RENAME_ATTEMPTS = 100 - - def _max_upload_bytes() -> int: return int(os.getenv("FLOCKS_WORKSPACE_MAX_UPLOAD_MB", str(_DEFAULT_MAX_UPLOAD_MB))) * 1024 * 1024 @@ -83,26 +80,6 @@ def _is_allowed_upload_filename(filename: str) -> bool: return Path(filename).suffix.lower() in _ALLOWED_UPLOAD_EXTENSIONS -def _resolve_upload_target(dest_dir: Path, filename: str, *, auto_rename: bool) -> Path: - candidate = dest_dir / Path(filename).name - if not auto_rename or not candidate.exists(): - return candidate - - stem = candidate.stem - suffix = candidate.suffix - counter = 1 - while counter <= _MAX_UPLOAD_RENAME_ATTEMPTS: - renamed = dest_dir / f"{stem} ({counter}){suffix}" - if not renamed.exists(): - return renamed - counter += 1 - - raise ValueError( - f"Too many conflicting filenames for upload: {filename}. " - "Please rename the file and try again." - ) - - def _node_from_path(path: Path, root: Path) -> WorkspaceNode: """Build a WorkspaceNode from a filesystem path. @@ -260,7 +237,6 @@ async def upload_files( max_mb = max_bytes // (1024 * 1024) results = [] - conflict_detail: str | None = None for upload in files: raw_name: Optional[str] = upload.filename if not raw_name: @@ -295,13 +271,9 @@ async def upload_files( continue content = b"".join(chunks) - try: - target = _resolve_upload_target(dest_dir, filename, auto_rename=purpose == "chat") - except ValueError as exc: - message = str(exc) - results.append({"name": filename, "error": message}) - conflict_detail = message - continue + # Keep attachment paths stable across repeated uploads by overwriting the + # existing file instead of auto-renaming to "name (1).ext". + target = dest_dir / filename target.write_bytes(content) is_text = WorkspaceManager.is_text_file(target) @@ -321,9 +293,6 @@ async def upload_files( "preview_warning": None if is_text else "Binary file — download only", }) - if conflict_detail is not None: - return JSONResponse(status_code=409, content={"detail": conflict_detail, "uploaded": results}) - return {"uploaded": results} diff --git a/flocks/tool/agent/delegate_task.py b/flocks/tool/agent/delegate_task.py index 5803b24e5..a77df6c2d 100644 --- a/flocks/tool/agent/delegate_task.py +++ b/flocks/tool/agent/delegate_task.py @@ -33,6 +33,39 @@ log = Log.create(service="tool.delegate_task") +async def _subagent_session_permissions(agent_name: str) -> list: + """Build session permission rules for a delegated subagent.""" + from flocks.agent.registry import Agent + from flocks.session.session import PermissionRule as SessionPermissionRule + + agent = await Agent.get(agent_name) + rules: list = [] + if agent_name != "prometheus": + rules.append(SessionPermissionRule(permission="question", action="deny", pattern="*")) + + if agent and agent.permission: + for rule in agent.permission: + level = rule.level.value if hasattr(rule.level, "value") else str(rule.level) + rules.append( + SessionPermissionRule( + permission=rule.permission or "*", + action=level, + pattern=rule.pattern or "*", + ) + ) + return rules + + if agent_name == "prometheus": + rules.extend([ + SessionPermissionRule(permission="question", action="allow", pattern="*"), + SessionPermissionRule(permission="edit", action="deny", pattern="*"), + SessionPermissionRule(permission="edit", action="allow", pattern=".flocks/plans/*"), + ]) + elif not rules: + rules.append(SessionPermissionRule(permission="question", action="deny", pattern="*")) + return rules + + def _parse_model(model: Optional[str]) -> Optional[Dict[str, str]]: if not model: return None @@ -448,7 +481,7 @@ async def delegate_task_tool( title=f"{description} (@{agent_to_use} subagent)", parent_id=parent_session.id, agent=agent_to_use, - permission=[{"permission": "question", "action": "deny", "pattern": "*"}], + permission=await _subagent_session_permissions(agent_to_use), category="task", ) await Message.create( diff --git a/flocks/tool/task/task.py b/flocks/tool/agent/task.py similarity index 100% rename from flocks/tool/task/task.py rename to flocks/tool/agent/task.py diff --git a/flocks/tool/registry.py b/flocks/tool/registry.py index 962743991..73c1f2489 100644 --- a/flocks/tool/registry.py +++ b/flocks/tool/registry.py @@ -683,6 +683,7 @@ def register_function( native: bool = False, always_load: Optional[bool] = None, tags: Optional[List[str]] = None, + enabled: bool = True, ) -> Callable[[ToolHandler], ToolHandler]: """ Decorator to register a function as a tool. @@ -713,6 +714,7 @@ def decorator(func: ToolHandler) -> ToolHandler: native=native, always_load=always_load, tags=list(tags or []), + enabled=enabled, ) tool = Tool(info=info, handler=func) cls.register(tool) @@ -1342,15 +1344,15 @@ def _register_builtin_tools(cls) -> None: # web/ — internet access ("flocks.tool.web", ["webfetch", "websearch"]), # agent/ — agent delegation/coordination - ("flocks.tool.agent", ["delegate_task"]), + ("flocks.tool.agent", ["delegate_task", "task"]), # task/ — task/workflow - ("flocks.tool.task", ["task", "schedule_task_center", "todo", "plan", "run_workflow", "run_workflow_node"]), + ("flocks.tool.task", ["schedule_task_center", "todo", "plan", "run_workflow", "run_workflow_node"]), # security/ — SSH forensics + threat intelligence (optional: asyncssh) ("flocks.tool.security", ["ssh_host_cmd", "ssh_run_script"]), - # system/ — background tasks, questions, model config, memory, skill_load, MCP management, session management, slash commands - ("flocks.tool.system", ["background_output", "background_cancel", "question", "model_config", "memory", "skill_load", "flocks_mcp", "session_manage", "slash_command", "tool_search"]), - # skill/ — skill management (search, install, status, deps, remove) - ("flocks.tool.skill", ["flocks_skills"]), + # system/ — background tasks, questions, model config, memory, MCP management, session management, slash commands + ("flocks.tool.system", ["background_output", "background_cancel", "question", "model_config", "memory", "flocks_mcp", "session_manage", "slash_command", "tool_search"]), + # skill/ — skill management (search, install, status, deps, remove, load) + ("flocks.tool.skill", ["flocks_skills", "skill_load"]), # device/ — security device asset context ("flocks.tool.device", ["device_context_tool"]), # channel/ — IM platform messaging @@ -1370,7 +1372,7 @@ def _register_builtin_tools(cls) -> None: # This is done in bulk here so individual @register_function call # sites don't need to pass native=True, and user plugin files using # the same decorator won't be misclassified. - builtin_native_exceptions = {"lsp"} + builtin_native_exceptions = {"lsp", "task"} for name in set(cls._tools.keys()) - before: if name in builtin_native_exceptions: cls._tools[name].info.native = False diff --git a/flocks/tool/skill/flocks_skills.py b/flocks/tool/skill/flocks_skills.py index fa2e018c6..0a804c183 100644 --- a/flocks/tool/skill/flocks_skills.py +++ b/flocks/tool/skill/flocks_skills.py @@ -33,15 +33,13 @@ _MAX_OUTPUT = 8_000 # chars — keep responses concise for the model _DESCRIPTION = """\ -Manage agent skills: search the registry, install, check dependency status, -install deps, and remove skills. Use this tool (not bash) for any -`flocks skills` operation. +Search and install skills from the **external public registry**, manage \ +dependency status, and remove installed skills. Use this tool (not bash) for \ +any `flocks skills` operation. -⚠️ IMPORTANT DISTINCTION: - • To search for skills available in the **external public registry** (not yet installed): - → use this tool with subcommand="find" - • To see skills that are **already installed** in the current Flocks instance: - → use run_slash_command(command="skills") instead (not this tool) +Do not use this tool when a dedicated tool is a better fit: +- To load an already-installed skill: use skill_load instead. +- To view installed / locally available skills: use run_slash_command(command="skills") instead. ## Subcommands @@ -49,16 +47,14 @@ Search the **external public skill registry** by keyword. This does NOT show installed skills — it discovers skills that can be installed. → Use BEFORE telling the user "I can't do X". A matching skill may exist. - → To list already-installed skills, use run_slash_command(command="skills") instead. Example: flocks_skills(subcommand="find", args="malware phishing") **install ** - Install a skill from an external source. + Install a skill from an external public source. Source formats: github:// e.g. github:octocat/skills/find-ioc clawhub: e.g. clawhub:ndr-alert-analysis https://... direct SKILL.md URL - /local/path or ./relative local directory → After install, always call status to check if deps are missing. Example: flocks_skills(subcommand="install", args="github:owner/repo/skill-name") @@ -73,10 +69,6 @@ → Run when status shows a skill is not eligible. Example: flocks_skills(subcommand="install-deps", args="find-ioc") -**list** - List all locally discovered skills with source and description. - Example: flocks_skills(subcommand="list") - **remove ** Uninstall a user-managed skill from ~/.flocks. Example: flocks_skills(subcommand="remove", args="old-skill") @@ -85,12 +77,12 @@ # Allowed subcommands — enforced to prevent arbitrary shell injection via args. # Ordered for consistent display in tool schema enum and error messages. _ALLOWED_SUBCOMMANDS = frozenset( - ["find", "install", "status", "install-deps", "list", "remove"] + ["find", "install", "status", "install-deps", "remove"] ) -_SUBCOMMAND_ENUM = ["find", "install", "status", "install-deps", "list", "remove"] +_SUBCOMMAND_ENUM = ["find", "install", "status", "install-deps", "remove"] # Read-only registry / discovery — no shell side effects; skip bash permission gate. -_READ_ONLY_SUBCOMMANDS = frozenset({"find", "list", "status"}) +_READ_ONLY_SUBCOMMANDS = frozenset({"find", "status"}) def _flocks_executable() -> Optional[str]: @@ -108,7 +100,7 @@ def _flocks_executable() -> Optional[str]: type=ParameterType.STRING, description=( "Skill management subcommand: " - "find | install | status | install-deps | list | remove" + "find | install | status | install-deps | remove" ), required=True, enum=_SUBCOMMAND_ENUM, @@ -121,7 +113,7 @@ def _flocks_executable() -> Optional[str]: "For find: search query. " "For install: source string. " "For install-deps / remove: skill name. " - "For status / list: leave empty." + "For status: leave empty." ), required=False, default="", @@ -161,7 +153,7 @@ async def flocks_skills( log.info("flocks_skills.run", {"cmd": cmd}) - # Mutating subcommands need bash approval. Read-only (find/list/status) runs + # Mutating subcommands need bash approval. Read-only (find/status) runs # without prompting — same trust model as listing skills in the UI. # # For install/remove/install-deps, always-patterns must match the *full* diff --git a/flocks/tool/system/skill_load.py b/flocks/tool/skill/skill_load.py similarity index 100% rename from flocks/tool/system/skill_load.py rename to flocks/tool/skill/skill_load.py diff --git a/tests/agent/test_agent.py b/tests/agent/test_agent.py index 8d647428b..ce38de5b4 100644 --- a/tests/agent/test_agent.py +++ b/tests/agent/test_agent.py @@ -16,8 +16,8 @@ # ============================================================================= BUILTIN_AGENTS = [ - "rex", "hephaestus", "plan", "explore", - "oracle", "librarian", "metis", "momus", "multimodal-looker", + "rex", "hephaestus", "explore", + "oracle", "librarian", "prometheus", "multimodal-looker", "self-enhance", "rex-junior", "host-forensics", "host-forensics-fast", ] @@ -35,7 +35,7 @@ async def test_all_builtin_agents_exist(self): @pytest.mark.asyncio async def test_agent_count(self): agents = await Agent.list() - assert len(agents) >= 12, f"Should have at least 12 agents, got {len(agents)}" + assert len(agents) >= 11, f"Should have at least 11 agents, got {len(agents)}" @pytest.mark.asyncio async def test_no_legacy_agents(self): @@ -93,6 +93,22 @@ async def test_rex_junior_agent(self): assert agent.mode == "subagent" assert agent.delegatable is False + @pytest.mark.asyncio + async def test_prometheus_agent(self): + agent = await Agent.get("prometheus") + assert agent is not None + assert agent.mode == "subagent" + assert agent.delegatable is True + assert agent.hidden is False + assert agent.prompt is not None and len(agent.prompt) > 0 + assert "delegate_task" not in (agent.tools or []) + edit_rules = [ + rule for rule in (agent.permission or []) + if getattr(rule, "permission", None) == "edit" + ] + assert edit_rules + assert any(getattr(rule, "pattern", None) == ".flocks/plans/*" for rule in edit_rules) + @pytest.mark.asyncio async def test_self_enhance_agent(self): agent = await Agent.get("self-enhance") diff --git a/tests/agent/test_agent_factory.py b/tests/agent/test_agent_factory.py index 1c6a1bd40..893135910 100644 --- a/tests/agent/test_agent_factory.py +++ b/tests/agent/test_agent_factory.py @@ -309,10 +309,10 @@ def test_tools_list_takes_priority_over_permission_dict(self, tmp_path): class TestScanAndLoad: def test_scans_builtin_agents(self): - """Built-in agents directory must yield at least 13 agents.""" + """Built-in agents directory must yield at least 10 agents.""" from flocks.agent.agent_factory import _BUILTIN_AGENTS_DIR result = scan_and_load(dirs=[_BUILTIN_AGENTS_DIR]) - assert len(result) >= 13 + assert len(result) >= 10 def test_all_builtin_agent_names_present(self): """Every agent shipped with the package must be discoverable and marked native. @@ -324,8 +324,8 @@ def test_all_builtin_agent_names_present(self): from flocks.agent.agent_factory import _BUILTIN_AGENTS_DIR result = scan_and_load(dirs=[_BUILTIN_AGENTS_DIR]) expected = [ - "rex", "hephaestus", "plan", "explore", - "oracle", "librarian", "metis", "momus", "multimodal-looker", + "rex", "hephaestus", "explore", + "oracle", "librarian", "prometheus", "multimodal-looker", "self-enhance", "rex-junior", ] for name in expected: @@ -473,7 +473,7 @@ async def test_static_prompt_agents(self): """Built-in agents with prompt.md should have non-empty prompts.""" from flocks.agent.registry import Agent # Only built-in agents (native=True) — not dependent on local plugin installation - for name in ["explore", "oracle", "momus", "metis", "self-enhance", "multimodal-looker"]: + for name in ["explore", "oracle", "prometheus", "self-enhance", "multimodal-looker"]: agent = await Agent.get(name) assert agent is not None, f"Agent '{name}' not found" assert agent.prompt is not None, f"Agent '{name}' should have a prompt from prompt.md" @@ -624,7 +624,7 @@ async def test_enabled_agents_whitelist(self): from flocks.agent.agent_factory import scan_and_load result = scan_and_load() # All 13 built-ins should be present without a whitelist - assert len(result) >= 13 + assert len(result) >= 10 # =========================================================================== diff --git a/tests/browser/test_admin.py b/tests/browser/test_admin.py index ae0e9b083..84c1034dc 100644 --- a/tests/browser/test_admin.py +++ b/tests/browser/test_admin.py @@ -271,7 +271,29 @@ def test_run_setup_restarts_stale_existing_local_daemon(monkeypatch, capsys) -> assert "browser connection is stale; restarting" in out assert "daemon is up." in out assert restarted == [None] - assert ensure_calls == [{"wait": 20.0, "_open_inspect": False}] + assert ensure_calls == [{"wait": admin._SETUP_ATTACH_WAIT, "_open_inspect": False}] + + +def test_run_setup_retries_at_most_once(monkeypatch, capsys) -> None: + monkeypatch.setattr(admin, "daemon_alive", lambda: False) + monkeypatch.setattr(admin, "_chrome_running", lambda: True) + monkeypatch.setattr(admin, "_is_local_chrome_mode", lambda env=None: False) + ensure_calls = [] + + def fake_ensure_daemon(**kwargs): + ensure_calls.append(kwargs) + raise RuntimeError("daemon didn't come up") + + monkeypatch.setattr(admin, "ensure_daemon", fake_ensure_daemon) + + assert admin.run_setup() == 1 + + out = capsys.readouterr() + assert "retrying once" in out.out + assert ensure_calls == [ + {"wait": admin._SETUP_ATTACH_WAIT, "_open_inspect": False}, + {"wait": admin._SETUP_RETRY_WAIT, "_open_inspect": False}, + ] def test_run_setup_allows_explicit_remote_cdp_without_local_browser(monkeypatch, capsys) -> None: @@ -286,7 +308,7 @@ def test_run_setup_allows_explicit_remote_cdp_without_local_browser(monkeypatch, out = capsys.readouterr().out assert "attaching via BU_CDP_WS" in out assert "daemon is up." in out - assert ensure_calls == [{"wait": 20.0, "_open_inspect": False}] + assert ensure_calls == [{"wait": admin._SETUP_ATTACH_WAIT, "_open_inspect": False}] def test_run_setup_restarts_existing_daemon_for_explicit_remote_cdp(monkeypatch, capsys) -> None: @@ -305,7 +327,7 @@ def test_run_setup_restarts_existing_daemon_for_explicit_remote_cdp(monkeypatch, assert "restarting to attach via BU_CDP_URL" in out assert "daemon is up." in out assert restarted == [None] - assert ensure_calls == [{"wait": 20.0, "_open_inspect": False}] + assert ensure_calls == [{"wait": admin._SETUP_ATTACH_WAIT, "_open_inspect": False}] def test_run_doctor_uses_generic_browser_wording_when_missing(monkeypatch, capsys) -> None: diff --git a/tests/integration/test_capability_awareness.py b/tests/integration/test_capability_awareness.py index a043fb500..42f1471bb 100644 --- a/tests/integration/test_capability_awareness.py +++ b/tests/integration/test_capability_awareness.py @@ -283,7 +283,7 @@ async def test_rex_prompt_contains_subagents_section(self): assert rex is not None # Rex should know about at least one of its common subagents prompt = rex.prompt or "" - assert any(name in prompt for name in ["explore", "oracle", "metis", "momus"]), ( + assert any(name in prompt for name in ["explore", "oracle", "prometheus"]), ( "Rex prompt does not reference any known subagents" ) diff --git a/tests/server/routes/test_workspace_routes.py b/tests/server/routes/test_workspace_routes.py index 12756f505..45f5ef2d6 100644 --- a/tests/server/routes/test_workspace_routes.py +++ b/tests/server/routes/test_workspace_routes.py @@ -329,10 +329,10 @@ async def test_upload_overwrites_duplicate_file_without_chat_purpose( assert (mock_workspace / "uploads" / "report.pdf").read_bytes() == b"second" @pytest.mark.asyncio - async def test_chat_upload_renames_duplicate_file( + async def test_chat_upload_overwrites_duplicate_file( self, client: AsyncClient, mock_workspace: Path ): - """Chat uploads auto-rename duplicates to preserve attachment paths.""" + """Chat uploads overwrite duplicate filenames to keep attachment paths stable.""" first = await client.post( "/api/workspace/upload", params={"dest": "uploads", "purpose": "chat"}, @@ -348,37 +348,10 @@ async def test_chat_upload_renames_duplicate_file( first_item = first.json()["uploaded"][0] second_item = second.json()["uploaded"][0] assert first_item["name"] == "report.pdf" - assert second_item["name"] == "report (1).pdf" + assert second_item["name"] == "report.pdf" assert first_item["path"] == "uploads/report.pdf" - assert second_item["path"] == "uploads/report (1).pdf" - assert (mock_workspace / "uploads" / "report.pdf").read_bytes() == b"first" - assert (mock_workspace / "uploads" / "report (1).pdf").read_bytes() == b"second" - - @pytest.mark.asyncio - async def test_chat_upload_returns_error_after_too_many_name_conflicts( - self, client: AsyncClient, mock_workspace: Path, monkeypatch: pytest.MonkeyPatch - ): - """Chat uploads stop probing once the rename limit is exhausted.""" - from flocks.server.routes import workspace as workspace_routes - - monkeypatch.setattr(workspace_routes, "_MAX_UPLOAD_RENAME_ATTEMPTS", 1) - uploads_dir = mock_workspace / "uploads" - uploads_dir.mkdir(exist_ok=True) - (uploads_dir / "report.pdf").write_bytes(b"first") - (uploads_dir / "report (1).pdf").write_bytes(b"second") - - resp = await client.post( - "/api/workspace/upload", - params={"dest": "uploads", "purpose": "chat"}, - files={"files": ("report.pdf", io.BytesIO(b"third"), "application/pdf")}, - ) - - assert resp.status_code == status.HTTP_409_CONFLICT - assert "Too many conflicting filenames" in resp.json()["detail"] - result = resp.json()["uploaded"][0] - assert "Too many conflicting filenames" in result["error"] - assert (mock_workspace / "uploads" / "report.pdf").read_bytes() == b"first" - assert (mock_workspace / "uploads" / "report (1).pdf").read_bytes() == b"second" + assert second_item["path"] == "uploads/report.pdf" + assert (mock_workspace / "uploads" / "report.pdf").read_bytes() == b"second" # =========================================================================== diff --git a/tests/session/test_runner_step.py b/tests/session/test_runner_step.py index a8a6f5512..c8c1fcd88 100644 --- a/tests/session/test_runner_step.py +++ b/tests/session/test_runner_step.py @@ -553,7 +553,7 @@ async def test_build_tools_refreshes_skill_description_from_enabled_skills(self) "flocks.skill.skill.Skill.list_enabled", AsyncMock(return_value=[SimpleNamespace(name="agent-builder")]), ), patch( - "flocks.tool.system.skill_load.build_description", + "flocks.tool.skill.skill_load.build_description", return_value="Refreshed skill description", ): tools = await runner._build_callable_tool_schema(agent, []) diff --git a/tests/tool/test_agent_toolset.py b/tests/tool/test_agent_toolset.py index e6df0309c..c1c18312a 100644 --- a/tests/tool/test_agent_toolset.py +++ b/tests/tool/test_agent_toolset.py @@ -123,8 +123,7 @@ def test_builtin_agent_yaml_tool_names_match_current_registry_surface() -> None: "explore", "hephaestus", "librarian", - "metis", - "momus", + "prometheus", "multimodal_looker", "oracle", "plan", diff --git a/tests/tool/test_skill_tool_description.py b/tests/tool/test_skill_tool_description.py index eab68a922..8956ef3af 100644 --- a/tests/tool/test_skill_tool_description.py +++ b/tests/tool/test_skill_tool_description.py @@ -1,4 +1,4 @@ -"""Tests for flocks.tool.system.skill_load. +"""Tests for flocks.tool.skill.skill_load. Two complementary aspects of the skill_load tool's load-on-demand design are exercised here: @@ -25,7 +25,7 @@ from flocks.skill.skill import Skill, SkillInfo from flocks.tool.registry import ToolContext, ToolRegistry -from flocks.tool.system.skill_load import ( +from flocks.tool.skill.skill_load import ( MAX_SKILL_DESCRIPTION_PREVIEW_CHARS, _truncate_skill_description, build_description, diff --git a/tests/tool/test_task_model_pinning.py b/tests/tool/test_task_model_pinning.py index 7781dd277..a2c37b696 100644 --- a/tests/tool/test_task_model_pinning.py +++ b/tests/tool/test_task_model_pinning.py @@ -4,7 +4,7 @@ import pytest from flocks.tool.registry import ToolContext -from flocks.tool.task.task import _resolve_child_model, task_tool +from flocks.tool.agent.task import _resolve_child_model, task_tool def _make_ctx() -> ToolContext: @@ -52,9 +52,9 @@ async def test_task_tool_explicit_model_override_pins_child_session(self): model_pinned=False, ) - with patch("flocks.tool.task.task.is_delegatable", return_value=True), \ - patch("flocks.tool.task.task.Session.get_by_id", AsyncMock(return_value=parent_session)), \ - patch("flocks.tool.task.task.get_background_manager", return_value=manager): + with patch("flocks.tool.agent.task.is_delegatable", return_value=True), \ + patch("flocks.tool.agent.task.Session.get_by_id", AsyncMock(return_value=parent_session)), \ + patch("flocks.tool.agent.task.get_background_manager", return_value=manager): result = await task_tool( _make_ctx(), description="delegate explore", @@ -93,9 +93,9 @@ async def test_task_tool_default_resolution_does_not_pin_child_session(self): model_pinned=False, ) - with patch("flocks.tool.task.task.is_delegatable", return_value=True), \ - patch("flocks.tool.task.task.Session.get_by_id", AsyncMock(return_value=parent_session)), \ - patch("flocks.tool.task.task.get_background_manager", return_value=manager), \ + with patch("flocks.tool.agent.task.is_delegatable", return_value=True), \ + patch("flocks.tool.agent.task.Session.get_by_id", AsyncMock(return_value=parent_session)), \ + patch("flocks.tool.agent.task.get_background_manager", return_value=manager), \ patch("flocks.storage.storage.Storage.read", AsyncMock(return_value={})), \ patch("flocks.agent.registry.Agent.get", AsyncMock(return_value=None)), \ patch("flocks.config.config.Config.resolve_default_llm", AsyncMock(return_value={ @@ -131,10 +131,10 @@ async def test_task_tool_sync_continue_returns_session_loop_error(self): agent="explore", ) - with patch("flocks.tool.task.task.is_delegatable", return_value=True), \ - patch("flocks.tool.task.task.Session.get_by_id", AsyncMock(side_effect=[parent_session, child_session])), \ - patch("flocks.tool.task.task.Message.create", AsyncMock()), \ - patch("flocks.tool.task.task.SessionLoop.run", AsyncMock(return_value=SimpleNamespace( + with patch("flocks.tool.agent.task.is_delegatable", return_value=True), \ + patch("flocks.tool.agent.task.Session.get_by_id", AsyncMock(side_effect=[parent_session, child_session])), \ + patch("flocks.tool.agent.task.Message.create", AsyncMock()), \ + patch("flocks.tool.agent.task.SessionLoop.run", AsyncMock(return_value=SimpleNamespace( action="error", error="subagent crashed", last_message=None, @@ -166,10 +166,10 @@ async def test_task_tool_sync_continue_fails_when_last_message_missing(self): agent="explore", ) - with patch("flocks.tool.task.task.is_delegatable", return_value=True), \ - patch("flocks.tool.task.task.Session.get_by_id", AsyncMock(side_effect=[parent_session, child_session])), \ - patch("flocks.tool.task.task.Message.create", AsyncMock()), \ - patch("flocks.tool.task.task.SessionLoop.run", AsyncMock(return_value=SimpleNamespace( + with patch("flocks.tool.agent.task.is_delegatable", return_value=True), \ + patch("flocks.tool.agent.task.Session.get_by_id", AsyncMock(side_effect=[parent_session, child_session])), \ + patch("flocks.tool.agent.task.Message.create", AsyncMock()), \ + patch("flocks.tool.agent.task.SessionLoop.run", AsyncMock(return_value=SimpleNamespace( action="stop", error=None, last_message=None, @@ -208,10 +208,10 @@ async def test_task_tool_sync_continue_fails_when_last_message_has_no_text(self) error=None, ) - with patch("flocks.tool.task.task.is_delegatable", return_value=True), \ - patch("flocks.tool.task.task.Session.get_by_id", AsyncMock(side_effect=[parent_session, child_session])), \ - patch("flocks.tool.task.task.Message.create", AsyncMock()), \ - patch("flocks.tool.task.task.SessionLoop.run", AsyncMock(return_value=SimpleNamespace( + with patch("flocks.tool.agent.task.is_delegatable", return_value=True), \ + patch("flocks.tool.agent.task.Session.get_by_id", AsyncMock(side_effect=[parent_session, child_session])), \ + patch("flocks.tool.agent.task.Message.create", AsyncMock()), \ + patch("flocks.tool.agent.task.SessionLoop.run", AsyncMock(return_value=SimpleNamespace( action="stop", error=None, last_message=last_message, diff --git a/tests/workspace/test_workspace_routes.py b/tests/workspace/test_workspace_routes.py index 0a453bb17..69b404d4b 100644 --- a/tests/workspace/test_workspace_routes.py +++ b/tests/workspace/test_workspace_routes.py @@ -298,7 +298,7 @@ def test_upload_overwrites_duplicate_file_without_chat_purpose(self, workspace_c assert second_item["path"] == "uploads/report.pdf" assert (_ws(workspace_client) / "uploads" / "report.pdf").read_bytes() == b"second" - def test_chat_upload_renames_duplicate_file(self, workspace_client): + def test_chat_upload_overwrites_duplicate_file(self, workspace_client): client = _client(workspace_client) first = client.post( "/api/workspace/upload?dest=uploads&purpose=chat", @@ -313,33 +313,10 @@ def test_chat_upload_renames_duplicate_file(self, workspace_client): first_item = first.json()["uploaded"][0] second_item = second.json()["uploaded"][0] assert first_item["name"] == "report.pdf" - assert second_item["name"] == "report (1).pdf" + assert second_item["name"] == "report.pdf" assert first_item["path"] == "uploads/report.pdf" - assert second_item["path"] == "uploads/report (1).pdf" - assert (_ws(workspace_client) / "uploads" / "report.pdf").read_bytes() == b"first" - assert (_ws(workspace_client) / "uploads" / "report (1).pdf").read_bytes() == b"second" - - def test_chat_upload_returns_error_after_too_many_name_conflicts(self, workspace_client, monkeypatch): - from flocks.server.routes import workspace as workspace_routes - - monkeypatch.setattr(workspace_routes, "_MAX_UPLOAD_RENAME_ATTEMPTS", 1) - ws = _ws(workspace_client) - uploads_dir = ws / "uploads" - uploads_dir.mkdir(exist_ok=True) - (uploads_dir / "report.pdf").write_bytes(b"first") - (uploads_dir / "report (1).pdf").write_bytes(b"second") - - r = _client(workspace_client).post( - "/api/workspace/upload?dest=uploads&purpose=chat", - files=[("files", ("report.pdf", b"third", "application/pdf"))], - ) - - assert r.status_code == 409 - assert "Too many conflicting filenames" in r.json()["detail"] - result = r.json()["uploaded"][0] - assert "Too many conflicting filenames" in result["error"] - assert (uploads_dir / "report.pdf").read_bytes() == b"first" - assert (uploads_dir / "report (1).pdf").read_bytes() == b"second" + assert second_item["path"] == "uploads/report.pdf" + assert (_ws(workspace_client) / "uploads" / "report.pdf").read_bytes() == b"second" def test_upload_too_large_file_rejected(self, workspace_client, monkeypatch): # Set the limit to 0 MB; _max_upload_bytes() reads the env var at diff --git a/uv.lock b/uv.lock index df6ab7ed1..eafc15838 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = "==3.12.*" resolution-markers = [ "sys_platform == 'win32'", @@ -760,7 +760,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" }, { url = "https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" }, { url = "https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" }, - { url = "https://files.pythonhosted.org/packages/5c/c5/cc09412a29e43406eba18d61c70baa936e299bc27e074e2be3806ed29098/greenlet-3.3.2-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb", size = 626250, upload-time = "2026-02-20T21:02:46.596Z" }, { url = "https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" }, { url = "https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" }, { url = "https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" }, diff --git a/webui/src/components/common/SessionChat.test.ts b/webui/src/components/common/SessionChat.test.ts index 63d697809..f6b9ad1c3 100644 --- a/webui/src/components/common/SessionChat.test.ts +++ b/webui/src/components/common/SessionChat.test.ts @@ -5,12 +5,17 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; import type { Message } from '@/types'; import { + buildTodoWriteSummary, + dedupeUploadedDocumentAttachments, default as SessionChat, getEditingActionBarClassName, getMessageBubbleClassName, getMessageGroupClassName, getRegenerateTruncateTarget, getStandaloneThinkingBubbleClassName, + getUserAvatarContainerClassName, + getUserAvatarSpacerClassName, + listUploadedDocumentPaths, shouldRefetchFinishedMessage, truncateToolDisplayText, } from './SessionChat'; @@ -112,6 +117,31 @@ function makeMessage(overrides: Partial & { id: string }): Message { } as Message; } +describe('dedupeUploadedDocumentAttachments', () => { + it('keeps the latest successful document for a workspace path', () => { + const items = dedupeUploadedDocumentAttachments([ + { id: 'old', status: 'success', workspacePath: '/tmp/uploads/report.pdf', isImage: false }, + { id: 'image', status: 'success', isImage: true, workspacePath: '/tmp/uploads/diagram.png' }, + { id: 'new', status: 'success', workspacePath: '/tmp/uploads/report.pdf', isImage: false }, + { id: 'error', status: 'error', workspacePath: '/tmp/uploads/report.pdf', isImage: false }, + ]); + + expect(items.map((item) => item.id)).toEqual(['image', 'new', 'error']); + }); +}); + +describe('listUploadedDocumentPaths', () => { + it('returns unique successful document paths in attachment order', () => { + expect(listUploadedDocumentPaths([ + { status: 'success', workspacePath: '/tmp/uploads/a.pdf', isImage: false }, + { status: 'success', workspacePath: '/tmp/uploads/a.pdf', isImage: false }, + { status: 'success', workspacePath: '/tmp/uploads/b.pdf', isImage: false }, + { status: 'success', workspacePath: '/tmp/uploads/image.png', isImage: true }, + { status: 'error', workspacePath: '/tmp/uploads/c.pdf', isImage: false }, + ])).toEqual(['/tmp/uploads/a.pdf', '/tmp/uploads/b.pdf']); + }); +}); + describe('getMessageBubbleClassName', () => { // The bubble's max width is owned by its outer container (`max-w-[80%]` for // user, `w-full` for assistant; see SessionChat.tsx), so the inner bubble @@ -210,6 +240,32 @@ describe('getStandaloneThinkingBubbleClassName', () => { }); }); +describe('getUserAvatarContainerClassName', () => { + it('moves the user avatar to the bubble side without affecting bubble spacing', () => { + const className = getUserAvatarContainerClassName(false); + + expect(className).toContain('absolute'); + expect(className).toContain('left-full'); + expect(className).toContain('ml-2.5'); + expect(className).toContain('translate-y-1/2'); + expect(className).toContain('h-8'); + }); + + it('keeps the compact avatar aligned to the compact header height', () => { + expect(getUserAvatarContainerClassName(true)).toContain('h-7'); + }); +}); + +describe('getUserAvatarSpacerClassName', () => { + it('uses a shorter spacer in full layout to keep the top gap compact', () => { + expect(getUserAvatarSpacerClassName(false)).toBe('h-4'); + }); + + it('uses a proportional spacer in compact layout', () => { + expect(getUserAvatarSpacerClassName(true)).toBe('h-3.5'); + }); +}); + describe('SessionChat standalone thinking indicator', () => { it('keeps only the bouncing dots during the initial assistant loading state', async () => { useSessionMessagesMock.mockReturnValue({ @@ -262,6 +318,35 @@ describe('truncateToolDisplayText', () => { }); }); +describe('buildTodoWriteSummary', () => { + it('renders progress from structured todowrite input', () => { + expect(buildTodoWriteSummary({ + input: { + todos: [ + { id: '1', content: '定位 todowrite 摘要问题', status: 'in_progress' }, + { id: '2', content: '补充回归测试', status: 'completed' }, + { id: '3', content: '验证 Web UI 展示', status: 'pending' }, + ], + }, + })).toBe('Progress 1/3 · In progress 1'); + }); + + it('prefers current metadata todos when available', () => { + expect(buildTodoWriteSummary({ + metadata: { + oldTodos: [ + { id: '1', content: '定位 todowrite 摘要问题', status: 'pending' }, + { id: '2', content: '补充回归测试', status: 'pending' }, + ], + newTodos: [ + { id: '1', content: '定位 todowrite 摘要问题', status: 'completed' }, + { id: '3', content: '验证 Web UI 展示', status: 'completed' }, + ], + }, + })).toBe('Completed 2/2'); + }); +}); + describe('getRegenerateTruncateTarget', () => { it('truncates back to the parent user message for assistant regenerations', () => { const target = getRegenerateTruncateTarget([ diff --git a/webui/src/components/common/SessionChat.tsx b/webui/src/components/common/SessionChat.tsx index 4bd66cd3d..1c8908dae 100644 --- a/webui/src/components/common/SessionChat.tsx +++ b/webui/src/components/common/SessionChat.tsx @@ -355,6 +355,16 @@ export function getStandaloneThinkingBubbleClassName(compact: boolean): string { return getMessageBubbleClassName({ compact, isUser: false, isEditing: false }); } +export function getUserAvatarContainerClassName(compact: boolean): string { + return `pointer-events-none absolute left-full top-0 ml-2.5 translate-y-1/2 flex items-center justify-end ${ + compact ? 'h-7' : 'h-8' + }`; +} + +export function getUserAvatarSpacerClassName(compact: boolean): string { + return compact ? 'h-3.5' : 'h-4'; +} + // ============================================================================ // Main component @@ -375,6 +385,38 @@ function isAllowedUploadFile(file: File): boolean { return ALLOWED_UPLOAD_EXTENSIONS.has(getFileExtension(file.name)); } +function isUploadedDocumentAttachment( + attachment: { status: string; workspacePath?: string; isImage?: boolean }, +): attachment is { status: string; workspacePath: string; isImage?: boolean } { + return attachment.status === 'success' && !attachment.isImage && Boolean(attachment.workspacePath); +} + +export function dedupeUploadedDocumentAttachments(items: T[]): T[] { + const latestIndexByPath = new Map(); + items.forEach((item, index) => { + if (isUploadedDocumentAttachment(item)) { + latestIndexByPath.set(item.workspacePath, index); + } + }); + return items.filter((item, index) => ( + !isUploadedDocumentAttachment(item) || latestIndexByPath.get(item.workspacePath) === index + )); +} + +export function listUploadedDocumentPaths(items: T[]): string[] { + return dedupeUploadedDocumentAttachments(items) + .filter(isUploadedDocumentAttachment) + .map((item) => item.workspacePath); +} + export default function SessionChat({ sessionId, live = false, @@ -858,10 +900,7 @@ export default function SessionChat({ const buildAttachmentBlock = useCallback((items: ComposerAttachment[]) => { if (items.length === 0) return ''; - const lines = items - .map((attachment) => attachment.workspacePath) - .filter((path): path is string => Boolean(path)) - .map((path) => `- ${path}`); + const lines = listUploadedDocumentPaths(items).map((path) => `- ${path}`); if (lines.length === 0) return ''; return `Attached files:\n${lines.join('\n')}`; }, []); @@ -895,7 +934,7 @@ export default function SessionChat({ 'chat', ); const uploaded = response.data.uploaded ?? []; - setAttachments((prev) => prev.map((attachment) => { + setAttachments((prev) => dedupeUploadedDocumentAttachments(prev.map((attachment) => { const entryIndex = entries.findIndex((entry) => entry.id === attachment.id); if (entryIndex < 0) return attachment; const result = uploaded[entryIndex]; @@ -913,7 +952,7 @@ export default function SessionChat({ workspacePath: result.abs_path ?? result.path, error: undefined, }; - })); + }))); } catch (err: any) { const detail = err?.response?.data?.detail ?? err?.message ?? t('chat.upload.errorGeneric'); setAttachments((prev) => prev.map((attachment) => ( @@ -2368,10 +2407,11 @@ function ChatMessageBubbleInner({ if (isUser) { return (
-
-
+
+
{avatar}
+