diff --git a/backend/app/api/websocket.py b/backend/app/api/websocket.py index dd466ed2e..68f3bda1d 100644 --- a/backend/app/api/websocket.py +++ b/backend/app/api/websocket.py @@ -702,7 +702,7 @@ async def websocket_chat( _sess = _sess_r.scalar_one_or_none() if _sess: _sess.last_message_at = _now - if not history_messages and _sess.title.startswith("Session "): + if not history_messages and (_sess.title.startswith("Session ") or _sess.title == "New Session"): # Use display_content for title (avoids raw base64/markers) title_src = display_content if display_content else content # Clean up common prefixes from image/file messages @@ -761,7 +761,20 @@ async def stream_to_ws(text: str): _sent_live_envs: set[str] = set() async def tool_call_to_ws(data: dict): - """Send tool call info to client and persist completed ones.""" + """Send tool call info to client and persist completed ones. + + Sensitive fields in tool args are sanitized before + being sent over the WebSocket or saved to the DB. + """ + from app.utils.sanitize import sanitize_tool_args, is_secrets_path + data["args"] = sanitize_tool_args(data.get("args")) + + # Redact secrets.md content from read_file results + tool_name = data.get("name", "") + arguments = data.get("args") or {} + if tool_name == "read_file" and is_secrets_path(arguments.get("path", "")): + data["result"] = "[Content hidden - secrets.md is protected]" + # ── AgentBay live preview: embed screenshot URL in tool_call message ── # We embed live preview data directly in the tool_call payload # because separate WebSocket messages get silently dropped by nginx. 
diff --git a/backend/app/services/agent_context.py b/backend/app/services/agent_context.py index 84ebaece2..53ea6197c 100644 --- a/backend/app/services/agent_context.py +++ b/backend/app/services/agent_context.py @@ -199,9 +199,10 @@ async def build_agent_context(agent_id: uuid.UUID, agent_name: str, role_descrip try: from app.models.channel_config import ChannelConfig from app.database import async_session as _ctx_session + from sqlalchemy import select as _feishu_select async with _ctx_session() as _ctx_db: _cfg_r = await _ctx_db.execute( - select(ChannelConfig).where( + _feishu_select(ChannelConfig).where( ChannelConfig.agent_id == agent_id, ChannelConfig.channel_type == "feishu", ChannelConfig.is_configured == True, @@ -334,6 +335,7 @@ async def build_agent_context(agent_id: uuid.UUID, agent_name: str, role_descrip try: from app.database import async_session from app.models.system_settings import SystemSetting + from app.models.agent import Agent as _AgentModel from sqlalchemy import select as sa_select async with async_session() as db: # Resolve agent's tenant_id @@ -396,6 +398,28 @@ async def build_agent_context(agent_id: uuid.UUID, agent_name: str, role_descrip - workspace/ → Your work files (reports, documents, etc.) - relationships.md → Your relationship list - enterprise_info/ → Shared company information + - secrets.md → PRIVATE credentials store (passwords, API keys, connection strings) + +🔐 **SECRETS MANAGEMENT — ABSOLUTE RULES (VIOLATION = CRITICAL FAILURE)**: + +1. **MANDATORY STORAGE**: When a user provides ANY sensitive credential (password, API key, database connection string, token, secret), you MUST IMMEDIATELY call `write_file(path="secrets.md", content="...")` to store it. This is NOT optional. + +2. **VERIFY THE TOOL CALL**: You must see an actual `write_file` tool call result confirming "Written to secrets.md" before telling the user it's saved. NEVER claim "I've saved it" without a real tool call result — that is a hallucination. + +3. 
**NEVER store credentials in memory/memory.md** or any other file. ONLY secrets.md. + +4. **NEVER output credential values in chat messages**. Refer to them by name only (e.g. "the MySQL connection stored in secrets.md"). + +5. **Reading credentials**: When you need to use a stored credential, call `read_file(path="secrets.md")` first, then use the value in tool calls. + +6. **secrets.md format** — use clear labels: + ``` + ## Database Connections + - mysql_prod: mysql://user:pass@host:3306/db + + ## API Keys + - openai: sk-xxx + ``` ⚠️ CRITICAL RULES — YOU MUST FOLLOW THESE STRICTLY:
diff --git a/backend/app/utils/__init__.py b/backend/app/utils/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/app/utils/sanitize.py b/backend/app/utils/sanitize.py
new file mode 100644
index 000000000..3665d8530
--- /dev/null
+++ b/backend/app/utils/sanitize.py
@@ -0,0 +1,135 @@
+"""Sanitize sensitive fields from tool call arguments before sending to clients."""
+
+import re
+from copy import deepcopy
+from urllib.parse import urlparse, urlunparse
+
+# Placeholder substituted for every hidden value.
+REDACTED = "***REDACTED***"
+
+# Field names whose values should be completely hidden (replaced with "***REDACTED***")
+SENSITIVE_FIELD_NAMES = {
+    "password", "secret", "token", "api_key", "apikey", "api_secret",
+    "access_token", "refresh_token", "private_key", "secret_key",
+    "authorization", "credentials", "auth",
+    # Connection/credential strings — hide entirely, not partially
+    "connection_string", "database_url", "db_url", "dsn", "uri",
+    "connection_uri", "jdbc_url", "mongo_uri", "redis_url",
+}
+
+# Lowercase prefixes that mark a string value as a connection URI.
+_CONNECTION_URI_PREFIXES = (
+    "mysql://", "postgresql://", "postgres://", "sqlite://",
+    "mongodb://", "redis://", "mssql://", "oracle://",
+    "mysql+", "postgresql+", "postgres+",
+    # Credential-bearing schemes not covered by the entries above
+    "mongodb+", "rediss://", "mariadb://", "amqp://", "amqps://",
+)
+
+# Fallback for URIs urlparse rejects: matches the userinfo password, greedy up
+# to the last "@" before the path/query/fragment so "@" inside it is covered.
+_URI_PASSWORD_RE = re.compile(r"(://[^:/?#@]*:)[^/?#]*(@)")
+
+
+def sanitize_tool_args(args: dict | None) -> dict | None:
+    """Return a sanitized copy of tool call arguments.
+
+    - Fields matching SENSITIVE_FIELD_NAMES (case-insensitive, "-" treated as
+      "_") are replaced with "***REDACTED***"
+    - String values that look like connection URIs are also replaced with
+      "***REDACTED***"
+    - Nested dicts, lists and tuples are sanitized recursively, so a secret
+      one level down (e.g. ``{"headers": {"Authorization": ...}}``) is hidden
+    - ``content`` is hidden when the sibling ``path`` points at secrets.md
+    - Original dict is NOT modified (a new structure is returned)
+
+    Falsy input (``None``, ``{}``) is returned as-is.
+    """
+    if not args:
+        return args
+    return _sanitize_value(args)
+
+
+def _sanitize_value(value):
+    """Return a redacted copy of one (possibly nested) argument value."""
+    if isinstance(value, dict):
+        return _sanitize_mapping(value)
+    if isinstance(value, list):
+        return [_sanitize_value(item) for item in value]
+    if isinstance(value, tuple):
+        return tuple(_sanitize_value(item) for item in value)
+    if isinstance(value, str):
+        return REDACTED if _looks_like_connection_uri(value) else value
+    # Copy unknown leaf types so the result never shares state with the input.
+    return deepcopy(value)
+
+
+def _sanitize_mapping(mapping: dict) -> dict:
+    """Return a redacted copy of a dict, recursing into its values."""
+    cleaned = {}
+    for key, value in mapping.items():
+        if _is_sensitive_field(key):
+            cleaned[key] = REDACTED
+        else:
+            cleaned[key] = _sanitize_value(value)
+
+    # Special case: hide content when writing to secrets.md
+    if "content" in cleaned and is_secrets_path(cleaned.get("path")):
+        cleaned["content"] = REDACTED
+    return cleaned
+
+
+def _is_sensitive_field(key) -> bool:
+    """Check whether a dict key names a credential field."""
+    if not isinstance(key, str):
+        return False
+    return key.strip().lower().replace("-", "_") in SENSITIVE_FIELD_NAMES
+
+
+def is_secrets_path(path: str) -> bool:
+    """Check if a path references secrets.md.
+
+    Surrounding whitespace, leading/trailing slashes and backslash separators
+    are tolerated. Non-string input (e.g. ``None`` when a tool call carries no
+    usable ``path``) is never a secrets path.
+    """
+    if not isinstance(path, str):
+        return False
+    normalized = path.strip().replace("\\", "/").strip("/")
+    return normalized == "secrets.md" or normalized.endswith("/secrets.md")
+
+
+# Keep private alias for backward compatibility within this module
+_is_secrets_file_path = is_secrets_path
+
+
+def _mask_uri_password(uri: str) -> str:
+    """Mask the password portion of a connection URI.
+
+    mysql://user:secret123@host:3306/db -> mysql://user:***REDACTED***@host:3306/db
+
+    A URI that carries no password is returned unchanged.
+    """
+    try:
+        parsed = urlparse(uri)
+    except ValueError:
+        # urlparse rejects some malformed netlocs; fall back to regex masking.
+        return _URI_PASSWORD_RE.sub(r"\1" + REDACTED + r"\2", uri)
+
+    # Split the raw netloc instead of using .hostname/.port: those lowercase
+    # the host, drop IPv6 brackets and raise on a non-numeric port.
+    userinfo, at, hostinfo = parsed.netloc.rpartition("@")
+    username, colon, _password = userinfo.partition(":")
+    if not (at and colon):
+        return uri
+    netloc = f"{username}:{REDACTED}@{hostinfo}"
+    return urlunparse(
+        (parsed.scheme, netloc, parsed.path, parsed.params, parsed.query, parsed.fragment)
+    )
+
+
+def _looks_like_connection_uri(value: str) -> bool:
+    """Check if a string value looks like a database connection URI."""
+    if not isinstance(value, str):
+        return False
+    return value.lstrip().lower().startswith(_CONNECTION_URI_PREFIXES)