From 238af1b384fe291d78116f912498846debe107ba Mon Sep 17 00:00:00 2001
From: David O'Keeffe
Date: Wed, 6 May 2026 17:48:35 +1000
Subject: [PATCH] fix(cli-auth): atomic writes + observable failures on PAT rotation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Hermes was returning 403 ("Invalid access token") on the first call after
a PAT rotation, then succeeding on retry. Two reasons:

1. update_cli_tokens() rewrote each agent's config file with a bare
   open(path, "w"), creating a window where a concurrent Hermes
   invocation could read a half-written api_key line. Hermes is exposed
   to this because it re-reads ~/.hermes/config.yaml on every call;
   Claude/Codex/Gemini cache the token in env at process startup.

2. Every write path silently swallowed OSError, so an actual write
   failure (perms, locked file, ENOSPC) would leave the config stale
   forever with no log line — the user just saw 403s.

Adds _atomic_write_text() helper (write to a unique temp file in the
target's directory, copy the target's permission bits, os.replace; the
temp file is removed if any step fails) used by all five _update_*
functions. Replaces the silent `except OSError: pass` with
logger.warning(). FileNotFoundError is still silenced via an explicit
os.path.exists() guard so the rotator doesn't spam during the brief
window between app start and setup script completion.

Co-authored-by: Isaac
---
Review notes (this section is ignored by `git am`):
- This copy of the patch arrived with its whitespace collapsed. Line
  breaks, indentation and blank context lines were reconstructed; check
  the context lines (in particular the wrapping of the module docstring
  and the nesting of the Claude write under its `if`) against
  cli_auth.py before applying.
- _atomic_write_text() was amended in review, so the post-image blob id
  on the `index` line predates the amendment.

 cli_auth.py | 83 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 67 insertions(+), 16 deletions(-)

diff --git a/cli_auth.py b/cli_auth.py
index 61c9f25..8e913f8 100644
--- a/cli_auth.py
+++ b/cli_auth.py
@@ -2,6 +2,11 @@
 
 Called by pat_rotator._persist_token() every 10 minutes.
 Lightweight — just swaps token values in existing files, no installs or script runs.
+
+All writes are atomic (write to a temp file, then `os.replace`) so a Hermes /
+OpenCode / Codex invocation that reads the file mid-update sees the old token
+whole or the new token whole — never a half-written file. Errors other than
+"file does not exist" surface as warnings rather than being silently swallowed.
 """
 
 import json
@@ -16,6 +21,48 @@
 _HOME = "/app/python/source_code"
 
 
+def _atomic_write_text(path, content):
+    """Write `content` to `path` atomically via tmp file + rename.
+
+    Prevents the read-while-rewriting race that bit Hermes specifically:
+    Hermes reads `~/.hermes/config.yaml` on every invocation, so a bare
+    open(path, 'w') by the rotator could leave the file in a partial state
+    visible to a concurrent Hermes call → 403 Invalid access token.
+
+    The temp file gets a unique name in the same directory as `path`, so
+    overlapping rotations cannot clobber each other's temp file and
+    `os.replace` never crosses a filesystem boundary. It takes over the
+    permission bits of the file it replaces and is removed again if
+    anything fails before the rename.
+
+    Raises:
+        OSError: if the temp file cannot be created or written, or the
+            rename fails. `path` keeps its previous content in that case.
+    """
+    import shutil
+    import tempfile
+
+    directory, name = os.path.split(path)
+    fd, tmp = tempfile.mkstemp(
+        dir=directory or ".", prefix=f".{name}.", suffix=".tmp"
+    )
+    try:
+        with os.fdopen(fd, "w") as f:
+            f.write(content)
+        if os.path.exists(path):
+            # mkstemp creates the file 0600; keep whatever mode the agent's
+            # setup script gave the config so its readers are unaffected.
+            shutil.copymode(path, tmp)
+        os.replace(tmp, path)
+    except BaseException:
+        # Never leave a stray temp file holding a live token behind.
+        try:
+            os.unlink(tmp)
+        except OSError:
+            pass  # best-effort cleanup; the original error is re-raised
+        raise
+
+
 def update_cli_tokens(token):
     """Update the literal token in all CLI config files."""
     _update_claude(token)
@@ -28,15 +75,16 @@ def update_cli_tokens(token):
 def _update_claude(token):
     """Update ANTHROPIC_AUTH_TOKEN in ~/.claude/settings.json."""
     path = os.path.join(_HOME, ".claude", "settings.json")
+    if not os.path.exists(path):
+        return  # setup_claude.py hasn't run yet
     try:
         with open(path) as f:
             settings = json.load(f)
         if "env" in settings and "ANTHROPIC_AUTH_TOKEN" in settings["env"]:
             settings["env"]["ANTHROPIC_AUTH_TOKEN"] = token
-            with open(path, "w") as f:
-                json.dump(settings, f, indent=2)
-    except (OSError, json.JSONDecodeError):
-        pass  # file doesn't exist yet — initial setup hasn't run
+            _atomic_write_text(path, json.dumps(settings, indent=2))
+    except (OSError, json.JSONDecodeError) as e:
+        logger.warning("Failed to update Claude token in %s: %s", path, e)
 
 
 def _update_codex(token):
@@ -48,6 +96,8 @@ def _update_codex(token):
 def _update_opencode(token):
     """Update api_key values in ~/.local/share/opencode/auth.json."""
     path = os.path.join(_HOME, ".local", "share", "opencode", "auth.json")
+    if not os.path.exists(path):
+        return  # setup_opencode.py hasn't run yet
     try:
         with open(path) as f:
             auth = json.load(f)
@@ -57,10 +107,9 @@ def _update_opencode(token):
                 provider["api_key"] = token
                 changed = True
         if changed:
-            with open(path, "w") as f:
-                json.dump(auth, f, indent=2)
-    except (OSError, json.JSONDecodeError):
-        pass
+            _atomic_write_text(path, json.dumps(auth, indent=2))
+    except (OSError, json.JSONDecodeError) as e:
+        logger.warning("Failed to update OpenCode token in %s: %s", path, e)
 
 
 def _update_gemini(token):
@@ -72,6 +121,8 @@ def _update_gemini(token):
 def _update_hermes(token):
     """Update api_key lines in ~/.hermes/config.yaml."""
     path = os.path.join(_HOME, ".hermes", "config.yaml")
+    if not os.path.exists(path):
+        return  # setup_hermes.py hasn't run yet
     try:
         with open(path) as f:
             content = f.read()
@@ -82,14 +133,15 @@ def _update_hermes(token):
             flags=re.MULTILINE
         )
         if new_content != content:
-            with open(path, "w") as f:
-                f.write(new_content)
-    except OSError:
-        pass
+            _atomic_write_text(path, new_content)
+    except OSError as e:
+        logger.warning("Failed to update Hermes token in %s: %s", path, e)
 
 
 def _replace_dotenv_key(path, key, value):
     """Replace a KEY=value line in a dotenv file."""
+    if not os.path.exists(path):
+        return  # caller's setup script hasn't run yet
     try:
         with open(path) as f:
             content = f.read()
@@ -100,7 +152,6 @@ def _replace_dotenv_key(path, key, value):
             flags=re.MULTILINE
         )
         if new_content != content:
-            with open(path, "w") as f:
-                f.write(new_content)
-    except OSError:
-        pass
+            _atomic_write_text(path, new_content)
+    except OSError as e:
+        logger.warning("Failed to update %s in %s: %s", key, path, e)