From ce06850d8299b5bd18809050e8c61383d1a6324a Mon Sep 17 00:00:00 2001 From: Jordan Coin Jackson Date: Wed, 1 Jul 2026 22:37:51 -0400 Subject: [PATCH 1/2] Make `watch stop` work safely on Windows (#74) Implements the follow-up from #73: `watch stop` now terminates the daemon on Windows, guarded against PID reuse. - process_windows.go: implement processCommandLine via Get-CimInstance Win32_Process (the Windows analog of `ps`), so IsOwnedDaemon can confirm a PID is this repo's `watch daemon `. - Add platform-specific terminateDaemon: Unix keeps ungated SIGTERM (unchanged behavior); Windows verifies ownership then Kills, returning ErrForeignDaemonPID when the PID can't be confirmed as ours. - Stop clears a stale/foreign pid file instead of killing an unrelated process; the stop command reports "not running (cleared stale PID file)" for that case. Adds a command-layer test for the foreign-PID path (platform-independent via the stopWatchDaemon var). The Windows runtime path (CIM lookup + ownership kill) can't be exercised on non-Windows or in CI (the Windows job only builds), so it needs verification on real hardware. Closes #74 Co-Authored-By: Claude Opus 4.8 (1M context) --- main.go | 4 ++++ main_more_test.go | 21 +++++++++++++++++++++ watch/process_unix.go | 8 ++++++++ watch/process_windows.go | 31 ++++++++++++++++++++++++++++--- watch/state.go | 24 +++++++++++++++++------- 5 files changed, 78 insertions(+), 10 deletions(-) diff --git a/main.go b/main.go index 65844ed..62749b9 100644 --- a/main.go +++ b/main.go @@ -597,6 +597,10 @@ func runWatchSubcommand(subCmd, root string) { return } if err := stopWatchDaemon(absRoot); err != nil { + if errors.Is(err, watch.ErrForeignDaemonPID) { + fmt.Println("Watch daemon not running (cleared stale PID file)") + return + } fmt.Fprintf(os.Stderr, "Error stopping daemon: %v\n", err) os.Exit(1) } diff --git a/main_more_test.go b/main_more_test.go index a355310..0e5fd6a 100644 --- a/main_more_test.go +++ b/main_more_test.go @@ -758,3 +758,24 @@ func TestSubcommandDispatchViaBinary(t *testing.T) { } }) } + +func TestRunWatchSubcommandStopForeignPID(t *testing.T) { + root := t.TempDir() + + origRunning := watchIsRunning + origStop := stopWatchDaemon + defer func() { + watchIsRunning = origRunning + stopWatchDaemon = origStop + }() + watchIsRunning = func(string) bool { return true } + stopWatchDaemon = func(string) error { return watch.ErrForeignDaemonPID } + + stdout, _ := captureMainStreams(t, func() { runWatchSubcommand("stop", root) }) + if !strings.Contains(stdout, "cleared stale PID file") { + t.Fatalf("expected stale-PID message on foreign PID, got:\n%s", stdout) + } + if strings.Contains(stdout, "Watch daemon stopped") { + t.Fatalf("should not claim the daemon was stopped for a foreign PID:\n%s", stdout) + } +} diff --git a/watch/process_unix.go b/watch/process_unix.go index dec4ff1..193ff3b 100644 --- a/watch/process_unix.go +++ b/watch/process_unix.go @@ -31,3 +31,11 @@ func processAlive(pid int) bool { } return proc.Signal(syscall.Signal(0)) == nil } + +// terminateDaemon stops the daemon on Unix with SIGTERM so it can shut down +// gracefully. This matches long-standing behavior and does not gate on +// ownership (unlike Windows, where the kill is destructive): the root argument +// is accepted only to share a signature with the Windows implementation. +func terminateDaemon(_ string, proc *os.Process) error { + return proc.Signal(syscall.SIGTERM) +} diff --git a/watch/process_windows.go b/watch/process_windows.go index d04c4dc..b1328c5 100644 --- a/watch/process_windows.go +++ b/watch/process_windows.go @@ -3,13 +3,26 @@ package watch import ( - "errors" + "fmt" + "os" + "os/exec" + "strings" "syscall" ) +// processCommandLine returns the full command line of the given PID via CIM +// (the Windows analog of `ps`; wmic is deprecated/removed on newer Windows). +// IsOwnedDaemon uses this to confirm a PID belongs to this repo's watch daemon. func processCommandLine(pid int) (string, error) { - _ = pid - return "", errors.New("process command line lookup not supported on windows") + if pid <= 0 { + return "", fmt.Errorf("invalid pid %d", pid) + } + psCmd := fmt.Sprintf(`(Get-CimInstance Win32_Process -Filter "ProcessId=%d").CommandLine`, pid) + out, err := exec.Command("powershell", "-NoProfile", "-NonInteractive", "-Command", psCmd).Output() + if err != nil { + return "", err + } + return strings.TrimSpace(string(out)), nil } // processAlive reports whether a process with the given PID is currently @@ -33,3 +46,15 @@ func processAlive(pid int) bool { } return code == stillActive } + +// terminateDaemon stops the daemon on Windows. Windows has no SIGTERM, so it +// terminates with Kill — but only after confirming the PID actually belongs to +// this repo's watch daemon. A stale watch.pid may point to a PID the OS reused +// for an unrelated process, and killing that would be destructive; when +// ownership cannot be confirmed we refuse (ErrForeignDaemonPID) instead. +func terminateDaemon(root string, proc *os.Process) error { + if !IsOwnedDaemon(root) { + return ErrForeignDaemonPID + } + return proc.Kill() +} diff --git a/watch/state.go b/watch/state.go index 1fcdec7..6f3f536 100644 --- a/watch/state.go +++ b/watch/state.go @@ -2,14 +2,20 @@ package watch import ( "encoding/json" + "errors" "fmt" "os" "path/filepath" "strings" - "syscall" "time" ) +// ErrForeignDaemonPID is returned by Stop when the PID in watch.pid is alive but +// cannot be confirmed to be this repo's watch daemon (e.g. a stale PID reused by +// an unrelated process). Callers should treat it as "nothing of ours to stop" +// rather than a hard failure. +var ErrForeignDaemonPID = errors.New("watch.pid does not belong to a codemap watch daemon for this repo (stale or reused PID)") + // ReadState reads the daemon state from disk (for hooks to use). // Returns nil if state doesn't exist or if it's stale and daemon is not running. func ReadState(root string) *State { @@ -108,12 +114,16 @@ func Stop(root string) error { if err != nil { return err } - // NOTE: Windows has no SIGTERM, so this returns an error there (as it always - // has). `watch stop` on Windows is intentionally left non-destructive: safely - // killing would require verifying the PID still belongs to this repo's daemon - // (guarding against PID reuse), which needs a Windows command-line lookup that - // processCommandLine does not yet implement. Tracked as follow-up. - if err := proc.Signal(syscall.SIGTERM); err != nil { + // terminateDaemon is platform-specific: SIGTERM on Unix; on Windows it + // verifies the PID belongs to this repo's daemon (guarding against a reused + // stale PID) before killing, returning ErrForeignDaemonPID otherwise. + if err := terminateDaemon(root, proc); err != nil { + if errors.Is(err, ErrForeignDaemonPID) { + // The recorded PID isn't our daemon (stale or reused). Clear the + // bogus pid file so status stops reporting it, but never kill a + // process we can't confirm is ours. + RemovePID(root) + } return err } // Clean up PID file From cc89963756b31d17aa9afbab936474a74feb2d2f Mon Sep 17 00:00:00 2001 From: Jordan Coin Jackson Date: Wed, 1 Jul 2026 23:03:44 -0400 Subject: [PATCH 2/2] Address Copilot review on #75: TOCTOU + ownership wording - Fix TOCTOU: terminateDaemon now classifies ownership against proc.Pid (the exact process it will kill) instead of re-reading watch.pid, so a concurrent start/stop can't make it validate one PID and kill another. - Three-state ownership (owned / foreign / unknown) via daemonOwnershipForPID: - owned -> Kill - foreign -> ErrForeignDaemonPID (command line read, didn't match); Stop clears the stale pid file. - unknown -> ErrDaemonOwnershipUnknown (command line unavailable); refuse to kill AND keep the pid file so a real daemon is never orphaned. - Reword ErrForeignDaemonPID so it no longer implies "not ours" for the uncertain case, and add ErrDaemonOwnershipUnknown for it. Co-Authored-By: Claude Opus 4.8 (1M context) --- watch/process_windows.go | 17 +++++++---- watch/state.go | 66 ++++++++++++++++++++++++++++------------ 2 files changed, 58 insertions(+), 25 deletions(-) diff --git a/watch/process_windows.go b/watch/process_windows.go index b1328c5..a84b185 100644 --- a/watch/process_windows.go +++ b/watch/process_windows.go @@ -48,13 +48,18 @@ func processAlive(pid int) bool { } // terminateDaemon stops the daemon on Windows. Windows has no SIGTERM, so it -// terminates with Kill — but only after confirming the PID actually belongs to -// this repo's watch daemon. A stale watch.pid may point to a PID the OS reused -// for an unrelated process, and killing that would be destructive; when -// ownership cannot be confirmed we refuse (ErrForeignDaemonPID) instead. +// terminates with Kill — but only after confirming proc is actually this repo's +// watch daemon. Ownership is checked against proc.Pid (not by re-reading +// watch.pid) so we validate the exact process we are about to kill. A stale +// watch.pid may point to a PID the OS reused for an unrelated process; killing +// that would be destructive, so we refuse when ownership is foreign or unknown. func terminateDaemon(root string, proc *os.Process) error { - if !IsOwnedDaemon(root) { + switch daemonOwnershipForPID(root, proc.Pid) { + case ownershipOwned: + return proc.Kill() + case ownershipForeign: return ErrForeignDaemonPID + default: + return ErrDaemonOwnershipUnknown } - return proc.Kill() } diff --git a/watch/state.go b/watch/state.go index 6f3f536..841b189 100644 --- a/watch/state.go +++ b/watch/state.go @@ -10,11 +10,17 @@ import ( "time" ) -// ErrForeignDaemonPID is returned by Stop when the PID in watch.pid is alive but -// cannot be confirmed to be this repo's watch daemon (e.g. a stale PID reused by -// an unrelated process). Callers should treat it as "nothing of ours to stop" -// rather than a hard failure. -var ErrForeignDaemonPID = errors.New("watch.pid does not belong to a codemap watch daemon for this repo (stale or reused PID)") +// ErrForeignDaemonPID is returned by Stop when the PID in watch.pid is alive and +// its command line was read but does NOT match this repo's watch daemon — i.e. a +// stale PID the OS reused for an unrelated process. Callers can treat it as +// "nothing of ours to stop" and safely discard the pid file. +var ErrForeignDaemonPID = errors.New("watch.pid points to a live process that is not this repo's codemap watch daemon (stale or reused PID)") + +// ErrDaemonOwnershipUnknown is returned by Stop when the PID is alive but its +// ownership could not be determined (the process command line was unavailable, +// e.g. introspection was denied). We refuse to kill it AND keep the pid file, so +// a real daemon is never orphaned or an unrelated process killed. +var ErrDaemonOwnershipUnknown = errors.New("could not verify that watch.pid belongs to this repo's codemap watch daemon; refusing to stop it") // ReadState reads the daemon state from disk (for hooks to use). // Returns nil if state doesn't exist or if it's stale and daemon is not running. @@ -63,34 +69,56 @@ func RemovePID(root string) { os.Remove(pidFile) } -// IsOwnedDaemon checks whether the PID file points to a codemap watch daemon -// for this repository root. -func IsOwnedDaemon(root string) bool { - pid, err := ReadPID(root) - if err != nil || pid <= 0 { - return false - } +// daemonOwnership is the result of checking whether a specific PID is this +// repo's watch daemon. It distinguishes "confirmed not ours" from "could not +// determine" so callers never treat an unverifiable process as safe to discard. +type daemonOwnership int + +const ( + ownershipUnknown daemonOwnership = iota // could not read the process command line + ownershipOwned // command line matches this repo's watch daemon + ownershipForeign // command line retrieved and does NOT match +) +// daemonOwnershipForPID classifies whether pid is this repo's watch daemon. +// It takes the PID explicitly (rather than re-reading watch.pid) so callers can +// validate the exact process they are about to act on, avoiding a TOCTOU race +// with a concurrent start/stop rewriting the pid file. +func daemonOwnershipForPID(root string, pid int) daemonOwnership { + if pid <= 0 { + return ownershipForeign + } cmdline, err := processCommandLine(pid) if err != nil { - return false + return ownershipUnknown } cmdline = strings.TrimSpace(cmdline) if cmdline == "" { - return false + return ownershipUnknown } absRoot, err := filepath.Abs(root) if err != nil { absRoot = root } - if absRoot == "" { - return false + if absRoot != "" && + strings.Contains(cmdline, "watch") && + strings.Contains(cmdline, "daemon") && + strings.Contains(cmdline, absRoot) { + return ownershipOwned } + return ownershipForeign +} - return strings.Contains(cmdline, "watch") && - strings.Contains(cmdline, "daemon") && - strings.Contains(cmdline, absRoot) +// IsOwnedDaemon reports whether the PID file points to a codemap watch daemon +// for this repository root. It is true only when ownership is positively +// confirmed. +func IsOwnedDaemon(root string) bool { + pid, err := ReadPID(root) + if err != nil || pid <= 0 { + return false + } + return daemonOwnershipForPID(root, pid) == ownershipOwned } // IsRunning checks if the daemon is running