From abe5da6eefd862018262cb95b5fccd61cdb8d559 Mon Sep 17 00:00:00 2001 From: Tyrie Vella Date: Wed, 27 May 2026 11:39:15 -0700 Subject: [PATCH] Fix PID-recycling race in orphan-lock detection GVFS's orphan-lock check (GVFSLock.LockHolder.GetExternalHolder) only asked the OS "is there a process at this PID?" via IsProcessActive. If the original lock-holder exited and Windows recycled its PID to an unrelated process before the next git command arrived, GVFS saw a live process at that PID and concluded the lock was still held. The git command then either waited the full 300s timeout or printed a spurious "Waiting for '' to release the lock" message. This is a real product bug, not just a test issue. It surfaced as flakiness in the OrphanedGVFSLockIsCleanedUp functional test on busy CI agents (two failure modes: 300s timeout, and assertion failure with non-empty stderr), but the same race can affect users when a PID is quickly reassigned by the OS scheduler. The fix is process-identity tracking. When the lock is granted to an external requestor, capture that process's creation timestamp (GetProcessTimes, an opaque 100-ns FILETIME value) alongside its PID. On every orphan check, look up the current creation timestamp at that PID and compare. If the process is gone the lookup fails; if a different process is now at the PID the timestamps differ. Either way the lock is correctly recognized as orphaned. Implementation notes: * New abstract GVFSPlatform.TryGetActiveProcessStartTime returns the raw 64-bit creation time. It is documented as identity-only; the value is never decoded as a date. * The Windows implementation combines GetExitCodeProcess (STILL_ACTIVE check) and GetProcessTimes. GetProcessTimes alone is not sufficient because it still returns a creation time for terminated processes whose kernel object is kept alive by an outstanding handle elsewhere. * Acquisition does not fail if start-time capture fails. The existing IsProcessActiveImplementation has a Process.GetProcessById fallback for cross-integrity callers that OpenProcess(QueryLimitedInformation) cannot open, and we preserve the same compatibility surface here. A HasStartTime flag is stored alongside the PID; when it is false the orphan check falls back to the legacy IsProcessActive call. The fallback is recorded in telemetry (StartTimeUnavailable=true on the TryAcquireLockExternal event) so we can see if it ever becomes common in the field. * The ExternalLockHolderExited telemetry event now carries an ExternalHolderTerminationReason field (ProcessNotActive | PidRecycled | Unknown) to make future post-mortems straightforward. * The OrphanedGVFSLockIsCleanedUp functional test is unchanged. It polls Process.GetProcessById to detect when the lock holder has exited, then runs git status. With this product fix, git status succeeds cleanly even if the PID has already been recycled to an unrelated process by the time mount processes the request, so the test passes naturally without any test-side workaround. * Two new unit tests cover the new identity-check path: TryAcquireLockForExternalRequestor_WhenHolderPidRecycled (verifies that an orphan is detected when start times differ at the same PID) and TryAcquireLockForExternalRequestor_WhenHolderStartTimeMatches (the inverse: a still-alive holder is correctly seen as active and a new acquisition is denied). Assisted-by: Claude Opus 4.7 Signed-off-by: Tyrie Vella --- GVFS/GVFS.Common/GVFSLock.cs | 71 ++++++++++++++--- GVFS/GVFS.Common/GVFSPlatform.cs | 13 ++++ GVFS/GVFS.Common/NativeMethods.Shared.cs | 12 +++ .../WindowsPlatform.Shared.cs | 38 +++++++++ GVFS/GVFS.Platform.Windows/WindowsPlatform.cs | 5 ++ GVFS/GVFS.UnitTests/Common/GVFSLockTests.cs | 78 +++++++++++++++++++ .../Mock/Common/MockPlatform.cs | 23 ++++++ 7 files changed, 230 insertions(+), 10 deletions(-) diff --git a/GVFS/GVFS.Common/GVFSLock.cs b/GVFS/GVFS.Common/GVFSLock.cs index 5bfaa5976..c8cac11ff 100644 --- a/GVFS/GVFS.Common/GVFSLock.cs +++ b/GVFS/GVFS.Common/GVFSLock.cs @@ -41,6 +41,20 @@ public bool TryAcquireLockForExternalRequestor( existingExternalHolder = null; + // Capture the requestor's process start time so we can later distinguish the + // genuine holder from an unrelated process that happens to be reusing the same + // PID after the holder exits. If we cannot read the start time (e.g. permission + // failure on OpenProcess for a different-integrity caller) we still accept the + // lock and fall back to the legacy PID-only orphan check; record the fallback in + // telemetry so we can spot if it becomes common. + long? requestorStartTime = GVFSPlatform.Instance.TryGetActiveProcessStartTime(requestor.PID, out long startTime) + ? startTime + : (long?)null; + if (requestorStartTime == null) + { + metadata.Add("StartTimeUnavailable", true); + } + try { lock (this.acquisitionLock) @@ -65,7 +79,7 @@ public bool TryAcquireLockForExternalRequestor( metadata.Add("Result", "Accepted"); eventLevel = EventLevel.Informational; - this.currentLockHolder.AcquireForExternalRequestor(requestor); + this.currentLockHolder.AcquireForExternalRequestor(requestor, requestorStartTime); this.Stats = new ActiveGitCommandStats(); return true; @@ -190,12 +204,14 @@ private bool IsLockAvailable(bool checkExternalHolderOnly, out NamedPipeMessages } bool externalHolderTerminatedWithoutReleasingLock; + string terminationReason; existingExternalHolder = this.currentLockHolder.GetExternalHolder( - out externalHolderTerminatedWithoutReleasingLock); + out externalHolderTerminatedWithoutReleasingLock, + out terminationReason); if (externalHolderTerminatedWithoutReleasingLock) { - this.ReleaseLockForTerminatedProcess(existingExternalHolder.PID); + this.ReleaseLockForTerminatedProcess(existingExternalHolder.PID, terminationReason); this.tracer.SetGitCommandSessionId(string.Empty); existingExternalHolder = null; } @@ -204,11 +220,11 @@ private bool IsLockAvailable(bool checkExternalHolderOnly, out NamedPipeMessages } } - private bool ReleaseExternalLock(int pid, string eventName) + private bool ReleaseExternalLock(int pid, string eventName, EventMetadata extraMetadata = null) { lock (this.acquisitionLock) { - EventMetadata metadata = new EventMetadata(); + EventMetadata metadata = extraMetadata ?? new EventMetadata(); try { @@ -251,9 +267,11 @@ private bool ReleaseExternalLock(int pid, string eventName) } } - private void ReleaseLockForTerminatedProcess(int pid) + private void ReleaseLockForTerminatedProcess(int pid, string terminationReason) { - this.ReleaseExternalLock(pid, "ExternalLockHolderExited"); + EventMetadata metadata = new EventMetadata(); + metadata.Add("ExternalHolderTerminationReason", terminationReason ?? "Unknown"); + this.ReleaseExternalLock(pid, "ExternalLockHolderExited", metadata); } // The lock release event is a convenient place to record stats about things that happened while a git command was running, @@ -383,6 +401,7 @@ public void AddStatsToTelemetry(EventMetadata metadata) private class LockHolder { private NamedPipeMessages.LockData externalLockHolder; + private long? externalLockHolderStartTime; public bool IsFree { @@ -404,7 +423,7 @@ public void AcquireForGVFS() this.IsGVFS = true; } - public void AcquireForExternalRequestor(NamedPipeMessages.LockData externalLockHolder) + public void AcquireForExternalRequestor(NamedPipeMessages.LockData externalLockHolder, long? startTime) { if (this.IsGVFS || this.externalLockHolder != null) @@ -413,12 +432,14 @@ public void AcquireForExternalRequestor(NamedPipeMessages.LockData externalLockH } this.externalLockHolder = externalLockHolder; + this.externalLockHolderStartTime = startTime; } public void Release() { this.IsGVFS = false; this.externalLockHolder = null; + this.externalLockHolderStartTime = null; } public NamedPipeMessages.LockData GetExternalHolder() @@ -426,14 +447,44 @@ public NamedPipeMessages.LockData GetExternalHolder() return this.externalLockHolder; } - public NamedPipeMessages.LockData GetExternalHolder(out bool externalHolderTerminatedWithoutReleasingLock) + public NamedPipeMessages.LockData GetExternalHolder(out bool externalHolderTerminatedWithoutReleasingLock, out string terminationReason) { externalHolderTerminatedWithoutReleasingLock = false; + terminationReason = null; if (this.externalLockHolder != null) { int pid = this.externalLockHolder.PID; - externalHolderTerminatedWithoutReleasingLock = !GVFSPlatform.Instance.IsProcessActive(pid); + + if (this.externalLockHolderStartTime is long capturedStartTime) + { + // Identity check: confirm the same process still owns this PID by comparing + // the OS-supplied process start time we captured at acquisition with the + // current one. A mismatch means the original holder exited and Windows + // recycled the PID to a different process (the bug this code fixes). + if (!GVFSPlatform.Instance.TryGetActiveProcessStartTime(pid, out long currentStartTime)) + { + externalHolderTerminatedWithoutReleasingLock = true; + terminationReason = "ProcessNotActive"; + } + else if (currentStartTime != capturedStartTime) + { + externalHolderTerminatedWithoutReleasingLock = true; + terminationReason = "PidRecycled"; + } + } + else + { + // Fallback for the rare case where we could not capture a start time at + // acquisition time (e.g. cross-integrity OpenProcess denial). Use the + // legacy PID-only liveness check, which is vulnerable to PID recycling + // but matches pre-fix behavior. + if (!GVFSPlatform.Instance.IsProcessActive(pid)) + { + externalHolderTerminatedWithoutReleasingLock = true; + terminationReason = "ProcessNotActive"; + } + } } return this.externalLockHolder; diff --git a/GVFS/GVFS.Common/GVFSPlatform.cs b/GVFS/GVFS.Common/GVFSPlatform.cs index 579e94955..d5132066b 100644 --- a/GVFS/GVFS.Common/GVFSPlatform.cs +++ b/GVFS/GVFS.Common/GVFSPlatform.cs @@ -66,6 +66,19 @@ public static void Register(GVFSPlatform platform) public abstract void PrepareProcessToRunInBackground(); public abstract bool IsProcessActive(int processId); + + /// + /// Returns true and writes an opaque, OS-supplied process-identity token (e.g. process + /// creation time on Windows) when the process with the given PID is currently active. + /// The token has no meaning beyond identity comparison: two calls for the same underlying + /// process yield equal tokens, and a call after the OS has recycled the PID to a different + /// process yields a different token. Returns false (and startTime = 0) if the process + /// is no longer running, or if it could not be identified for any reason (e.g. permission + /// failure). Callers must treat a false return as "no identity information available" and + /// fall back to if they still need a liveness check. + /// + public abstract bool TryGetActiveProcessStartTime(int processId, out long startTime); + public abstract void IsServiceInstalledAndRunning(string name, out bool installed, out bool running); public abstract string GetNamedPipeName(string enlistmentRoot); public abstract string GetGVFSServiceNamedPipeName(string serviceName); diff --git a/GVFS/GVFS.Common/NativeMethods.Shared.cs b/GVFS/GVFS.Common/NativeMethods.Shared.cs index 7a796b15e..43aca72f8 100644 --- a/GVFS/GVFS.Common/NativeMethods.Shared.cs +++ b/GVFS/GVFS.Common/NativeMethods.Shared.cs @@ -158,6 +158,18 @@ public static extern SafeFileHandle OpenProcess( [return: MarshalAs(UnmanagedType.Bool)] public static extern bool GetExitCodeProcess(SafeFileHandle hProcess, out uint lpExitCode); + // GetProcessTimes writes four FILETIME values (each two DWORDs / 8 bytes). + // We marshal each as `out long` because we only ever compare the raw 64-bit value + // (creation time) for identity purposes; we never decode it as a date. + [DllImport("kernel32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool GetProcessTimes( + SafeFileHandle hProcess, + out long lpCreationTime, + out long lpExitTime, + out long lpKernelTime, + out long lpUserTime); + [DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Unicode)] private static extern SafeFileHandle CreateFile( [In] string lpFileName, diff --git a/GVFS/GVFS.Platform.Windows/WindowsPlatform.Shared.cs b/GVFS/GVFS.Platform.Windows/WindowsPlatform.Shared.cs index 80cc09f68..99def6841 100644 --- a/GVFS/GVFS.Platform.Windows/WindowsPlatform.Shared.cs +++ b/GVFS/GVFS.Platform.Windows/WindowsPlatform.Shared.cs @@ -70,6 +70,44 @@ public static bool IsProcessActiveImplementation(int processId, bool tryGetProce } } + /// + /// Returns true if a process with the given PID is currently active AND writes its + /// creation timestamp (raw FILETIME, 100-ns ticks since 1601) to . + /// The returned value is intended for identity comparison only -- two calls for the + /// same underlying process (no PID reuse) always yield equal values; if the OS recycles + /// a PID to a new process the value will differ. Returns false (and startTime = 0) + /// if the process is gone, has terminated (but its kernel object lingers due to an + /// outstanding handle elsewhere), or cannot be opened for QueryLimitedInformation. + /// + public static bool TryGetActiveProcessStartTimeImplementation(int processId, out long startTime) + { + startTime = 0; + + using (SafeFileHandle process = NativeMethods.OpenProcess(NativeMethods.ProcessAccessFlags.QueryLimitedInformation, false, processId)) + { + if (process.IsInvalid) + { + return false; + } + + // GetProcessTimes succeeds for terminated processes whose kernel object still + // exists (e.g., an outstanding handle elsewhere). Confirm the process is still + // running before trusting the creation time as an identity marker. + if (!NativeMethods.GetExitCodeProcess(process, out uint exitCode) || exitCode != StillActive) + { + return false; + } + + if (!NativeMethods.GetProcessTimes(process, out long creationTime, out _, out _, out _)) + { + return false; + } + + startTime = creationTime; + return true; + } + } + public static string GetNamedPipeNameImplementation(string enlistmentRoot) { return "GVFS_" + enlistmentRoot.ToUpper().Replace(':', '_'); diff --git a/GVFS/GVFS.Platform.Windows/WindowsPlatform.cs b/GVFS/GVFS.Platform.Windows/WindowsPlatform.cs index d262a7953..b8593f417 100644 --- a/GVFS/GVFS.Platform.Windows/WindowsPlatform.cs +++ b/GVFS/GVFS.Platform.Windows/WindowsPlatform.cs @@ -205,6 +205,11 @@ public override bool IsProcessActive(int processId) return WindowsPlatform.IsProcessActiveImplementation(processId, tryGetProcessById: true); } + public override bool TryGetActiveProcessStartTime(int processId, out long startTime) + { + return WindowsPlatform.TryGetActiveProcessStartTimeImplementation(processId, out startTime); + } + public override void IsServiceInstalledAndRunning(string name, out bool installed, out bool running) { ServiceController service = ServiceController.GetServices().FirstOrDefault(s => s.ServiceName.Equals(name, StringComparison.Ordinal)); diff --git a/GVFS/GVFS.UnitTests/Common/GVFSLockTests.cs b/GVFS/GVFS.UnitTests/Common/GVFSLockTests.cs index fb0a7c5a4..91173fd7b 100644 --- a/GVFS/GVFS.UnitTests/Common/GVFSLockTests.cs +++ b/GVFS/GVFS.UnitTests/Common/GVFSLockTests.cs @@ -182,6 +182,84 @@ public void TryAcquireLockForExternalRequestor_WhenExternalHolderTerminated() mockTracer.VerifyAll(); } + /// + /// Regression test for OrphanedGVFSLockIsCleanedUp flake: simulates the original lock holder + /// exiting and the OS recycling its PID to a different, unrelated process before the orphan + /// check fires. Identity is preserved via process start time, so the orphan must be detected + /// even though the (now-different) process at the holder's PID still appears active. + /// + [TestCase] + public void TryAcquireLockForExternalRequestor_WhenHolderPidRecycled() + { + Mock mockTracer = new Mock(MockBehavior.Strict); + mockTracer.Setup(x => x.RelatedEvent(EventLevel.Informational, "TryAcquireLockExternal", It.IsAny())); + mockTracer.Setup(x => x.RelatedEvent(EventLevel.Informational, "ExternalLockHolderExited", It.IsAny(), Keywords.Telemetry)); + mockTracer.Setup(x => x.SetGitCommandSessionId(string.Empty)); + MockPlatform mockPlatform = (MockPlatform)GVFSPlatform.Instance; + + // Acquire as DefaultLockData.PID with an explicit start time so the new identity-check + // path (not the legacy fallback) runs. + mockPlatform.ActiveProcesses.Add(DefaultLockData.PID); + mockPlatform.ProcessStartTimes[DefaultLockData.PID] = 1000; + GVFSLock gvfsLock = new GVFSLock(mockTracer.Object); + NamedPipeMessages.LockData existingExternalHolder; + gvfsLock.TryAcquireLockForExternalRequestor(DefaultLockData, out existingExternalHolder).ShouldBeTrue(); + existingExternalHolder.ShouldBeNull(); + this.ValidateLockHeld(gvfsLock, DefaultLockData); + + // Simulate PID recycling: the PID is still reported active (a different process now owns it), + // but the start time differs from the one we captured at acquisition. + mockPlatform.ProcessStartTimes[DefaultLockData.PID] = 2000; + + NamedPipeMessages.LockData newLockData = new NamedPipeMessages.LockData(4321, false, false, "git new", "456"); + mockPlatform.ActiveProcesses.Add(newLockData.PID); + mockPlatform.ProcessStartTimes[newLockData.PID] = 3000; + gvfsLock.TryAcquireLockForExternalRequestor(newLockData, out existingExternalHolder).ShouldBeTrue(); + existingExternalHolder.ShouldBeNull(); + this.ValidateLockHeld(gvfsLock, newLockData); + + mockPlatform.ActiveProcesses.Remove(DefaultLockData.PID); + mockPlatform.ActiveProcesses.Remove(newLockData.PID); + mockPlatform.ProcessStartTimes.Remove(DefaultLockData.PID); + mockPlatform.ProcessStartTimes.Remove(newLockData.PID); + mockTracer.VerifyAll(); + } + + /// + /// Inverse of the PID-recycle test: when start time still matches, the original holder + /// must still be considered the active holder and a new acquisition must be denied. + /// Guards against an over-eager orphan detector that would release any lock on every check. + /// + [TestCase] + public void TryAcquireLockForExternalRequestor_WhenHolderStartTimeMatches() + { + Mock mockTracer = new Mock(MockBehavior.Strict); + mockTracer.Setup(x => x.RelatedEvent(EventLevel.Informational, "TryAcquireLockExternal", It.IsAny())); + mockTracer.Setup(x => x.RelatedEvent(EventLevel.Verbose, "TryAcquireLockExternal", It.IsAny())); + MockPlatform mockPlatform = (MockPlatform)GVFSPlatform.Instance; + + mockPlatform.ActiveProcesses.Add(DefaultLockData.PID); + mockPlatform.ProcessStartTimes[DefaultLockData.PID] = 1000; + GVFSLock gvfsLock = new GVFSLock(mockTracer.Object); + NamedPipeMessages.LockData existingExternalHolder; + gvfsLock.TryAcquireLockForExternalRequestor(DefaultLockData, out existingExternalHolder).ShouldBeTrue(); + this.ValidateLockHeld(gvfsLock, DefaultLockData); + + // Start time is unchanged -- holder is genuinely still alive; new acquire must be denied. + NamedPipeMessages.LockData newLockData = new NamedPipeMessages.LockData(4321, false, false, "git new", "456"); + mockPlatform.ActiveProcesses.Add(newLockData.PID); + mockPlatform.ProcessStartTimes[newLockData.PID] = 3000; + gvfsLock.TryAcquireLockForExternalRequestor(newLockData, out existingExternalHolder).ShouldBeFalse(); + this.ValidateExistingExternalHolder(DefaultLockData, existingExternalHolder); + this.ValidateLockHeld(gvfsLock, DefaultLockData); + + mockPlatform.ActiveProcesses.Remove(DefaultLockData.PID); + mockPlatform.ActiveProcesses.Remove(newLockData.PID); + mockPlatform.ProcessStartTimes.Remove(DefaultLockData.PID); + mockPlatform.ProcessStartTimes.Remove(newLockData.PID); + mockTracer.VerifyAll(); + } + private GVFSLock AcquireDefaultLock(MockPlatform mockPlatform, ITracer mockTracer) { GVFSLock gvfsLock = new GVFSLock(mockTracer); diff --git a/GVFS/GVFS.UnitTests/Mock/Common/MockPlatform.cs b/GVFS/GVFS.UnitTests/Mock/Common/MockPlatform.cs index 8c153a424..41876f953 100644 --- a/GVFS/GVFS.UnitTests/Mock/Common/MockPlatform.cs +++ b/GVFS/GVFS.UnitTests/Mock/Common/MockPlatform.cs @@ -45,6 +45,14 @@ public override bool SupportsSystemInstallLog public HashSet ActiveProcesses { get; } = new HashSet(); + /// + /// Optional per-PID start-time overrides used by tests that exercise PID-reuse + /// detection. When a PID is in but not in this + /// dictionary, returns the PID itself + /// as a synthetic non-zero start time. + /// + public Dictionary ProcessStartTimes { get; } = new Dictionary(); + public override void ConfigureVisualStudio(string gitBinPath, ITracer tracer) { throw new NotSupportedException(); @@ -138,6 +146,21 @@ public override bool IsProcessActive(int processId) return this.ActiveProcesses.Contains(processId); } + public override bool TryGetActiveProcessStartTime(int processId, out long startTime) + { + if (this.ActiveProcesses.Contains(processId)) + { + // If the test populated an explicit start time use that, otherwise fall back + // to a deterministic non-zero value so test scenarios that don't care about + // PID identity (the existing majority of unit tests) keep working. + startTime = this.ProcessStartTimes.TryGetValue(processId, out long stored) ? stored : processId; + return true; + } + + startTime = 0; + return false; + } + public override void IsServiceInstalledAndRunning(string name, out bool installed, out bool running) { throw new NotSupportedException();