Skip to content

Commit bb622be

Browse files
Ralf and claude committed
signal+lnd: exit with non-zero code on internal shutdown
When lnd auto-shuts down due to an internal error such as a chain backend health check failure, Main() previously returned nil, causing the process to exit with code 0. This made it impossible for process supervisors to distinguish a crash from a clean shutdown.

Add a shutdownRequested flag to signal.Interceptor that is set inside mainInterruptHandler only when an internal RequestShutdown() is the first shutdown trigger. This avoids a race where a late internal request could override an earlier OS signal shutdown.

Main() now returns ErrShutdownRequested in that case, which flows to os.Exit(1) in cmd/lnd/main.go. User-initiated shutdowns (SIGINT, SIGTERM) continue to exit with code 0.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent f297c47 commit bb622be

3 files changed

Lines changed: 122 additions & 0 deletions

File tree

lnd.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,14 @@ var errStreamIsolationWithProxySkip = errors.New(
141141
"while stream isolation is enabled, the TOR proxy may not be skipped",
142142
)
143143

144+
// ErrShutdownRequested is returned from Main when lnd shuts down due to an
145+
// internal error condition such as a backend health check failure. This
146+
// ensures the process exits with a non-zero code.
147+
var ErrShutdownRequested = errors.New(
148+
"lnd was shut down due to an internal request (e.g. chain backend " +
149+
"failure)",
150+
)
151+
144152
// Main is the true entry point for lnd. It accepts a fully populated and
145153
// validated main configuration struct and an optional listener config struct.
146154
// This function starts all main system components then blocks until a signal
@@ -778,6 +786,10 @@ func Main(cfg *Config, lisCfg ListenerCfg, implCfg *ImplementationCfg,
778786
return mkErr("unable to start server", err)
779787

780788
case <-interceptor.ShutdownChannel():
789+
if interceptor.ShutdownWasRequested() {
790+
return ErrShutdownRequested
791+
}
792+
781793
return nil
782794
}
783795

@@ -803,6 +815,11 @@ func Main(cfg *Config, lisCfg ListenerCfg, implCfg *ImplementationCfg,
803815
// Wait for shutdown signal from either a graceful server stop or from
804816
// the interrupt handler.
805817
<-interceptor.ShutdownChannel()
818+
819+
if interceptor.ShutdownWasRequested() {
820+
return ErrShutdownRequested
821+
}
822+
806823
return nil
807824
}
808825

signal/signal.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,14 @@ type Interceptor struct {
111111
// close this channel.
112112
quit chan struct{}
113113

114+
// shutdownRequested is atomically set to 1 when an internal shutdown
115+
// request is the first to trigger shutdown, indicating that the daemon
116+
// is shutting down due to an internal error (e.g. backend health check
117+
// failure) rather than an external signal. This is a pointer so that
118+
// it is shared between the Interceptor value returned by Intercept()
119+
// and the goroutine running mainInterruptHandler.
120+
shutdownRequested *int32
121+
114122
// Notifier handles sending shutdown notifications.
115123
Notifier Notifier
116124
}
@@ -127,6 +135,7 @@ func Intercept() (Interceptor, error) {
127135
shutdownChannel: make(chan struct{}),
128136
shutdownRequestChannel: make(chan struct{}),
129137
quit: make(chan struct{}),
138+
shutdownRequested: new(int32),
130139
}
131140

132141
signalsToCatch := []os.Signal{
@@ -179,6 +188,14 @@ func (c *Interceptor) mainInterruptHandler() {
179188

180189
case <-c.shutdownRequestChannel:
181190
log.Infof("Received shutdown request.")
191+
192+
// Only mark the shutdown as internally requested if
193+
// this is the first shutdown trigger. This prevents
194+
// a late internal request from overriding a
195+
// user-initiated signal shutdown.
196+
if !isShutdown {
197+
atomic.StoreInt32(c.shutdownRequested, 1)
198+
}
182199
shutdown()
183200

184201
case <-c.quit:
@@ -222,6 +239,13 @@ func (c *Interceptor) RequestShutdown() {
222239
}
223240
}
224241

242+
// ShutdownWasRequested returns true if the shutdown was initiated internally
243+
// via RequestShutdown (e.g. due to a health check failure) rather than by an
244+
// external OS signal.
245+
func (c *Interceptor) ShutdownWasRequested() bool {
246+
return atomic.LoadInt32(c.shutdownRequested) != 0
247+
}
248+
225249
// ShutdownChannel returns the channel that will be closed once the main
226250
// interrupt handler has exited.
227251
func (c *Interceptor) ShutdownChannel() <-chan struct{} {

signal/signal_test.go

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package signal
2+
3+
import (
4+
"testing"
5+
"time"
6+
7+
"github.com/stretchr/testify/require"
8+
)
9+
10+
// waitForShutdown waits for the interceptor's shutdown channel to close.
11+
func waitForShutdown(t *testing.T, interceptor Interceptor) {
12+
t.Helper()
13+
14+
select {
15+
case <-interceptor.ShutdownChannel():
16+
case <-time.After(time.Second):
17+
t.Fatal("timed out waiting for shutdown channel")
18+
}
19+
20+
// Allow mainInterruptHandler goroutine to fully exit and reset
21+
// the global started flag so the next test can call Intercept().
22+
time.Sleep(50 * time.Millisecond)
23+
}
24+
25+
// TestRequestShutdownSetsFlag tests that calling RequestShutdown sets the
26+
// shutdownRequested flag, which is used to distinguish internal shutdown
27+
// requests (e.g. chain backend failure) from external OS signals.
28+
func TestRequestShutdownSetsFlag(t *testing.T) {
29+
interceptor, err := Intercept()
30+
require.NoError(t, err)
31+
32+
// Before any shutdown, the flag should not be set.
33+
require.False(t, interceptor.ShutdownWasRequested())
34+
35+
// Request an internal shutdown.
36+
interceptor.RequestShutdown()
37+
38+
waitForShutdown(t, interceptor)
39+
40+
// The flag should now be set after the handler processed the request.
41+
require.True(t, interceptor.ShutdownWasRequested())
42+
}
43+
44+
// TestOSSignalDoesNotSetRequestedFlag tests that an OS signal based shutdown
45+
// does not set the shutdownRequested flag, ensuring a clean exit code 0.
46+
func TestOSSignalDoesNotSetRequestedFlag(t *testing.T) {
47+
interceptor, err := Intercept()
48+
require.NoError(t, err)
49+
50+
// Simulate an OS signal.
51+
interceptor.interruptChannel <- nil
52+
53+
waitForShutdown(t, interceptor)
54+
55+
// The flag should NOT be set for OS signal shutdowns.
56+
require.False(t, interceptor.ShutdownWasRequested())
57+
}
58+
59+
// TestOSSignalThenRequestShutdownNoFlag tests the race condition where a user
60+
// sends SIGINT first, and then an internal component calls RequestShutdown.
61+
// The first shutdown cause (OS signal) should win, so the flag must remain
62+
// unset and the process should exit with code 0.
63+
func TestOSSignalThenRequestShutdownNoFlag(t *testing.T) {
64+
interceptor, err := Intercept()
65+
require.NoError(t, err)
66+
67+
// Simulate an OS signal to initiate shutdown first.
68+
interceptor.interruptChannel <- nil
69+
70+
// Give mainInterruptHandler time to process the signal and set
71+
// isShutdown=true before the internal request arrives.
72+
time.Sleep(50 * time.Millisecond)
73+
74+
// Now an internal component also requests shutdown (late arrival).
75+
interceptor.RequestShutdown()
76+
77+
waitForShutdown(t, interceptor)
78+
79+
// The flag must NOT be set — the OS signal arrived first.
80+
require.False(t, interceptor.ShutdownWasRequested())
81+
}

0 commit comments

Comments
 (0)