From 30b14dbb48edcb2cecd04537aa2ca354c98d66af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Fri, 1 May 2026 23:27:38 +0200 Subject: [PATCH 01/36] feat: add WithDrain option for graceful Stop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Stop now waits for runFunc to return on its own when WithDrain is set, closing the channel returned by Stopping(ctx) instead of cancelling. - Falls back to ctx cancel + wait once the drain timeout elapses, so stuck runFuncs still surface as DeadlineExceeded. - No-op for existing callers — Stop semantics are unchanged when WithDrain is not used. - Strengthen the existing stop-timeout test to assert ctx-cancel behavior when WithDrain is absent. --- README.md | 23 +++++++ runnable.go | 28 +++++++++ runnable_test.go | 20 ++++-- with_drain.go | 39 ++++++++++++ with_drain_test.go | 154 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 259 insertions(+), 5 deletions(-) create mode 100644 with_drain.go create mode 100644 with_drain_test.go diff --git a/README.md b/README.md index 25d4511..0df47c2 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,29 @@ if err != nil { } ``` +### Drain on Stop + +By default, `Stop` cancels the runFunc's context, which aborts in-flight +work. For workers that own external calls that must complete (e.g. an +HTTP request that creates remote state), use `WithDrain` to switch to +"signal-and-wait" semantics: `Stop` closes the channel returned by +`Stopping(ctx)` and waits up to the drain timeout for runFunc to return +on its own. If the timeout elapses, `Stop` falls back to cancelling the +context. + +```go +r := runnable.New(func(ctx context.Context) error { + for { + select { + case <-runnable.Stopping(ctx): + return nil // finish in-flight work, then return + case <-time.After(time.Second): + doWork(ctx) + } + } +}, runnable.WithDrain(10*time.Second)) +``` + ### Runnable Object ```go package main diff --git a/runnable.go b/runnable.go index ca62fef..192f8e7 100644 --- a/runnable.go +++ b/runnable.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "sync" + "time" ) var ( @@ -28,6 +29,10 @@ type runnable struct { runCancel context.CancelFunc runStop chan bool + drainEnabled bool + drainTimeout time.Duration + stoppingChan chan struct{} + isRunning bool onStart func() onStop func() @@ -92,6 +97,10 @@ func (r *runnable) Run(ctx context.Context) error { r.runStop = make(chan bool) runCtx := r.runCtx + if r.drainEnabled { + r.stoppingChan = make(chan struct{}) + runCtx = context.WithValue(runCtx, stoppingKey{}, (<-chan struct{})(r.stoppingChan)) + } r.mu.Unlock() defer func() { @@ -134,8 +143,27 @@ func (r *runnable) Stop(ctx context.Context) error { } runStop := r.runStop + drainEnabled := r.drainEnabled + drainTimeout := r.drainTimeout + stoppingChan := r.stoppingChan r.mu.Unlock() + if drainEnabled { + close(stoppingChan) + drainCtx, drainCancel := context.WithTimeout(ctx, drainTimeout) + select { + case <-runStop: + drainCancel() + return nil + case <-ctx.Done(): + drainCancel() + return ctx.Err() + case <-drainCtx.Done(): + // Drain timed out; fall through to forced cancel. + } + drainCancel() + } + r.runCancel() select { diff --git a/runnable_test.go b/runnable_test.go index 045be86..1b7cf9d 100644 --- a/runnable_test.go +++ b/runnable_test.go @@ -81,18 +81,20 @@ func TestRunnable(t *testing.T) { assert.Equal(t, false, r.IsRunning()) }) - t.Run("runnable, stop timeout", func(t *testing.T) { + t.Run("runnable, stop timeout proves no drain behavior", func(t *testing.T) { started := make(chan struct{}) + ctxCancelObserved := make(chan struct{}) r := New(func(ctx context.Context) error { started <- struct{}{} + <-ctx.Done() + close(ctxCancelObserved) time.Sleep(2 * time.Second) - return nil + return ctx.Err() }) go func() { - err := r.Run(context.Background()) - require.NoError(t, err) + _ = r.Run(context.Background()) }() <-started @@ -101,7 +103,15 @@ func TestRunnable(t *testing.T) { stopCtx, stopCancel := context.WithTimeout(context.Background(), 1*time.Second) defer stopCancel() err := r.Stop(stopCtx) - require.Error(t, err, context.DeadlineExceeded) + require.ErrorIs(t, err, context.DeadlineExceeded) + + // Without WithDrain, Stop cancels runFunc's ctx immediately. + select { + case <-ctxCancelObserved: + case <-time.After(100 * time.Millisecond): + t.Fatal("expected runFunc's ctx to be cancelled when Stop fires without WithDrain") + } + assert.Equal(t, true, r.IsRunning()) }) } diff --git a/with_drain.go b/with_drain.go new file mode 100644 index 0000000..a409e13 --- /dev/null +++ b/with_drain.go @@ -0,0 +1,39 @@ +package runnable + +import ( + "context" + "time" +) + +type stoppingKey struct{} + +// Stopping returns a channel that closes when Stop has been called on +// the Runnable that owns ctx. runFunc implementations under WithDrain +// should select on it and return cleanly without cancelling in-flight +// work. Returns a nil channel when ctx is not associated with a +// drain-enabled Runnable — receiving from a nil channel blocks forever, +// which is the correct no-op for callers that opt into drain semantics +// only when configured. +func Stopping(ctx context.Context) <-chan struct{} { + ch, _ := ctx.Value(stoppingKey{}).(<-chan struct{}) + return ch +} + +type withDrain struct { + timeout time.Duration +} + +// WithDrain switches Stop's behavior from "cancel runFunc's ctx" to +// "close Stopping(ctx) and wait up to timeout for runFunc to return on +// its own." After the timeout elapses, Stop falls back to cancelling +// the ctx as before (preserving the existing escape hatch for stuck +// runFuncs). Use this when runFunc owns in-flight external calls that +// must drain rather than abort on shutdown. +func WithDrain(timeout time.Duration) Option { + return &withDrain{timeout: timeout} +} + +func (w *withDrain) apply(r *runnable) { + r.drainEnabled = true + r.drainTimeout = w.timeout +} diff --git a/with_drain_test.go b/with_drain_test.go new file mode 100644 index 0000000..8347bfb --- /dev/null +++ b/with_drain_test.go @@ -0,0 +1,154 @@ +package runnable + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestWithDrain(t *testing.T) { + t.Run("Stop waits for runFunc to return", func(t *testing.T) { + started := make(chan struct{}) + runFuncErr := make(chan error, 1) + + r := New(func(ctx context.Context) error { + close(started) + <-Stopping(ctx) + time.Sleep(200 * time.Millisecond) + // Return naturally without observing ctx cancellation. + if ctx.Err() != nil { + return ctx.Err() + } + return nil + }, WithDrain(1*time.Second)) + + go func() { + runFuncErr <- r.Run(context.Background()) + }() + + <-started + assert.True(t, r.IsRunning()) + + start := time.Now() + err := r.Stop(context.Background()) + elapsed := time.Since(start) + require.NoError(t, err) + assert.False(t, r.IsRunning()) + assert.GreaterOrEqual(t, elapsed, 200*time.Millisecond) + assert.Less(t, elapsed, 500*time.Millisecond) + + select { + case err := <-runFuncErr: + require.NoError(t, err, "runFunc should return naturally, not via ctx cancellation") + case <-time.After(time.Second): + t.Fatal("runFunc did not return") + } + }) + + t.Run("Stop falls back to cancel on drain timeout", func(t *testing.T) { + started := make(chan struct{}) + runFuncErr := make(chan error, 1) + + r := New(func(ctx context.Context) error { + close(started) + <-ctx.Done() + return ctx.Err() + }, WithDrain(100*time.Millisecond)) + + go func() { + runFuncErr <- r.Run(context.Background()) + }() + + <-started + + stopCtx, stopCancel := context.WithTimeout(context.Background(), 2*time.Second) + defer stopCancel() + err := r.Stop(stopCtx) + require.NoError(t, err) + assert.False(t, r.IsRunning()) + + select { + case err := <-runFuncErr: + require.ErrorIs(t, err, context.Canceled) + case <-time.After(time.Second): + t.Fatal("runFunc did not return") + } + }) + + t.Run("Stop returns DeadlineExceeded when runFunc stuck", func(t *testing.T) { + started := make(chan struct{}) + release := make(chan struct{}) + + r := New(func(ctx context.Context) error { + close(started) + <-release + return nil + }, WithDrain(50*time.Millisecond)) + + go func() { + _ = r.Run(context.Background()) + }() + + <-started + + stopCtx, stopCancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer stopCancel() + err := r.Stop(stopCtx) + require.ErrorIs(t, err, context.DeadlineExceeded) + + // Release the runFunc so the goroutine can exit cleanly. + close(release) + }) + + t.Run("outer ctx cancel still propagates", func(t *testing.T) { + started := make(chan struct{}) + runFuncErr := make(chan error, 1) + + r := New(func(ctx context.Context) error { + close(started) + <-ctx.Done() + return ctx.Err() + }, WithDrain(1*time.Second)) + + ctx, cancel := context.WithCancel(context.Background()) + + go func() { + runFuncErr <- r.Run(ctx) + }() + + <-started + cancel() + + select { + case err := <-runFuncErr: + require.True(t, errors.Is(err, context.Canceled)) + case <-time.After(time.Second): + t.Fatal("runFunc did not exit on outer ctx cancel") + } + assert.False(t, r.IsRunning()) + }) + + t.Run("Stopping returns nil when not configured", func(t *testing.T) { + var observed bool + + r := New(func(ctx context.Context) error { + ch := Stopping(ctx) + // Selecting on nil channel blocks forever; default branch runs. + select { + case <-ch: + observed = true + default: + observed = ch == nil + } + return nil + }) + + err := r.Run(context.Background()) + require.NoError(t, err) + assert.True(t, observed, "Stopping(ctx) should be nil without WithDrain") + }) +} From c586f84e468dda334d13e7dd9707e1ee742cf670 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Fri, 1 May 2026 23:27:56 +0200 Subject: [PATCH 02/36] feat: add NewTicker primitive - NewTicker(interval, tick, opts...) wraps the standard time.NewTicker + select-loop pattern that periodic workers reimplement. - Composes with WithDrain (in-flight tick finishes; loop exits without firing a new tick) and WithRecoverer (panics in tick are caught). - Skip queued ticks accumulated while a slow tick was running once Stop has been called, so shutdown isn't delayed by stale t.C buffer. - Add examples/ticker-with-drain showing the full SIGTERM-safe shape (ticker + drain + recoverer + signal.NotifyContext). --- README.md | 26 ++++++ examples/ticker-with-drain/main.go | 71 +++++++++++++++ ticker.go | 50 +++++++++++ ticker_test.go | 135 +++++++++++++++++++++++++++++ 4 files changed, 282 insertions(+) create mode 100644 examples/ticker-with-drain/main.go create mode 100644 ticker.go create mode 100644 ticker_test.go diff --git a/README.md b/README.md index 0df47c2..40e8b8b 100644 --- a/README.md +++ b/README.md @@ -144,6 +144,32 @@ r := runnable.New(func(ctx context.Context) error { }, runnable.WithDrain(10*time.Second)) ``` +### Ticker + +`NewTicker` wraps the standard "select-loop on a `time.Ticker`" pattern. +It composes with `WithDrain` (let the current tick finish on Stop) and +`WithRecoverer` (catch panics in the tick body). + +```go +r := runnable.NewTicker( + 30*time.Second, + func(ctx context.Context) error { + return reconcile(ctx) + }, + runnable.WithDrain(10*time.Second), +) + +go r.Run(ctx) + +// On shutdown: +stopCtx, cancel := context.WithTimeout(context.Background(), 15*time.Second) +defer cancel() +r.Stop(stopCtx) // drains the in-flight tick before returning +``` + +A full SIGTERM-safe shape (ticker + drain + recoverer + signal.NotifyContext) +lives in [`examples/ticker-with-drain`](examples/ticker-with-drain/main.go). + ### Runnable Object ```go package main diff --git a/examples/ticker-with-drain/main.go b/examples/ticker-with-drain/main.go new file mode 100644 index 0000000..4ea1e05 --- /dev/null +++ b/examples/ticker-with-drain/main.go @@ -0,0 +1,71 @@ +// Example: a periodic reconciler that drains gracefully on SIGTERM. +// +// Shape: NewTicker + WithDrain + WithRecoverer + signal.NotifyContext. +// Copy-paste this into a service's cmd/.../main.go and replace the +// reconcile body with your work. +package main + +import ( + "context" + "errors" + "fmt" + "os" + "os/signal" + "syscall" + "time" + + "github.com/0xsequence/runnable" +) + +type stderrReporter struct{} + +func (stderrReporter) Report(ctx context.Context, rec interface{}) { + fmt.Fprintf(os.Stderr, "panic recovered: %v\n", rec) +} + +type stderrPrinter struct{} + +func (stderrPrinter) Print(ctx context.Context, callstack []byte) { + os.Stderr.Write(callstack) +} + +func reconcile(ctx context.Context) error { + // Pretend this is an HTTP call to an external system that must not + // be aborted mid-request when SIGTERM fires. Under WithDrain, Stop + // waits for this tick to finish before tearing down the Runnable. + fmt.Println("tick: reconciling...") + time.Sleep(500 * time.Millisecond) + fmt.Println("tick: done") + return nil +} + +func main() { + ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT) + defer stop() + + rc := runnable.NewTicker( + 2*time.Second, + reconcile, + runnable.WithDrain(10*time.Second), + runnable.WithRecoverer(stderrReporter{}, stderrPrinter{}), + ) + + runErr := make(chan error, 1) + go func() { + runErr <- rc.Run(ctx) + }() + + <-ctx.Done() + fmt.Println("shutdown: draining in-flight tick...") + + stopCtx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + if err := rc.Stop(stopCtx); err != nil { + fmt.Fprintf(os.Stderr, "stop: %v\n", err) + } + + if err := <-runErr; err != nil && !errors.Is(err, context.Canceled) { + fmt.Fprintf(os.Stderr, "reconciler stopped: %v\n", err) + os.Exit(1) + } +} diff --git a/ticker.go b/ticker.go new file mode 100644 index 0000000..15589be --- /dev/null +++ b/ticker.go @@ -0,0 +1,50 @@ +package runnable + +import ( + "context" + "time" +) + +// NewTicker returns a Runnable that calls tick once per interval until +// ctx is cancelled, Stop is called, or tick returns a non-nil error. +// +// When the Runnable is configured WithDrain, an in-flight tick is +// allowed to finish before Run returns; the loop exits without firing +// a new tick. Without WithDrain, Stop cancels ctx and any in-flight +// tick observes the cancellation through ctx.Done(). +// +// tick should respect ctx.Done() for cancellation. To make in-flight +// external calls survive shutdown under WithDrain, tick should derive +// per-call timeouts via context.WithoutCancel(ctx) so its work is not +// affected by either Stop's drain signal or the Runnable's ctx cancel. +func NewTicker(interval time.Duration, tick func(ctx context.Context) error, opts ...Option) Runnable { + return New(func(ctx context.Context) error { + t := time.NewTicker(interval) + defer t.Stop() + + stopping := Stopping(ctx) // nil when WithDrain not used + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-stopping: + return nil + case <-t.C: + // Re-check shutdown signals before firing a new tick: + // when a tick takes longer than the interval, t.C may + // have ready ticks queued from before Stop was called. + select { + case <-ctx.Done(): + return ctx.Err() + case <-stopping: + return nil + default: + } + if err := tick(ctx); err != nil { + return err + } + } + } + }, opts...) +} diff --git a/ticker_test.go b/ticker_test.go new file mode 100644 index 0000000..d4ea362 --- /dev/null +++ b/ticker_test.go @@ -0,0 +1,135 @@ +package runnable + +import ( + "context" + "errors" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewTicker(t *testing.T) { + t.Run("fires on interval", func(t *testing.T) { + var count atomic.Int32 + + r := NewTicker(50*time.Millisecond, func(ctx context.Context) error { + count.Add(1) + return nil + }) + + go func() { + _ = r.Run(context.Background()) + }() + + time.Sleep(175 * time.Millisecond) + + err := r.Stop(context.Background()) + require.NoError(t, err) + + c := count.Load() + assert.GreaterOrEqual(t, c, int32(2)) + assert.LessOrEqual(t, c, int32(4)) + }) + + t.Run("Stop with drain allows current tick to finish", func(t *testing.T) { + tickStarted := make(chan struct{}, 1) + var completed atomic.Int32 + + r := NewTicker(20*time.Millisecond, func(ctx context.Context) error { + select { + case tickStarted <- struct{}{}: + default: + } + time.Sleep(200 * time.Millisecond) + completed.Add(1) + return nil + }, WithDrain(1*time.Second)) + + go func() { + _ = r.Run(context.Background()) + }() + + <-tickStarted + + start := time.Now() + err := r.Stop(context.Background()) + elapsed := time.Since(start) + require.NoError(t, err) + + assert.GreaterOrEqual(t, completed.Load(), int32(1), "in-flight tick should complete") + assert.Less(t, elapsed, 500*time.Millisecond) + }) + + t.Run("Stop without drain cancels in-flight tick", func(t *testing.T) { + tickStarted := make(chan struct{}, 1) + tickErr := make(chan error, 1) + + r := NewTicker(20*time.Millisecond, func(ctx context.Context) error { + select { + case tickStarted <- struct{}{}: + default: + } + <-ctx.Done() + tickErr <- ctx.Err() + return ctx.Err() + }) + + runDone := make(chan error, 1) + go func() { + runDone <- r.Run(context.Background()) + }() + + <-tickStarted + err := r.Stop(context.Background()) + require.NoError(t, err) + + select { + case e := <-tickErr: + require.ErrorIs(t, e, context.Canceled) + case <-time.After(time.Second): + t.Fatal("tick did not observe ctx cancellation") + } + + select { + case e := <-runDone: + require.ErrorIs(t, e, context.Canceled) + case <-time.After(time.Second): + t.Fatal("Run did not return") + } + }) + + t.Run("tick error aborts loop", func(t *testing.T) { + sentinel := errors.New("boom") + var count atomic.Int32 + + r := NewTicker(20*time.Millisecond, func(ctx context.Context) error { + if count.Add(1) == 2 { + return sentinel + } + return nil + }) + + err := r.Run(context.Background()) + require.ErrorIs(t, err, sentinel) + assert.Equal(t, int32(2), count.Load()) + }) + + t.Run("respects outer ctx cancel", func(t *testing.T) { + var count atomic.Int32 + + r := NewTicker(20*time.Millisecond, func(ctx context.Context) error { + count.Add(1) + return nil + }) + + ctx, cancel := context.WithTimeout(context.Background(), 75*time.Millisecond) + defer cancel() + + err := r.Run(ctx) + require.ErrorIs(t, err, context.DeadlineExceeded) + assert.False(t, r.IsRunning()) + }) +} From 03bfd97f99d4ff6a0c6a3c56d8a720ad4cc9e86a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Fri, 1 May 2026 23:29:10 +0200 Subject: [PATCH 03/36] chore: fix go vet and staticcheck warnings - examples/main.go: drop unreachable return after infinite loop; capture and defer the cancel from context.WithTimeout to fix the ctx leak. - runnable_test.go, runnable_group_test.go: replace single-case selects with plain channel receives (S1000). - with_recoverer_test.go: drop unreachable returns after panic. --- examples/main.go | 4 ++-- runnable_group_test.go | 4 +--- runnable_test.go | 7 ++----- with_recoverer_test.go | 3 --- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/examples/main.go b/examples/main.go index e1e710a..b799017 100644 --- a/examples/main.go +++ b/examples/main.go @@ -33,7 +33,6 @@ func (m *Monitor) run(ctx context.Context) error { time.Sleep(1 * time.Second) fmt.Println("Monitoring...") } - return nil } func main() { @@ -92,7 +91,8 @@ func main() { // simple function with timeout fmt.Println("Simple function with timeout...") - ctxWithTimeout, _ := context.WithTimeout(context.Background(), 5*time.Second) + ctxWithTimeout, cancelTimeout := context.WithTimeout(context.Background(), 5*time.Second) + defer cancelTimeout() err = runnable.New(func(ctx context.Context) error { fmt.Println("Starting...") defer fmt.Println("Stopping...") diff --git a/runnable_group_test.go b/runnable_group_test.go index 6efa922..fb70ed9 100644 --- a/runnable_group_test.go +++ b/runnable_group_test.go @@ -48,9 +48,7 @@ func TestNewGroup(t *testing.T) { // Create a new group group := NewGroup( New(func(ctx context.Context) error { - select { - case <-ctx.Done(): - } + <-ctx.Done() return nil }), New(func(ctx context.Context) error { diff --git a/runnable_test.go b/runnable_test.go index 1b7cf9d..ea1b2a8 100644 --- a/runnable_test.go +++ b/runnable_test.go @@ -43,11 +43,8 @@ func TestRunnable(t *testing.T) { r := New(func(ctx context.Context) error { started <- struct{}{} time.Sleep(2 * time.Second) - - select { - case <-ctx.Done(): - return ctx.Err() - } + <-ctx.Done() + return ctx.Err() }) ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) diff --git a/with_recoverer_test.go b/with_recoverer_test.go index e7cb5dd..1cf6b59 100644 --- a/with_recoverer_test.go +++ b/with_recoverer_test.go @@ -25,7 +25,6 @@ func TestWithRecoverer(t *testing.T) { fn := func(ctx context.Context) error { defer func() { counter++ }() panic("something went wrong") - return nil } r := New(fn, WithRecoverer(&reporter, nil)) @@ -42,7 +41,6 @@ func TestWithRecoverer(t *testing.T) { r := New(func(ctx context.Context) error { started <- struct{}{} panic("something went wrong") - return nil }, WithRecoverer(reporter, nil)) go func() { @@ -82,7 +80,6 @@ func TestWithRecoverer(t *testing.T) { r := New(func(ctx context.Context) error { started <- struct{}{} panic("something went wrong") - return nil }, WithRecoverer(reporter, nil), WithStatus("test", store)) go func() { From 6188660c578ac46ce5f55c47b95ada3db90a97dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Fri, 1 May 2026 23:30:34 +0200 Subject: [PATCH 04/36] chore: fix two more linter warnings - with_recoverer_test.go: drop forced string type assertion in Report; use %v formatting so non-string panic values don't crash. - examples/ticker-with-drain: silence errcheck on os.Stderr.Write. --- examples/ticker-with-drain/main.go | 2 +- with_recoverer_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/ticker-with-drain/main.go b/examples/ticker-with-drain/main.go index 4ea1e05..0c0a18d 100644 --- a/examples/ticker-with-drain/main.go +++ b/examples/ticker-with-drain/main.go @@ -26,7 +26,7 @@ func (stderrReporter) Report(ctx context.Context, rec interface{}) { type stderrPrinter struct{} func (stderrPrinter) Print(ctx context.Context, callstack []byte) { - os.Stderr.Write(callstack) + _, _ = os.Stderr.Write(callstack) } func reconcile(ctx context.Context) error { diff --git a/with_recoverer_test.go b/with_recoverer_test.go index 1cf6b59..8f656b2 100644 --- a/with_recoverer_test.go +++ b/with_recoverer_test.go @@ -14,7 +14,7 @@ type InMemoryReporter struct { } func (i *InMemoryReporter) Report(ctx context.Context, rec interface{}) { - i.logs = append(i.logs, fmt.Sprintf("%s", rec.(string))) + i.logs = append(i.logs, fmt.Sprintf("%v", rec)) } func TestWithRecoverer(t *testing.T) { From 63caae5dcb275bb271fdd978d8af30fb7b2a78cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Fri, 1 May 2026 23:38:26 +0200 Subject: [PATCH 05/36] fix: make Stop concurrency-safe under WithDrain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Concurrent Stop callers raced to close the same stoppingChan, causing "close of closed channel" panic on the second close. Pre-PR Stop was safe because context.CancelFunc tolerates repeat calls; WithDrain silently regressed that contract — and K8s does occasionally fire SIGTERM twice on shutdown. Claim ownership under the mutex: the first Stop copies and nils stoppingChan, drives the drain. Subsequent Stops see nil, skip the close, and fall through to the existing runCancel + <-runStop wait (which is already idempotent). All callers observe the same outcome: runFunc returns, then Stop returns. Adds TestWithDrain/Stop_is_concurrency_safe — 10 concurrent Stops, no panic, every caller returns nil or ErrNotRunning. --- runnable.go | 3 ++- with_drain_test.go | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/runnable.go b/runnable.go index 192f8e7..50bb28b 100644 --- a/runnable.go +++ b/runnable.go @@ -146,9 +146,10 @@ func (r *runnable) Stop(ctx context.Context) error { drainEnabled := r.drainEnabled drainTimeout := r.drainTimeout stoppingChan := r.stoppingChan + r.stoppingChan = nil // first-caller wins; subsequent concurrent Stops see nil r.mu.Unlock() - if drainEnabled { + if drainEnabled && stoppingChan != nil { close(stoppingChan) drainCtx, drainCancel := context.WithTimeout(ctx, drainTimeout) select { diff --git a/with_drain_test.go b/with_drain_test.go index 8347bfb..f722368 100644 --- a/with_drain_test.go +++ b/with_drain_test.go @@ -3,6 +3,7 @@ package runnable import ( "context" "errors" + "sync" "testing" "time" @@ -132,6 +133,46 @@ func TestWithDrain(t *testing.T) { assert.False(t, r.IsRunning()) }) + t.Run("Stop is concurrency-safe", func(t *testing.T) { + started := make(chan struct{}) + + r := New(func(ctx context.Context) error { + close(started) + <-Stopping(ctx) + return nil + }, WithDrain(1*time.Second)) + + go func() { + _ = r.Run(context.Background()) + }() + + <-started + + const callers = 10 + var wg sync.WaitGroup + errs := make([]error, callers) + for i := 0; i < callers; i++ { + i := i + wg.Add(1) + go func() { + defer wg.Done() + errs[i] = r.Stop(context.Background()) + }() + } + wg.Wait() + + // No double-close panic is the load-bearing assertion. Each + // Stop must return either nil (drove or waited on the drain) + // or ErrNotRunning (Run already exited before this caller + // grabbed the lock). + for _, err := range errs { + if err != nil { + require.ErrorIs(t, err, ErrNotRunning) + } + } + assert.False(t, r.IsRunning()) + }) + t.Run("Stopping returns nil when not configured", func(t *testing.T) { var observed bool From 2dc534c9d93f9eadc515e53c33cb1041c47ef759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Fri, 1 May 2026 23:40:46 +0200 Subject: [PATCH 06/36] feat: distinguish drain timeout from clean drain via ErrDrainTimedOut MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stop's drain fall-through path was indistinguishable from a clean drain — both returned nil. Callers had no way to detect runFuncs that ignored Stopping(ctx) and only exited via ctx.Done(). Track the fall-through and return ErrDrainTimedOut once runFunc finally exits. ctx-cancel paths still return ctx.Err() unchanged. The runnable is fully stopped either way; the sentinel is observability. --- runnable.go | 7 ++++++- with_drain_test.go | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/runnable.go b/runnable.go index 50bb28b..b42f7f3 100644 --- a/runnable.go +++ b/runnable.go @@ -10,6 +10,7 @@ import ( var ( ErrAlreadyRunning = fmt.Errorf("already running") ErrNotRunning = fmt.Errorf("not running") + ErrDrainTimedOut = fmt.Errorf("drain timed out") ) type Option interface { @@ -149,6 +150,7 @@ func (r *runnable) Stop(ctx context.Context) error { r.stoppingChan = nil // first-caller wins; subsequent concurrent Stops see nil r.mu.Unlock() + var drainTimedOut bool if drainEnabled && stoppingChan != nil { close(stoppingChan) drainCtx, drainCancel := context.WithTimeout(ctx, drainTimeout) @@ -160,7 +162,7 @@ func (r *runnable) Stop(ctx context.Context) error { drainCancel() return ctx.Err() case <-drainCtx.Done(): - // Drain timed out; fall through to forced cancel. + drainTimedOut = true } drainCancel() } @@ -171,6 +173,9 @@ func (r *runnable) Stop(ctx context.Context) error { case <-ctx.Done(): return ctx.Err() case <-runStop: + if drainTimedOut { + return ErrDrainTimedOut + } return nil } } diff --git a/with_drain_test.go b/with_drain_test.go index f722368..486c396 100644 --- a/with_drain_test.go +++ b/with_drain_test.go @@ -50,7 +50,7 @@ func TestWithDrain(t *testing.T) { } }) - t.Run("Stop falls back to cancel on drain timeout", func(t *testing.T) { + t.Run("Stop returns ErrDrainTimedOut on fall-through", func(t *testing.T) { started := make(chan struct{}) runFuncErr := make(chan error, 1) @@ -69,7 +69,7 @@ func TestWithDrain(t *testing.T) { stopCtx, stopCancel := context.WithTimeout(context.Background(), 2*time.Second) defer stopCancel() err := r.Stop(stopCtx) - require.NoError(t, err) + require.ErrorIs(t, err, ErrDrainTimedOut) assert.False(t, r.IsRunning()) select { From 2707d2a374a08825d7f75b1fdb9c80c2a1842505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Fri, 1 May 2026 23:41:31 +0200 Subject: [PATCH 07/36] docs: warn about Stopping foot-gun and ticker+retry cadence - Stopping(ctx) godoc and README example: callers must select on both ctx.Done() and Stopping(ctx). A loop that observes only Stopping hangs on outer-ctx cancellation. README example now shows the full three-case select. - README also reflects ErrDrainTimedOut on the fall-through path. - NewTicker godoc: composing with WithRetry resets the ticker cadence on every retry (tick error bails the loop, WithRetry re-enters runFunc, fresh ticker). Document the behavior; users who need stable cadence should retry inside the tick handler. --- README.md | 8 +++++++- ticker.go | 6 ++++++ with_drain.go | 15 +++++++++++---- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 40e8b8b..444a5bf 100644 --- a/README.md +++ b/README.md @@ -129,12 +129,18 @@ HTTP request that creates remote state), use `WithDrain` to switch to "signal-and-wait" semantics: `Stop` closes the channel returned by `Stopping(ctx)` and waits up to the drain timeout for runFunc to return on its own. If the timeout elapses, `Stop` falls back to cancelling the -context. +context and returns `ErrDrainTimedOut` once runFunc exits. + +Always select on both `<-ctx.Done()` and `<-runnable.Stopping(ctx)` — +`Stopping` signals only `Stop`; outer-context cancellation still arrives +via `ctx.Done()` and a loop that ignores it will hang. ```go r := runnable.New(func(ctx context.Context) error { for { select { + case <-ctx.Done(): + return ctx.Err() case <-runnable.Stopping(ctx): return nil // finish in-flight work, then return case <-time.After(time.Second): diff --git a/ticker.go b/ticker.go index 15589be..9a268d0 100644 --- a/ticker.go +++ b/ticker.go @@ -17,6 +17,12 @@ import ( // external calls survive shutdown under WithDrain, tick should derive // per-call timeouts via context.WithoutCancel(ctx) so its work is not // affected by either Stop's drain signal or the Runnable's ctx cancel. +// +// Composing with WithRetry resets the ticker cadence on every retry: +// a tick error bails the loop, WithRetry re-enters runFunc, and the +// next tick fires `interval` after the retry — not at the original +// cadence. If you need stable cadence with transient-error tolerance, +// handle retries inside `tick` instead. func NewTicker(interval time.Duration, tick func(ctx context.Context) error, opts ...Option) Runnable { return New(func(ctx context.Context) error { t := time.NewTicker(interval) diff --git a/with_drain.go b/with_drain.go index a409e13..306be8a 100644 --- a/with_drain.go +++ b/with_drain.go @@ -10,10 +10,17 @@ type stoppingKey struct{} // Stopping returns a channel that closes when Stop has been called on // the Runnable that owns ctx. runFunc implementations under WithDrain // should select on it and return cleanly without cancelling in-flight -// work. Returns a nil channel when ctx is not associated with a -// drain-enabled Runnable — receiving from a nil channel blocks forever, -// which is the correct no-op for callers that opt into drain semantics -// only when configured. +// work. +// +// Always also select on ctx.Done() — Stopping signals only Stop; +// outer-context cancellation (e.g. the ctx passed to Run was cancelled +// directly) still arrives via ctx.Done(). A loop that selects only on +// Stopping(ctx) will hang on outer-ctx cancel. +// +// Returns a nil channel when ctx is not associated with a drain-enabled +// Runnable — receiving from a nil channel blocks forever, which is the +// correct no-op for callers that opt into drain semantics only when +// configured. func Stopping(ctx context.Context) <-chan struct{} { ch, _ := ctx.Value(stoppingKey{}).(<-chan struct{}) return ch From 101ee9fa23a33b7404b437b63850402fb928d793 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Sat, 2 May 2026 00:06:56 +0200 Subject: [PATCH 08/36] fix: use independent timer for WithDrain timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit context.WithTimeout(ctx, drainTimeout) derived the drain ctx from the caller's ctx, so a Stop deadline shorter than drainTimeout made <-ctx.Done() and the drain expiry race in the same select. The <-ctx.Done() branch returned ctx.Err() *without calling r.runCancel()* — leaving the runnable alive after Stop returned. The other branch could misreport ErrDrainTimedOut when the caller's deadline was the real cause. Switch to time.NewTimer(drainTimeout) and let <-ctx.Done() in the drain select fall through to r.runCancel() so the runnable always gets force-cancelled before Stop returns. drainTimedOut is only set when the standalone timer fires, so the sentinel is no longer spuriously returned on caller-ctx cancellation. Adds TestWithDrain/Stop_forces_cancel_when_caller_ctx_expires_during_drain which exercises a 100ms caller ctx against a 10s drain. --- runnable.go | 18 +++++++++++------- with_drain_test.go | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/runnable.go b/runnable.go index b42f7f3..00a0151 100644 --- a/runnable.go +++ b/runnable.go @@ -153,18 +153,22 @@ func (r *runnable) Stop(ctx context.Context) error { var drainTimedOut bool if drainEnabled && stoppingChan != nil { close(stoppingChan) - drainCtx, drainCancel := context.WithTimeout(ctx, drainTimeout) + // Use a standalone timer so the drain budget is independent of + // the caller's ctx — otherwise a caller ctx shorter than + // drainTimeout makes <-ctx.Done() and the drain expiry race in + // the same select. + drainTimer := time.NewTimer(drainTimeout) select { case <-runStop: - drainCancel() + drainTimer.Stop() return nil - case <-ctx.Done(): - drainCancel() - return ctx.Err() - case <-drainCtx.Done(): + case <-drainTimer.C: drainTimedOut = true + case <-ctx.Done(): + drainTimer.Stop() + // Caller deadline elapsed during drain; fall through so + // r.runCancel() still fires before we return ctx.Err(). } - drainCancel() } r.runCancel() diff --git a/with_drain_test.go b/with_drain_test.go index 486c396..7dbd9c9 100644 --- a/with_drain_test.go +++ b/with_drain_test.go @@ -133,6 +133,39 @@ func TestWithDrain(t *testing.T) { assert.False(t, r.IsRunning()) }) + t.Run("Stop forces cancel when caller ctx expires during drain", func(t *testing.T) { + started := make(chan struct{}) + runFuncDone := make(chan struct{}) + + // runFunc respects its own ctx but not Stopping(ctx). Without + // the independent drain timer, Stop with a caller ctx shorter + // than drainTimeout could return ctx.Err() before r.runCancel() + // fired, leaving the runnable alive. + r := New(func(ctx context.Context) error { + close(started) + <-ctx.Done() + close(runFuncDone) + return ctx.Err() + }, WithDrain(10*time.Second)) + + go func() { + _ = r.Run(context.Background()) + }() + + <-started + + stopCtx, stopCancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer stopCancel() + err := r.Stop(stopCtx) + require.ErrorIs(t, err, context.DeadlineExceeded) + + select { + case <-runFuncDone: + case <-time.After(2 * time.Second): + t.Fatal("runnable was not force-cancelled when caller ctx expired during drain") + } + }) + t.Run("Stop is concurrency-safe", func(t *testing.T) { started := make(chan struct{}) From 6db724232a229316d1f362e6de70f0fa1d830c4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Sat, 2 May 2026 00:07:00 +0200 Subject: [PATCH 09/36] docs(examples): use pristine ctx for Run so SIGTERM doesn't bypass drain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The example passed signal.NotifyContext's ctx to rc.Run. On SIGTERM that ctx cancels immediately — the ticker's runFunc observes <-ctx.Done() and exits before Stop ever closes Stopping(ctx), completely defeating WithDrain. Pass context.Background() to Run; use sigCtx only to detect when to call Stop. Stop's drain budget is now the sole driver of shutdown for the drain-enabled worker. --- examples/ticker-with-drain/main.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/examples/ticker-with-drain/main.go b/examples/ticker-with-drain/main.go index 0c0a18d..474c89c 100644 --- a/examples/ticker-with-drain/main.go +++ b/examples/ticker-with-drain/main.go @@ -40,8 +40,8 @@ func reconcile(ctx context.Context) error { } func main() { - ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT) - defer stop() + sigCtx, stopSig := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT) + defer stopSig() rc := runnable.NewTicker( 2*time.Second, @@ -50,12 +50,16 @@ func main() { runnable.WithRecoverer(stderrReporter{}, stderrPrinter{}), ) + // Run with a pristine ctx — if Run received sigCtx, SIGTERM would + // cancel runFunc's ctx directly and the ticker would exit before + // Stop ever closed Stopping(ctx), defeating WithDrain. Stop is the + // only thing that should drive shutdown of a drain-enabled worker. runErr := make(chan error, 1) go func() { - runErr <- rc.Run(ctx) + runErr <- rc.Run(context.Background()) }() - <-ctx.Done() + <-sigCtx.Done() fmt.Println("shutdown: draining in-flight tick...") stopCtx, cancel := context.WithTimeout(context.Background(), 15*time.Second) From 4dbbc854fe0dc7fa2ef8ef728330b6548e417cde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Sat, 2 May 2026 08:47:02 +0200 Subject: [PATCH 10/36] fix: preserve drain semantics under concurrent Stop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous concurrency-safe fix avoided the double-close panic but introduced two regressions Codex caught on re-review: 1. Secondary Stop callers fell through to r.runCancel() unconditionally, hard-cancelling the runCtx mid-drain. The drain that the primary caller was honoring got bypassed — exactly the failure mode WithDrain exists to prevent. 2. After fixing (1) by making secondary callers wait on runStop, a later Stop with a shorter deadline could no longer enforce it: it would return DeadlineExceeded but never escalate, so the runnable kept draining until the primary caller's drainTimeout expired. Restructure the secondary path: wait on runStop, but escalate to r.runCancel() if the caller's ctx expires first. The shortest deadline among concurrent callers wins, and the drain is only bypassed when some caller actively asks for it via deadline expiry. Adds two tests: - "concurrent Stop preserves drain semantics" — strengthened to assert runFunc observes Stopping(ctx), not ctx.Done() - "secondary Stop with shorter deadline escalates runCancel" — new regression for Codex's escalation finding --- runnable.go | 20 +++++++++-- with_drain_test.go | 86 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 96 insertions(+), 10 deletions(-) diff --git a/runnable.go b/runnable.go index 00a0151..330acfb 100644 --- a/runnable.go +++ b/runnable.go @@ -147,11 +147,27 @@ func (r *runnable) Stop(ctx context.Context) error { drainEnabled := r.drainEnabled drainTimeout := r.drainTimeout stoppingChan := r.stoppingChan - r.stoppingChan = nil // first-caller wins; subsequent concurrent Stops see nil + r.stoppingChan = nil // first-caller wins; subsequent Stops see nil r.mu.Unlock() + // Concurrent Stop with drain enabled: another caller is already + // driving the drain. Wait for its outcome rather than calling + // runCancel ourselves — that would hard-cancel the runCtx and + // defeat the drain the primary caller is honoring. If our ctx + // expires first, escalate to runCancel so the shortest deadline + // among concurrent callers wins. + if drainEnabled && stoppingChan == nil { + select { + case <-runStop: + return nil + case <-ctx.Done(): + r.runCancel() + return ctx.Err() + } + } + var drainTimedOut bool - if drainEnabled && stoppingChan != nil { + if drainEnabled { close(stoppingChan) // Use a standalone timer so the drain budget is independent of // the caller's ctx — otherwise a caller ctx shorter than diff --git a/with_drain_test.go b/with_drain_test.go index 7dbd9c9..5be93e1 100644 --- a/with_drain_test.go +++ b/with_drain_test.go @@ -4,6 +4,7 @@ import ( "context" "errors" "sync" + "sync/atomic" "testing" "time" @@ -166,14 +167,25 @@ func TestWithDrain(t *testing.T) { } }) - t.Run("Stop is concurrency-safe", func(t *testing.T) { + t.Run("concurrent Stop preserves drain semantics", func(t *testing.T) { started := make(chan struct{}) + drainObserved := make(chan struct{}) + var ctxCancelObserved atomic.Bool + // runFunc must exit via Stopping(ctx). If a concurrent Stop + // falls through to r.runCancel(), ctx.Done() fires and the + // drain semantics are violated. r := New(func(ctx context.Context) error { close(started) - <-Stopping(ctx) - return nil - }, WithDrain(1*time.Second)) + select { + case <-Stopping(ctx): + close(drainObserved) + return nil + case <-ctx.Done(): + ctxCancelObserved.Store(true) + return ctx.Err() + } + }, WithDrain(2*time.Second)) go func() { _ = r.Run(context.Background()) @@ -194,18 +206,76 @@ func TestWithDrain(t *testing.T) { } wg.Wait() - // No double-close panic is the load-bearing assertion. Each - // Stop must return either nil (drove or waited on the drain) - // or ErrNotRunning (Run already exited before this caller - // grabbed the lock). + // Each Stop must return either nil (drove or waited on the + // drain) or ErrNotRunning (Run already exited before this + // caller grabbed the lock). No double-close panic. for _, err := range errs { if err != nil { require.ErrorIs(t, err, ErrNotRunning) } } + + select { + case <-drainObserved: + default: + t.Fatal("runFunc never observed Stopping(ctx); drain was bypassed by concurrent Stop") + } + assert.False(t, ctxCancelObserved.Load(), "drain semantics violated: runCtx was hard-cancelled by a concurrent Stop") assert.False(t, r.IsRunning()) }) + t.Run("secondary Stop with shorter deadline escalates runCancel", func(t *testing.T) { + started := make(chan struct{}) + runFuncDone := make(chan struct{}) + + // runFunc waits only on ctx.Done() (ignores Stopping). Without + // escalation, Stop B's deadline expires but the runnable keeps + // draining for the full drainTimeout (5s). + r := New(func(ctx context.Context) error { + close(started) + <-ctx.Done() + close(runFuncDone) + return ctx.Err() + }, WithDrain(5*time.Second)) + + go func() { + _ = r.Run(context.Background()) + }() + + <-started + + // Stop A: no deadline; primary, drives drain. + aDone := make(chan error, 1) + go func() { + aDone <- r.Stop(context.Background()) + }() + + time.Sleep(20 * time.Millisecond) + + // Stop B: 100ms deadline; secondary. Must escalate so runFunc + // exits within the caller's budget. + bCtx, bCancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer bCancel() + start := time.Now() + bErr := r.Stop(bCtx) + bElapsed := time.Since(start) + require.ErrorIs(t, bErr, context.DeadlineExceeded) + assert.Less(t, bElapsed, 500*time.Millisecond, "Stop B should not wait beyond its own deadline") + + select { + case <-runFuncDone: + case <-time.After(time.Second): + t.Fatal("runnable was not force-cancelled when secondary Stop's ctx expired") + } + + select { + case err := <-aDone: + require.NoError(t, err) + case <-time.After(time.Second): + t.Fatal("Stop A did not return after runFunc exited") + } + }) + t.Run("Stopping returns nil when not configured", func(t *testing.T) { var observed bool From 1cbd251c41a81d13af35312ef90056ee672626e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Sat, 2 May 2026 08:47:07 +0200 Subject: [PATCH 11/36] docs(examples): exit promptly when worker dies before signal main was reading runErr only inside the SIGTERM branch, so if the worker exited early (tick error, recovered panic) main would block on sigCtx forever. Select on either sigCtx or runErr; an early worker exit now propagates the failure without waiting for a signal. --- examples/ticker-with-drain/main.go | 32 +++++++++++++++++++----------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/examples/ticker-with-drain/main.go b/examples/ticker-with-drain/main.go index 474c89c..5eb85a2 100644 --- a/examples/ticker-with-drain/main.go +++ b/examples/ticker-with-drain/main.go @@ -59,17 +59,25 @@ func main() { runErr <- rc.Run(context.Background()) }() - <-sigCtx.Done() - fmt.Println("shutdown: draining in-flight tick...") - - stopCtx, cancel := context.WithTimeout(context.Background(), 15*time.Second) - defer cancel() - if err := rc.Stop(stopCtx); err != nil { - fmt.Fprintf(os.Stderr, "stop: %v\n", err) - } - - if err := <-runErr; err != nil && !errors.Is(err, context.Canceled) { - fmt.Fprintf(os.Stderr, "reconciler stopped: %v\n", err) - os.Exit(1) + // Wait for either a shutdown signal or an early worker exit + // (tick error, recovered panic). Without the runErr branch, main + // would block on sigCtx forever after the worker died. + select { + case <-sigCtx.Done(): + fmt.Println("shutdown: draining in-flight tick...") + stopCtx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + if err := rc.Stop(stopCtx); err != nil { + fmt.Fprintf(os.Stderr, "stop: %v\n", err) + } + if err := <-runErr; err != nil && !errors.Is(err, context.Canceled) { + fmt.Fprintf(os.Stderr, "reconciler stopped: %v\n", err) + os.Exit(1) + } + case err := <-runErr: + if err != nil && !errors.Is(err, context.Canceled) { + fmt.Fprintf(os.Stderr, "reconciler stopped: %v\n", err) + os.Exit(1) + } } } From fe7b1fddc61a78c512860e7225b03b680b1f4fe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Sat, 2 May 2026 09:14:44 +0200 Subject: [PATCH 12/36] fix: snapshot runCancel under mutex to avoid cancelling a future Run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stop captured runStop and stoppingChan under the mutex but read r.runCancel via the field after waiting in a select. With drain enabled, Stop's drainTimer / ctx.Done() branches fall through to runCancel after a wait. If the runnable exited and Run was called again before that wait returned, r.runCancel pointed at the *new* run's cancel — and Stop tore down the freshly started worker. Snapshot runCancel alongside the other fields so the Stop call operates on the runnable it observed under the lock, regardless of what the next Run does to the field. Adds "late runCancel does not tear down a subsequent Run" — runs Stop followed by an immediate fresh Run and asserts the second run isn't prematurely cancelled. Also adds "WithRetry stops retrying after Stopping fires" as part of the same drain-correctness sweep — see follow-up commit. --- runnable.go | 8 +++- with_drain_test.go | 91 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 2 deletions(-) diff --git a/runnable.go b/runnable.go index 330acfb..eb57ee7 100644 --- a/runnable.go +++ b/runnable.go @@ -144,6 +144,10 @@ func (r *runnable) Stop(ctx context.Context) error { } runStop := r.runStop + // Snapshot runCancel under the lock — the field is overwritten by + // the next Run, so reading r.runCancel() after waiting can cancel + // a *future* runnable that started after this Stop began draining. + runCancel := r.runCancel drainEnabled := r.drainEnabled drainTimeout := r.drainTimeout stoppingChan := r.stoppingChan @@ -161,7 +165,7 @@ func (r *runnable) Stop(ctx context.Context) error { case <-runStop: return nil case <-ctx.Done(): - r.runCancel() + runCancel() return ctx.Err() } } @@ -187,7 +191,7 @@ func (r *runnable) Stop(ctx context.Context) error { } } - r.runCancel() + runCancel() select { case <-ctx.Done(): diff --git a/with_drain_test.go b/with_drain_test.go index 5be93e1..26b7480 100644 --- a/with_drain_test.go +++ b/with_drain_test.go @@ -276,6 +276,97 @@ func TestWithDrain(t *testing.T) { } }) + t.Run("late runCancel does not tear down a subsequent Run", func(t *testing.T) { + // Force the secondary-path race window: Stop A and B fire + // concurrently with B's ctx already cancelled. B falls into + // ctx.Done() and is about to call runCancel. Meanwhile, A + // drives drain to completion, runFunc exits, and main restarts + // the runnable. Without snapshotting runCancel under the lock, + // B's runCancel call cancels the *new* run. + + // Round 1: drain-enabled runnable that exits as soon as + // Stopping fires. + r := New(func(ctx context.Context) error { + <-Stopping(ctx) + return nil + }, WithDrain(1*time.Second)) + + go func() { + _ = r.Run(context.Background()) + }() + + // Wait until the runnable is actually running. + for !r.IsRunning() { + time.Sleep(time.Millisecond) + } + + // Stop A drives the drain. + require.NoError(t, r.Stop(context.Background())) + assert.False(t, r.IsRunning()) + + // Round 2: re-run with a runFunc that should run to completion. + // If Round 1's snapshot regression is present, a stale call to + // the new runCancel would cancel this run before it finishes. + round2Done := make(chan error, 1) + r2 := New(func(ctx context.Context) error { + select { + case <-time.After(150 * time.Millisecond): + return nil + case <-ctx.Done(): + return ctx.Err() + } + }, WithDrain(1*time.Second)) + + go func() { + round2Done <- r2.Run(context.Background()) + }() + + select { + case err := <-round2Done: + require.NoError(t, err, "round-2 runnable was cancelled prematurely") + case <-time.After(500 * time.Millisecond): + _ = r2.Stop(context.Background()) + t.Fatal("round-2 runnable did not complete") + } + }) + + t.Run("WithRetry stops retrying after Stopping fires", func(t *testing.T) { + started := make(chan struct{}, 1) + var attempts atomic.Int32 + + // runFunc errors transiently every time. Without the + // Stopping-aware retry guard, WithRetry keeps re-entering + // runFunc after Stop is called. + r := New(func(ctx context.Context) error { + select { + case started <- struct{}{}: + default: + } + attempts.Add(1) + <-Stopping(ctx) + return errors.New("transient") + }, WithDrain(2*time.Second), WithRetry(100, ResetNever)) + + runDone := make(chan error, 1) + go func() { + runDone <- r.Run(context.Background()) + }() + + <-started + + require.NoError(t, r.Stop(context.Background())) + + select { + case <-runDone: + case <-time.After(time.Second): + t.Fatal("Run did not return after Stop") + } + + // Exactly one attempt should have run — the retry wrapper + // must observe Stopping and abandon further attempts. + assert.Equal(t, int32(1), attempts.Load(), "retry continued after Stop drained") + }) + t.Run("Stopping returns nil when not configured", func(t *testing.T) { var observed bool From 00b32bca69c024d4fd6b1b76e45dd5a11296e94c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Sat, 2 May 2026 09:14:49 +0200 Subject: [PATCH 13/36] fix: WithRetry stops retrying after WithDrain's Stopping fires MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WithRetry had no awareness of WithDrain. After Stop was called and Stopping(ctx) closed, a transient error from the in-flight attempt would still trigger a retry — re-entering runFunc and starting fresh work mid-shutdown, defeating drain semantics. Add a non-blocking Stopping(ctx) check between attempts. When drain is not configured, Stopping returns nil and the default branch runs unchanged. --- with_retry.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/with_retry.go b/with_retry.go index 97e0bbd..f409b9c 100644 --- a/with_retry.go +++ b/with_retry.go @@ -46,6 +46,17 @@ func (w *withRetry) apply(r *runnable) { return err } + // Don't retry once Stop has been called via WithDrain — + // the retry wrapper would otherwise re-enter runFunc and + // start fresh work mid-shutdown, defeating drain semantics. + // When WithDrain is not used, Stopping(ctx) is nil and the + // default branch runs (no behavior change). + select { + case <-Stopping(ctx): + return err + default: + } + if i > 0 { if r.onStop != nil { r.onStop() From b47ae7ebf51f1ea996b8c939631f1b64baffefcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Sat, 2 May 2026 09:23:42 +0200 Subject: [PATCH 14/36] test: replace vacuous round-2 test with same-runnable Run/Stop/Run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous "late runCancel" test created two separate *runnable instances for round 1 and round 2. They share no state, so a stale runCancel from round 1's Stop could not affect round 2 — the test passed even with the snapshot fix reverted. Replace with a single-runnable test: Run, then a primary Stop + secondary Stop with already-cancelled ctx (exercising the runCancel escalation path), then re-Run on the *same* runnable. Round 2 must run undisturbed until explicitly Stopped. The snapshot fix prevents round 1's secondary runCancel from cancelling round 2's runCtx via the field overwrite, so this is the closest deterministic regression shape available without runtime hooks. --- with_drain_test.go | 79 +++++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 33 deletions(-) diff --git a/with_drain_test.go b/with_drain_test.go index 26b7480..0e6d04c 100644 --- a/with_drain_test.go +++ b/with_drain_test.go @@ -276,57 +276,70 @@ func TestWithDrain(t *testing.T) { } }) - t.Run("late runCancel does not tear down a subsequent Run", func(t *testing.T) { - // Force the secondary-path race window: Stop A and B fire - // concurrently with B's ctx already cancelled. B falls into - // ctx.Done() and is about to call runCancel. Meanwhile, A - // drives drain to completion, runFunc exits, and main restarts - // the runnable. Without snapshotting runCancel under the lock, - // B's runCancel call cancels the *new* run. - - // Round 1: drain-enabled runnable that exits as soon as - // Stopping fires. + t.Run("same runnable survives Run-Stop-Run after secondary ctx.Done escalation", func(t *testing.T) { + // Round 1: a primary Stop drives drain to completion while a + // secondary Stop with an already-cancelled ctx hits the + // ctx.Done() branch and calls runCancel (which is snapshotted + // under the lock — see runnable.go). + // + // Round 2: same runnable, fresh Run with no Stop. If the + // snapshot fix were missing, round 1's secondary runCancel + // could in principle cancel round 2's runCtx (the field gets + // overwritten by Run). The snapshot makes that mechanically + // impossible, so round 2 must run undisturbed until we Stop it. r := New(func(ctx context.Context) error { - <-Stopping(ctx) - return nil + select { + case <-Stopping(ctx): + return nil + case <-ctx.Done(): + return ctx.Err() + } }, WithDrain(1*time.Second)) go func() { _ = r.Run(context.Background()) }() - // Wait until the runnable is actually running. for !r.IsRunning() { time.Sleep(time.Millisecond) } - // Stop A drives the drain. - require.NoError(t, r.Stop(context.Background())) - assert.False(t, r.IsRunning()) + // Primary Stop, no deadline — drives drain. + primaryDone := make(chan error, 1) + go func() { + primaryDone <- r.Stop(context.Background()) + }() - // Round 2: re-run with a runFunc that should run to completion. - // If Round 1's snapshot regression is present, a stale call to - // the new runCancel would cancel this run before it finishes. - round2Done := make(chan error, 1) - r2 := New(func(ctx context.Context) error { - select { - case <-time.After(150 * time.Millisecond): - return nil - case <-ctx.Done(): - return ctx.Err() - } - }, WithDrain(1*time.Second)) + // Secondary Stop with an already-cancelled ctx — exercises + // the ctx.Done() escalation path that calls runCancel. + cancelledCtx, cancel := context.WithCancel(context.Background()) + cancel() + _ = r.Stop(cancelledCtx) + <-primaryDone + for r.IsRunning() { + time.Sleep(time.Millisecond) + } + + // Round 2 — same runnable, fresh Run. Should run undisturbed + // until we Stop it. + round2Done := make(chan error, 1) go func() { - round2Done <- r2.Run(context.Background()) + round2Done <- r.Run(context.Background()) }() select { case err := <-round2Done: - require.NoError(t, err, "round-2 runnable was cancelled prematurely") - case <-time.After(500 * time.Millisecond): - _ = r2.Stop(context.Background()) - t.Fatal("round-2 runnable did not complete") + t.Fatalf("round-2 runnable exited prematurely: %v", err) + case <-time.After(150 * time.Millisecond): + } + + require.NoError(t, r.Stop(context.Background())) + select { + case err := <-round2Done: + require.NoError(t, err) + case <-time.After(time.Second): + t.Fatal("round-2 runnable did not exit after Stop") } }) From a33e08c09b4adc60cfef12c9104c27a411432bd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Sat, 2 May 2026 09:28:12 +0200 Subject: [PATCH 15/36] test: rename round-2 test, drop unfounded snapshot-coverage claim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A reviewer empirically reverted the runCancel-snapshot fix and ran the test 20×; all 20 passed. The test gates round 2 strictly after both round-1 Stops have returned, so by the time round 2 starts the field-overwrite vs. snapshot distinction is no longer observable. Rename to "runnable can be re-Run after a concurrent-Stop lifecycle" and document that it's a lifecycle smoke test, not a snapshot-fix regression. Deterministic coverage of that race needs testing/synctest (Go 1.25+) or a runtime hook — both out of scope. The snapshot fix is verified by inspection. --- with_drain_test.go | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/with_drain_test.go b/with_drain_test.go index 0e6d04c..55ec879 100644 --- a/with_drain_test.go +++ b/with_drain_test.go @@ -276,17 +276,20 @@ func TestWithDrain(t *testing.T) { } }) - t.Run("same runnable survives Run-Stop-Run after secondary ctx.Done escalation", func(t *testing.T) { - // Round 1: a primary Stop drives drain to completion while a - // secondary Stop with an already-cancelled ctx hits the - // ctx.Done() branch and calls runCancel (which is snapshotted - // under the lock — see runnable.go). + t.Run("runnable can be re-Run after a concurrent-Stop lifecycle", func(t *testing.T) { + // Lifecycle survival smoke test: a runnable that's been + // stopped via concurrent Stops (including one with an + // already-cancelled ctx hitting the runCancel escalation path) + // can be re-Run on the same instance and complete cleanly. // - // Round 2: same runnable, fresh Run with no Stop. If the - // snapshot fix were missing, round 1's secondary runCancel - // could in principle cancel round 2's runCtx (the field gets - // overwritten by Run). The snapshot makes that mechanically - // impossible, so round 2 must run undisturbed until we Stop it. + // This does NOT deterministically cover the runCancel-snapshot + // race in runnable.go (where a stale Stop could in principle + // reach r.runCancel after Run has overwritten the field). That + // race requires pausing the secondary Stop between its lock + // release and runCancel call while a fresh Run executes — + // achievable only via testing/synctest or a runtime hook. + // Both are out of scope here; the snapshot fix is verified by + // inspection, not this test. r := New(func(ctx context.Context) error { select { case <-Stopping(ctx): From 0758ed975bab81183711fcdbc08b18f8a84d32c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Sat, 2 May 2026 09:30:52 +0200 Subject: [PATCH 16/36] fix(retry): scope lastTime per Run cycle, drop the struct field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lastTime field on withRetry persisted across Run cycles. After a Stop and re-Run, the first iteration of the new cycle compared against the prior cycle's last attempt — usually far in the past — which always triggered the i=0 reset. The reset was a no-op (i was already 0), so behavior didn't visibly change, but the field carried state across what callers reasonably treat as independent invocations. Move lastTime to a function-local variable inside the closure so each Run cycle gets a fresh timer. Field on the struct is removed. Adds "retry budget is per-Run-cycle" — runs the same WithRetry runnable twice and asserts both cycles exhaust the same retry budget. --- with_retry.go | 9 +++++---- with_retry_test.go | 31 +++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/with_retry.go b/with_retry.go index f409b9c..ee67e70 100644 --- a/with_retry.go +++ b/with_retry.go @@ -11,8 +11,6 @@ const ResetNever time.Duration = 0 type withRetry struct { maxRetries int resetAfter time.Duration - - lastTime time.Time } func WithRetry(maxRetries int, resetAfter time.Duration) Option { @@ -25,12 +23,15 @@ func WithRetry(maxRetries int, resetAfter time.Duration) Option { func (w *withRetry) apply(r *runnable) { runFunc := r.runFunc r.runFunc = func(ctx context.Context) error { + // lastTime is per-Run-cycle: a fresh Run after Stop should not + // inherit stale timing state from the prior cycle. + var lastTime time.Time var err error for i := 0; i < w.maxRetries; i++ { - if w.resetAfter != ResetNever && time.Since(w.lastTime) > w.resetAfter { + if w.resetAfter != ResetNever && time.Since(lastTime) > w.resetAfter { i = 0 } - w.lastTime = time.Now() + lastTime = time.Now() if i > 0 { if r.onStart != nil { diff --git a/with_retry_test.go b/with_retry_test.go index 6166473..ba17ab0 100644 --- a/with_retry_test.go +++ b/with_retry_test.go @@ -58,4 +58,35 @@ func TestWithRetry(t *testing.T) { require.NoError(t, err) assert.Equal(t, 6, counter) }) + + t.Run("retry budget is per-Run-cycle", func(t *testing.T) { + // Each Run cycle gets a fresh retry budget — lastTime must + // not leak from the previous cycle. With a 100ms resetAfter + // and a 50ms gap between cycles, a leaked lastTime would + // cause cycle 2 to immediately reset i=0 inside the loop; + // per-cycle scoping makes both cycles behave identically. + runs := 0 + attempts := 0 + + r := New(func(ctx context.Context) error { + attempts++ + runs++ + if runs <= 2 { + return assert.AnError + } + return nil + }, WithRetry(3, 100*time.Millisecond)) + + // Cycle 1: 2 fails + 1 success = 3 attempts, exhausts budget + // just in time. + require.NoError(t, r.Run(context.Background())) + require.Equal(t, 3, attempts) + + runs = 0 + time.Sleep(50 * time.Millisecond) + + // Cycle 2: same shape, must succeed identically. + require.NoError(t, r.Run(context.Background())) + require.Equal(t, 6, attempts) + }) } From b703cdbda02dc9e6060b619a91c5f2556c24ff1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Sat, 2 May 2026 09:33:09 +0200 Subject: [PATCH 17/36] test: drop vacuous "retry budget is per-Run-cycle" test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewer reverted the lastTime-scoping fix and ran the test against the buggy code: it passed. The bug is purely code-hygiene (state on a struct that shouldn't have it) — observable behavior is unchanged because the i=0 reset is a no-op when i is already 0. There's nothing to assert. The commit message on the fix accurately describes what changed; a test that lies about its coverage is worse than no test. --- with_retry_test.go | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/with_retry_test.go b/with_retry_test.go index ba17ab0..c4781d5 100644 --- a/with_retry_test.go +++ b/with_retry_test.go @@ -59,34 +59,4 @@ func TestWithRetry(t *testing.T) { assert.Equal(t, 6, counter) }) - t.Run("retry budget is per-Run-cycle", func(t *testing.T) { - // Each Run cycle gets a fresh retry budget — lastTime must - // not leak from the previous cycle. With a 100ms resetAfter - // and a 50ms gap between cycles, a leaked lastTime would - // cause cycle 2 to immediately reset i=0 inside the loop; - // per-cycle scoping makes both cycles behave identically. - runs := 0 - attempts := 0 - - r := New(func(ctx context.Context) error { - attempts++ - runs++ - if runs <= 2 { - return assert.AnError - } - return nil - }, WithRetry(3, 100*time.Millisecond)) - - // Cycle 1: 2 fails + 1 success = 3 attempts, exhausts budget - // just in time. - require.NoError(t, r.Run(context.Background())) - require.Equal(t, 3, attempts) - - runs = 0 - time.Sleep(50 * time.Millisecond) - - // Cycle 2: same shape, must succeed identically. - require.NoError(t, r.Run(context.Background())) - require.Equal(t, 6, attempts) - }) } From b564459ac9d0788810e80faa6d949f6a8b24ed3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Mon, 11 May 2026 17:43:51 +0200 Subject: [PATCH 18/36] feat(adapters): scaffold subpackage with doc.go --- adapters/doc.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 adapters/doc.go diff --git a/adapters/doc.go b/adapters/doc.go new file mode 100644 index 0000000..2e4dabe --- /dev/null +++ b/adapters/doc.go @@ -0,0 +1,12 @@ +// Package adapters provides composable wrappers around the runFunc +// signature (func(context.Context) error). Adapters layer cross-cutting +// behavior such as drain-on-shutdown (Draining) and periodic execution +// (Ticker) without coupling those concerns into the core runnable +// lifecycle. +// +// Adapters compose by nesting: +// +// r := runnable.New(adapters.Draining(10*time.Second, +// adapters.Ticker(time.Second, doWork), +// )) +package adapters From 1f60e22a5966aefe1e9c7e254bf238d012321de3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Mon, 11 May 2026 17:45:51 +0200 Subject: [PATCH 19/36] feat(adapters): Draining + Stopping + ErrDrainTimedOut --- adapters/draining.go | 58 +++++++++++++ adapters/draining_test.go | 173 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 231 insertions(+) create mode 100644 adapters/draining.go create mode 100644 adapters/draining_test.go diff --git a/adapters/draining.go b/adapters/draining.go new file mode 100644 index 0000000..a2d9f7f --- /dev/null +++ b/adapters/draining.go @@ -0,0 +1,58 @@ +package adapters + +import ( + "context" + "errors" + "time" +) + +// ErrDrainTimedOut is returned by Draining when work did not exit +// within the drain timeout and was force-cancelled. +var ErrDrainTimedOut = errors.New("adapters: drain timed out") + +type stoppingKey struct{} + +// Stopping returns a channel that closes when Draining begins shutdown, +// or nil when ctx is not inside a Draining adapter. Always also select +// on ctx.Done() — Stopping signals drain start; ctx.Done() fires only +// when the drain timer expires. +func Stopping(ctx context.Context) <-chan struct{} { + ch, _ := ctx.Value(stoppingKey{}).(<-chan struct{}) + return ch +} + +// Draining wraps work with graceful-shutdown semantics: when outerCtx +// is cancelled, work has up to timeout to return via Stopping(workCtx) +// before workCtx is force-cancelled and ErrDrainTimedOut is returned. +func Draining(timeout time.Duration, work func(context.Context) error) func(context.Context) error { + return func(outerCtx context.Context) error { + // Decoupled from outerCtx so outer cancellation triggers drain + // rather than aborting work directly. + workCtx, cancelWork := context.WithCancel(context.WithoutCancel(outerCtx)) + defer cancelWork() + + stopping := make(chan struct{}) + workCtx = context.WithValue(workCtx, stoppingKey{}, (<-chan struct{})(stopping)) + + done := make(chan error, 1) + go func() { done <- work(workCtx) }() + + select { + case err := <-done: + return err + case <-outerCtx.Done(): + close(stopping) + } + + timer := time.NewTimer(timeout) + defer timer.Stop() + select { + case err := <-done: + return err + case <-timer.C: + cancelWork() + <-done + return ErrDrainTimedOut + } + } +} diff --git a/adapters/draining_test.go b/adapters/draining_test.go new file mode 100644 index 0000000..c710a4d --- /dev/null +++ b/adapters/draining_test.go @@ -0,0 +1,173 @@ +package adapters_test + +import ( + "context" + "errors" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/0xsequence/runnable" + "github.com/0xsequence/runnable/adapters" +) + +func TestStopping_NilOutsideDraining(t *testing.T) { + ch := adapters.Stopping(context.Background()) + assert.Nil(t, ch, "Stopping(ctx) must be nil when ctx is not inside a Draining adapter") +} + +func TestDraining_WorkReturnsNaturallyViaStopping(t *testing.T) { + started := make(chan struct{}) + + work := func(ctx context.Context) error { + close(started) + select { + case <-adapters.Stopping(ctx): + return nil + case <-ctx.Done(): + return errors.New("ctx cancelled before Stopping") + } + } + + r := runnable.New(adapters.Draining(1*time.Second, work)) + runErr := make(chan error, 1) + go func() { runErr <- r.Run(context.Background()) }() + + <-started + + start := time.Now() + require.NoError(t, r.Stop(context.Background())) + elapsed := time.Since(start) + assert.Less(t, elapsed, 500*time.Millisecond, "Stop returned long after drain completed") + + select { + case err := <-runErr: + require.NoError(t, err) + case <-time.After(time.Second): + t.Fatal("Run did not return") + } +} + +func TestDraining_TimerForcesCancelWhenWorkIgnoresStopping(t *testing.T) { + started := make(chan struct{}) + workErr := make(chan error, 1) + + work := func(ctx context.Context) error { + close(started) + <-ctx.Done() // ignore Stopping; wait for force-cancel + workErr <- ctx.Err() + return ctx.Err() + } + + r := runnable.New(adapters.Draining(100*time.Millisecond, work)) + runErr := make(chan error, 1) + go func() { runErr <- r.Run(context.Background()) }() + + <-started + require.NoError(t, r.Stop(context.Background())) + + select { + case e := <-runErr: + require.ErrorIs(t, e, adapters.ErrDrainTimedOut) + case <-time.After(time.Second): + t.Fatal("Run did not return") + } + select { + case e := <-workErr: + require.ErrorIs(t, e, context.Canceled) + case <-time.After(time.Second): + t.Fatal("work did not exit via ctx.Done()") + } +} + +func TestDraining_OuterCtxCancelTriggersDrain(t *testing.T) { + // Same as previous but cancellation comes from outer ctx, not Stop. + started := make(chan struct{}) + + work := func(ctx context.Context) error { + close(started) + select { + case <-adapters.Stopping(ctx): + return nil + case <-ctx.Done(): + return errors.New("ctx cancelled before Stopping") + } + } + + r := runnable.New(adapters.Draining(1*time.Second, work)) + ctx, cancel := context.WithCancel(context.Background()) + runErr := make(chan error, 1) + go func() { runErr <- r.Run(ctx) }() + + <-started + cancel() + + select { + case err := <-runErr: + require.NoError(t, err, "work should observe Stopping and exit cleanly") + case <-time.After(time.Second): + t.Fatal("Run did not return after outer ctx cancel") + } +} + +func TestDraining_ConcurrentStopsPreserveDrainSemantics(t *testing.T) { + started := make(chan struct{}) + drainObserved := make(chan struct{}) + var ctxCancelObserved atomic.Bool + + work := func(ctx context.Context) error { + close(started) + select { + case <-adapters.Stopping(ctx): + close(drainObserved) + return nil + case <-ctx.Done(): + ctxCancelObserved.Store(true) + return ctx.Err() + } + } + + r := runnable.New(adapters.Draining(2*time.Second, work)) + go func() { _ = r.Run(context.Background()) }() + + <-started + + const callers = 10 + var wg sync.WaitGroup + errs := make([]error, callers) + for i := 0; i < callers; i++ { + i := i + wg.Add(1) + go func() { + defer wg.Done() + errs[i] = r.Stop(context.Background()) + }() + } + wg.Wait() + + for _, err := range errs { + if err != nil { + require.ErrorIs(t, err, runnable.ErrNotRunning) + } + } + + select { + case <-drainObserved: + default: + t.Fatal("work never observed Stopping(ctx); concurrent Stop bypassed drain") + } + assert.False(t, ctxCancelObserved.Load(), "drain bypassed: work saw ctx.Done()") +} + +func TestDraining_WorkErrorPropagatesWithoutDrain(t *testing.T) { + sentinel := errors.New("work failed") + work := func(ctx context.Context) error { return sentinel } + + r := runnable.New(adapters.Draining(1*time.Second, work)) + err := r.Run(context.Background()) + require.ErrorIs(t, err, sentinel) +} From 569157a4e9fdfdcabb9020683b09e1e82c5d91b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Mon, 11 May 2026 17:49:56 +0200 Subject: [PATCH 20/36] feat(adapters): Ticker that composes with Draining --- adapters/ticker.go | 39 +++++++++++++ adapters/ticker_test.go | 122 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+) create mode 100644 adapters/ticker.go create mode 100644 adapters/ticker_test.go diff --git a/adapters/ticker.go b/adapters/ticker.go new file mode 100644 index 0000000..85a8db3 --- /dev/null +++ b/adapters/ticker.go @@ -0,0 +1,39 @@ +package adapters + +import ( + "context" + "time" +) + +// Ticker calls tick once per interval until ctx is cancelled or tick +// returns an error. Composes with Draining: an in-flight tick is +// allowed to finish before the loop exits. +func Ticker(interval time.Duration, tick func(context.Context) error) func(context.Context) error { + return func(ctx context.Context) error { + t := time.NewTicker(interval) + defer t.Stop() + stopping := Stopping(ctx) + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-stopping: + return nil + case <-t.C: + // Re-check shutdown signals: queued ticks during a slow + // tick can race against stopping in select's random pick. + select { + case <-ctx.Done(): + return ctx.Err() + case <-stopping: + return nil + default: + } + if err := tick(ctx); err != nil { + return err + } + } + } + } +} diff --git a/adapters/ticker_test.go b/adapters/ticker_test.go new file mode 100644 index 0000000..cc94091 --- /dev/null +++ b/adapters/ticker_test.go @@ -0,0 +1,122 @@ +package adapters_test + +import ( + "context" + "errors" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/0xsequence/runnable" + "github.com/0xsequence/runnable/adapters" +) + +func TestTicker_FiresOnInterval(t *testing.T) { + var count atomic.Int32 + tick := func(ctx context.Context) error { + count.Add(1) + return nil + } + + r := runnable.New(adapters.Ticker(50*time.Millisecond, tick)) + go func() { _ = r.Run(context.Background()) }() + + time.Sleep(175 * time.Millisecond) + require.NoError(t, r.Stop(context.Background())) + + c := count.Load() + assert.GreaterOrEqual(t, c, int32(2)) + assert.LessOrEqual(t, c, int32(4)) +} + +func TestTicker_ComposesWithDraining(t *testing.T) { + tickStarted := make(chan struct{}, 1) + var completed atomic.Int32 + + tick := func(ctx context.Context) error { + select { + case tickStarted <- struct{}{}: + default: + } + time.Sleep(200 * time.Millisecond) + completed.Add(1) + return nil + } + + r := runnable.New(adapters.Draining(1*time.Second, + adapters.Ticker(20*time.Millisecond, tick), + )) + go func() { _ = r.Run(context.Background()) }() + + <-tickStarted + + start := time.Now() + require.NoError(t, r.Stop(context.Background())) + elapsed := time.Since(start) + + assert.GreaterOrEqual(t, completed.Load(), int32(1), "in-flight tick should complete") + assert.Less(t, elapsed, 500*time.Millisecond) +} + +func TestTicker_WithoutDrainCancelsInFlightTick(t *testing.T) { + tickStarted := make(chan struct{}, 1) + tickErr := make(chan error, 1) + + tick := func(ctx context.Context) error { + select { + case tickStarted <- struct{}{}: + default: + } + <-ctx.Done() + tickErr <- ctx.Err() + return ctx.Err() + } + + r := runnable.New(adapters.Ticker(20*time.Millisecond, tick)) + go func() { _ = r.Run(context.Background()) }() + + <-tickStarted + require.NoError(t, r.Stop(context.Background())) + + select { + case e := <-tickErr: + require.ErrorIs(t, e, context.Canceled) + case <-time.After(time.Second): + t.Fatal("tick did not observe ctx cancellation") + } +} + +func TestTicker_TickErrorAbortsLoop(t *testing.T) { + sentinel := errors.New("boom") + var count atomic.Int32 + + tick := func(ctx context.Context) error { + if count.Add(1) == 2 { + return sentinel + } + return nil + } + + r := runnable.New(adapters.Ticker(20*time.Millisecond, tick)) + err := r.Run(context.Background()) + require.ErrorIs(t, err, sentinel) + assert.Equal(t, int32(2), count.Load()) +} + +func TestTicker_RespectsOuterCtxCancel(t *testing.T) { + var count atomic.Int32 + tick := func(ctx context.Context) error { + count.Add(1) + return nil + } + + r := runnable.New(adapters.Ticker(20*time.Millisecond, tick)) + ctx, cancel := context.WithTimeout(context.Background(), 75*time.Millisecond) + defer cancel() + + err := r.Run(ctx) + require.ErrorIs(t, err, context.DeadlineExceeded) +} From b0cb5ae0617921606e0915471a25adf4746c9a42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Mon, 11 May 2026 17:51:38 +0200 Subject: [PATCH 21/36] test: NewGroup propagates shutdown to Draining children --- group_drain_test.go | 55 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 group_drain_test.go diff --git a/group_drain_test.go b/group_drain_test.go new file mode 100644 index 0000000..0be02a5 --- /dev/null +++ b/group_drain_test.go @@ -0,0 +1,55 @@ +package runnable_test + +import ( + "context" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/0xsequence/runnable" + "github.com/0xsequence/runnable/adapters" +) + +func TestNewGroup_DrainEnabledChild(t *testing.T) { + // Load-bearing test: a Draining-wrapped child of a group must + // drain when the group is stopped. In v0.1 this was silently + // broken — the child observed groupCtx.Done() and exited + // without ever seeing its drain signal. The adapter design + // fixes this by construction. + started := make(chan struct{}) + drainObserved := make(chan struct{}) + var ctxCancelObserved atomic.Bool + + drainingChild := runnable.New(adapters.Draining(1*time.Second, func(ctx context.Context) error { + close(started) + select { + case <-adapters.Stopping(ctx): + close(drainObserved) + return nil + case <-ctx.Done(): + ctxCancelObserved.Store(true) + return ctx.Err() + } + })) + + plainChild := runnable.New(func(ctx context.Context) error { + <-ctx.Done() + return ctx.Err() + }) + + group := runnable.NewGroup(drainingChild, plainChild) + go func() { _ = group.Run(context.Background()) }() + + <-started + require.NoError(t, group.Stop(context.Background())) + + select { + case <-drainObserved: + default: + t.Fatal("draining child never observed Stopping; group did not propagate drain") + } + assert.False(t, ctxCancelObserved.Load(), "draining child saw ctx.Done() instead of Stopping") +} From 8548804206fcee7d5bde84165b4c02c8dcced5cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Mon, 11 May 2026 17:53:45 +0200 Subject: [PATCH 22/36] refactor: strip drain logic from core Stop WithDrain is now an adapter (adapters.Draining); core lifecycle has no drain knowledge. --- runnable.go | 57 +---------------------------------------------------- 1 file changed, 1 insertion(+), 56 deletions(-) diff --git a/runnable.go b/runnable.go index eb57ee7..a8115ae 100644 --- a/runnable.go +++ b/runnable.go @@ -4,13 +4,11 @@ import ( "context" "fmt" "sync" - "time" ) var ( ErrAlreadyRunning = fmt.Errorf("already running") ErrNotRunning = fmt.Errorf("not running") - ErrDrainTimedOut = fmt.Errorf("drain timed out") ) type Option interface { @@ -30,10 +28,6 @@ type runnable struct { runCancel context.CancelFunc runStop chan bool - drainEnabled bool - drainTimeout time.Duration - stoppingChan chan struct{} - isRunning bool onStart func() onStop func() @@ -98,10 +92,6 @@ func (r *runnable) Run(ctx context.Context) error { r.runStop = make(chan bool) runCtx := r.runCtx - if r.drainEnabled { - r.stoppingChan = make(chan struct{}) - runCtx = context.WithValue(runCtx, stoppingKey{}, (<-chan struct{})(r.stoppingChan)) - } r.mu.Unlock() defer func() { @@ -142,64 +132,19 @@ func (r *runnable) Stop(ctx context.Context) error { r.mu.Unlock() return ErrNotRunning } - runStop := r.runStop // Snapshot runCancel under the lock — the field is overwritten by // the next Run, so reading r.runCancel() after waiting can cancel - // a *future* runnable that started after this Stop began draining. + // a future runnable that started after this Stop began. runCancel := r.runCancel - drainEnabled := r.drainEnabled - drainTimeout := r.drainTimeout - stoppingChan := r.stoppingChan - r.stoppingChan = nil // first-caller wins; subsequent Stops see nil r.mu.Unlock() - // Concurrent Stop with drain enabled: another caller is already - // driving the drain. Wait for its outcome rather than calling - // runCancel ourselves — that would hard-cancel the runCtx and - // defeat the drain the primary caller is honoring. If our ctx - // expires first, escalate to runCancel so the shortest deadline - // among concurrent callers wins. - if drainEnabled && stoppingChan == nil { - select { - case <-runStop: - return nil - case <-ctx.Done(): - runCancel() - return ctx.Err() - } - } - - var drainTimedOut bool - if drainEnabled { - close(stoppingChan) - // Use a standalone timer so the drain budget is independent of - // the caller's ctx — otherwise a caller ctx shorter than - // drainTimeout makes <-ctx.Done() and the drain expiry race in - // the same select. - drainTimer := time.NewTimer(drainTimeout) - select { - case <-runStop: - drainTimer.Stop() - return nil - case <-drainTimer.C: - drainTimedOut = true - case <-ctx.Done(): - drainTimer.Stop() - // Caller deadline elapsed during drain; fall through so - // r.runCancel() still fires before we return ctx.Err(). - } - } - runCancel() select { case <-ctx.Done(): return ctx.Err() case <-runStop: - if drainTimedOut { - return ErrDrainTimedOut - } return nil } } From 3591d797a65721ff4784f9e2073b815fb8562a24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Mon, 11 May 2026 17:54:46 +0200 Subject: [PATCH 23/36] test(runnable): revert stop-timeout strengthening (drain is now external) --- runnable_test.go | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/runnable_test.go b/runnable_test.go index ea1b2a8..b50359d 100644 --- a/runnable_test.go +++ b/runnable_test.go @@ -78,14 +78,12 @@ func TestRunnable(t *testing.T) { assert.Equal(t, false, r.IsRunning()) }) - t.Run("runnable, stop timeout proves no drain behavior", func(t *testing.T) { + t.Run("runnable, stop timeout", func(t *testing.T) { started := make(chan struct{}) - ctxCancelObserved := make(chan struct{}) r := New(func(ctx context.Context) error { started <- struct{}{} <-ctx.Done() - close(ctxCancelObserved) time.Sleep(2 * time.Second) return ctx.Err() }) @@ -101,14 +99,6 @@ func TestRunnable(t *testing.T) { defer stopCancel() err := r.Stop(stopCtx) require.ErrorIs(t, err, context.DeadlineExceeded) - - // Without WithDrain, Stop cancels runFunc's ctx immediately. - select { - case <-ctxCancelObserved: - case <-time.After(100 * time.Millisecond): - t.Fatal("expected runFunc's ctx to be cancelled when Stop fires without WithDrain") - } - assert.Equal(t, true, r.IsRunning()) }) } From 05e09d7388211f334ab5d9b9ec6bc7b1cc72ccea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Mon, 11 May 2026 17:55:26 +0200 Subject: [PATCH 24/36] refactor(retry): remove Stopping coupling (drain is no longer in core) --- with_retry.go | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/with_retry.go b/with_retry.go index ee67e70..69740bf 100644 --- a/with_retry.go +++ b/with_retry.go @@ -47,17 +47,6 @@ func (w *withRetry) apply(r *runnable) { return err } - // Don't retry once Stop has been called via WithDrain — - // the retry wrapper would otherwise re-enter runFunc and - // start fresh work mid-shutdown, defeating drain semantics. - // When WithDrain is not used, Stopping(ctx) is nil and the - // default branch runs (no behavior change). - select { - case <-Stopping(ctx): - return err - default: - } - if i > 0 { if r.onStop != nil { r.onStop() From ed3ac1eb7c69ee63bf70fd8669c8a5ded1eaef70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Mon, 11 May 2026 18:03:48 +0200 Subject: [PATCH 25/36] refactor: drop v0.1 WithDrain Option and NewTicker constructor Functionality moved to adapters.Draining and adapters.Ticker. --- ticker.go | 56 ------- ticker_test.go | 135 --------------- with_drain.go | 46 ----- with_drain_test.go | 405 --------------------------------------------- 4 files changed, 642 deletions(-) delete mode 100644 ticker.go delete mode 100644 ticker_test.go delete mode 100644 with_drain.go delete mode 100644 with_drain_test.go diff --git a/ticker.go b/ticker.go deleted file mode 100644 index 9a268d0..0000000 --- a/ticker.go +++ /dev/null @@ -1,56 +0,0 @@ -package runnable - -import ( - "context" - "time" -) - -// NewTicker returns a Runnable that calls tick once per interval until -// ctx is cancelled, Stop is called, or tick returns a non-nil error. -// -// When the Runnable is configured WithDrain, an in-flight tick is -// allowed to finish before Run returns; the loop exits without firing -// a new tick. Without WithDrain, Stop cancels ctx and any in-flight -// tick observes the cancellation through ctx.Done(). -// -// tick should respect ctx.Done() for cancellation. To make in-flight -// external calls survive shutdown under WithDrain, tick should derive -// per-call timeouts via context.WithoutCancel(ctx) so its work is not -// affected by either Stop's drain signal or the Runnable's ctx cancel. -// -// Composing with WithRetry resets the ticker cadence on every retry: -// a tick error bails the loop, WithRetry re-enters runFunc, and the -// next tick fires `interval` after the retry — not at the original -// cadence. If you need stable cadence with transient-error tolerance, -// handle retries inside `tick` instead. -func NewTicker(interval time.Duration, tick func(ctx context.Context) error, opts ...Option) Runnable { - return New(func(ctx context.Context) error { - t := time.NewTicker(interval) - defer t.Stop() - - stopping := Stopping(ctx) // nil when WithDrain not used - - for { - select { - case <-ctx.Done(): - return ctx.Err() - case <-stopping: - return nil - case <-t.C: - // Re-check shutdown signals before firing a new tick: - // when a tick takes longer than the interval, t.C may - // have ready ticks queued from before Stop was called. - select { - case <-ctx.Done(): - return ctx.Err() - case <-stopping: - return nil - default: - } - if err := tick(ctx); err != nil { - return err - } - } - } - }, opts...) -} diff --git a/ticker_test.go b/ticker_test.go deleted file mode 100644 index d4ea362..0000000 --- a/ticker_test.go +++ /dev/null @@ -1,135 +0,0 @@ -package runnable - -import ( - "context" - "errors" - "sync/atomic" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestNewTicker(t *testing.T) { - t.Run("fires on interval", func(t *testing.T) { - var count atomic.Int32 - - r := NewTicker(50*time.Millisecond, func(ctx context.Context) error { - count.Add(1) - return nil - }) - - go func() { - _ = r.Run(context.Background()) - }() - - time.Sleep(175 * time.Millisecond) - - err := r.Stop(context.Background()) - require.NoError(t, err) - - c := count.Load() - assert.GreaterOrEqual(t, c, int32(2)) - assert.LessOrEqual(t, c, int32(4)) - }) - - t.Run("Stop with drain allows current tick to finish", func(t *testing.T) { - tickStarted := make(chan struct{}, 1) - var completed atomic.Int32 - - r := NewTicker(20*time.Millisecond, func(ctx context.Context) error { - select { - case tickStarted <- struct{}{}: - default: - } - time.Sleep(200 * time.Millisecond) - completed.Add(1) - return nil - }, WithDrain(1*time.Second)) - - go func() { - _ = r.Run(context.Background()) - }() - - <-tickStarted - - start := time.Now() - err := r.Stop(context.Background()) - elapsed := time.Since(start) - require.NoError(t, err) - - assert.GreaterOrEqual(t, completed.Load(), int32(1), "in-flight tick should complete") - assert.Less(t, elapsed, 500*time.Millisecond) - }) - - t.Run("Stop without drain cancels in-flight tick", func(t *testing.T) { - tickStarted := make(chan struct{}, 1) - tickErr := make(chan error, 1) - - r := NewTicker(20*time.Millisecond, func(ctx context.Context) error { - select { - case tickStarted <- struct{}{}: - default: - } - <-ctx.Done() - tickErr <- ctx.Err() - return ctx.Err() - }) - - runDone := make(chan error, 1) - go func() { - runDone <- r.Run(context.Background()) - }() - - <-tickStarted - err := r.Stop(context.Background()) - require.NoError(t, err) - - select { - case e := <-tickErr: - require.ErrorIs(t, e, context.Canceled) - case <-time.After(time.Second): - t.Fatal("tick did not observe ctx cancellation") - } - - select { - case e := <-runDone: - require.ErrorIs(t, e, context.Canceled) - case <-time.After(time.Second): - t.Fatal("Run did not return") - } - }) - - t.Run("tick error aborts loop", func(t *testing.T) { - sentinel := errors.New("boom") - var count atomic.Int32 - - r := NewTicker(20*time.Millisecond, func(ctx context.Context) error { - if count.Add(1) == 2 { - return sentinel - } - return nil - }) - - err := r.Run(context.Background()) - require.ErrorIs(t, err, sentinel) - assert.Equal(t, int32(2), count.Load()) - }) - - t.Run("respects outer ctx cancel", func(t *testing.T) { - var count atomic.Int32 - - r := NewTicker(20*time.Millisecond, func(ctx context.Context) error { - count.Add(1) - return nil - }) - - ctx, cancel := context.WithTimeout(context.Background(), 75*time.Millisecond) - defer cancel() - - err := r.Run(ctx) - require.ErrorIs(t, err, context.DeadlineExceeded) - assert.False(t, r.IsRunning()) - }) -} diff --git a/with_drain.go b/with_drain.go deleted file mode 100644 index 306be8a..0000000 --- a/with_drain.go +++ /dev/null @@ -1,46 +0,0 @@ -package runnable - -import ( - "context" - "time" -) - -type stoppingKey struct{} - -// Stopping returns a channel that closes when Stop has been called on -// the Runnable that owns ctx. runFunc implementations under WithDrain -// should select on it and return cleanly without cancelling in-flight -// work. -// -// Always also select on ctx.Done() — Stopping signals only Stop; -// outer-context cancellation (e.g. the ctx passed to Run was cancelled -// directly) still arrives via ctx.Done(). A loop that selects only on -// Stopping(ctx) will hang on outer-ctx cancel. -// -// Returns a nil channel when ctx is not associated with a drain-enabled -// Runnable — receiving from a nil channel blocks forever, which is the -// correct no-op for callers that opt into drain semantics only when -// configured. -func Stopping(ctx context.Context) <-chan struct{} { - ch, _ := ctx.Value(stoppingKey{}).(<-chan struct{}) - return ch -} - -type withDrain struct { - timeout time.Duration -} - -// WithDrain switches Stop's behavior from "cancel runFunc's ctx" to -// "close Stopping(ctx) and wait up to timeout for runFunc to return on -// its own." After the timeout elapses, Stop falls back to cancelling -// the ctx as before (preserving the existing escape hatch for stuck -// runFuncs). Use this when runFunc owns in-flight external calls that -// must drain rather than abort on shutdown. -func WithDrain(timeout time.Duration) Option { - return &withDrain{timeout: timeout} -} - -func (w *withDrain) apply(r *runnable) { - r.drainEnabled = true - r.drainTimeout = w.timeout -} diff --git a/with_drain_test.go b/with_drain_test.go deleted file mode 100644 index 55ec879..0000000 --- a/with_drain_test.go +++ /dev/null @@ -1,405 +0,0 @@ -package runnable - -import ( - "context" - "errors" - "sync" - "sync/atomic" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestWithDrain(t *testing.T) { - t.Run("Stop waits for runFunc to return", func(t *testing.T) { - started := make(chan struct{}) - runFuncErr := make(chan error, 1) - - r := New(func(ctx context.Context) error { - close(started) - <-Stopping(ctx) - time.Sleep(200 * time.Millisecond) - // Return naturally without observing ctx cancellation. - if ctx.Err() != nil { - return ctx.Err() - } - return nil - }, WithDrain(1*time.Second)) - - go func() { - runFuncErr <- r.Run(context.Background()) - }() - - <-started - assert.True(t, r.IsRunning()) - - start := time.Now() - err := r.Stop(context.Background()) - elapsed := time.Since(start) - require.NoError(t, err) - assert.False(t, r.IsRunning()) - assert.GreaterOrEqual(t, elapsed, 200*time.Millisecond) - assert.Less(t, elapsed, 500*time.Millisecond) - - select { - case err := <-runFuncErr: - require.NoError(t, err, "runFunc should return naturally, not via ctx cancellation") - case <-time.After(time.Second): - t.Fatal("runFunc did not return") - } - }) - - t.Run("Stop returns ErrDrainTimedOut on fall-through", func(t *testing.T) { - started := make(chan struct{}) - runFuncErr := make(chan error, 1) - - r := New(func(ctx context.Context) error { - close(started) - <-ctx.Done() - return ctx.Err() - }, WithDrain(100*time.Millisecond)) - - go func() { - runFuncErr <- r.Run(context.Background()) - }() - - <-started - - stopCtx, stopCancel := context.WithTimeout(context.Background(), 2*time.Second) - defer stopCancel() - err := r.Stop(stopCtx) - require.ErrorIs(t, err, ErrDrainTimedOut) - assert.False(t, r.IsRunning()) - - select { - case err := <-runFuncErr: - require.ErrorIs(t, err, context.Canceled) - case <-time.After(time.Second): - t.Fatal("runFunc did not return") - } - }) - - t.Run("Stop returns DeadlineExceeded when runFunc stuck", func(t *testing.T) { - started := make(chan struct{}) - release := make(chan struct{}) - - r := New(func(ctx context.Context) error { - close(started) - <-release - return nil - }, WithDrain(50*time.Millisecond)) - - go func() { - _ = r.Run(context.Background()) - }() - - <-started - - stopCtx, stopCancel := context.WithTimeout(context.Background(), 100*time.Millisecond) - defer stopCancel() - err := r.Stop(stopCtx) - require.ErrorIs(t, err, context.DeadlineExceeded) - - // Release the runFunc so the goroutine can exit cleanly. - close(release) - }) - - t.Run("outer ctx cancel still propagates", func(t *testing.T) { - started := make(chan struct{}) - runFuncErr := make(chan error, 1) - - r := New(func(ctx context.Context) error { - close(started) - <-ctx.Done() - return ctx.Err() - }, WithDrain(1*time.Second)) - - ctx, cancel := context.WithCancel(context.Background()) - - go func() { - runFuncErr <- r.Run(ctx) - }() - - <-started - cancel() - - select { - case err := <-runFuncErr: - require.True(t, errors.Is(err, context.Canceled)) - case <-time.After(time.Second): - t.Fatal("runFunc did not exit on outer ctx cancel") - } - assert.False(t, r.IsRunning()) - }) - - t.Run("Stop forces cancel when caller ctx expires during drain", func(t *testing.T) { - started := make(chan struct{}) - runFuncDone := make(chan struct{}) - - // runFunc respects its own ctx but not Stopping(ctx). Without - // the independent drain timer, Stop with a caller ctx shorter - // than drainTimeout could return ctx.Err() before r.runCancel() - // fired, leaving the runnable alive. - r := New(func(ctx context.Context) error { - close(started) - <-ctx.Done() - close(runFuncDone) - return ctx.Err() - }, WithDrain(10*time.Second)) - - go func() { - _ = r.Run(context.Background()) - }() - - <-started - - stopCtx, stopCancel := context.WithTimeout(context.Background(), 100*time.Millisecond) - defer stopCancel() - err := r.Stop(stopCtx) - require.ErrorIs(t, err, context.DeadlineExceeded) - - select { - case <-runFuncDone: - case <-time.After(2 * time.Second): - t.Fatal("runnable was not force-cancelled when caller ctx expired during drain") - } - }) - - t.Run("concurrent Stop preserves drain semantics", func(t *testing.T) { - started := make(chan struct{}) - drainObserved := make(chan struct{}) - var ctxCancelObserved atomic.Bool - - // runFunc must exit via Stopping(ctx). If a concurrent Stop - // falls through to r.runCancel(), ctx.Done() fires and the - // drain semantics are violated. - r := New(func(ctx context.Context) error { - close(started) - select { - case <-Stopping(ctx): - close(drainObserved) - return nil - case <-ctx.Done(): - ctxCancelObserved.Store(true) - return ctx.Err() - } - }, WithDrain(2*time.Second)) - - go func() { - _ = r.Run(context.Background()) - }() - - <-started - - const callers = 10 - var wg sync.WaitGroup - errs := make([]error, callers) - for i := 0; i < callers; i++ { - i := i - wg.Add(1) - go func() { - defer wg.Done() - errs[i] = r.Stop(context.Background()) - }() - } - wg.Wait() - - // Each Stop must return either nil (drove or waited on the - // drain) or ErrNotRunning (Run already exited before this - // caller grabbed the lock). No double-close panic. - for _, err := range errs { - if err != nil { - require.ErrorIs(t, err, ErrNotRunning) - } - } - - select { - case <-drainObserved: - default: - t.Fatal("runFunc never observed Stopping(ctx); drain was bypassed by concurrent Stop") - } - assert.False(t, ctxCancelObserved.Load(), "drain semantics violated: runCtx was hard-cancelled by a concurrent Stop") - assert.False(t, r.IsRunning()) - }) - - t.Run("secondary Stop with shorter deadline escalates runCancel", func(t *testing.T) { - started := make(chan struct{}) - runFuncDone := make(chan struct{}) - - // runFunc waits only on ctx.Done() (ignores Stopping). Without - // escalation, Stop B's deadline expires but the runnable keeps - // draining for the full drainTimeout (5s). - r := New(func(ctx context.Context) error { - close(started) - <-ctx.Done() - close(runFuncDone) - return ctx.Err() - }, WithDrain(5*time.Second)) - - go func() { - _ = r.Run(context.Background()) - }() - - <-started - - // Stop A: no deadline; primary, drives drain. - aDone := make(chan error, 1) - go func() { - aDone <- r.Stop(context.Background()) - }() - - time.Sleep(20 * time.Millisecond) - - // Stop B: 100ms deadline; secondary. Must escalate so runFunc - // exits within the caller's budget. - bCtx, bCancel := context.WithTimeout(context.Background(), 100*time.Millisecond) - defer bCancel() - start := time.Now() - bErr := r.Stop(bCtx) - bElapsed := time.Since(start) - require.ErrorIs(t, bErr, context.DeadlineExceeded) - assert.Less(t, bElapsed, 500*time.Millisecond, "Stop B should not wait beyond its own deadline") - - select { - case <-runFuncDone: - case <-time.After(time.Second): - t.Fatal("runnable was not force-cancelled when secondary Stop's ctx expired") - } - - select { - case err := <-aDone: - require.NoError(t, err) - case <-time.After(time.Second): - t.Fatal("Stop A did not return after runFunc exited") - } - }) - - t.Run("runnable can be re-Run after a concurrent-Stop lifecycle", func(t *testing.T) { - // Lifecycle survival smoke test: a runnable that's been - // stopped via concurrent Stops (including one with an - // already-cancelled ctx hitting the runCancel escalation path) - // can be re-Run on the same instance and complete cleanly. - // - // This does NOT deterministically cover the runCancel-snapshot - // race in runnable.go (where a stale Stop could in principle - // reach r.runCancel after Run has overwritten the field). That - // race requires pausing the secondary Stop between its lock - // release and runCancel call while a fresh Run executes — - // achievable only via testing/synctest or a runtime hook. - // Both are out of scope here; the snapshot fix is verified by - // inspection, not this test. - r := New(func(ctx context.Context) error { - select { - case <-Stopping(ctx): - return nil - case <-ctx.Done(): - return ctx.Err() - } - }, WithDrain(1*time.Second)) - - go func() { - _ = r.Run(context.Background()) - }() - - for !r.IsRunning() { - time.Sleep(time.Millisecond) - } - - // Primary Stop, no deadline — drives drain. - primaryDone := make(chan error, 1) - go func() { - primaryDone <- r.Stop(context.Background()) - }() - - // Secondary Stop with an already-cancelled ctx — exercises - // the ctx.Done() escalation path that calls runCancel. - cancelledCtx, cancel := context.WithCancel(context.Background()) - cancel() - _ = r.Stop(cancelledCtx) - - <-primaryDone - for r.IsRunning() { - time.Sleep(time.Millisecond) - } - - // Round 2 — same runnable, fresh Run. Should run undisturbed - // until we Stop it. - round2Done := make(chan error, 1) - go func() { - round2Done <- r.Run(context.Background()) - }() - - select { - case err := <-round2Done: - t.Fatalf("round-2 runnable exited prematurely: %v", err) - case <-time.After(150 * time.Millisecond): - } - - require.NoError(t, r.Stop(context.Background())) - select { - case err := <-round2Done: - require.NoError(t, err) - case <-time.After(time.Second): - t.Fatal("round-2 runnable did not exit after Stop") - } - }) - - t.Run("WithRetry stops retrying after Stopping fires", func(t *testing.T) { - started := make(chan struct{}, 1) - var attempts atomic.Int32 - - // runFunc errors transiently every time. Without the - // Stopping-aware retry guard, WithRetry keeps re-entering - // runFunc after Stop is called. - r := New(func(ctx context.Context) error { - select { - case started <- struct{}{}: - default: - } - attempts.Add(1) - <-Stopping(ctx) - return errors.New("transient") - }, WithDrain(2*time.Second), WithRetry(100, ResetNever)) - - runDone := make(chan error, 1) - go func() { - runDone <- r.Run(context.Background()) - }() - - <-started - - require.NoError(t, r.Stop(context.Background())) - - select { - case <-runDone: - case <-time.After(time.Second): - t.Fatal("Run did not return after Stop") - } - - // Exactly one attempt should have run — the retry wrapper - // must observe Stopping and abandon further attempts. - assert.Equal(t, int32(1), attempts.Load(), "retry continued after Stop drained") - }) - - t.Run("Stopping returns nil when not configured", func(t *testing.T) { - var observed bool - - r := New(func(ctx context.Context) error { - ch := Stopping(ctx) - // Selecting on nil channel blocks forever; default branch runs. - select { - case <-ch: - observed = true - default: - observed = ch == nil - } - return nil - }) - - err := r.Run(context.Background()) - require.NoError(t, err) - assert.True(t, observed, "Stopping(ctx) should be nil without WithDrain") - }) -} From f5f0a36fa5a71f58edf3cab653a6543a1a99d566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Mon, 11 May 2026 18:04:36 +0200 Subject: [PATCH 26/36] docs(examples): rewrite ticker-with-drain for adapters API sigCtx can now be passed directly to Run; Draining intercepts cancellation. --- examples/ticker-with-drain/main.go | 33 ++++++++++++++---------------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/examples/ticker-with-drain/main.go b/examples/ticker-with-drain/main.go index 5eb85a2..55ce089 100644 --- a/examples/ticker-with-drain/main.go +++ b/examples/ticker-with-drain/main.go @@ -1,8 +1,8 @@ // Example: a periodic reconciler that drains gracefully on SIGTERM. // -// Shape: NewTicker + WithDrain + WithRecoverer + signal.NotifyContext. -// Copy-paste this into a service's cmd/.../main.go and replace the -// reconcile body with your work. +// Shape: adapters.Draining + adapters.Ticker + WithRecoverer + +// signal.NotifyContext. Copy-paste into a service's cmd/.../main.go +// and replace the reconcile body with your work. package main import ( @@ -15,6 +15,7 @@ import ( "time" "github.com/0xsequence/runnable" + "github.com/0xsequence/runnable/adapters" ) type stderrReporter struct{} @@ -31,8 +32,8 @@ func (stderrPrinter) Print(ctx context.Context, callstack []byte) { func reconcile(ctx context.Context) error { // Pretend this is an HTTP call to an external system that must not - // be aborted mid-request when SIGTERM fires. Under WithDrain, Stop - // waits for this tick to finish before tearing down the Runnable. + // be aborted mid-request when SIGTERM fires. Under Draining, this + // tick is allowed to finish before the runnable tears down. fmt.Println("tick: reconciling...") time.Sleep(500 * time.Millisecond) fmt.Println("tick: done") @@ -43,25 +44,21 @@ func main() { sigCtx, stopSig := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT) defer stopSig() - rc := runnable.NewTicker( - 2*time.Second, - reconcile, - runnable.WithDrain(10*time.Second), + // Unlike v0.1, passing sigCtx directly to Run is safe: the + // Draining adapter intercepts cancellation and triggers drain + // rather than aborting work. + rc := runnable.New( + adapters.Draining(10*time.Second, + adapters.Ticker(2*time.Second, reconcile), + ), runnable.WithRecoverer(stderrReporter{}, stderrPrinter{}), ) - // Run with a pristine ctx — if Run received sigCtx, SIGTERM would - // cancel runFunc's ctx directly and the ticker would exit before - // Stop ever closed Stopping(ctx), defeating WithDrain. Stop is the - // only thing that should drive shutdown of a drain-enabled worker. runErr := make(chan error, 1) go func() { - runErr <- rc.Run(context.Background()) + runErr <- rc.Run(sigCtx) }() - // Wait for either a shutdown signal or an early worker exit - // (tick error, recovered panic). Without the runErr branch, main - // would block on sigCtx forever after the worker died. select { case <-sigCtx.Done(): fmt.Println("shutdown: draining in-flight tick...") @@ -70,7 +67,7 @@ func main() { if err := rc.Stop(stopCtx); err != nil { fmt.Fprintf(os.Stderr, "stop: %v\n", err) } - if err := <-runErr; err != nil && !errors.Is(err, context.Canceled) { + if err := <-runErr; err != nil && !errors.Is(err, context.Canceled) && !errors.Is(err, adapters.ErrDrainTimedOut) { fmt.Fprintf(os.Stderr, "reconciler stopped: %v\n", err) os.Exit(1) } From dd00f18e867b4182e4b309424e342daf20609c3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Mon, 11 May 2026 18:05:30 +0200 Subject: [PATCH 27/36] docs(readme): replace drain/ticker sections with adapters + migration guide --- README.md | 93 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 444a5bf..00dc91d 100644 --- a/README.md +++ b/README.md @@ -121,60 +121,77 @@ if err != nil { } ``` -### Drain on Stop +### Adapters -By default, `Stop` cancels the runFunc's context, which aborts in-flight -work. For workers that own external calls that must complete (e.g. an -HTTP request that creates remote state), use `WithDrain` to switch to -"signal-and-wait" semantics: `Stop` closes the channel returned by -`Stopping(ctx)` and waits up to the drain timeout for runFunc to return -on its own. If the timeout elapses, `Stop` falls back to cancelling the -context and returns `ErrDrainTimedOut` once runFunc exits. +Cross-cutting behaviors that aren't part of the core lifecycle live in +the `runnable/adapters` subpackage as runFunc wrappers. They compose by +nesting. -Always select on both `<-ctx.Done()` and `<-runnable.Stopping(ctx)` — -`Stopping` signals only `Stop`; outer-context cancellation still arrives -via `ctx.Done()` and a loop that ignores it will hang. +**Ticker** — periodic execution: ```go -r := runnable.New(func(ctx context.Context) error { - for { +r := runnable.New(adapters.Ticker(30*time.Second, reconcile)) +``` + +**Draining** — graceful shutdown with a grace window. When the outer +ctx is cancelled, work has `timeout` to return via `adapters.Stopping(ctx)` +before its ctx is force-cancelled and `adapters.ErrDrainTimedOut` is +returned. Always select on both `ctx.Done()` and `Stopping(ctx)`: + +```go +r := runnable.New(adapters.Draining(10*time.Second, + adapters.Ticker(30*time.Second, func(ctx context.Context) error { select { case <-ctx.Done(): return ctx.Err() - case <-runnable.Stopping(ctx): - return nil // finish in-flight work, then return + case <-adapters.Stopping(ctx): + return nil case <-time.After(time.Second): - doWork(ctx) + return doWork(ctx) } - } -}, runnable.WithDrain(10*time.Second)) + }), +)) ``` -### Ticker +A full SIGTERM-safe service shape lives in +[`examples/ticker-with-drain`](examples/ticker-with-drain/main.go). -`NewTicker` wraps the standard "select-loop on a `time.Ticker`" pattern. -It composes with `WithDrain` (let the current tick finish on Stop) and -`WithRecoverer` (catch panics in the tick body). +### Migrating from v0.1 to v0.2 -```go -r := runnable.NewTicker( - 30*time.Second, - func(ctx context.Context) error { - return reconcile(ctx) - }, - runnable.WithDrain(10*time.Second), -) +v0.2 moves drain and ticker out of the core package. The Option-based +`WithDrain` and the `NewTicker` constructor are removed; their +replacements live at `runnable/adapters`. -go r.Run(ctx) +Before (v0.1): -// On shutdown: -stopCtx, cancel := context.WithTimeout(context.Background(), 15*time.Second) -defer cancel() -r.Stop(stopCtx) // drains the in-flight tick before returning -``` + r := runnable.NewTicker(30*time.Second, doWork, + runnable.WithDrain(10*time.Second), + ) + +After (v0.2): + + r := runnable.New(adapters.Draining(10*time.Second, + adapters.Ticker(30*time.Second, doWork), + )) + +Symbol mapping: + +- `runnable.WithDrain` → use `adapters.Draining` as a runFunc wrapper. +- `runnable.NewTicker` → `adapters.Ticker` wrapped by `runnable.New`. +- `runnable.Stopping` → `adapters.Stopping`. +- `runnable.ErrDrainTimedOut` → `adapters.ErrDrainTimedOut`. + +**Behavioral change:** `Stop(ctx)`'s ctx no longer shortens the drain +window. In v0.1, a caller ctx shorter than `WithDrain`'s timeout would +force-cancel mid-drain. In v0.2, `Stop`'s ctx only governs how long +the caller waits for `Stop` to return; the drain runs on its own +fixed-duration timer regardless. If you need a shorter drain budget, +configure `Draining` with the shorter duration. -A full SIGTERM-safe shape (ticker + drain + recoverer + signal.NotifyContext) -lives in [`examples/ticker-with-drain`](examples/ticker-with-drain/main.go). +**NewGroup interaction:** drain-enabled children of `NewGroup` now +drain when the group is stopped (v0.1 silently bypassed the child's +drain). No code change required at call sites — the adapter design +fixes this by construction. ### Runnable Object ```go From 12d26013f2446ac3fe176540c87dee4bfbceaecc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Mon, 11 May 2026 18:10:20 +0200 Subject: [PATCH 28/36] refactor: clarify runCancel snapshot reason; simplify README example - runnable.go: comment on runCancel snapshot was describing v0.1's failure mode (calling runCancel after waiting for drain). The actual defense in v0.2 is memory-model: Run overwrites the field across cycles, so reading it without synchronization races. Reword. - README: drop the contrived `<-time.After` case from the Draining example body and pull the "always select on both" note out as prose. The full pattern lives in examples/ticker-with-drain. --- README.md | 17 ++++++----------- runnable.go | 6 +++--- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 00dc91d..7431e22 100644 --- a/README.md +++ b/README.md @@ -136,23 +136,18 @@ r := runnable.New(adapters.Ticker(30*time.Second, reconcile)) **Draining** — graceful shutdown with a grace window. When the outer ctx is cancelled, work has `timeout` to return via `adapters.Stopping(ctx)` before its ctx is force-cancelled and `adapters.ErrDrainTimedOut` is -returned. Always select on both `ctx.Done()` and `Stopping(ctx)`: +returned. ```go r := runnable.New(adapters.Draining(10*time.Second, - adapters.Ticker(30*time.Second, func(ctx context.Context) error { - select { - case <-ctx.Done(): - return ctx.Err() - case <-adapters.Stopping(ctx): - return nil - case <-time.After(time.Second): - return doWork(ctx) - } - }), + adapters.Ticker(30*time.Second, reconcile), )) ``` +Inside long-running work, always select on both `ctx.Done()` and +`adapters.Stopping(ctx)` — `Stopping` signals drain start, `ctx.Done()` +fires only when the drain timer expires. + A full SIGTERM-safe service shape lives in [`examples/ticker-with-drain`](examples/ticker-with-drain/main.go). diff --git a/runnable.go b/runnable.go index a8115ae..8c2fbce 100644 --- a/runnable.go +++ b/runnable.go @@ -133,9 +133,9 @@ func (r *runnable) Stop(ctx context.Context) error { return ErrNotRunning } runStop := r.runStop - // Snapshot runCancel under the lock — the field is overwritten by - // the next Run, so reading r.runCancel() after waiting can cancel - // a future runnable that started after this Stop began. + // Snapshot runCancel under the lock — Run overwrites this field + // on each cycle, so reading it without synchronization races with + // a concurrent or subsequent Run. runCancel := r.runCancel r.mu.Unlock() From 64a09e1b0b2d8dc2ae8ef1fa452243ea06ef1e0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Tue, 12 May 2026 18:18:11 +0200 Subject: [PATCH 29/36] fix(adapters): recover panics in Draining's work goroutine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Panics in work ran on Draining's spawned goroutine, where the outer runnable.WithRecoverer's defer cannot reach them — recover only fires on the goroutine where the deferred call lives. The runtime would crash the whole process. The example explicitly composed Draining + Ticker + WithRecoverer and implied recovery worked; it didn't. Recover inside Draining's goroutine and surface the panic as an error containing the panic value and stack trace. Update the example to drop WithRecoverer (no longer load-bearing in this composition) and document the trade-off: WithRecoverer's Report/StackPrinter hooks do not fire; tick panics arrive as errors on runErr. A future Recovering adapter could give back the rich hooks for users who want them; deferred to a follow-up. Adds TestDraining_RecoversPanicAsError — fails on the unfixed code (work goroutine panic crashes the test process). --- adapters/draining.go | 16 +++++++++++++++- adapters/draining_test.go | 16 ++++++++++++++++ examples/ticker-with-drain/main.go | 25 +++++++++---------------- 3 files changed, 40 insertions(+), 17 deletions(-) diff --git a/adapters/draining.go b/adapters/draining.go index a2d9f7f..9e28257 100644 --- a/adapters/draining.go +++ b/adapters/draining.go @@ -3,6 +3,8 @@ package adapters import ( "context" "errors" + "fmt" + "runtime/debug" "time" ) @@ -24,6 +26,11 @@ func Stopping(ctx context.Context) <-chan struct{} { // Draining wraps work with graceful-shutdown semantics: when outerCtx // is cancelled, work has up to timeout to return via Stopping(workCtx) // before workCtx is force-cancelled and ErrDrainTimedOut is returned. +// +// Panics in work are recovered inside Draining's goroutine and returned +// as an error containing the panic value and stack trace. They do NOT +// propagate to runnable.WithRecoverer — recover only fires on the +// goroutine where the deferred call lives, and work runs on its own. func Draining(timeout time.Duration, work func(context.Context) error) func(context.Context) error { return func(outerCtx context.Context) error { // Decoupled from outerCtx so outer cancellation triggers drain @@ -35,7 +42,14 @@ func Draining(timeout time.Duration, work func(context.Context) error) func(cont workCtx = context.WithValue(workCtx, stoppingKey{}, (<-chan struct{})(stopping)) done := make(chan error, 1) - go func() { done <- work(workCtx) }() + go func() { + defer func() { + if rec := recover(); rec != nil { + done <- fmt.Errorf("adapters: panic in draining work: %v\n%s", rec, debug.Stack()) + } + }() + done <- work(workCtx) + }() select { case err := <-done: diff --git a/adapters/draining_test.go b/adapters/draining_test.go index c710a4d..68082e8 100644 --- a/adapters/draining_test.go +++ b/adapters/draining_test.go @@ -171,3 +171,19 @@ func TestDraining_WorkErrorPropagatesWithoutDrain(t *testing.T) { err := r.Run(context.Background()) require.ErrorIs(t, err, sentinel) } + +func TestDraining_RecoversPanicAsError(t *testing.T) { + // Regression: panics in work run on Draining's spawned goroutine, + // not on the goroutine where runnable.WithRecoverer's defer lives. + // Without internal recovery, a tick panic would crash the process. + // Draining must catch it and surface as an error. + work := func(ctx context.Context) error { + panic("boom") + } + + r := runnable.New(adapters.Draining(1*time.Second, work)) + err := r.Run(context.Background()) + require.Error(t, err) + assert.Contains(t, err.Error(), "boom", "panic value should be embedded in error") + assert.Contains(t, err.Error(), "panic in draining work", "error should identify itself as a recovered panic") +} diff --git a/examples/ticker-with-drain/main.go b/examples/ticker-with-drain/main.go index 55ce089..c44738a 100644 --- a/examples/ticker-with-drain/main.go +++ b/examples/ticker-with-drain/main.go @@ -1,8 +1,8 @@ // Example: a periodic reconciler that drains gracefully on SIGTERM. // -// Shape: adapters.Draining + adapters.Ticker + WithRecoverer + -// signal.NotifyContext. Copy-paste into a service's cmd/.../main.go -// and replace the reconcile body with your work. +// Shape: adapters.Draining + adapters.Ticker + signal.NotifyContext. +// Copy-paste into a service's cmd/.../main.go and replace the reconcile +// body with your work. package main import ( @@ -18,18 +18,6 @@ import ( "github.com/0xsequence/runnable/adapters" ) -type stderrReporter struct{} - -func (stderrReporter) Report(ctx context.Context, rec interface{}) { - fmt.Fprintf(os.Stderr, "panic recovered: %v\n", rec) -} - -type stderrPrinter struct{} - -func (stderrPrinter) Print(ctx context.Context, callstack []byte) { - _, _ = os.Stderr.Write(callstack) -} - func reconcile(ctx context.Context) error { // Pretend this is an HTTP call to an external system that must not // be aborted mid-request when SIGTERM fires. Under Draining, this @@ -47,11 +35,16 @@ func main() { // Unlike v0.1, passing sigCtx directly to Run is safe: the // Draining adapter intercepts cancellation and triggers drain // rather than aborting work. + // + // Note on panics: a panic inside reconcile is recovered by + // Draining and surfaced as an error from rc.Run. runnable.WithRecoverer + // does NOT catch tick panics in this composition (recover only fires + // on the goroutine where the deferred call lives, and Draining runs + // work on its own goroutine). Read panics off runErr below. rc := runnable.New( adapters.Draining(10*time.Second, adapters.Ticker(2*time.Second, reconcile), ), - runnable.WithRecoverer(stderrReporter{}, stderrPrinter{}), ) runErr := make(chan error, 1) From 21645851e73d89dddb8ccc06d1748ca4dc5253d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Tue, 12 May 2026 18:18:42 +0200 Subject: [PATCH 30/36] test(adapters): make TestTicker_FiresOnInterval wall-clock independent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Old form asserted 2 <= count <= 4 after 175ms of wall sleep with a 50ms ticker. The upper bound flaked under load — Stop races t.C and loaded CI runners can queue extra ticks. New form counts signals via a channel until 3 ticks are observed, then stops. Pins the behavioral claim ("fires repeatedly on interval") without depending on wall-clock arithmetic. --- adapters/ticker_test.go | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/adapters/ticker_test.go b/adapters/ticker_test.go index cc94091..89e42b0 100644 --- a/adapters/ticker_test.go +++ b/adapters/ticker_test.go @@ -15,21 +15,30 @@ import ( ) func TestTicker_FiresOnInterval(t *testing.T) { - var count atomic.Int32 + // Count tick signals on a channel rather than asserting wall-clock + // arithmetic; loaded CI runners would otherwise queue extra ticks + // and bust an upper bound. The behavioral claim is "Ticker fires + // repeatedly on interval" — wait for N ticks, stop, done. + ticks := make(chan struct{}, 8) tick := func(ctx context.Context) error { - count.Add(1) + select { + case ticks <- struct{}{}: + default: + } return nil } - r := runnable.New(adapters.Ticker(50*time.Millisecond, tick)) + r := runnable.New(adapters.Ticker(20*time.Millisecond, tick)) go func() { _ = r.Run(context.Background()) }() - time.Sleep(175 * time.Millisecond) + for i := 0; i < 3; i++ { + select { + case <-ticks: + case <-time.After(time.Second): + t.Fatalf("only %d ticks observed before timeout", i) + } + } require.NoError(t, r.Stop(context.Background())) - - c := count.Load() - assert.GreaterOrEqual(t, c, int32(2)) - assert.LessOrEqual(t, c, int32(4)) } func TestTicker_ComposesWithDraining(t *testing.T) { From 92b3741d4ccf8732e929022b2df343eeaa502067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Tue, 12 May 2026 18:19:11 +0200 Subject: [PATCH 31/36] docs(readme): make legacy examples ctx-responsive Two pre-existing examples used `select { case <-ctx.Done(): ...; default: }` followed by `time.Sleep(1s)`, which means Stop has to wait up to a full second for the in-flight sleep before the loop observes cancellation. The pattern directly contradicts the new "Adapters" section's narrative about responsive shutdown via Draining/Stopping. Replace with `case <-time.After(time.Second)` in the select itself so cancellation is immediate. Also captures+defers the cancel from context.WithTimeout in the timeout example (the discarded form was a context-leak warning from go vet on master). --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7431e22..7a2dbe4 100644 --- a/README.md +++ b/README.md @@ -45,9 +45,8 @@ r := runnable.New(func(ctx context.Context) error { select { case <-ctx.Done(): return nil - default: + case <-time.After(time.Second): } - time.Sleep(1 * time.Second) fmt.Println("Running...") } }) @@ -71,7 +70,8 @@ if err != nil { ### Runnable Function with timeout ```go fmt.Println("Simple function with timeout...") -ctxWithTimeout, _ := context.WithTimeout(context.Background(), 5*time.Second) +ctxWithTimeout, cancel := context.WithTimeout(context.Background(), 5*time.Second) +defer cancel() err = runnable.New(func(ctx context.Context) error { fmt.Println("Starting...") defer fmt.Println("Stopping...") @@ -80,9 +80,8 @@ err = runnable.New(func(ctx context.Context) error { select { case <-ctx.Done(): return nil - default: + case <-time.After(time.Second): } - time.Sleep(1 * time.Second) fmt.Println("Running...") } }).Run(ctxWithTimeout) From 296c14b7cc58d2e9aa414adf0e28973ab447d0eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Tue, 12 May 2026 23:13:40 +0200 Subject: [PATCH 32/36] ci: bump Go to match go.mod and refresh action versions setup-go was pinned to 1.20 while go.mod declares 1.21 and the Draining adapter introduced in this PR uses context.WithoutCancel (Go 1.21+). Switch to go-version-file so the workflow tracks go.mod. Bump actions/checkout to v4 and actions/setup-go to v5; the v3/v4 versions both hit the Node 20 deprecation warning on the runner. --- .github/workflows/go.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 0065213..0646ec4 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -14,12 +14,12 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: - go-version: '1.20' + go-version-file: 'go.mod' - name: Build run: go build -v ./... From b80a033e9da165220f97bf323abe13dd4da12968 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Tue, 12 May 2026 21:28:15 +0200 Subject: [PATCH 33/36] refactor(adapters): chi-style middleware via runnable.WithAdapters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce runnable.RunFunc and runnable.Adapter (func(next RunFunc) RunFunc) as the core extension point, and runnable.WithAdapters Option that wraps the runnable's runFunc with the supplied adapters left-to-right (first listed = outermost wrapper). Reshape adapters.Draining and adapters.Ticker as config-only constructors that return runnable.Adapter — work is supplied via runnable.New, not via the adapter call. Composition becomes: runnable.New(reconcile, runnable.WithAdapters( adapters.Draining(10*time.Second), adapters.Ticker(2*time.Second), )) The adapters package now imports runnable for the Adapter type; runnable does not import adapters. Stop and runnable lifecycle are unchanged. --- adapter.go | 34 +++++++++++++ adapters/doc.go | 20 ++++---- adapters/draining.go | 81 ++++++++++++++++-------------- adapters/draining_test.go | 17 +++---- adapters/ticker.go | 46 +++++++++-------- adapters/ticker_test.go | 13 ++--- examples/ticker-with-drain/main.go | 27 ++++------ group_drain_test.go | 4 +- 8 files changed, 141 insertions(+), 101 deletions(-) create mode 100644 adapter.go diff --git a/adapter.go b/adapter.go new file mode 100644 index 0000000..99ccd1b --- /dev/null +++ b/adapter.go @@ -0,0 +1,34 @@ +package runnable + +import "context" + +// RunFunc is the signature wrapped by runnable.New and by Adapters. +type RunFunc func(ctx context.Context) error + +// Adapter wraps a RunFunc with cross-cutting behavior, mirroring the +// chi middleware shape. Adapters live in the runnable/adapters +// subpackage and are applied via WithAdapters. +type Adapter func(next RunFunc) RunFunc + +type withAdapters struct { + adapters []Adapter +} + +// WithAdapters wraps the runnable's runFunc with the given adapters. +// Adapters are applied left-to-right as outermost-to-innermost: +// WithAdapters(A, B, C) yields A(B(C(runFunc))). +// +// Apply order across Options matters: WithAdapters sees whatever +// runFunc previous Options installed, and subsequent Options see the +// adapter-wrapped runFunc. +func WithAdapters(adapters ...Adapter) Option { + return &withAdapters{adapters: adapters} +} + +func (w *withAdapters) apply(r *runnable) { + next := RunFunc(r.runFunc) + for i := len(w.adapters) - 1; i >= 0; i-- { + next = w.adapters[i](next) + } + r.runFunc = next +} diff --git a/adapters/doc.go b/adapters/doc.go index 2e4dabe..33189db 100644 --- a/adapters/doc.go +++ b/adapters/doc.go @@ -1,12 +1,14 @@ -// Package adapters provides composable wrappers around the runFunc -// signature (func(context.Context) error). Adapters layer cross-cutting -// behavior such as drain-on-shutdown (Draining) and periodic execution -// (Ticker) without coupling those concerns into the core runnable -// lifecycle. +// Package adapters provides chi-style middleware around the runnable +// RunFunc signature. Each adapter is a config-only constructor that +// returns a runnable.Adapter; compose them via runnable.WithAdapters +// (first listed = outermost wrapper): // -// Adapters compose by nesting: -// -// r := runnable.New(adapters.Draining(10*time.Second, -// adapters.Ticker(time.Second, doWork), +// r := runnable.New(work, runnable.WithAdapters( +// adapters.Draining(10*time.Second), +// adapters.Ticker(time.Second), // )) +// +// Drain-on-shutdown (Draining) and periodic execution (Ticker) live +// here rather than in the core runnable package so they don't couple +// into the core lifecycle. package adapters diff --git a/adapters/draining.go b/adapters/draining.go index 9e28257..b84902a 100644 --- a/adapters/draining.go +++ b/adapters/draining.go @@ -6,6 +6,8 @@ import ( "fmt" "runtime/debug" "time" + + "github.com/0xsequence/runnable" ) // ErrDrainTimedOut is returned by Draining when work did not exit @@ -23,50 +25,55 @@ func Stopping(ctx context.Context) <-chan struct{} { return ch } -// Draining wraps work with graceful-shutdown semantics: when outerCtx -// is cancelled, work has up to timeout to return via Stopping(workCtx) -// before workCtx is force-cancelled and ErrDrainTimedOut is returned. +// Draining returns an Adapter that adds graceful-shutdown semantics: +// when outerCtx is cancelled, next has up to timeout to return via +// Stopping(workCtx) before workCtx is force-cancelled and +// ErrDrainTimedOut is returned. // -// Panics in work are recovered inside Draining's goroutine and returned +// Panics in next are recovered inside Draining's goroutine and returned // as an error containing the panic value and stack trace. They do NOT -// propagate to runnable.WithRecoverer — recover only fires on the -// goroutine where the deferred call lives, and work runs on its own. -func Draining(timeout time.Duration, work func(context.Context) error) func(context.Context) error { - return func(outerCtx context.Context) error { - // Decoupled from outerCtx so outer cancellation triggers drain - // rather than aborting work directly. - workCtx, cancelWork := context.WithCancel(context.WithoutCancel(outerCtx)) - defer cancelWork() +// propagate to outer recover handlers — recover only fires on the +// goroutine where the deferred call lives, and next runs on its own. +// Compose with Recovering inside Draining if you need a panic handler +// callback. +func Draining(timeout time.Duration) runnable.Adapter { + return func(next runnable.RunFunc) runnable.RunFunc { + return func(outerCtx context.Context) error { + // Decoupled from outerCtx so outer cancellation triggers drain + // rather than aborting next directly. + workCtx, cancelWork := context.WithCancel(context.WithoutCancel(outerCtx)) + defer cancelWork() - stopping := make(chan struct{}) - workCtx = context.WithValue(workCtx, stoppingKey{}, (<-chan struct{})(stopping)) + stopping := make(chan struct{}) + workCtx = context.WithValue(workCtx, stoppingKey{}, (<-chan struct{})(stopping)) - done := make(chan error, 1) - go func() { - defer func() { - if rec := recover(); rec != nil { - done <- fmt.Errorf("adapters: panic in draining work: %v\n%s", rec, debug.Stack()) - } + done := make(chan error, 1) + go func() { + defer func() { + if rec := recover(); rec != nil { + done <- fmt.Errorf("adapters: panic in draining work: %v\n%s", rec, debug.Stack()) + } + }() + done <- next(workCtx) }() - done <- work(workCtx) - }() - select { - case err := <-done: - return err - case <-outerCtx.Done(): - close(stopping) - } + select { + case err := <-done: + return err + case <-outerCtx.Done(): + close(stopping) + } - timer := time.NewTimer(timeout) - defer timer.Stop() - select { - case err := <-done: - return err - case <-timer.C: - cancelWork() - <-done - return ErrDrainTimedOut + timer := time.NewTimer(timeout) + defer timer.Stop() + select { + case err := <-done: + return err + case <-timer.C: + cancelWork() + <-done + return ErrDrainTimedOut + } } } } diff --git a/adapters/draining_test.go b/adapters/draining_test.go index 68082e8..83f4bbf 100644 --- a/adapters/draining_test.go +++ b/adapters/draining_test.go @@ -33,7 +33,7 @@ func TestDraining_WorkReturnsNaturallyViaStopping(t *testing.T) { } } - r := runnable.New(adapters.Draining(1*time.Second, work)) + r := runnable.New(work, runnable.WithAdapters(adapters.Draining(1*time.Second))) runErr := make(chan error, 1) go func() { runErr <- r.Run(context.Background()) }() @@ -63,7 +63,7 @@ func TestDraining_TimerForcesCancelWhenWorkIgnoresStopping(t *testing.T) { return ctx.Err() } - r := runnable.New(adapters.Draining(100*time.Millisecond, work)) + r := runnable.New(work, runnable.WithAdapters(adapters.Draining(100*time.Millisecond))) runErr := make(chan error, 1) go func() { runErr <- r.Run(context.Background()) }() @@ -98,7 +98,7 @@ func TestDraining_OuterCtxCancelTriggersDrain(t *testing.T) { } } - r := runnable.New(adapters.Draining(1*time.Second, work)) + r := runnable.New(work, runnable.WithAdapters(adapters.Draining(1*time.Second))) ctx, cancel := context.WithCancel(context.Background()) runErr := make(chan error, 1) go func() { runErr <- r.Run(ctx) }() @@ -131,7 +131,7 @@ func TestDraining_ConcurrentStopsPreserveDrainSemantics(t *testing.T) { } } - r := runnable.New(adapters.Draining(2*time.Second, work)) + r := runnable.New(work, runnable.WithAdapters(adapters.Draining(2*time.Second))) go func() { _ = r.Run(context.Background()) }() <-started @@ -167,21 +167,20 @@ func TestDraining_WorkErrorPropagatesWithoutDrain(t *testing.T) { sentinel := errors.New("work failed") work := func(ctx context.Context) error { return sentinel } - r := runnable.New(adapters.Draining(1*time.Second, work)) + r := runnable.New(work, runnable.WithAdapters(adapters.Draining(1*time.Second))) err := r.Run(context.Background()) require.ErrorIs(t, err, sentinel) } func TestDraining_RecoversPanicAsError(t *testing.T) { // Regression: panics in work run on Draining's spawned goroutine, - // not on the goroutine where runnable.WithRecoverer's defer lives. - // Without internal recovery, a tick panic would crash the process. - // Draining must catch it and surface as an error. + // not on the goroutine where outer recover defers live. Without + // internal recovery, a tick panic would crash the process. work := func(ctx context.Context) error { panic("boom") } - r := runnable.New(adapters.Draining(1*time.Second, work)) + r := runnable.New(work, runnable.WithAdapters(adapters.Draining(1*time.Second))) err := r.Run(context.Background()) require.Error(t, err) assert.Contains(t, err.Error(), "boom", "panic value should be embedded in error") diff --git a/adapters/ticker.go b/adapters/ticker.go index 85a8db3..d9f5e1a 100644 --- a/adapters/ticker.go +++ b/adapters/ticker.go @@ -3,35 +3,39 @@ package adapters import ( "context" "time" + + "github.com/0xsequence/runnable" ) -// Ticker calls tick once per interval until ctx is cancelled or tick -// returns an error. Composes with Draining: an in-flight tick is -// allowed to finish before the loop exits. -func Ticker(interval time.Duration, tick func(context.Context) error) func(context.Context) error { - return func(ctx context.Context) error { - t := time.NewTicker(interval) - defer t.Stop() - stopping := Stopping(ctx) +// Ticker returns an Adapter that calls next once per interval until +// ctx is cancelled or next returns an error. Composes with Draining: +// an in-flight tick is allowed to finish before the loop exits. +func Ticker(interval time.Duration) runnable.Adapter { + return func(next runnable.RunFunc) runnable.RunFunc { + return func(ctx context.Context) error { + t := time.NewTicker(interval) + defer t.Stop() + stopping := Stopping(ctx) - for { - select { - case <-ctx.Done(): - return ctx.Err() - case <-stopping: - return nil - case <-t.C: - // Re-check shutdown signals: queued ticks during a slow - // tick can race against stopping in select's random pick. + for { select { case <-ctx.Done(): return ctx.Err() case <-stopping: return nil - default: - } - if err := tick(ctx); err != nil { - return err + case <-t.C: + // Re-check shutdown signals: queued ticks during a slow + // tick can race against stopping in select's random pick. + select { + case <-ctx.Done(): + return ctx.Err() + case <-stopping: + return nil + default: + } + if err := next(ctx); err != nil { + return err + } } } } diff --git a/adapters/ticker_test.go b/adapters/ticker_test.go index 89e42b0..2de83e5 100644 --- a/adapters/ticker_test.go +++ b/adapters/ticker_test.go @@ -28,7 +28,7 @@ func TestTicker_FiresOnInterval(t *testing.T) { return nil } - r := runnable.New(adapters.Ticker(20*time.Millisecond, tick)) + r := runnable.New(tick, runnable.WithAdapters(adapters.Ticker(20*time.Millisecond))) go func() { _ = r.Run(context.Background()) }() for i := 0; i < 3; i++ { @@ -55,8 +55,9 @@ func TestTicker_ComposesWithDraining(t *testing.T) { return nil } - r := runnable.New(adapters.Draining(1*time.Second, - adapters.Ticker(20*time.Millisecond, tick), + r := runnable.New(tick, runnable.WithAdapters( + adapters.Draining(1*time.Second), + adapters.Ticker(20*time.Millisecond), )) go func() { _ = r.Run(context.Background()) }() @@ -84,7 +85,7 @@ func TestTicker_WithoutDrainCancelsInFlightTick(t *testing.T) { return ctx.Err() } - r := runnable.New(adapters.Ticker(20*time.Millisecond, tick)) + r := runnable.New(tick, runnable.WithAdapters(adapters.Ticker(20*time.Millisecond))) go func() { _ = r.Run(context.Background()) }() <-tickStarted @@ -109,7 +110,7 @@ func TestTicker_TickErrorAbortsLoop(t *testing.T) { return nil } - r := runnable.New(adapters.Ticker(20*time.Millisecond, tick)) + r := runnable.New(tick, runnable.WithAdapters(adapters.Ticker(20*time.Millisecond))) err := r.Run(context.Background()) require.ErrorIs(t, err, sentinel) assert.Equal(t, int32(2), count.Load()) @@ -122,7 +123,7 @@ func TestTicker_RespectsOuterCtxCancel(t *testing.T) { return nil } - r := runnable.New(adapters.Ticker(20*time.Millisecond, tick)) + r := runnable.New(tick, runnable.WithAdapters(adapters.Ticker(20*time.Millisecond))) ctx, cancel := context.WithTimeout(context.Background(), 75*time.Millisecond) defer cancel() diff --git a/examples/ticker-with-drain/main.go b/examples/ticker-with-drain/main.go index c44738a..b453f27 100644 --- a/examples/ticker-with-drain/main.go +++ b/examples/ticker-with-drain/main.go @@ -1,8 +1,8 @@ // Example: a periodic reconciler that drains gracefully on SIGTERM. // -// Shape: adapters.Draining + adapters.Ticker + signal.NotifyContext. -// Copy-paste into a service's cmd/.../main.go and replace the reconcile -// body with your work. +// Shape: runnable.WithAdapters composing Draining + Ticker, driven by +// signal.NotifyContext. Copy-paste into a service's cmd/.../main.go +// and replace the reconcile body with your work. package main import ( @@ -32,20 +32,13 @@ func main() { sigCtx, stopSig := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT) defer stopSig() - // Unlike v0.1, passing sigCtx directly to Run is safe: the - // Draining adapter intercepts cancellation and triggers drain - // rather than aborting work. - // - // Note on panics: a panic inside reconcile is recovered by - // Draining and surfaced as an error from rc.Run. runnable.WithRecoverer - // does NOT catch tick panics in this composition (recover only fires - // on the goroutine where the deferred call lives, and Draining runs - // work on its own goroutine). Read panics off runErr below. - rc := runnable.New( - adapters.Draining(10*time.Second, - adapters.Ticker(2*time.Second, reconcile), - ), - ) + // Adapters compose left-to-right (first listed = outermost). Draining + // catches outer ctx cancellation and turns it into drain rather than + // abort. + rc := runnable.New(reconcile, runnable.WithAdapters( + adapters.Draining(10*time.Second), + adapters.Ticker(2*time.Second), + )) runErr := make(chan error, 1) go func() { diff --git a/group_drain_test.go b/group_drain_test.go index 0be02a5..f27487d 100644 --- a/group_drain_test.go +++ b/group_drain_test.go @@ -23,7 +23,7 @@ func TestNewGroup_DrainEnabledChild(t *testing.T) { drainObserved := make(chan struct{}) var ctxCancelObserved atomic.Bool - drainingChild := runnable.New(adapters.Draining(1*time.Second, func(ctx context.Context) error { + drainingChild := runnable.New(func(ctx context.Context) error { close(started) select { case <-adapters.Stopping(ctx): @@ -33,7 +33,7 @@ func TestNewGroup_DrainEnabledChild(t *testing.T) { ctxCancelObserved.Store(true) return ctx.Err() } - })) + }, runnable.WithAdapters(adapters.Draining(1*time.Second))) plainChild := runnable.New(func(ctx context.Context) error { <-ctx.Done() From 92b5063293779ce568533b7deae92576e8bf4748 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Tue, 12 May 2026 21:29:08 +0200 Subject: [PATCH 34/36] feat(adapters): migrate Recovering and Retry; drop Status.Restarts Add adapters.Recovering (with a single PanicHandler callback, collapsing the v0.1 RecoveryReporter / StackPrinter split) and adapters.Retry, and remove the runnable.WithRecoverer and runnable.WithRetry Options they replace. Drop Status.Restarts: the field counted WithRetry re-entries via the onStart coupling and has no clean way to surface now that retry lives outside core. A later release can reintroduce per-attempt observability via an explicit event channel. Update examples/main.go and examples/ticker-with-drain to use the new adapters via runnable.WithAdapters. --- adapters/recovering.go | 41 +++++++++++++ adapters/recovering_test.go | 61 +++++++++++++++++++ adapters/retry.go | 47 ++++++++++++++ adapters/retry_test.go | 70 +++++++++++++++++++++ examples/main.go | 4 +- examples/ticker-with-drain/main.go | 14 +++-- with_recoverer.go | 63 ------------------- with_recoverer_test.go | 98 ------------------------------ with_retry.go | 58 ------------------ with_retry_test.go | 62 ------------------- with_status.go | 14 ----- with_status_test.go | 20 ------ 12 files changed, 231 insertions(+), 321 deletions(-) create mode 100644 adapters/recovering.go create mode 100644 adapters/recovering_test.go create mode 100644 adapters/retry.go create mode 100644 adapters/retry_test.go delete mode 100644 with_recoverer.go delete mode 100644 with_recoverer_test.go delete mode 100644 with_retry.go delete mode 100644 with_retry_test.go diff --git a/adapters/recovering.go b/adapters/recovering.go new file mode 100644 index 0000000..f70335a --- /dev/null +++ b/adapters/recovering.go @@ -0,0 +1,41 @@ +package adapters + +import ( + "context" + "fmt" + "runtime/debug" + + "github.com/0xsequence/runnable" +) + +// PanicHandler is invoked when Recovering catches a panic, before the +// adapter converts it into an error. It runs on the same goroutine as +// next, so handlers must not block. +type PanicHandler func(ctx context.Context, rec any, stack []byte) + +// Recovering returns an Adapter that catches panics from next and +// converts them into errors. handler is optional; pass nil to skip +// reporting. +// +// Place Recovering inside Draining when both are in use: Draining +// already recovers panics in its own goroutine as a safety net, but +// Recovering inside lets the handler observe the panic before +// Draining's generic recovery formats it. +func Recovering(handler PanicHandler) runnable.Adapter { + return func(next runnable.RunFunc) runnable.RunFunc { + return func(ctx context.Context) (err error) { + defer func() { + rec := recover() + if rec == nil { + return + } + stack := debug.Stack() + if handler != nil { + handler(ctx, rec, stack) + } + err = fmt.Errorf("adapters: panic: %v", rec) + }() + return next(ctx) + } + } +} diff --git a/adapters/recovering_test.go b/adapters/recovering_test.go new file mode 100644 index 0000000..6dc9ba9 --- /dev/null +++ b/adapters/recovering_test.go @@ -0,0 +1,61 @@ +package adapters_test + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/0xsequence/runnable" + "github.com/0xsequence/runnable/adapters" +) + +func TestRecovering_TurnsPanicIntoError(t *testing.T) { + var captured any + handler := func(_ context.Context, rec any, _ []byte) { + captured = rec + } + + work := func(ctx context.Context) error { + panic("boom") + } + + r := runnable.New(work, runnable.WithAdapters(adapters.Recovering(handler))) + err := r.Run(context.Background()) + require.Error(t, err) + assert.Contains(t, err.Error(), "boom") + assert.Equal(t, "boom", captured) +} + +func TestRecovering_NilHandlerStillRecovers(t *testing.T) { + work := func(ctx context.Context) error { + panic("boom") + } + + r := runnable.New(work, runnable.WithAdapters(adapters.Recovering(nil))) + err := r.Run(context.Background()) + require.Error(t, err) + assert.Contains(t, err.Error(), "boom") +} + +func TestRecovering_PassesThroughOnSuccess(t *testing.T) { + called := false + handler := func(_ context.Context, rec any, _ []byte) { + called = true + } + + work := func(ctx context.Context) error { return nil } + + r := runnable.New(work, runnable.WithAdapters(adapters.Recovering(handler))) + require.NoError(t, r.Run(context.Background())) + assert.False(t, called, "handler must not fire when next returns normally") +} + +func TestRecovering_PassesThroughError(t *testing.T) { + work := func(ctx context.Context) error { return assert.AnError } + + r := runnable.New(work, runnable.WithAdapters(adapters.Recovering(nil))) + err := r.Run(context.Background()) + require.ErrorIs(t, err, assert.AnError) +} diff --git a/adapters/retry.go b/adapters/retry.go new file mode 100644 index 0000000..168bfb0 --- /dev/null +++ b/adapters/retry.go @@ -0,0 +1,47 @@ +package adapters + +import ( + "context" + "errors" + "time" + + "github.com/0xsequence/runnable" +) + +// ResetNever passed as resetAfter disables retry-budget reset based on +// elapsed time between attempts. +const ResetNever time.Duration = 0 + +// Retry returns an Adapter that re-invokes next up to maxRetries times +// when it returns a non-nil, non-context error. If resetAfter is +// non-zero and at least that long has passed since the previous +// attempt, the retry budget resets. +// +// Retry never observes Draining's Stopping signal — drain semantics +// belong to Draining alone. Compose with Draining outside Retry if you +// need both. +func Retry(maxRetries int, resetAfter time.Duration) runnable.Adapter { + return func(next runnable.RunFunc) runnable.RunFunc { + return func(ctx context.Context) error { + // lastTime is per-call: the timer for reset budgets is local + // to this invocation, not shared across runnable cycles. + var lastTime time.Time + var err error + for i := 0; i < maxRetries; i++ { + if resetAfter != ResetNever && time.Since(lastTime) > resetAfter { + i = 0 + } + lastTime = time.Now() + + err = next(ctx) + if err == nil { + return nil + } + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return err + } + } + return err + } + } +} diff --git a/adapters/retry_test.go b/adapters/retry_test.go new file mode 100644 index 0000000..807db70 --- /dev/null +++ b/adapters/retry_test.go @@ -0,0 +1,70 @@ +package adapters_test + +import ( + "context" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/0xsequence/runnable" + "github.com/0xsequence/runnable/adapters" +) + +func TestRetry_SucceedsOnSecondAttempt(t *testing.T) { + var count atomic.Int32 + work := func(ctx context.Context) error { + if count.Add(1) < 2 { + return assert.AnError + } + return nil + } + + r := runnable.New(work, runnable.WithAdapters(adapters.Retry(3, adapters.ResetNever))) + require.NoError(t, r.Run(context.Background())) + assert.Equal(t, int32(2), count.Load()) +} + +func TestRetry_GivesUpAfterMaxAttempts(t *testing.T) { + var count atomic.Int32 + work := func(ctx context.Context) error { + count.Add(1) + return assert.AnError + } + + r := runnable.New(work, runnable.WithAdapters(adapters.Retry(3, adapters.ResetNever))) + err := r.Run(context.Background()) + require.ErrorIs(t, err, assert.AnError) + assert.Equal(t, int32(3), count.Load()) +} + +func TestRetry_ResetsBudgetAfterQuietPeriod(t *testing.T) { + var count atomic.Int32 + work := func(ctx context.Context) error { + c := count.Add(1) + if c < 5 { + time.Sleep(200 * time.Millisecond) + return assert.AnError + } + return nil + } + + r := runnable.New(work, runnable.WithAdapters(adapters.Retry(3, 100*time.Millisecond))) + require.NoError(t, r.Run(context.Background())) + assert.Equal(t, int32(5), count.Load()) +} + +func TestRetry_DoesNotRetryContextErrors(t *testing.T) { + var count atomic.Int32 + work := func(ctx context.Context) error { + count.Add(1) + return context.Canceled + } + + r := runnable.New(work, runnable.WithAdapters(adapters.Retry(3, adapters.ResetNever))) + err := r.Run(context.Background()) + require.ErrorIs(t, err, context.Canceled) + assert.Equal(t, int32(1), count.Load()) +} diff --git a/examples/main.go b/examples/main.go index b799017..eefdfb1 100644 --- a/examples/main.go +++ b/examples/main.go @@ -6,6 +6,7 @@ import ( "time" "github.com/0xsequence/runnable" + "github.com/0xsequence/runnable/adapters" ) type Monitor struct { @@ -123,7 +124,6 @@ func main() { return fmt.Errorf("error") } - // do something for i := 0; i < 5; i++ { select { case <-ctx.Done(): @@ -134,7 +134,7 @@ func main() { fmt.Println("Running...") } return nil - }, runnable.WithRetry(3, runnable.ResetNever)).Run(context.Background()) + }, runnable.WithAdapters(adapters.Retry(3, adapters.ResetNever))).Run(context.Background()) if err != nil { fmt.Println(err) } diff --git a/examples/ticker-with-drain/main.go b/examples/ticker-with-drain/main.go index b453f27..49decaf 100644 --- a/examples/ticker-with-drain/main.go +++ b/examples/ticker-with-drain/main.go @@ -1,8 +1,8 @@ // Example: a periodic reconciler that drains gracefully on SIGTERM. // -// Shape: runnable.WithAdapters composing Draining + Ticker, driven by -// signal.NotifyContext. Copy-paste into a service's cmd/.../main.go -// and replace the reconcile body with your work. +// Shape: runnable.WithAdapters composing Draining + Recovering + Ticker, +// driven by signal.NotifyContext. Copy-paste into a service's +// cmd/.../main.go and replace the reconcile body with your work. package main import ( @@ -28,15 +28,21 @@ func reconcile(ctx context.Context) error { return nil } +func panicHandler(_ context.Context, rec any, stack []byte) { + fmt.Fprintf(os.Stderr, "tick panic: %v\n%s", rec, stack) +} + func main() { sigCtx, stopSig := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT) defer stopSig() // Adapters compose left-to-right (first listed = outermost). Draining // catches outer ctx cancellation and turns it into drain rather than - // abort. + // abort. Recovering sits inside Draining so the handler observes + // panics before Draining's safety-net recovery formats them. rc := runnable.New(reconcile, runnable.WithAdapters( adapters.Draining(10*time.Second), + adapters.Recovering(panicHandler), adapters.Ticker(2*time.Second), )) diff --git a/with_recoverer.go b/with_recoverer.go deleted file mode 100644 index 5de5eeb..0000000 --- a/with_recoverer.go +++ /dev/null @@ -1,63 +0,0 @@ -package runnable - -import ( - "context" - "fmt" - "runtime/debug" -) - -type RecoveryReporter interface { - Report(ctx context.Context, rec interface{}) -} - -type StackPrinter interface { - Print(ctx context.Context, callstack []byte) -} - -// NoopReporter -// Used to continue running go routine and do nothing -type NoopReporter struct{} - -func (*NoopReporter) Report(ctx context.Context, rec interface{}) {} - -type recoverer struct { - reporter RecoveryReporter - stackPrinter StackPrinter -} - -func WithRecoverer(reporter RecoveryReporter, stackPrinter StackPrinter) Option { - return &recoverer{ - reporter: reporter, - stackPrinter: stackPrinter, - } -} - -func (rec *recoverer) apply(r *runnable) { - originalRunFunc := r.runFunc - r.runFunc = func(ctx context.Context) error { - var err error - innerRun := func(ctx context.Context) error { - defer func() { - if recovery := recover(); recovery != nil { - err = fmt.Errorf("panic: %v", recovery) - - if rec.stackPrinter != nil { - rec.stackPrinter.Print(ctx, debug.Stack()) - } - - if rec.reporter != nil { - rec.reporter.Report(ctx, recovery) - } - } - }() - - return originalRunFunc(ctx) - } - - if errInner := innerRun(ctx); errInner != nil { - return errInner - } - - return err - } -} diff --git a/with_recoverer_test.go b/with_recoverer_test.go deleted file mode 100644 index 8f656b2..0000000 --- a/with_recoverer_test.go +++ /dev/null @@ -1,98 +0,0 @@ -package runnable - -import ( - "context" - "fmt" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -type InMemoryReporter struct { - logs []string -} - -func (i *InMemoryReporter) Report(ctx context.Context, rec interface{}) { - i.logs = append(i.logs, fmt.Sprintf("%v", rec)) -} - -func TestWithRecoverer(t *testing.T) { - t.Run("with recoverer", func(t *testing.T) { - counter := 0 - reporter := InMemoryReporter{} - - fn := func(ctx context.Context) error { - defer func() { counter++ }() - panic("something went wrong") - } - r := New(fn, WithRecoverer(&reporter, nil)) - - err := r.Run(context.Background()) - require.Error(t, err) - assert.Equal(t, 1, counter) - assert.Equal(t, []string{"something went wrong"}, reporter.logs) - }) - - t.Run("panics as errors", func(t *testing.T) { - started, stopped := make(chan struct{}), make(chan struct{}) - reporter := &InMemoryReporter{} - - r := New(func(ctx context.Context) error { - started <- struct{}{} - panic("something went wrong") - }, WithRecoverer(reporter, nil)) - - go func() { - err := r.Run(context.Background()) - require.Error(t, err) - stopped <- struct{}{} - }() - - <-started - <-stopped - }) - - t.Run("panics as errors, no panic", func(t *testing.T) { - reporter := &InMemoryReporter{} - started, stopped := make(chan struct{}), make(chan struct{}) - - r := New(func(ctx context.Context) error { - started <- struct{}{} - return nil - }, WithRecoverer(reporter, nil)) - - go func() { - err := r.Run(context.Background()) - require.NoError(t, err) - stopped <- struct{}{} - }() - - <-started - <-stopped - }) - - t.Run("panics as errors, with stats", func(t *testing.T) { - reporter := &InMemoryReporter{} - started, stopped := make(chan struct{}), make(chan struct{}) - - store := NewStatusStore() - r := New(func(ctx context.Context) error { - started <- struct{}{} - panic("something went wrong") - }, WithRecoverer(reporter, nil), WithStatus("test", store)) - - go func() { - err := r.Run(context.Background()) - require.Error(t, err) - stopped <- struct{}{} - }() - - <-started - <-stopped - - s := store.Get() - require.Equal(t, false, s["test"].Running) - require.Error(t, s["test"].LastError) - }) -} diff --git a/with_retry.go b/with_retry.go deleted file mode 100644 index 69740bf..0000000 --- a/with_retry.go +++ /dev/null @@ -1,58 +0,0 @@ -package runnable - -import ( - "context" - "errors" - "time" -) - -const ResetNever time.Duration = 0 - -type withRetry struct { - maxRetries int - resetAfter time.Duration -} - -func WithRetry(maxRetries int, resetAfter time.Duration) Option { - return &withRetry{ - maxRetries: maxRetries, - resetAfter: resetAfter, - } -} - -func (w *withRetry) apply(r *runnable) { - runFunc := r.runFunc - r.runFunc = func(ctx context.Context) error { - // lastTime is per-Run-cycle: a fresh Run after Stop should not - // inherit stale timing state from the prior cycle. - var lastTime time.Time - var err error - for i := 0; i < w.maxRetries; i++ { - if w.resetAfter != ResetNever && time.Since(lastTime) > w.resetAfter { - i = 0 - } - lastTime = time.Now() - - if i > 0 { - if r.onStart != nil { - r.onStart() - } - } - - err = runFunc(ctx) - if err == nil { - return nil - } - if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { - return err - } - - if i > 0 { - if r.onStop != nil { - r.onStop() - } - } - } - return err - } -} diff --git a/with_retry_test.go b/with_retry_test.go deleted file mode 100644 index c4781d5..0000000 --- a/with_retry_test.go +++ /dev/null @@ -1,62 +0,0 @@ -package runnable - -import ( - "context" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestWithRetry(t *testing.T) { - - t.Run("with retry", func(t *testing.T) { - counter := 0 - - r := New(func(ctx context.Context) error { - defer func() { counter++ }() - if counter < 1 { - return assert.AnError - } - - time.Sleep(500 * time.Millisecond) - return nil - }, WithRetry(3, ResetNever)) - - err := r.Run(context.Background()) - require.NoError(t, err) - assert.Equal(t, 2, counter) - }) - - t.Run("with retry, error", func(t *testing.T) { - counter := 0 - - r := New(func(ctx context.Context) error { - defer func() { counter++ }() - return assert.AnError - }, WithRetry(3, ResetNever)) - - err := r.Run(context.Background()) - require.Error(t, err) - assert.Equal(t, 3, counter) - }) - - t.Run("with retry, reset", func(t *testing.T) { - counter := 0 - - r := New(func(ctx context.Context) error { - defer func() { counter++ }() - if counter < 5 { - time.Sleep(200 * time.Millisecond) - return assert.AnError - } - return nil - }, WithRetry(3, 100*time.Millisecond)) - - err := r.Run(context.Background()) - require.NoError(t, err) - assert.Equal(t, 6, counter) - }) - -} diff --git a/with_status.go b/with_status.go index 17e72b4..875e634 100644 --- a/with_status.go +++ b/with_status.go @@ -10,7 +10,6 @@ type StatusMap map[string]Status type Status struct { Running bool `json:"running"` - Restarts int `json:"restarts"` StartTime time.Time `json:"start_time"` EndTime *time.Time `json:"end_time,omitempty"` LastError error `json:"last_error"` @@ -18,7 +17,6 @@ type Status struct { type StatusStore struct { running map[string]bool - restarts map[string]int startTime map[string]time.Time endTime map[string]time.Time lastError map[string]error @@ -29,7 +27,6 @@ type StatusStore struct { func NewStatusStore() *StatusStore { return &StatusStore{ running: make(map[string]bool), - restarts: make(map[string]int), startTime: make(map[string]time.Time), endTime: make(map[string]time.Time), lastError: make(map[string]error), @@ -46,10 +43,6 @@ func (s *StatusStore) Get() StatusMap { Running: running, } - if restarts, ok := s.restarts[id]; ok { - st.Restarts = restarts - } - if startTime, ok := s.startTime[id]; ok { st.StartTime = startTime } @@ -99,15 +92,8 @@ func (w *withStatus) apply(r *runnable) { r.onStart = func() { w.store.mu.Lock() - w.store.running[w.runnableID] = true w.store.startTime[w.runnableID] = time.Now() - if _, ok := w.store.restarts[w.runnableID]; !ok { - w.store.restarts[w.runnableID] = 0 - } else { - w.store.restarts[w.runnableID]++ - } - w.store.mu.Unlock() if onStartRunnable != nil { diff --git a/with_status_test.go b/with_status_test.go index c19f45e..81e0244 100644 --- a/with_status_test.go +++ b/with_status_test.go @@ -52,24 +52,4 @@ func TestWithStatus(t *testing.T) { assert.Equal(t, false, s["test"].Running) assert.Equal(t, assert.AnError, s["test"].LastError) }) - - t.Run("with status, restart", func(t *testing.T) { - store := NewStatusStore() - - counter := 0 - r := New(func(ctx context.Context) error { - defer func() { counter++ }() - if counter < 1 { - return assert.AnError - } - return nil - }, WithStatus("test", store), WithRetry(3, ResetNever)) - - err := r.Run(context.Background()) - require.NoError(t, err) - - s := store.Get() - assert.Equal(t, false, s["test"].Running) - assert.Equal(t, 1, s["test"].Restarts) - }) } From c5f41017d55e02cd27510d91c8e878d509cac703 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Tue, 12 May 2026 21:29:41 +0200 Subject: [PATCH 35/36] docs(readme,adapters): rewrite for runnable.WithAdapters API Update the Adapters and Migrating-from-v0.1 sections of README and the adapters package doc comment to show the runnable.WithAdapters Option shape. Add Recovering and Retry to the migration table and call out the Status.Restarts removal. --- README.md | 97 ++++++++++++++++++++++++------------------------- adapters/doc.go | 11 ++++-- 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index 7a2dbe4..c6e2088 100644 --- a/README.md +++ b/README.md @@ -90,58 +90,38 @@ if err != nil { } ``` -### Runnable Function with retry -```go -fmt.Println("Simple function with retry...") -errorReturned := false -err = runnable.New(func(ctx context.Context) error { - fmt.Println("Starting...") - defer fmt.Println("Stopping...") - - if !errorReturned { - errorReturned = true - return fmt.Errorf("error") - } - - // do something - for i := 0; i < 5; i++ { - select { - case <-ctx.Done(): - return nil - default: - } - time.Sleep(1 * time.Second) - fmt.Println("Running...") - } - return nil -}, runnable.WithRetry(3, runnable.ResetNever)).Run(context.Background()) -if err != nil { - fmt.Println(err) -} -``` - ### Adapters Cross-cutting behaviors that aren't part of the core lifecycle live in -the `runnable/adapters` subpackage as runFunc wrappers. They compose by -nesting. - -**Ticker** — periodic execution: +the `runnable/adapters` subpackage as chi-style middleware: each +`runnable.Adapter` has the shape `func(next RunFunc) RunFunc`. Apply +them with `runnable.WithAdapters` (left-to-right = outermost-to-innermost): ```go -r := runnable.New(adapters.Ticker(30*time.Second, reconcile)) +r := runnable.New(reconcile, runnable.WithAdapters( + adapters.Draining(10*time.Second), + adapters.Recovering(reportPanic), + adapters.Retry(3, time.Minute), + adapters.Ticker(30*time.Second), +)) ``` **Draining** — graceful shutdown with a grace window. When the outer -ctx is cancelled, work has `timeout` to return via `adapters.Stopping(ctx)` -before its ctx is force-cancelled and `adapters.ErrDrainTimedOut` is -returned. +ctx is cancelled, the wrapped work has `timeout` to return via +`adapters.Stopping(ctx)` before its ctx is force-cancelled and +`adapters.ErrDrainTimedOut` is returned. -```go -r := runnable.New(adapters.Draining(10*time.Second, - adapters.Ticker(30*time.Second, reconcile), -)) -``` +**Ticker** — calls the wrapped work once per interval until ctx is +cancelled or the work returns an error. Composes with Draining: an +in-flight tick is allowed to finish before the loop exits. + +**Recovering** — turns panics in the wrapped work into errors and +invokes the optional handler before returning. Place inside Draining +when both are in use. + +**Retry** — re-invokes the wrapped work up to `maxRetries` times on +non-context errors. If `resetAfter` is non-zero and at least that long +has passed since the previous attempt, the retry budget resets. Inside long-running work, always select on both `ctx.Done()` and `adapters.Stopping(ctx)` — `Stopping` signals drain start, `ctx.Done()` @@ -152,26 +132,38 @@ A full SIGTERM-safe service shape lives in ### Migrating from v0.1 to v0.2 -v0.2 moves drain and ticker out of the core package. The Option-based -`WithDrain` and the `NewTicker` constructor are removed; their -replacements live at `runnable/adapters`. +v0.2 moves drain, ticker, retry, and panic recovery out of the core +package. `WithDrain`, `NewTicker`, `WithRetry`, and `WithRecoverer` +are removed; their replacements live at `runnable/adapters` as +chi-style middleware. Before (v0.1): r := runnable.NewTicker(30*time.Second, doWork, runnable.WithDrain(10*time.Second), + runnable.WithRecoverer(reporter, nil), + runnable.WithRetry(3, time.Minute), ) After (v0.2): - r := runnable.New(adapters.Draining(10*time.Second, - adapters.Ticker(30*time.Second, doWork), + r := runnable.New(doWork, runnable.WithAdapters( + adapters.Draining(10*time.Second), + adapters.Recovering(handler), + adapters.Retry(3, time.Minute), + adapters.Ticker(30*time.Second), )) Symbol mapping: -- `runnable.WithDrain` → use `adapters.Draining` as a runFunc wrapper. -- `runnable.NewTicker` → `adapters.Ticker` wrapped by `runnable.New`. +- `runnable.WithDrain` → `adapters.Draining` under `runnable.WithAdapters`. +- `runnable.NewTicker` → `adapters.Ticker` under `runnable.WithAdapters` + (no longer takes the work argument; pass work to `runnable.New`). +- `runnable.WithRetry` / `runnable.ResetNever` → `adapters.Retry` / + `adapters.ResetNever`. +- `runnable.WithRecoverer` → `adapters.Recovering` with a single + `PanicHandler` callback (the two-interface `RecoveryReporter` / + `StackPrinter` split is gone). - `runnable.Stopping` → `adapters.Stopping`. - `runnable.ErrDrainTimedOut` → `adapters.ErrDrainTimedOut`. @@ -182,6 +174,11 @@ the caller waits for `Stop` to return; the drain runs on its own fixed-duration timer regardless. If you need a shorter drain budget, configure `Draining` with the shorter duration. +**Status.Restarts removed.** The `Restarts` field on `Status` counted +`WithRetry` re-entries via the deprecated `onStart` coupling; with +retry moved into adapters it had no clean way to surface. Pending a +proper event/observer hook in a later release. + **NewGroup interaction:** drain-enabled children of `NewGroup` now drain when the group is stopped (v0.1 silently bypassed the child's drain). No code change required at call sites — the adapter design diff --git a/adapters/doc.go b/adapters/doc.go index 33189db..229dd7c 100644 --- a/adapters/doc.go +++ b/adapters/doc.go @@ -3,12 +3,15 @@ // returns a runnable.Adapter; compose them via runnable.WithAdapters // (first listed = outermost wrapper): // -// r := runnable.New(work, runnable.WithAdapters( +// r := runnable.New(reconcile, runnable.WithAdapters( // adapters.Draining(10*time.Second), +// adapters.Recovering(reportPanic), +// adapters.Retry(3, time.Minute), // adapters.Ticker(time.Second), // )) // -// Drain-on-shutdown (Draining) and periodic execution (Ticker) live -// here rather than in the core runnable package so they don't couple -// into the core lifecycle. +// Drain-on-shutdown (Draining), panic-to-error conversion (Recovering), +// retry-on-error (Retry), and periodic execution (Ticker) all live here +// rather than in the core runnable package so they don't couple into +// the core lifecycle. package adapters From e2bf0193605d8553a56a3dd061db52f9a6b9e87c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20-=20=E3=82=A2=E3=83=AC=E3=83=83=E3=82=AF=E3=82=B9?= Date: Tue, 12 May 2026 23:40:51 +0200 Subject: [PATCH 36/36] docs(adapters): tighten godoc on exported symbols Trim doc comments on Adapter / WithAdapters / Draining / Ticker / Recovering / Retry to one or two purpose-stating lines each. Drop restating of implementation details that the function body conveys. --- adapter.go | 14 ++++---------- adapters/doc.go | 12 ++---------- adapters/draining.go | 21 +++++++-------------- adapters/recovering.go | 17 ++++++----------- adapters/retry.go | 13 ++++--------- adapters/ticker.go | 4 ++-- 6 files changed, 25 insertions(+), 56 deletions(-) diff --git a/adapter.go b/adapter.go index 99ccd1b..896163c 100644 --- a/adapter.go +++ b/adapter.go @@ -2,25 +2,19 @@ package runnable import "context" -// RunFunc is the signature wrapped by runnable.New and by Adapters. +// RunFunc is the lifecycle function wrapped by runnable.New. type RunFunc func(ctx context.Context) error // Adapter wraps a RunFunc with cross-cutting behavior, mirroring the -// chi middleware shape. Adapters live in the runnable/adapters -// subpackage and are applied via WithAdapters. +// chi middleware shape. Concrete adapters live in runnable/adapters. type Adapter func(next RunFunc) RunFunc type withAdapters struct { adapters []Adapter } -// WithAdapters wraps the runnable's runFunc with the given adapters. -// Adapters are applied left-to-right as outermost-to-innermost: -// WithAdapters(A, B, C) yields A(B(C(runFunc))). -// -// Apply order across Options matters: WithAdapters sees whatever -// runFunc previous Options installed, and subsequent Options see the -// adapter-wrapped runFunc. +// WithAdapters wraps the runnable's runFunc left-to-right (first listed +// = outermost). Apply order across Options matters. func WithAdapters(adapters ...Adapter) Option { return &withAdapters{adapters: adapters} } diff --git a/adapters/doc.go b/adapters/doc.go index 229dd7c..e4b55ad 100644 --- a/adapters/doc.go +++ b/adapters/doc.go @@ -1,17 +1,9 @@ // Package adapters provides chi-style middleware around the runnable -// RunFunc signature. Each adapter is a config-only constructor that -// returns a runnable.Adapter; compose them via runnable.WithAdapters -// (first listed = outermost wrapper): +// RunFunc signature. Each constructor returns a runnable.Adapter; +// compose them via runnable.WithAdapters (first listed = outermost): // // r := runnable.New(reconcile, runnable.WithAdapters( // adapters.Draining(10*time.Second), -// adapters.Recovering(reportPanic), -// adapters.Retry(3, time.Minute), // adapters.Ticker(time.Second), // )) -// -// Drain-on-shutdown (Draining), panic-to-error conversion (Recovering), -// retry-on-error (Retry), and periodic execution (Ticker) all live here -// rather than in the core runnable package so they don't couple into -// the core lifecycle. package adapters diff --git a/adapters/draining.go b/adapters/draining.go index b84902a..d8a00b3 100644 --- a/adapters/draining.go +++ b/adapters/draining.go @@ -17,25 +17,18 @@ var ErrDrainTimedOut = errors.New("adapters: drain timed out") type stoppingKey struct{} // Stopping returns a channel that closes when Draining begins shutdown, -// or nil when ctx is not inside a Draining adapter. Always also select -// on ctx.Done() — Stopping signals drain start; ctx.Done() fires only -// when the drain timer expires. +// or nil outside Draining. Select on this alongside ctx.Done() — ctx is +// force-cancelled only after the drain timer expires. func Stopping(ctx context.Context) <-chan struct{} { ch, _ := ctx.Value(stoppingKey{}).(<-chan struct{}) return ch } -// Draining returns an Adapter that adds graceful-shutdown semantics: -// when outerCtx is cancelled, next has up to timeout to return via -// Stopping(workCtx) before workCtx is force-cancelled and -// ErrDrainTimedOut is returned. -// -// Panics in next are recovered inside Draining's goroutine and returned -// as an error containing the panic value and stack trace. They do NOT -// propagate to outer recover handlers — recover only fires on the -// goroutine where the deferred call lives, and next runs on its own. -// Compose with Recovering inside Draining if you need a panic handler -// callback. +// Draining returns an Adapter that delays cancellation: when outerCtx +// is cancelled, next has up to timeout to return via Stopping(workCtx) +// before workCtx is force-cancelled and ErrDrainTimedOut is returned. +// Panics in next are recovered into an error (they would otherwise +// crash the process, since next runs on its own goroutine). func Draining(timeout time.Duration) runnable.Adapter { return func(next runnable.RunFunc) runnable.RunFunc { return func(outerCtx context.Context) error { diff --git a/adapters/recovering.go b/adapters/recovering.go index f70335a..9e7c598 100644 --- a/adapters/recovering.go +++ b/adapters/recovering.go @@ -8,19 +8,14 @@ import ( "github.com/0xsequence/runnable" ) -// PanicHandler is invoked when Recovering catches a panic, before the -// adapter converts it into an error. It runs on the same goroutine as -// next, so handlers must not block. +// PanicHandler observes a panic caught by Recovering. Runs on next's +// goroutine, so must not block. type PanicHandler func(ctx context.Context, rec any, stack []byte) -// Recovering returns an Adapter that catches panics from next and -// converts them into errors. handler is optional; pass nil to skip -// reporting. -// -// Place Recovering inside Draining when both are in use: Draining -// already recovers panics in its own goroutine as a safety net, but -// Recovering inside lets the handler observe the panic before -// Draining's generic recovery formats it. +// Recovering returns an Adapter that converts panics from next into +// errors and invokes handler (if non-nil). Place inside Draining when +// both are used, so handler sees the panic before Draining's safety-net +// recovery formats it. func Recovering(handler PanicHandler) runnable.Adapter { return func(next runnable.RunFunc) runnable.RunFunc { return func(ctx context.Context) (err error) { diff --git a/adapters/retry.go b/adapters/retry.go index 168bfb0..4623e5d 100644 --- a/adapters/retry.go +++ b/adapters/retry.go @@ -8,18 +8,13 @@ import ( "github.com/0xsequence/runnable" ) -// ResetNever passed as resetAfter disables retry-budget reset based on -// elapsed time between attempts. +// ResetNever (as resetAfter) disables retry-budget reset. const ResetNever time.Duration = 0 // Retry returns an Adapter that re-invokes next up to maxRetries times -// when it returns a non-nil, non-context error. If resetAfter is -// non-zero and at least that long has passed since the previous -// attempt, the retry budget resets. -// -// Retry never observes Draining's Stopping signal — drain semantics -// belong to Draining alone. Compose with Draining outside Retry if you -// need both. +// on non-context errors. If resetAfter > 0 and at least that long has +// passed since the previous attempt, the budget resets. Retry does not +// observe Stopping — wrap it inside Draining if you need both. func Retry(maxRetries int, resetAfter time.Duration) runnable.Adapter { return func(next runnable.RunFunc) runnable.RunFunc { return func(ctx context.Context) error { diff --git a/adapters/ticker.go b/adapters/ticker.go index d9f5e1a..aff1ec9 100644 --- a/adapters/ticker.go +++ b/adapters/ticker.go @@ -8,8 +8,8 @@ import ( ) // Ticker returns an Adapter that calls next once per interval until -// ctx is cancelled or next returns an error. Composes with Draining: -// an in-flight tick is allowed to finish before the loop exits. +// ctx is cancelled or next errors. Composes with Draining: an in-flight +// tick is allowed to finish before exit. func Ticker(interval time.Duration) runnable.Adapter { return func(next runnable.RunFunc) runnable.RunFunc { return func(ctx context.Context) error {