From da5c1e4ddd8785b88ecf467aa31dfb8f1662e411 Mon Sep 17 00:00:00 2001
From: Ken Sedgwick <ken@bonsai.com>
Date: Tue, 5 May 2026 10:08:16 -0700
Subject: [PATCH 1/3] tests: add xfail-strict reproducer for funder-side stuck
 CHANNELD_AWAITING_LOCKIN

When a node is the funder of a channel whose funding tx never confirms
(broadcast rejected at ATMP, evicted from mempool, or simply never
broadcast), the channel record stays in CHANNELD_AWAITING_LOCKIN
indefinitely. CLN already implements the BOLT 2 fundee-side forget
rule (PR #1468, --max-funding-unconfirmed-blocks, default 2016) but
has no equivalent on the funder side.

The test asserts the desired post-fix behavior (state has moved beyond
CHANNELD_AWAITING_LOCKIN) and is marked @pytest.mark.xfail(strict=True)
so:

  - CI reports XFAIL today (acceptable; documents the open bug)
  - When the bug is fixed, the test reports XPASS, which strict=True
    promotes to a hard failure to alert the dev to remove the marker.

Changelog-None
---
 tests/test_opening.py | 75 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/tests/test_opening.py b/tests/test_opening.py
index 4954c31b87b4..7c4e3de05e73 100644
--- a/tests/test_opening.py
+++ b/tests/test_opening.py
@@ -3018,3 +3018,78 @@ def test_zeroconf_withhold_htlc_failback(node_factory, bitcoind):
 
     # l1's channel to l2 is still normal — no force-close
     assert only_one(l1.rpc.listpeerchannels(l2.info['id'])['channels'])['state'] == 'CHANNELD_NORMAL'
+
+
+@pytest.mark.xfail(
+    strict=True,
+    reason="Bug: funder-side channel stuck in CHANNELD_AWAITING_LOCKIN if funding never confirms"
+)
+def test_funder_stuck_no_funding_confirm(node_factory, bitcoind):
+    """Funder-side channel record is never cleaned up if funding never confirms.
+
+    BOLT 2 recommends (SHOULD) a 2016-block forget rule for fundees and
+    CLN implements it (PR #1468, --max-funding-unconfirmed-blocks). But CLN
+    has no equivalent on the funder side: when the funding tx is
+    rejected at broadcast (e.g. fee too low) or evicted from mempool,
+    the channel record persists in CHANNELD_AWAITING_LOCKIN
+    indefinitely with no auto-cleanup mechanism. Even after the
+    would-be funding inputs are spent in other transactions (making
+    the funding tx permanently unconfirmable), the channel record
+    remains.
+
+    This test demonstrates the stuck state. It is marked xfail-strict
+    because no fix yet exists; once fixed, the marker should be
+    removed.
+    """
+    # Lower the unconfirmed-funding threshold on the funder so we
+    # don't have to mine 2016 blocks to make the point.  This dev
+    # knob is the same one CLN's existing fundee-side test
+    # (test_zeroconf_forget) uses to control --max-funding-unconfirmed-blocks.
+    # On the funder side, no code path consults it — that's the bug.
+    THRESHOLD = 10
+    l1, l2 = node_factory.line_graph(
+        2,
+        fundchannel=False,
+        opts={'dev-max-funding-unconfirmed-blocks': THRESHOLD},
+    )
+    l1.fundwallet(10**7)
+
+    # Censor sendrawtransaction so the funding tx never reaches
+    # bitcoind's mempool.  lightningd will think the broadcast
+    # succeeded; bitcoind never sees the tx.  Same trick as
+    # test_zeroconf_forget.
+    def censor(tx):
+        return {'id': tx['id'], 'result': {}}
+    l1.daemon.rpcproxy.mock_rpc('sendrawtransaction', censor)
+
+    # Open the channel.  Broadcast appears to succeed (mock) but the
+    # tx never lands.
+    l1.rpc.fundchannel(l2.info['id'], 10**6)
+
+    # Both sides reach CHANNELD_AWAITING_LOCKIN.
+    wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['state']
+             == 'CHANNELD_AWAITING_LOCKIN')
+    wait_for(lambda: only_one(l2.rpc.listpeerchannels()['channels'])['state']
+             == 'CHANNELD_AWAITING_LOCKIN')
+
+    # Advance past THRESHOLD blocks.  The funder side has no forget
+    # code path that consults THRESHOLD, so the channel record is
+    # expected to remain in CHANNELD_AWAITING_LOCKIN even though we
+    # set the knob low.
+    bitcoind.generate_block(THRESHOLD + 5)
+    sync_blockheight(bitcoind, [l1, l2])
+
+    # Sanity: funding never confirmed.
+    assert only_one(l1.rpc.listpeerchannels()['channels']).get('short_channel_id') is None
+
+    # Expected behavior under fix: funder's channel record has been
+    # cleaned up (forgotten, transitioned to a new "abandoned"
+    # terminal state, or some other resolved disposition).  Any
+    # forward progress is enough; we do not prescribe a specific
+    # cleanup shape.
+    chans_l1 = l1.rpc.listpeerchannels()['channels']
+    assert all(c['state'] != 'CHANNELD_AWAITING_LOCKIN' for c in chans_l1), (
+        f"l1 (funder) still has channel in CHANNELD_AWAITING_LOCKIN "
+        f"after {THRESHOLD + 5} blocks (THRESHOLD={THRESHOLD}): "
+        f"{[c['state'] for c in chans_l1]}"
+    )

From 19a7976f9fba83a4f5a5fd517910bcd7e44018dc Mon Sep 17 00:00:00 2001
From: Ken Sedgwick <ken@bonsai.com>
Date: Tue, 5 May 2026 12:09:36 -0700
Subject: [PATCH 2/3] tests: add xfail-strict reproducer for funder-side stuck
 AWAITING_UNILATERAL

Same root cause as the previous test_funder_stuck_no_funding_confirm
(funding tx unbroadcastable/unconfirmable, no funder-side cleanup).
This variant covers the second symptom: when the operator (or an
automation like CLBOSS's spenderp) issues `close` on the AWAITING_LOCKIN
channel, CLN transitions to AWAITING_UNILATERAL and tries to broadcast
a commitment tx that spends the (non-existent) funding output. That
commit tx can never confirm either, so the channel record now sits
stuck in AWAITING_UNILATERAL indefinitely.

The test stops l2 before issuing `close`, which forces a unilateral
close and prevents a mutual close from racing in.

Marked xfail-strict so the bug is documented without breaking CI.

Changelog-None
---
 tests/test_opening.py | 66 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/tests/test_opening.py b/tests/test_opening.py
index 7c4e3de05e73..56edd5b390e3 100644
--- a/tests/test_opening.py
+++ b/tests/test_opening.py
@@ -3093,3 +3093,69 @@ def censor(tx):
         f"after {THRESHOLD + 5} blocks (THRESHOLD={THRESHOLD}): "
         f"{[c['state'] for c in chans_l1]}"
     )
+
+
+@pytest.mark.xfail(
+    strict=True,
+    reason="Bug: funder-side channel stuck in AWAITING_UNILATERAL if closed before funding confirms"
+)
+def test_funder_stuck_close_before_funding_confirm(node_factory, bitcoind):
+    """Funder-side channel stuck in AWAITING_UNILATERAL after close
+    if funding never confirmed.
+
+    Same root cause as test_funder_stuck_no_funding_confirm: the
+    funding tx is unbroadcastable/unconfirmable and CLN has no
+    funder-side cleanup.  This variant covers what happens when the
+    operator (or an automation like CLBOSS's spenderp) issues `close`
+    on the AWAITING_LOCKIN channel: CLN transitions to
+    AWAITING_UNILATERAL and tries to broadcast a commitment tx that
+    spends the (non-existent) funding output.  That commit tx can
+    never confirm either, so the channel record now sits stuck in
+    AWAITING_UNILATERAL indefinitely.
+
+    Marked xfail-strict because no fix yet exists; once fixed, the
+    marker should be removed.
+    """
+    THRESHOLD = 10
+    l1, l2 = node_factory.line_graph(
+        2,
+        fundchannel=False,
+        opts={'dev-max-funding-unconfirmed-blocks': THRESHOLD},
+    )
+    l1.fundwallet(10**7)
+
+    def censor(tx):
+        return {'id': tx['id'], 'result': {}}
+    l1.daemon.rpcproxy.mock_rpc('sendrawtransaction', censor)
+
+    l1.rpc.fundchannel(l2.info['id'], 10**6)
+    wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['state']
+             == 'CHANNELD_AWAITING_LOCKIN')
+    wait_for(lambda: only_one(l2.rpc.listpeerchannels()['channels'])['state']
+             == 'CHANNELD_AWAITING_LOCKIN')
+
+    # Force unilateral close.  Stopping l2 ensures mutual close cannot
+    # race in and land us in CLOSINGD_COMPLETE instead.
+    l2.stop()
+    l1.rpc.close(l2.info['id'], unilateraltimeout=1)
+
+    # Funder transitions to AWAITING_UNILATERAL with a commit tx whose
+    # input is the never-existing funding output.  The commit tx is
+    # also censored by the mock; even without the mock it would be
+    # rejected by bitcoind for spending a non-existent output.
+    wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['state']
+             == 'AWAITING_UNILATERAL')
+
+    bitcoind.generate_block(THRESHOLD + 5)
+    sync_blockheight(bitcoind, [l1])
+
+    # Expected behavior under fix: funder's channel record has been
+    # cleaned up (forgotten, transitioned to a new "abandoned" terminal
+    # state, or some other resolved disposition).  Any forward progress
+    # is enough; we do not prescribe a specific cleanup shape.
+    chans_l1 = l1.rpc.listpeerchannels()['channels']
+    assert all(c['state'] != 'AWAITING_UNILATERAL' for c in chans_l1), (
+        f"l1 (funder) still has channel in AWAITING_UNILATERAL "
+        f"after {THRESHOLD + 5} blocks (THRESHOLD={THRESHOLD}): "
+        f"{[c['state'] for c in chans_l1]}"
+    )

From cbbaaff0fc540bc819dae544fc3b21121c83e282 Mon Sep 17 00:00:00 2001
From: Ken Sedgwick <ken@bonsai.com>
Date: Wed, 13 May 2026 16:56:53 -0700
Subject: [PATCH 3/3] tests: extend funder-stuck reproducer with double-spent
 funding inputs

The two existing funder-stuck reproducers (test_funder_stuck_no_funding_confirm,
test_funder_stuck_close_before_funding_confirm) demonstrate the channel
record stays in CHANNELD_AWAITING_LOCKIN / AWAITING_UNILATERAL while
the funding tx is merely unbroadcastable (censored at the proxy).

That leaves a hole in the policy argument: as long as the funding
inputs remain unspent, the funding tx could in principle still
confirm, so continuing to wait in that state is defensible.

This new test removes that hole.  After the channel reaches
CHANNELD_AWAITING_LOCKIN with a censored funding tx, we:

  1. Capture the funding tx hex via the proxy mock.
  2. Force-unreserve the funding inputs (the funding-tx reservation
     is ~2016 blocks, so we explicitly pass a large reserve= value
     to push reserved_til below current height).
  3. Spend the same UTXOs in a separate withdraw tx that DOES land
     on chain (the proxy mock forwards non-funding-tx broadcasts).
  4. Mature the double-spend 100 blocks past confirmation, matching
     Bitcoin's coinbase maturity rule (the canonical reorg-safe depth).

At this point the funding tx is provably and permanently invalid;
no Bitcoin convention treats the spend as still reversible.  Yet
CLN keeps the channel record stuck in CHANNELD_AWAITING_LOCKIN.

The test is marked xfail-strict, like its siblings.  Once a fix
exists, the test will XPASS, which strict=True promotes to a hard
failure to alert the developer to remove the marker.

Reproduces #9112 with a stronger demonstration than the existing
tests.
---
 tests/test_opening.py | 121 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)

diff --git a/tests/test_opening.py b/tests/test_opening.py
index 56edd5b390e3..cf7e6542901d 100644
--- a/tests/test_opening.py
+++ b/tests/test_opening.py
@@ -6,6 +6,7 @@
 )
 from pyln.testing.utils import FUNDAMOUNT
 
+from decimal import Decimal
 from pathlib import Path
 import pytest
 import re
@@ -3159,3 +3160,123 @@ def censor(tx):
         f"after {THRESHOLD + 5} blocks (THRESHOLD={THRESHOLD}): "
         f"{[c['state'] for c in chans_l1]}"
     )
+
+
+# Bitcoin's coinbase maturity rule: coinbase outputs are not spendable
+# until 100 confirmations, the canonical "reorg-safe" depth.  We mine
+# the double-spend to this depth so the assertion is robust against
+# any "but a reorg could undo it" objection — at this depth no Bitcoin
+# convention treats the spend as still reversible.
+COINBASE_MATURITY = 100
+
+
+@pytest.mark.xfail(
+    strict=True,
+    reason="Bug: funder-side channel stuck in CHANNELD_AWAITING_LOCKIN even when funding inputs are demonstrably double-spent at coinbase-maturity depth"
+)
+def test_funder_stuck_funding_inputs_double_spent(node_factory, bitcoind):
+    """Funder-side channel record remains in CHANNELD_AWAITING_LOCKIN
+    even when the funding inputs are spent in another confirmed tx.
+
+    This is the strongest form of the bug.  The other funder-side
+    stuck tests demonstrate the channel stays stuck while the funding
+    tx is merely unbroadcastable (could in principle still confirm if
+    re-broadcast).  This test removes that escape hatch entirely: the
+    funding tx's inputs are spent by a different, confirmed transaction
+    that is matured 100 blocks past confirmation — Bitcoin's coinbase
+    maturity rule, the canonical "reorg-safe" depth.  No Bitcoin
+    convention treats the double-spend as still reversible at this
+    depth, so the funding tx is provably and permanently invalid.
+
+    Yet CLN keeps the channel record in CHANNELD_AWAITING_LOCKIN.
+
+    Marked xfail-strict because no fix yet exists; once fixed, the
+    marker should be removed.
+    """
+    l1, l2 = node_factory.line_graph(2, fundchannel=False)
+    l1.fundwallet(10**7)
+
+    # Capture-and-censor mock.  Stash the first sendrawtransaction (the
+    # funding tx) and censor any re-broadcast of the same hex.  Other
+    # sendrawtransaction calls (our double-spend, which lightningd will
+    # construct from withdraw) are forwarded to bitcoind so they
+    # actually land on chain.
+    captured = []
+
+    def censor(r):
+        raw = r['params'][0]
+        if not captured:
+            captured.append(raw)
+            return {'id': r['id'], 'result': {}}
+        if raw == captured[0]:
+            return {'id': r['id'], 'result': {}}
+        try:
+            txid = bitcoind.rpc.sendrawtransaction(raw)
+            return {'id': r['id'], 'result': txid, 'error': None}
+        except Exception as e:
+            return {'id': r['id'], 'error': {'code': -32603, 'message': str(e)}}
+    l1.daemon.rpcproxy.mock_rpc('sendrawtransaction', censor)
+
+    # Open the channel — funding tx is captured + censored.
+    l1.rpc.fundchannel(l2.info['id'], 10**6)
+    wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['state']
+             == 'CHANNELD_AWAITING_LOCKIN')
+    wait_for(lambda: only_one(l2.rpc.listpeerchannels()['channels'])['state']
+             == 'CHANNELD_AWAITING_LOCKIN')
+    assert len(captured) > 0, "funding tx was not captured"
+
+    # Decode the captured funding tx to extract its inputs.
+    decoded = bitcoind.rpc.decoderawtransaction(captured[0])
+    funding_inputs = [f"{vin['txid']}:{vin['vout']}" for vin in decoded['vin']]
+
+    # The funding-tx reservation marks these UTXOs as reserved for
+    # ~2016 blocks (the dual-open auto-unreserve interval), which
+    # blocks withdraw from selecting them.  Force-unreserve via a
+    # PSBT with the same inputs and a `reserve` value large enough
+    # to push reserved_til back below the current block height.
+    # This mirrors what would happen naturally after 2016 blocks
+    # pass, but compresses the test runtime.  The PSBT outputs are
+    # placeholders; only the input set matters for unreserveinputs.
+    psbt_inputs = [{'txid': vin['txid'], 'vout': vin['vout']}
+                   for vin in decoded['vin']]
+    total_sat = sum(
+        int(bitcoind.rpc.getrawtransaction(vin['txid'], True)
+            ['vout'][vin['vout']]['value'] * Decimal(100_000_000))
+        for vin in decoded['vin']
+    )
+    dummy = bitcoind.rpc.getnewaddress()
+    dummy_psbt = bitcoind.rpc.createpsbt(
+        psbt_inputs,
+        [{dummy: float(Decimal(total_sat - 1000) / Decimal(100_000_000))}],
+    )
+    l1.rpc.unreserveinputs(dummy_psbt, reserve=10_000)
+
+    # Now spend the same UTXOs in a different tx.  This goes through
+    # the proxy's censor mock, which forwards non-funding-tx broadcasts
+    # to bitcoind so the double-spend actually lands.
+    addr = l1.rpc.newaddr('p2tr')['p2tr']
+    l1.rpc.withdraw(addr, "all", utxos=funding_inputs)
+
+    # Confirm the double-spend.
+    bitcoind.generate_block(1)
+    sync_blockheight(bitcoind, [l1])
+
+    # Mature past Bitcoin's coinbase maturity rule so the double-spend
+    # is at canonical "reorg-safe" depth.  Beyond this depth there is
+    # no remaining "but a reorg could undo it" argument.
+    bitcoind.generate_block(COINBASE_MATURITY)
+    sync_blockheight(bitcoind, [l1])
+
+    # Expected behavior under fix: the funder's channel record has
+    # been cleaned up.  The funding tx is provably impossible to
+    # confirm (its inputs are spent at coinbase-maturity depth), so
+    # there is no reason to keep the channel record in
+    # CHANNELD_AWAITING_LOCKIN.  Any forward progress is enough; we
+    # do not prescribe a specific cleanup shape.
+    chans_l1 = l1.rpc.listpeerchannels()['channels']
+    assert all(c['state'] != 'CHANNELD_AWAITING_LOCKIN' for c in chans_l1), (
+        f"l1 (funder) still has channel in CHANNELD_AWAITING_LOCKIN "
+        f"after funding inputs were double-spent and matured to "
+        f"{COINBASE_MATURITY + 1} confirmations: "
+        f"{[c['state'] for c in chans_l1]}"
+    )