From da5c1e4ddd8785b88ecf467aa31dfb8f1662e411 Mon Sep 17 00:00:00 2001
From: Ken Sedgwick
Date: Tue, 5 May 2026 10:08:16 -0700
Subject: [PATCH 1/3] tests: add xfail-strict reproducer for funder-side stuck CHANNELD_AWAITING_LOCKIN

When a node is the funder of a channel whose funding tx never confirms
(broadcast rejected at ATMP, evicted from mempool, or simply never
broadcast), the channel record stays in CHANNELD_AWAITING_LOCKIN
indefinitely. CLN already implements the BOLT 2 fundee-side forget rule
(PR #1468, --max-funding-unconfirmed-blocks, default 2016) but has no
equivalent on the funder side.

The test asserts the desired post-fix behavior (state has moved beyond
CHANNELD_AWAITING_LOCKIN) and is marked @pytest.mark.xfail(strict=True)
so:

- CI reports XFAIL today (acceptable; documents the open bug)
- When the bug is fixed, the test reports XPASS, which strict=True
  promotes to a hard failure to alert the dev to remove the marker.

Changelog-None
---
 tests/test_opening.py | 75 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/tests/test_opening.py b/tests/test_opening.py
index 4954c31b87b4..7c4e3de05e73 100644
--- a/tests/test_opening.py
+++ b/tests/test_opening.py
@@ -3018,3 +3018,78 @@ def test_zeroconf_withhold_htlc_failback(node_factory, bitcoind):
 
     # l1's channel to l2 is still normal — no force-close
     assert only_one(l1.rpc.listpeerchannels(l2.info['id'])['channels'])['state'] == 'CHANNELD_NORMAL'
+
+
+@pytest.mark.xfail(
+    strict=True,
+    reason="Bug: funder-side channel stuck in CHANNELD_AWAITING_LOCKIN if funding never confirms"
+)
+def test_funder_stuck_no_funding_confirm(node_factory, bitcoind):
+    """Funder-side channel record is never cleaned up if funding never confirms.
+
+    BOLT 2 mandates a 2016-block forget rule for fundees and CLN
+    implements it (PR #1468, --max-funding-unconfirmed-blocks). But CLN
+    has no equivalent on the funder side: when the funding tx is
+    rejected at broadcast (e.g. fee too low) or evicted from mempool,
+    the channel record persists in CHANNELD_AWAITING_LOCKIN
+    indefinitely with no auto-cleanup mechanism. Even after the
+    would-be funding inputs are spent in other transactions (making
+    the funding tx permanently unconfirmable), the channel record
+    remains.
+
+    This test demonstrates the stuck state. It is marked xfail-strict
+    because no fix yet exists; once fixed, the marker should be
+    removed.
+    """
+    # Lower the unconfirmed-funding threshold on the funder so we
+    # don't have to mine 2016 blocks to make the point. This dev
+    # knob is the same one CLN's existing fundee-side test
+    # (test_zeroconf_forget) uses to control --max-funding-unconfirmed-blocks.
+    # On the funder side, no code path consults it — that's the bug.
+    THRESHOLD = 10
+    l1, l2 = node_factory.line_graph(
+        2,
+        fundchannel=False,
+        opts={'dev-max-funding-unconfirmed-blocks': THRESHOLD},
+    )
+    l1.fundwallet(10**7)
+
+    # Censor sendrawtransaction so the funding tx never reaches
+    # bitcoind's mempool. lightningd will think the broadcast
+    # succeeded; bitcoind never sees the tx. Same trick as
+    # test_zeroconf_forget.
+    def censor(tx):
+        return {'id': tx['id'], 'result': {}}
+    l1.daemon.rpcproxy.mock_rpc('sendrawtransaction', censor)
+
+    # Open the channel. Broadcast appears to succeed (mock) but the
+    # tx never lands.
+    l1.rpc.fundchannel(l2.info['id'], 10**6)
+
+    # Both sides reach CHANNELD_AWAITING_LOCKIN.
+    wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['state']
+             == 'CHANNELD_AWAITING_LOCKIN')
+    wait_for(lambda: only_one(l2.rpc.listpeerchannels()['channels'])['state']
+             == 'CHANNELD_AWAITING_LOCKIN')
+
+    # Advance past THRESHOLD blocks. The funder side has no forget
+    # code path that consults THRESHOLD, so the channel record is
+    # expected to remain in CHANNELD_AWAITING_LOCKIN even though we
+    # set the knob low.
+    bitcoind.generate_block(THRESHOLD + 5)
+    sync_blockheight(bitcoind, [l1, l2])
+
+    # Sanity: funding never confirmed (must also hold if a fix already forgot the channel, so no only_one()).
+    assert all(c.get('short_channel_id') is None for c in l1.rpc.listpeerchannels()['channels'])
+
+    # Expected behavior under fix: funder's channel record has been
+    # cleaned up (forgotten, transitioned to a new "abandoned"
+    # terminal state, or some other resolved disposition). Any
+    # forward progress is enough; we do not prescribe a specific
+    # cleanup shape.
+    chans_l1 = l1.rpc.listpeerchannels()['channels']
+    assert all(c['state'] != 'CHANNELD_AWAITING_LOCKIN' for c in chans_l1), (
+        f"l1 (funder) still has channel in CHANNELD_AWAITING_LOCKIN "
+        f"after {THRESHOLD + 5} blocks (THRESHOLD={THRESHOLD}): "
+        f"{[c['state'] for c in chans_l1]}"
+    )

From 19a7976f9fba83a4f5a5fd517910bcd7e44018dc Mon Sep 17 00:00:00 2001
From: Ken Sedgwick
Date: Tue, 5 May 2026 12:09:36 -0700
Subject: [PATCH 2/3] tests: add xfail-strict reproducer for funder-side stuck AWAITING_UNILATERAL

Same root cause as the previous test_funder_stuck_no_funding_confirm
(funding tx unbroadcastable/unconfirmable, no funder-side cleanup).
This variant covers the second symptom: when the operator (or an
automation like CLBOSS's spenderp) issues `close` on the
AWAITING_LOCKIN channel, CLN transitions to AWAITING_UNILATERAL and
tries to broadcast a commitment tx that spends the (non-existent)
funding output. That commit tx can never confirm either, so the
channel record now sits stuck in AWAITING_UNILATERAL indefinitely.

Stops l2 before close to force unilateral and avoid mutual close
racing in. Marked xfail-strict so the bug is documented without
breaking CI.

Changelog-None
---
 tests/test_opening.py | 66 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/tests/test_opening.py b/tests/test_opening.py
index 7c4e3de05e73..56edd5b390e3 100644
--- a/tests/test_opening.py
+++ b/tests/test_opening.py
@@ -3093,3 +3093,69 @@ def censor(tx):
         f"after {THRESHOLD + 5} blocks (THRESHOLD={THRESHOLD}): "
         f"{[c['state'] for c in chans_l1]}"
     )
+
+
+@pytest.mark.xfail(
+    strict=True,
+    reason="Bug: funder-side channel stuck in AWAITING_UNILATERAL if closed before funding confirms"
+)
+def test_funder_stuck_close_before_funding_confirm(node_factory, bitcoind):
+    """Funder-side channel stuck in AWAITING_UNILATERAL after close
+    if funding never confirmed.
+
+    Same root cause as test_funder_stuck_no_funding_confirm: the
+    funding tx is unbroadcastable/unconfirmable and CLN has no
+    funder-side cleanup. This variant covers what happens when the
+    operator (or an automation like CLBOSS's spenderp) issues `close`
+    on the AWAITING_LOCKIN channel: CLN transitions to
+    AWAITING_UNILATERAL and tries to broadcast a commitment tx that
+    spends the (non-existent) funding output. That commit tx can
+    never confirm either, so the channel record now sits stuck in
+    AWAITING_UNILATERAL indefinitely.
+
+    Marked xfail-strict because no fix yet exists; once fixed, the
+    marker should be removed.
+    """
+    THRESHOLD = 10
+    l1, l2 = node_factory.line_graph(
+        2,
+        fundchannel=False,
+        opts={'dev-max-funding-unconfirmed-blocks': THRESHOLD},
+    )
+    l1.fundwallet(10**7)
+
+    def censor(tx):
+        return {'id': tx['id'], 'result': {}}
+    l1.daemon.rpcproxy.mock_rpc('sendrawtransaction', censor)
+
+    l1.rpc.fundchannel(l2.info['id'], 10**6)
+    wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['state']
+             == 'CHANNELD_AWAITING_LOCKIN')
+    wait_for(lambda: only_one(l2.rpc.listpeerchannels()['channels'])['state']
+             == 'CHANNELD_AWAITING_LOCKIN')
+
+    # Force unilateral close. Stopping l2 ensures mutual close cannot
+    # race in and land us in CLOSINGD_COMPLETE instead.
+    l2.stop()
+    l1.rpc.close(l2.info['id'], unilateraltimeout=1)
+
+    # Funder transitions to AWAITING_UNILATERAL with a commit tx whose
+    # input is the never-existing funding output. The commit tx is
+    # also censored by the mock; even without the mock it would be
+    # rejected by bitcoind for spending a non-existent output.
+    wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['state']
+             == 'AWAITING_UNILATERAL')
+
+    bitcoind.generate_block(THRESHOLD + 5)
+    sync_blockheight(bitcoind, [l1])
+
+    # Expected behavior under fix: funder's channel record has been
+    # cleaned up (forgotten, transitioned to a new "abandoned" terminal
+    # state, or some other resolved disposition). Any forward progress
+    # is enough; we do not prescribe a specific cleanup shape.
+    chans_l1 = l1.rpc.listpeerchannels()['channels']
+    assert all(c['state'] != 'AWAITING_UNILATERAL' for c in chans_l1), (
+        f"l1 (funder) still has channel in AWAITING_UNILATERAL "
+        f"after {THRESHOLD + 5} blocks (THRESHOLD={THRESHOLD}): "
+        f"{[c['state'] for c in chans_l1]}"
+    )

From cbbaaff0fc540bc819dae544fc3b21121c83e282 Mon Sep 17 00:00:00 2001
From: Ken Sedgwick
Date: Wed, 13 May 2026 16:56:53 -0700
Subject: [PATCH 3/3] tests: extend funder-stuck reproducer with double-spent funding inputs

The two existing funder-stuck reproducers
(test_funder_stuck_no_funding_confirm,
test_funder_stuck_close_before_funding_confirm) demonstrate the channel
record stays in CHANNELD_AWAITING_LOCKIN / AWAITING_UNILATERAL while
the funding tx is merely unbroadcastable (censored at the proxy). That
leaves a hole in the policy argument: as long as the funding inputs
remain unspent, the funding tx could in principle still confirm, so
the state-machine wait is defensible.

This new test removes that hole. After the channel reaches
CHANNELD_AWAITING_LOCKIN with a censored funding tx, we:

1. Capture the funding tx hex via the proxy mock.
2. Force-unreserve the funding inputs (the funding-tx reservation is
   ~2016 blocks, so we explicitly pass a large reserve= value to push
   reserved_til below current height).
3. Spend the same UTXOs in a separate withdraw tx that DOES land on
   chain (the proxy mock forwards non-funding-tx broadcasts).
4. Mature the double-spend 100 blocks past confirmation, matching
   Bitcoin's coinbase maturity rule (the canonical reorg-safe depth).

At this point the funding tx is provably and permanently invalid; no
Bitcoin convention treats the spend as still reversible. Yet CLN keeps
the channel record stuck in CHANNELD_AWAITING_LOCKIN.

The test is marked xfail-strict, like its siblings. Once a fix exists,
strict=True turns the resulting XPASS into a hard failure, alerting the
developer to remove the marker.

Reproduces #9112 with a stronger demonstration than the existing
tests.
---
 tests/test_opening.py | 121 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)

diff --git a/tests/test_opening.py b/tests/test_opening.py
index 56edd5b390e3..cf7e6542901d 100644
--- a/tests/test_opening.py
+++ b/tests/test_opening.py
@@ -6,6 +6,7 @@
 )
 from pyln.testing.utils import FUNDAMOUNT
 
+from decimal import Decimal
 from pathlib import Path
 import pytest
 import re
@@ -3159,3 +3160,123 @@ def censor(tx):
         f"after {THRESHOLD + 5} blocks (THRESHOLD={THRESHOLD}): "
         f"{[c['state'] for c in chans_l1]}"
     )
+
+
+# Bitcoin's coinbase maturity rule: coinbase outputs are not spendable
+# until 100 confirmations, the canonical "reorg-safe" depth. We mine
+# the double-spend to this depth so the assertion is robust against
+# any "but a reorg could undo it" objection — at this depth no Bitcoin
+# convention treats the spend as still reversible.
+COINBASE_MATURITY = 100
+
+
+@pytest.mark.xfail(
+    strict=True,
+    reason="Bug: funder-side channel stuck in CHANNELD_AWAITING_LOCKIN even when funding inputs are demonstrably double-spent at coinbase-maturity depth"
+)
+def test_funder_stuck_funding_inputs_double_spent(node_factory, bitcoind):
+    """Funder-side channel record remains in CHANNELD_AWAITING_LOCKIN
+    even when the funding inputs are spent in another confirmed tx.
+
+    This is the strongest form of the bug. The other funder-side
+    stuck tests demonstrate the channel stays stuck while the funding
+    tx is merely unbroadcastable (could in principle still confirm if
+    re-broadcast). This test removes that escape hatch entirely: the
+    funding tx's inputs are spent by a different, confirmed transaction
+    that is matured 100 blocks past confirmation — Bitcoin's coinbase
+    maturity rule, the canonical "reorg-safe" depth. No Bitcoin
+    convention treats the double-spend as still reversible at this
+    depth, so the funding tx is provably and permanently invalid.
+
+    Yet CLN keeps the channel record in CHANNELD_AWAITING_LOCKIN.
+
+    Marked xfail-strict because no fix yet exists; once fixed, the
+    marker should be removed.
+    """
+    l1, l2 = node_factory.line_graph(2, fundchannel=False)
+    l1.fundwallet(10**7)
+
+    # Capture-and-censor mock. Stash the first sendrawtransaction (the
+    # funding tx) and censor any re-broadcast of the same hex. Other
+    # sendrawtransaction calls (our double-spend, which lightningd will
+    # construct from withdraw) are forwarded to bitcoind so they
+    # actually land on chain.
+    captured = []
+
+    def censor(r):
+        raw = r['params'][0]
+        if not captured:
+            captured.append(raw)
+            return {'id': r['id'], 'result': {}}
+        if raw == captured[0]:
+            return {'id': r['id'], 'result': {}}
+        try:
+            txid = bitcoind.rpc.sendrawtransaction(raw)
+            return {'id': r['id'], 'result': txid, 'error': None}
+        except Exception as e:
+            return {'id': r['id'], 'error': {'code': -32603, 'message': str(e)}}
+    l1.daemon.rpcproxy.mock_rpc('sendrawtransaction', censor)
+
+    # Open the channel — funding tx is captured + censored.
+    l1.rpc.fundchannel(l2.info['id'], 10**6)
+    wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['state']
+             == 'CHANNELD_AWAITING_LOCKIN')
+    wait_for(lambda: only_one(l2.rpc.listpeerchannels()['channels'])['state']
+             == 'CHANNELD_AWAITING_LOCKIN')
+    assert len(captured) > 0, "funding tx was not captured"
+
+    # Decode the captured funding tx to extract its inputs.
+    decoded = bitcoind.rpc.decoderawtransaction(captured[0])
+    funding_inputs = [f"{vin['txid']}:{vin['vout']}" for vin in decoded['vin']]
+
+    # The funding-tx reservation marks these UTXOs as reserved for
+    # ~2016 blocks (the dual-open auto-unreserve interval), which
+    # blocks withdraw from selecting them. Force-unreserve via a
+    # PSBT with the same inputs and a `reserve` value large enough
+    # to push reserved_til back below the current block height.
+    # This mirrors what would happen naturally after 2016 blocks
+    # pass, but compresses the test runtime. The PSBT outputs are
+    # placeholders; only the input set matters for unreserveinputs.
+    psbt_inputs = [{'txid': vin['txid'], 'vout': vin['vout']}
+                   for vin in decoded['vin']]
+    total_sat = sum(
+        int(bitcoind.rpc.getrawtransaction(vin['txid'], True)
+            ['vout'][vin['vout']]['value'] * Decimal(100_000_000))
+        for vin in decoded['vin']
+    )
+    dummy = bitcoind.rpc.getnewaddress()
+    dummy_psbt = bitcoind.rpc.createpsbt(
+        psbt_inputs,
+        [{dummy: float(Decimal(total_sat - 1000) / Decimal(100_000_000))}],
+    )
+    l1.rpc.unreserveinputs(dummy_psbt, reserve=10_000)
+
+    # Now spend the same UTXOs in a different tx. This goes through
+    # the proxy's censor mock, which forwards non-funding-tx broadcasts
+    # to bitcoind so the double-spend actually lands.
+    addr = l1.rpc.newaddr('p2tr')['p2tr']
+    l1.rpc.withdraw(addr, "all", utxos=funding_inputs)
+
+    # Confirm the double-spend.
+    bitcoind.generate_block(1)
+    sync_blockheight(bitcoind, [l1])
+
+    # Mature past Bitcoin's coinbase maturity rule so the double-spend
+    # is at canonical "reorg-safe" depth. Beyond this depth there is
+    # no remaining "but a reorg could undo it" argument.
+    bitcoind.generate_block(COINBASE_MATURITY)
+    sync_blockheight(bitcoind, [l1])
+
+    # Expected behavior under fix: the funder's channel record has
+    # been cleaned up. The funding tx is provably impossible to
+    # confirm (its inputs are spent at coinbase-maturity depth), so
+    # there is no reason to keep the channel record in
+    # CHANNELD_AWAITING_LOCKIN. Any forward progress is enough; we
+    # do not prescribe a specific cleanup shape.
+    chans_l1 = l1.rpc.listpeerchannels()['channels']
+    assert all(c['state'] != 'CHANNELD_AWAITING_LOCKIN' for c in chans_l1), (
+        f"l1 (funder) still has channel in CHANNELD_AWAITING_LOCKIN "
+        f"after funding inputs were double-spent and matured to "
+        f"{COINBASE_MATURITY + 1} confirmations: "
+        f"{[c['state'] for c in chans_l1]}"
+    )