diff --git a/.github/buildomat/jobs/bench.sh b/.github/buildomat/jobs/bench.sh
index b96773f0..019e6a39 100644
--- a/.github/buildomat/jobs/bench.sh
+++ b/.github/buildomat/jobs/bench.sh
@@ -39,8 +39,19 @@ fi
 
 pfexec /usr/lib/brand/omicron1/baseline -w /var/run/brand/omicron1/baseline
 
+# Resolve the invoking user for ownership restoration. When this script is run
+# elevated locally, the effective `id -un` resolves to root and would chown the
+# tree to root. Prefer `$SUDO_USER` (set by sudo), then `logname`, which reports
+# the login user across pfexec, and finally fall back to `id -un` (e.g. in CI).
+run_user=${SUDO_USER:-$(logname 2>/dev/null || id -un)}
+run_group=$(id -gn "$run_user" 2>/dev/null || id -gn)
+
 function cleanup {
-    pfexec chown -R `id -un`:`id -gn` .
+    # A restore-to-owner that resolves to root would defeat its own purpose.
+    # Skip rather than re-root the tree.
+    if [[ $run_user != root ]]; then
+        pfexec chown -R "$run_user":"$run_group" .
+    fi
     if [[ -z $BUILDOMAT_JOB_ID ]]; then
         pfexec rm -rf /input/xde
     fi
@@ -67,7 +78,14 @@ function get_artifact {
     return $curl_res
 }
 
-OUT_DIR=/work/bench-results
+# TGT_BASE allows one to run this more easily in their local
+# environment:
+#
+#   TGT_BASE=/var/tmp ./bench.sh
+#
+TGT_BASE=${TGT_BASE:=/work}
+
+OUT_DIR=$TGT_BASE/bench-results
 
 mkdir -p $OUT_DIR
 mkdir -p target/criterion
@@ -118,6 +136,6 @@ cargo ubench
 cp -r target/criterion $OUT_DIR
 cp -r target/xde-bench $OUT_DIR
 
-pushd /work
+pushd $TGT_BASE
 tar -caf bench-results.tgz bench-results
 popd
diff --git a/.github/buildomat/jobs/opteadm.sh b/.github/buildomat/jobs/opteadm.sh
index 6838a743..62cf2977 100755
--- a/.github/buildomat/jobs/opteadm.sh
+++ b/.github/buildomat/jobs/opteadm.sh
@@ -41,9 +41,16 @@ ptime -m cargo build --release
 
 popd
 
+# TGT_BASE allows one to run this more easily in their local
+# environment:
+#
+#   TGT_BASE=/var/tmp ./opteadm.sh
+#
+TGT_BASE=${TGT_BASE:=/work}
+
 for x in debug release
 do
-    mkdir -p /work/$x
-    cp target/$x/opteadm /work/$x/
-    sha256sum "target/$x/opteadm" > "/work/$x/opteadm.$x.sha256"
+    mkdir -p $TGT_BASE/$x
+    cp target/$x/opteadm $TGT_BASE/$x/
+    sha256sum "target/$x/opteadm" > "$TGT_BASE/$x/opteadm.$x.sha256"
 done
diff --git a/.github/buildomat/jobs/test.sh b/.github/buildomat/jobs/test.sh
index 544eea88..b32dd7ce 100755
--- a/.github/buildomat/jobs/test.sh
+++ b/.github/buildomat/jobs/test.sh
@@ -22,17 +22,36 @@ set -o xtrace
 
 pfexec pkg install brand/omicron1 brand/omicron1/tools opte
 
+# TGT_BASE mirrors the artifact location used by xde.sh. Override it to match a
+# local xde.sh run, e.g. TGT_BASE=/var/tmp ./test.sh, so the test binaries are
+# found without having to write to the root-owned /work.
+TGT_BASE=${TGT_BASE:=/work}
+
 if [[ -z $BUILDOMAT_JOB_ID ]]; then
     echo Note: if you are running this locally, you must run the xde.sh job first
     echo to have the artifacts at the expected spot.
     pfexec mkdir -p /input/xde
-    pfexec ln -s /work /input/xde/work
+    # Replace any stale symlink from an interrupted prior run so the link is
+    # idempotent across local re-runs.
+    pfexec rm -f /input/xde/work
+    pfexec ln -s $TGT_BASE /input/xde/work
 fi
 
 pfexec /usr/lib/brand/omicron1/baseline -w /var/run/brand/omicron1/baseline
 
+# Resolve the invoking user for ownership restoration. When this script is run
+# elevated locally, the effective `id -un` resolves to root and would chown the
+# tree to root. Prefer `$SUDO_USER` (set by sudo), then `logname`, which reports
+# the login user across pfexec, and finally fall back to `id -un` (e.g. in CI).
+run_user=${SUDO_USER:-$(logname 2>/dev/null || id -un)}
+run_group=$(id -gn "$run_user" 2>/dev/null || id -gn)
+
 function cleanup {
-    pfexec chown -R `id -un`:`id -gn` .
+    # A restore-to-owner that resolves to root would defeat its own purpose.
+    # Skip rather than re-root the tree.
+    if [[ $run_user != root ]]; then
+        pfexec chown -R "$run_user":"$run_group" .
+    fi
     if [[ -z $BUILDOMAT_JOB_ID ]]; then
         pfexec rm -rf /input/xde
     fi
@@ -98,6 +117,9 @@ pfexec /input/xde/work/test/multicast_validation --nocapture --test-threads=1
 pfexec chmod +x /input/xde/work/test/multicast_source_filter
 pfexec /input/xde/work/test/multicast_source_filter --nocapture --test-threads=1
 
+pfexec chmod +x /input/xde/work/test/multicast_multi_nexthop
+pfexec /input/xde/work/test/multicast_multi_nexthop --nocapture --test-threads=1
+
 banner "teardown"
 # Ensure full driver teardown is exercised after tests complete
 pfexec rem_drv xde
diff --git a/.github/buildomat/jobs/xde.sh b/.github/buildomat/jobs/xde.sh
index cf676a1a..a41eda63 100755
--- a/.github/buildomat/jobs/xde.sh
+++ b/.github/buildomat/jobs/xde.sh
@@ -18,6 +18,7 @@
 #:   "=/work/test/multicast_multi_sub",
 #:   "=/work/test/multicast_validation",
 #:   "=/work/test/multicast_source_filter",
+#:   "=/work/test/multicast_multi_nexthop",
 #:   "=/work/xde.conf",
 #: ]
 #:
@@ -62,7 +63,7 @@ install_pkg jq
 
 pushd xde
 
-cp xde.conf /work/xde.conf
+cp xde.conf $TGT_BASE/xde.conf
 
 header "check style"
 ptime -m cargo +$NIGHTLY fmt -p xde -p xde-link -- --check
@@ -140,9 +141,15 @@ multicast_source_filter_test=$(
     cargo build -q --test multicast_source_filter --message-format=json |\
     jq -r "select(.profile.test == true) | .filenames[]"
 )
-mkdir -p /work/test
-cp $loopback_test /work/test/loopback
-cp $multicast_rx_test /work/test/multicast_rx
-cp $multicast_multi_sub_test /work/test/multicast_multi_sub
-cp $multicast_validation_test /work/test/multicast_validation
-cp $multicast_source_filter_test /work/test/multicast_source_filter
+cargo build --test multicast_multi_nexthop
+multicast_multi_nexthop_test=$(
+    cargo build -q --test multicast_multi_nexthop --message-format=json |\
+    jq -r "select(.profile.test == true) | .filenames[]"
+)
+mkdir -p $TGT_BASE/test
+cp $loopback_test $TGT_BASE/test/loopback
+cp $multicast_rx_test $TGT_BASE/test/multicast_rx
+cp $multicast_multi_sub_test $TGT_BASE/test/multicast_multi_sub
+cp $multicast_validation_test $TGT_BASE/test/multicast_validation
+cp $multicast_source_filter_test $TGT_BASE/test/multicast_source_filter
+cp $multicast_multi_nexthop_test $TGT_BASE/test/multicast_multi_nexthop
diff --git a/Cargo.lock b/Cargo.lock
index 94f50821..134509c2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2616,6 +2616,7 @@ name = "xde-tests"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "dlpi",
  "libnet",
  "opte-ioctl",
  "opte-test-utils",
diff --git a/Cargo.toml b/Cargo.toml
index 7fd38941..322367ea 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -48,6 +48,7 @@ crc32fast = { version = "1", default-features = false }
 criterion = "0.8"
 ctor = "0.10"
 darling = "0.23"
+dlpi = { git = "https://github.com/oxidecomputer/dlpi-sys", default-features = false }
 dyn-clone = "1.0"
 heapless = "0.8"
 ingot = "0.1.1"
diff --git a/dtrace/opte-mcast-delivery.d b/dtrace/opte-mcast-delivery.d
index babd5a1d..7d309576 100644
--- a/dtrace/opte-mcast-delivery.d
+++ b/dtrace/opte-mcast-delivery.d
@@ -137,6 +137,9 @@ mcast-local-delivery {
 	@by_vni["DELIVER", this->vni] = count();
 	@by_port[this->port] = count();
 	@by_group["DELIVER", this->group_str] = count();
+
+	/* Per-(port,group,vni) delivery matrix for end-to-end fan-out verification. */
+	@deliver_by_port_group[this->port, this->group_str, this->vni] = count();
 }
 
 mcast-local-delivery
@@ -165,6 +168,9 @@ mcast-underlay-fwd {
 	@by_vni["UNDERLAY", this->vni] = count();
 	@by_underlay["UNDERLAY", this->underlay_str] = count();
 	@by_nexthop_unicast[this->next_hop_str] = count();
+
+	/* Per-(group,vni,next-hop) fan-out; cross-checks ddm-peers rear-port count. */
+	@fwd_by_group_nh[this->underlay_str, this->vni, this->next_hop_str] = count();
 }
 
 mcast-underlay-fwd
@@ -421,6 +427,11 @@ mcast-source-filtered {
 	@by_vni["FILTERED", this->vni] = count();
 	@by_port[this->port] = count();
 	@filtered_by_mode[this->mode_str] = count();
+
+	/* Per-(event,scope,group,vni) drops for end-to-end loss attribution.
+	 * The scope names the address space of the group column: overlay for the
+	 * inner multicast group, underlay for the outer delivery address. */
+	@drops["FILTERED", "overlay", this->dst_str, this->vni] = count();
 }
 
 mcast-source-filtered
@@ -454,6 +465,9 @@ mcast-fwd-source-filtered {
 	@by_vni["FWD_FILT", this->vni] = count();
 	@by_nexthop_unicast[this->next_hop_str] = count();
 	@filtered_by_mode[this->mode_str] = count();
+
+	/* Per-(event,scope,group,vni) drops; see mcast-source-filtered. */
+	@drops["FWD_FILT", "overlay", this->dst_str, this->vni] = count();
 }
 
 mcast-fwd-source-filtered
@@ -473,9 +487,13 @@ mcast-no-fwd-entry {
 	/* arg0=underlay_ptr, arg1=vni */
 	this->underlay = (in6_addr_t *)arg0;
 	this->vni = arg1;
+	this->underlay_str = inet_ntoa6(this->underlay);
 
 	/* Always track aggregations */
 	@by_event["NOFWD"] = count();
+
+	/* Per-(event,scope,group,vni) drops; see mcast-source-filtered. */
+	@drops["NOFWD", "underlay", this->underlay_str, this->vni] = count();
 }
 
 mcast-no-fwd-entry
@@ -499,10 +517,16 @@ END
 	printa(@by_underlay);
 	printf("\nLocal delivery by port:\n");
 	printa(@by_port);
+	printf("\nDelivery matrix (port, group, vni):\n");
+	printa(@deliver_by_port_group);
 	printf("\nForwarding by unicast next hop (routing address):\n");
 	printa(@by_nexthop_unicast);
+	printf("\nForwarding fan-out (underlay group, vni, next hop):\n");
+	printa(@fwd_by_group_nh);
 	printf("\nSource filtering by mode:\n");
 	printa(@filtered_by_mode);
+	printf("\nDrops (event, scope, group, vni):\n");
+	printa(@drops);
 	printf("\nConfig ops:\n");
 	printa(@cfg_counts);
 }
diff --git a/xde-tests/Cargo.toml b/xde-tests/Cargo.toml
index 6ca3dc3a..2747ea36 100644
--- a/xde-tests/Cargo.toml
+++ b/xde-tests/Cargo.toml
@@ -12,6 +12,7 @@ opte-test-utils.workspace = true
 oxide-vpc.workspace = true
 
 anyhow.workspace = true
+dlpi.workspace = true
 libnet.workspace = true
 rand.workspace = true
 slog.workspace = true
diff --git a/xde-tests/src/lib.rs b/xde-tests/src/lib.rs
index a3772df9..8ed6398a 100644
--- a/xde-tests/src/lib.rs
+++ b/xde-tests/src/lib.rs
@@ -8,6 +8,16 @@ use anyhow::Result;
 use anyhow::anyhow;
 use anyhow::bail;
 use opte_ioctl::OpteHdl;
+use opte_test_utils::Ethernet;
+use opte_test_utils::Ethertype;
+use opte_test_utils::GENEVE_PORT;
+use opte_test_utils::Geneve;
+use opte_test_utils::HeaderLen;
+use opte_test_utils::IngotIpProto;
+use opte_test_utils::Ipv4;
+use opte_test_utils::Ipv6;
+use opte_test_utils::MsgBlk;
+use opte_test_utils::Udp;
 use oxide_vpc::api::AddFwRuleReq;
 use oxide_vpc::api::AddRouterEntryReq;
 use oxide_vpc::api::Address;
@@ -150,6 +160,20 @@ pub const GENEVE_UNDERLAY_FILTER: &str = "ip6 and udp port 6081";
 /// The simnet pair creates a loopback underlay for multicast tests.
 pub const UNDERLAY_TEST_DEVICE: &str = "xde_test_sim1";
 
+/// Underlay device used to inject raw frames into the receive path.
+///
+/// A frame written here (the simnet `end_a`) is received on its peer
+/// [`UNDERLAY_TEST_DEVICE`] (`end_b`), rises through `xde_test_vnic1`'s MAC
+/// client, and reaches XDE's `xde_rx` callback.
+pub const UNDERLAY_INJECT_DEVICE: &str = "xde_test_sim0";
+
+/// Service access point (SAP) bound on the raw injection stream purely to
+/// reach DLPI's `DL_IDLE` state, a precondition of `dlpi_send`. For Ethernet
+/// the SAP is the ethertype. In `DLPI_RAW` mode it plays no role in building
+/// the frame, so this is an unused experimental ethertype chosen to avoid
+/// demuxing real inbound traffic back into the stream.
+const INJECT_SAP: u32 = 0x4000;
+
 /// This is a wrapper around the ztest::Zone object that encapsulates common
 /// logic needed for running the OPTE tests zones used in this test suite.
 pub struct OpteZone {
@@ -729,6 +753,135 @@ pub fn ensure_underlay_admin_scoped_route_v6(interface: &str) -> Result<()> {
     Ok(())
 }
 
+/// Inject a raw Geneve-over-IPv6 multicast frame onto the underlay receive path.
+///
+/// Builds the full wire frame for an IPv4 multicast datagram tunnelled in
+/// Geneve and writes it to [`UNDERLAY_INJECT_DEVICE`] in DLPI raw mode, so it
+/// arrives at XDE's `xde_rx` callback exactly as a frame from a remote sled
+/// would. This exercises `handle_mcast_rx` in isolation: no Tx processing and
+/// thus no `guest_loopback` same-sled delivery occurs, unlike a guest send via
+/// [`OpteZone::send_udp_v4`]/[`OpteZone::send_udp_v6`].
+///
+/// `underlay_group` is the outer IPv6 multicast destination (the subscribed
+/// [`MulticastUnderlay`] group). `inner_src`/`inner_dst` are the inner IPv4
+/// source (subject to source filtering) and multicast group. `vni` is the
+/// Geneve VNI; Rx delivery keys on the outer group, not the VNI, but it must
+/// be well-formed to parse. `dst_port`/`payload` fill the inner UDP datagram.
+///
+/// # Errors
+///
+/// Returns an error if the DLPI link cannot be opened in raw mode or bound, or
+/// if the frame cannot be transmitted.
+///
+/// # Examples
+///
+/// ```ignore
+/// inject_underlay_mcast_v4(
+///     &mcast_underlay,                  // underlay_group
+///     "10.0.0.1".parse().unwrap(),      // inner_src
+///     Ipv4Addr::from([224, 0, 0, 251]), // inner_dst
+///     Vni::new(DEFAULT_MULTICAST_VNI)?, // vni
+///     MCAST_TEST_PORT,                  // dst_port
+///     b"rx-only",                       // payload
+/// )?;
+/// ```
+pub fn inject_underlay_mcast_v4(
+    underlay_group: &MulticastUnderlay,
+    inner_src: Ipv4Addr,
+    inner_dst: Ipv4Addr,
+    vni: Vni,
+    dst_port: u16,
+    payload: &[u8],
+) -> Result<()> {
+    let outer_group = underlay_group.addr();
+    let outer_group_bytes = outer_group.bytes();
+
+    // Inner Ethernet header. The Rx path rewrites this destination MAC to the
+    // canonical multicast MAC derived from the inner IP, so the value set here
+    // is overwritten before delivery.
+    let inner_eth = Ethernet {
+        destination: MacAddr::from([0x01, 0x00, 0x5e, 0x00, 0x00, 0x01]),
+        source: MacAddr::from([0x00, 0x16, 0x3e, 0x00, 0x00, 0x01]),
+        ethertype: Ethertype::IPV4,
+    };
+    let inner_ip = Ipv4 {
+        source: inner_src,
+        destination: inner_dst,
+        protocol: IngotIpProto::UDP,
+        hop_limit: 64,
+        total_len: (Ipv4::MINIMUM_LENGTH + Udp::MINIMUM_LENGTH + payload.len())
+            as u16,
+        ..Default::default()
+    };
+    let inner_udp = Udp {
+        source: 0x1234,
+        destination: dst_port,
+        length: (Udp::MINIMUM_LENGTH + payload.len()) as u16,
+        ..Default::default()
+    };
+
+    let mut inner_pkt =
+        MsgBlk::new_ethernet_pkt((inner_eth, inner_ip, inner_udp));
+    if !payload.is_empty() {
+        inner_pkt.append(MsgBlk::copy(payload));
+    }
+    let inner_len = inner_pkt.byte_len();
+
+    // Geneve with no options. The default protocol type is Ethernet (0x6558).
+    let geneve = Geneve { vni, ..Default::default() };
+
+    let outer_udp = Udp {
+        source: 0x1e61,
+        destination: GENEVE_PORT,
+        length: (Udp::MINIMUM_LENGTH + geneve.packet_length() + inner_len)
+            as u16,
+        ..Default::default()
+    };
+    let outer_ip = Ipv6 {
+        source: "fd00::1".parse().unwrap(),
+        destination: outer_group,
+        next_header: IngotIpProto::UDP,
+        hop_limit: 64,
+        payload_len: outer_udp.length,
+        ..Default::default()
+    };
+    // Outer Ethernet: IPv6 multicast MAC per RFC 2464 (33:33 + low 32 bits).
+    let outer_eth = Ethernet {
+        destination: MacAddr::from([
+            0x33,
+            0x33,
+            outer_group_bytes[12],
+            outer_group_bytes[13],
+            outer_group_bytes[14],
+            outer_group_bytes[15],
+        ]),
+        source: MacAddr::from([0x00, 0x11, 0x22, 0x33, 0x44, 0x55]),
+        ethertype: Ethertype::IPV6,
+    };
+
+    let mut frame =
+        MsgBlk::new_ethernet_pkt((outer_eth, outer_ip, outer_udp, geneve));
+    frame.append(inner_pkt);
+    let bytes: Vec<u8> = frame.iter().flat_map(|n| n.iter().copied()).collect();
+
+    // Open the underlay link in raw mode and transmit the assembled frame.
+    // `_h` closes the handle when it drops, including on the `?` early returns.
+    let handle = dlpi::open(UNDERLAY_INJECT_DEVICE, dlpi::sys::DLPI_RAW)
+        .map_err(|e| {
+            anyhow!("dlpi::open({UNDERLAY_INJECT_DEVICE}) failed: {e}")
+        })?;
+    let _h = dlpi::DropHandle(handle);
+
+    // `dlpi_send` requires the stream in DL_IDLE, which `dlpi_bind` provides;
+    // an unbound send is rejected with DL_OUTSTATE. See [`INJECT_SAP`] for why
+    // the bound service access point is arbitrary in DLPI_RAW.
+    dlpi::bind(handle, INJECT_SAP)
+        .map_err(|e| anyhow!("dlpi::bind on {UNDERLAY_INJECT_DEVICE}: {e}"))?;
+    dlpi::send(handle, &[], &bytes, None)
+        .map_err(|e| anyhow!("dlpi::send on {UNDERLAY_INJECT_DEVICE}: {e}"))?;
+    Ok(())
+}
+
 /// Global multicast group state that cleans up M2P mappings and forwarding
 /// entries on drop. Port-specific subscriptions are handled automatically by
 /// [`OptePort::drop()`].
diff --git a/xde-tests/tests/multicast_multi_nexthop.rs b/xde-tests/tests/multicast_multi_nexthop.rs
index 4c1f9ffd..1e471412 100644
--- a/xde-tests/tests/multicast_multi_nexthop.rs
+++ b/xde-tests/tests/multicast_multi_nexthop.rs
@@ -4,18 +4,23 @@
 
 // Copyright 2025 Oxide Computer Company
 
-//! XDE multicast multi-next-hop fanout tests.
+//! XDE multicast replication-target fanout and redundant-next-hop collapse
+//! tests.
 //!
-//! These tests validate that when multiple next hops are configured with
-//! different replication modes, OPTE sends a separate packet to each next hop
-//! with the correct replication flag in the Geneve header.
+//! Distinct replication targets represent distinct multicast delivery sets, so
+//! XDE emits one packet per target carrying the correct Geneve flag. Redundant
+//! next hops sharing a target are alternate switch paths to the same delivery
+//! set, so they collapse to a single per-flow copy via ECMP select-one rather
+//! than fanning out a duplicate.
 
 use anyhow::Result;
 use opte_ioctl::OpteHdl;
 use opte_test_utils::geneve_verify;
 use oxide_vpc::api::DEFAULT_MULTICAST_VNI;
+use oxide_vpc::api::IpAddr;
 use oxide_vpc::api::IpCidr;
 use oxide_vpc::api::Ipv4Addr;
+use oxide_vpc::api::Ipv6Addr;
 use oxide_vpc::api::McastForwardingNextHop;
 use oxide_vpc::api::MulticastUnderlay;
 use oxide_vpc::api::NextHopV6;
@@ -26,20 +31,21 @@ use xde_tests::GENEVE_UNDERLAY_FILTER;
 use xde_tests::IPV4_MULTICAST_CIDR;
 use xde_tests::MCAST_TEST_PORT;
 use xde_tests::MulticastGroup;
+use xde_tests::SNOOP_TIMEOUT_EXPECT_NONE;
 use xde_tests::SnoopGuard;
 use xde_tests::UNDERLAY_TEST_DEVICE;
 
 #[test]
 fn test_multicast_multi_nexthop_fanout() -> Result<()> {
-    // Test that multicast forwarding with multiple next hops sends packets to
-    // all configured destinations, each with the correct replication flag.
+    // Test that multicast forwarding with multiple replication targets sends one
+    // packet per target, each with the correct replication flag.
     //
     // This test configures two next hops with different replication modes:
     // - NextHop 1: External replication (to boundary switch)
     // - NextHop 2: Underlay replication (sled-to-sled)
     //
-    // After sending one multicast packet, we verify that two distinct Geneve
-    // packets appear on the underlay, each with the correct replication flag.
+    // After sending one multicast packet, we verify that the External and
+    // Underlay targets each produce a Geneve packet with the correct flag.
 
     let topol = xde_tests::two_node_topology()?;
     let mcast_group = Ipv4Addr::from([224, 1, 2, 100]);
@@ -54,8 +60,8 @@ fn test_multicast_multi_nexthop_fanout() -> Result<()> {
     // Use different addresses since NextHopV6 is the key in the forwarding table.
     // In production, these would be different switch addresses.
     // For single-sled testing, we use two synthetic addresses.
-    let nexthop1: oxide_vpc::api::Ipv6Addr = "fd77::1".parse().unwrap();
-    let nexthop2: oxide_vpc::api::Ipv6Addr = "fd77::2".parse().unwrap();
+    let nexthop1: Ipv6Addr = "fd77::1".parse().unwrap();
+    let nexthop2: Ipv6Addr = "fd77::2".parse().unwrap();
 
     mcast.set_forwarding(vec![
         McastForwardingNextHop {
@@ -194,3 +200,221 @@ fn test_multicast_multi_nexthop_fanout() -> Result<()> {
 
     Ok(())
 }
+
+#[test]
+fn test_multicast_dual_external_select_one() -> Result<()> {
+    // Two `External` next hops are redundant switch paths to the same external
+    // multicast network, so the flow must yield a single egress copy. Exercised
+    // for both any-source (ASM) and source-specific (SSM) entries, since
+    // selection is filter-aware.
+
+    let topol = xde_tests::two_node_topology()?;
+    let sender_ip: IpAddr = topol.nodes[0].port.ip().into();
+
+    // ASM: both hops accept any source via the default `Exclude(empty)` filter.
+    assert_dual_select_one(
+        &topol,
+        Ipv4Addr::from([224, 1, 2, 101]),
+        MulticastUnderlay::new("ff04::e001:265".parse().unwrap()).unwrap(),
+        SourceFilter::default(),
+        Replication::External,
+        ["fd77::1", "fd77::2"],
+    )?;
+
+    // SSM: both hops `Include` the sender, so both admit this flow's source and
+    // remain ECMP candidates.
+    assert_dual_select_one(
+        &topol,
+        Ipv4Addr::from([224, 1, 2, 102]),
+        MulticastUnderlay::new("ff04::e001:266".parse().unwrap()).unwrap(),
+        SourceFilter::Include([sender_ip].into_iter().collect()),
+        Replication::External,
+        ["fd77::1", "fd77::2"],
+    )?;
+
+    Ok(())
+}
+
+#[test]
+fn test_multicast_dual_underlay_select_one() -> Result<()> {
+    // Two `Underlay` next hops are redundant switch paths to the same sled
+    // subscribers, so the flow must leave this sled as a single underlay copy
+    // rather than a duplicate the Rx path could not dedup. Exercised first for
+    // an ASM entry (default filter), then for an SSM entry (`Include` sender).
+
+    let topol = xde_tests::two_node_topology()?;
+    let sender_ip: IpAddr = topol.nodes[0].port.ip().into();
+
+    assert_dual_select_one(
+        &topol,
+        Ipv4Addr::from([224, 1, 2, 105]),
+        MulticastUnderlay::new("ff04::e001:269".parse().unwrap()).unwrap(),
+        SourceFilter::default(),
+        Replication::Underlay,
+        ["fd77::5", "fd77::6"],
+    )?;
+
+    assert_dual_select_one(
+        &topol,
+        Ipv4Addr::from([224, 1, 2, 106]),
+        MulticastUnderlay::new("ff04::e001:270".parse().unwrap()).unwrap(),
+        SourceFilter::Include([sender_ip].into_iter().collect()),
+        Replication::Underlay,
+        ["fd77::5", "fd77::6"],
+    )?;
+
+    Ok(())
+}
+
+#[test]
+fn test_multicast_dual_both_select_one() -> Result<()> {
+    // Two `Both` next hops are redundant switch paths to the same external
+    // network and the same sled subscribers. Both targets see the same
+    // candidate set, so the egress and underlay selections land on one switch:
+    // a single copy leaves carrying the `Both` flag and the peer is suppressed.
+    // Exercised for an ASM entry (default filter), then SSM (`Include` sender).
+
+    let topol = xde_tests::two_node_topology()?;
+    let sender_ip: IpAddr = topol.nodes[0].port.ip().into();
+
+    assert_dual_select_one(
+        &topol,
+        Ipv4Addr::from([224, 1, 2, 103]),
+        MulticastUnderlay::new("ff04::e001:267".parse().unwrap()).unwrap(),
+        SourceFilter::default(),
+        Replication::Both,
+        ["fd77::3", "fd77::4"],
+    )?;
+
+    assert_dual_select_one(
+        &topol,
+        Ipv4Addr::from([224, 1, 2, 104]),
+        MulticastUnderlay::new("ff04::e001:268".parse().unwrap()).unwrap(),
+        SourceFilter::Include([sender_ip].into_iter().collect()),
+        Replication::Both,
+        ["fd77::3", "fd77::4"],
+    )?;
+
+    Ok(())
+}
+
+/// Program two redundant next hops sharing a replication target and assert
+/// that each send leaves as exactly one copy: a first send checks the copy
+/// carries the requested replication flag, a second that no duplicate follows.
+///
+/// Switches sharing a target reach the same multicast delivery set, so a flow
+/// needs a single copy per target. For a homogeneous pair, the egress and
+/// underlay selections index the same candidate set with the same flow hash
+/// and pick the same hop, so the result is one copy with the configured flag
+/// and the peer is suppressed.
+fn assert_dual_select_one(
+    topol: &xde_tests::Topology,
+    mcast_group: Ipv4Addr,
+    mcast_underlay: MulticastUnderlay,
+    source_filter: SourceFilter,
+    replication: Replication,
+    nexthops: [&str; 2],
+) -> Result<()> {
+    let vni = Vni::new(DEFAULT_MULTICAST_VNI)?;
+    let mcast = MulticastGroup::new(mcast_group.into(), mcast_underlay)?;
+
+    let nexthop1: Ipv6Addr = nexthops[0].parse().unwrap();
+    let nexthop2: Ipv6Addr = nexthops[1].parse().unwrap();
+
+    mcast.set_forwarding(vec![
+        McastForwardingNextHop {
+            next_hop: NextHopV6::new(nexthop1, vni),
+            replication,
+            source_filter: source_filter.clone(),
+        },
+        McastForwardingNextHop {
+            next_hop: NextHopV6::new(nexthop2, vni),
+            replication,
+            source_filter,
+        },
+    ])?;
+
+    let mcast_cidr = IpCidr::Ip4(IPV4_MULTICAST_CIDR.parse().unwrap());
+    topol.nodes[0].port.add_multicast_router_entry(mcast_cidr)?;
+
+    topol.nodes[0]
+        .port
+        .subscribe_multicast(mcast_group.into())
+        .expect("subscribe port 0 should succeed");
+
+    // Confirm both next hops are programmed for failover.
+    let hdl = OpteHdl::open()?;
+    let mfwd = hdl.dump_mcast_fwd()?;
+    let entry = mfwd
+        .entries
+        .iter()
+        .find(|e| e.underlay == mcast_underlay)
+        .expect("missing multicast forwarding entry for underlay group");
+
+    assert_eq!(
+        entry
+            .next_hops
+            .iter()
+            .filter(|hop| hop.replication == replication)
+            .count(),
+        2,
+        "expected both next hops programmed with {replication:?}; got: {:?}",
+        entry.next_hops
+    );
+
+    let sender_v4 = topol.nodes[0].port.ip();
+    let payload = "dual select-one";
+
+    // 1st send: exactly one copy carrying the configured replication flag.
+    {
+        let mut snoop =
+            SnoopGuard::start(UNDERLAY_TEST_DEVICE, GENEVE_UNDERLAY_FILTER)?;
+
+        topol.nodes[0].zone.send_udp_v4(
+            sender_v4,
+            mcast_group,
+            MCAST_TEST_PORT,
+            payload,
+        )?;
+
+        let out = snoop.assert_packet("single underlay copy");
+        let stdout = String::from_utf8_lossy(&out.stdout);
+        let packets = geneve_verify::extract_snoop_hex(&stdout)
+            .expect("snoop output should contain a hex dump");
+        let bytes = geneve_verify::parse_snoop_hex(&packets[0])
+            .expect("captured packet should parse as hex");
+        let info = geneve_verify::parse_geneve_packet(&bytes)
+            .expect("captured packet should parse as Geneve");
+        assert_eq!(
+            info.replication,
+            Some(replication),
+            "selected copy must carry {replication:?} replication"
+        );
+    }
+
+    // 2nd send: a snoop waiting for two packets must time out, proving the
+    // redundant switch path for the same target emitted no duplicate copy.
+    {
+        let mut snoop = SnoopGuard::start_with_count(
+            UNDERLAY_TEST_DEVICE,
+            GENEVE_UNDERLAY_FILTER,
+            2,
+        )?;
+
+        topol.nodes[0].zone.send_udp_v4(
+            sender_v4,
+            mcast_group,
+            MCAST_TEST_PORT,
+            payload,
+        )?;
+
+        if let Ok(out) = snoop.wait_with_timeout(SNOOP_TIMEOUT_EXPECT_NONE) {
+            let stdout = String::from_utf8_lossy(&out.stdout);
+            panic!(
+                "expected a single copy, but snoop captured a duplicate:\n{stdout}"
+            );
+        }
+    }
+
+    Ok(())
+}
diff --git a/xde-tests/tests/multicast_rx.rs b/xde-tests/tests/multicast_rx.rs
index 2415ab15..d97ad80e 100644
--- a/xde-tests/tests/multicast_rx.rs
+++ b/xde-tests/tests/multicast_rx.rs
@@ -27,6 +27,8 @@ use oxide_vpc::api::NextHopV6;
 use oxide_vpc::api::Replication;
 use oxide_vpc::api::SourceFilter;
 use oxide_vpc::api::Vni;
+use std::thread;
+use std::time::Duration;
 use xde_tests::GENEVE_UNDERLAY_FILTER;
 use xde_tests::IPV4_MULTICAST_CIDR;
 use xde_tests::IPV6_ADMIN_LOCAL_MULTICAST_CIDR;
@@ -35,6 +37,7 @@ use xde_tests::MulticastGroup;
 use xde_tests::SNOOP_TIMEOUT_EXPECT_NONE;
 use xde_tests::SnoopGuard;
 use xde_tests::UNDERLAY_TEST_DEVICE;
+use xde_tests::inject_underlay_mcast_v4;
 
 #[test]
 fn test_xde_multicast_rx_dual_family() -> Result<()> {
@@ -267,6 +270,113 @@ fn test_xde_multicast_rx_dual_family() -> Result<()> {
     Ok(())
 }
 
+#[test]
+fn test_multicast_rx_only_delivery() -> Result<()> {
+    // Rx-path isolation test: drive `handle_mcast_rx` directly by injecting a
+    // raw Geneve-over-IPv6 multicast frame onto the underlay, with no Tx ever
+    // issued from a guest.
+    //
+    // The dual-family test relies on `OpteZone::send_udp_v4`/`send_udp_v6`,
+    // which on a single sled also trigger the Tx `guest_loopback` same-sled
+    // delivery. Here, we never send from a guest, so a delivered packet can
+    // only have arrived via the underlay receive path.
+
+    let topol = xde_tests::two_node_topology()?;
+
+    // IPv4 multicast group mapped to its admin-local IPv6 underlay address per
+    // Omicron's map_external_to_underlay_ip() (last 4 bytes encode the IPv4).
+    let mcast_group = Ipv4Addr::from([224, 0, 0, 251]);
+    let vni = Vni::new(DEFAULT_MULTICAST_VNI)?;
+    let mcast_underlay =
+        MulticastUnderlay::new("ff04::e000:fb".parse().unwrap()).unwrap();
+
+    // Establish the M2P mapping (cleaned up on drop). No forwarding entry is
+    // configured because forwarding drives Tx replication only.
+    let _mcast = MulticastGroup::new(mcast_group.into(), mcast_underlay)?;
+
+    // Add an IPv4 multicast router entry on the receiver, then subscribe it.
+    let mcast_cidr = IpCidr::Ip4(IPV4_MULTICAST_CIDR.parse().unwrap());
+    topol.nodes[1].port.add_multicast_router_entry(mcast_cidr)?;
+    topol.nodes[1]
+        .port
+        .subscribe_multicast(mcast_group.into())
+        .expect("subscribe receiver port should succeed");
+
+    // Confirm the subscription is present before injecting.
+    let hdl = OpteHdl::open()?;
+    let subs = hdl.dump_mcast_subs()?;
+    let p1 = topol.nodes[1].port.name().to_string();
+    let s_entry = subs
+        .entries
+        .iter()
+        .find(|e| e.underlay == mcast_underlay)
+        .expect("missing multicast subscription entry for underlay group");
+    assert!(
+        s_entry.has_port(&p1),
+        "expected {p1} to be subscribed; got {:?}",
+        s_entry.subscribers
+    );
+
+    // Snoop the receiver's guest device for the delivered inner packet.
+    let dev_name_b = topol.nodes[1].port.name().to_string();
+    let filter =
+        format!("udp and ip dst {mcast_group} and port {MCAST_TEST_PORT}");
+    let mut snoop_rx = SnoopGuard::start(&dev_name_b, &filter)?;
+
+    // Inject a raw underlay frame. The inner source mirrors a remote sender
+    // (node 0's overlay address). Note that delivery is keyed on the outer
+    // group, not the arrival VNIC or VNI.
+    //
+    // `SnoopGuard::start` spawns `snoop` and returns before the capture is
+    // actually live, so a single frame can race ahead of snoop and be missed.
+    // We therefore re-inject until the capture observes a frame; the resulting
+    // duplicate multicast deliveries are harmless.
+    //
+    // Injection runs on this initial thread by design. illumos privileges are
+    // per-LWP, and `dlpi_open` resolves the link through a dlmgmtd door call
+    // that requires privileges `pfexec` grants only to the process's first
+    // thread; a frame injected from a freshly spawned thread fails link lookup
+    // with ENOLINK. The blocking snoop wait needs no such privilege, so it runs
+    // on the worker thread instead.
+    let payload = b"rx-only delivery";
+    let inner_src = topol.nodes[0].port.ip();
+    let ctx = format!("on {dev_name_b}");
+    let snoop_handle = thread::spawn(move || snoop_rx.assert_packet(&ctx));
+
+    while !snoop_handle.is_finished() {
+        inject_underlay_mcast_v4(
+            &mcast_underlay,
+            inner_src,
+            mcast_group,
+            vni,
+            MCAST_TEST_PORT,
+            payload,
+        )?;
+        thread::sleep(Duration::from_millis(100));
+    }
+
+    let snoop_output = snoop_handle.join().unwrap();
+
+    let stdout = String::from_utf8_lossy(&snoop_output.stdout);
+    assert!(
+        stdout.contains("224.0.0.251"),
+        "expected destination 224.0.0.251 in snoop output:\n{stdout}"
+    );
+    assert!(
+        stdout.contains("delivery"),
+        "expected payload substring 'delivery' in snoop output:\n{stdout}"
+    );
+
+    // L2 dest is rewritten by XDE to the canonical IPv4 multicast MAC per
+    // RFC 1112: 01:00:5e + low 23 bits of 224.0.0.251 -> 01:00:5e:00:00:fb.
+    assert!(
+        stdout.to_ascii_lowercase().contains("0100 5e00 00fb"),
+        "expected IPv4 multicast MAC '0100 5e00 00fb' in snoop output; got:\n{stdout}"
+    );
+
+    Ok(())
+}
+
 #[test]
 fn test_reject_link_local_underlay_ff02() -> Result<()> {
     let hdl = OpteHdl::open()?;
diff --git a/xde/src/xde.rs b/xde/src/xde.rs
index 2c814957..244cae1c 100644
--- a/xde/src/xde.rs
+++ b/xde/src/xde.rs
@@ -320,11 +320,11 @@ use oxide_vpc::engine::router;
 
 const ETHERNET_MTU: u16 = 1500;
 
-// Type alias for multicast forwarding table:
-// Maps underlay multicast addresses to next hops with replication and source filters.
-// The source filter is the aggregated filter for the destination sled (union of
-// all subscriber filters on that sled). Packets are only forwarded if the
-// aggregated filter allows the source.
+// Type alias for multicast forwarding table: maps underlay multicast addresses
+// to switch next hops with replication and source filters. Each source filter is
+// aggregated over the subscriber set reachable through that next hop. Packets are
+// only forwarded to a selected next hop if its aggregated filter allows the
+// source.
 type McastForwardingTable = BTreeMap<
     MulticastUnderlay,
     BTreeMap<NextHopV6, (Replication, SourceFilter)>,
@@ -2302,6 +2302,113 @@ struct MulticastRxContext<'a> {
     inner_eth_off: usize,
 }
 
+/// The replication target an ECMP next hop selection runs over.
+///
+/// A next hop is a switch endpoint, and the switch is the replication engine.
+/// XDE's fanout is across replication targets, not redundant switch endpoints:
+/// the targets are external egress and underlay delivery. Next hops sharing a
+/// target are redundant paths to the same multicast delivery set, so a flow
+/// is sent to one of them rather than copied to every switch. A next hop
+/// programmed with `Replication::Both` is a candidate for both targets, and
+/// counts toward external egress and underlay delivery independently.
+#[derive(Clone, Copy)]
+enum ReplicationTarget {
+    /// Egress to the external network via the switch front panel.
+    External,
+    /// Underlay delivery to sleds behind the switch.
+    Underlay,
+}
+
+impl ReplicationTarget {
+    /// Whether a next hop programmed with `replication` serves this target.
+    ///
+    /// `Both` serves either target, while `Reserved` serves neither.
+    fn includes(self, replication: &Replication) -> bool {
+        match replication {
+            Replication::Both => true,
+            Replication::External => matches!(self, Self::External),
+            Replication::Underlay => matches!(self, Self::Underlay),
+            Replication::Reserved => false,
+        }
+    }
+}
+
+/// The next hop chosen per replication target to carry a flow's single copy.
+///
+/// A field is `None` when no next hop both serves that target and admits the
+/// flow's source.
+struct ReplicationSelection {
+    external: Option<NextHopV6>,
+    underlay: Option<NextHopV6>,
+}
+
+/// Pick the single next hop that carries a flow's copy for each replication
+/// target.
+///
+/// The control plane programs several next hops per target for switch
+/// redundancy, not to describe disjoint multicast destination sets: group
+/// membership is mirrored across the redundant switches, so every candidate
+/// for a target reaches the same external network or the same sled
+/// subscribers. A multicast stream therefore needs only one copy per target
+/// leaving this sled. Emitting to every candidate would duplicate the stream,
+/// and a receiver cannot tell the copies apart to deduplicate them.
+///
+/// All candidates stay programmed in the forwarding table, so any peer can
+/// carry the flow on failover; this function only picks one per target per
+/// flow, and the caller suppresses the redundant copy on the others.
+///
+/// A next hop is a candidate for a target only if its `Replication` serves
+/// that target and its source filter admits `inner_src`. For an any-source
+/// group (the default `Exclude(empty)`) every hop serving the target
+/// qualifies. For a source-filtered group only the hops that permit this
+/// source qualify, so a flow is never pinned to a hop that would drop it while
+/// another hop would forward it.
+///
+/// Among the candidates, selection is keyed on `l4_hash`, the inner flow's L4
+/// hash (the flow's CRC32, the same key the V2B boundary path uses to ECMP
+/// over tunnel endpoints). For multicast, that hash includes the inner source
+/// and group (and L4 fields when present), so a given flow pins
+/// deterministically to one switch across reboots and OPTE instances while
+/// distinct flows are spread across switches.
+///
+/// Each target is resolved independently. The eligible count is not known in
+/// advance because the source filter depends on the flow, so candidates are
+/// counted in one pass and the `hash % count` index is taken in a second,
+/// mirroring the boundary path's `nth(hash % len)` without materialising the
+/// filtered set. A target with no eligible candidate resolves to `None` in the
+/// returned selection.
+fn select_nexthops(
+    next_hops: &BTreeMap<NextHopV6, (Replication, SourceFilter)>,
+    inner_src: oxide_vpc::api::IpAddr,
+    l4_hash: u32,
+) -> ReplicationSelection {
+    // Resolve one target. Both passes walk the map in key order with the same
+    // predicate, so an index from the first pass is valid in the second.
+    let pick = |target: ReplicationTarget| -> Option<NextHopV6> {
+        let eligible = |replication: &Replication, filter: &SourceFilter| {
+            filter.allows(inner_src) && target.includes(replication)
+        };
+
+        let candidates =
+            next_hops.values().filter(|(r, f)| eligible(r, f)).count();
+        if candidates == 0 {
+            // Nothing to pick from; this also keeps the modulo well-defined.
+            return None;
+        }
+
+        let slot = l4_hash as usize % candidates;
+        next_hops
+            .iter()
+            .filter(|(_, (r, f))| eligible(r, f))
+            .map(|(next_hop, _)| *next_hop)
+            .nth(slot)
+    };
+
+    let external = pick(ReplicationTarget::External);
+    let underlay = pick(ReplicationTarget::Underlay);
+    ReplicationSelection { external, underlay }
+}
+
 /// Handle multicast packet forwarding for same-sled delivery and underlay
 /// replication based on the XDE-wide multicast forwarding table.
 ///
@@ -2401,11 +2508,11 @@ fn handle_mcast_tx<'a>(
         }
     }
 
-    // Next hop forwarding: send packets to configured next hops.
+    // Next hop forwarding: send packets to configured switch next hops.
     //
-    // At the leaf level, we process all next hops in the forwarding table.
-    // Each next hop's `Replication` is a Tx-only instruction telling the switch
-    // which ports to replicate to:
+    // At the leaf level, we process the forwarding table, but we do not
+    // transmit to every next hop. Each next hop's `Replication` is a Tx-only
+    // instruction telling the chosen switch which ports to replicate to:
     // - External: ports set for external multicast traffic (egress to external networks)
     // - Underlay: replicate to other sleds (using multicast outer dst)
     // - Both: both external and underlay replication
@@ -2422,12 +2529,32 @@ fn handle_mcast_tx<'a>(
     }
 
     if let Some(next_hops) = cpu_mcast_fwd.get(&underlay_key) {
-        // We found forwarding entries, replicate to each next hop
+        // A next hop is a switch, and the switch replicates to every destination
+        // in the requested target's multicast delivery set. Next hops sharing a
+        // target are redundant switch paths to that set: external candidates
+        // reach the same external multicast network, and underlay candidates
+        // reach the same sled subscribers. Emitting to every next hop for a
+        // target would duplicate the stream.
+        //
+        // We therefore run a two-pass `%` ECMP select-one per target. The first
+        // pass counts source-eligible candidates for the target, and the second
+        // selects `l4_hash % count` in the same stable order. Because the
+        // candidate switches are redundant, picking one avoids duplication. The
+        // two targets are selected independently, so a `Both` replication next
+        // hop can be the choice for one target and not the other.
+        let ReplicationSelection {
+            external: chosen_external,
+            underlay: chosen_underlay,
+        } = select_nexthops(next_hops, ctx.inner_src, ctx.l4_hash);
+
+        // Iterate the programmed next hops, narrowing each to its choice.
         for (next_hop, (replication, source_filter)) in next_hops.iter() {
-            // Check aggregated source filter before forwarding.
-            // This filter is the union of all subscriber filters for
-            // this next hop. If no subscriber would accept this source,
-            // skip forwarding.
+            // Check aggregated source filter before forwarding. This filter is
+            // the union of all subscriber filters for destinations reachable
+            // through this next hop. If no subscriber would accept this source,
+            // we skip forwarding. Selection has already excluded such a hop;
+            // this second check preserves per-hop drop telemetry for filtered
+            // entries.
             if !source_filter.allows(ctx.inner_src) {
                 let xde = get_xde_state();
                 xde.stats.vals.mcast_tx_fwd_source_filtered().incr(1);
@@ -2444,6 +2571,41 @@ fn handle_mcast_tx<'a>(
                 continue;
             }
 
+            // Compose the per-flow selections into this hop's effective
+            // replication. A hop keeps a target only if it is that target's
+            // choice.
+            //
+            // A `Both` hop is narrowed when it is the choice for one target but
+            // not the other, and skipped when it is the choice for neither, so
+            // at most one external and one underlay copy is emitted per flow,
+            // possibly via different switches. A `Reserved` hop serves neither
+            // target and is passed through unchanged.
+            let keep_external = chosen_external.as_ref() == Some(next_hop);
+            let keep_underlay = chosen_underlay.as_ref() == Some(next_hop);
+            let effective_replication = match replication {
+                Replication::External => {
+                    if keep_external {
+                        Replication::External
+                    } else {
+                        continue;
+                    }
+                }
+                Replication::Underlay => {
+                    if keep_underlay {
+                        Replication::Underlay
+                    } else {
+                        continue;
+                    }
+                }
+                Replication::Both => match (keep_external, keep_underlay) {
+                    (true, true) => Replication::Both,
+                    (true, false) => Replication::External,
+                    (false, true) => Replication::Underlay,
+                    (false, false) => continue,
+                },
+                Replication::Reserved => Replication::Reserved,
+            };
+
             // Clone packet with headers using pullup
             let Ok(mut fwd_pkt) =
                 ctx.out_pkt.pullup(NonZeroUsize::new(pullup_len))
@@ -2481,7 +2643,11 @@ fn handle_mcast_tx<'a>(
             }
             // Update Geneve multicast option with the Tx-only replication
             // instruction for the switch.
-            update_mcast_replication(&mut fwd_pkt, geneve_offset, *replication);
+            update_mcast_replication(
+                &mut fwd_pkt,
+                geneve_offset,
+                effective_replication,
+            );
 
             // Route to switch unicast address to determine which underlay
             // port/MAC to use. Packet destination is multicast address with
@@ -2525,7 +2691,7 @@ fn handle_mcast_tx<'a>(
                 (AF_INET6 as usize, &outer_ip6 as *const _ as uintptr_t);
 
             // Fire DTrace probes and increment stats based on replication mode
-            match replication {
+            match effective_replication {
                 oxide_vpc::api::Replication::Underlay => {
                     __dtrace_probe_mcast__underlay__fwd(
                         af,
@@ -3848,7 +4014,9 @@ fn set_mcast_forwarding_hdlr(
     // Validation of admin-local IPv6 (ff04::/16) happens at deserialization
     let underlay = req.underlay;
 
-    // Fleet-level multicast: enforce DEFAULT_MULTICAST_VNI for all replication modes.
+    // Fleet-level multicast: enforce DEFAULT_MULTICAST_VNI for all replication
+    // modes.
+    //
     // NextHopV6.addr must be unicast (switch address for routing).
     // The packet will be sent to the multicast address (req.underlay).
     for entry in &req.next_hops {