From 1b9439c933b500cb24710bbd81fe56e9b0025b6f Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Mon, 24 Nov 2025 15:04:27 +1000
Subject: [PATCH 001/258] KVM: selftests: Add missing "break" in rseq_test's
 param parsing

In commit 0297cdc12a87 ("KVM: selftests: Add option to rseq test to
override /dev/cpu_dma_latency"), a 'break' is missed before the option
'l' in the argument parsing loop, which leads to an unexpected core
dump in atoi_paranoid(). It tries to get the latency from non-existent
argument.

  host$ ./rseq_test -u
  Random seed: 0x6b8b4567
  Segmentation fault (core dumped)

Add a 'break' before the option 'l' in the argument parsing loop to avoid
the unexpected core dump.

Fixes: 0297cdc12a87 ("KVM: selftests: Add option to rseq test to override /dev/cpu_dma_latency")
Cc: stable@vger.kernel.org # v6.15+
Signed-off-by: Gavin Shan <gshan@redhat.com>
Link: https://patch.msgid.link/20251124050427.1924591-1-gshan@redhat.com
[sean: describe code change in shortlog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/rseq_test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index 1375fca80bcdbe..f80ad6b47d16b0 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -215,6 +215,7 @@ int main(int argc, char *argv[])
 		switch (opt) {
 		case 'u':
 			skip_sanity_check = true;
+			break;
 		case 'l':
 			latency = atoi_paranoid(optarg);
 			break;

From e2b43fb25243d502ad36b07bab9de09f4b76fff9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 1 Dec 2025 17:50:48 -0800
Subject: [PATCH 002/258] KVM: x86: Apply runtime updates to current CPUID
 during KVM_SET_CPUID{,2}

When handling KVM_SET_CPUID{,2}, do runtime CPUID updates on the vCPU's
current CPUID (and caps) prior to swapping in the incoming CPUID state so
that KVM doesn't lose pending updates if the incoming CPUID is rejected,
and to prevent a false failure on the equality check.

Note, runtime updates are unconditionally performed on the incoming/new
CPUID (and associated caps), i.e. clearing the dirty flag won't negatively
affect the new CPUID.

Fixes: 93da6af3ae56 ("KVM: x86: Defer runtime updates of dynamic CPUID bits until CPUID emulation")
Reported-by: Igor Mammedov <imammedo@redhat.com>
Closes: https://lore.kernel.org/all/20251128123202.68424a95@imammedo
Cc: stable@vger.kernel.org
Acked-by: Igor Mammedov <imammedo@redhat.com>
Tested-by: Igor Mammedov <imammedo@redhat.com>
Link: https://patch.msgid.link/20251202015049.1167490-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/cpuid.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 52524e0ca97f78..913ffb995279af 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -509,11 +509,18 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
 	u32 vcpu_caps[NR_KVM_CPU_CAPS];
 	int r;
 
+	/*
+	 * Apply pending runtime CPUID updates to the current CPUID entries to
+	 * avoid false positives due to mismatches on KVM-owned feature flags.
+	 */
+	if (vcpu->arch.cpuid_dynamic_bits_dirty)
+		kvm_update_cpuid_runtime(vcpu);
+
 	/*
 	 * Swap the existing (old) entries with the incoming (new) entries in
 	 * order to massage the new entries, e.g. to account for dynamic bits
-	 * that KVM controls, without clobbering the current guest CPUID, which
-	 * KVM needs to preserve in order to unwind on failure.
+	 * that KVM controls, without losing the current guest CPUID, which KVM
+	 * needs to preserve in order to unwind on failure.
 	 *
 	 * Similarly, save the vCPU's current cpu_caps so that the capabilities
 	 * can be updated alongside the CPUID entries when performing runtime

From 824d227324dcd328857b70e37b41780f02225729 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 1 Dec 2025 17:50:49 -0800
Subject: [PATCH 003/258] KVM: selftests: Add a CPUID testcase for
 KVM_SET_CPUID2 with runtime updates

Add a CPUID testcase to verify that KVM allows KVM_SET_CPUID2 after (or in
conjunction with) runtime updates.  This is a regression test for the bug
introduced by commit 93da6af3ae56 ("KVM: x86: Defer runtime updates of
dynamic CPUID bits until CPUID emulation"), where KVM would incorrectly
reject KVM_SET_CPUID due to a not handling a pending runtime update on the
current CPUID, resulting in a false mismatch between the "old" and "new"
CPUID entries.

Link: https://lore.kernel.org/all/20251128123202.68424a95@imammedo
Link: https://patch.msgid.link/20251202015049.1167490-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/x86/cpuid_test.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tools/testing/selftests/kvm/x86/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c
index 7b3fda6842bcec..f9ed14996977ab 100644
--- a/tools/testing/selftests/kvm/x86/cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86/cpuid_test.c
@@ -155,6 +155,7 @@ struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct
 static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *ent;
+	struct kvm_sregs sregs;
 	int rc;
 	u32 eax, ebx, x;
 
@@ -162,6 +163,20 @@ static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
 	rc = __vcpu_set_cpuid(vcpu);
 	TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
 
+	/*
+	 * Toggle CR4 bits that affect dynamic CPUID feature flags to verify
+	 * setting unmodified CPUID succeeds with runtime CPUID updates.
+	 */
+	vcpu_sregs_get(vcpu, &sregs);
+	if (kvm_cpu_has(X86_FEATURE_XSAVE))
+		sregs.cr4 ^= X86_CR4_OSXSAVE;
+	if (kvm_cpu_has(X86_FEATURE_PKU))
+		sregs.cr4 ^= X86_CR4_PKE;
+	vcpu_sregs_set(vcpu, &sregs);
+
+	rc = __vcpu_set_cpuid(vcpu);
+	TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
 	/* Changing CPU features is forbidden */
 	ent = vcpu_get_cpuid_entry(vcpu, 0x7);
 	ebx = ent->ebx;

From 9935df5333aa503a18de5071f53762b65c783c4c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 1 Dec 2025 18:03:33 -0800
Subject: [PATCH 004/258] KVM: Disallow toggling KVM_MEM_GUEST_MEMFD on an
 existing memslot

Reject attempts to disable KVM_MEM_GUEST_MEMFD on a memslot that was
initially created with a guest_memfd binding, as KVM doesn't support
toggling KVM_MEM_GUEST_MEMFD on existing memslots.  KVM prevents enabling
KVM_MEM_GUEST_MEMFD, but doesn't prevent clearing the flag.

Failure to reject the new memslot results in a use-after-free due to KVM
not unbinding from the guest_memfd instance.  Unbinding on a FLAGS_ONLY
change is easy enough, and can/will be done as a hardening measure (in
anticipation of KVM supporting dirty logging on guest_memfd at some point),
but fixing the use-after-free would only address the immediate symptom.

  ==================================================================
  BUG: KASAN: slab-use-after-free in kvm_gmem_release+0x362/0x400 [kvm]
  Write of size 8 at addr ffff8881111ae908 by task repro/745

  CPU: 7 UID: 1000 PID: 745 Comm: repro Not tainted 6.18.0-rc6-115d5de2eef3-next-kasan #3 NONE
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  Call Trace:
   <TASK>
   dump_stack_lvl+0x51/0x60
   print_report+0xcb/0x5c0
   kasan_report+0xb4/0xe0
   kvm_gmem_release+0x362/0x400 [kvm]
   __fput+0x2fa/0x9d0
   task_work_run+0x12c/0x200
   do_exit+0x6ae/0x2100
   do_group_exit+0xa8/0x230
   __x64_sys_exit_group+0x3a/0x50
   x64_sys_call+0x737/0x740
   do_syscall_64+0x5b/0x900
   entry_SYSCALL_64_after_hwframe+0x4b/0x53
  RIP: 0033:0x7f581f2eac31
   </TASK>

  Allocated by task 745 on cpu 6 at 9.746971s:
   kasan_save_stack+0x20/0x40
   kasan_save_track+0x13/0x50
   __kasan_kmalloc+0x77/0x90
   kvm_set_memory_region.part.0+0x652/0x1110 [kvm]
   kvm_vm_ioctl+0x14b0/0x3290 [kvm]
   __x64_sys_ioctl+0x129/0x1a0
   do_syscall_64+0x5b/0x900
   entry_SYSCALL_64_after_hwframe+0x4b/0x53

  Freed by task 745 on cpu 6 at 9.747467s:
   kasan_save_stack+0x20/0x40
   kasan_save_track+0x13/0x50
   __kasan_save_free_info+0x37/0x50
   __kasan_slab_free+0x3b/0x60
   kfree+0xf5/0x440
   kvm_set_memslot+0x3c2/0x1160 [kvm]
   kvm_set_memory_region.part.0+0x86a/0x1110 [kvm]
   kvm_vm_ioctl+0x14b0/0x3290 [kvm]
   __x64_sys_ioctl+0x129/0x1a0
   do_syscall_64+0x5b/0x900
   entry_SYSCALL_64_after_hwframe+0x4b/0x53

Reported-by: Alexander Potapenko <glider@google.com>
Fixes: a7800aa80ea4 ("KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory")
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20251202020334.1171351-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cde1eddbaa9111..7fea6ba91c1ef3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2085,7 +2085,7 @@ static int kvm_set_memory_region(struct kvm *kvm,
 			return -EINVAL;
 		if ((mem->userspace_addr != old->userspace_addr) ||
 		    (npages != old->npages) ||
-		    ((mem->flags ^ old->flags) & KVM_MEM_READONLY))
+		    ((mem->flags ^ old->flags) & (KVM_MEM_READONLY | KVM_MEM_GUEST_MEMFD)))
 			return -EINVAL;
 
 		if (base_gfn != old->base_gfn)

From af62fe2494da84eb01752282c8228c9bb3fe9f67 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 1 Dec 2025 18:03:34 -0800
Subject: [PATCH 005/258] KVM: Harden and prepare for modifying existing
 guest_memfd memslots

Unbind guest_memfd memslots if KVM commits a MOVE or FLAGS_ONLY memslot
change to harden against use-after-free, and to prepare for eventually
supporting dirty logging on guest_memfd memslots, at which point
FLAGS_ONLY changes will be expected/supported.

Add two separate WARNs, once to yell if a guest_memfd memslot is moved
(which KVM is never expected to allow/support), and again if the unbind()
is triggered, to help detect uAPI goofs prior to deliberately allowing
FLAGS_ONLY changes.

Link: https://patch.msgid.link/20251202020334.1171351-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 virt/kvm/kvm_main.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7fea6ba91c1ef3..32b6c6209b6388 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1748,6 +1748,12 @@ static void kvm_commit_memory_region(struct kvm *kvm,
 		kvm_free_memslot(kvm, old);
 		break;
 	case KVM_MR_MOVE:
+		/*
+		 * Moving a guest_memfd memslot isn't supported, and will never
+		 * be supported.
+		 */
+		WARN_ON_ONCE(old->flags & KVM_MEM_GUEST_MEMFD);
+		fallthrough;
 	case KVM_MR_FLAGS_ONLY:
 		/*
 		 * Free the dirty bitmap as needed; the below check encompasses
@@ -1756,6 +1762,15 @@ static void kvm_commit_memory_region(struct kvm *kvm,
 		if (old->dirty_bitmap && !new->dirty_bitmap)
 			kvm_destroy_dirty_bitmap(old);
 
+		/*
+		 * Unbind the guest_memfd instance as needed; the @new slot has
+		 * already created its own binding.  TODO: Drop the WARN when
+		 * dirty logging guest_memfd memslots is supported.  Until then,
+		 * flags-only changes on guest_memfd slots should be impossible.
+		 */
+		if (WARN_ON_ONCE(old->flags & KVM_MEM_GUEST_MEMFD))
+			kvm_gmem_unbind(old);
+
 		/*
 		 * The final quirk.  Free the detached, old slot, but only its
 		 * memory, not any metadata.  Metadata, including arch specific

From 835a50753579aa8368a08fca307e638723207768 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 3 Dec 2025 20:14:30 -0800
Subject: [PATCH 006/258] selftests/bpf: Add -fms-extensions to bpf build flags

The kernel is now built with -fms-extensions, therefore
generated vmlinux.h contains types like:
struct slab {
   ..
   struct freelist_counters;
};

Use -fms-extensions and -Wno-microsoft-anon-tag flags
to build bpf programs that #include "vmlinux.h"

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index b7030a6e2e763b..4aa60e83ff191a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -437,6 +437,8 @@ BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)	\
 	     -I$(abspath $(OUTPUT)/../usr/include)			\
 	     -std=gnu11		 					\
 	     -fno-strict-aliasing 					\
+	     -Wno-microsoft-anon-tag					\
+	     -fms-extensions						\
 	     -Wno-compare-distinct-pointer-types			\
 	     -Wno-initializer-overrides					\
 	     #

From e8e032cd24dda7cceaa27bc2eb627f82843f0466 Mon Sep 17 00:00:00 2001
From: Wei Fang <wei.fang@nxp.com>
Date: Fri, 28 Nov 2025 10:59:15 +0800
Subject: [PATCH 007/258] net: fec: ERR007885 Workaround for XDP TX path

The ERR007885 will lead to a TDAR race condition for mutliQ when the
driver sets TDAR and the UDMA clears TDAR simultaneously or in a small
window (2-4 cycles). And it will cause the udma_tx and udma_tx_arbiter
state machines to hang. Therefore, the commit 53bb20d1faba ("net: fec:
add variable reg_desc_active to speed things up") and the commit
a179aad12bad ("net: fec: ERR007885 Workaround for conventional TX") have
added the workaround to fix the potential issue for the conventional TX
path. Similarly, the XDP TX path should also have the potential hang
issue, so add the workaround for XDP TX path.

Fixes: 6d6b39f180b8 ("net: fec: add initial XDP support")
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Link: https://patch.msgid.link/20251128025915.2486943-1-wei.fang@nxp.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/freescale/fec_main.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index c685a5c0cc51a5..a753265961af51 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3933,7 +3933,12 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
 	txq->bd.cur = bdp;
 
 	/* Trigger transmission start */
-	writel(0, txq->bd.reg_desc_active);
+	if (!(fep->quirks & FEC_QUIRK_ERR007885) ||
+	    !readl(txq->bd.reg_desc_active) ||
+	    !readl(txq->bd.reg_desc_active) ||
+	    !readl(txq->bd.reg_desc_active) ||
+	    !readl(txq->bd.reg_desc_active))
+		writel(0, txq->bd.reg_desc_active);
 
 	return 0;
 }

From 613d12dd794e078be8ff3cf6b62a6b9acf7f4619 Mon Sep 17 00:00:00 2001
From: Wang Liang <wangliang74@huawei.com>
Date: Sat, 29 Nov 2025 12:13:15 +0800
Subject: [PATCH 008/258] netrom: Fix memory leak in nr_sendmsg()

syzbot reported a memory leak [1].

When function sock_alloc_send_skb() return NULL in nr_output(), the
original skb is not freed, which was allocated in nr_sendmsg(). Fix this
by freeing it before return.

[1]
BUG: memory leak
unreferenced object 0xffff888129f35500 (size 240):
  comm "syz.0.17", pid 6119, jiffies 4294944652
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 10 52 28 81 88 ff ff  ..........R(....
  backtrace (crc 1456a3e4):
    kmemleak_alloc_recursive include/linux/kmemleak.h:44 [inline]
    slab_post_alloc_hook mm/slub.c:4983 [inline]
    slab_alloc_node mm/slub.c:5288 [inline]
    kmem_cache_alloc_node_noprof+0x36f/0x5e0 mm/slub.c:5340
    __alloc_skb+0x203/0x240 net/core/skbuff.c:660
    alloc_skb include/linux/skbuff.h:1383 [inline]
    alloc_skb_with_frags+0x69/0x3f0 net/core/skbuff.c:6671
    sock_alloc_send_pskb+0x379/0x3e0 net/core/sock.c:2965
    sock_alloc_send_skb include/net/sock.h:1859 [inline]
    nr_sendmsg+0x287/0x450 net/netrom/af_netrom.c:1105
    sock_sendmsg_nosec net/socket.c:727 [inline]
    __sock_sendmsg net/socket.c:742 [inline]
    sock_write_iter+0x293/0x2a0 net/socket.c:1195
    new_sync_write fs/read_write.c:593 [inline]
    vfs_write+0x45d/0x710 fs/read_write.c:686
    ksys_write+0x143/0x170 fs/read_write.c:738
    do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
    do_syscall_64+0xa4/0xfa0 arch/x86/entry/syscall_64.c:94
    entry_SYSCALL_64_after_hwframe+0x77/0x7f

Reported-by: syzbot+d7abc36bbbb6d7d40b58@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=d7abc36bbbb6d7d40b58
Tested-by: syzbot+d7abc36bbbb6d7d40b58@syzkaller.appspotmail.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Wang Liang <wangliang74@huawei.com>
Link: https://patch.msgid.link/20251129041315.1550766-1-wangliang74@huawei.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/netrom/nr_out.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
index 5e531394a724b7..2b3cbceb0b52d5 100644
--- a/net/netrom/nr_out.c
+++ b/net/netrom/nr_out.c
@@ -43,8 +43,10 @@ void nr_output(struct sock *sk, struct sk_buff *skb)
 		frontlen = skb_headroom(skb);
 
 		while (skb->len > 0) {
-			if ((skbn = sock_alloc_send_skb(sk, frontlen + NR_MAX_PACKET_SIZE, 0, &err)) == NULL)
+			if ((skbn = sock_alloc_send_skb(sk, frontlen + NR_MAX_PACKET_SIZE, 0, &err)) == NULL) {
+				kfree_skb(skb);
 				return;
+			}
 
 			skb_reserve(skbn, frontlen);
 

From 188e0fa5a679570ea35474575e724d8211423d17 Mon Sep 17 00:00:00 2001
From: Shaurya Rane <ssrane_b23@ee.vjti.ac.in>
Date: Sat, 29 Nov 2025 15:07:18 +0530
Subject: [PATCH 009/258] net/hsr: fix NULL pointer dereference in
 prp_get_untagged_frame()

prp_get_untagged_frame() calls __pskb_copy() to create frame->skb_std
but doesn't check if the allocation failed. If __pskb_copy() returns
NULL, skb_clone() is called with a NULL pointer, causing a crash:

Oops: general protection fault, probably for non-canonical address 0xdffffc000000000f: 0000 [#1] SMP KASAN NOPTI
KASAN: null-ptr-deref in range [0x0000000000000078-0x000000000000007f]
CPU: 0 UID: 0 PID: 5625 Comm: syz.1.18 Not tainted syzkaller #0 PREEMPT(full)
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
RIP: 0010:skb_clone+0xd7/0x3a0 net/core/skbuff.c:2041
Code: 03 42 80 3c 20 00 74 08 4c 89 f7 e8 23 29 05 f9 49 83 3e 00 0f 85 a0 01 00 00 e8 94 dd 9d f8 48 8d 6b 7e 49 89 ee 49 c1 ee 03 <43> 0f b6 04 26 84 c0 0f 85 d1 01 00 00 44 0f b6 7d 00 41 83 e7 0c
RSP: 0018:ffffc9000d00f200 EFLAGS: 00010207
RAX: ffffffff892235a1 RBX: 0000000000000000 RCX: ffff88803372a480
RDX: 0000000000000000 RSI: 0000000000000820 RDI: 0000000000000000
RBP: 000000000000007e R08: ffffffff8f7d0f77 R09: 1ffffffff1efa1ee
R10: dffffc0000000000 R11: fffffbfff1efa1ef R12: dffffc0000000000
R13: 0000000000000820 R14: 000000000000000f R15: ffff88805144cc00
FS:  0000555557f6d500(0000) GS:ffff88808d72f000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000555581d35808 CR3: 000000005040e000 CR4: 0000000000352ef0
Call Trace:
 <TASK>
 hsr_forward_do net/hsr/hsr_forward.c:-1 [inline]
 hsr_forward_skb+0x1013/0x2860 net/hsr/hsr_forward.c:741
 hsr_handle_frame+0x6ce/0xa70 net/hsr/hsr_slave.c:84
 __netif_receive_skb_core+0x10b9/0x4380 net/core/dev.c:5966
 __netif_receive_skb_one_core net/core/dev.c:6077 [inline]
 __netif_receive_skb+0x72/0x380 net/core/dev.c:6192
 netif_receive_skb_internal net/core/dev.c:6278 [inline]
 netif_receive_skb+0x1cb/0x790 net/core/dev.c:6337
 tun_rx_batched+0x1b9/0x730 drivers/net/tun.c:1485
 tun_get_user+0x2b65/0x3e90 drivers/net/tun.c:1953
 tun_chr_write_iter+0x113/0x200 drivers/net/tun.c:1999
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x5c9/0xb30 fs/read_write.c:686
 ksys_write+0x145/0x250 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f0449f8e1ff
Code: 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 f9 92 02 00 48 8b 54 24 18 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 44 24 08 e8 4c 93 02 00 48
RSP: 002b:00007ffd7ad94c90 EFLAGS: 00000293 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 00007f044a1e5fa0 RCX: 00007f0449f8e1ff
RDX: 000000000000003e RSI: 0000200000000500 RDI: 00000000000000c8
RBP: 00007ffd7ad94d20 R08: 0000000000000000 R09: 0000000000000000
R10: 000000000000003e R11: 0000000000000293 R12: 0000000000000001
R13: 00007f044a1e5fa0 R14: 00007f044a1e5fa0 R15: 0000000000000003
 </TASK>

Add a NULL check immediately after __pskb_copy() to handle allocation
failures gracefully.

Reported-by: syzbot+2fa344348a579b779e05@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=2fa344348a579b779e05
Fixes: f266a683a480 ("net/hsr: Better frame dispatch")
Cc: stable@vger.kernel.org
Signed-off-by: Shaurya Rane <ssrane_b23@ee.vjti.ac.in>
Reviewed-by: Felix Maurer <fmaurer@redhat.com>
Tested-by: Felix Maurer <fmaurer@redhat.com>
Link: https://patch.msgid.link/20251129093718.25320-1-ssrane_b23@ee.vjti.ac.in
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/hsr/hsr_forward.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 339f0d22021294..aefc9b6936ba0c 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -205,6 +205,8 @@ struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame,
 				__pskb_copy(frame->skb_prp,
 					    skb_headroom(frame->skb_prp),
 					    GFP_ATOMIC);
+			if (!frame->skb_std)
+				return NULL;
 		} else {
 			/* Unexpected */
 			WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n",

From ce052b9402e461a9aded599f5b47e76bc727f7de Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Fri, 28 Nov 2025 10:19:19 -0500
Subject: [PATCH 010/258] net/sched: ets: Always remove class from active list
 before deleting in ets_qdisc_change

zdi-disclosures@trendmicro.com says:

The vulnerability is a race condition between `ets_qdisc_dequeue` and
`ets_qdisc_change`.  It leads to UAF on `struct Qdisc` object.
Attacker requires the capability to create new user and network namespace
in order to trigger the bug.
See my additional commentary at the end of the analysis.

Analysis:

static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
                          struct netlink_ext_ack *extack)
{
...

      // (1) this lock is preventing .change handler (`ets_qdisc_change`)
      //to race with .dequeue handler (`ets_qdisc_dequeue`)
      sch_tree_lock(sch);

      for (i = nbands; i < oldbands; i++) {
              if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
                      list_del_init(&q->classes[i].alist);
              qdisc_purge_queue(q->classes[i].qdisc);
      }

      WRITE_ONCE(q->nbands, nbands);
      for (i = nstrict; i < q->nstrict; i++) {
              if (q->classes[i].qdisc->q.qlen) {
		      // (2) the class is added to the q->active
                      list_add_tail(&q->classes[i].alist, &q->active);
                      q->classes[i].deficit = quanta[i];
              }
      }
      WRITE_ONCE(q->nstrict, nstrict);
      memcpy(q->prio2band, priomap, sizeof(priomap));

      for (i = 0; i < q->nbands; i++)
              WRITE_ONCE(q->classes[i].quantum, quanta[i]);

      for (i = oldbands; i < q->nbands; i++) {
              q->classes[i].qdisc = queues[i];
              if (q->classes[i].qdisc != &noop_qdisc)
                      qdisc_hash_add(q->classes[i].qdisc, true);
      }

      // (3) the qdisc is unlocked, now dequeue can be called in parallel
      // to the rest of .change handler
      sch_tree_unlock(sch);

      ets_offload_change(sch);
      for (i = q->nbands; i < oldbands; i++) {
	      // (4) we're reducing the refcount for our class's qdisc and
	      //  freeing it
              qdisc_put(q->classes[i].qdisc);
	      // (5) If we call .dequeue between (4) and (5), we will have
	      // a strong UAF and we can control RIP
              q->classes[i].qdisc = NULL;
              WRITE_ONCE(q->classes[i].quantum, 0);
              q->classes[i].deficit = 0;
              gnet_stats_basic_sync_init(&q->classes[i].bstats);
              memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
      }
      return 0;
}

Comment:
This happens because some of the classes have their qdiscs assigned to
NULL, but remain in the active list. This commit fixes this issue by always
removing the class from the active list before deleting and freeing its
associated qdisc

Reproducer Steps
(trimmed version of what was sent by zdi-disclosures@trendmicro.com)

```
DEV="${DEV:-lo}"
ROOT_HANDLE="${ROOT_HANDLE:-1:}"
BAND2_HANDLE="${BAND2_HANDLE:-20:}"   # child under 1:2
PING_BYTES="${PING_BYTES:-48}"
PING_COUNT="${PING_COUNT:-200000}"
PING_DST="${PING_DST:-127.0.0.1}"

SLOW_TBF_RATE="${SLOW_TBF_RATE:-8bit}"
SLOW_TBF_BURST="${SLOW_TBF_BURST:-100b}"
SLOW_TBF_LAT="${SLOW_TBF_LAT:-1s}"

cleanup() {
  tc qdisc del dev "$DEV" root 2>/dev/null
}
trap cleanup EXIT

ip link set "$DEV" up

tc qdisc del dev "$DEV" root 2>/dev/null || true

tc qdisc add dev "$DEV" root handle "$ROOT_HANDLE" ets bands 2 strict 2

tc qdisc add dev "$DEV" parent 1:2 handle "$BAND2_HANDLE" \
  tbf rate "$SLOW_TBF_RATE" burst "$SLOW_TBF_BURST" latency "$SLOW_TBF_LAT"

tc filter add dev "$DEV" parent 1: protocol all prio 1 u32 match u32 0 0 flowid 1:2
tc -s qdisc ls dev $DEV

ping -I "$DEV" -f -c "$PING_COUNT" -s "$PING_BYTES" -W 0.001 "$PING_DST" \
  >/dev/null 2>&1 &
tc qdisc change dev "$DEV" root handle "$ROOT_HANDLE" ets bands 2 strict 0
tc qdisc change dev "$DEV" root handle "$ROOT_HANDLE" ets bands 2 strict 2
tc -s qdisc ls dev $DEV
tc qdisc del dev "$DEV" parent 1:2 || true
tc -s qdisc ls dev $DEV
tc qdisc change dev "$DEV" root handle "$ROOT_HANDLE" ets bands 1 strict 1
```

KASAN report
```
==================================================================
BUG: KASAN: slab-use-after-free in ets_qdisc_dequeue+0x1071/0x11b0 kernel/net/sched/sch_ets.c:481
Read of size 8 at addr ffff8880502fc018 by task ping/12308
>
CPU: 0 UID: 0 PID: 12308 Comm: ping Not tainted 6.18.0-rc4-dirty #1 PREEMPT(full)
Hardware name: QEMU Ubuntu 25.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
Call Trace:
 <IRQ>
 __dump_stack kernel/lib/dump_stack.c:94
 dump_stack_lvl+0x100/0x190 kernel/lib/dump_stack.c:120
 print_address_description kernel/mm/kasan/report.c:378
 print_report+0x156/0x4c9 kernel/mm/kasan/report.c:482
 kasan_report+0xdf/0x110 kernel/mm/kasan/report.c:595
 ets_qdisc_dequeue+0x1071/0x11b0 kernel/net/sched/sch_ets.c:481
 dequeue_skb kernel/net/sched/sch_generic.c:294
 qdisc_restart kernel/net/sched/sch_generic.c:399
 __qdisc_run+0x1c9/0x1b00 kernel/net/sched/sch_generic.c:417
 __dev_xmit_skb kernel/net/core/dev.c:4221
 __dev_queue_xmit+0x2848/0x4410 kernel/net/core/dev.c:4729
 dev_queue_xmit kernel/./include/linux/netdevice.h:3365
[...]

Allocated by task 17115:
 kasan_save_stack+0x30/0x50 kernel/mm/kasan/common.c:56
 kasan_save_track+0x14/0x30 kernel/mm/kasan/common.c:77
 poison_kmalloc_redzone kernel/mm/kasan/common.c:400
 __kasan_kmalloc+0xaa/0xb0 kernel/mm/kasan/common.c:417
 kasan_kmalloc kernel/./include/linux/kasan.h:262
 __do_kmalloc_node kernel/mm/slub.c:5642
 __kmalloc_node_noprof+0x34e/0x990 kernel/mm/slub.c:5648
 kmalloc_node_noprof kernel/./include/linux/slab.h:987
 qdisc_alloc+0xb8/0xc30 kernel/net/sched/sch_generic.c:950
 qdisc_create_dflt+0x93/0x490 kernel/net/sched/sch_generic.c:1012
 ets_class_graft+0x4fd/0x800 kernel/net/sched/sch_ets.c:261
 qdisc_graft+0x3e4/0x1780 kernel/net/sched/sch_api.c:1196
[...]

Freed by task 9905:
 kasan_save_stack+0x30/0x50 kernel/mm/kasan/common.c:56
 kasan_save_track+0x14/0x30 kernel/mm/kasan/common.c:77
 __kasan_save_free_info+0x3b/0x70 kernel/mm/kasan/generic.c:587
 kasan_save_free_info kernel/mm/kasan/kasan.h:406
 poison_slab_object kernel/mm/kasan/common.c:252
 __kasan_slab_free+0x5f/0x80 kernel/mm/kasan/common.c:284
 kasan_slab_free kernel/./include/linux/kasan.h:234
 slab_free_hook kernel/mm/slub.c:2539
 slab_free kernel/mm/slub.c:6630
 kfree+0x144/0x700 kernel/mm/slub.c:6837
 rcu_do_batch kernel/kernel/rcu/tree.c:2605
 rcu_core+0x7c0/0x1500 kernel/kernel/rcu/tree.c:2861
 handle_softirqs+0x1ea/0x8a0 kernel/kernel/softirq.c:622
 __do_softirq kernel/kernel/softirq.c:656
[...]

Commentary:

1. Maher Azzouzi working with Trend Micro Zero Day Initiative was reported as
the person who found the issue. I requested to get a proper email to add to the
reported-by tag but got no response. For this reason i will credit the person
i exchanged emails with i.e zdi-disclosures@trendmicro.com

2. Neither i nor Victor who did a much more thorough testing was able to
reproduce a UAF with the PoC or other approaches we tried. We were both able to
reproduce a null ptr deref. After exchange with zdi-disclosures@trendmicro.com
they sent a small change to be made to the code to add an extra delay which
was able to simulate the UAF. i.e, this:
   qdisc_put(q->classes[i].qdisc);
   mdelay(90);
   q->classes[i].qdisc = NULL;

I was informed by Thomas Gleixner(tglx@linutronix.de) that adding delays was
acceptable approach for demonstrating the bug, quote:
"Adding such delays is common exploit validation practice"
The equivalent delay could happen "by virt scheduling the vCPU out, SMIs,
NMIs, PREEMPT_RT enabled kernel"

3. I asked the OP to test and report back but got no response and after a
few days gave up and proceeded to submit this fix.

Fixes: de6d25924c2a ("net/sched: sch_ets: don't peek at classes beyond 'nbands'")
Reported-by: zdi-disclosures@trendmicro.com
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Davide Caratti <dcaratti@redhat.com>
Link: https://patch.msgid.link/20251128151919.576920-1-jhs@mojatatu.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/sched/sch_ets.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 82635dd2cfa59f..ae46643e596d30 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -652,7 +652,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
 	sch_tree_lock(sch);
 
 	for (i = nbands; i < oldbands; i++) {
-		if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
+		if (cl_is_active(&q->classes[i]))
 			list_del_init(&q->classes[i].alist);
 		qdisc_purge_queue(q->classes[i].qdisc);
 	}

From cd7671ef4cf2edf73cd2a3dca3a2f522a4525bf5 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@nvidia.com>
Date: Mon, 1 Dec 2025 17:13:27 +0200
Subject: [PATCH 011/258] net/mlx5: make enable_mpesw idempotent

The enable_mpesw() function returns -EINVAL if ldev->mode is not
MLX5_LAG_MODE_NONE. This means attempting to enable MPESW mode when it's
already enabled will fail. In contrast, disable_mpesw() properly checks
if the mode is MLX5_LAG_MODE_MPESW before proceeding, making it
naturally idempotent and safe to call multiple times.

Fix enable_mpesw() to return success if mpesw is already enabled.

Fixes: a32327a3a02c ("net/mlx5: Lag, Control MultiPort E-Switch single FDB mode")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Shay Drori <shayd@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/1764602008-1334866-2-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
index aad52d3a90e68b..2d86af8f0d9b81 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -67,12 +67,19 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev)
 
 static int enable_mpesw(struct mlx5_lag *ldev)
 {
-	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 	struct mlx5_core_dev *dev0;
 	int err;
+	int idx;
 	int i;
 
-	if (idx < 0 || ldev->mode != MLX5_LAG_MODE_NONE)
+	if (ldev->mode == MLX5_LAG_MODE_MPESW)
+		return 0;
+
+	if (ldev->mode != MLX5_LAG_MODE_NONE)
+		return -EINVAL;
+
+	idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+	if (idx < 0)
 		return -EINVAL;
 
 	dev0 = ldev->pf[idx].dev;

From 35e93736f69963337912594eb3951ab320b77521 Mon Sep 17 00:00:00 2001
From: Cosmin Ratiu <cratiu@nvidia.com>
Date: Mon, 1 Dec 2025 17:13:28 +0200
Subject: [PATCH 012/258] net/mlx5e: Avoid unregistering PSP twice

PSP is unregistered twice in:
_mlx5e_remove -> mlx5e_psp_unregister
mlx5e_nic_cleanup -> mlx5e_psp_unregister

This leads to a refcount underflow in some conditions:
------------[ cut here ]------------
refcount_t: underflow; use-after-free.
WARNING: CPU: 2 PID: 1694 at lib/refcount.c:28 refcount_warn_saturate+0xd8/0xe0
[...]
 mlx5e_psp_unregister+0x26/0x50 [mlx5_core]
 mlx5e_nic_cleanup+0x26/0x90 [mlx5_core]
 mlx5e_remove+0xe6/0x1f0 [mlx5_core]
 auxiliary_bus_remove+0x18/0x30
 device_release_driver_internal+0x194/0x1f0
 bus_remove_device+0xc6/0x130
 device_del+0x159/0x3c0
 mlx5_rescan_drivers_locked+0xbc/0x2a0 [mlx5_core]
[...]

Do not directly remove psp from the _mlx5e_remove path, the PSP cleanup
happens as part of profile cleanup.

Fixes: 89ee2d92f66c ("net/mlx5e: Support PSP offload functionality")
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/1764602008-1334866-3-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 6168f081441483..07fc4d2c8fadd4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -6825,7 +6825,6 @@ static void _mlx5e_remove(struct auxiliary_device *adev)
 	 * is already unregistered before changing to NIC profile.
 	 */
 	if (priv->netdev->reg_state == NETREG_REGISTERED) {
-		mlx5e_psp_unregister(priv);
 		unregister_netdev(priv->netdev);
 		_mlx5e_suspend(adev, false);
 	} else {

From 4f0638b12451112de4138689fa679315c8d388dc Mon Sep 17 00:00:00 2001
From: Ivan Galkin <ivan.galkin@axis.com>
Date: Tue, 2 Dec 2025 10:07:42 +0100
Subject: [PATCH 013/258] net: phy: RTL8211FVD: Restore disabling of PHY-mode
 EEE

When support for RTL8211F(D)(I)-VD-CG was introduced in commit
bb726b753f75 ("net: phy: realtek: add support for RTL8211F(D)(I)-VD-CG")
the implementation assumed that this PHY model doesn't have the
control register PHYCR2 (Page 0xa43 Address 0x19). This
assumption was based on the differences in CLKOUT configurations
between RTL8211FVD and the remaining RTL8211F PHYs. In the latter
commit 2c67301584f2
("net: phy: realtek: Avoid PHYCR2 access if PHYCR2 not present")
this assumption was expanded to the PHY-mode EEE.

I performed tests on RTL8211FI-VD-CG and confirmed that disabling
PHY-mode EEE works correctly and is uniform with other PHYs
supported by the driver. To validate the correctness,
I contacted Realtek support. Realtek confirmed that PHY-mode EEE on
RTL8211F(D)(I)-VD-CG is configured via Page 0xa43 Address 0x19 bit 5.

Moreover, Realtek informed me that the most recent datasheet
for RTL8211F(D)(I)-VD-CG v1.1 is incomplete and the naming of
control registers is partly inconsistent. The errata I
received from Realtek corrects the naming as follows:

| Register                | Datasheet v1.1 | Errata |
|-------------------------|----------------|--------|
| Page 0xa44 Address 0x11 | PHYCR2         | PHYCR3 |
| Page 0xa43 Address 0x19 | N/A            | PHYCR2 |

This information confirms that the supposedly missing control register,
PHYCR2, exists in the RTL8211F(D)(I)-VD-CG under the same address and
the same name. It controls widely the same configs as other PHYs from
the RTL8211F series (e.g. PHY-mode EEE). Clock out configuration is an
exception.

Given all this information, restore disabling of the PHY-mode EEE.

Fixes: 2c67301584f2 ("net: phy: realtek: Avoid PHYCR2 access if PHYCR2 not present")
Signed-off-by: Ivan Galkin <ivan.galkin@axis.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20251202-phy_eee-v1-1-fe0bf6ab3df0@axis.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/phy/realtek/realtek_main.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c
index 67ecf3d4af2b19..6ff0385201a571 100644
--- a/drivers/net/phy/realtek/realtek_main.c
+++ b/drivers/net/phy/realtek/realtek_main.c
@@ -691,10 +691,6 @@ static int rtl8211f_config_aldps(struct phy_device *phydev)
 
 static int rtl8211f_config_phy_eee(struct phy_device *phydev)
 {
-	/* RTL8211FVD has no PHYCR2 register */
-	if (phydev->drv->phy_id == RTL_8211FVD_PHYID)
-		return 0;
-
 	/* Disable PHY-mode EEE so LPI is passed to the MAC */
 	return phy_modify_paged(phydev, RTL8211F_PHYCR_PAGE, RTL8211F_PHYCR2,
 				RTL8211F_PHYCR2_PHY_EEE_ENABLE, 0);

From 5b48f49ee94888f3cd4360286ee9921eff2b2e46 Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Tue, 2 Dec 2025 09:57:21 +0000
Subject: [PATCH 014/258] net: dsa: mxl-gsw1xx: fix SerDes RX polarity

According to MaxLinear engineer Benny Weng the RX lane of the SerDes
port of the GSW1xx switches is inverted in hardware, and the
SGMII_PHY_RX0_CFG2_INVERT bit is set by default in order to compensate
for that. Hence also set the SGMII_PHY_RX0_CFG2_INVERT bit by default in
gsw1xx_pcs_reset().

Fixes: 22335939ec90 ("net: dsa: add driver for MaxLinear GSW1xx switch family")
Reported-by: Rasmus Villemoes <ravi@prevas.dk>
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Link: https://patch.msgid.link/ca10e9f780c0152ecf9ae8cbac5bf975802e8f99.1764668951.git.daniel@makrotopia.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/dsa/lantiq/mxl-gsw1xx.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/lantiq/mxl-gsw1xx.c b/drivers/net/dsa/lantiq/mxl-gsw1xx.c
index 0816c61a47f12c..cf33a16fd183b4 100644
--- a/drivers/net/dsa/lantiq/mxl-gsw1xx.c
+++ b/drivers/net/dsa/lantiq/mxl-gsw1xx.c
@@ -255,10 +255,16 @@ static int gsw1xx_pcs_reset(struct gsw1xx_priv *priv)
 	      FIELD_PREP(GSW1XX_SGMII_PHY_RX0_CFG2_FILT_CNT,
 			 GSW1XX_SGMII_PHY_RX0_CFG2_FILT_CNT_DEF);
 
-	/* TODO: Take care of inverted RX pair once generic property is
+	/* RX lane seems to be inverted internally, so bit
+	 * GSW1XX_SGMII_PHY_RX0_CFG2_INVERT needs to be set for normal
+	 * (ie. non-inverted) operation.
+	 *
+	 * TODO: Take care of inverted RX pair once generic property is
 	 *       available
 	 */
 
+	val |= GSW1XX_SGMII_PHY_RX0_CFG2_INVERT;
+
 	ret = regmap_write(priv->sgmii, GSW1XX_SGMII_PHY_RX0_CFG2, val);
 	if (ret < 0)
 		return ret;

From 0c57ff008a11f24f7f05fa760222692a00465fec Mon Sep 17 00:00:00 2001
From: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
Date: Tue, 2 Dec 2025 13:39:03 +0300
Subject: [PATCH 015/258] ipvlan: Ignore PACKET_LOOPBACK in handle_mode_l2()

Packets with pkt_type == PACKET_LOOPBACK are captured by
handle_frame() function, but they don't have L2 header.
We should not process them in handle_mode_l2().

This doesn't affect old L2 functionality, since handling
was anyway incorrect.

Handle them the same way as in br_handle_frame():
just pass the skb.

To observe invalid behaviour, just start "ping -b" on bcast address
of port-interface.

Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.")
Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
Link: https://patch.msgid.link/20251202103906.4087675-1-skorodumov.dmitry@huawei.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index dea411e132dba1..2efa3ba148aa7e 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -737,6 +737,9 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 	struct ethhdr *eth = eth_hdr(skb);
 	rx_handler_result_t ret = RX_HANDLER_PASS;
 
+	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
+		return RX_HANDLER_PASS;
+
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		if (ipvlan_external_frame(skb, port)) {
 			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

From 6a107cfe9c99a079e578a4c5eb70038101a3599f Mon Sep 17 00:00:00 2001
From: Gerd Bayer <gbayer@linux.ibm.com>
Date: Tue, 2 Dec 2025 12:12:57 +0100
Subject: [PATCH 016/258] net/mlx5: Fix double unregister of HCA_PORTS
 component

Clear hca_devcom_comp in device's private data after unregistering it in
LAG teardown. Otherwise a slightly lagging second pass through
mlx5_unload_one() might try to unregister it again and trip over
use-after-free.

On s390 almost all PCI level recovery events trigger two passes through
mxl5_unload_one() - one through the poll_health() method and one through
mlx5_pci_err_detected() as callback from generic PCI error recovery.
While testing PCI error recovery paths with more kernel debug features
enabled, this issue reproducibly led to kernel panics with the following
call chain:

 Unable to handle kernel pointer dereference in virtual kernel address space
 Failing address: 6b6b6b6b6b6b6000 TEID: 6b6b6b6b6b6b6803 ESOP-2 FSI
 Fault in home space mode while using kernel ASCE.
 AS:00000000705c4007 R3:0000000000000024
 Oops: 0038 ilc:3 [#1]SMP

 CPU: 14 UID: 0 PID: 156 Comm: kmcheck Kdump: loaded Not tainted
      6.18.0-20251130.rc7.git0.16131a59cab1.300.fc43.s390x+debug #1 PREEMPT

 Krnl PSW : 0404e00180000000 0000020fc86aa1dc (__lock_acquire+0x5c/0x15f0)
            R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3
 Krnl GPRS: 0000000000000000 0000020f00000001 6b6b6b6b6b6b6c33 0000000000000000
            0000000000000000 0000000000000000 0000000000000001 0000000000000000
            0000000000000000 0000020fca28b820 0000000000000000 0000010a1ced8100
            0000010a1ced8100 0000020fc9775068 0000018fce14f8b8 0000018fce14f7f8
 Krnl Code: 0000020fc86aa1cc: e3b003400004        lg      %r11,832
            0000020fc86aa1d2: a7840211           brc     8,0000020fc86aa5f4
           *0000020fc86aa1d6: c09000df0b25       larl    %r9,0000020fca28b820
           >0000020fc86aa1dc: d50790002000       clc     0(8,%r9),0(%r2)
            0000020fc86aa1e2: a7840209           brc     8,0000020fc86aa5f4
            0000020fc86aa1e6: c0e001100401       larl    %r14,0000020fca8aa9e8
            0000020fc86aa1ec: c01000e25a00       larl    %r1,0000020fca2f55ec
            0000020fc86aa1f2: a7eb00e8           aghi    %r14,232

 Call Trace:
  __lock_acquire+0x5c/0x15f0
  lock_acquire.part.0+0xf8/0x270
  lock_acquire+0xb0/0x1b0
  down_write+0x5a/0x250
  mlx5_detach_device+0x42/0x110 [mlx5_core]
  mlx5_unload_one_devl_locked+0x50/0xc0 [mlx5_core]
  mlx5_unload_one+0x42/0x60 [mlx5_core]
  mlx5_pci_err_detected+0x94/0x150 [mlx5_core]
  zpci_event_attempt_error_recovery+0xcc/0x388

Fixes: 5a977b5833b7 ("net/mlx5: Lag, move devcom registration to LAG layer")
Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Acked-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20251202-fix_lag-v1-1-59e8177ffce0@linux.ibm.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 1ac933cd8f02b1..a459a30f36cae6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -1413,6 +1413,7 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
 static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev)
 {
 	mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp);
+	dev->priv.hca_devcom_comp = NULL;
 }
 
 static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)

From da01f64e7470988f8607776aa7afa924208863fb Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 13 Nov 2025 14:56:13 -0800
Subject: [PATCH 017/258] KVM: nSVM: Clear exit_code_hi in VMCB when
 synthesizing nested VM-Exits

Explicitly clear exit_code_hi in the VMCB when synthesizing "normal"
nested VM-Exits, as the full exit code is a 64-bit value (spoiler alert),
and all exit codes for non-failing VMRUN use only bits 31:0.

Cc: Jim Mattson <jmattson@google.com>
Cc: Yosry Ahmed <yosry.ahmed@linux.dev>
Cc: stable@vger.kernel.org
Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Link: https://patch.msgid.link/20251113225621.1688428-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/svm/svm.c | 2 ++
 arch/x86/kvm/svm/svm.h | 7 ++++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9d29b2e7e855d6..eeeb4ae4c617eb 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2435,6 +2435,7 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu,
 
 	if (cr0 ^ val) {
 		svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
+		svm->vmcb->control.exit_code_hi = 0;
 		ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
 	}
 
@@ -4611,6 +4612,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
 	if (static_cpu_has(X86_FEATURE_NRIPS))
 		vmcb->control.next_rip  = info->next_rip;
 	vmcb->control.exit_code = icpt_info.exit_code;
+	vmcb->control.exit_code_hi = 0;
 	vmexit = nested_svm_exit_handled(svm);
 
 	ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index dd78e64023450b..e66a16e59b1a5f 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -764,9 +764,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm);
 
 static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
 {
-	svm->vmcb->control.exit_code   = exit_code;
-	svm->vmcb->control.exit_info_1 = 0;
-	svm->vmcb->control.exit_info_2 = 0;
+	svm->vmcb->control.exit_code	= exit_code;
+	svm->vmcb->control.exit_code_hi	= 0;
+	svm->vmcb->control.exit_info_1	= 0;
+	svm->vmcb->control.exit_info_2	= 0;
 	return nested_svm_vmexit(svm);
 }
 

From f402ecd7a8b6446547076f4bd24bd5d4dcc94481 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 13 Nov 2025 14:56:14 -0800
Subject: [PATCH 018/258] KVM: nSVM: Set exit_code_hi to -1 when synthesizing
 SVM_EXIT_ERR (failed VMRUN)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Set exit_code_hi to -1u as a temporary band-aid to fix a long-standing
(effectively since KVM's inception) bug where KVM treats the exit code as
a 32-bit value, when in reality it's a 64-bit value.  Per the APM, offset
0x70 is a single 64-bit value:

  070h 63:0 EXITCODE

And a sane reading of the error values defined in "Table C-1. SVM Intercept
Codes" is that negative values use the full 64 bits:

  –1 VMEXIT_INVALID Invalid guest state in VMCB.
  –2 VMEXIT_BUSYBUSY bit was set in the VMSA
  –3 VMEXIT_IDLE_REQUIREDThe sibling thread is not in an idle state
  -4 VMEXIT_INVALID_PMC Invalid PMC state

And that interpretation is confirmed by testing on Milan and Turin (by
setting bits in CR0[63:32] to generate VMEXIT_INVALID on VMRUN).

Furthermore, Xen has treated exitcode as a 64-bit value since HVM support
was adding in 2006 (see Xen commit d1bd157fbc ("Big merge the HVM
full-virtualisation abstractions.")).

Cc: Jim Mattson <jmattson@google.com>
Cc: Yosry Ahmed <yosry.ahmed@linux.dev>
Cc: stable@vger.kernel.org
Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Link: https://patch.msgid.link/20251113225621.1688428-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/svm/nested.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index da6e80b3ac353c..143a0ef02b03ed 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -983,7 +983,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 	if (!nested_vmcb_check_save(vcpu) ||
 	    !nested_vmcb_check_controls(vcpu)) {
 		vmcb12->control.exit_code    = SVM_EXIT_ERR;
-		vmcb12->control.exit_code_hi = 0;
+		vmcb12->control.exit_code_hi = -1u;
 		vmcb12->control.exit_info_1  = 0;
 		vmcb12->control.exit_info_2  = 0;
 		goto out;
@@ -1016,7 +1016,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 	svm->soft_int_injected = false;
 
 	svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
-	svm->vmcb->control.exit_code_hi = 0;
+	svm->vmcb->control.exit_code_hi = -1u;
 	svm->vmcb->control.exit_info_1  = 0;
 	svm->vmcb->control.exit_info_2  = 0;
 

From 861111b69896145a928c889d9344797ea3711028 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 4 Dec 2025 11:29:16 +0100
Subject: [PATCH 019/258] net: smc: SMC_HS_CTRL_BPF should depend on BPF_JIT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If CONFIG_BPF_SYSCALL=y, but CONFIG_BPF_JIT=n:

    net/smc/smc_hs_bpf.c: In function ‘bpf_smc_hs_ctrl_init’:
    include/linux/bpf.h:2068:50: error: statement with no effect [-Werror=unused-value]
     2068 | #define register_bpf_struct_ops(st_ops, type) ({ (void *)(st_ops); 0; })
	  |                                                  ^~~~~~~~~~~~~~~~
    net/smc/smc_hs_bpf.c:139:16: note: in expansion of macro ‘register_bpf_struct_ops’
      139 |         return register_bpf_struct_ops(&bpf_smc_hs_ctrl_ops, smc_hs_ctrl);
	  |                ^~~~~~~~~~~~~~~~~~~~~~~

While this compile error is caused by a bug in <linux/bpf.h>, none of
the code in net/smc/smc_hs_bpf.c becomes effective if CONFIG_BPF_JIT is
not enabled.  Hence add a dependency on BPF_JIT.

While at it, add the missing newline at the end of the file.

Fixes: 15f295f55656658e ("net/smc: bpf: Introduce generic hook for handshake flow")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://patch.msgid.link/988c61e5fea280872d81b3640f1f34d0619cfbbf.1764843951.git.geert@linux-m68k.org
---
 net/smc/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index 325addf83cc69f..277ef504bc26ef 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -22,10 +22,10 @@ config SMC_DIAG
 
 config SMC_HS_CTRL_BPF
 	bool "Generic eBPF hook for SMC handshake flow"
-	depends on SMC && BPF_SYSCALL
+	depends on SMC && BPF_JIT && BPF_SYSCALL
 	default y
 	help
 	  SMC_HS_CTRL_BPF enables support to register generic eBPF hook for SMC
 	  handshake flow, which offer much greater flexibility in modifying the behavior
 	  of the SMC protocol stack compared to a complete kernel-based approach. Select
-	  this option if you want filtring the handshake process via eBPF programs.
\ No newline at end of file
+	  this option if you want filtring the handshake process via eBPF programs.

From c4cdf7376271bce5714c06d79ec67759b18910eb Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@linux.dev>
Date: Tue, 2 Dec 2025 18:27:44 +0100
Subject: [PATCH 020/258] net: phy: marvell-88q2xxx: Fix clamped value in
 mv88q2xxx_hwmon_write

The local variable 'val' was never clamped to -75000 or 180000 because
the return value of clamp_val() was not used. Fix this by assigning the
clamped value back to 'val', and use clamp() instead of clamp_val().

Cc: stable@vger.kernel.org
Fixes: a557a92e6881 ("net: phy: marvell-88q2xxx: add support for temperature sensor")
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Reviewed-by: Dimitri Fedrau <dima.fedrau@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/20251202172743.453055-3-thorsten.blum@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/marvell-88q2xxx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/marvell-88q2xxx.c b/drivers/net/phy/marvell-88q2xxx.c
index f3d83b04c9535c..201dee1a16985e 100644
--- a/drivers/net/phy/marvell-88q2xxx.c
+++ b/drivers/net/phy/marvell-88q2xxx.c
@@ -698,7 +698,7 @@ static int mv88q2xxx_hwmon_write(struct device *dev,
 
 	switch (attr) {
 	case hwmon_temp_max:
-		clamp_val(val, -75000, 180000);
+		val = clamp(val, -75000, 180000);
 		val = (val / 1000) + 75;
 		val = FIELD_PREP(MDIO_MMD_PCS_MV_TEMP_SENSOR3_INT_THRESH_MASK,
 				 val);

From b6b638bda240395dff49a87403b2e32493e56d2a Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 2 Dec 2025 18:44:11 +0100
Subject: [PATCH 021/258] mlxsw: spectrum_router: Fix possible neighbour
 reference count leak

mlxsw_sp_router_schedule_work() takes a reference on a neighbour,
expecting a work item to release it later on. However, we might fail to
schedule the work item, in which case the neighbour reference count will
be leaked.

Fix by taking the reference just before scheduling the work item. Note
that mlxsw_sp_router_schedule_work() can receive a NULL neighbour
pointer, but neigh_clone() handles that correctly.

Spotted during code review, did not actually observe the reference count
leak.

Fixes: 151b89f6025a ("mlxsw: spectrum_router: Reuse work neighbor initialization in work scheduler")
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/ec2934ae4aca187a8d8c9329a08ce93cca411378.1764695650.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index a2033837182e86..f4e9ecaeb104ff 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2858,6 +2858,11 @@ static int mlxsw_sp_router_schedule_work(struct net *net,
 	if (!net_work)
 		return NOTIFY_BAD;
 
+	/* Take a reference to ensure the neighbour won't be destructed until
+	 * we drop the reference in the work item.
+	 */
+	neigh_clone(n);
+
 	INIT_WORK(&net_work->work, cb);
 	net_work->mlxsw_sp = router->mlxsw_sp;
 	net_work->n = n;
@@ -2881,11 +2886,6 @@ static int mlxsw_sp_router_schedule_neigh_work(struct mlxsw_sp_router *router,
 	struct net *net;
 
 	net = neigh_parms_net(n->parms);
-
-	/* Take a reference to ensure the neighbour won't be destructed until we
-	 * drop the reference in delayed work.
-	 */
-	neigh_clone(n);
 	return mlxsw_sp_router_schedule_work(net, router, n,
 					     mlxsw_sp_router_neigh_event_work);
 }

From 8b0e69763ef948fb872a7767df4be665d18f5fd4 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 2 Dec 2025 18:44:12 +0100
Subject: [PATCH 022/258] mlxsw: spectrum_router: Fix neighbour use-after-free

We sometimes observe use-after-free when dereferencing a neighbour [1].
The problem seems to be that the driver stores a pointer to the
neighbour, but without holding a reference on it. A reference is only
taken when the neighbour is used by a nexthop.

Fix by simplifying the reference counting scheme. Always take a
reference when storing a neighbour pointer in a neighbour entry. Avoid
taking a referencing when the neighbour is used by a nexthop as the
neighbour entry associated with the nexthop already holds a reference.

Tested by running the test that uncovered the problem over 300 times.
Without this patch the problem was reproduced after a handful of
iterations.

[1]
BUG: KASAN: slab-use-after-free in mlxsw_sp_neigh_entry_update+0x2d4/0x310
Read of size 8 at addr ffff88817f8e3420 by task ip/3929

CPU: 3 UID: 0 PID: 3929 Comm: ip Not tainted 6.18.0-rc4-virtme-g36b21a067510 #3 PREEMPT(full)
Hardware name: Nvidia SN5600/VMOD0013, BIOS 5.13 05/31/2023
Call Trace:
 <TASK>
 dump_stack_lvl+0x6f/0xa0
 print_address_description.constprop.0+0x6e/0x300
 print_report+0xfc/0x1fb
 kasan_report+0xe4/0x110
 mlxsw_sp_neigh_entry_update+0x2d4/0x310
 mlxsw_sp_router_rif_gone_sync+0x35f/0x510
 mlxsw_sp_rif_destroy+0x1ea/0x730
 mlxsw_sp_inetaddr_port_vlan_event+0xa1/0x1b0
 __mlxsw_sp_inetaddr_lag_event+0xcc/0x130
 __mlxsw_sp_inetaddr_event+0xf5/0x3c0
 mlxsw_sp_router_netdevice_event+0x1015/0x1580
 notifier_call_chain+0xcc/0x150
 call_netdevice_notifiers_info+0x7e/0x100
 __netdev_upper_dev_unlink+0x10b/0x210
 netdev_upper_dev_unlink+0x79/0xa0
 vrf_del_slave+0x18/0x50
 do_set_master+0x146/0x7d0
 do_setlink.isra.0+0x9a0/0x2880
 rtnl_newlink+0x637/0xb20
 rtnetlink_rcv_msg+0x6fe/0xb90
 netlink_rcv_skb+0x123/0x380
 netlink_unicast+0x4a3/0x770
 netlink_sendmsg+0x75b/0xc90
 __sock_sendmsg+0xbe/0x160
 ____sys_sendmsg+0x5b2/0x7d0
 ___sys_sendmsg+0xfd/0x180
 __sys_sendmsg+0x124/0x1c0
 do_syscall_64+0xbb/0xfd0
 entry_SYSCALL_64_after_hwframe+0x4b/0x53
[...]

Allocated by task 109:
 kasan_save_stack+0x30/0x50
 kasan_save_track+0x14/0x30
 __kasan_kmalloc+0x7b/0x90
 __kmalloc_noprof+0x2c1/0x790
 neigh_alloc+0x6af/0x8f0
 ___neigh_create+0x63/0xe90
 mlxsw_sp_nexthop_neigh_init+0x430/0x7e0
 mlxsw_sp_nexthop_type_init+0x212/0x960
 mlxsw_sp_nexthop6_group_info_init.constprop.0+0x81f/0x1280
 mlxsw_sp_nexthop6_group_get+0x392/0x6a0
 mlxsw_sp_fib6_entry_create+0x46a/0xfd0
 mlxsw_sp_router_fib6_replace+0x1ed/0x5f0
 mlxsw_sp_router_fib6_event_work+0x10a/0x2a0
 process_one_work+0xd57/0x1390
 worker_thread+0x4d6/0xd40
 kthread+0x355/0x5b0
 ret_from_fork+0x1d4/0x270
 ret_from_fork_asm+0x11/0x20

Freed by task 154:
 kasan_save_stack+0x30/0x50
 kasan_save_track+0x14/0x30
 __kasan_save_free_info+0x3b/0x60
 __kasan_slab_free+0x43/0x70
 kmem_cache_free_bulk.part.0+0x1eb/0x5e0
 kvfree_rcu_bulk+0x1f2/0x260
 kfree_rcu_work+0x130/0x1b0
 process_one_work+0xd57/0x1390
 worker_thread+0x4d6/0xd40
 kthread+0x355/0x5b0
 ret_from_fork+0x1d4/0x270
 ret_from_fork_asm+0x11/0x20

Last potentially related work creation:
 kasan_save_stack+0x30/0x50
 kasan_record_aux_stack+0x8c/0xa0
 kvfree_call_rcu+0x93/0x5b0
 mlxsw_sp_router_neigh_event_work+0x67d/0x860
 process_one_work+0xd57/0x1390
 worker_thread+0x4d6/0xd40
 kthread+0x355/0x5b0
 ret_from_fork+0x1d4/0x270
 ret_from_fork_asm+0x11/0x20

Fixes: 6cf3c971dc84 ("mlxsw: spectrum_router: Add private neigh table")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/92d75e21d95d163a41b5cea67a15cd33f547cba6.1764695650.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../ethernet/mellanox/mlxsw/spectrum_router.c   | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index f4e9ecaeb104ff..2d0e89bd2fb9ca 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2265,6 +2265,7 @@ mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
 	if (!neigh_entry)
 		return NULL;
 
+	neigh_hold(n);
 	neigh_entry->key.n = n;
 	neigh_entry->rif = rif;
 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
@@ -2274,6 +2275,7 @@ mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
 
 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
 {
+	neigh_release(neigh_entry->key.n);
 	kfree(neigh_entry);
 }
 
@@ -4320,6 +4322,8 @@ mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_neigh_entry_insert;
 
+	neigh_release(old_n);
+
 	read_lock_bh(&n->lock);
 	nud_state = n->nud_state;
 	dead = n->dead;
@@ -4328,14 +4332,10 @@ mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
 
 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
 			    neigh_list_node) {
-		neigh_release(old_n);
-		neigh_clone(n);
 		__mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
 	}
 
-	neigh_release(n);
-
 	return 0;
 
 err_neigh_entry_insert:
@@ -4428,6 +4428,11 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
 		}
 	}
 
+	/* Release the reference taken by neigh_lookup() / neigh_create() since
+	 * neigh_entry already holds one.
+	 */
+	neigh_release(n);
+
 	/* If that is the first nexthop connected to that neigh, add to
 	 * nexthop_neighs_list
 	 */
@@ -4454,11 +4459,9 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
 					struct mlxsw_sp_nexthop *nh)
 {
 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
-	struct neighbour *n;
 
 	if (!neigh_entry)
 		return;
-	n = neigh_entry->key.n;
 
 	__mlxsw_sp_nexthop_neigh_update(nh, true);
 	list_del(&nh->neigh_list_node);
@@ -4472,8 +4475,6 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
 
 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
-
-	neigh_release(n);
 }
 
 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)

From 8ac1dacec458f55f871f7153242ed6ab60373b90 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 2 Dec 2025 18:44:13 +0100
Subject: [PATCH 023/258] mlxsw: spectrum_mr: Fix use-after-free when updating
 multicast route stats

Cited commit added a dedicated mutex (instead of RTNL) to protect the
multicast route list, so that it will not change while the driver
periodically traverses it in order to update the kernel about multicast
route stats that were queried from the device.

One instance of list entry deletion (during route replace) was missed
and it can result in a use-after-free [1].

Fix by acquiring the mutex before deleting the entry from the list and
releasing it afterwards.

[1]
BUG: KASAN: slab-use-after-free in mlxsw_sp_mr_stats_update+0x4a5/0x540 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c:1006 [mlxsw_spectrum]
Read of size 8 at addr ffff8881523c2fa8 by task kworker/2:5/22043

CPU: 2 UID: 0 PID: 22043 Comm: kworker/2:5 Not tainted 6.18.0-rc1-custom-g1a3d6d7cd014 #1 PREEMPT(full)
Hardware name: Mellanox Technologies Ltd. MSN2010/SA002610, BIOS 5.6.5 08/24/2017
Workqueue: mlxsw_core mlxsw_sp_mr_stats_update [mlxsw_spectrum]
Call Trace:
 <TASK>
 dump_stack_lvl+0xba/0x110
 print_report+0x174/0x4f5
 kasan_report+0xdf/0x110
 mlxsw_sp_mr_stats_update+0x4a5/0x540 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c:1006 [mlxsw_spectrum]
 process_one_work+0x9cc/0x18e0
 worker_thread+0x5df/0xe40
 kthread+0x3b8/0x730
 ret_from_fork+0x3e9/0x560
 ret_from_fork_asm+0x1a/0x30
 </TASK>

Allocated by task 29933:
 kasan_save_stack+0x30/0x50
 kasan_save_track+0x14/0x30
 __kasan_kmalloc+0x8f/0xa0
 mlxsw_sp_mr_route_add+0xd8/0x4770 [mlxsw_spectrum]
 mlxsw_sp_router_fibmr_event_work+0x371/0xad0 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:7965 [mlxsw_spectrum]
 process_one_work+0x9cc/0x18e0
 worker_thread+0x5df/0xe40
 kthread+0x3b8/0x730
 ret_from_fork+0x3e9/0x560
 ret_from_fork_asm+0x1a/0x30

Freed by task 29933:
 kasan_save_stack+0x30/0x50
 kasan_save_track+0x14/0x30
 __kasan_save_free_info+0x3b/0x70
 __kasan_slab_free+0x43/0x70
 kfree+0x14e/0x700
 mlxsw_sp_mr_route_add+0x2dea/0x4770 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c:444 [mlxsw_spectrum]
 mlxsw_sp_router_fibmr_event_work+0x371/0xad0 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:7965 [mlxsw_spectrum]
 process_one_work+0x9cc/0x18e0
 worker_thread+0x5df/0xe40
 kthread+0x3b8/0x730
 ret_from_fork+0x3e9/0x560
 ret_from_fork_asm+0x1a/0x30

Fixes: f38656d06725 ("mlxsw: spectrum_mr: Protect multicast route list with a lock")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/f996feecfd59fde297964bfc85040b6d83ec6089.1764695650.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index 5afe6b155ef0d5..81935f87bfcd71 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -440,7 +440,9 @@ int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table,
 		rhashtable_remove_fast(&mr_table->route_ht,
 				       &mr_orig_route->ht_node,
 				       mlxsw_sp_mr_route_ht_params);
+		mutex_lock(&mr_table->route_list_lock);
 		list_del(&mr_orig_route->node);
+		mutex_unlock(&mr_table->route_list_lock);
 		mlxsw_sp_mr_route_destroy(mr_table, mr_orig_route);
 	}
 

From dd75c723ef566f7f009c047f47e0eee95fe348ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Rebe?= <rene@exactco.de>
Date: Tue, 2 Dec 2025 19:41:37 +0100
Subject: [PATCH 024/258] r8169: fix RTL8117 Wake-on-Lan in DASH mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wake-on-Lan does currently not work for r8169 in DASH mode, e.g. the
ASUS Pro WS X570-ACE with RTL8168fp/RTL8117.

Fix by not returning early in rtl_prepare_power_down when dash_enabled.
While this fixes WoL, it still kills the OOB RTL8117 remote management
BMC connection. Fix by not calling rtl8168_driver_stop if WoL is enabled.

Fixes: 065c27c184d6 ("r8169: phy power ops")
Signed-off-by: René Rebe <rene@exactco.de>
Cc: stable@vger.kernel.org
Reviewed-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/20251202.194137.1647877804487085954.rene@exactco.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/realtek/r8169_main.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 405e91eb3141f2..755083852eef2a 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2655,9 +2655,6 @@ static void rtl_wol_enable_rx(struct rtl8169_private *tp)
 
 static void rtl_prepare_power_down(struct rtl8169_private *tp)
 {
-	if (tp->dash_enabled)
-		return;
-
 	if (tp->mac_version == RTL_GIGA_MAC_VER_32 ||
 	    tp->mac_version == RTL_GIGA_MAC_VER_33)
 		rtl_ephy_write(tp, 0x19, 0xff64);
@@ -4812,7 +4809,7 @@ static void rtl8169_down(struct rtl8169_private *tp)
 	rtl_disable_exit_l1(tp);
 	rtl_prepare_power_down(tp);
 
-	if (tp->dash_type != RTL_DASH_NONE)
+	if (tp->dash_type != RTL_DASH_NONE && !tp->saved_wolopts)
 		rtl8168_driver_stop(tp);
 }
 

From a479a27f4da4d1f8a9b7540a800f80253ed1bad0 Mon Sep 17 00:00:00 2001
From: Tim Hostetler <thostet@google.com>
Date: Tue, 2 Dec 2025 20:02:07 +0000
Subject: [PATCH 025/258] gve: Move gve_init_clock to after AQ
 CONFIGURE_DEVICE_RESOURCES call

commit 46e7860ef941 ("gve: Move ptp_schedule_worker to gve_init_clock")
moved the first invocation of the AQ command REPORT_NIC_TIMESTAMP to
gve_probe(). However, gve_init_clock() invoking REPORT_NIC_TIMESTAMP is
not valid until after gve_probe() invokes the AQ command
CONFIGURE_DEVICE_RESOURCES.

Failure to do so results in the following error:

gve 0000:00:07.0: failed to read NIC clock -11

This was missed earlier because the driver under test was loaded at
runtime instead of boot-time. The boot-time driver had already
initialized the device, causing the runtime driver to successfully call
gve_init_clock() incorrectly.

Fixes: 46e7860ef941 ("gve: Move ptp_schedule_worker to gve_init_clock")
Reviewed-by: Ankit Garg <nktgrg@google.com>
Signed-off-by: Tim Hostetler <thostet@google.com>
Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20251202200207.1434749-1-hramamurthy@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/google/gve/gve_main.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index a5a2b18d309b8c..a7a088a77f3786 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -647,12 +647,9 @@ static int gve_setup_device_resources(struct gve_priv *priv)
 	err = gve_alloc_counter_array(priv);
 	if (err)
 		goto abort_with_rss_config_cache;
-	err = gve_init_clock(priv);
-	if (err)
-		goto abort_with_counter;
 	err = gve_alloc_notify_blocks(priv);
 	if (err)
-		goto abort_with_clock;
+		goto abort_with_counter;
 	err = gve_alloc_stats_report(priv);
 	if (err)
 		goto abort_with_ntfy_blocks;
@@ -683,10 +680,16 @@ static int gve_setup_device_resources(struct gve_priv *priv)
 		}
 	}
 
+	err = gve_init_clock(priv);
+	if (err) {
+		dev_err(&priv->pdev->dev, "Failed to init clock");
+		goto abort_with_ptype_lut;
+	}
+
 	err = gve_init_rss_config(priv, priv->rx_cfg.num_queues);
 	if (err) {
 		dev_err(&priv->pdev->dev, "Failed to init RSS config");
-		goto abort_with_ptype_lut;
+		goto abort_with_clock;
 	}
 
 	err = gve_adminq_report_stats(priv, priv->stats_report_len,
@@ -698,6 +701,8 @@ static int gve_setup_device_resources(struct gve_priv *priv)
 	gve_set_device_resources_ok(priv);
 	return 0;
 
+abort_with_clock:
+	gve_teardown_clock(priv);
 abort_with_ptype_lut:
 	kvfree(priv->ptype_lut_dqo);
 	priv->ptype_lut_dqo = NULL;
@@ -705,8 +710,6 @@ static int gve_setup_device_resources(struct gve_priv *priv)
 	gve_free_stats_report(priv);
 abort_with_ntfy_blocks:
 	gve_free_notify_blocks(priv);
-abort_with_clock:
-	gve_teardown_clock(priv);
 abort_with_counter:
 	gve_free_counter_array(priv);
 abort_with_rss_config_cache:

From 0373d5c387f24de749cc22e694a14b3a7c7eb515 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Tue, 2 Dec 2025 16:30:24 -0800
Subject: [PATCH 026/258] bnxt_en: Fix XDP_TX path

For XDP_TX action in bnxt_rx_xdp(), clearing of the event flags is not
correct.  __bnxt_poll_work() -> bnxt_rx_pkt() -> bnxt_rx_xdp() may be
looping within NAPI and some event flags may be set in earlier
iterations.  In particular, if BNXT_TX_EVENT is set earlier indicating
some XDP_TX packets are ready and pending, it will be cleared if it is
XDP_TX action again.  Normally, we will set BNXT_TX_EVENT again when we
successfully call __bnxt_xmit_xdp().  But if the TX ring has no more
room, the flag will not be set.  This will cause the TX producer to be
ahead but the driver will not hit the TX doorbell.

For multi-buf XDP_TX, there is no need to clear the event flags and set
BNXT_AGG_EVENT.  The BNXT_AGG_EVENT flag should have been set earlier in
bnxt_rx_pkt().

The visible symptom of this is that the RX ring associated with the
TX XDP ring will eventually become empty and all packets will be dropped.
Because this condition will cause the driver to not refill the RX ring
seeing that the TX ring has forever pending XDP_TX packets.

The fix is to only clear BNXT_RX_EVENT when we have successfully
called __bnxt_xmit_xdp().

Fixes: 7f0a168b0441 ("bnxt_en: Add completion ring pointer in TX and RX ring structures")
Reported-by: Pavel Dubovitsky <pdubovitsky@meta.com>
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://patch.msgid.link/20251203003024.2246699-1-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 3e77a96e5a3e39..c94a391b1ba5b2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -268,13 +268,11 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
 	case XDP_TX:
 		rx_buf = &rxr->rx_buf_ring[cons];
 		mapping = rx_buf->mapping - bp->rx_dma_offset;
-		*event &= BNXT_TX_CMP_EVENT;
 
 		if (unlikely(xdp_buff_has_frags(xdp))) {
 			struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
 
 			tx_needed += sinfo->nr_frags;
-			*event = BNXT_AGG_EVENT;
 		}
 
 		if (tx_avail < tx_needed) {
@@ -287,6 +285,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
 		dma_sync_single_for_device(&pdev->dev, mapping + offset, *len,
 					   bp->rx_dir);
 
+		*event &= ~BNXT_RX_EVENT;
 		*event |= BNXT_TX_EVENT;
 		__bnxt_xmit_xdp(bp, txr, mapping + offset, *len,
 				NEXT_RX(rxr->rx_prod), xdp);

From b2849bec936be642b5420801f902337f2507648e Mon Sep 17 00:00:00 2001
From: Dongli Zhang <dongli.zhang@oracle.com>
Date: Fri, 5 Dec 2025 15:19:04 -0800
Subject: [PATCH 027/258] KVM: VMX: Update SVI during runtime APICv activation

The APICv (apic->apicv_active) can be activated or deactivated at runtime,
for instance, because of APICv inhibit reasons. Intel VMX employs different
mechanisms to virtualize LAPIC based on whether APICv is active.

When APICv is activated at runtime, GUEST_INTR_STATUS is used to configure
and report the current pending IRR and ISR states. Unless a specific vector
is explicitly included in EOI_EXIT_BITMAP, its EOI will not be trapped to
KVM. Intel VMX automatically clears the corresponding ISR bit based on the
GUEST_INTR_STATUS.SVI field.

When APICv is deactivated at runtime, the VM_ENTRY_INTR_INFO_FIELD is used
to specify the next interrupt vector to invoke upon VM-entry. The
VMX IDT_VECTORING_INFO_FIELD is used to report un-invoked vectors on
VM-exit. EOIs are always trapped to KVM, so the software can manually clear
pending ISR bits.

There are scenarios where, with APICv activated at runtime, a guest-issued
EOI may not be able to clear the pending ISR bit.

Taking vector 236 as an example, here is one scenario.

1. Suppose APICv is inactive. Vector 236 is pending in the IRR.
2. To handle KVM_REQ_EVENT, KVM moves vector 236 from the IRR to the ISR,
and configures the VM_ENTRY_INTR_INFO_FIELD via vmx_inject_irq().
3. After VM-entry, vector 236 is invoked through the guest IDT. At this
point, the data in VM_ENTRY_INTR_INFO_FIELD is no longer valid. The guest
interrupt handler for vector 236 is invoked.
4. Suppose a VM exit occurs very early in the guest interrupt handler,
before the EOI is issued.
5. Nothing is reported through the IDT_VECTORING_INFO_FIELD because
vector 236 has already been invoked in the guest.
6. Now, suppose APICv is activated. Before the next VM-entry, KVM calls
kvm_vcpu_update_apicv() to activate APICv.
7. Unfortunately, GUEST_INTR_STATUS.SVI is not configured, although
vector 236 is still pending in the ISR.
8. After VM-entry, the guest finally issues the EOI for vector 236.
However, because SVI is not configured, vector 236 is not cleared.
9. ISR is stalled forever on vector 236.

Here is another scenario.

1. Suppose APICv is inactive. Vector 236 is pending in the IRR.
2. To handle KVM_REQ_EVENT, KVM moves vector 236 from the IRR to the ISR,
and configures the VM_ENTRY_INTR_INFO_FIELD via vmx_inject_irq().
3. VM-exit occurs immediately after the next VM-entry. The vector 236 is
not invoked through the guest IDT. Instead, it is saved to the
IDT_VECTORING_INFO_FIELD during the VM-exit.
4. KVM calls kvm_queue_interrupt() to re-queue the un-invoked vector 236
into vcpu->arch.interrupt. A KVM_REQ_EVENT is requested.
5. Now, suppose APICv is activated. Before the next VM-entry, KVM calls
kvm_vcpu_update_apicv() to activate APICv.
6. Although APICv is now active, KVM still uses the legacy
VM_ENTRY_INTR_INFO_FIELD to re-inject vector 236. GUEST_INTR_STATUS.SVI is
not configured.
7. After the next VM-entry, vector 236 is invoked through the guest IDT.
Finally, an EOI occurs. However, due to the lack of GUEST_INTR_STATUS.SVI
configuration, vector 236 is not cleared from the ISR.
8. ISR is stalled forever on vector 236.

Using QEMU as an example, vector 236 is stuck in ISR forever.

(qemu) info lapic 1
dumping local APIC state for CPU 1

LVT0	 0x00010700 active-hi edge  masked                      ExtINT (vec 0)
LVT1	 0x00010400 active-hi edge  masked                      NMI
LVTPC	 0x00000400 active-hi edge                              NMI
LVTERR	 0x000000fe active-hi edge                              Fixed  (vec 254)
LVTTHMR	 0x00010000 active-hi edge  masked                      Fixed  (vec 0)
LVTT	 0x000400ec active-hi edge                 tsc-deadline Fixed  (vec 236)
Timer	 DCR=0x0 (divide by 2) initial_count = 0 current_count = 0
SPIV	 0x000001ff APIC enabled, focus=off, spurious vec 255
ICR	 0x000000fd physical edge de-assert no-shorthand
ICR2	 0x00000000 cpu 0 (X2APIC ID)
ESR	 0x00000000
ISR	 236
IRR	 37(level) 236

The issue isn't applicable to AMD SVM as KVM simply writes vmcb01 directly
irrespective of whether L1 (vmcs01) or L2 (vmcb02) is active (unlike VMX,
there is no need/cost to switch between VMCBs).  In addition,
APICV_INHIBIT_REASON_IRQWIN ensures AMD SVM AVIC is not activated until
the last interrupt is EOI'd.

Fix the bug by configuring Intel VMX GUEST_INTR_STATUS.SVI if APICv is
activated at runtime.

Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
Link: https://patch.msgid.link/20251110063212.34902-1-dongli.zhang@oracle.com
[sean: call out that SVM writes vmcb01 directly, tweak comment]
Link: https://patch.msgid.link/20251205231913.441872-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/vmx.c | 9 ---------
 arch/x86/kvm/x86.c     | 7 +++++++
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 91b6f2f3edc2ab..c3b9eb72b6f38a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6886,15 +6886,6 @@ void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
 	 * VM-Exit, otherwise L1 with run with a stale SVI.
 	 */
 	if (is_guest_mode(vcpu)) {
-		/*
-		 * KVM is supposed to forward intercepted L2 EOIs to L1 if VID
-		 * is enabled in vmcs12; as above, the EOIs affect L2's vAPIC.
-		 * Note, userspace can stuff state while L2 is active; assert
-		 * that VID is disabled if and only if the vCPU is in KVM_RUN
-		 * to avoid false positives if userspace is setting APIC state.
-		 */
-		WARN_ON_ONCE(vcpu->wants_to_run &&
-			     nested_cpu_has_vid(get_vmcs12(vcpu)));
 		to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true;
 		return;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c9c2aa6f4705e1..82036205945fbf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10877,9 +10877,16 @@ void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
 	 * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
 	 * still active when the interrupt got accepted. Make sure
 	 * kvm_check_and_inject_events() is called to check for that.
+	 *
+	 * Update SVI when APICv gets enabled, otherwise SVI won't reflect the
+	 * highest bit in vISR and the next accelerated EOI in the guest won't
+	 * be virtualized correctly (the CPU uses SVI to determine which vISR
+	 * vector to clear).
 	 */
 	if (!apic->apicv_active)
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
+	else
+		kvm_apic_update_hwapic_isr(vcpu);
 
 out:
 	preempt_enable();

From 29763138830916f46daaa50e83e7f4f907a3236b Mon Sep 17 00:00:00 2001
From: Dongli Zhang <dongli.zhang@oracle.com>
Date: Fri, 5 Dec 2025 15:19:05 -0800
Subject: [PATCH 028/258] KVM: nVMX: Immediately refresh APICv controls as
 needed on nested VM-Exit

If an APICv status updated was pended while L2 was active, immediately
refresh vmcs01's controls instead of pending KVM_REQ_APICV_UPDATE as
kvm_vcpu_update_apicv() only calls into vendor code if a change is
necessary.

E.g. if APICv is inhibited, and then activated while L2 is running:

  kvm_vcpu_update_apicv()
  |
  -> __kvm_vcpu_update_apicv()
     |
     -> apic->apicv_active = true
      |
      -> vmx_refresh_apicv_exec_ctrl()
         |
         -> vmx->nested.update_vmcs01_apicv_status = true
          |
          -> return

Then L2 exits to L1:

  __nested_vmx_vmexit()
  |
  -> kvm_make_request(KVM_REQ_APICV_UPDATE)

  vcpu_enter_guest(): KVM_REQ_APICV_UPDATE
  -> kvm_vcpu_update_apicv()
     |
     -> __kvm_vcpu_update_apicv()
        |
        -> return // because if (apic->apicv_active == activate)

Reported-by: Chao Gao <chao.gao@intel.com>
Closes: https://lore.kernel.org/all/aQ2jmnN8wUYVEawF@intel.com
Fixes: 7c69661e225c ("KVM: nVMX: Defer APICv updates while L2 is active until L1 is active")
Cc: stable@vger.kernel.org
Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
[sean: write changelog]
Link: https://patch.msgid.link/20251205231913.441872-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/nested.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bcea087b642fd0..1725c6a94f99bc 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -19,6 +19,7 @@
 #include "trace.h"
 #include "vmx.h"
 #include "smm.h"
+#include "x86_ops.h"
 
 static bool __read_mostly enable_shadow_vmcs = 1;
 module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
@@ -5216,7 +5217,7 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 
 	if (vmx->nested.update_vmcs01_apicv_status) {
 		vmx->nested.update_vmcs01_apicv_status = false;
-		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+		vmx_refresh_apicv_exec_ctrl(vcpu);
 	}
 
 	if (vmx->nested.update_vmcs01_hwapic_isr) {

From 2183a5c8a04f554d03174ddcfd0078b44217fa54 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Wed, 3 Dec 2025 11:01:22 +0100
Subject: [PATCH 029/258] af_unix: annotate unix_gc_lock with
 __cacheline_aligned_in_smp

Otherwise the lock is susceptible to ever-changing false-sharing due to
unrelated changes. This in particular popped up here where an unrelated
change improved performance:
https://lore.kernel.org/oe-lkp/202511281306.51105b46-lkp@intel.com/

Stabilize it with an explicit annotation which also has a side effect
of furher improving scalability:
> in our oiginal report, 284922f4c5 has a 6.1% performance improvement comparing
> to parent 17d85f33a8.
> we applied your patch directly upon 284922f4c5. as below, now by
> "284922f4c5 + your patch"
> we observe a 12.8% performance improvements (still comparing to 17d85f33a8).

Note nothing was done for the other fields, so some fluctuation is still
possible.

Tested-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251203100122.291550-1-mjguzik@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/unix/garbage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 78323d43e63ed1..25f65817faab93 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -199,7 +199,7 @@ static void unix_free_vertices(struct scm_fp_list *fpl)
 	}
 }
 
-static DEFINE_SPINLOCK(unix_gc_lock);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(unix_gc_lock);
 
 void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver)
 {

From e9e5047df953c9b1054d9a3c7b07c68ab2714263 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@kernel.org>
Date: Wed, 3 Dec 2025 21:44:17 -0800
Subject: [PATCH 030/258] mptcp: select CRYPTO_LIB_UTILS instead of CRYPTO

Since the only crypto functions used by the mptcp code are the SHA-256
library functions and crypto_memneq(), select only the options needed
for those: CRYPTO_LIB_SHA256 and CRYPTO_LIB_UTILS.

Previously, CRYPTO was selected instead of CRYPTO_LIB_UTILS.  That does
pull in CRYPTO_LIB_UTILS as well, but it's unnecessarily broad.

Years ago, the CRYPTO_LIB_* options were visible only when CRYPTO.  That
may be another reason why CRYPTO is selected here.  However, that was
fixed years ago, and the libraries can now be selected directly.

Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Reviewed-by: Mat Martineau <martineau@kernel.org>
Link: https://patch.msgid.link/20251204054417.491439-1-ebiggers@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
index 20328920f6ed17..be71fc9b463814 100644
--- a/net/mptcp/Kconfig
+++ b/net/mptcp/Kconfig
@@ -4,7 +4,7 @@ config MPTCP
 	depends on INET
 	select SKB_EXTENSIONS
 	select CRYPTO_LIB_SHA256
-	select CRYPTO
+	select CRYPTO_LIB_UTILS
 	help
 	  Multipath TCP (MPTCP) connections send and receive data over multiple
 	  subflows in order to utilize multiple network paths. Each subflow

From e56cadaa27fd156106c5583ed98976927c6febc9 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 6 Dec 2025 16:47:40 -0800
Subject: [PATCH 031/258] ynl: add regen hint to new headers

Recent commit 68e83f347266 ("tools: ynl-gen: add regeneration comment")
added a hint how to regenerate the code to the headers. Update
the new headers from this release cycle to also include it.

Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20251207004740.1657799-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/energy_model.h | 1 +
 kernel/power/em_netlink_autogen.c | 1 +
 kernel/power/em_netlink_autogen.h | 1 +
 3 files changed, 3 insertions(+)

diff --git a/include/uapi/linux/energy_model.h b/include/uapi/linux/energy_model.h
index 4ec4c0eabbbbc5..0bcad967854ffa 100644
--- a/include/uapi/linux/energy_model.h
+++ b/include/uapi/linux/energy_model.h
@@ -2,6 +2,7 @@
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/em.yaml */
 /* YNL-GEN uapi header */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
 
 #ifndef _UAPI_LINUX_ENERGY_MODEL_H
 #define _UAPI_LINUX_ENERGY_MODEL_H
diff --git a/kernel/power/em_netlink_autogen.c b/kernel/power/em_netlink_autogen.c
index a7a09ab1d1c21a..ceb3b2bb6ebe0c 100644
--- a/kernel/power/em_netlink_autogen.c
+++ b/kernel/power/em_netlink_autogen.c
@@ -2,6 +2,7 @@
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/em.yaml */
 /* YNL-GEN kernel source */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
 
 #include <net/netlink.h>
 #include <net/genetlink.h>
diff --git a/kernel/power/em_netlink_autogen.h b/kernel/power/em_netlink_autogen.h
index 78ce609641f11b..140ab548103ced 100644
--- a/kernel/power/em_netlink_autogen.h
+++ b/kernel/power/em_netlink_autogen.h
@@ -2,6 +2,7 @@
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/em.yaml */
 /* YNL-GEN kernel header */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
 
 #ifndef _LINUX_EM_GEN_H
 #define _LINUX_EM_GEN_H

From db6b35cffe59c619ea3772b21d7c7c8a7b885dc1 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 6 Dec 2025 17:38:48 -0800
Subject: [PATCH 032/258] tools: ynl: fix build on systems with old kernel
 headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The wireguard YNL conversion was missing the customary .deps entry.
NIPA doesn't catch this but my CentOS 9 system complains:

 wireguard-user.c:72:10: error: ‘WGALLOWEDIP_A_FLAGS’ undeclared here
 wireguard-user.c:58:67: error: parameter 1 (‘value’) has incomplete type
 58 | const char *wireguard_wgallowedip_flags_str(enum wgallowedip_flag value)
    |                                             ~~~~~~~~~~~~~~~~~~~~~~^~~~~

And similarly does Ubuntu 22.04.

One extra complication here is that we renamed the header guard,
so we need to compat with both old and new guard define.

Reviewed-by: Asbjørn Sloth Tønnesen <ast@fiberby.net>
Link: https://patch.msgid.link/20251207013848.1692990-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/net/ynl/Makefile.deps | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
index 865fd2e8519ed9..08205f9fc52575 100644
--- a/tools/net/ynl/Makefile.deps
+++ b/tools/net/ynl/Makefile.deps
@@ -13,6 +13,7 @@ UAPI_PATH:=../../../../include/uapi/
 # need the explicit -D matching what's in /usr, to avoid multiple definitions.
 
 get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2)
+get_hdr_inc2=-D$(1) -D$(2) -include $(UAPI_PATH)/linux/$(3)
 
 CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h)
 CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h)
@@ -48,3 +49,4 @@ CFLAGS_tc:= $(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
 	$(call get_hdr_inc,_TC_SKBEDIT_H,tc_act/tc_skbedit.h) \
 	$(call get_hdr_inc,_TC_TUNNEL_KEY_H,tc_act/tc_tunnel_key.h)
 CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h)
+CFLAGS_wireguard:=$(call get_hdr_inc2,_LINUX_WIREGUARD_H,_WG_UAPI_WIREGUARD_H,wireguard.h)

From 0ace3297a7301911e52d8195cb1006414897c859 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
Date: Fri, 5 Dec 2025 19:55:14 +0100
Subject: [PATCH 033/258] mptcp: pm: ignore unknown endpoint flags

Before this patch, the kernel was saving any flags set by the userspace,
even unknown ones. This doesn't cause critical issues because the kernel
is only looking at specific ones. But on the other hand, endpoints dumps
could tell the userspace some recent flags seem to be supported on older
kernel versions.

Instead, ignore all unknown flags when parsing them. By doing that, the
userspace can continue to set unsupported flags, but it has a way to
verify what is supported by the kernel.

Note that it sounds better to continue accepting unsupported flags not
to change the behaviour, but also that eases things on the userspace
side by adding "optional" endpoint types only supported by newer kernel
versions without having to deal with the different kernel versions.

A note for the backports: there will be conflicts in mptcp.h on older
versions not having the mentioned flags, the new line should still be
added last, and the '5' needs to be adapted to have the same value as
the last entry.

Fixes: 01cacb00b35c ("mptcp: add netlink-based PM")
Cc: stable@vger.kernel.org
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-1-9e4781a6c1b8@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/mptcp.h | 1 +
 net/mptcp/pm_netlink.c     | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 04eea6d1d0a9bc..72a5d030154e1a 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -40,6 +40,7 @@
 #define MPTCP_PM_ADDR_FLAG_FULLMESH		_BITUL(3)
 #define MPTCP_PM_ADDR_FLAG_IMPLICIT		_BITUL(4)
 #define MPTCP_PM_ADDR_FLAG_LAMINAR		_BITUL(5)
+#define MPTCP_PM_ADDR_FLAGS_MASK		GENMASK(5, 0)
 
 struct mptcp_info {
 	__u8	mptcpi_subflows;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index d5b383870f7995..7aa42de9c47b55 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -119,7 +119,8 @@ int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
 	}
 
 	if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
-		entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
+		entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]) &
+			       MPTCP_PM_ADDR_FLAGS_MASK;
 
 	if (tb[MPTCP_PM_ADDR_ATTR_PORT])
 		entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));

From 29f4801e9c8dfd12bdcb33b61a6ac479c7162bd7 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
Date: Fri, 5 Dec 2025 19:55:15 +0100
Subject: [PATCH 034/258] selftests: mptcp: pm: ensure unknown flags are
 ignored

This validates the previous commit: the userspace can set unknown flags
-- the 7th bit is currently unused -- without errors, but only the
supported ones are printed in the endpoints dumps.

The 'Fixes' tag here below is the same as the one from the previous
commit: this patch here is not fixing anything wrong in the selftests,
but it validates the previous fix for an issue introduced by this commit
ID.

Fixes: 01cacb00b35c ("mptcp: add netlink-based PM")
Cc: stable@vger.kernel.org
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-2-9e4781a6c1b8@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/pm_netlink.sh |  4 ++++
 tools/testing/selftests/net/mptcp/pm_nl_ctl.c   | 11 +++++++++++
 2 files changed, 15 insertions(+)

diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index ec6a8758819194..123d9d7a0278cd 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -192,6 +192,10 @@ check "show_endpoints" \
 flush_endpoint
 check "show_endpoints" "" "flush addrs"
 
+add_endpoint 10.0.1.1 flags unknown
+check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags"
+flush_endpoint
+
 set_limits 9 1 2>/dev/null
 check "get_limits" "${default_limits}" "rcv addrs above hard limit"
 
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 65b374232ff5ac..99eecccbf0c876 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -24,6 +24,8 @@
 #define IPPROTO_MPTCP 262
 #endif
 
+#define MPTCP_PM_ADDR_FLAG_UNKNOWN _BITUL(7)
+
 static void syntax(char *argv[])
 {
 	fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept [<args>]\n", argv[0]);
@@ -836,6 +838,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
 					flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
 				else if (!strcmp(tok, "fullmesh"))
 					flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
+				else if (!strcmp(tok, "unknown"))
+					flags |= MPTCP_PM_ADDR_FLAG_UNKNOWN;
 				else
 					error(1, errno,
 					      "unknown flag %s", argv[arg]);
@@ -1048,6 +1052,13 @@ static void print_addr(struct rtattr *attrs, int len)
 					printf(",");
 			}
 
+			if (flags & MPTCP_PM_ADDR_FLAG_UNKNOWN) {
+				printf("unknown");
+				flags &= ~MPTCP_PM_ADDR_FLAG_UNKNOWN;
+				if (flags)
+					printf(",");
+			}
+
 			/* bump unknown flags, if any */
 			if (flags)
 				printf("0x%x", flags);

From 2ea6190f42d0416a4310e60a7fcb0b49fcbbd4fb Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 5 Dec 2025 19:55:16 +0100
Subject: [PATCH 035/258] mptcp: schedule rtx timer only after pushing data

The MPTCP protocol usually schedule the retransmission timer only
when there is some chances for such retransmissions to happen.

With a notable exception: __mptcp_push_pending() currently schedule
such timer unconditionally, potentially leading to unnecessary rtx
timer expiration.

The issue is present since the blamed commit below but become easily
reproducible after commit 27b0e701d387 ("mptcp: drop bogus optimization
in __mptcp_check_push()")

Fixes: 33d41c9cd74c ("mptcp: more accurate timeout")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-3-9e4781a6c1b8@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/protocol.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e212c1374bd043..d8a7f702916455 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1623,7 +1623,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 	struct mptcp_sendmsg_info info = {
 				.flags = flags,
 	};
-	bool do_check_data_fin = false;
+	bool copied = false;
 	int push_count = 1;
 
 	while (mptcp_send_head(sk) && (push_count > 0)) {
@@ -1665,7 +1665,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 						push_count--;
 					continue;
 				}
-				do_check_data_fin = true;
+				copied = true;
 			}
 		}
 	}
@@ -1674,11 +1674,14 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 	if (ssk)
 		mptcp_push_release(ssk, &info);
 
-	/* ensure the rtx timer is running */
-	if (!mptcp_rtx_timer_pending(sk))
-		mptcp_reset_rtx_timer(sk);
-	if (do_check_data_fin)
+	/* Avoid scheduling the rtx timer if no data has been pushed; the timer
+	 * will be updated on positive acks by __mptcp_cleanup_una().
+	 */
+	if (copied) {
+		if (!mptcp_rtx_timer_pending(sk))
+			mptcp_reset_rtx_timer(sk);
 		mptcp_check_send_data_fin(sk);
+	}
 }
 
 static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first)

From ffb8c27b0539dd90262d1021488e7817fae57c42 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 5 Dec 2025 19:55:17 +0100
Subject: [PATCH 036/258] mptcp: avoid deadlock on fallback while reinjecting

Jakub reported an MPTCP deadlock at fallback time:

 WARNING: possible recursive locking detected
 6.18.0-rc7-virtme #1 Not tainted
 --------------------------------------------
 mptcp_connect/20858 is trying to acquire lock:
 ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_try_fallback+0xd8/0x280

 but task is already holding lock:
 ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_retrans+0x352/0xaa0

 other info that might help us debug this:
  Possible unsafe locking scenario:

        CPU0
        ----
   lock(&msk->fallback_lock);
   lock(&msk->fallback_lock);

  *** DEADLOCK ***

  May be due to missing lock nesting notation

 3 locks held by mptcp_connect/20858:
  #0: ff1100001da18290 (sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0x114/0x1bc0
  #1: ff1100001db40fd0 (k-sk_lock-AF_INET#2){+.+.}-{0:0}, at: __mptcp_retrans+0x2cb/0xaa0
  #2: ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_retrans+0x352/0xaa0

 stack backtrace:
 CPU: 0 UID: 0 PID: 20858 Comm: mptcp_connect Not tainted 6.18.0-rc7-virtme #1 PREEMPT(full)
 Hardware name: Bochs, BIOS Bochs 01/01/2011
 Call Trace:
  <TASK>
  dump_stack_lvl+0x6f/0xa0
  print_deadlock_bug.cold+0xc0/0xcd
  validate_chain+0x2ff/0x5f0
  __lock_acquire+0x34c/0x740
  lock_acquire.part.0+0xbc/0x260
  _raw_spin_lock_bh+0x38/0x50
  __mptcp_try_fallback+0xd8/0x280
  mptcp_sendmsg_frag+0x16c2/0x3050
  __mptcp_retrans+0x421/0xaa0
  mptcp_release_cb+0x5aa/0xa70
  release_sock+0xab/0x1d0
  mptcp_sendmsg+0xd5b/0x1bc0
  sock_write_iter+0x281/0x4d0
  new_sync_write+0x3c5/0x6f0
  vfs_write+0x65e/0xbb0
  ksys_write+0x17e/0x200
  do_syscall_64+0xbb/0xfd0
  entry_SYSCALL_64_after_hwframe+0x4b/0x53
 RIP: 0033:0x7fa5627cbc5e
 Code: 4d 89 d8 e8 14 bd 00 00 4c 8b 5d f8 41 8b 93 08 03 00 00 59 5e 48 83 f8 fc 74 11 c9 c3 0f 1f 80 00 00 00 00 48 8b 45 10 0f 05 <c9> c3 83 e2 39 83 fa 08 75 e7 e8 13 ff ff ff 0f 1f 00 f3 0f 1e fa
 RSP: 002b:00007fff1fe14700 EFLAGS: 00000202 ORIG_RAX: 0000000000000001
 RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00007fa5627cbc5e
 RDX: 0000000000001f9c RSI: 00007fff1fe16984 RDI: 0000000000000005
 RBP: 00007fff1fe14710 R08: 0000000000000000 R09: 0000000000000000
 R10: 0000000000000000 R11: 0000000000000202 R12: 00007fff1fe16920
 R13: 0000000000002000 R14: 0000000000001f9c R15: 0000000000001f9c

The packet scheduler could attempt a reinjection after receiving an
MP_FAIL and before the infinite map has been transmitted, causing a
deadlock since MPTCP needs to do the reinjection atomically from WRT
fallback.

Address the issue explicitly avoiding the reinjection in the critical
scenario. Note that this is the only fallback critical section that
could potentially send packets and hit the double-lock.

Reported-by: Jakub Kicinski <kuba@kernel.org>
Closes: https://netdev-ctrl.bots.linux.dev/logs/vmksft/mptcp-dbg/results/412720/1-mptcp-join-sh/stderr
Fixes: f8a1d9b18c5e ("mptcp: make fallback action and fallback decision atomic")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-4-9e4781a6c1b8@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/protocol.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index d8a7f702916455..9b1fafd87cb94b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2769,10 +2769,13 @@ static void __mptcp_retrans(struct sock *sk)
 
 			/*
 			 * make the whole retrans decision, xmit, disallow
-			 * fallback atomic
+			 * fallback atomic, note that we can't retrans even
+			 * when an infinite fallback is in progress, i.e. new
+			 * subflows are disallowed.
 			 */
 			spin_lock_bh(&msk->fallback_lock);
-			if (__mptcp_check_fallback(msk)) {
+			if (__mptcp_check_fallback(msk) ||
+			    !msk->allow_subflows) {
 				spin_unlock_bh(&msk->fallback_lock);
 				release_sock(ssk);
 				goto clear_scheduled;

From 639f58a0f4808e78ff6c764747ee125c0e2f093c Mon Sep 17 00:00:00 2001
From: Quentin Monnet <qmo@kernel.org>
Date: Mon, 8 Dec 2025 13:07:48 +0000
Subject: [PATCH 037/258] bpftool: Fix build warnings due to MS extensions

The kernel is now built with -fms-extensions. Anonymous structs or
unions permitted by these extensions have been used in several places,
and can end up in the generated vmlinux.h file, for example:

    struct ns_tree {
        [...]
    };

    [...]

    struct ns_common {
            [...]
            union {
                    struct ns_tree;
                    struct callback_head ns_rcu;
            };
    };

Trying to include this header for compiling a tool may result in build
warnings, if the compiler does not expect these extensions. This is the
case, for example, with bpftool:

    In file included from skeleton/pid_iter.bpf.c:3:
    .../tools/testing/selftests/bpf/tools/build/bpftool/vmlinux.h:64057:3:
    warning: declaration does not declare anything
    [-Wmissing-declarations]
     64057 |                 struct ns_tree;
           |                 ^~~~~~~~~~~~~~

Fix these build warnings in bpftool by turning on Microsoft extensions
when compiling the two BPF programs that rely on vmlinux.h.

Reported-by: Alexei Starovoitov <ast@kernel.org>
Closes: https://lore.kernel.org/bpf/CAADnVQK9ZkPC7+R5VXKHVdtj8tumpMXm7BTp0u9CoiFLz_aPTg@mail.gmail.com/
Signed-off-by: Quentin Monnet <qmo@kernel.org>
Link: https://lore.kernel.org/r/20251208130748.68371-1-qmo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 586d1b2595d16b..5442073a2e4286 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -224,6 +224,8 @@ endif
 
 $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF_BOOTSTRAP)
 	$(QUIET_CLANG)$(CLANG) \
+		-Wno-microsoft-anon-tag \
+		-fms-extensions \
 		-I$(or $(OUTPUT),.) \
 		-I$(srctree)/tools/include/uapi/ \
 		-I$(LIBBPF_BOOTSTRAP_INCLUDE) \

From d70f79fef65810faf64dbae1f3a1b5623cdb2345 Mon Sep 17 00:00:00 2001
From: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Date: Sat, 6 Dec 2025 14:28:25 +0500
Subject: [PATCH 038/258] libbpf: Fix -Wdiscarded-qualifiers under C23
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

glibc ≥ 2.42 (GCC 15) defaults to -std=gnu23, which promotes
-Wdiscarded-qualifiers to an error.

In C23, strstr() and strchr() return "const char *".

Change variable types to const char * where the pointers are never
modified (res, sym_sfx, next_path).

Suggested-by: Florian Weimer <fweimer@redhat.com>
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Link: https://lore.kernel.org/r/20251206092825.1471385-1-mikhail.v.gavrilov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 3dc8a807881551..f4dfd23148a55f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8484,7 +8484,7 @@ static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
 	struct bpf_object *obj = ctx;
 	const struct btf_type *t;
 	struct extern_desc *ext;
-	char *res;
+	const char *res;
 
 	res = strstr(sym_name, ".llvm.");
 	if (sym_type == 'd' && res)
@@ -11818,7 +11818,8 @@ static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
 		 *
 		 *   [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG")
 		 */
-		char sym_trim[256], *psym_trim = sym_trim, *sym_sfx;
+		char sym_trim[256], *psym_trim = sym_trim;
+		const char *sym_sfx;
 
 		if (!(sym_sfx = strstr(sym_name, ".llvm.")))
 			return 0;
@@ -12401,7 +12402,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz)
 		if (!search_paths[i])
 			continue;
 		for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
-			char *next_path;
+			const char *next_path;
 			int seg_len;
 
 			if (s[0] == ':')

From 189e5deb944a6f9c7992355d60bffd8ec2e54a9c Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Thu, 4 Dec 2025 13:59:16 +0100
Subject: [PATCH 039/258] bpf, arm64: Do not audit capability check in do_jit()

Analogically to the x86 commit 881a9c9cb785 ("bpf: Do not audit
capability check in do_jit()"), change the capable() call to
ns_capable_noaudit() in order to avoid spurious SELinux denials in audit
log.

The commit log from that commit applies here as well:
"""
The failure of this check only results in a security mitigation being
applied, slightly affecting performance of the compiled BPF program. It
doesn't result in a failed syscall, an thus auditing a failed LSM
permission check for it is unwanted. For example with SELinux, it causes
a denial to be reported for confined processes running as root, which
tends to be flagged as a problem to be fixed in the policy. Yet
dontauditing or allowing CAP_SYS_ADMIN to the domain may not be
desirable, as it would allow/silence also other checks - either going
against the principle of least privilege or making debugging potentially
harder.

Fix it by changing it from capable() to ns_capable_noaudit(), which
instructs the LSMs to not audit the resulting denials.
"""

Fixes: f300769ead03 ("arm64: bpf: Only mitigate cBPF programs loaded by unprivileged users")
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Link: https://lore.kernel.org/r/20251204125916.441021-1-omosnace@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/arm64/net/bpf_jit_comp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 74dd29816f36a4..b6eb7a465ad248 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1004,7 +1004,7 @@ static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx)
 	    arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE)
 		return;
 
-	if (capable(CAP_SYS_ADMIN))
+	if (ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
 		return;
 
 	if (supports_clearbhb(SCOPE_SYSTEM)) {

From ca45c84afb8c91a8d688b0012657099c24f59266 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@kernel.org>
Date: Wed, 3 Dec 2025 19:32:15 -0800
Subject: [PATCH 040/258] bpf: Add bpf_has_frame_pointer()

Introduce a bpf_has_frame_pointer() helper that unwinders can call to
determine whether a given instruction pointer is within the valid frame
pointer region of a BPF JIT program or trampoline (i.e., after the
prologue, before the epilogue).

This will enable livepatch (with the ORC unwinder) to reliably unwind
through BPF JIT frames.

Acked-by: Song Liu <song@kernel.org>
Acked-and-tested-by: Andrey Grodzovsky <andrey.grodzovsky@crowdstrike.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Link: https://lore.kernel.org/r/fd2bc5b4e261a680774b28f6100509fd5ebad2f0.1764818927.git.jpoimboe@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
---
 arch/x86/net/bpf_jit_comp.c | 12 ++++++++++++
 include/linux/bpf.h         |  3 +++
 kernel/bpf/core.c           | 16 ++++++++++++++++
 3 files changed, 31 insertions(+)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b69dc7194e2c07..b0bac2a66eff3c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1678,6 +1678,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 	emit_prologue(&prog, image, stack_depth,
 		      bpf_prog_was_classic(bpf_prog), tail_call_reachable,
 		      bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
+
+	bpf_prog->aux->ksym.fp_start = prog - temp;
+
 	/* Exception callback will clobber callee regs for its own use, and
 	 * restore the original callee regs from main prog's stack frame.
 	 */
@@ -2736,6 +2739,8 @@ st:			if (is_imm8(insn->off))
 					pop_r12(&prog);
 			}
 			EMIT1(0xC9);         /* leave */
+			bpf_prog->aux->ksym.fp_end = prog - temp;
+
 			emit_return(&prog, image + addrs[i - 1] + (prog - temp));
 			break;
 
@@ -3325,6 +3330,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	}
 	EMIT1(0x55);		 /* push rbp */
 	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+	if (im)
+		im->ksym.fp_start = prog - (u8 *)rw_image;
+
 	if (!is_imm8(stack_size)) {
 		/* sub rsp, stack_size */
 		EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
@@ -3462,7 +3470,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 
 	emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
+
 	EMIT1(0xC9); /* leave */
+	if (im)
+		im->ksym.fp_end = prog - (u8 *)rw_image;
+
 	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
 		/* skip our return address and return to parent */
 		EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6498be4c44f8c2..e5be698256d15a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1283,6 +1283,8 @@ struct bpf_ksym {
 	struct list_head	 lnode;
 	struct latch_tree_node	 tnode;
 	bool			 prog;
+	u32			 fp_start;
+	u32			 fp_end;
 };
 
 enum bpf_tramp_prog_type {
@@ -1511,6 +1513,7 @@ void bpf_image_ksym_add(struct bpf_ksym *ksym);
 void bpf_image_ksym_del(struct bpf_ksym *ksym);
 void bpf_ksym_add(struct bpf_ksym *ksym);
 void bpf_ksym_del(struct bpf_ksym *ksym);
+bool bpf_has_frame_pointer(unsigned long ip);
 int bpf_jit_charge_modmem(u32 size);
 void bpf_jit_uncharge_modmem(u32 size);
 bool bpf_prog_has_trampoline(const struct bpf_prog *prog);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c8ae6ab3165100..1b9b18e5b03cb0 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -760,6 +760,22 @@ struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
 	       NULL;
 }
 
+bool bpf_has_frame_pointer(unsigned long ip)
+{
+	struct bpf_ksym *ksym;
+	unsigned long offset;
+
+	guard(rcu)();
+
+	ksym = bpf_ksym_find(ip);
+	if (!ksym || !ksym->fp_start || !ksym->fp_end)
+		return false;
+
+	offset = ip - ksym->start;
+
+	return offset >= ksym->fp_start && offset < ksym->fp_end;
+}
+
 const struct exception_table_entry *search_bpf_extables(unsigned long addr)
 {
 	const struct exception_table_entry *e = NULL;

From 01bc3b6db18d6e0a2e93c37885996bf339bfe337 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@kernel.org>
Date: Wed, 3 Dec 2025 19:32:16 -0800
Subject: [PATCH 041/258] x86/unwind/orc: Support reliable unwinding through
 BPF stack frames

BPF JIT programs and trampolines use a frame pointer, so the current ORC
unwinder strategy of falling back to frame pointers (when an ORC entry
is missing) usually works in practice when unwinding through BPF JIT
stack frames.

However, that frame pointer fallback is just a guess, so the unwind gets
marked unreliable for live patching, which can cause livepatch
transition stalls.

Make the common case reliable by calling the bpf_has_frame_pointer()
helper to detect the valid frame pointer region of BPF JIT programs and
trampolines.

Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder")
Reported-by: Andrey Grodzovsky <andrey.grodzovsky@crowdstrike.com>
Closes: https://lore.kernel.org/0e555733-c670-4e84-b2e6-abb8b84ade38@crowdstrike.com
Acked-by: Song Liu <song@kernel.org>
Acked-and-tested-by: Andrey Grodzovsky <andrey.grodzovsky@crowdstrike.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Link: https://lore.kernel.org/r/a18505975662328c8ffb1090dded890c6f8c1004.1764818927.git.jpoimboe@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
---
 arch/x86/kernel/unwind_orc.c | 39 +++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 977ee75e047c84..f610fde2d5c4b2 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -2,6 +2,7 @@
 #include <linux/objtool.h>
 #include <linux/module.h>
 #include <linux/sort.h>
+#include <linux/bpf.h>
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
@@ -172,6 +173,25 @@ static struct orc_entry *orc_ftrace_find(unsigned long ip)
 }
 #endif
 
+/* Fake frame pointer entry -- used as a fallback for generated code */
+static struct orc_entry orc_fp_entry = {
+	.type		= ORC_TYPE_CALL,
+	.sp_reg		= ORC_REG_BP,
+	.sp_offset	= 16,
+	.bp_reg		= ORC_REG_PREV_SP,
+	.bp_offset	= -16,
+};
+
+static struct orc_entry *orc_bpf_find(unsigned long ip)
+{
+#ifdef CONFIG_BPF_JIT
+	if (bpf_has_frame_pointer(ip))
+		return &orc_fp_entry;
+#endif
+
+	return NULL;
+}
+
 /*
  * If we crash with IP==0, the last successfully executed instruction
  * was probably an indirect function call with a NULL function pointer,
@@ -186,15 +206,6 @@ static struct orc_entry null_orc_entry = {
 	.type = ORC_TYPE_CALL
 };
 
-/* Fake frame pointer entry -- used as a fallback for generated code */
-static struct orc_entry orc_fp_entry = {
-	.type		= ORC_TYPE_CALL,
-	.sp_reg		= ORC_REG_BP,
-	.sp_offset	= 16,
-	.bp_reg		= ORC_REG_PREV_SP,
-	.bp_offset	= -16,
-};
-
 static struct orc_entry *orc_find(unsigned long ip)
 {
 	static struct orc_entry *orc;
@@ -238,6 +249,11 @@ static struct orc_entry *orc_find(unsigned long ip)
 	if (orc)
 		return orc;
 
+	/* BPF lookup: */
+	orc = orc_bpf_find(ip);
+	if (orc)
+		return orc;
+
 	return orc_ftrace_find(ip);
 }
 
@@ -495,9 +511,8 @@ bool unwind_next_frame(struct unwind_state *state)
 	if (!orc) {
 		/*
 		 * As a fallback, try to assume this code uses a frame pointer.
-		 * This is useful for generated code, like BPF, which ORC
-		 * doesn't know about.  This is just a guess, so the rest of
-		 * the unwind is no longer considered reliable.
+		 * This is just a guess, so the rest of the unwind is no longer
+		 * considered reliable.
 		 */
 		orc = &orc_fp_entry;
 		state->error = true;

From 5288176a541215ba48d38fb74bb619e64d4d9bab Mon Sep 17 00:00:00 2001
From: Swaraj Gaikwad <swarajgaikwad1925@gmail.com>
Date: Wed, 10 Dec 2025 09:28:14 +0000
Subject: [PATCH 042/258] x86/boot/Documentation: Fix htmldocs build warning
 due to malformed table in boot.rst

Sphinx reports htmldocs warnings:

  Documentation/arch/x86/boot.rst:437: ERROR: Malformed table.
  Text in column margin in table line 2.

The table header defined the first column width as 2 characters ("=="),
which is too narrow for entries like "0x10" and "0x13". This caused the
text to spill into the margin, triggering a docutils parsing failure.

Fix it by extending the first column of assigned boot loader ID to 4
characters ("====") to fit the widest entries.

Fixes: 1c3377bee212 ("x86/boot/Documentation: Prefix hexadecimal literals with 0x")
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Swaraj Gaikwad <swarajgaikwad1925@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Link: https://patch.msgid.link/20251210092814.9986-1-swarajgaikwad1925@gmail.com
---
 Documentation/arch/x86/boot.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/arch/x86/boot.rst b/Documentation/arch/x86/boot.rst
index 6d36ce86fd8ec0..18574f010d46cd 100644
--- a/Documentation/arch/x86/boot.rst
+++ b/Documentation/arch/x86/boot.rst
@@ -433,7 +433,7 @@ Protocol:	2.00+
 
   Assigned boot loader IDs:
 
-	== =======================================
+	==== =======================================
 	0x0  LILO
 	     (0x00 reserved for pre-2.00 bootloader)
 	0x1  Loadlin
@@ -456,7 +456,7 @@ Protocol:	2.00+
 	     <http://sebastian-plotz.blogspot.de>
 	0x12 OVMF UEFI virtualization stack
 	0x13 barebox
-	== =======================================
+	==== =======================================
 
   Please contact <hpa@zytor.com> if you need a bootloader ID value assigned.
 

From c8161e5304abb26e6c0bec6efc947992500fa6c5 Mon Sep 17 00:00:00 2001
From: Yongxin Liu <yongxin.liu@windriver.com>
Date: Wed, 10 Dec 2025 08:02:20 +0800
Subject: [PATCH 043/258] x86/fpu: Fix FPU state core dump truncation on CPUs
 with no extended xfeatures

Zero can be a valid value of num_records. For example, on Intel Atom x6425RE,
only x87 and SSE are supported (features 0, 1), and fpu_user_cfg.max_features
is 3. The for_each_extended_xfeature() loop only iterates feature 2, which is
not enabled, so num_records = 0. This is valid and should not cause core dump
failure.

The issue is that dump_xsave_layout_desc() returns 0 for both genuine errors
(dump_emit() failure) and valid cases (no extended features). Use negative
return values for errors and only abort on genuine failures.

Fixes: ba386777a30b ("x86/elf: Add a new FPU buffer layout info to x86 core files")
Signed-off-by: Yongxin Liu <yongxin.liu@windriver.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://patch.msgid.link/20251210000219.4094353-2-yongxin.liu@windriver.com
---
 arch/x86/kernel/fpu/xstate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 48113c5193aa3c..76153dfb58c9d0 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1946,7 +1946,7 @@ static int dump_xsave_layout_desc(struct coredump_params *cprm)
 		};
 
 		if (!dump_emit(cprm, &xc, sizeof(xc)))
-			return 0;
+			return -1;
 
 		num_records++;
 	}
@@ -1984,7 +1984,7 @@ int elf_coredump_extra_notes_write(struct coredump_params *cprm)
 		return 1;
 
 	num_records = dump_xsave_layout_desc(cprm);
-	if (!num_records)
+	if (num_records < 0)
 		return 1;
 
 	/* Total size should be equal to the number of records */

From 234483565dbb2b264fdd165927c89fbf3ecf4733 Mon Sep 17 00:00:00 2001
From: "T.J. Mercier" <tjmercier@google.com>
Date: Wed, 3 Dec 2025 16:03:47 -0800
Subject: [PATCH 044/258] bpf: Fix truncated dmabuf iterator reads

If there is a large number (hundreds) of dmabufs allocated, the text
output generated from dmabuf_iter_seq_show can exceed common user buffer
sizes (e.g. PAGE_SIZE) necessitating multiple start/stop cycles to
iterate through all dmabufs. However the dmabuf iterator currently
returns NULL in dmabuf_iter_seq_start for all non-zero pos values, which
results in the truncation of the output before all dmabufs are handled.

After dma_buf_iter_begin / dma_buf_iter_next, the refcount of the buffer
is elevated so that the BPF iterator program can run without holding any
locks. When a stop occurs, instead of immediately dropping the reference
on the buffer, stash a pointer to the buffer in seq->priv until
either start is called or the iterator is released. This also enables
the resumption of iteration without first walking through the list of
dmabufs based on the pos value.

Fixes: 76ea95534995 ("bpf: Add dmabuf iterator")
Signed-off-by: T.J. Mercier <tjmercier@google.com>
Link: https://lore.kernel.org/r/20251204000348.1413593-1-tjmercier@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/dmabuf_iter.c | 56 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 7 deletions(-)

diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c
index 4dd7ef7c145ca7..cd500248abd959 100644
--- a/kernel/bpf/dmabuf_iter.c
+++ b/kernel/bpf/dmabuf_iter.c
@@ -6,10 +6,33 @@
 #include <linux/kernel.h>
 #include <linux/seq_file.h>
 
+struct dmabuf_iter_priv {
+	/*
+	 * If this pointer is non-NULL, the buffer's refcount is elevated to
+	 * prevent destruction between stop/start. If reading is not resumed and
+	 * start is never called again, then dmabuf_iter_seq_fini drops the
+	 * reference when the iterator is released.
+	 */
+	struct dma_buf *dmabuf;
+};
+
 static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	if (*pos)
-		return NULL;
+	struct dmabuf_iter_priv *p = seq->private;
+
+	if (*pos) {
+		struct dma_buf *dmabuf = p->dmabuf;
+
+		if (!dmabuf)
+			return NULL;
+
+		/*
+		 * Always resume from where we stopped, regardless of the value
+		 * of pos.
+		 */
+		p->dmabuf = NULL;
+		return dmabuf;
+	}
 
 	return dma_buf_iter_begin();
 }
@@ -54,8 +77,11 @@ static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v)
 {
 	struct dma_buf *dmabuf = v;
 
-	if (dmabuf)
-		dma_buf_put(dmabuf);
+	if (dmabuf) {
+		struct dmabuf_iter_priv *p = seq->private;
+
+		p->dmabuf = dmabuf;
+	}
 }
 
 static const struct seq_operations dmabuf_iter_seq_ops = {
@@ -71,11 +97,27 @@ static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux,
 	seq_puts(seq, "dmabuf iter\n");
 }
 
+static int dmabuf_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux)
+{
+	struct dmabuf_iter_priv *p = (struct dmabuf_iter_priv *)priv;
+
+	p->dmabuf = NULL;
+	return 0;
+}
+
+static void dmabuf_iter_seq_fini(void *priv)
+{
+	struct dmabuf_iter_priv *p = (struct dmabuf_iter_priv *)priv;
+
+	if (p->dmabuf)
+		dma_buf_put(p->dmabuf);
+}
+
 static const struct bpf_iter_seq_info dmabuf_iter_seq_info = {
 	.seq_ops		= &dmabuf_iter_seq_ops,
-	.init_seq_private	= NULL,
-	.fini_seq_private	= NULL,
-	.seq_priv_size		= 0,
+	.init_seq_private	= dmabuf_iter_seq_init,
+	.fini_seq_private	= dmabuf_iter_seq_fini,
+	.seq_priv_size		= sizeof(struct dmabuf_iter_priv),
 };
 
 static struct bpf_iter_reg bpf_dmabuf_reg_info = {

From 9489d457d48bd10c4eacd8670840132be00c15cd Mon Sep 17 00:00:00 2001
From: "T.J. Mercier" <tjmercier@google.com>
Date: Wed, 3 Dec 2025 16:03:48 -0800
Subject: [PATCH 045/258] selftests/bpf: Add test for truncated dmabuf_iter
 reads

If many dmabufs are present, reads of the dmabuf iterator can be
truncated at PAGE_SIZE or user buffer size boundaries before the fix in
"bpf: Fix truncated dmabuf iterator reads". Add a test to
confirm truncation does not occur.

Signed-off-by: T.J. Mercier <tjmercier@google.com>
Link: https://lore.kernel.org/r/20251204000348.1413593-2-tjmercier@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/dmabuf_iter.c    | 47 +++++++++++++++++--
 1 file changed, 42 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
index 6c2b0c3dbcd86c..e442be9dde7e15 100644
--- a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
@@ -73,12 +73,10 @@ static int create_udmabuf(void)
 	return -1;
 }
 
-static int create_sys_heap_dmabuf(void)
+static int create_sys_heap_dmabuf(size_t bytes)
 {
-	sysheap_test_buffer_size = 20 * getpagesize();
-
 	struct dma_heap_allocation_data data = {
-		.len = sysheap_test_buffer_size,
+		.len = bytes,
 		.fd = 0,
 		.fd_flags = O_RDWR | O_CLOEXEC,
 		.heap_flags = 0,
@@ -110,7 +108,9 @@ static int create_sys_heap_dmabuf(void)
 static int create_test_buffers(void)
 {
 	udmabuf = create_udmabuf();
-	sysheap_dmabuf = create_sys_heap_dmabuf();
+
+	sysheap_test_buffer_size = 20 * getpagesize();
+	sysheap_dmabuf = create_sys_heap_dmabuf(sysheap_test_buffer_size);
 
 	if (udmabuf < 0 || sysheap_dmabuf < 0)
 		return -1;
@@ -219,6 +219,26 @@ static void subtest_dmabuf_iter_check_default_iter(struct dmabuf_iter *skel)
 	close(iter_fd);
 }
 
+static void subtest_dmabuf_iter_check_lots_of_buffers(struct dmabuf_iter *skel)
+{
+	int iter_fd;
+	char buf[1024];
+	size_t total_bytes_read = 0;
+	ssize_t bytes_read;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
+	if (!ASSERT_OK_FD(iter_fd, "iter_create"))
+		return;
+
+	while ((bytes_read = read(iter_fd, buf, sizeof(buf))) > 0)
+		total_bytes_read += bytes_read;
+
+	ASSERT_GT(total_bytes_read, getpagesize(), "total_bytes_read");
+
+	close(iter_fd);
+}
+
+
 static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd)
 {
 	LIBBPF_OPTS(bpf_test_run_opts, topts);
@@ -275,6 +295,23 @@ void test_dmabuf_iter(void)
 		subtest_dmabuf_iter_check_no_infinite_reads(skel);
 	if (test__start_subtest("default_iter"))
 		subtest_dmabuf_iter_check_default_iter(skel);
+	if (test__start_subtest("lots_of_buffers")) {
+		size_t NUM_BUFS = 100;
+		int buffers[NUM_BUFS];
+		int i;
+
+		for (i = 0; i < NUM_BUFS; ++i) {
+			buffers[i] = create_sys_heap_dmabuf(getpagesize());
+			if (!ASSERT_OK_FD(buffers[i], "dmabuf_fd"))
+				goto cleanup_bufs;
+		}
+
+		subtest_dmabuf_iter_check_lots_of_buffers(skel);
+
+cleanup_bufs:
+		for (--i; i >= 0; --i)
+			close(buffers[i]);
+	}
 	if (test__start_subtest("open_coded"))
 		subtest_dmabuf_iter_check_open_coded(skel, map_fd);
 

From 6abd4577bccc66f83edfdb24dc484723ae99cbe8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 4 Dec 2025 11:00:09 +0100
Subject: [PATCH 046/258] can: fix build dependency

A recent bugfix introduced a new problem with Kconfig dependencies:

WARNING: unmet direct dependencies detected for CAN_DEV
  Depends on [n]: NETDEVICES [=n] && CAN [=m]
  Selected by [m]:
  - CAN [=m] && NET [=y]

Since the CAN core code now links into the CAN device code, that
particular function needs to be available, though the rest of it
does not.

Revert the incomplete fix and instead use Makefile logic to avoid
the link failure.

Fixes: cb2dc6d2869a ("can: Kconfig: select CAN driver infrastructure by default")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202512091523.zty3CLmc-lkp@intel.com/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Oliver Hartkopp <socketcan@hartkopp.net>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://patch.msgid.link/20251204100015.1033688-1-arnd@kernel.org
[mkl: removed module option from CAN_DEV help text (thanks Vincent)]
[mkl: removed '&& CAN' from Kconfig dependency (thanks Vincent)]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/Kconfig      | 5 +----
 drivers/net/can/Makefile     | 2 +-
 drivers/net/can/dev/Makefile | 5 ++---
 net/can/Kconfig              | 1 -
 4 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index e15e320db47636..460a74ae692330 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
 menuconfig CAN_DEV
-	tristate "CAN Device Drivers"
+	bool "CAN Device Drivers"
 	default y
 	depends on CAN
 	help
@@ -17,9 +17,6 @@ menuconfig CAN_DEV
 	  virtual ones. If you own such devices or plan to use the virtual CAN
 	  interfaces to develop applications, say Y here.
 
-	  To compile as a module, choose M here: the module will be called
-	  can-dev.
-
 if CAN_DEV
 
 config CAN_VCAN
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index d7bc10a6b8eae2..37e2f1a2faecd7 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_CAN_VCAN)		+= vcan.o
 obj-$(CONFIG_CAN_VXCAN)		+= vxcan.o
 obj-$(CONFIG_CAN_SLCAN)		+= slcan/
 
-obj-y				+= dev/
+obj-$(CONFIG_CAN_DEV)		+= dev/
 obj-y				+= esd/
 obj-y				+= rcar/
 obj-y				+= rockchip/
diff --git a/drivers/net/can/dev/Makefile b/drivers/net/can/dev/Makefile
index 633687d6b6c0c0..64226acf0f3d40 100644
--- a/drivers/net/can/dev/Makefile
+++ b/drivers/net/can/dev/Makefile
@@ -1,9 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
-obj-$(CONFIG_CAN_DEV) += can-dev.o
-
-can-dev-y += skb.o
+obj-$(CONFIG_CAN) += can-dev.o
 
+can-dev-$(CONFIG_CAN_DEV) += skb.o
 can-dev-$(CONFIG_CAN_CALC_BITTIMING) += calc_bittiming.o
 can-dev-$(CONFIG_CAN_NETLINK) += bittiming.o
 can-dev-$(CONFIG_CAN_NETLINK) += dev.o
diff --git a/net/can/Kconfig b/net/can/Kconfig
index e4ccf731a24ce4..af64a6f764588c 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -5,7 +5,6 @@
 
 menuconfig CAN
 	tristate "CAN bus subsystem support"
-	select CAN_DEV
 	help
 	  Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial
 	  communications protocol. Development of the CAN bus started in

From 3e54d3b4a8437b6783d4145c86962a2aa51022f3 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 1 Dec 2025 19:26:38 +0100
Subject: [PATCH 047/258] can: gs_usb: gs_can_open(): fix error handling

Commit 2603be9e8167 ("can: gs_usb: gs_can_open(): improve error handling")
added missing error handling to the gs_can_open() function.

The driver uses 2 USB anchors to track the allocated URBs: the TX URBs in
struct gs_can::tx_submitted for each netdev and the RX URBs in struct
gs_usb::rx_submitted for the USB device. gs_can_open() allocates the RX
URBs, while TX URBs are allocated during gs_can_start_xmit().

The cleanup in gs_can_open() kills all anchored dev->tx_submitted
URBs (which is not necessary since the netdev is not yet registered), but
misses the parent->rx_submitted URBs.

Fix the problem by killing the rx_submitted instead of the tx_submitted.

Fixes: 2603be9e8167 ("can: gs_usb: gs_can_open(): improve error handling")
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20251210-gs_usb-fix-error-handling-v1-1-d6a5a03f10bb@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/gs_usb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index e29e85b67fd405..a0233e550a5ad9 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -1074,7 +1074,7 @@ static int gs_can_open(struct net_device *netdev)
 	usb_free_urb(urb);
 out_usb_kill_anchored_urbs:
 	if (!parent->active_channels) {
-		usb_kill_anchored_urbs(&dev->tx_submitted);
+		usb_kill_anchored_urbs(&parent->rx_submitted);
 
 		if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
 			gs_usb_timestamp_stop(parent);

From 5ace7ef87f059d68b5f50837ef3e8a1a4870c36e Mon Sep 17 00:00:00 2001
From: Ilya Maximets <i.maximets@ovn.org>
Date: Thu, 4 Dec 2025 11:53:32 +0100
Subject: [PATCH 048/258] net: openvswitch: fix middle attribute validation in
 push_nsh() action

The push_nsh() action structure looks like this:

 OVS_ACTION_ATTR_PUSH_NSH(OVS_KEY_ATTR_NSH(OVS_NSH_KEY_ATTR_BASE,...))

The outermost OVS_ACTION_ATTR_PUSH_NSH attribute is OK'ed by the
nla_for_each_nested() inside __ovs_nla_copy_actions().  The innermost
OVS_NSH_KEY_ATTR_BASE/MD1/MD2 are OK'ed by the nla_for_each_nested()
inside nsh_key_put_from_nlattr().  But nothing checks if the attribute
in the middle is OK.  We don't even check that this attribute is the
OVS_KEY_ATTR_NSH.  We just do a double unwrap with a pair of nla_data()
calls - first time directly while calling validate_push_nsh() and the
second time as part of the nla_for_each_nested() macro, which isn't
safe, potentially causing invalid memory access if the size of this
attribute is incorrect.  The failure may not be noticed during
validation due to larger netlink buffer, but cause trouble later during
action execution where the buffer is allocated exactly to the size:

 BUG: KASAN: slab-out-of-bounds in nsh_hdr_from_nlattr+0x1dd/0x6a0 [openvswitch]
 Read of size 184 at addr ffff88816459a634 by task a.out/22624

 CPU: 8 UID: 0 PID: 22624 6.18.0-rc7+ #115 PREEMPT(voluntary)
 Call Trace:
  <TASK>
  dump_stack_lvl+0x51/0x70
  print_address_description.constprop.0+0x2c/0x390
  kasan_report+0xdd/0x110
  kasan_check_range+0x35/0x1b0
  __asan_memcpy+0x20/0x60
  nsh_hdr_from_nlattr+0x1dd/0x6a0 [openvswitch]
  push_nsh+0x82/0x120 [openvswitch]
  do_execute_actions+0x1405/0x2840 [openvswitch]
  ovs_execute_actions+0xd5/0x3b0 [openvswitch]
  ovs_packet_cmd_execute+0x949/0xdb0 [openvswitch]
  genl_family_rcv_msg_doit+0x1d6/0x2b0
  genl_family_rcv_msg+0x336/0x580
  genl_rcv_msg+0x9f/0x130
  netlink_rcv_skb+0x11f/0x370
  genl_rcv+0x24/0x40
  netlink_unicast+0x73e/0xaa0
  netlink_sendmsg+0x744/0xbf0
  __sys_sendto+0x3d6/0x450
  do_syscall_64+0x79/0x2c0
  entry_SYSCALL_64_after_hwframe+0x76/0x7e
  </TASK>

Let's add some checks that the attribute is properly sized and it's
the only one attribute inside the action.  Technically, there is no
real reason for OVS_KEY_ATTR_NSH to be there, as we know that we're
pushing an NSH header already, it just creates extra nesting, but
that's how uAPI works today.  So, keeping as it is.

Fixes: b2d0f5d5dc53 ("openvswitch: enable NSH support")
Reported-by: Junvy Yang <zhuque@tencent.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Eelco Chaudron echaudro@redhat.com
Reviewed-by: Aaron Conole <aconole@redhat.com>
Link: https://patch.msgid.link/20251204105334.900379-1-i.maximets@ovn.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/openvswitch/flow_netlink.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 1cb4f97335d87b..2d536901309ea9 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2802,13 +2802,20 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	return err;
 }
 
-static bool validate_push_nsh(const struct nlattr *attr, bool log)
+static bool validate_push_nsh(const struct nlattr *a, bool log)
 {
+	struct nlattr *nsh_key = nla_data(a);
 	struct sw_flow_match match;
 	struct sw_flow_key key;
 
+	/* There must be one and only one NSH header. */
+	if (!nla_ok(nsh_key, nla_len(a)) ||
+	    nla_total_size(nla_len(nsh_key)) != nla_len(a) ||
+	    nla_type(nsh_key) != OVS_KEY_ATTR_NSH)
+		return false;
+
 	ovs_match_init(&match, &key, true, NULL);
-	return !nsh_key_put_from_nlattr(attr, &match, false, true, log);
+	return !nsh_key_put_from_nlattr(nsh_key, &match, false, true, log);
 }
 
 /* Return false if there are any non-masked bits set.
@@ -3389,7 +3396,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 					return -EINVAL;
 			}
 			mac_proto = MAC_PROTO_NONE;
-			if (!validate_push_nsh(nla_data(a), log))
+			if (!validate_push_nsh(a, log))
 				return -EINVAL;
 			break;
 

From 9e7477a427449a8a3cd00c188e20a880e3d94638 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 4 Dec 2025 11:01:28 +0100
Subject: [PATCH 049/258] net: ti: icssg-prueth: add PTP_1588_CLOCK_OPTIONAL
 dependency

The new icssg-prueth driver needs the same dependency as the other parts
that use the ptp-1588:

WARNING: unmet direct dependencies detected for TI_ICSS_IEP
  Depends on [m]: NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_TI [=y] && PTP_1588_CLOCK_OPTIONAL [=m] && TI_PRUSS [=y]
  Selected by [y]:
  - TI_PRUETH [=y] && NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_TI [=y] && PRU_REMOTEPROC [=y] && NET_SWITCHDEV [=y]

Add the correct dependency on the two drivers missing it, and remove
the pointless 'imply' in the process.

Fixes: e654b85a693e ("net: ti: icssg-prueth: Add ICSSG Ethernet driver for AM65x SR1.0 platforms")
Fixes: 511f6c1ae093 ("net: ti: icssm-prueth: Adds ICSSM Ethernet driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Link: https://patch.msgid.link/20251204100138.1034175-1-arnd@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/ti/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index a54d71155263c2..fe5b2926d8ab06 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -209,6 +209,7 @@ config TI_ICSSG_PRUETH_SR1
 	depends on PRU_REMOTEPROC
 	depends on NET_SWITCHDEV
 	depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER
+	depends on PTP_1588_CLOCK_OPTIONAL
 	help
 	  Support dual Gigabit Ethernet ports over the ICSSG PRU Subsystem.
 	  This subsystem is available on the AM65 SR1.0 platform.
@@ -234,7 +235,7 @@ config TI_PRUETH
 	depends on PRU_REMOTEPROC
 	depends on NET_SWITCHDEV
 	select TI_ICSS_IEP
-	imply PTP_1588_CLOCK
+	depends on PTP_1588_CLOCK_OPTIONAL
 	help
 	  Some TI SoCs has Programmable Realtime Unit (PRU) cores which can
 	  support Single or Dual Ethernet ports with the help of firmware code

From 6af2a01d65f89e73c1cbb9267f8880d83a88cee4 Mon Sep 17 00:00:00 2001
From: caoping <caoping@cmss.chinamobile.com>
Date: Thu, 4 Dec 2025 01:10:58 -0800
Subject: [PATCH 050/258] net/handshake: restore destructor on submit failure

handshake_req_submit() replaces sk->sk_destruct but never restores it when
submission fails before the request is hashed. handshake_sk_destruct() then
returns early and the original destructor never runs, leaking the socket.
Restore sk_destruct on the error path.

Fixes: 3b3009ea8abb ("net/handshake: Create a NETLINK service for handling handshake requests")
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Cc: stable@vger.kernel.org
Signed-off-by: caoping <caoping@cmss.chinamobile.com>
Link: https://patch.msgid.link/20251204091058.1545151-1-caoping@cmss.chinamobile.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/handshake/request.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/handshake/request.c b/net/handshake/request.c
index 274d2c89b6b207..89435ed755cd00 100644
--- a/net/handshake/request.c
+++ b/net/handshake/request.c
@@ -276,6 +276,8 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req,
 out_unlock:
 	spin_unlock(&hn->hn_lock);
 out_err:
+	/* Restore original destructor so socket teardown still runs on failure */
+	req->hr_sk->sk_destruct = req->hr_odestruct;
 	trace_handshake_submit_err(net, req, req->hr_sk, ret);
 	handshake_req_destroy(req);
 	return ret;

From 50b3db3e11864cb4e18ff099cfb38e11e7f87a68 Mon Sep 17 00:00:00 2001
From: Alexey Simakov <bigalex934@gmail.com>
Date: Fri, 5 Dec 2025 18:58:16 +0300
Subject: [PATCH 051/258] broadcom: b44: prevent uninitialized value usage

On execution path with raised B44_FLAG_EXTERNAL_PHY, b44_readphy()
leaves bmcr value uninitialized and it is used later in the code.

Add check of this flag at the beginning of the b44_nway_reset() and
exit early of the function with restarting autonegotiation if an
external PHY is used.

Fixes: 753f492093da ("[B44]: port to native ssb support")
Reviewed-by: Jonas Gorski <jonas.gorski@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Alexey Simakov <bigalex934@gmail.com>
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/20251205155815.4348-1-bigalex934@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/b44.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 888f28f11406fb..90df02e0039cba 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -1790,6 +1790,9 @@ static int b44_nway_reset(struct net_device *dev)
 	u32 bmcr;
 	int r;
 
+	if (bp->flags & B44_FLAG_EXTERNAL_PHY)
+		return phy_ethtool_nway_reset(dev);
+
 	spin_lock_irq(&bp->lock);
 	b44_readphy(bp, MII_BMCR, &bmcr);
 	b44_readphy(bp, MII_BMCR, &bmcr);

From 9580f6d47dd6156c6d16e988d28faa74e5a0b8ba Mon Sep 17 00:00:00 2001
From: Ankit Khushwaha <ankitkhushwaha.linux@gmail.com>
Date: Fri, 5 Dec 2025 22:02:42 +0530
Subject: [PATCH 052/258] selftests: tls: fix warning of uninitialized variable

In 'poll_partial_rec_async' a uninitialized char variable 'token' with
is used for write/read instruction to synchronize between threads
via a pipe.

tls.c:2833:26: warning: variable 'token' is uninitialized
      		   when passed as a const pointer argument

Initialize 'token' to '\0' to silence compiler warning.

Signed-off-by: Ankit Khushwaha <ankitkhushwaha.linux@gmail.com>
Link: https://patch.msgid.link/20251205163242.14615-1-ankitkhushwaha.linux@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/tls.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index da1b50b3071946..a625d0be62d0e9 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -2786,10 +2786,10 @@ TEST_F(tls_err, epoll_partial_rec)
 TEST_F(tls_err, poll_partial_rec_async)
 {
 	struct pollfd pfd = { };
+	char token = '\0';
 	ssize_t rec_len;
 	char rec[256];
 	char buf[128];
-	char token;
 	int p[2];
 	int ret;
 

From 06f7cae92fe346fa49a8a9b161124b26cc5c3ed1 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 5 Dec 2025 09:10:00 -0800
Subject: [PATCH 053/258] selftest: af_unix: Support compilers without
 flex-array-member-not-at-end support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix:

gcc: error: unrecognized command-line option ‘-Wflex-array-member-not-at-end’

by making the compiler option dependent on its support.

Fixes: 1838731f1072c ("selftest: af_unix: Add -Wall and -Wflex-array-member-not-at-end to CFLAGS.")
Cc: Kuniyuki Iwashima <kuniyu@google.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Link: https://patch.msgid.link/20251205171010.515236-7-linux@roeck-us.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/af_unix/Makefile | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 3cd677b720728a..4c0375e28bbeeb 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,9 @@
-CFLAGS += $(KHDR_INCLUDES) -Wall -Wflex-array-member-not-at-end
+top_srcdir := ../../../../..
+include $(top_srcdir)/scripts/Makefile.compiler
+
+cc-option = $(call __cc-option, $(CC),,$(1),$(2))
+
+CFLAGS += $(KHDR_INCLUDES) -Wall $(call cc-option,-Wflex-array-member-not-at-end)
 
 TEST_GEN_PROGS := \
 	diag_uid \

From 59546e874403c1dd0cbc42df06fdf8c113f72022 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 5 Dec 2025 09:10:04 -0800
Subject: [PATCH 054/258] selftests: net: Fix build warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix

ksft.h: In function ‘ksft_ready’:
ksft.h:27:9: warning: ignoring return value of ‘write’ declared with attribute ‘warn_unused_result’

ksft.h: In function ‘ksft_wait’:
ksft.h:51:9: warning: ignoring return value of ‘read’ declared with attribute ‘warn_unused_result’

by checking the return value of the affected functions and displaying
an error message if an error is seen.

Fixes: 2b6d490b82668 ("selftests: drv-net: Factor out ksft C helpers")
Cc: Joe Damato <jdamato@fastly.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Link: https://patch.msgid.link/20251205171010.515236-11-linux@roeck-us.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/lib/ksft.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h
index 17dc34a612c64e..03912902a6d30c 100644
--- a/tools/testing/selftests/net/lib/ksft.h
+++ b/tools/testing/selftests/net/lib/ksft.h
@@ -24,7 +24,8 @@ static inline void ksft_ready(void)
 		fd = STDOUT_FILENO;
 	}
 
-	write(fd, msg, sizeof(msg));
+	if (write(fd, msg, sizeof(msg)) < 0)
+		perror("write()");
 	if (fd != STDOUT_FILENO)
 		close(fd);
 }
@@ -48,7 +49,8 @@ static inline void ksft_wait(void)
 		fd = STDIN_FILENO;
 	}
 
-	read(fd, &byte, sizeof(byte));
+	if (read(fd, &byte, sizeof(byte)) < 0)
+		perror("read()");
 	if (fd != STDIN_FILENO)
 		close(fd);
 }

From 91dc09a609d9443e6b34bdb355a18d579a95e132 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 5 Dec 2025 09:10:07 -0800
Subject: [PATCH 055/258] selftests: net: tfo: Fix build warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix

tfo.c: In function ‘run_server’:
tfo.c:84:9: warning: ignoring return value of ‘read’ declared with attribute ‘warn_unused_result’

by evaluating the return value from read() and displaying an error message
if it reports an error.

Fixes: c65b5bb2329e3 ("selftests: net: add passive TFO test binary")
Cc: David Wei <dw@davidwei.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Link: https://patch.msgid.link/20251205171010.515236-14-linux@roeck-us.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/tfo.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c
index eb3cac5e583c9d..8d82140f0f7679 100644
--- a/tools/testing/selftests/net/tfo.c
+++ b/tools/testing/selftests/net/tfo.c
@@ -81,7 +81,8 @@ static void run_server(void)
 	if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0)
 		error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)");
 
-	read(connfd, buf, 64);
+	if (read(connfd, buf, 64) < 0)
+		perror("read()");
 	fprintf(outfile, "%d\n", opt);
 
 	fclose(outfile);

From 8ef522c8a59a048117f7e05eb5213043c02f986f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 6 Dec 2025 17:09:39 -0800
Subject: [PATCH 056/258] inet: frags: avoid theoretical race in
 ip_frag_reinit()

In ip_frag_reinit() we want to move the frag timeout timer into
the future. If the timer fires in the meantime we inadvertently
scheduled it again, and since the timer assumes a ref on frag_queue
we need to acquire one to balance things out.

This is technically racy, we should have acquired the reference
_before_ we touch the timer, it may fire again before we take the ref.
Avoid this entire dance by using mod_timer_pending() which only modifies
the timer if its pending (and which exists since Linux v2.6.30)

Note that this was the only place we ever took a ref on frag_queue
since Eric's conversion to RCU. So we could potentially replace
the whole refcnt field with an atomic flag and a bit more RCU.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20251207010942.1672972-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/inet_fragment.c | 4 +++-
 net/ipv4/ip_fragment.c   | 4 +---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 025895eb6ec597..30f4fa50ee2d73 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -327,7 +327,9 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
 
 	timer_setup(&q->timer, f->frag_expire, 0);
 	spin_lock_init(&q->lock);
-	/* One reference for the timer, one for the hash table. */
+	/* One reference for the timer, one for the hash table.
+	 * We never take any extra references, only decrement this field.
+	 */
 	refcount_set(&q->refcnt, 2);
 
 	return q;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index f7012479713ba6..d7bccdc9dc6938 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -242,10 +242,8 @@ static int ip_frag_reinit(struct ipq *qp)
 {
 	unsigned int sum_truesize = 0;
 
-	if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) {
-		refcount_inc(&qp->q.refcnt);
+	if (!mod_timer_pending(&qp->q.timer, jiffies + qp->q.fqdir->timeout))
 		return -ETIMEDOUT;
-	}
 
 	sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments,
 					      SKB_DROP_REASON_FRAG_TOO_FAR);

From 1231eec6994be29d6bb5c303dfa54731ed9fc0e6 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 6 Dec 2025 17:09:40 -0800
Subject: [PATCH 057/258] inet: frags: add inet_frag_queue_flush()

Instead of exporting inet_frag_rbtree_purge() which requires that
caller takes care of memory accounting, add a new helper. We will
need to call it from a few places in the next patch.

Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20251207010942.1672972-3-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet_frag.h  |  5 ++---
 net/ipv4/inet_fragment.c | 15 ++++++++++++---
 net/ipv4/ip_fragment.c   |  6 +-----
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 0eccd9c3a883fb..3ffaceee7bbc0a 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -141,9 +141,8 @@ void inet_frag_kill(struct inet_frag_queue *q, int *refs);
 void inet_frag_destroy(struct inet_frag_queue *q);
 struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key);
 
-/* Free all skbs in the queue; return the sum of their truesizes. */
-unsigned int inet_frag_rbtree_purge(struct rb_root *root,
-				    enum skb_drop_reason reason);
+void inet_frag_queue_flush(struct inet_frag_queue *q,
+			   enum skb_drop_reason reason);
 
 static inline void inet_frag_putn(struct inet_frag_queue *q, int refs)
 {
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 30f4fa50ee2d73..1bf969b5a1cb55 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -263,8 +263,8 @@ static void inet_frag_destroy_rcu(struct rcu_head *head)
 	kmem_cache_free(f->frags_cachep, q);
 }
 
-unsigned int inet_frag_rbtree_purge(struct rb_root *root,
-				    enum skb_drop_reason reason)
+static unsigned int
+inet_frag_rbtree_purge(struct rb_root *root, enum skb_drop_reason reason)
 {
 	struct rb_node *p = rb_first(root);
 	unsigned int sum = 0;
@@ -284,7 +284,16 @@ unsigned int inet_frag_rbtree_purge(struct rb_root *root,
 	}
 	return sum;
 }
-EXPORT_SYMBOL(inet_frag_rbtree_purge);
+
+void inet_frag_queue_flush(struct inet_frag_queue *q,
+			   enum skb_drop_reason reason)
+{
+	unsigned int sum;
+
+	sum = inet_frag_rbtree_purge(&q->rb_fragments, reason);
+	sub_frag_mem_limit(q->fqdir, sum);
+}
+EXPORT_SYMBOL(inet_frag_queue_flush);
 
 void inet_frag_destroy(struct inet_frag_queue *q)
 {
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index d7bccdc9dc6938..32f1c1a46ba72b 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -240,14 +240,10 @@ static int ip_frag_too_far(struct ipq *qp)
 
 static int ip_frag_reinit(struct ipq *qp)
 {
-	unsigned int sum_truesize = 0;
-
 	if (!mod_timer_pending(&qp->q.timer, jiffies + qp->q.fqdir->timeout))
 		return -ETIMEDOUT;
 
-	sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments,
-					      SKB_DROP_REASON_FRAG_TOO_FAR);
-	sub_frag_mem_limit(qp->q.fqdir, sum_truesize);
+	inet_frag_queue_flush(&qp->q, SKB_DROP_REASON_FRAG_TOO_FAR);
 
 	qp->q.flags = 0;
 	qp->q.len = 0;

From 006a5035b495dec008805df249f92c22c89c3d2e Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 6 Dec 2025 17:09:41 -0800
Subject: [PATCH 058/258] inet: frags: flush pending skbs in fqdir_pre_exit()

We have been seeing occasional deadlocks on pernet_ops_rwsem since
September in NIPA. The stuck task was usually modprobe (often loading
a driver like ipvlan), trying to take the lock as a Writer.
lockdep does not track readers for rwsems so the read wasn't obvious
from the reports.

On closer inspection the Reader holding the lock was conntrack looping
forever in nf_conntrack_cleanup_net_list(). Based on past experience
with occasional NIPA crashes I looked thru the tests which run before
the crash and noticed that the crash follows ip_defrag.sh. An immediate
red flag. Scouring thru (de)fragmentation queues reveals skbs sitting
around, holding conntrack references.

The problem is that since conntrack depends on nf_defrag_ipv6,
nf_defrag_ipv6 will load first. Since nf_defrag_ipv6 loads first its
netns exit hooks run _after_ conntrack's netns exit hook.

Flush all fragment queue SKBs during fqdir_pre_exit() to release
conntrack references before conntrack cleanup runs. Also flush
the queues in timer expiry handlers when they discover fqdir->dead
is set, in case packet sneaks in while we're running the pre_exit
flush.

The commit under Fixes is not exactly the culprit, but I think
previously the timer firing would eventually unblock the spinning
conntrack.

Fixes: d5dd88794a13 ("inet: fix various use-after-free in defrags units")
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20251207010942.1672972-4-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet_frag.h  | 13 +------------
 include/net/ipv6_frag.h  |  9 ++++++---
 net/ipv4/inet_fragment.c | 36 ++++++++++++++++++++++++++++++++++++
 net/ipv4/ip_fragment.c   | 12 +++++++-----
 4 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 3ffaceee7bbc0a..365925c9d26286 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -123,18 +123,7 @@ void inet_frags_fini(struct inet_frags *);
 
 int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net);
 
-static inline void fqdir_pre_exit(struct fqdir *fqdir)
-{
-	/* Prevent creation of new frags.
-	 * Pairs with READ_ONCE() in inet_frag_find().
-	 */
-	WRITE_ONCE(fqdir->high_thresh, 0);
-
-	/* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire()
-	 * and ip6frag_expire_frag_queue().
-	 */
-	WRITE_ONCE(fqdir->dead, true);
-}
+void fqdir_pre_exit(struct fqdir *fqdir);
 void fqdir_exit(struct fqdir *fqdir);
 
 void inet_frag_kill(struct inet_frag_queue *q, int *refs);
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index 38ef66826939ee..41d9fc6965f9a0 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -69,9 +69,6 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
 	int refs = 1;
 
 	rcu_read_lock();
-	/* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */
-	if (READ_ONCE(fq->q.fqdir->dead))
-		goto out_rcu_unlock;
 	spin_lock(&fq->q.lock);
 
 	if (fq->q.flags & INET_FRAG_COMPLETE)
@@ -80,6 +77,12 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
 	fq->q.flags |= INET_FRAG_DROP;
 	inet_frag_kill(&fq->q, &refs);
 
+	/* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */
+	if (READ_ONCE(fq->q.fqdir->dead)) {
+		inet_frag_queue_flush(&fq->q, 0);
+		goto out;
+	}
+
 	dev = dev_get_by_index_rcu(net, fq->iif);
 	if (!dev)
 		goto out;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 1bf969b5a1cb55..001ee5c4d962e3 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -218,6 +218,41 @@ static int __init inet_frag_wq_init(void)
 
 pure_initcall(inet_frag_wq_init);
 
+void fqdir_pre_exit(struct fqdir *fqdir)
+{
+	struct inet_frag_queue *fq;
+	struct rhashtable_iter hti;
+
+	/* Prevent creation of new frags.
+	 * Pairs with READ_ONCE() in inet_frag_find().
+	 */
+	WRITE_ONCE(fqdir->high_thresh, 0);
+
+	/* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire()
+	 * and ip6frag_expire_frag_queue().
+	 */
+	WRITE_ONCE(fqdir->dead, true);
+
+	rhashtable_walk_enter(&fqdir->rhashtable, &hti);
+	rhashtable_walk_start(&hti);
+
+	while ((fq = rhashtable_walk_next(&hti))) {
+		if (IS_ERR(fq)) {
+			if (PTR_ERR(fq) != -EAGAIN)
+				break;
+			continue;
+		}
+		spin_lock_bh(&fq->lock);
+		if (!(fq->flags & INET_FRAG_COMPLETE))
+			inet_frag_queue_flush(fq, 0);
+		spin_unlock_bh(&fq->lock);
+	}
+
+	rhashtable_walk_stop(&hti);
+	rhashtable_walk_exit(&hti);
+}
+EXPORT_SYMBOL(fqdir_pre_exit);
+
 void fqdir_exit(struct fqdir *fqdir)
 {
 	INIT_WORK(&fqdir->destroy_work, fqdir_work_fn);
@@ -290,6 +325,7 @@ void inet_frag_queue_flush(struct inet_frag_queue *q,
 {
 	unsigned int sum;
 
+	reason = reason ?: SKB_DROP_REASON_FRAG_REASM_TIMEOUT;
 	sum = inet_frag_rbtree_purge(&q->rb_fragments, reason);
 	sub_frag_mem_limit(q->fqdir, sum);
 }
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 32f1c1a46ba72b..56b0f738d2f27b 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -134,11 +134,6 @@ static void ip_expire(struct timer_list *t)
 	net = qp->q.fqdir->net;
 
 	rcu_read_lock();
-
-	/* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
-	if (READ_ONCE(qp->q.fqdir->dead))
-		goto out_rcu_unlock;
-
 	spin_lock(&qp->q.lock);
 
 	if (qp->q.flags & INET_FRAG_COMPLETE)
@@ -146,6 +141,13 @@ static void ip_expire(struct timer_list *t)
 
 	qp->q.flags |= INET_FRAG_DROP;
 	inet_frag_kill(&qp->q, &refs);
+
+	/* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
+	if (READ_ONCE(qp->q.fqdir->dead)) {
+		inet_frag_queue_flush(&qp->q, 0);
+		goto out;
+	}
+
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
 

From 92df4c56cf5b739c2977001c581badeaf82b9857 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 6 Dec 2025 17:09:42 -0800
Subject: [PATCH 059/258] netfilter: conntrack: warn when cleanup is stuck

nf_conntrack_cleanup_net_list() calls schedule() so it does not
show up as a hung task. Add an explicit check to make debugging
leaked skbs/conntack references more obvious.

Acked-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20251207010942.1672972-5-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/netfilter/nf_conntrack_core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0b95f226f21116..d1f8eb725d4223 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2487,6 +2487,7 @@ void nf_conntrack_cleanup_net(struct net *net)
 void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
 {
 	struct nf_ct_iter_data iter_data = {};
+	unsigned long start = jiffies;
 	struct net *net;
 	int busy;
 
@@ -2507,6 +2508,8 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
 			busy = 1;
 	}
 	if (busy) {
+		DEBUG_NET_WARN_ONCE(time_after(jiffies, start + 60 * HZ),
+				    "conntrack cleanup blocked for 60s");
 		schedule();
 		goto i_see_dead_people;
 	}

From ac44dcc788b950606793e8f9690c30925f59df02 Mon Sep 17 00:00:00 2001
From: Shuran Liu <electronlsr@gmail.com>
Date: Sat, 6 Dec 2025 22:12:09 +0800
Subject: [PATCH 060/258] bpf: Fix verifier assumptions of bpf_d_path's output
 buffer

Commit 37cce22dbd51 ("bpf: verifier: Refactor helper access type
tracking") started distinguishing read vs write accesses performed by
helpers.

The second argument of bpf_d_path() is a pointer to a buffer that the
helper fills with the resulting path. However, its prototype currently
uses ARG_PTR_TO_MEM without MEM_WRITE.

Before 37cce22dbd51, helper accesses were conservatively treated as
potential writes, so this mismatch did not cause issues. Since that
commit, the verifier may incorrectly assume that the buffer contents
are unchanged across the helper call and base its optimizations on this
wrong assumption. This can lead to misbehaviour in BPF programs that
read back the buffer, such as prefix comparisons on the returned path.

Fix this by marking the second argument of bpf_d_path() as
ARG_PTR_TO_MEM | MEM_WRITE so that the verifier correctly models the
write to the caller-provided buffer.

Fixes: 37cce22dbd51 ("bpf: verifier: Refactor helper access type tracking")
Co-developed-by: Zesen Liu <ftyg@live.com>
Signed-off-by: Zesen Liu <ftyg@live.com>
Co-developed-by: Peili Gao <gplhust955@gmail.com>
Signed-off-by: Peili Gao <gplhust955@gmail.com>
Co-developed-by: Haoran Ni <haoran.ni.cs@gmail.com>
Signed-off-by: Haoran Ni <haoran.ni.cs@gmail.com>
Signed-off-by: Shuran Liu <electronlsr@gmail.com>
Reviewed-by: Matt Bobrowski <mattbobrowski@google.com>
Link: https://lore.kernel.org/r/20251206141210.3148-2-electronlsr@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/trace/bpf_trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d57727abaade7f..fe28d86f7c3576 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -965,7 +965,7 @@ static const struct bpf_func_proto bpf_d_path_proto = {
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_BTF_ID,
 	.arg1_btf_id	= &bpf_d_path_btf_ids[0],
-	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg2_type	= ARG_PTR_TO_MEM | MEM_WRITE,
 	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
 	.allowed	= bpf_d_path_allowed,
 };

From 79e247d6608848c09038d0b1e7d256432624fd84 Mon Sep 17 00:00:00 2001
From: Shuran Liu <electronlsr@gmail.com>
Date: Sat, 6 Dec 2025 22:12:10 +0800
Subject: [PATCH 061/258] selftests/bpf: add regression test for bpf_d_path()

Add a regression test for bpf_d_path() to cover incorrect verifier
assumptions caused by an incorrect function prototype. The test
attaches to the fallocate hook, calls bpf_d_path() and verifies that
a simple prefix comparison on the returned pathname behaves correctly
after the fix in patch 1. It ensures the verifier does not assume
the buffer remains unwritten.

Co-developed-by: Zesen Liu <ftyg@live.com>
Signed-off-by: Zesen Liu <ftyg@live.com>
Co-developed-by: Peili Gao <gplhust955@gmail.com>
Signed-off-by: Peili Gao <gplhust955@gmail.com>
Co-developed-by: Haoran Ni <haoran.ni.cs@gmail.com>
Signed-off-by: Haoran Ni <haoran.ni.cs@gmail.com>
Signed-off-by: Shuran Liu <electronlsr@gmail.com>
Link: https://lore.kernel.org/r/20251206141210.3148-3-electronlsr@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/prog_tests/d_path.c | 89 +++++++++++++++----
 .../testing/selftests/bpf/progs/test_d_path.c | 23 +++++
 2 files changed, 95 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
index ccc768592e66a6..1a2a2f1abf0335 100644
--- a/tools/testing/selftests/bpf/prog_tests/d_path.c
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -38,6 +38,14 @@ static int set_pathname(int fd, pid_t pid)
 	return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN);
 }
 
+static inline long syscall_close(int fd)
+{
+	return syscall(__NR_close_range,
+			(unsigned int)fd,
+			(unsigned int)fd,
+			0u);
+}
+
 static int trigger_fstat_events(pid_t pid)
 {
 	int sockfd = -1, procfd = -1, devfd = -1;
@@ -104,18 +112,34 @@ static int trigger_fstat_events(pid_t pid)
 	/* sys_close no longer triggers filp_close, but we can
 	 * call sys_close_range instead which still does
 	 */
-#define close(fd) syscall(__NR_close_range, fd, fd, 0)
+	syscall_close(pipefd[0]);
+	syscall_close(pipefd[1]);
+	syscall_close(sockfd);
+	syscall_close(procfd);
+	syscall_close(devfd);
+	syscall_close(localfd);
+	syscall_close(indicatorfd);
+	return ret;
+}
 
-	close(pipefd[0]);
-	close(pipefd[1]);
-	close(sockfd);
-	close(procfd);
-	close(devfd);
-	close(localfd);
-	close(indicatorfd);
+static void attach_and_load(struct test_d_path **skel)
+{
+	int err;
 
-#undef close
-	return ret;
+	*skel = test_d_path__open_and_load();
+	if (CHECK(!*skel, "setup", "d_path skeleton failed\n"))
+		goto cleanup;
+
+	err = test_d_path__attach(*skel);
+	if (CHECK(err, "setup", "attach failed: %d\n", err))
+		goto cleanup;
+
+	(*skel)->bss->my_pid = getpid();
+	return;
+
+cleanup:
+	test_d_path__destroy(*skel);
+	*skel = NULL;
 }
 
 static void test_d_path_basic(void)
@@ -124,16 +148,11 @@ static void test_d_path_basic(void)
 	struct test_d_path *skel;
 	int err;
 
-	skel = test_d_path__open_and_load();
-	if (CHECK(!skel, "setup", "d_path skeleton failed\n"))
-		goto cleanup;
-
-	err = test_d_path__attach(skel);
-	if (CHECK(err, "setup", "attach failed: %d\n", err))
+	attach_and_load(&skel);
+	if (!skel)
 		goto cleanup;
 
 	bss = skel->bss;
-	bss->my_pid = getpid();
 
 	err = trigger_fstat_events(bss->my_pid);
 	if (err < 0)
@@ -195,6 +214,39 @@ static void test_d_path_check_types(void)
 	test_d_path_check_types__destroy(skel);
 }
 
+/* Check if the verifier correctly generates code for
+ * accessing the memory modified by d_path helper.
+ */
+static void test_d_path_mem_access(void)
+{
+	int localfd = -1;
+	char path_template[] = "/dev/shm/d_path_loadgen.XXXXXX";
+	struct test_d_path__bss *bss;
+	struct test_d_path *skel;
+
+	attach_and_load(&skel);
+	if (!skel)
+		goto cleanup;
+
+	bss = skel->bss;
+
+	localfd = mkstemp(path_template);
+	if (CHECK(localfd < 0, "trigger", "mkstemp failed\n"))
+		goto cleanup;
+
+	if (CHECK(fallocate(localfd, 0, 0, 1024) < 0, "trigger", "fallocate failed\n"))
+		goto cleanup;
+	remove(path_template);
+
+	if (CHECK(!bss->path_match_fallocate, "check",
+		  "failed to read fallocate path"))
+		goto cleanup;
+
+cleanup:
+	syscall_close(localfd);
+	test_d_path__destroy(skel);
+}
+
 void test_d_path(void)
 {
 	if (test__start_subtest("basic"))
@@ -205,4 +257,7 @@ void test_d_path(void)
 
 	if (test__start_subtest("check_alloc_mem"))
 		test_d_path_check_types();
+
+	if (test__start_subtest("check_mem_access"))
+		test_d_path_mem_access();
 }
diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c
index 84e1f883f97bc6..561b2f861808ef 100644
--- a/tools/testing/selftests/bpf/progs/test_d_path.c
+++ b/tools/testing/selftests/bpf/progs/test_d_path.c
@@ -17,6 +17,7 @@ int rets_close[MAX_FILES] = {};
 
 int called_stat = 0;
 int called_close = 0;
+int path_match_fallocate = 0;
 
 SEC("fentry/security_inode_getattr")
 int BPF_PROG(prog_stat, struct path *path, struct kstat *stat,
@@ -62,4 +63,26 @@ int BPF_PROG(prog_close, struct file *file, void *id)
 	return 0;
 }
 
+SEC("fentry/vfs_fallocate")
+int BPF_PROG(prog_fallocate, struct file *file, int mode, loff_t offset, loff_t len)
+{
+	pid_t pid = bpf_get_current_pid_tgid() >> 32;
+	int ret = 0;
+	char path_fallocate[MAX_PATH_LEN] = {};
+
+	if (pid != my_pid)
+		return 0;
+
+	ret = bpf_d_path(&file->f_path,
+			 path_fallocate, MAX_PATH_LEN);
+	if (ret < 0)
+		return 0;
+
+	if (!path_fallocate[0])
+		return 0;
+
+	path_match_fallocate = 1;
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";

From 2e2a720766886190a6d35c116794693aabd332b6 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <fmancera@suse.de>
Date: Fri, 5 Dec 2025 12:58:01 +0100
Subject: [PATCH 062/258] netfilter: nf_conncount: fix leaked ct in error paths

There are some situations where ct might be leaked as error paths are
skipping the refcounted check and return immediately. In order to solve
it make sure that the check is always called.

Fixes: be102eb6a0e7 ("netfilter: nf_conncount: rework API to use sk_buff directly")
Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_conncount.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index f1be4dd5cf85f5..3654f1e8976c91 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -172,14 +172,14 @@ static int __nf_conncount_add(struct net *net,
 	struct nf_conn *found_ct;
 	unsigned int collect = 0;
 	bool refcounted = false;
+	int err = 0;
 
 	if (!get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted))
 		return -ENOENT;
 
 	if (ct && nf_ct_is_confirmed(ct)) {
-		if (refcounted)
-			nf_ct_put(ct);
-		return -EEXIST;
+		err = -EEXIST;
+		goto out_put;
 	}
 
 	if ((u32)jiffies == list->last_gc)
@@ -231,12 +231,16 @@ static int __nf_conncount_add(struct net *net,
 	}
 
 add_new_node:
-	if (WARN_ON_ONCE(list->count > INT_MAX))
-		return -EOVERFLOW;
+	if (WARN_ON_ONCE(list->count > INT_MAX)) {
+		err = -EOVERFLOW;
+		goto out_put;
+	}
 
 	conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
-	if (conn == NULL)
-		return -ENOMEM;
+	if (conn == NULL) {
+		err = -ENOMEM;
+		goto out_put;
+	}
 
 	conn->tuple = tuple;
 	conn->zone = *zone;
@@ -249,7 +253,7 @@ static int __nf_conncount_add(struct net *net,
 out_put:
 	if (refcounted)
 		nf_ct_put(ct);
-	return 0;
+	return err;
 }
 
 int nf_conncount_add_skb(struct net *net,
@@ -456,11 +460,10 @@ insert_tree(struct net *net,
 
 		rb_link_node_rcu(&rbconn->node, parent, rbnode);
 		rb_insert_color(&rbconn->node, root);
-
-		if (refcounted)
-			nf_ct_put(ct);
 	}
 out_unlock:
+	if (refcounted)
+		nf_ct_put(ct);
 	spin_unlock_bh(&nf_conncount_locks[hash]);
 	return count;
 }

From ad891bb3d079a46a821bf2b8867854645191bab0 Mon Sep 17 00:00:00 2001
From: Slavin Liu <slavin452@gmail.com>
Date: Fri, 21 Nov 2025 16:52:13 +0800
Subject: [PATCH 063/258] ipvs: fix ipv4 null-ptr-deref in route error path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The IPv4 code path in __ip_vs_get_out_rt() calls dst_link_failure()
without ensuring skb->dev is set, leading to a NULL pointer dereference
in fib_compute_spec_dst() when ipv4_link_failure() attempts to send
ICMP destination unreachable messages.

The issue emerged after commit ed0de45a1008 ("ipv4: recompile ip options
in ipv4_link_failure") started calling __ip_options_compile() from
ipv4_link_failure(). This code path eventually calls fib_compute_spec_dst()
which dereferences skb->dev. An attempt was made to fix the NULL skb->dev
dereference in commit 0113d9c9d1cc ("ipv4: fix null-deref in
ipv4_link_failure"), but it only addressed the immediate dev_net(skb->dev)
dereference by using a fallback device. The fix was incomplete because
fib_compute_spec_dst() later in the call chain still accesses skb->dev
directly, which remains NULL when IPVS calls dst_link_failure().

The crash occurs when:
1. IPVS processes a packet in NAT mode with a misconfigured destination
2. Route lookup fails in __ip_vs_get_out_rt() before establishing a route
3. The error path calls dst_link_failure(skb) with skb->dev == NULL
4. ipv4_link_failure() → ipv4_send_dest_unreach() →
   __ip_options_compile() → fib_compute_spec_dst()
5. fib_compute_spec_dst() dereferences NULL skb->dev

Apply the same fix used for IPv6 in commit 326bf17ea5d4 ("ipvs: fix
ipv6 route unreach panic"): set skb->dev from skb_dst(skb)->dev before
calling dst_link_failure().

KASAN: null-ptr-deref in range [0x0000000000000328-0x000000000000032f]
CPU: 1 PID: 12732 Comm: syz.1.3469 Not tainted 6.6.114 #2
RIP: 0010:__in_dev_get_rcu include/linux/inetdevice.h:233
RIP: 0010:fib_compute_spec_dst+0x17a/0x9f0 net/ipv4/fib_frontend.c:285
Call Trace:
  <TASK>
  spec_dst_fill net/ipv4/ip_options.c:232
  spec_dst_fill net/ipv4/ip_options.c:229
  __ip_options_compile+0x13a1/0x17d0 net/ipv4/ip_options.c:330
  ipv4_send_dest_unreach net/ipv4/route.c:1252
  ipv4_link_failure+0x702/0xb80 net/ipv4/route.c:1265
  dst_link_failure include/net/dst.h:437
  __ip_vs_get_out_rt+0x15fd/0x19e0 net/netfilter/ipvs/ip_vs_xmit.c:412
  ip_vs_nat_xmit+0x1d8/0xc80 net/netfilter/ipvs/ip_vs_xmit.c:764

Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure")
Signed-off-by: Slavin Liu <slavin452@gmail.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/ipvs/ip_vs_xmit.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 3162ce3c26404f..64c697212578ae 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -408,6 +408,9 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
 	return -1;
 
 err_unreach:
+	if (!skb->dev)
+		skb->dev = skb_dst(skb)->dev;
+
 	dst_link_failure(skb);
 	return -1;
 }

From 2bdc536c9da7fa08baf0fafe9d91243b83cb9c8b Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 1 Dec 2025 11:22:45 +0100
Subject: [PATCH 064/258] netfilter: always set route tuple out ifindex

Always set nf_flow_route tuple out ifindex even if the indev is not one
of the flowtable configured devices since otherwise the outdev lookup in
nf_flow_offload_ip_hook() or nf_flow_offload_ipv6_hook() for
FLOW_OFFLOAD_XMIT_NEIGH flowtable entries will fail.
The above issue occurs in the following configuration since IP6IP6
tunnel does not support flowtable acceleration yet:

$ip addr show
5: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:11:22:33:22:55 brd ff:ff:ff:ff:ff:ff link-netns ns1
    inet6 2001:db8:1::2/64 scope global nodad
       valid_lft forever preferred_lft forever
    inet6 fe80::211:22ff:fe33:2255/64 scope link tentative proto kernel_ll
       valid_lft forever preferred_lft forever
6: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:22:22:33:22:55 brd ff:ff:ff:ff:ff:ff link-netns ns3
    inet6 2001:db8:2::1/64 scope global nodad
       valid_lft forever preferred_lft forever
    inet6 fe80::222:22ff:fe33:2255/64 scope link tentative proto kernel_ll
       valid_lft forever preferred_lft forever
7: tun0@NONE: <POINTOPOINT,NOARP,UP,LOWER_UP> mtu 1452 qdisc noqueue state UNKNOWN group default qlen 1000
    link/tunnel6 2001:db8:2::1 peer 2001:db8:2::2 permaddr a85:e732:2c37::
    inet6 2002:db8:1::1/64 scope global nodad
       valid_lft forever preferred_lft forever
    inet6 fe80::885:e7ff:fe32:2c37/64 scope link proto kernel_ll
       valid_lft forever preferred_lft forever

$ip -6 route show
2001:db8:1::/64 dev eth0 proto kernel metric 256 pref medium
2001:db8:2::/64 dev eth1 proto kernel metric 256 pref medium
2002:db8:1::/64 dev tun0 proto kernel metric 256 pref medium
default via 2002:db8:1::2 dev tun0 metric 1024 pref medium

$nft list ruleset
table inet filter {
        flowtable ft {
                hook ingress priority filter
                devices = { eth0, eth1 }
        }

        chain forward {
                type filter hook forward priority filter; policy accept;
                meta l4proto { tcp, udp } flow add @ft
        }
}

Fixes: b5964aac51e0 ("netfilter: flowtable: consolidate xmit path")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_flow_table_path.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_flow_table_path.c b/net/netfilter/nf_flow_table_path.c
index f0984cf69a09bb..eb24fe2715dcd5 100644
--- a/net/netfilter/nf_flow_table_path.c
+++ b/net/netfilter/nf_flow_table_path.c
@@ -250,6 +250,9 @@ static void nft_dev_forward_path(const struct nft_pktinfo *pkt,
 	if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
 		nft_dev_path_info(&stack, &info, ha, &ft->data);
 
+	if (info.outdev)
+		route->tuple[dir].out.ifindex = info.outdev->ifindex;
+
 	if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
 		return;
 
@@ -269,7 +272,6 @@ static void nft_dev_forward_path(const struct nft_pktinfo *pkt,
 
 	route->tuple[!dir].in.num_encaps = info.num_encaps;
 	route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
-	route->tuple[dir].out.ifindex = info.outdev->ifindex;
 
 	if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
 		memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);

From b8a81b0ce539e021ac72825238aea1eb657000f0 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 9 Dec 2025 00:03:36 +0100
Subject: [PATCH 065/258] selftests: netfilter: prefer xfail in case race
 wasn't triggered

Jakub says: "We try to reserve SKIP for tests skipped because tool is
missing in env, something isn't built into the kernel etc."

use xfail, we can't force the race condition to appear at will
so its expected that the test 'fails' occasionally.

Fixes: 78a588363587 ("selftests: netfilter: add conntrack clash resolution test case")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Closes: https://lore.kernel.org/netdev/20251206175647.5c32f419@kernel.org/
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 tools/testing/selftests/net/netfilter/conntrack_clash.sh | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
index 7fc6c5dbd5516e..84b8eb12143ae7 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_clash.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
@@ -116,7 +116,7 @@ run_one_clash_test()
 	# not a failure: clash resolution logic did not trigger.
 	# With right timing, xmit completed sequentially and
 	# no parallel insertion occurs.
-	return $ksft_skip
+	return $ksft_xfail
 }
 
 run_clash_test()
@@ -133,12 +133,12 @@ run_clash_test()
 		if [ $rv -eq 0 ];then
 			echo "PASS: clash resolution test for $daddr:$dport on attempt $i"
 			return 0
-		elif [ $rv -eq $ksft_skip ]; then
+		elif [ $rv -eq $ksft_xfail ]; then
 			softerr=1
 		fi
 	done
 
-	[ $softerr -eq 1 ] && echo "SKIP: clash resolution for $daddr:$dport did not trigger"
+	[ $softerr -eq 1 ] && echo "XFAIL: clash resolution for $daddr:$dport did not trigger"
 }
 
 ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter"
@@ -167,8 +167,7 @@ load_simple_ruleset "$nsclient2"
 run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001
 
 if [ $clash_resolution_active -eq 0 ];then
-	[ "$ret" -eq 0 ] && ret=$ksft_skip
-	echo "SKIP: Clash resolution did not trigger"
+	[ "$ret" -eq 0 ] && ret=$ksft_xfail
 fi
 
 exit $ret

From 53ca00a19d345197a37a1bf552e8d1e7b091666c Mon Sep 17 00:00:00 2001
From: Deepanshu Kartikey <kartikey406@gmail.com>
Date: Wed, 10 Dec 2025 07:50:24 +0530
Subject: [PATCH 066/258] mm/slub: reset KASAN tag in defer_free() before
 accessing freed memory

When CONFIG_SLUB_TINY is enabled, kfree_nolock() calls kasan_slab_free()
before defer_free(). On ARM64 with MTE (Memory Tagging Extension),
kasan_slab_free() poisons the memory and changes the tag from the
original (e.g., 0xf3) to a poison tag (0xfe).

When defer_free() then tries to write to the freed object to build the
deferred free list via llist_add(), the pointer still has the old tag,
causing a tag mismatch and triggering a KASAN use-after-free report:

  BUG: KASAN: slab-use-after-free in defer_free+0x3c/0xbc mm/slub.c:6537
  Write at addr f3f000000854f020 by task kworker/u8:6/983
  Pointer tag: [f3], memory tag: [fe]

Fix this by calling kasan_reset_tag() before accessing the freed memory.
This is safe because defer_free() is part of the allocator itself and is
expected to manipulate freed memory for bookkeeping purposes.

Fixes: af92793e52c3 ("slab: Introduce kmalloc_nolock() and kfree_nolock().")
Cc: stable@vger.kernel.org
Reported-by: syzbot+7a25305a76d872abcfa1@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=7a25305a76d872abcfa1
Tested-by: syzbot+7a25305a76d872abcfa1@syzkaller.appspotmail.com
Signed-off-by: Deepanshu Kartikey <kartikey406@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Link: https://patch.msgid.link/20251210022024.3255826-1-kartikey406@gmail.com
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 mm/slub.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/slub.c b/mm/slub.c
index f22ba8be29e060..97204d93b1dc16 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -6539,6 +6539,8 @@ static void defer_free(struct kmem_cache *s, void *head)
 
 	guard(preempt)();
 
+	head = kasan_reset_tag(head);
+
 	df = this_cpu_ptr(&defer_free_objects);
 	if (llist_add(head + s->offset, &df->objects))
 		irq_work_queue(&df->work);

From 0842e34849f65dc0aef0c7a0baae1dceb2b8bb33 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 9 Dec 2025 16:29:01 +0100
Subject: [PATCH 067/258] selftests: net: lib: tc_rule_stats_get(): Don't
 hard-code array index

Flower is commonly used to match on packets in many bash-based selftests.
A dump of a flower filter including statistics looks something like this:

 [
   {
     "protocol": "all",
     "pref": 49152,
     "kind": "flower",
     "chain": 0
   },
   {
     ...
     "options": {
       ...
       "actions": [
         {
	   ...
           "stats": {
             "bytes": 0,
             "packets": 0,
             "drops": 0,
             "overlimits": 0,
             "requeues": 0,
             "backlog": 0,
             "qlen": 0
           }
         }
       ]
     }
   }
 ]

The JQ query in the helper function tc_rule_stats_get() assumes this form
and looks for the second element of the array.

However, a dump of a u32 filter looks like this:

 [
   {
     "protocol": "all",
     "pref": 49151,
     "kind": "u32",
     "chain": 0
   },
   {
     "protocol": "all",
     "pref": 49151,
     "kind": "u32",
     "chain": 0,
     "options": {
       "fh": "800:",
       "ht_divisor": 1
     }
   },
   {
     ...
     "options": {
       ...
       "actions": [
         {
	   ...
           "stats": {
             "bytes": 0,
             "packets": 0,
             "drops": 0,
             "overlimits": 0,
             "requeues": 0,
             "backlog": 0,
             "qlen": 0
           }
         }
       ]
     }
   },
 ]

There's an extra element which the JQ query ends up choosing.

Instead of hard-coding a particular index, look for the entry on which a
selector .options.actions yields anything.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/12982a44471c834511a0ee6c1e8f57e3a5307105.1765289566.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/lib.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index f448bafb3f208e..0ec131b339bc42 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -280,7 +280,8 @@ tc_rule_stats_get()
 	local selector=${1:-.packets}; shift
 
 	tc -j -s filter show dev $dev $dir pref $pref \
-	    | jq ".[1].options.actions[].stats$selector"
+	    | jq ".[] | select(.options.actions) |
+		  .options.actions[].stats$selector"
 }
 
 tc_rule_handle_stats_get()

From 0c8b9a68b344ba2aa327278688d66c31f5f04275 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 9 Dec 2025 16:29:02 +0100
Subject: [PATCH 068/258] selftests: forwarding: vxlan_bridge_1q_mc_ul: Fix
 flakiness

This test runs an overlay traffic, forwarded over a multicast-routed VXLAN
underlay. In order to determine whether packets reach their intended
destination, it uses a TC match. For convenience, it uses a flower match,
which however does not allow matching on the encapsulated packet. So
various service traffic ends up being indistinguishable from the test
packets, and ends up confusing the test. To alleviate the problem, the test
uses sleep to allow the necessary service traffic to run and clear the
channel, before running the test traffic. This worked for a while, but
lately we have nevertheless seen flakiness of the test in the CI.

Fix the issue by using u32 to match the encapsulated packet as well. The
confusing packets seem to always be IPv6 multicast listener reports.
Realistically they could be ARP or other ICMP6 traffic as well. Therefore
look for ethertype IPv4 in the IPv4 traffic test, and for IPv6 / UDP
combination in the IPv6 traffic test.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/6438cb1613a2a667d3ff64089eb5994778f247af.1765289566.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/config       |  1 +
 .../net/forwarding/vxlan_bridge_1q_mc_ul.sh         | 13 +++++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index ce64518aaa1113..75a6c3d3c1da3d 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -29,6 +29,7 @@ CONFIG_NET_ACT_VLAN=m
 CONFIG_NET_CLS_BASIC=m
 CONFIG_NET_CLS_FLOWER=m
 CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_CLS_U32=m
 CONFIG_NET_EMATCH=y
 CONFIG_NET_EMATCH_META=m
 CONFIG_NETFILTER=y
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
index 6a570d256e07be..5ce19ca0884615 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
@@ -138,13 +138,18 @@ install_capture()
 	defer tc qdisc del dev "$dev" clsact
 
 	tc filter add dev "$dev" ingress proto ip pref 104 \
-	   flower skip_hw ip_proto udp dst_port "$VXPORT" \
-	   action pass
+	   u32 match ip protocol 0x11 0xff \
+	       match u16 "$VXPORT" 0xffff at 0x16 \
+	       match u16 0x0800 0xffff at 0x30 \
+	       action pass
 	defer tc filter del dev "$dev" ingress proto ip pref 104
 
 	tc filter add dev "$dev" ingress proto ipv6 pref 106 \
-	   flower skip_hw ip_proto udp dst_port "$VXPORT" \
-	   action pass
+	   u32 match ip6 protocol 0x11 0xff \
+	       match u16 "$VXPORT" 0xffff at 0x2a \
+	       match u16 0x86dd 0xffff at 0x44 \
+	       match u8 0x11 0xff at 0x4c \
+	       action pass
 	defer tc filter del dev "$dev" ingress proto ipv6 pref 106
 }
 

From 514520b34ba7d0eb36890f9f9c5c874a7e41544e Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 9 Dec 2025 16:29:03 +0100
Subject: [PATCH 069/258] selftests: forwarding: vxlan_bridge_1q_mc_ul: Drop
 useless sleeping

After fixing traffic matching in the previous patch, the test does not need
to use the sleep anymore. So drop vx_wait() altogether, migrate all callers
of vx{10,20}_create_wait() to the corresponding _create(), and drop the now
unused _create_wait() helpers.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/eabfe4fa12ae788cf3b8c5c876a989de81dfc3d3.1765289566.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/forwarding/vxlan_bridge_1q_mc_ul.sh   | 63 +++++++------------
 1 file changed, 22 insertions(+), 41 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
index 5ce19ca0884615..2cf4c6d9245ba1 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
@@ -253,13 +253,6 @@ vx_create()
 }
 export -f vx_create
 
-vx_wait()
-{
-	# Wait for all the ARP, IGMP etc. noise to settle down so that the
-	# tunnel is clear for measurements.
-	sleep 10
-}
-
 vx10_create()
 {
 	vx_create vx10 10 id 1000 "$@"
@@ -272,18 +265,6 @@ vx20_create()
 }
 export -f vx20_create
 
-vx10_create_wait()
-{
-	vx10_create "$@"
-	vx_wait
-}
-
-vx20_create_wait()
-{
-	vx20_create "$@"
-	vx_wait
-}
-
 ns_init_common()
 {
 	local ns=$1; shift
@@ -559,7 +540,7 @@ ipv4_nomcroute()
 	# Install a misleading (S,G) rule to attempt to trick the system into
 	# pushing the packets elsewhere.
 	adf_install_broken_sg
-	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$swp2"
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2"
 	do_test 4 10 0 "IPv4 nomcroute"
 }
 
@@ -567,7 +548,7 @@ ipv6_nomcroute()
 {
 	# Like for IPv4, install a misleading (S,G).
 	adf_install_broken_sg
-	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
 	do_test 6 10 0 "IPv6 nomcroute"
 }
 
@@ -586,35 +567,35 @@ ipv6_nomcroute_rx()
 ipv4_mcroute()
 {
 	adf_install_sg
-	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
 	do_test 4 10 10 "IPv4 mcroute"
 }
 
 ipv6_mcroute()
 {
 	adf_install_sg
-	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
 	do_test 6 10 10 "IPv6 mcroute"
 }
 
 ipv4_mcroute_rx()
 {
 	adf_install_sg
-	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
 	ipv4_do_test_rx 0 "IPv4 mcroute ping"
 }
 
 ipv6_mcroute_rx()
 {
 	adf_install_sg
-	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
 	ipv6_do_test_rx 0 "IPv6 mcroute ping"
 }
 
 ipv4_mcroute_changelink()
 {
 	adf_install_sg
-	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR"
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR"
 	ip link set dev vx10 type vxlan mcroute
 	sleep 1
 	do_test 4 10 10 "IPv4 mcroute changelink"
@@ -623,7 +604,7 @@ ipv4_mcroute_changelink()
 ipv6_mcroute_changelink()
 {
 	adf_install_sg
-	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
 	ip link set dev vx20 type vxlan mcroute
 	sleep 1
 	do_test 6 10 10 "IPv6 mcroute changelink"
@@ -632,47 +613,47 @@ ipv6_mcroute_changelink()
 ipv4_mcroute_starg()
 {
 	adf_install_starg
-	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
 	do_test 4 10 10 "IPv4 mcroute (*,G)"
 }
 
 ipv6_mcroute_starg()
 {
 	adf_install_starg
-	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
 	do_test 6 10 10 "IPv6 mcroute (*,G)"
 }
 
 ipv4_mcroute_starg_rx()
 {
 	adf_install_starg
-	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
 	ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping"
 }
 
 ipv6_mcroute_starg_rx()
 {
 	adf_install_starg
-	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
 	ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping"
 }
 
 ipv4_mcroute_noroute()
 {
-	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
 	do_test 4 0 0 "IPv4 mcroute, no route"
 }
 
 ipv6_mcroute_noroute()
 {
-	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
 	do_test 6 0 0 "IPv6 mcroute, no route"
 }
 
 ipv4_mcroute_fdb()
 {
 	adf_install_sg
-	vx10_create_wait local 192.0.2.100 dev "$IPMR" mcroute
+	vx10_create local 192.0.2.100 dev "$IPMR" mcroute
 	bridge fdb add dev vx10 \
 		00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR"
 	do_test 4 10 10 "IPv4 mcroute FDB"
@@ -681,7 +662,7 @@ ipv4_mcroute_fdb()
 ipv6_mcroute_fdb()
 {
 	adf_install_sg
-	vx20_create_wait local 2001:db8:4::1 dev "$IPMR" mcroute
+	vx20_create local 2001:db8:4::1 dev "$IPMR" mcroute
 	bridge -6 fdb add dev vx20 \
 		00:00:00:00:00:00 self static dst "$GROUP6" via "$IPMR"
 	do_test 6 10 10 "IPv6 mcroute FDB"
@@ -691,7 +672,7 @@ ipv6_mcroute_fdb()
 ipv4_mcroute_fdb_oif0()
 {
 	adf_install_sg
-	vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
 	bridge fdb del dev vx10 00:00:00:00:00:00
 	bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
 	do_test 4 10 10 "IPv4 mcroute oif=0"
@@ -708,7 +689,7 @@ ipv6_mcroute_fdb_oif0()
 	defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR"
 
 	adf_install_sg
-	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
 	bridge -6 fdb del dev vx20 00:00:00:00:00:00
 	bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6"
 	do_test 6 10 10 "IPv6 mcroute oif=0"
@@ -721,7 +702,7 @@ ipv4_mcroute_fdb_oif0_sep()
 	adf_install_sg_sep
 
 	adf_ip_addr_add lo 192.0.2.120/28
-	vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+	vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
 	bridge fdb del dev vx10 00:00:00:00:00:00
 	bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
 	do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0"
@@ -732,7 +713,7 @@ ipv4_mcroute_fdb_oif0_sep_rx()
 	adf_install_sg_sep_rx lo
 
 	adf_ip_addr_add lo 192.0.2.120/28
-	vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+	vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
 	bridge fdb del dev vx10 00:00:00:00:00:00
 	bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
 	ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping"
@@ -743,7 +724,7 @@ ipv4_mcroute_fdb_sep_rx()
 	adf_install_sg_sep_rx lo
 
 	adf_ip_addr_add lo 192.0.2.120/28
-	vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+	vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
 	bridge fdb del dev vx10 00:00:00:00:00:00
 	bridge fdb add \
 	       dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo
@@ -755,7 +736,7 @@ ipv6_mcroute_fdb_sep_rx()
 	adf_install_sg_sep_rx "X$IPMR"
 
 	adf_ip_addr_add "X$IPMR" 2001:db8:5::1/64
-	vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute
+	vx20_create local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute
 	bridge -6 fdb del dev vx20 00:00:00:00:00:00
 	bridge -6 fdb add dev vx20 00:00:00:00:00:00 \
 			  self static dst "$GROUP6" via "X$IPMR"

From 71cfa7c893a05d09e7dc14713b27a8309fd4a2db Mon Sep 17 00:00:00 2001
From: Marcus Hughes <marcus.hughes@betterinternet.ltd>
Date: Sun, 7 Dec 2025 21:03:55 +0000
Subject: [PATCH 070/258] net: sfp: extend Potron XGSPON quirk to cover
 additional EEPROM variant

Some Potron SFP+ XGSPON ONU sticks are shipped with different EEPROM
vendor ID and vendor name strings, but are otherwise functionally
identical to the existing "Potron SFP+ XGSPON ONU Stick" handled by
sfp_quirk_potron().

These modules, including units distributed under the "Better Internet"
branding, use the same UART pin assignment and require the same
TX_FAULT/LOS behaviour and boot delay. Re-use the existing Potron
quirk for this EEPROM variant.

Signed-off-by: Marcus Hughes <marcus.hughes@betterinternet.ltd>
Link: https://patch.msgid.link/20251207210355.333451-1-marcus.hughes@betterinternet.ltd
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/sfp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 0401fa6b24d257..6166e91963644d 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -497,6 +497,8 @@ static const struct sfp_quirk sfp_quirks[] = {
 	SFP_QUIRK("ALCATELLUCENT", "3FE46541AA", sfp_quirk_2500basex,
 		  sfp_fixup_nokia),
 
+	SFP_QUIRK_F("BIDB", "X-ONU-SFPP", sfp_fixup_potron),
+
 	// FLYPRO SFP-10GT-CS-30M uses Rollball protocol to talk to the PHY.
 	SFP_QUIRK_F("FLYPRO", "SFP-10GT-CS-30M", sfp_fixup_rollball),
 

From 8a11ff0948b5ad09b71896b7ccc850625f9878d1 Mon Sep 17 00:00:00 2001
From: Junrui Luo <moonafterrain@outlook.com>
Date: Thu, 4 Dec 2025 21:30:47 +0800
Subject: [PATCH 071/258] caif: fix integer underflow in cffrml_receive()

The cffrml_receive() function extracts a length field from the packet
header and, when FCS is disabled, subtracts 2 from this length without
validating that len >= 2.

If an attacker sends a malicious packet with a length field of 0 or 1
to an interface with FCS disabled, the subtraction causes an integer
underflow.

This can lead to memory exhaustion and kernel instability, potential
information disclosure if padding contains uninitialized kernel memory.

Fix this by validating that len >= 2 before performing the subtraction.

Reported-by: Yuhao Jiang <danisjiang@gmail.com>
Reported-by: Junrui Luo <moonafterrain@outlook.com>
Fixes: b482cd2053e3 ("net-caif: add CAIF core protocol stack")
Signed-off-by: Junrui Luo <moonafterrain@outlook.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/SYBPR01MB7881511122BAFEA8212A1608AFA6A@SYBPR01MB7881.ausprd01.prod.outlook.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/caif/cffrml.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index 6651a8dc62e04d..d4d63586053ad4 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -92,8 +92,15 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
 	len = le16_to_cpu(tmp);
 
 	/* Subtract for FCS on length if FCS is not used. */
-	if (!this->dofcs)
+	if (!this->dofcs) {
+		if (len < 2) {
+			++cffrml_rcv_error;
+			pr_err("Invalid frame length (%d)\n", len);
+			cfpkt_destroy(pkt);
+			return -EPROTO;
+		}
 		len -= 2;
+	}
 
 	if (cfpkt_setlen(pkt, len) < 0) {
 		++cffrml_rcv_error;

From b1e125ae425aba9b45252e933ca8df52a843ec70 Mon Sep 17 00:00:00 2001
From: Victor Nogueira <victor@mojatatu.com>
Date: Mon, 8 Dec 2025 16:01:24 -0300
Subject: [PATCH 072/258] net/sched: ets: Remove drr class from the active list
 if it changes to strict

Whenever a user issues an ets qdisc change command, transforming a
drr class into a strict one, the ets code isn't checking whether that
class was in the active list and removing it. This means that, if a
user changes a strict class (which was in the active list) back to a drr
one, that class will be added twice to the active list [1].

Doing so with the following commands:

tc qdisc add dev lo root handle 1: ets bands 2 strict 1
tc qdisc add dev lo parent 1:2 handle 20: \
    tbf rate 8bit burst 100b latency 1s
tc filter add dev lo parent 1: basic classid 1:2
ping -c1 -W0.01 -s 56 127.0.0.1
tc qdisc change dev lo root handle 1: ets bands 2 strict 2
tc qdisc change dev lo root handle 1: ets bands 2 strict 1
ping -c1 -W0.01 -s 56 127.0.0.1

Will trigger the following splat with list debug turned on:

[   59.279014][  T365] ------------[ cut here ]------------
[   59.279452][  T365] list_add double add: new=ffff88801d60e350, prev=ffff88801d60e350, next=ffff88801d60e2c0.
[   59.280153][  T365] WARNING: CPU: 3 PID: 365 at lib/list_debug.c:35 __list_add_valid_or_report+0x17f/0x220
[   59.280860][  T365] Modules linked in:
[   59.281165][  T365] CPU: 3 UID: 0 PID: 365 Comm: tc Not tainted 6.18.0-rc7-00105-g7e9f13163c13-dirty #239 PREEMPT(voluntary)
[   59.281977][  T365] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[   59.282391][  T365] RIP: 0010:__list_add_valid_or_report+0x17f/0x220
[   59.282842][  T365] Code: 89 c6 e8 d4 b7 0d ff 90 0f 0b 90 90 31 c0 e9 31 ff ff ff 90 48 c7 c7 e0 a0 22 9f 48 89 f2 48 89 c1 4c 89 c6 e8 b2 b7 0d ff 90 <0f> 0b 90 90 31 c0 e9 0f ff ff ff 48 89 f7 48 89 44 24 10 4c 89 44
...
[   59.288812][  T365] Call Trace:
[   59.289056][  T365]  <TASK>
[   59.289224][  T365]  ? srso_alias_return_thunk+0x5/0xfbef5
[   59.289546][  T365]  ets_qdisc_change+0xd2b/0x1e80
[   59.289891][  T365]  ? __lock_acquire+0x7e7/0x1be0
[   59.290223][  T365]  ? __pfx_ets_qdisc_change+0x10/0x10
[   59.290546][  T365]  ? srso_alias_return_thunk+0x5/0xfbef5
[   59.290898][  T365]  ? __mutex_trylock_common+0xda/0x240
[   59.291228][  T365]  ? __pfx___mutex_trylock_common+0x10/0x10
[   59.291655][  T365]  ? srso_alias_return_thunk+0x5/0xfbef5
[   59.291993][  T365]  ? srso_alias_return_thunk+0x5/0xfbef5
[   59.292313][  T365]  ? trace_contention_end+0xc8/0x110
[   59.292656][  T365]  ? srso_alias_return_thunk+0x5/0xfbef5
[   59.293022][  T365]  ? srso_alias_return_thunk+0x5/0xfbef5
[   59.293351][  T365]  tc_modify_qdisc+0x63a/0x1cf0

Fix this by always checking and removing an ets class from the active list
when changing it to strict.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/tree/net/sched/sch_ets.c?id=ce052b9402e461a9aded599f5b47e76bc727f7de#n663

Fixes: cd9b50adc6bb9 ("net/sched: ets: fix crash when flipping from 'strict' to 'quantum'")
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Link: https://patch.msgid.link/20251208190125.1868423-1-victor@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/sch_ets.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index ae46643e596d30..306e046276d465 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -664,6 +664,10 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
 			q->classes[i].deficit = quanta[i];
 		}
 	}
+	for (i = q->nstrict; i < nstrict; i++) {
+		if (cl_is_active(&q->classes[i]))
+			list_del_init(&q->classes[i].alist);
+	}
 	WRITE_ONCE(q->nstrict, nstrict);
 	memcpy(q->prio2band, priomap, sizeof(priomap));
 

From 5914428e0e44c4dcb64ad42cc37fa23a57fd1c5c Mon Sep 17 00:00:00 2001
From: Victor Nogueira <victor@mojatatu.com>
Date: Mon, 8 Dec 2025 16:01:25 -0300
Subject: [PATCH 073/258] selftests/tc-testing: Create tests to exercise ets
 classes active list misplacements

Add a test case for a bug fixed by Jamal [1] and for scenario where an
ets drr class is inserted into the active list twice.

- Try to delete ets drr class' qdisc while still keeping it in the
  active list
- Try to add ets class to the active list twice

[1] https://lore.kernel.org/netdev/20251128151919.576920-1-jhs@mojatatu.com/

Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Link: https://patch.msgid.link/20251208190125.1868423-2-victor@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/infra/qdiscs.json     | 78 +++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index 47de27fd4f90f8..6a39640aa2a862 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -1033,5 +1033,83 @@
         "teardown": [
             "$TC qdisc del dev $DUMMY handle 1: root"
         ]
+    },
+    {
+        "id": "6e4f",
+        "name": "Try to delete ets drr class' qdisc while still keeping it in the active list",
+        "category": [
+            "qdisc",
+            "ets",
+            "tbf"
+        ],
+        "plugins": {
+            "requires": [
+                "nsPlugin",
+                "scapyPlugin"
+            ]
+        },
+        "setup": [
+            "$IP link set dev $DUMMY up || true",
+            "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+            "$TC qdisc add dev $DUMMY root handle 1: ets bands 2 strict 1",
+            "$TC qdisc add dev $DUMMY parent 1:2 handle 20: tbf rate 8bit burst 100b latency 1s",
+            "$TC filter add dev $DUMMY parent 1: basic classid 1:2",
+            "ping -c2 -W0.01 -s 56 -I $DUMMY 10.10.11.11 || true",
+            "$TC qdisc change dev $DUMMY root handle 1: ets bands 2 strict 2",
+            "$TC qdisc change dev $DUMMY root handle 1: ets bands 1 strict 1"
+        ],
+        "cmdUnderTest": "ping -c1 -W0.01 -s 56 -I $DUMMY 10.10.11.11",
+        "expExitCode": "1",
+        "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY root",
+        "matchJSON": [
+            {
+                "kind": "ets",
+                "handle": "1:",
+                "bytes": 196,
+                "packets": 2
+            }
+        ],
+        "teardown": [
+            "$TC qdisc del dev $DUMMY root handle 1:"
+        ]
+    },
+    {
+        "id": "0b8f",
+        "name": "Try to add ets class to the active list twice",
+        "category": [
+            "qdisc",
+            "ets",
+            "tbf"
+        ],
+        "plugins": {
+            "requires": [
+                "nsPlugin",
+                "scapyPlugin"
+            ]
+        },
+        "setup": [
+            "$IP link set dev $DUMMY up || true",
+            "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+            "$TC qdisc add dev $DUMMY root handle 1: ets bands 2 strict 1",
+            "$TC qdisc add dev $DUMMY parent 1:2 handle 20: tbf rate 8bit burst 100b latency 1s",
+            "$TC filter add dev $DUMMY parent 1: basic classid 1:2",
+            "ping -c2 -W0.01 -s 56 -I $DUMMY 10.10.11.11 || true",
+            "$TC qdisc change dev $DUMMY root handle 1: ets bands 2 strict 2",
+            "$TC qdisc change dev $DUMMY root handle 1: ets bands 2 strict 1"
+        ],
+        "cmdUnderTest": "ping -c1 -W0.01 -s 56 -I $DUMMY 10.10.11.11",
+        "expExitCode": "1",
+        "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY root",
+        "matchJSON": [
+            {
+                "kind": "ets",
+                "handle": "1:",
+                "bytes": 98,
+                "packets": 1
+            }
+        ],
+        "teardown": [
+            "$TC qdisc del dev $DUMMY root handle 1:"
+        ]
     }
 ]

From 885bebac9909994050bbbeed0829c727e42bd1b7 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Tue, 9 Dec 2025 09:56:39 +0300
Subject: [PATCH 074/258] nfc: pn533: Fix error code in
 pn533_acr122_poweron_rdr()

Set the error code if "transferred != sizeof(cmd)" instead of
returning success.

Fixes: dbafc28955fa ("NFC: pn533: don't send USB data off of the stack")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Link: https://patch.msgid.link/aTfIJ9tZPmeUF4W1@stanley.mountain
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/pn533/usb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c
index ffd7367ce11945..018a80674f06ed 100644
--- a/drivers/nfc/pn533/usb.c
+++ b/drivers/nfc/pn533/usb.c
@@ -406,7 +406,7 @@ static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy)
 	if (rc || (transferred != sizeof(cmd))) {
 		nfc_err(&phy->udev->dev,
 			"Reader power on cmd error %d\n", rc);
-		return rc;
+		return rc ?: -EINVAL;
 	}
 
 	rc =  usb_submit_urb(phy->in_urb, GFP_KERNEL);

From 99c6931fe1f5d3de1174ce771cb86c57f75bff14 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@netfilter.org>
Date: Tue, 2 Dec 2025 15:20:15 +0100
Subject: [PATCH 075/258] MAINTAINERS: Remove Jozsef Kadlecsik from MAINTAINERS
 file

I'm retiring from maintaining netfilter. I'll still keep an
eye on ipset and respond to anything related to it.

Thank you!

Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 CREDITS     | 1 +
 MAINTAINERS | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/CREDITS b/CREDITS
index fa5397f4ebcdd0..cb9b2d6184d013 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1983,6 +1983,7 @@ D: netfilter: TCP window tracking code
 D: netfilter: raw table
 D: netfilter: iprange match
 D: netfilter: new logging interfaces
+D: netfilter: ipset
 D: netfilter: various other hacks
 S: Tata
 S: Hungary
diff --git a/MAINTAINERS b/MAINTAINERS
index e36689cd7cc7b3..45b22f4205936f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17808,7 +17808,6 @@ F:	drivers/net/ethernet/neterion/
 
 NETFILTER
 M:	Pablo Neira Ayuso <pablo@netfilter.org>
-M:	Jozsef Kadlecsik <kadlec@netfilter.org>
 M:	Florian Westphal <fw@strlen.de>
 R:	Phil Sutter <phil@nwl.cc>
 L:	netfilter-devel@vger.kernel.org

From 5ec8ca26fe93103577c904644b0957f069d0051a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 8 Dec 2025 16:00:34 +0100
Subject: [PATCH 076/258] netfilter: nf_nat: remove bogus direction check

Jakub reports spurious failures of the 'conntrack_reverse_clash.sh'
selftest.  A bogus test makes nat core resort to port rewrite even
though there is no need for this.

When the test is made, nf_nat_used_tuple() would already have caused us
to return if no other CPU had added a colliding entry.
Moreover, nf_nat_used_tuple() would have ignored the colliding entry if
their origin tuples had been the same.

All that is left to check is if the colliding entry in the hash table
is subject to NAT, and, if its not, if our entry matches in the reverse
direction, e.g. hash table has

addr1:1234 -> addr2:80, and we want to commit
addr2:80   -> addr1:1234.

Because we already checked that neither the new nor the committed entry is
subject to NAT we only have to check origin vs. reply tuple:
for non-nat entries, the reply tuple is always the inverted original.

Just in case there are more problems extend the error reporting
in the selftest while at it and dump conntrack table/stats on error.

Reported-by: Jakub Kicinski <kuba@kernel.org>
Closes: https://lore.kernel.org/netdev/20251206175135.4a56591b@kernel.org/
Fixes: d8f84a9bc7c4 ("netfilter: nf_nat: don't try nat source port reallocation for reverse dir clash")
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_nat_core.c                        | 14 +-------------
 .../net/netfilter/conntrack_reverse_clash.c        | 13 +++++++++----
 .../net/netfilter/conntrack_reverse_clash.sh       |  2 ++
 3 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 78a61dac4ade82..e6b24586d2fed5 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -294,25 +294,13 @@ nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple,
 
 	ct = nf_ct_tuplehash_to_ctrack(thash);
 
-	/* NB: IP_CT_DIR_ORIGINAL should be impossible because
-	 * nf_nat_used_tuple() handles origin collisions.
-	 *
-	 * Handle remote chance other CPU confirmed its ct right after.
-	 */
-	if (thash->tuple.dst.dir != IP_CT_DIR_REPLY)
-		goto out;
-
 	/* clashing connection subject to NAT? Retry with new tuple. */
 	if (READ_ONCE(ct->status) & uses_nat)
 		goto out;
 
 	if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-			      &ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple) &&
-	    nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-			      &ignored_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) {
+			      &ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple))
 		taken = false;
-		goto out;
-	}
 out:
 	nf_ct_put(ct);
 	return taken;
diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c
index 507930cee8cb69..462d628cc3bdba 100644
--- a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c
+++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c
@@ -33,9 +33,14 @@ static void die(const char *e)
 	exit(111);
 }
 
-static void die_port(uint16_t got, uint16_t want)
+static void die_port(const struct sockaddr_in *sin, uint16_t want)
 {
-	fprintf(stderr, "Port number changed, wanted %d got %d\n", want, ntohs(got));
+	uint16_t got = ntohs(sin->sin_port);
+	char str[INET_ADDRSTRLEN];
+
+	inet_ntop(AF_INET, &sin->sin_addr, str, sizeof(str));
+
+	fprintf(stderr, "Port number changed, wanted %d got %d from %s\n", want, got, str);
 	exit(1);
 }
 
@@ -100,7 +105,7 @@ int main(int argc, char *argv[])
 				die("child recvfrom");
 
 			if (peer.sin_port != htons(PORT))
-				die_port(peer.sin_port, PORT);
+				die_port(&peer, PORT);
 		} else {
 			if (sendto(s2, buf, LEN, 0, (struct sockaddr *)&sa1, sizeof(sa1)) != LEN)
 				continue;
@@ -109,7 +114,7 @@ int main(int argc, char *argv[])
 				die("parent recvfrom");
 
 			if (peer.sin_port != htons((PORT + 1)))
-				die_port(peer.sin_port, PORT + 1);
+				die_port(&peer, PORT + 1);
 		}
 	}
 
diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh
index a24c896347a889..dc7e9d6da0624c 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh
@@ -45,6 +45,8 @@ if ip netns exec "$ns0" ./conntrack_reverse_clash; then
 	echo "PASS: No SNAT performed for null bindings"
 else
 	echo "ERROR: SNAT performed without any matching snat rule"
+	ip netns exec "$ns0" conntrack -L
+	ip netns exec "$ns0" conntrack -S
 	exit 1
 fi
 

From a67fd55f6a09f4119b7232c19e0f348fe31ab0db Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 19 Nov 2025 13:42:05 +0100
Subject: [PATCH 077/258] netfilter: nf_tables: remove redundant chain
 validation on register store

This validation predates the introduction of the state machine that
determines when to enter slow path validation for error reporting.

Currently, table validation is perform when:

- new rule contains expressions that need validation.
- new set element with jump/goto verdict.

Validation on register store skips most checks with no basechains, still
this walks the graph searching for loops and ensuring expressions are
called from the right hook. Remove this.

Fixes: a654de8fdc18 ("netfilter: nf_tables: fix chain dependency validation")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_tables_api.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f3de2f9bbebf11..c46b1bb0efe0f1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -11676,21 +11676,10 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
 				       enum nft_data_types type,
 				       unsigned int len)
 {
-	int err;
-
 	switch (reg) {
 	case NFT_REG_VERDICT:
 		if (type != NFT_DATA_VERDICT)
 			return -EINVAL;
-
-		if (data != NULL &&
-		    (data->verdict.code == NFT_GOTO ||
-		     data->verdict.code == NFT_JUMP)) {
-			err = nft_chain_validate(ctx, data->verdict.chain);
-			if (err < 0)
-				return err;
-		}
-
 		break;
 	default:
 		if (type != NFT_DATA_VALUE)

From ac87efcf9e42f07526438b67405659a8c1d0480e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 10 Dec 2025 08:36:18 +0100
Subject: [PATCH 078/258] x86/boot/Documentation: Fix whitespace noise in
 boot.rst

There's a lot of unnecessary whitespace damage in this
file: space before tabs, etc., that has no formatting
or readability effect or advantages.

Fix them.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://patch.msgid.link/176535283007.498.16442167388418039352.tip-bot2@tip-bot2
---
 Documentation/arch/x86/boot.rst | 194 ++++++++++++++++----------------
 1 file changed, 97 insertions(+), 97 deletions(-)

diff --git a/Documentation/arch/x86/boot.rst b/Documentation/arch/x86/boot.rst
index 18574f010d46cd..dca3875a24351e 100644
--- a/Documentation/arch/x86/boot.rst
+++ b/Documentation/arch/x86/boot.rst
@@ -95,26 +95,26 @@ Memory Layout
 The traditional memory map for the kernel loader, used for Image or
 zImage kernels, typically looks like::
 
-  		|  			 |
+		|  			 |
   0A0000	+------------------------+
-  		|  Reserved for BIOS	 |	Do not use.  Reserved for BIOS EBDA.
+		|  Reserved for BIOS	 |	Do not use.  Reserved for BIOS EBDA.
   09A000	+------------------------+
-  		|  Command line		 |
-  		|  Stack/heap		 |	For use by the kernel real-mode code.
+		|  Command line		 |
+		|  Stack/heap		 |	For use by the kernel real-mode code.
   098000	+------------------------+
-  		|  Kernel setup		 |	The kernel real-mode code.
+		|  Kernel setup		 |	The kernel real-mode code.
   090200	+------------------------+
-  		|  Kernel boot sector	 |	The kernel legacy boot sector.
+		|  Kernel boot sector	 |	The kernel legacy boot sector.
   090000	+------------------------+
-  		|  Protected-mode kernel |	The bulk of the kernel image.
+		|  Protected-mode kernel |	The bulk of the kernel image.
   010000	+------------------------+
-  		|  Boot loader		 |	<- Boot sector entry point 0000:7C00
+		|  Boot loader		 |	<- Boot sector entry point 0000:7C00
   001000	+------------------------+
-  		|  Reserved for MBR/BIOS |
+		|  Reserved for MBR/BIOS |
   000800	+------------------------+
-  		|  Typically used by MBR |
+		|  Typically used by MBR |
   000600	+------------------------+
-  		|  BIOS use only	 |
+		|  BIOS use only	 |
   000000	+------------------------+
 
 When using bzImage, the protected-mode kernel was relocated to
@@ -142,27 +142,27 @@ above the 0x9A000 point; too many BIOSes will break above that point.
 For a modern bzImage kernel with boot protocol version >= 2.02, a
 memory layout like the following is suggested::
 
-  		~  			 ~
-  		|  Protected-mode kernel |
+		~  			 ~
+		|  Protected-mode kernel |
   100000	+------------------------+
-  		|  I/O memory hole	 |
+		|  I/O memory hole	 |
   0A0000	+------------------------+
-  		|  Reserved for BIOS	 |	Leave as much as possible unused
-  		~  			 ~
-  		|  Command line		 |	(Can also be below the X+10000 mark)
+		|  Reserved for BIOS	 |	Leave as much as possible unused
+		~  			 ~
+		|  Command line		 |	(Can also be below the X+10000 mark)
   X+10000	+------------------------+
-  		|  Stack/heap		 |	For use by the kernel real-mode code.
+		|  Stack/heap		 |	For use by the kernel real-mode code.
   X+08000	+------------------------+
-  		|  Kernel setup		 |	The kernel real-mode code.
-  		|  Kernel boot sector	 |	The kernel legacy boot sector.
+		|  Kernel setup		 |	The kernel real-mode code.
+		|  Kernel boot sector	 |	The kernel legacy boot sector.
   X		+------------------------+
-  		|  Boot loader		 |	<- Boot sector entry point 0000:7C00
+		|  Boot loader		 |	<- Boot sector entry point 0000:7C00
   001000	+------------------------+
-  		|  Reserved for MBR/BIOS |
+		|  Reserved for MBR/BIOS |
   000800	+------------------------+
-  		|  Typically used by MBR |
+		|  Typically used by MBR |
   000600	+------------------------+
-  		|  BIOS use only	 |
+		|  BIOS use only	 |
   000000	+------------------------+
 
   ... where the address X is as low as the design of the boot loader permits.
@@ -809,12 +809,12 @@ Protocol:	2.09+
   as follow::
 
    struct setup_data {
-   	__u64 next;
-   	__u32 type;
-   	__u32 len;
-   	__u8 data[];
+	__u64 next;
+	__u32 type;
+	__u32 len;
+	__u8 data[];
    }
-   
+
   Where, the next is a 64-bit physical pointer to the next node of
   linked list, the next field of the last node is 0; the type is used
   to identify the contents of data; the len is the length of data
@@ -835,10 +835,10 @@ Protocol:	2.09+
   protocol 2.15::
 
    struct setup_indirect {
-   	__u32 type;
-   	__u32 reserved;		/* Reserved, must be set to zero. */
-   	__u64 len;
-   	__u64 addr;
+	__u32 type;
+	__u32 reserved;		/* Reserved, must be set to zero. */
+	__u64 len;
+	__u64 addr;
    };
 
   The type member is a SETUP_INDIRECT | SETUP_* type. However, it cannot be
@@ -850,15 +850,15 @@ Protocol:	2.09+
   In this case setup_data and setup_indirect will look like this::
 
    struct setup_data {
-   	.next = 0,	/* or <addr_of_next_setup_data_struct> */
-   	.type = SETUP_INDIRECT,
-   	.len = sizeof(setup_indirect),
-   	.data[sizeof(setup_indirect)] = (struct setup_indirect) {
-   		.type = SETUP_INDIRECT | SETUP_E820_EXT,
-   		.reserved = 0,
-   		.len = <len_of_SETUP_E820_EXT_data>,
-   		.addr = <addr_of_SETUP_E820_EXT_data>,
-   	},
+	.next = 0,	/* or <addr_of_next_setup_data_struct> */
+	.type = SETUP_INDIRECT,
+	.len = sizeof(setup_indirect),
+	.data[sizeof(setup_indirect)] = (struct setup_indirect) {
+		.type = SETUP_INDIRECT | SETUP_E820_EXT,
+		.reserved = 0,
+		.len = <len_of_SETUP_E820_EXT_data>,
+		.addr = <addr_of_SETUP_E820_EXT_data>,
+	},
    }
 
 .. note::
@@ -897,11 +897,11 @@ Offset/size:	0x260/4
   The kernel runtime start address is determined by the following algorithm::
 
    if (relocatable_kernel) {
-    	if (load_address < pref_address)
-    		load_address = pref_address;
-    	runtime_start = align_up(load_address, kernel_alignment);
+	if (load_address < pref_address)
+		load_address = pref_address;
+	runtime_start = align_up(load_address, kernel_alignment);
    } else {
-    	runtime_start = pref_address;
+	runtime_start = pref_address;
    }
 
 Hence the necessary memory window location and size can be estimated by
@@ -975,22 +975,22 @@ after kernel_info_var_len_data label. Each chunk of variable size data has to
 be prefixed with header/magic and its size, e.g.::
 
   kernel_info:
-  	.ascii  "LToP"		/* Header, Linux top (structure). */
-  	.long   kernel_info_var_len_data - kernel_info
-  	.long   kernel_info_end - kernel_info
-  	.long   0x01234567	/* Some fixed size data for the bootloaders. */
+	.ascii  "LToP"		/* Header, Linux top (structure). */
+	.long   kernel_info_var_len_data - kernel_info
+	.long   kernel_info_end - kernel_info
+	.long   0x01234567	/* Some fixed size data for the bootloaders. */
   kernel_info_var_len_data:
   example_struct:		/* Some variable size data for the bootloaders. */
-  	.ascii  "0123"		/* Header/Magic. */
-  	.long   example_struct_end - example_struct
-  	.ascii  "Struct"
-  	.long   0x89012345
+	.ascii  "0123"		/* Header/Magic. */
+	.long   example_struct_end - example_struct
+	.ascii  "Struct"
+	.long   0x89012345
   example_struct_end:
   example_strings:		/* Some variable size data for the bootloaders. */
-  	.ascii  "ABCD"		/* Header/Magic. */
-  	.long   example_strings_end - example_strings
-  	.asciz  "String_0"
-  	.asciz  "String_1"
+	.ascii  "ABCD"		/* Header/Magic. */
+	.long   example_strings_end - example_strings
+	.asciz  "String_0"
+	.asciz  "String_1"
   example_strings_end:
   kernel_info_end:
 
@@ -1132,53 +1132,53 @@ Such a boot loader should enter the following fields in the header::
   unsigned long base_ptr;	/* base address for real-mode segment */
 
   if (setup_sects == 0)
-  	setup_sects = 4;
+	setup_sects = 4;
 
   if (protocol >= 0x0200) {
-  	type_of_loader = <type code>;
-  	if (loading_initrd) {
-  		ramdisk_image = <initrd_address>;
-  		ramdisk_size = <initrd_size>;
-  	}
-
-  	if (protocol >= 0x0202 && loadflags & 0x01)
-  		heap_end = 0xe000;
-  	else
-  		heap_end = 0x9800;
-
-  	if (protocol >= 0x0201) {
-  		heap_end_ptr = heap_end - 0x200;
-  		loadflags |= 0x80;		/* CAN_USE_HEAP */
-  	}
-
-  	if (protocol >= 0x0202) {
-  		cmd_line_ptr = base_ptr + heap_end;
-  		strcpy(cmd_line_ptr, cmdline);
-  	} else {
-  		cmd_line_magic	= 0xA33F;
-  		cmd_line_offset = heap_end;
-  		setup_move_size = heap_end + strlen(cmdline) + 1;
-  		strcpy(base_ptr + cmd_line_offset, cmdline);
-  	}
+	type_of_loader = <type code>;
+	if (loading_initrd) {
+		ramdisk_image = <initrd_address>;
+		ramdisk_size = <initrd_size>;
+	}
+
+	if (protocol >= 0x0202 && loadflags & 0x01)
+		heap_end = 0xe000;
+	else
+		heap_end = 0x9800;
+
+	if (protocol >= 0x0201) {
+		heap_end_ptr = heap_end - 0x200;
+		loadflags |= 0x80;		/* CAN_USE_HEAP */
+	}
+
+	if (protocol >= 0x0202) {
+		cmd_line_ptr = base_ptr + heap_end;
+		strcpy(cmd_line_ptr, cmdline);
+	} else {
+		cmd_line_magic	= 0xA33F;
+		cmd_line_offset = heap_end;
+		setup_move_size = heap_end + strlen(cmdline) + 1;
+		strcpy(base_ptr + cmd_line_offset, cmdline);
+	}
   } else {
-  	/* Very old kernel */
+	/* Very old kernel */
 
-  	heap_end = 0x9800;
+	heap_end = 0x9800;
 
-  	cmd_line_magic	= 0xA33F;
-  	cmd_line_offset = heap_end;
+	cmd_line_magic	= 0xA33F;
+	cmd_line_offset = heap_end;
 
-  	/* A very old kernel MUST have its real-mode code loaded at 0x90000 */
-  	if (base_ptr != 0x90000) {
-  		/* Copy the real-mode kernel */
-  		memcpy(0x90000, base_ptr, (setup_sects + 1) * 512);
-  		base_ptr = 0x90000;		 /* Relocated */
-  	}
+	/* A very old kernel MUST have its real-mode code loaded at 0x90000 */
+	if (base_ptr != 0x90000) {
+		/* Copy the real-mode kernel */
+		memcpy(0x90000, base_ptr, (setup_sects + 1) * 512);
+		base_ptr = 0x90000;		 /* Relocated */
+	}
 
-  	strcpy(0x90000 + cmd_line_offset, cmdline);
+	strcpy(0x90000 + cmd_line_offset, cmdline);
 
-  	/* It is recommended to clear memory up to the 32K mark */
-  	memset(0x90000 + (setup_sects + 1) * 512, 0, (64 - (setup_sects + 1)) * 512);
+	/* It is recommended to clear memory up to the 32K mark */
+	memset(0x90000 + (setup_sects + 1) * 512, 0, (64 - (setup_sects + 1)) * 512);
   }
 
 

From e58c88f0cb2d8ed89de78f6f17409d29cfab6c5c Mon Sep 17 00:00:00 2001
From: Minseong Kim <ii4gsp@gmail.com>
Date: Fri, 12 Dec 2025 00:29:23 -0800
Subject: [PATCH 079/258] Input: lkkbd - disable pending work before freeing
 device

lkkbd_interrupt() schedules lk->tq via schedule_work(), and the work
handler lkkbd_reinit() dereferences the lkkbd structure and its
serio/input_dev fields.

lkkbd_disconnect() and error paths in lkkbd_connect() free the lkkbd
structure without preventing the reinit work from being queued again
until serio_close() returns. This can allow the work handler to run
after the structure has been freed, leading to a potential use-after-free.

Use disable_work_sync() instead of cancel_work_sync() to ensure the
reinit work cannot be re-queued, and call it both in lkkbd_disconnect()
and in lkkbd_connect() error paths after serio_open().

Signed-off-by: Minseong Kim <ii4gsp@gmail.com>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20251212052314.16139-1-ii4gsp@gmail.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/lkkbd.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/input/keyboard/lkkbd.c b/drivers/input/keyboard/lkkbd.c
index c035216dd27c12..2f130f819363c6 100644
--- a/drivers/input/keyboard/lkkbd.c
+++ b/drivers/input/keyboard/lkkbd.c
@@ -670,7 +670,8 @@ static int lkkbd_connect(struct serio *serio, struct serio_driver *drv)
 
 	return 0;
 
- fail3:	serio_close(serio);
+ fail3:	disable_work_sync(&lk->tq);
+	serio_close(serio);
  fail2:	serio_set_drvdata(serio, NULL);
  fail1:	input_free_device(input_dev);
 	kfree(lk);
@@ -684,6 +685,8 @@ static void lkkbd_disconnect(struct serio *serio)
 {
 	struct lkkbd *lk = serio_get_drvdata(serio);
 
+	disable_work_sync(&lk->tq);
+
 	input_get_device(lk->dev);
 	input_unregister_device(lk->dev);
 	serio_close(serio);

From fe27e709d91fb645182751b602cb88966b4a1bb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= <jose.exposito89@gmail.com>
Date: Tue, 4 Nov 2025 11:22:35 +0100
Subject: [PATCH 080/258] drm/tests: hdmi: Handle
 drm_kunit_helper_enable_crtc_connector() returning EDEADLK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fedora/CentOS/RHEL CI is reporting intermittent failures while running
the KUnit tests present in drm_hdmi_state_helper_test.c [1].

While the specific test causing the failure change between runs, all of
them are caused by drm_kunit_helper_enable_crtc_connector() returning
-EDEADLK. The error trace always follow this structure:

    # <test name>: ASSERTION FAILED at
    # drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c:<line>
    Expected ret == 0, but
        ret == -35 (0xffffffffffffffdd)

As documented, if the drm_kunit_helper_enable_crtc_connector() function
returns -EDEADLK (-35), the entire atomic sequence must be restarted.

Handle this error code for all function calls.

Closes: https://datawarehouse.cki-project.org/issue/4039  [1]
Fixes: 6a5c0ad7e08e ("drm/tests: hdmi_state_helpers: Switch to new helper")
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: José Expósito <jose.exposito89@gmail.com>
Link: https://patch.msgid.link/20251104102258.10026-1-jose.exposito89@gmail.com
---
 .../drm/tests/drm_hdmi_state_helper_test.c    | 143 ++++++++++++++++++
 1 file changed, 143 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c
index 8bd412735000cb..70f9aa70214303 100644
--- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c
+++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c
@@ -257,10 +257,16 @@ static void drm_test_check_broadcast_rgb_crtc_mode_changed(struct kunit *test)
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx);
@@ -326,10 +332,16 @@ static void drm_test_check_broadcast_rgb_crtc_mode_not_changed(struct kunit *tes
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx);
@@ -397,10 +409,16 @@ static void drm_test_check_broadcast_rgb_auto_cea_mode(struct kunit *test)
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx);
@@ -457,10 +475,17 @@ static void drm_test_check_broadcast_rgb_auto_cea_mode_vic_1(struct kunit *test)
 	KUNIT_ASSERT_NOT_NULL(test, mode);
 
 	crtc = priv->crtc;
+
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     mode,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx);
@@ -518,10 +543,16 @@ static void drm_test_check_broadcast_rgb_full_cea_mode(struct kunit *test)
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx);
@@ -580,10 +611,17 @@ static void drm_test_check_broadcast_rgb_full_cea_mode_vic_1(struct kunit *test)
 	KUNIT_ASSERT_NOT_NULL(test, mode);
 
 	crtc = priv->crtc;
+
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     mode,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx);
@@ -643,10 +681,16 @@ static void drm_test_check_broadcast_rgb_limited_cea_mode(struct kunit *test)
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx);
@@ -705,10 +749,17 @@ static void drm_test_check_broadcast_rgb_limited_cea_mode_vic_1(struct kunit *te
 	KUNIT_ASSERT_NOT_NULL(test, mode);
 
 	crtc = priv->crtc;
+
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     mode,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx);
@@ -870,10 +921,16 @@ static void drm_test_check_output_bpc_crtc_mode_changed(struct kunit *test)
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx);
@@ -946,10 +1003,16 @@ static void drm_test_check_output_bpc_crtc_mode_not_changed(struct kunit *test)
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx);
@@ -1022,10 +1085,16 @@ static void drm_test_check_output_bpc_dvi(struct kunit *test)
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	conn_state = conn->state;
@@ -1069,10 +1138,16 @@ static void drm_test_check_tmds_char_rate_rgb_8bpc(struct kunit *test)
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	conn_state = conn->state;
@@ -1118,10 +1193,16 @@ static void drm_test_check_tmds_char_rate_rgb_10bpc(struct kunit *test)
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	conn_state = conn->state;
@@ -1167,10 +1248,16 @@ static void drm_test_check_tmds_char_rate_rgb_12bpc(struct kunit *test)
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	conn_state = conn->state;
@@ -1218,10 +1305,16 @@ static void drm_test_check_hdmi_funcs_reject_rate(struct kunit *test)
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	/* You shouldn't be doing that at home. */
@@ -1292,10 +1385,16 @@ static void drm_test_check_max_tmds_rate_bpc_fallback_rgb(struct kunit *test)
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_EXPECT_EQ(test, ret, 0);
 
 	conn_state = conn->state;
@@ -1440,10 +1539,16 @@ static void drm_test_check_max_tmds_rate_bpc_fallback_ignore_yuv422(struct kunit
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_EXPECT_EQ(test, ret, 0);
 
 	conn_state = conn->state;
@@ -1669,10 +1774,17 @@ static void drm_test_check_output_bpc_format_vic_1(struct kunit *test)
 	drm_modeset_acquire_init(&ctx, 0);
 
 	crtc = priv->crtc;
+
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     mode,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_EXPECT_EQ(test, ret, 0);
 
 	conn_state = conn->state;
@@ -1736,10 +1848,16 @@ static void drm_test_check_output_bpc_format_driver_rgb_only(struct kunit *test)
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_EXPECT_EQ(test, ret, 0);
 
 	conn_state = conn->state;
@@ -1805,10 +1923,16 @@ static void drm_test_check_output_bpc_format_display_rgb_only(struct kunit *test
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_EXPECT_EQ(test, ret, 0);
 
 	conn_state = conn->state;
@@ -1865,10 +1989,16 @@ static void drm_test_check_output_bpc_format_driver_8bpc_only(struct kunit *test
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_EXPECT_EQ(test, ret, 0);
 
 	conn_state = conn->state;
@@ -1927,10 +2057,16 @@ static void drm_test_check_output_bpc_format_display_8bpc_only(struct kunit *tes
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_EXPECT_EQ(test, ret, 0);
 
 	conn_state = conn->state;
@@ -1970,10 +2106,17 @@ static void drm_test_check_disable_connector(struct kunit *test)
 
 	drm = &priv->drm;
 	crtc = priv->crtc;
+
+retry_conn_enable:
 	ret = drm_kunit_helper_enable_crtc_connector(test, drm,
 						     crtc, conn,
 						     preferred,
 						     &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_conn_enable;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx);

From 141d95e42884628314f5ad9394657b0b35424300 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= <jose.exposito89@gmail.com>
Date: Tue, 4 Nov 2025 11:25:21 +0100
Subject: [PATCH 081/258] drm/tests: Handle EDEADLK in
 drm_test_check_valid_clones()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fedora/CentOS/RHEL CI is reporting intermittent failures while running
the drm_test_check_valid_clones() KUnit test.

The error log can be either [1]:

    # drm_test_check_valid_clones: ASSERTION FAILED at
    # drivers/gpu/drm/tests/drm_atomic_state_test.c:295
    Expected ret == param->expected_result, but
        ret == -35 (0xffffffffffffffdd)
        param->expected_result == 0 (0x0)

Or [2] depending on the test case:

    # drm_test_check_valid_clones: ASSERTION FAILED at
    # drivers/gpu/drm/tests/drm_atomic_state_test.c:295
    Expected ret == param->expected_result, but
        ret == -35 (0xffffffffffffffdd)
        param->expected_result == -22 (0xffffffffffffffea)

Restart the atomic sequence when EDEADLK is returned.

[1] https://s3.amazonaws.com/arr-cki-prod-trusted-artifacts/trusted-artifacts/2113057246/test_x86_64/11802139999/artifacts/jobwatch/logs/recipes/19824965/tasks/204347800/results/946112713/logs/dmesg.log
[2] https://s3.amazonaws.com/arr-cki-prod-trusted-artifacts/trusted-artifacts/2106744297/test_aarch64/11762450907/artifacts/jobwatch/logs/recipes/19797942/tasks/204139727/results/945094561/logs/dmesg.log

Fixes: 88849f24e2ab ("drm/tests: Add test for drm_atomic_helper_check_modeset()")
Closes: https://datawarehouse.cki-project.org/issue/4004
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: José Expósito <jose.exposito89@gmail.com>
Link: https://patch.msgid.link/20251104102535.12212-1-jose.exposito89@gmail.com
---
 drivers/gpu/drm/tests/drm_atomic_state_test.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_atomic_state_test.c b/drivers/gpu/drm/tests/drm_atomic_state_test.c
index 2f6ac7a09f4458..1e857d86574cc5 100644
--- a/drivers/gpu/drm/tests/drm_atomic_state_test.c
+++ b/drivers/gpu/drm/tests/drm_atomic_state_test.c
@@ -283,7 +283,14 @@ static void drm_test_check_valid_clones(struct kunit *test)
 	state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state);
 
+retry:
 	crtc_state = drm_atomic_get_crtc_state(state, priv->crtc);
+	if (PTR_ERR(crtc_state) == -EDEADLK) {
+		drm_atomic_state_clear(state);
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry;
+	}
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_state);
 
 	crtc_state->encoder_mask = param->encoder_mask;
@@ -292,6 +299,12 @@ static void drm_test_check_valid_clones(struct kunit *test)
 	crtc_state->mode_changed = true;
 
 	ret = drm_atomic_helper_check_modeset(drm, state);
+	if (ret == -EDEADLK) {
+		drm_atomic_state_clear(state);
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry;
+	}
 	KUNIT_ASSERT_EQ(test, ret, param->expected_result);
 
 	drm_modeset_drop_locks(&ctx);

From 526aafabd756cc56401b383d6ae554af3e21dcdd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= <jose.exposito89@gmail.com>
Date: Tue, 4 Nov 2025 11:25:22 +0100
Subject: [PATCH 082/258] drm/tests: Handle EDEADLK in set_up_atomic_state()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fedora/CentOS/RHEL CI is reporting intermittent failures while running
the drm_validate_modeset test [1]:

    # drm_test_check_connector_changed_modeset: EXPECTATION FAILED at
    # drivers/gpu/drm/tests/drm_atomic_state_test.c:162
    Expected ret == 0, but
        ret == -35 (0xffffffffffffffdd)

Change the set_up_atomic_state() helper function to return on error and
restart the atomic sequence when the returned error is EDEADLK.

[1] https://s3.amazonaws.com/arr-cki-prod-trusted-artifacts/trusted-artifacts/2106744096/test_x86_64/11762450343/artifacts/jobwatch/logs/recipes/19797909/tasks/204139142/results/945095586/logs/dmesg.log

Fixes: 73d934d7b6e3 ("drm/tests: Add test for drm_atomic_helper_commit_modeset_disables()")
Closes: https://datawarehouse.cki-project.org/issue/4004
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: José Expósito <jose.exposito89@gmail.com>
Link: https://patch.msgid.link/20251104102535.12212-2-jose.exposito89@gmail.com
---
 drivers/gpu/drm/tests/drm_atomic_state_test.c | 27 +++++++++++++++----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/tests/drm_atomic_state_test.c b/drivers/gpu/drm/tests/drm_atomic_state_test.c
index 1e857d86574cc5..bc27f65b282339 100644
--- a/drivers/gpu/drm/tests/drm_atomic_state_test.c
+++ b/drivers/gpu/drm/tests/drm_atomic_state_test.c
@@ -156,24 +156,29 @@ static int set_up_atomic_state(struct kunit *test,
 
 	if (connector) {
 		conn_state = drm_atomic_get_connector_state(state, connector);
-		KUNIT_ASSERT_NOT_ERR_OR_NULL(test, conn_state);
+		if (IS_ERR(conn_state))
+			return PTR_ERR(conn_state);
 
 		ret = drm_atomic_set_crtc_for_connector(conn_state, crtc);
-		KUNIT_EXPECT_EQ(test, ret, 0);
+		if (ret)
+			return ret;
 	}
 
 	crtc_state = drm_atomic_get_crtc_state(state, crtc);
-	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_state);
+	if (IS_ERR(crtc_state))
+		return PTR_ERR(crtc_state);
 
 	ret = drm_atomic_set_mode_for_crtc(crtc_state, &drm_atomic_test_mode);
-	KUNIT_EXPECT_EQ(test, ret, 0);
+	if (ret)
+		return ret;
 
 	crtc_state->enable = true;
 	crtc_state->active = true;
 
 	if (connector) {
 		ret = drm_atomic_commit(state);
-		KUNIT_ASSERT_EQ(test, ret, 0);
+		if (ret)
+			return ret;
 	} else {
 		// dummy connector mask
 		crtc_state->connector_mask = DRM_TEST_CONN_0;
@@ -206,7 +211,13 @@ static void drm_test_check_connector_changed_modeset(struct kunit *test)
 	drm_modeset_acquire_init(&ctx, 0);
 
 	// first modeset to enable
+retry_set_up:
 	ret = set_up_atomic_state(test, priv, old_conn, &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_set_up;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx);
@@ -277,7 +288,13 @@ static void drm_test_check_valid_clones(struct kunit *test)
 
 	drm_modeset_acquire_init(&ctx, 0);
 
+retry_set_up:
 	ret = set_up_atomic_state(test, priv, NULL, &ctx);
+	if (ret == -EDEADLK) {
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry_set_up;
+	}
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx);

From 630efee9493cf64ff7b9a1652978807fef385fdd Mon Sep 17 00:00:00 2001
From: Karol Wachowski <karol.wachowski@linux.intel.com>
Date: Fri, 12 Dec 2025 14:41:33 +0100
Subject: [PATCH 083/258] drm: Fix object leak in DRM_IOCTL_GEM_CHANGE_HANDLE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add missing drm_gem_object_put() call when drm_gem_object_lookup()
successfully returns an object. This fixes a GEM object reference
leak that can prevent driver modules from unloading when using
prime buffers.

Fixes: 53096728b891 ("drm: Add DRM prime interface to reassign GEM handle")
Cc: <stable@vger.kernel.org> # v6.18+
Signed-off-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Link: https://lore.kernel.org/r/20251212134133.475218-1-karol.wachowski@linux.intel.com
---
 drivers/gpu/drm/drm_gem.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index f884d155a832a3..3b9df655e83772 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -979,8 +979,10 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data,
 	if (!obj)
 		return -ENOENT;
 
-	if (args->handle == args->new_handle)
-		return 0;
+	if (args->handle == args->new_handle) {
+		ret = 0;
+		goto out;
+	}
 
 	mutex_lock(&file_priv->prime.lock);
 
@@ -1012,6 +1014,8 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data,
 
 out_unlock:
 	mutex_unlock(&file_priv->prime.lock);
+out:
+	drm_gem_object_put(obj);
 
 	return ret;
 }

From d9f514d3e6ee48c34d70d637479b4c9384832d4f Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 23 Nov 2025 22:51:23 +0000
Subject: [PATCH 084/258] block: move around bio flagging helpers

We'll need bio_flagged() earlier in bio.h for later patches, move it
together with all related helpers, and mark the bio_flagged()'s bio
argument as const.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/include/linux/bio.h b/include/linux/bio.h
index ad2d57908c1c0e..c75a9b3672aa41 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -46,6 +46,21 @@ static inline unsigned int bio_max_segs(unsigned int nr_segs)
 #define bio_data_dir(bio) \
 	(op_is_write(bio_op(bio)) ? WRITE : READ)
 
+static inline bool bio_flagged(const struct bio *bio, unsigned int bit)
+{
+	return bio->bi_flags & (1U << bit);
+}
+
+static inline void bio_set_flag(struct bio *bio, unsigned int bit)
+{
+	bio->bi_flags |= (1U << bit);
+}
+
+static inline void bio_clear_flag(struct bio *bio, unsigned int bit)
+{
+	bio->bi_flags &= ~(1U << bit);
+}
+
 /*
  * Check whether this bio carries any data or not. A NULL bio is allowed.
  */
@@ -225,21 +240,6 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count)
 	atomic_set(&bio->__bi_cnt, count);
 }
 
-static inline bool bio_flagged(struct bio *bio, unsigned int bit)
-{
-	return bio->bi_flags & (1U << bit);
-}
-
-static inline void bio_set_flag(struct bio *bio, unsigned int bit)
-{
-	bio->bi_flags |= (1U << bit);
-}
-
-static inline void bio_clear_flag(struct bio *bio, unsigned int bit)
-{
-	bio->bi_flags &= ~(1U << bit);
-}
-
 static inline struct bio_vec *bio_first_bvec_all(struct bio *bio)
 {
 	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));

From 9637fc3bdd10c8e073f71897bd35babbd21e9b29 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 12 Dec 2025 10:16:59 -0700
Subject: [PATCH 085/258] selftests: ublk: fix overflow in
 ublk_queue_auto_zc_fallback()

The functions ublk_queue_use_zc(), ublk_queue_use_auto_zc(), and
ublk_queue_auto_zc_fallback() were returning int, but performing
bitwise AND on q->flags which is __u64.

When a flag bit is set in the upper 32 bits (beyond INT_MAX), the
result of the bitwise AND operation could overflow when cast to int,
leading to incorrect boolean evaluation.

For example, if UBLKS_Q_AUTO_BUF_REG_FALLBACK is 0x8000000000000000:
  - (u64)flags & 0x8000000000000000 = 0x8000000000000000
  - Cast to int: undefined behavior / incorrect value
  - Used in if(): may evaluate incorrectly

Fix by:
1. Changing return type from int to bool for semantic correctness
2. Using !! to explicitly convert to boolean (0 or 1)

This ensures the functions return proper boolean values regardless
of which bit position the flags occupy in the 64-bit field.

Fixes: c3a6d48f86da ("selftests: ublk: remove ublk queue self-defined flags")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 tools/testing/selftests/ublk/kublk.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index fe42705c6d42dc..6e8f381f34810d 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -390,19 +390,19 @@ static inline int ublk_completed_tgt_io(struct ublk_thread *t,
 	return --io->tgt_ios == 0;
 }
 
-static inline int ublk_queue_use_zc(const struct ublk_queue *q)
+static inline bool ublk_queue_use_zc(const struct ublk_queue *q)
 {
-	return q->flags & UBLK_F_SUPPORT_ZERO_COPY;
+	return !!(q->flags & UBLK_F_SUPPORT_ZERO_COPY);
 }
 
-static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q)
+static inline bool ublk_queue_use_auto_zc(const struct ublk_queue *q)
 {
-	return q->flags & UBLK_F_AUTO_BUF_REG;
+	return !!(q->flags & UBLK_F_AUTO_BUF_REG);
 }
 
-static inline int ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
+static inline bool ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
 {
-	return q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK;
+	return !!(q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK);
 }
 
 static inline int ublk_queue_no_buf(const struct ublk_queue *q)

From 1fd4b8d7e3cf102bd01a295460d629e15152d2b3 Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Fri, 12 Dec 2025 10:17:00 -0700
Subject: [PATCH 086/258] selftests: ublk: correct last_rw map type in
 seq_io.bt

The last_rw map is initialized with a value of 0 but later assigned the
value args.sector + args.nr_sector, which has type sector_t = u64.
bpftrace complains about the type mismatch between int64 and uint64:
trace/seq_io.bt:18:3-59: ERROR: Type mismatch for @last_rw: trying to assign value of type 'uint64' when map already contains a value of type 'int64'
        @last_rw[$dev, str($2)] = (args.sector + args.nr_sector);

Cast the initial value to uint64 so bpftrace will load the program.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 tools/testing/selftests/ublk/trace/seq_io.bt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt
index 272ac54c9d5fa5..507a3ca05abfc5 100644
--- a/tools/testing/selftests/ublk/trace/seq_io.bt
+++ b/tools/testing/selftests/ublk/trace/seq_io.bt
@@ -4,7 +4,7 @@
 	$3:     strlen($2)
 */
 BEGIN {
-	@last_rw[$1, str($2)] = 0;
+	@last_rw[$1, str($2)] = (uint64)0;
 }
 tracepoint:block:block_rq_complete
 {

From fe8c0182d49414740e639c1ca6b7d4b8e36b77fe Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Fri, 12 Dec 2025 10:17:01 -0700
Subject: [PATCH 087/258] selftests: ublk: remove unused ios map in seq_io.bt

The ios map populated by seq_io.bt is never read, so remove it.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 tools/testing/selftests/ublk/trace/seq_io.bt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt
index 507a3ca05abfc5..b2f60a92b118b4 100644
--- a/tools/testing/selftests/ublk/trace/seq_io.bt
+++ b/tools/testing/selftests/ublk/trace/seq_io.bt
@@ -17,7 +17,6 @@ tracepoint:block:block_rq_complete
 		}
 		@last_rw[$dev, str($2)] = (args.sector + args.nr_sector);
 	}
-	@ios = count();
 }
 
 END {

From 58eec4f3fc2878de51239916953b736b674d5071 Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Fri, 12 Dec 2025 10:17:02 -0700
Subject: [PATCH 088/258] selftests: ublk: fix fio arguments in
 run_io_and_recover()

run_io_and_recover() invokes fio with --size="${size}", but the variable
size doesn't exist. Thus, the argument expands to --size=, which causes
fio to exit immediately with an error without issuing any I/O. Pass the
value for size as the first argument to the function.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 tools/testing/selftests/ublk/test_common.sh     | 5 +++--
 tools/testing/selftests/ublk/test_generic_04.sh | 2 +-
 tools/testing/selftests/ublk/test_generic_05.sh | 2 +-
 tools/testing/selftests/ublk/test_generic_11.sh | 2 +-
 4 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh
index 8a4dbd09feb0a8..6f1c042de40e79 100755
--- a/tools/testing/selftests/ublk/test_common.sh
+++ b/tools/testing/selftests/ublk/test_common.sh
@@ -333,11 +333,12 @@ run_io_and_kill_daemon()
 
 run_io_and_recover()
 {
-	local action=$1
+	local size=$1
+	local action=$2
 	local state
 	local dev_id
 
-	shift 1
+	shift 2
 	dev_id=$(_add_ublk_dev "$@")
 	_check_add_dev "$TID" $?
 
diff --git a/tools/testing/selftests/ublk/test_generic_04.sh b/tools/testing/selftests/ublk/test_generic_04.sh
index 8b533217d4a174..baf5b156193de5 100755
--- a/tools/testing/selftests/ublk/test_generic_04.sh
+++ b/tools/testing/selftests/ublk/test_generic_04.sh
@@ -8,7 +8,7 @@ ERR_CODE=0
 
 ublk_run_recover_test()
 {
-	run_io_and_recover "kill_daemon" "$@"
+	run_io_and_recover 256M "kill_daemon" "$@"
 	ERR_CODE=$?
 	if [ ${ERR_CODE} -ne 0 ]; then
 		echo "$TID failure: $*"
diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh
index 398e9e2b58e159..7b5083afc02abe 100755
--- a/tools/testing/selftests/ublk/test_generic_05.sh
+++ b/tools/testing/selftests/ublk/test_generic_05.sh
@@ -8,7 +8,7 @@ ERR_CODE=0
 
 ublk_run_recover_test()
 {
-	run_io_and_recover "kill_daemon" "$@"
+	run_io_and_recover 256M "kill_daemon" "$@"
 	ERR_CODE=$?
 	if [ ${ERR_CODE} -ne 0 ]; then
 		echo "$TID failure: $*"
diff --git a/tools/testing/selftests/ublk/test_generic_11.sh b/tools/testing/selftests/ublk/test_generic_11.sh
index a00357a5ec6b82..d1f973c8c64590 100755
--- a/tools/testing/selftests/ublk/test_generic_11.sh
+++ b/tools/testing/selftests/ublk/test_generic_11.sh
@@ -8,7 +8,7 @@ ERR_CODE=0
 
 ublk_run_quiesce_recover()
 {
-	run_io_and_recover "quiesce_dev" "$@"
+	run_io_and_recover 256M "quiesce_dev" "$@"
 	ERR_CODE=$?
 	if [ ${ERR_CODE} -ne 0 ]; then
 		echo "$TID failure: $*"

From 20da98a07bcbacb15fc627f6cf426a2f4d1501e5 Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Fri, 12 Dec 2025 10:17:03 -0700
Subject: [PATCH 089/258] selftests: ublk: use auto_zc for PER_IO_DAEMON tests
 in stress_04

stress_04 is described as "run IO and kill ublk server(zero copy)" but
the --per_io_tasks tests cases don't use zero copy. Plus, one of the
test cases is duplicated. Add --auto_zc to these test cases and
--auto_zc_fallback to one of the duplicated ones. This matches the test
cases in stress_03.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 tools/testing/selftests/ublk/test_stress_04.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh
index 3f901db4d09dc9..c0c926ce053911 100755
--- a/tools/testing/selftests/ublk/test_stress_04.sh
+++ b/tools/testing/selftests/ublk/test_stress_04.sh
@@ -40,10 +40,10 @@ if _have_feature "AUTO_BUF_REG"; then
 fi
 
 if _have_feature "PER_IO_DAEMON"; then
-	ublk_io_and_kill_daemon 8G -t null -q 4 --nthreads 8 --per_io_tasks &
-	ublk_io_and_kill_daemon 256M -t loop -q 4 --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" &
-	ublk_io_and_kill_daemon 256M -t stripe -q 4 --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
-	ublk_io_and_kill_daemon 8G -t null -q 4 --nthreads 8 --per_io_tasks &
+	ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc --nthreads 8 --per_io_tasks &
+	ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" &
+	ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+	ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback --nthreads 8 --per_io_tasks &
 fi
 wait
 

From d8295408e0cf529be78ee4ed8b6758a9fb209feb Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Fri, 12 Dec 2025 10:17:04 -0700
Subject: [PATCH 090/258] selftests: ublk: don't share backing files between
 ublk servers

stress_04 is missing a wait between blocks of tests, meaning multiple
ublk servers will be running in parallel using the same backing files.
Add a wait after each section to ensure each backing file is in use by a
single ublk server at a time.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 tools/testing/selftests/ublk/test_stress_04.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh
index c0c926ce053911..efa8dc33234b56 100755
--- a/tools/testing/selftests/ublk/test_stress_04.sh
+++ b/tools/testing/selftests/ublk/test_stress_04.sh
@@ -31,12 +31,14 @@ _create_backfile 2 128M
 ublk_io_and_kill_daemon 8G -t null -q 4 -z --no_ublk_fixed_fd &
 ublk_io_and_kill_daemon 256M -t loop -q 4 -z --no_ublk_fixed_fd "${UBLK_BACKFILES[0]}" &
 ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
 
 if _have_feature "AUTO_BUF_REG"; then
 	ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc &
 	ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" &
 	ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc --no_ublk_fixed_fd "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
 	ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback &
+	wait
 fi
 
 if _have_feature "PER_IO_DAEMON"; then
@@ -44,8 +46,8 @@ if _have_feature "PER_IO_DAEMON"; then
 	ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" &
 	ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
 	ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback --nthreads 8 --per_io_tasks &
+	wait
 fi
-wait
 
 _cleanup_test "stress"
 _show_result $TID $ERR_CODE

From 52bc483763262b66e51818a82e03cad0c5bfef67 Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Fri, 12 Dec 2025 10:17:05 -0700
Subject: [PATCH 091/258] selftests: ublk: forbid multiple data copy modes

The kublk mock ublk server allows multiple data copy mode arguments to
be passed on the command line (--zero_copy, --get_data, and --auto_zc).
The ublk device will be created with all the requested feature flags,
however kublk will only use one of the modes to interact with request
data (arbitrarily preferring auto_zc over zero_copy over get_data). To
clarify the intent of the test, don't allow multiple data copy modes to
be specified. --zero_copy and --auto_zc are allowed together for
--auto_zc_fallback, which uses both copy modes.
Don't set UBLK_F_USER_COPY for zero_copy, as it's a separate feature.
Fix the test cases in test_stress_05 passing --get_data along with
--zero_copy or --auto_zc.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 tools/testing/selftests/ublk/kublk.c           | 11 ++++++++++-
 tools/testing/selftests/ublk/test_stress_05.sh | 10 +++++-----
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index f8fa102a627fd7..4dd02cb083baaf 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -1613,7 +1613,7 @@ int main(int argc, char *argv[])
 			ctx.queue_depth = strtol(optarg, NULL, 10);
 			break;
 		case 'z':
-			ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
+			ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY;
 			break;
 		case 'r':
 			value = strtol(optarg, NULL, 10);
@@ -1686,6 +1686,15 @@ int main(int argc, char *argv[])
 		return -EINVAL;
 	}
 
+	if (!!(ctx.flags & UBLK_F_NEED_GET_DATA) +
+	    !!(ctx.flags & UBLK_F_USER_COPY) +
+	    (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY && !ctx.auto_zc_fallback) +
+	    (ctx.flags & UBLK_F_AUTO_BUF_REG && !ctx.auto_zc_fallback) +
+	    ctx.auto_zc_fallback > 1) {
+		fprintf(stderr, "too many data copy modes specified\n");
+		return -EINVAL;
+	}
+
 	i = optind;
 	while (i < argc && ctx.nr_files < MAX_BACK_FILES) {
 		ctx.files[ctx.nr_files++] = argv[i++];
diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh
index 274295061042e5..68a19414430257 100755
--- a/tools/testing/selftests/ublk/test_stress_05.sh
+++ b/tools/testing/selftests/ublk/test_stress_05.sh
@@ -58,17 +58,17 @@ done
 
 if _have_feature "ZERO_COPY"; then
 	for reissue in $(seq 0 1); do
-		ublk_io_and_remove 8G -t null -q 4 -g -z -r 1 -i "$reissue" &
-		ublk_io_and_remove 256M -t loop -q 4 -g -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
+		ublk_io_and_remove 8G -t null -q 4 -z -r 1 -i "$reissue" &
+		ublk_io_and_remove 256M -t loop -q 4 -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
 		wait
 	done
 fi
 
 if _have_feature "AUTO_BUF_REG"; then
 	for reissue in $(seq 0 1); do
-		ublk_io_and_remove 8G -t null -q 4 -g --auto_zc -r 1 -i "$reissue" &
-		ublk_io_and_remove 256M -t loop -q 4 -g --auto_zc -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
-		ublk_io_and_remove 8G -t null -q 4 -g -z --auto_zc --auto_zc_fallback -r 1 -i "$reissue" &
+		ublk_io_and_remove 8G -t null -q 4 --auto_zc -r 1 -i "$reissue" &
+		ublk_io_and_remove 256M -t loop -q 4 --auto_zc -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
+		ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback -r 1 -i "$reissue" &
 		wait
 	done
 fi

From b9f0a94c3b2e7deaff93e4c4de335e3054223ff4 Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Fri, 12 Dec 2025 10:17:06 -0700
Subject: [PATCH 092/258] selftests: ublk: add support for user copy to kublk

The ublk selftests mock ublk server kublk supports every data copy mode
except user copy. Add support for user copy to kublk, enabled via the
--user_copy (-u) command line argument. On writes, issue pread() calls
to copy the write data into the ublk_io's buffer before dispatching the
write to the target implementation. On reads, issue pwrite() calls to
copy read data from the ublk_io's buffer before committing the request.
Copy in 2 KB chunks to provide some coverage of the offseting logic.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 tools/testing/selftests/ublk/file_backed.c |  7 +--
 tools/testing/selftests/ublk/kublk.c       | 53 ++++++++++++++++++++--
 tools/testing/selftests/ublk/kublk.h       | 11 +++++
 tools/testing/selftests/ublk/stripe.c      |  2 +-
 4 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c
index cd9fe69ecce201..269d5f124e06a0 100644
--- a/tools/testing/selftests/ublk/file_backed.c
+++ b/tools/testing/selftests/ublk/file_backed.c
@@ -34,8 +34,9 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
 	unsigned zc = ublk_queue_use_zc(q);
 	unsigned auto_zc = ublk_queue_use_auto_zc(q);
 	enum io_uring_op op = ublk_to_uring_op(iod, zc | auto_zc);
+	struct ublk_io *io = ublk_get_io(q, tag);
 	struct io_uring_sqe *sqe[3];
-	void *addr = (zc | auto_zc) ? NULL : (void *)iod->addr;
+	void *addr = io->buf_addr;
 
 	if (!zc || auto_zc) {
 		ublk_io_alloc_sqes(t, sqe, 1);
@@ -56,7 +57,7 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
 
 	ublk_io_alloc_sqes(t, sqe, 3);
 
-	io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
+	io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, io->buf_index);
 	sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
 	sqe[0]->user_data = build_user_data(tag,
 			ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
@@ -68,7 +69,7 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
 	sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
 	sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
 
-	io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
+	io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, io->buf_index);
 	sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1);
 
 	return 2;
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index 4dd02cb083baaf..185ba553686abe 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -596,6 +596,38 @@ static void ublk_set_auto_buf_reg(const struct ublk_queue *q,
 	sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf);
 }
 
+/* Copy in pieces to test the buffer offset logic */
+#define UBLK_USER_COPY_LEN 2048
+
+static void ublk_user_copy(const struct ublk_io *io, __u8 match_ublk_op)
+{
+	const struct ublk_queue *q = ublk_io_to_queue(io);
+	const struct ublksrv_io_desc *iod = ublk_get_iod(q, io->tag);
+	__u64 off = ublk_user_copy_offset(q->q_id, io->tag);
+	__u8 ublk_op = ublksrv_get_op(iod);
+	__u32 len = iod->nr_sectors << 9;
+	void *addr = io->buf_addr;
+
+	if (ublk_op != match_ublk_op)
+		return;
+
+	while (len) {
+		__u32 copy_len = min(len, UBLK_USER_COPY_LEN);
+		ssize_t copied;
+
+		if (ublk_op == UBLK_IO_OP_WRITE)
+			copied = pread(q->ublk_fd, addr, copy_len, off);
+		else if (ublk_op == UBLK_IO_OP_READ)
+			copied = pwrite(q->ublk_fd, addr, copy_len, off);
+		else
+			assert(0);
+		assert(copied == (ssize_t)copy_len);
+		addr += copy_len;
+		off += copy_len;
+		len -= copy_len;
+	}
+}
+
 int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io)
 {
 	struct ublk_queue *q = ublk_io_to_queue(io);
@@ -618,9 +650,12 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io)
 
 	if (io->flags & UBLKS_IO_NEED_GET_DATA)
 		cmd_op = UBLK_U_IO_NEED_GET_DATA;
-	else if (io->flags & UBLKS_IO_NEED_COMMIT_RQ_COMP)
+	else if (io->flags & UBLKS_IO_NEED_COMMIT_RQ_COMP) {
+		if (ublk_queue_use_user_copy(q))
+			ublk_user_copy(io, UBLK_IO_OP_READ);
+
 		cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
-	else if (io->flags & UBLKS_IO_NEED_FETCH_RQ)
+	} else if (io->flags & UBLKS_IO_NEED_FETCH_RQ)
 		cmd_op = UBLK_U_IO_FETCH_REQ;
 
 	if (io_uring_sq_space_left(&t->ring) < 1)
@@ -649,7 +684,7 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io)
 	sqe[0]->rw_flags	= 0;
 	cmd->tag	= io->tag;
 	cmd->q_id	= q->q_id;
-	if (!ublk_queue_no_buf(q))
+	if (!ublk_queue_no_buf(q) && !ublk_queue_use_user_copy(q))
 		cmd->addr	= (__u64) (uintptr_t) io->buf_addr;
 	else
 		cmd->addr	= 0;
@@ -751,6 +786,10 @@ static void ublk_handle_uring_cmd(struct ublk_thread *t,
 
 	if (cqe->res == UBLK_IO_RES_OK) {
 		assert(tag < q->q_depth);
+
+		if (ublk_queue_use_user_copy(q))
+			ublk_user_copy(io, UBLK_IO_OP_WRITE);
+
 		if (q->tgt_ops->queue_io)
 			q->tgt_ops->queue_io(t, q, tag);
 	} else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
@@ -1507,7 +1546,7 @@ static void __cmd_create_help(char *exe, bool recovery)
 
 	printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
 			exe, recovery ? "recover" : "add");
-	printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n");
+	printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1] [-g] [-u]\n");
 	printf("\t[-e 0|1 ] [-i 0|1] [--no_ublk_fixed_fd]\n");
 	printf("\t[--nthreads threads] [--per_io_tasks]\n");
 	printf("\t[target options] [backfile1] [backfile2] ...\n");
@@ -1568,6 +1607,7 @@ int main(int argc, char *argv[])
 		{ "get_data",		1,	NULL, 'g'},
 		{ "auto_zc",		0,	NULL,  0 },
 		{ "auto_zc_fallback", 	0,	NULL,  0 },
+		{ "user_copy",		0,	NULL, 'u'},
 		{ "size",		1,	NULL, 's'},
 		{ "nthreads",		1,	NULL,  0 },
 		{ "per_io_tasks",	0,	NULL,  0 },
@@ -1593,7 +1633,7 @@ int main(int argc, char *argv[])
 
 	opterr = 0;
 	optind = 2;
-	while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gaz",
+	while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gazu",
 				  longopts, &option_idx)) != -1) {
 		switch (opt) {
 		case 'a':
@@ -1633,6 +1673,9 @@ int main(int argc, char *argv[])
 		case 'g':
 			ctx.flags |= UBLK_F_NEED_GET_DATA;
 			break;
+		case 'u':
+			ctx.flags |= UBLK_F_USER_COPY;
+			break;
 		case 's':
 			ctx.size = strtoull(optarg, NULL, 10);
 			break;
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index 6e8f381f34810d..8a83b90ec603ad 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -208,6 +208,12 @@ static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
 	return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF);
 }
 
+static inline __u64 ublk_user_copy_offset(unsigned q_id, unsigned tag)
+{
+	return UBLKSRV_IO_BUF_OFFSET +
+	       ((__u64)q_id << UBLK_QID_OFF | (__u64)tag << UBLK_TAG_OFF);
+}
+
 static inline int is_target_io(__u64 user_data)
 {
 	return (user_data & (1ULL << 63)) != 0;
@@ -405,6 +411,11 @@ static inline bool ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
 	return !!(q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK);
 }
 
+static inline bool ublk_queue_use_user_copy(const struct ublk_queue *q)
+{
+	return !!(q->flags & UBLK_F_USER_COPY);
+}
+
 static inline int ublk_queue_no_buf(const struct ublk_queue *q)
 {
 	return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q);
diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c
index 791fa8dc165109..fd412e1f01c0ee 100644
--- a/tools/testing/selftests/ublk/stripe.c
+++ b/tools/testing/selftests/ublk/stripe.c
@@ -134,7 +134,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
 	struct stripe_array *s = alloc_stripe_array(conf, iod);
 	struct ublk_io *io = ublk_get_io(q, tag);
 	int i, extra = zc ? 2 : 0;
-	void *base = (zc | auto_zc) ? NULL : (void *)iod->addr;
+	void *base = io->buf_addr;
 
 	io->private_data = s;
 	calculate_stripe_array(conf, iod, s, base);

From 63276182c51332b75293ac88f3a81d98bfca1b93 Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Fri, 12 Dec 2025 10:17:07 -0700
Subject: [PATCH 093/258] selftests: ublk: add user copy test cases

The ublk selftests cover every data copy mode except user copy. Add
tests for user copy based on the existing test suite:
- generic_14 ("basic recover function verification (user copy)") based
  on generic_04 and generic_05
- null_03 ("basic IO test with user copy") based on null_01 and null_02
- loop_06 ("write and verify over user copy") based on loop_01 and
  loop_03
- loop_07 ("mkfs & mount & umount with user copy") based on loop_02 and
  loop_04
- stripe_05 ("write and verify test on user copy") based on stripe_03
- stripe_06 ("mkfs & mount & umount on user copy") based on stripe_02
  and stripe_04
- stress_06 ("run IO and remove device (user copy)") based on stress_01
  and stress_03
- stress_07 ("run IO and kill ublk server (user copy)") based on
  stress_02 and stress_04

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 tools/testing/selftests/ublk/Makefile         |  8 ++++
 .../testing/selftests/ublk/test_generic_14.sh | 40 +++++++++++++++++++
 tools/testing/selftests/ublk/test_loop_06.sh  | 25 ++++++++++++
 tools/testing/selftests/ublk/test_loop_07.sh  | 21 ++++++++++
 tools/testing/selftests/ublk/test_null_03.sh  | 24 +++++++++++
 .../testing/selftests/ublk/test_stress_06.sh  | 39 ++++++++++++++++++
 .../testing/selftests/ublk/test_stress_07.sh  | 39 ++++++++++++++++++
 .../testing/selftests/ublk/test_stripe_05.sh  | 26 ++++++++++++
 .../testing/selftests/ublk/test_stripe_06.sh  | 21 ++++++++++
 9 files changed, 243 insertions(+)
 create mode 100755 tools/testing/selftests/ublk/test_generic_14.sh
 create mode 100755 tools/testing/selftests/ublk/test_loop_06.sh
 create mode 100755 tools/testing/selftests/ublk/test_loop_07.sh
 create mode 100755 tools/testing/selftests/ublk/test_null_03.sh
 create mode 100755 tools/testing/selftests/ublk/test_stress_06.sh
 create mode 100755 tools/testing/selftests/ublk/test_stress_07.sh
 create mode 100755 tools/testing/selftests/ublk/test_stripe_05.sh
 create mode 100755 tools/testing/selftests/ublk/test_stripe_06.sh

diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile
index 770269efe42ab4..837977b6241710 100644
--- a/tools/testing/selftests/ublk/Makefile
+++ b/tools/testing/selftests/ublk/Makefile
@@ -21,24 +21,32 @@ TEST_PROGS += test_generic_10.sh
 TEST_PROGS += test_generic_11.sh
 TEST_PROGS += test_generic_12.sh
 TEST_PROGS += test_generic_13.sh
+TEST_PROGS += test_generic_14.sh
 
 TEST_PROGS += test_null_01.sh
 TEST_PROGS += test_null_02.sh
+TEST_PROGS += test_null_03.sh
 TEST_PROGS += test_loop_01.sh
 TEST_PROGS += test_loop_02.sh
 TEST_PROGS += test_loop_03.sh
 TEST_PROGS += test_loop_04.sh
 TEST_PROGS += test_loop_05.sh
+TEST_PROGS += test_loop_06.sh
+TEST_PROGS += test_loop_07.sh
 TEST_PROGS += test_stripe_01.sh
 TEST_PROGS += test_stripe_02.sh
 TEST_PROGS += test_stripe_03.sh
 TEST_PROGS += test_stripe_04.sh
+TEST_PROGS += test_stripe_05.sh
+TEST_PROGS += test_stripe_06.sh
 
 TEST_PROGS += test_stress_01.sh
 TEST_PROGS += test_stress_02.sh
 TEST_PROGS += test_stress_03.sh
 TEST_PROGS += test_stress_04.sh
 TEST_PROGS += test_stress_05.sh
+TEST_PROGS += test_stress_06.sh
+TEST_PROGS += test_stress_07.sh
 
 TEST_GEN_PROGS_EXTENDED = kublk
 
diff --git a/tools/testing/selftests/ublk/test_generic_14.sh b/tools/testing/selftests/ublk/test_generic_14.sh
new file mode 100755
index 00000000000000..cd9b44b97c24ea
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_14.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_14"
+ERR_CODE=0
+
+ublk_run_recover_test()
+{
+	run_io_and_recover 256M "kill_daemon" "$@"
+	ERR_CODE=$?
+	if [ ${ERR_CODE} -ne 0 ]; then
+		echo "$TID failure: $*"
+		_show_result $TID $ERR_CODE
+	fi
+}
+
+if ! _have_program fio; then
+	exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "recover" "basic recover function verification (user copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_run_recover_test -t null -q 2 -r 1 -u &
+ublk_run_recover_test -t loop -q 2 -r 1 -u "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 -u "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+ublk_run_recover_test -t null -q 2 -r 1 -u -i 1 &
+ublk_run_recover_test -t loop -q 2 -r 1 -u -i 1 "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 -u -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "recover"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_06.sh b/tools/testing/selftests/ublk/test_loop_06.sh
new file mode 100755
index 00000000000000..1d1a8a7255023c
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_loop_06.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="loop_06"
+ERR_CODE=0
+
+if ! _have_program fio; then
+	exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "loop" "write and verify over user copy"
+
+_create_backfile 0 256M
+dev_id=$(_add_ublk_dev -t loop -u "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
+
+# run fio over the ublk disk
+_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
+ERR_CODE=$?
+
+_cleanup_test "loop"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_07.sh b/tools/testing/selftests/ublk/test_loop_07.sh
new file mode 100755
index 00000000000000..493f3fb611a5a1
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_loop_07.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="loop_07"
+ERR_CODE=0
+
+_prep_test "loop" "mkfs & mount & umount with user copy"
+
+_create_backfile 0 256M
+
+dev_id=$(_add_ublk_dev -t loop -u "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
+
+_mkfs_mount_test /dev/ublkb"${dev_id}"
+ERR_CODE=$?
+
+_cleanup_test "loop"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_null_03.sh b/tools/testing/selftests/ublk/test_null_03.sh
new file mode 100755
index 00000000000000..0051067b46869c
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_null_03.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="null_03"
+ERR_CODE=0
+
+if ! _have_program fio; then
+	exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "null" "basic IO test with user copy"
+
+dev_id=$(_add_ublk_dev -t null -u)
+_check_add_dev $TID $?
+
+# run fio over the two disks
+fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1
+ERR_CODE=$?
+
+_cleanup_test "null"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_06.sh b/tools/testing/selftests/ublk/test_stress_06.sh
new file mode 100755
index 00000000000000..37188ec2e1f700
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_06.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TID="stress_06"
+ERR_CODE=0
+
+ublk_io_and_remove()
+{
+	run_io_and_remove "$@"
+	ERR_CODE=$?
+	if [ ${ERR_CODE} -ne 0 ]; then
+		echo "$TID failure: $*"
+		_show_result $TID $ERR_CODE
+	fi
+}
+
+if ! _have_program fio; then
+	exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "stress" "run IO and remove device (user copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_io_and_remove 8G -t null -q 4 -u &
+ublk_io_and_remove 256M -t loop -q 4 -u "${UBLK_BACKFILES[0]}" &
+ublk_io_and_remove 256M -t stripe -q 4 -u "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+ublk_io_and_remove 8G -t null -q 4 -u --nthreads 8 --per_io_tasks &
+ublk_io_and_remove 256M -t loop -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" &
+ublk_io_and_remove 256M -t stripe -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "stress"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_07.sh b/tools/testing/selftests/ublk/test_stress_07.sh
new file mode 100755
index 00000000000000..fb061fc26d3625
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_07.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TID="stress_07"
+ERR_CODE=0
+
+ublk_io_and_kill_daemon()
+{
+	run_io_and_kill_daemon "$@"
+	ERR_CODE=$?
+	if [ ${ERR_CODE} -ne 0 ]; then
+		echo "$TID failure: $*"
+		_show_result $TID $ERR_CODE
+	fi
+}
+
+if ! _have_program fio; then
+	exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "stress" "run IO and kill ublk server (user copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_io_and_kill_daemon 8G -t null -q 4 -u --no_ublk_fixed_fd &
+ublk_io_and_kill_daemon 256M -t loop -q 4 -u --no_ublk_fixed_fd "${UBLK_BACKFILES[0]}" &
+ublk_io_and_kill_daemon 256M -t stripe -q 4 -u "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+ublk_io_and_kill_daemon 8G -t null -q 4 -u --nthreads 8 --per_io_tasks &
+ublk_io_and_kill_daemon 256M -t loop -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" &
+ublk_io_and_kill_daemon 256M -t stripe -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "stress"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_05.sh b/tools/testing/selftests/ublk/test_stripe_05.sh
new file mode 100755
index 00000000000000..05d71951d710c0
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stripe_05.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="stripe_05"
+ERR_CODE=0
+
+if ! _have_program fio; then
+	exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "stripe" "write and verify test on user copy"
+
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+dev_id=$(_add_ublk_dev -t stripe -q 2 -u "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
+
+# run fio over the ublk disk
+_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M
+ERR_CODE=$?
+
+_cleanup_test "stripe"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_06.sh b/tools/testing/selftests/ublk/test_stripe_06.sh
new file mode 100755
index 00000000000000..d06cac7626e219
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stripe_06.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="stripe_06"
+ERR_CODE=0
+
+_prep_test "stripe" "mkfs & mount & umount on user copy"
+
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+dev_id=$(_add_ublk_dev -t stripe -u -q 2 "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
+
+_mkfs_mount_test /dev/ublkb"${dev_id}"
+ERR_CODE=$?
+
+_cleanup_test "stripe"
+_show_result $TID $ERR_CODE

From 9869d3a6fed381f3b98404e26e1afc75d680cbf9 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 12 Dec 2025 22:35:00 +0800
Subject: [PATCH 094/258] block: fix race between wbt_enable_default and IO
 submission

When wbt_enable_default() is moved out of queue freezing in elevator_change(),
it can cause the wbt inflight counter to become negative (-1), leading to hung
tasks in the writeback path. Tasks get stuck in wbt_wait() because the counter
is in an inconsistent state.

The issue occurs because wbt_enable_default() could race with IO submission,
allowing the counter to be decremented before proper initialization. This manifests
as:

  rq_wait[0]:
    inflight:             -1
    has_waiters:        True

rwb_enabled() checks the state, which can be updated exactly between wbt_wait()
(rq_qos_throttle()) and wbt_track()(rq_qos_track()), then the inflight counter
will become negative.

And results in hung task warnings like:
  task:kworker/u24:39 state:D stack:0 pid:14767
  Call Trace:
    rq_qos_wait+0xb4/0x150
    wbt_wait+0xa9/0x100
    __rq_qos_throttle+0x24/0x40
    blk_mq_submit_bio+0x672/0x7b0
    ...

Fix this by:

1. Splitting wbt_enable_default() into:
   - __wbt_enable_default(): Returns true if wbt_init() should be called
   - wbt_enable_default(): Wrapper for existing callers (no init)
   - wbt_init_enable_default(): New function that checks and inits WBT

2. Using wbt_init_enable_default() in blk_register_queue() to ensure
   proper initialization during queue registration

3. Move wbt_init() out of wbt_enable_default() which is only for enabling
   disabled wbt from bfq and iocost, and wbt_init() isn't needed. Then the
   original lock warning can be avoided.

4. Removing the ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT flag and its handling
   code since it's no longer needed

This ensures WBT is properly initialized before any IO can be submitted,
preventing the counter from going negative.

Cc: Nilay Shroff <nilay@linux.ibm.com>
Cc: Yu Kuai <yukuai@fnnas.com>
Cc: Guangwu Zhang <guazhang@redhat.com>
Fixes: 78c271344b6f ("block: move wbt_enable_default() out of queue freezing from sched ->exit()")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c |  2 +-
 block/blk-sysfs.c   |  2 +-
 block/blk-wbt.c     | 20 ++++++++++++++++----
 block/blk-wbt.h     |  5 +++++
 block/elevator.c    |  4 ----
 block/elevator.h    |  1 -
 6 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 4a8d3d96bfe492..6e54b1d3d8bc2a 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7181,7 +7181,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
 
 	blk_stat_disable_accounting(bfqd->queue);
 	blk_queue_flag_clear(QUEUE_FLAG_DISABLE_WBT_DEF, bfqd->queue);
-	set_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT, &e->flags);
+	wbt_enable_default(bfqd->queue->disk);
 
 	kfree(bfqd);
 }
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 8684c57498cc1d..e0a70d26972b3b 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -932,7 +932,7 @@ int blk_register_queue(struct gendisk *disk)
 		elevator_set_default(q);
 
 	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
-	wbt_enable_default(disk);
+	wbt_init_enable_default(disk);
 
 	/* Now everything is ready and send out KOBJ_ADD uevent */
 	kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index eb8037bae0bda6..0974875f77bda2 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -699,7 +699,7 @@ static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
 /*
  * Enable wbt if defaults are configured that way
  */
-void wbt_enable_default(struct gendisk *disk)
+static bool __wbt_enable_default(struct gendisk *disk)
 {
 	struct request_queue *q = disk->queue;
 	struct rq_qos *rqos;
@@ -716,19 +716,31 @@ void wbt_enable_default(struct gendisk *disk)
 		if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
 			RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
 		mutex_unlock(&disk->rqos_state_mutex);
-		return;
+		return false;
 	}
 	mutex_unlock(&disk->rqos_state_mutex);
 
 	/* Queue not registered? Maybe shutting down... */
 	if (!blk_queue_registered(q))
-		return;
+		return false;
 
 	if (queue_is_mq(q) && enable)
-		wbt_init(disk);
+		return true;
+	return false;
+}
+
+void wbt_enable_default(struct gendisk *disk)
+{
+	__wbt_enable_default(disk);
 }
 EXPORT_SYMBOL_GPL(wbt_enable_default);
 
+void wbt_init_enable_default(struct gendisk *disk)
+{
+	if (__wbt_enable_default(disk))
+		WARN_ON_ONCE(wbt_init(disk));
+}
+
 u64 wbt_default_latency_nsec(struct request_queue *q)
 {
 	/*
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index e5fc653b9b76f6..925f2247573834 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -5,6 +5,7 @@
 #ifdef CONFIG_BLK_WBT
 
 int wbt_init(struct gendisk *disk);
+void wbt_init_enable_default(struct gendisk *disk);
 void wbt_disable_default(struct gendisk *disk);
 void wbt_enable_default(struct gendisk *disk);
 
@@ -16,6 +17,10 @@ u64 wbt_default_latency_nsec(struct request_queue *);
 
 #else
 
+static inline void wbt_init_enable_default(struct gendisk *disk)
+{
+}
+
 static inline void wbt_disable_default(struct gendisk *disk)
 {
 }
diff --git a/block/elevator.c b/block/elevator.c
index 5b37ef44f52d7f..a2f8b2251dc6e6 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -633,14 +633,10 @@ static int elevator_change_done(struct request_queue *q,
 			.et = ctx->old->et,
 			.data = ctx->old->elevator_data
 		};
-		bool enable_wbt = test_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT,
-				&ctx->old->flags);
 
 		elv_unregister_queue(q, ctx->old);
 		blk_mq_free_sched_res(&res, ctx->old->type, q->tag_set);
 		kobject_put(&ctx->old->kobj);
-		if (enable_wbt)
-			wbt_enable_default(q->disk);
 	}
 	if (ctx->new) {
 		ret = elv_register_queue(q, ctx->new, !ctx->no_uevent);
diff --git a/block/elevator.h b/block/elevator.h
index a9d092c5a9e855..3eb32516be0b16 100644
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -156,7 +156,6 @@ struct elevator_queue
 
 #define ELEVATOR_FLAG_REGISTERED	0
 #define ELEVATOR_FLAG_DYING		1
-#define ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT	2
 
 /*
  * block elevator interface

From fbbd7ce627af733ded7971b2495b0d099a0a80da Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 12 Dec 2025 13:01:04 +0900
Subject: [PATCH 095/258] genirq: Don't overwrite interrupt thread flags on
 setup

Chris reported that the recent affinity management changes result in
overwriting the already initialized thread flags.

Use set_bit() to set the affinity bit instead of assigning the bit value to
the flags.

Fixes: 801afdfbfcd9 ("genirq: Fix interrupt threads affinity vs. cpuset isolated partitions")
Reported-by: Chris Mason <clm@meta.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://patch.msgid.link/87ecp0e4cf.ffs@tglx
Closes: https://lore.kernel.org/all/20251212014848.3509622-1-clm@meta.com
---
 kernel/irq/manage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 8b1b4c8a4f54c5..349ae7979da0e3 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1414,7 +1414,7 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
 	 * Ensure the thread adjusts the affinity once it reaches the
 	 * thread function.
 	 */
-	new->thread_flags = BIT(IRQTF_AFFINITY);
+	set_bit(IRQTF_AFFINITY, &new->thread_flags);
 
 	return 0;
 }

From 043507144ae13d3b882d40495d101bb4c4990d98 Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@linux.dev>
Date: Wed, 10 Dec 2025 13:56:28 +0100
Subject: [PATCH 096/258] x86/sgx: Remove unmatched quote in __sgx_encl_extend
 function comment

There is no opening quote. Remove the unmatched closing quote.

Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Link: https://patch.msgid.link/20251210125628.544916-1-thorsten.blum@linux.dev
---
 arch/x86/kernel/cpu/sgx/ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 66f1efa16fbb72..9322a9287dc7f5 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -242,7 +242,7 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
 /*
  * If the caller requires measurement of the page as a proof for the content,
  * use EEXTEND to add a measurement for 256 bytes of the page. Repeat this
- * operation until the entire page is measured."
+ * operation until the entire page is measured.
  */
 static int __sgx_encl_extend(struct sgx_encl *encl,
 			     struct sgx_epc_page *epc_page)

From 8b62e64e6d30fa047b3aefb1a36e1f80c8acb3d2 Mon Sep 17 00:00:00 2001
From: Tal Zussman <tz2294@columbia.edu>
Date: Fri, 12 Dec 2025 04:08:07 -0500
Subject: [PATCH 097/258] x86/mm/tlb/trace: Export the TLB_REMOTE_WRONG_CPU
 enum in <trace/events/tlb.h>

When the TLB_REMOTE_WRONG_CPU enum was introduced for the tlb_flush
tracepoint, the enum was not exported to user-space. Add it to the
appropriate macro definition to enable parsing by userspace tools, as
per:

  Link: https://lore.kernel.org/all/20150403013802.220157513@goodmis.org

[ mingo: Capitalize IPI, etc. ]

Fixes: 2815a56e4b72 ("x86/mm/tlb: Add tracepoint for TLB flush IPI to stale CPU")
Signed-off-by: Tal Zussman <tz2294@columbia.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Rik van Riel <riel@surriel.com>
Link: https://patch.msgid.link/20251212-tlb-trace-fix-v2-1-d322e0ad9b69@columbia.edu
---
 include/trace/events/tlb.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/trace/events/tlb.h b/include/trace/events/tlb.h
index b4d8e7dc38f880..fb836951168564 100644
--- a/include/trace/events/tlb.h
+++ b/include/trace/events/tlb.h
@@ -12,8 +12,9 @@
 	EM(  TLB_FLUSH_ON_TASK_SWITCH,	"flush on task switch" )	\
 	EM(  TLB_REMOTE_SHOOTDOWN,	"remote shootdown" )		\
 	EM(  TLB_LOCAL_SHOOTDOWN,	"local shootdown" )		\
-	EM(  TLB_LOCAL_MM_SHOOTDOWN,	"local mm shootdown" )		\
-	EMe( TLB_REMOTE_SEND_IPI,	"remote ipi send" )
+	EM(  TLB_LOCAL_MM_SHOOTDOWN,	"local MM shootdown" )		\
+	EM(  TLB_REMOTE_SEND_IPI,	"remote IPI send" )		\
+	EMe( TLB_REMOTE_WRONG_CPU,	"remote wrong CPU" )
 
 /*
  * First define the enums in TLB_FLUSH_REASON to be exported to userspace

From 0c01ea92f545ca7fcafdda6a8e29b65ef3a5ec74 Mon Sep 17 00:00:00 2001
From: Tal Zussman <tz2294@columbia.edu>
Date: Fri, 12 Dec 2025 04:08:08 -0500
Subject: [PATCH 098/258] mm: Remove tlb_flush_reason::NR_TLB_FLUSH_REASONS
 from <linux/mm_types.h>

This has been unused since it was added 11 years ago in:

  d17d8f9dedb9 ("x86/mm: Add tracepoints for TLB flushes")

Signed-off-by: Tal Zussman <tz2294@columbia.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Rik van Riel <riel@surriel.com>
Acked-by: David Hildenbrand <david@redhat.com>
Link: https://patch.msgid.link/20251212-tlb-trace-fix-v2-2-d322e0ad9b69@columbia.edu
---
 include/linux/mm_types.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9f6de068295d30..42af2292951d4f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1631,7 +1631,6 @@ enum tlb_flush_reason {
 	TLB_LOCAL_MM_SHOOTDOWN,
 	TLB_REMOTE_SEND_IPI,
 	TLB_REMOTE_WRONG_CPU,
-	NR_TLB_FLUSH_REASONS,
 };
 
 /**

From 21433d3e3ca14d20f9b0c2237b3d3a1355af7907 Mon Sep 17 00:00:00 2001
From: Kyle Meyer <kyle.meyer@hpe.com>
Date: Fri, 12 Dec 2025 12:53:36 -0600
Subject: [PATCH 099/258] x86/platform/uv: Fix UBSAN array-index-out-of-bounds

When UBSAN is enabled, multiple array-index-out-of-bounds messages are
printed:

  [    0.000000] [     T0] UBSAN: array-index-out-of-bounds in arch/x86/kernel/apic/x2apic_uv_x.c:276:23
  [    0.000000] [     T0] index 1 is out of range for type '<unknown> [1]'
  ...
  [    0.000000] [     T0] UBSAN: array-index-out-of-bounds in arch/x86/kernel/apic/x2apic_uv_x.c:277:32
  [    0.000000] [     T0] index 1 is out of range for type '<unknown> [1]'
  ...
  [    0.000000] [     T0] UBSAN: array-index-out-of-bounds in arch/x86/kernel/apic/x2apic_uv_x.c:282:16
  [    0.000000] [     T0] index 1 is out of range for type '<unknown> [1]'
  ...
  [    0.515850] [     T1] UBSAN: array-index-out-of-bounds in arch/x86/kernel/apic/x2apic_uv_x.c:1344:23
  [    0.519851] [     T1] index 1 is out of range for type '<unknown> [1]'
  ...
  [    0.603850] [     T1] UBSAN: array-index-out-of-bounds in arch/x86/kernel/apic/x2apic_uv_x.c:1345:32
  [    0.607850] [     T1] index 1 is out of range for type '<unknown> [1]'
  ...
  [    0.691850] [     T1] UBSAN: array-index-out-of-bounds in arch/x86/kernel/apic/x2apic_uv_x.c:1353:20
  [    0.695850] [     T1] index 1 is out of range for type '<unknown> [1]'

One-element arrays have been deprecated:

  https://docs.kernel.org/process/deprecated.html#zero-length-and-one-element-arrays

Switch entry in struct uv_systab to a flexible array member to fix UBSAN
array-index-out-of-bounds messages.

sizeof(struct uv_systab) is passed to early_memremap() and ioremap(). The
flexible array member is not accessed until the UV system table size is used to
remap the entire UV system table, so changes to sizeof(struct uv_systab) have no
impact.

Signed-off-by: Kyle Meyer <kyle.meyer@hpe.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://patch.msgid.link/aTxksN-3otY41WvQ@hpe.com
---
 arch/x86/include/asm/uv/bios.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 6989b824fd321b..d0b62e2552902c 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -122,7 +122,7 @@ struct uv_systab {
 	struct {
 		u32 type:8;	/* type of entry */
 		u32 offset:24;	/* byte offset from struct start to entry */
-	} entry[1];		/* additional entries follow */
+	} entry[];		/* additional entries follow */
 };
 extern struct uv_systab *uv_systab;
 

From b1aa01d31249bd116b18c7f512d3e46b4b4ad83b Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Fri, 5 Dec 2025 10:58:57 +0100
Subject: [PATCH 100/258] s390/ipl: Clear SBP flag when bootprog is set

With z16 a new flag 'search boot program' was introduced for
list-directed IPL (SCSI, NVMe, ECKD DASD). If this flag is set,
e.g. via selecting the "Automatic" value for the "Boot program
selector" control on an HMC load panel, it is copied to the reipl
structure from the initial ipl structure. When a user now sets a
boot prog via sysfs, the flag is not cleared and the bootloader
will again automatically select the boot program, ignoring user
configuration.

To avoid that, clear the SBP flag when a bootprog sysfs file is
written.

Cc: stable@vger.kernel.org
Reviewed-by: Peter Oberparleiter <oberpar@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/uapi/asm/ipl.h |  1 +
 arch/s390/kernel/ipl.c           | 48 ++++++++++++++++++++++++--------
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h
index 2cd28af50dd439..3d64a22516994b 100644
--- a/arch/s390/include/uapi/asm/ipl.h
+++ b/arch/s390/include/uapi/asm/ipl.h
@@ -15,6 +15,7 @@ struct ipl_pl_hdr {
 #define IPL_PL_FLAG_IPLPS	0x80
 #define IPL_PL_FLAG_SIPL	0x40
 #define IPL_PL_FLAG_IPLSR	0x20
+#define IPL_PL_FLAG_SBP		0x10
 
 /* IPL Parameter Block header */
 struct ipl_pb_hdr {
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 961a3d60a4ddda..dcdc7e27484867 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -262,6 +262,24 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
 			sys_##_prefix##_##_name##_show,			\
 			sys_##_prefix##_##_name##_store)
 
+#define DEFINE_IPL_ATTR_BOOTPROG_RW(_prefix, _name, _fmt_out, _fmt_in, _hdr, _value)	\
+	IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value)		\
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,			\
+		struct kobj_attribute *attr,						\
+		const char *buf, size_t len)						\
+{											\
+	unsigned long long value;							\
+	if (sscanf(buf, _fmt_in, &value) != 1)						\
+		return -EINVAL;								\
+	(_value) = value;								\
+	(_hdr).flags &= ~IPL_PL_FLAG_SBP;						\
+	return len;									\
+}											\
+static struct kobj_attribute sys_##_prefix##_##_name##_attr =				\
+	__ATTR(_name, 0644,								\
+			sys_##_prefix##_##_name##_show,					\
+			sys_##_prefix##_##_name##_store)
+
 #define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\
 IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, _value)			\
 static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
@@ -818,12 +836,13 @@ DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
 		   reipl_block_fcp->fcp.wwpn);
 DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n",
 		   reipl_block_fcp->fcp.lun);
-DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
-		   reipl_block_fcp->fcp.bootprog);
 DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n",
 		   reipl_block_fcp->fcp.br_lba);
 DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
 		   reipl_block_fcp->fcp.devno);
+DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
+			    reipl_block_fcp->hdr,
+			    reipl_block_fcp->fcp.bootprog);
 
 static void reipl_get_ascii_loadparm(char *loadparm,
 				     struct ipl_parameter_block *ibp)
@@ -942,10 +961,11 @@ DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n",
 		   reipl_block_nvme->nvme.fid);
 DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n",
 		   reipl_block_nvme->nvme.nsid);
-DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
-		   reipl_block_nvme->nvme.bootprog);
 DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n",
 		   reipl_block_nvme->nvme.br_lba);
+DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
+			    reipl_block_nvme->hdr,
+			    reipl_block_nvme->nvme.bootprog);
 
 static struct attribute *reipl_nvme_attrs[] = {
 	&sys_reipl_nvme_fid_attr.attr,
@@ -1038,8 +1058,9 @@ static const struct bin_attribute *const reipl_eckd_bin_attrs[] = {
 };
 
 DEFINE_IPL_CCW_ATTR_RW(reipl_eckd, device, reipl_block_eckd->eckd);
-DEFINE_IPL_ATTR_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n",
-		   reipl_block_eckd->eckd.bootprog);
+DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n",
+			    reipl_block_eckd->hdr,
+			    reipl_block_eckd->eckd.bootprog);
 
 static struct attribute *reipl_eckd_attrs[] = {
 	&sys_reipl_eckd_device_attr.attr,
@@ -1567,12 +1588,13 @@ DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n",
 		   dump_block_fcp->fcp.wwpn);
 DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n",
 		   dump_block_fcp->fcp.lun);
-DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
-		   dump_block_fcp->fcp.bootprog);
 DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
 		   dump_block_fcp->fcp.br_lba);
 DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
 		   dump_block_fcp->fcp.devno);
+DEFINE_IPL_ATTR_BOOTPROG_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
+			    dump_block_fcp->hdr,
+			    dump_block_fcp->fcp.bootprog);
 
 DEFINE_IPL_ATTR_SCP_DATA_RW(dump_fcp, dump_block_fcp->hdr,
 			    dump_block_fcp->fcp,
@@ -1604,10 +1626,11 @@ DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n",
 		   dump_block_nvme->nvme.fid);
 DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n",
 		   dump_block_nvme->nvme.nsid);
-DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
-		   dump_block_nvme->nvme.bootprog);
 DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n",
 		   dump_block_nvme->nvme.br_lba);
+DEFINE_IPL_ATTR_BOOTPROG_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
+			    dump_block_nvme->hdr,
+			    dump_block_nvme->nvme.bootprog);
 
 DEFINE_IPL_ATTR_SCP_DATA_RW(dump_nvme, dump_block_nvme->hdr,
 			    dump_block_nvme->nvme,
@@ -1635,8 +1658,9 @@ static const struct attribute_group dump_nvme_attr_group = {
 
 /* ECKD dump device attributes */
 DEFINE_IPL_CCW_ATTR_RW(dump_eckd, device, dump_block_eckd->eckd);
-DEFINE_IPL_ATTR_RW(dump_eckd, bootprog, "%lld\n", "%llx\n",
-		   dump_block_eckd->eckd.bootprog);
+DEFINE_IPL_ATTR_BOOTPROG_RW(dump_eckd, bootprog, "%lld\n", "%llx\n",
+			    dump_block_eckd->hdr,
+			    dump_block_eckd->eckd.bootprog);
 
 IPL_ATTR_BR_CHR_SHOW_FN(dump, dump_block_eckd->eckd);
 IPL_ATTR_BR_CHR_STORE_FN(dump, dump_block_eckd->eckd);

From 4cb92fa763823d813d22b45b7f18fcf6e85a72ad Mon Sep 17 00:00:00 2001
From: Benjamin Block <bblock@linux.ibm.com>
Date: Fri, 5 Dec 2025 16:47:17 +0100
Subject: [PATCH 101/258] s390/pci: Fix cyclic dead-lock in zpci_zdev_put() and
 zpci_scan_devices()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When triggering PCI device recovery by writing into the SysFS attribute
`recover` of a Physical Function with existing child SR-IOV Virtual
Functions, lockdep is reporting a possible deadlock between three
threads:

         Thread (A)             Thread (B)             Thread (C)
             |                      |                      |
      recover_store()      zpci_scan_devices()    zpci_scan_devices()
lock(pci_rescan_remove_lock)        |                      |
             |                      |                      |
             |                      |            zpci_bus_scan_busses()
             |                      |             lock(zbus_list_lock)
             |              zpci_add_device()              |
             |          lock(zpci_add_remove_lock)         |
             |                      |                      ┴
             |                      |             zpci_bus_scan_bus()
             |                      |         lock(pci_rescan_remove_lock)
             ┴                      |
      zpci_zdev_put()               |
 lock(zpci_add_remove_lock)         |
                                    ┴
                              zpci_bus_get()
                           lock(zbus_list_lock)

In zpci_bus_scan_busses() the `zbus_list_lock` is taken for the whole
duration of the function, which also includes taking
`pci_rescan_remove_lock`, among other things. But `zbus_list_lock` only
really needs to protect the modification of the global registration
`zbus_list`, it can be dropped while the functions within the list
iteration run; this way we break the cycle above.

Break up zpci_bus_scan_busses() into an "iterator" zpci_bus_get_next()
that iterates over `zbus_list` element by element, and acquires and
releases `zbus_list_lock` as necessary, but never keep holding it.
References to `zpci_bus` objects are also acquired and released.

The reference counting on `zpci_bus` objects is also changed so that all
put() and get() operations are done under the protection of
`zbus_list_lock`, and if the operation results in a modification of
`zpci_bus_list`, this modification is done in the same critical section
(apart the very first initialization). This way objects are never seen
on the list that are about to be released and/or half-initialized.

Fixes: 14c87ba8123a ("s390/pci: separate zbus registration from scanning")
Suggested-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Benjamin Block <bblock@linux.ibm.com>
Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 .clang-format           |  1 +
 arch/s390/pci/pci.c     |  6 ++-
 arch/s390/pci/pci_bus.c | 98 +++++++++++++++++++++++++++++------------
 arch/s390/pci/pci_bus.h | 15 ++++++-
 4 files changed, 91 insertions(+), 29 deletions(-)

diff --git a/.clang-format b/.clang-format
index 2ceca764122f87..c7060124a47aa2 100644
--- a/.clang-format
+++ b/.clang-format
@@ -748,6 +748,7 @@ ForEachMacros:
   - 'ynl_attr_for_each_nested'
   - 'ynl_attr_for_each_payload'
   - 'zorro_for_each_dev'
+  - 'zpci_bus_for_each'
 
 IncludeBlocks: Preserve
 IncludeCategories:
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 5a6ace9d875a2b..8fd14d0430085d 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -1148,6 +1148,7 @@ static void zpci_add_devices(struct list_head *scan_list)
 
 int zpci_scan_devices(void)
 {
+	struct zpci_bus *zbus;
 	LIST_HEAD(scan_list);
 	int rc;
 
@@ -1156,7 +1157,10 @@ int zpci_scan_devices(void)
 		return rc;
 
 	zpci_add_devices(&scan_list);
-	zpci_bus_scan_busses();
+	zpci_bus_for_each(zbus) {
+		zpci_bus_scan_bus(zbus);
+		cond_resched();
+	}
 	return 0;
 }
 
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 66c4bd888b2936..42a13e451f649d 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -153,23 +153,6 @@ int zpci_bus_scan_bus(struct zpci_bus *zbus)
 	return ret;
 }
 
-/* zpci_bus_scan_busses - Scan all registered busses
- *
- * Scan all available zbusses
- *
- */
-void zpci_bus_scan_busses(void)
-{
-	struct zpci_bus *zbus = NULL;
-
-	mutex_lock(&zbus_list_lock);
-	list_for_each_entry(zbus, &zbus_list, bus_next) {
-		zpci_bus_scan_bus(zbus);
-		cond_resched();
-	}
-	mutex_unlock(&zbus_list_lock);
-}
-
 static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev)
 {
 	return !s390_pci_no_rid && zdev->rid_available &&
@@ -222,10 +205,29 @@ static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, s
 	return -ENOMEM;
 }
 
-static void zpci_bus_release(struct kref *kref)
+/**
+ * zpci_bus_release - Un-initialize resources associated with the zbus and
+ *		      free memory
+ * @kref:	refcount * that is part of struct zpci_bus
+ *
+ * MUST be called with `zbus_list_lock` held, but the lock is released during
+ * run of the function.
+ */
+static inline void zpci_bus_release(struct kref *kref)
+	__releases(&zbus_list_lock)
 {
 	struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref);
 
+	lockdep_assert_held(&zbus_list_lock);
+
+	list_del(&zbus->bus_next);
+	mutex_unlock(&zbus_list_lock);
+
+	/*
+	 * At this point no-one should see this object, or be able to get a new
+	 * reference to it.
+	 */
+
 	if (zbus->bus) {
 		pci_lock_rescan_remove();
 		pci_stop_root_bus(zbus->bus);
@@ -237,16 +239,19 @@ static void zpci_bus_release(struct kref *kref)
 		pci_unlock_rescan_remove();
 	}
 
-	mutex_lock(&zbus_list_lock);
-	list_del(&zbus->bus_next);
-	mutex_unlock(&zbus_list_lock);
 	zpci_remove_parent_msi_domain(zbus);
 	kfree(zbus);
 }
 
-static void zpci_bus_put(struct zpci_bus *zbus)
+static inline void __zpci_bus_get(struct zpci_bus *zbus)
+{
+	lockdep_assert_held(&zbus_list_lock);
+	kref_get(&zbus->kref);
+}
+
+static inline void zpci_bus_put(struct zpci_bus *zbus)
 {
-	kref_put(&zbus->kref, zpci_bus_release);
+	kref_put_mutex(&zbus->kref, zpci_bus_release, &zbus_list_lock);
 }
 
 static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
@@ -258,7 +263,7 @@ static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
 		if (!zbus->multifunction)
 			continue;
 		if (topo_is_tid == zbus->topo_is_tid && topo == zbus->topo) {
-			kref_get(&zbus->kref);
+			__zpci_bus_get(zbus);
 			goto out_unlock;
 		}
 	}
@@ -268,6 +273,44 @@ static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
 	return zbus;
 }
 
+/**
+ * zpci_bus_get_next - get the next zbus object from given position in the list
+ * @pos:	current position/cursor in the global zbus list
+ *
+ * Acquires and releases references as the cursor iterates (might also free/
+ * release the cursor). Is tolerant of concurrent operations on the list.
+ *
+ * To begin the iteration, set *@pos to %NULL before calling the function.
+ *
+ * *@pos is set to %NULL in cases where either the list is empty, or *@pos is
+ * the last element in the list.
+ *
+ * Context: Process context. May sleep.
+ */
+void zpci_bus_get_next(struct zpci_bus **pos)
+{
+	struct zpci_bus *curp = *pos, *next = NULL;
+
+	mutex_lock(&zbus_list_lock);
+	if (curp)
+		next = list_next_entry(curp, bus_next);
+	else
+		next = list_first_entry(&zbus_list, typeof(*curp), bus_next);
+
+	if (list_entry_is_head(next, &zbus_list, bus_next))
+		next = NULL;
+
+	if (next)
+		__zpci_bus_get(next);
+
+	*pos = next;
+	mutex_unlock(&zbus_list_lock);
+
+	/* zpci_bus_put() might drop refcount to 0 and locks zbus_list_lock */
+	if (curp)
+		zpci_bus_put(curp);
+}
+
 static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
 {
 	struct zpci_bus *zbus;
@@ -279,9 +322,6 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
 	zbus->topo = topo;
 	zbus->topo_is_tid = topo_is_tid;
 	INIT_LIST_HEAD(&zbus->bus_next);
-	mutex_lock(&zbus_list_lock);
-	list_add_tail(&zbus->bus_next, &zbus_list);
-	mutex_unlock(&zbus_list_lock);
 
 	kref_init(&zbus->kref);
 	INIT_LIST_HEAD(&zbus->resources);
@@ -291,6 +331,10 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
 	zbus->bus_resource.flags = IORESOURCE_BUS;
 	pci_add_resource(&zbus->resources, &zbus->bus_resource);
 
+	mutex_lock(&zbus_list_lock);
+	list_add_tail(&zbus->bus_next, &zbus_list);
+	mutex_unlock(&zbus_list_lock);
+
 	return zbus;
 }
 
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index ae3d7a9159bde1..e440742e3145fd 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -15,7 +15,20 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
 void zpci_bus_device_unregister(struct zpci_dev *zdev);
 
 int zpci_bus_scan_bus(struct zpci_bus *zbus);
-void zpci_bus_scan_busses(void);
+void zpci_bus_get_next(struct zpci_bus **pos);
+
+/**
+ * zpci_bus_for_each - iterate over all the registered zbus objects
+ * @pos:	a struct zpci_bus * as cursor
+ *
+ * Acquires and releases references as the cursor iterates over the registered
+ * objects. Is tolerant against concurrent removals of objects.
+ *
+ * Context: Process context. May sleep.
+ */
+#define zpci_bus_for_each(pos)					     \
+	for ((pos) = NULL, zpci_bus_get_next(&(pos)); (pos) != NULL; \
+	     zpci_bus_get_next(&(pos)))
 
 int zpci_bus_scan_device(struct zpci_dev *zdev);
 void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);

From af241e6bfc11125e6669dabf0800fce6809dd3cf Mon Sep 17 00:00:00 2001
From: Benjamin Block <bblock@linux.ibm.com>
Date: Fri, 5 Dec 2025 16:47:18 +0100
Subject: [PATCH 102/258] s390/pci: Annotate lock context imbalance in
 zpci_release_device()

When checking `arch/s390/pci/pci.c` with `sparse` during build, the
following complaint is reported:

  arch/s390/pci/pci.c: note: in included file (through include/linux/smp.h, include/linux/lockdep.h, include/linux/spinlock.h, include/linux/mmzone.h, include/linux/gfp.h, include/linux/slab.h):
  ./include/linux/list.h:237:25: warning: context imbalance in 'zpci_release_device' - unexpected unlock

But this is expected, as zpci_release_device() is expected to be called
with `zpci_list_lock` held, as part of `kref_put_lock()` or similar.

Reflect this by annotating the function with the appropriate
__releases().

Signed-off-by: Benjamin Block <bblock@linux.ibm.com>
Reviewed-by: Farhan Ali <alifm@linux.ibm.com>
Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/pci/pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 8fd14d0430085d..57f3980b98a927 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -961,6 +961,7 @@ void zpci_device_reserved(struct zpci_dev *zdev)
 }
 
 void zpci_release_device(struct kref *kref)
+	__releases(&zpci_list_lock)
 {
 	struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
 

From 489e96651dfe59794195c6b2ddb78835edd9f2ed Mon Sep 17 00:00:00 2001
From: Jens Remus <jremus@linux.ibm.com>
Date: Thu, 11 Dec 2025 12:24:50 +0100
Subject: [PATCH 103/258] s390/stacktrace: Do not fallback to RA register

The logic to fallback to the return address (RA) register value in
the topmost frame when stack tracing using back chain is broken in
multiple ways:

When assuming the RA register 14 has not been saved yet one must assume
that a new user stack frame has not been allocated either.  Therefore
the back chain would not contain the stack pointer (SP) at entry, but
the caller's SP at its entry instead.

Therefore when falling back to the RA register 14 value it would also be
necessary to fallback to the SP register 15 value.  Otherwise an invalid
combination of RA register 14 and caller's SP at its entry (from the
back chain) is used.

In the topmost frame the back chain contains either the caller's SP at
its entry (before having allocated a new stack frame in the prologue),
the SP at entry (after having allocated a new stack frame), or an
uninitialized value (during static/dynamic stack allocation).  In both
cases where the back chain is valid either the caller or prologue must
have saved its respective RA to the respective frame.  Therefore, if the
RA obtained from the frame pointed to by the back chain is invalid, this
does not indicate that the IP in the topmost frame is still early in the
prologue and the RA has not been saved.

Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/stacktrace.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 3aae7f70e6ab12..18520d33305818 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -104,7 +104,6 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo
 	struct stack_frame_vdso_wrapper __user *sf_vdso;
 	struct stack_frame_user __user *sf;
 	unsigned long ip, sp;
-	bool first = true;
 
 	if (!current->mm)
 		return;
@@ -133,24 +132,11 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo
 			if (__get_user(ip, &sf->gprs[8]))
 				break;
 		}
-		/* Sanity check: ABI requires SP to be 8 byte aligned. */
-		if (sp & 0x7)
+		/* Validate SP and RA (ABI requires SP to be 8 byte aligned). */
+		if (sp & 0x7 || ip_invalid(ip))
 			break;
-		if (ip_invalid(ip)) {
-			/*
-			 * If the instruction address is invalid, and this
-			 * is the first stack frame, assume r14 has not
-			 * been written to the stack yet. Otherwise exit.
-			 */
-			if (!first)
-				break;
-			ip = regs->gprs[14];
-			if (ip_invalid(ip))
-				break;
-		}
 		if (!store_ip(consume_entry, cookie, entry, perf, ip))
 			break;
-		first = false;
 	}
 	pagefault_enable();
 }

From a8a313612af7a55083ba5720f14f1835319debee Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 13 Dec 2025 08:48:51 +0100
Subject: [PATCH 104/258] spi: mpfs: Fix an error handling path in
 mpfs_spi_probe()

mpfs_spi_init() calls mpfs_spi_enable_ints(), so mpfs_spi_disable_ints()
should be called if an error occurs after calling mpfs_spi_init(), as
already done in the remove function.

Fixes: 9ac8d17694b6 ("spi: add support for microchip fpga spi controllers")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://patch.msgid.link/eb35f168517cc402ef7e78f26da02863e2f45c03.1765612110.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mpfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi-mpfs.c b/drivers/spi/spi-mpfs.c
index 9a14d1732a1598..7e9e64d8e6c813 100644
--- a/drivers/spi/spi-mpfs.c
+++ b/drivers/spi/spi-mpfs.c
@@ -577,6 +577,7 @@ static int mpfs_spi_probe(struct platform_device *pdev)
 
 	ret = devm_spi_register_controller(&pdev->dev, host);
 	if (ret) {
+		mpfs_spi_disable_ints(spi);
 		mpfs_spi_disable(spi);
 		return dev_err_probe(&pdev->dev, ret,
 				     "unable to register host for SPI controller\n");

From 1417927df8049a0194933861e9b098669a95c762 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Thu, 20 Nov 2025 09:34:49 +0100
Subject: [PATCH 105/258] spi: fsl-cpm: Check length parity before switching to
 16 bit mode

Commit fc96ec826bce ("spi: fsl-cpm: Use 16 bit mode for large transfers
with even size") failed to make sure that the size is really even
before switching to 16 bit mode. Until recently the problem went
unnoticed because kernfs uses a pre-allocated bounce buffer of size
PAGE_SIZE for reading EEPROM.

But commit 8ad6249c51d0 ("eeprom: at25: convert to spi-mem API")
introduced an additional dynamically allocated bounce buffer whose size
is exactly the size of the transfer, leading to a buffer overrun in
the fsl-cpm driver when that size is odd.

Add the missing length parity verification and remain in 8 bit mode
when the length is not even.

Fixes: fc96ec826bce ("spi: fsl-cpm: Use 16 bit mode for large transfers with even size")
Cc: stable@vger.kernel.org
Closes: https://lore.kernel.org/all/638496dd-ec60-4e53-bad7-eb657f67d580@csgroup.eu/
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Sverdlin Alexander <alexander.sverdlin@siemens.com>
Link: https://patch.msgid.link/3c4d81c3923c93f95ec56702a454744a4bad3cfc.1763627618.git.christophe.leroy@csgroup.eu
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-spi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
index 2f2082652a1a28..481a7b28aacd3d 100644
--- a/drivers/spi/spi-fsl-spi.c
+++ b/drivers/spi/spi-fsl-spi.c
@@ -335,7 +335,7 @@ static int fsl_spi_prepare_message(struct spi_controller *ctlr,
 			if (t->bits_per_word == 16 || t->bits_per_word == 32)
 				t->bits_per_word = 8; /* pretend its 8 bits */
 			if (t->bits_per_word == 8 && t->len >= 256 &&
-			    (mpc8xxx_spi->flags & SPI_CPM1))
+			    !(t->len & 1) && (mpc8xxx_spi->flags & SPI_CPM1))
 				t->bits_per_word = 16;
 		}
 	}

From 46c28bbbb150b80827e4bcbea231560af9d16854 Mon Sep 17 00:00:00 2001
From: Denis Sergeev <denserg.edu@gmail.com>
Date: Tue, 9 Dec 2025 09:37:06 +0300
Subject: [PATCH 106/258] hwmon: (dell-smm) Limit fan multiplier to avoid
 overflow

The fan nominal speed returned by SMM is limited to 16 bits, but the
driver allows the fan multiplier to be set via a module parameter.

Clamp the computed fan multiplier so that fan_nominal_speed *
i8k_fan_mult always fits into a signed 32-bit integer and refuse to
initialize the driver if the value is too large.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Fixes: 20bdeebc88269 ("hwmon: (dell-smm) Introduce helper function for data init")
Signed-off-by: Denis Sergeev <denserg.edu@gmail.com>
Link: https://lore.kernel.org/r/20251209063706.49008-1-denserg.edu@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/dell-smm-hwmon.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index a34753fc29733a..6040a894067438 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -76,6 +76,9 @@
 #define DELL_SMM_NO_TEMP	10
 #define DELL_SMM_NO_FANS	4
 
+/* limit fan multiplier to avoid overflow */
+#define DELL_SMM_MAX_FAN_MULT (INT_MAX / U16_MAX)
+
 struct smm_regs {
 	unsigned int eax;
 	unsigned int ebx;
@@ -1253,6 +1256,12 @@ static int dell_smm_init_data(struct device *dev, const struct dell_smm_ops *ops
 	data->ops = ops;
 	/* All options must not be 0 */
 	data->i8k_fan_mult = fan_mult ? : I8K_FAN_MULT;
+	if (data->i8k_fan_mult > DELL_SMM_MAX_FAN_MULT) {
+		dev_err(dev,
+			"fan multiplier %u is too large (max %u)\n",
+			data->i8k_fan_mult, DELL_SMM_MAX_FAN_MULT);
+		return -EINVAL;
+	}
 	data->i8k_fan_max = fan_max ? : I8K_FAN_HIGH;
 	data->i8k_pwm_mult = DIV_ROUND_UP(255, data->i8k_fan_max);
 

From 6946c726c3f4c36f0f049e6f97e88c510b15f65d Mon Sep 17 00:00:00 2001
From: Junrui Luo <moonafterrain@outlook.com>
Date: Wed, 10 Dec 2025 17:48:08 +0800
Subject: [PATCH 107/258] hwmon: (ibmpex) fix use-after-free in high/low store

The ibmpex_high_low_store() function retrieves driver data using
dev_get_drvdata() and uses it without validation. This creates a race
condition where the sysfs callback can be invoked after the data
structure is freed, leading to use-after-free.

Fix by adding a NULL check after dev_get_drvdata(), and reordering
operations in the deletion path to prevent TOCTOU.

Reported-by: Yuhao Jiang <danisjiang@gmail.com>
Reported-by: Junrui Luo <moonafterrain@outlook.com>
Fixes: 57c7c3a0fdea ("hwmon: IBM power meter driver")
Signed-off-by: Junrui Luo <moonafterrain@outlook.com>
Link: https://lore.kernel.org/r/MEYPR01MB7886BE2F51BFE41875B74B60AFA0A@MEYPR01MB7886.ausprd01.prod.outlook.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ibmpex.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c
index 228c5f6c6f3836..129f3a9e8fe965 100644
--- a/drivers/hwmon/ibmpex.c
+++ b/drivers/hwmon/ibmpex.c
@@ -277,6 +277,9 @@ static ssize_t ibmpex_high_low_store(struct device *dev,
 {
 	struct ibmpex_bmc_data *data = dev_get_drvdata(dev);
 
+	if (!data)
+		return -ENODEV;
+
 	ibmpex_reset_high_low_data(data);
 
 	return count;
@@ -508,6 +511,9 @@ static void ibmpex_bmc_delete(struct ibmpex_bmc_data *data)
 {
 	int i, j;
 
+	hwmon_device_unregister(data->hwmon_dev);
+	dev_set_drvdata(data->bmc_device, NULL);
+
 	device_remove_file(data->bmc_device,
 			   &sensor_dev_attr_reset_high_low.dev_attr);
 	device_remove_file(data->bmc_device, &dev_attr_name.attr);
@@ -521,8 +527,7 @@ static void ibmpex_bmc_delete(struct ibmpex_bmc_data *data)
 		}
 
 	list_del(&data->list);
-	dev_set_drvdata(data->bmc_device, NULL);
-	hwmon_device_unregister(data->hwmon_dev);
+
 	ipmi_destroy_user(data->user);
 	kfree(data->sensors);
 	kfree(data);

From 82f2aab35a1ab2e1460de06ef04c726460aed51c Mon Sep 17 00:00:00 2001
From: Alexey Simakov <bigalex934@gmail.com>
Date: Thu, 11 Dec 2025 19:43:43 +0300
Subject: [PATCH 108/258] hwmon: (tmp401) fix overflow caused by default
 conversion rate value

The driver computes conversion intervals using the formula:

    interval = (1 << (7 - rate)) * 125ms

where 'rate' is the sensor's conversion rate register value. According to
the datasheet, the power-on reset value of this register is 0x8, which
could be assigned to the register, after handling i2c general call.
Using this default value causes a result greater than the bit width of
left operand and an undefined behaviour in the calculation above, since
shifting by values larger than the bit width is undefined behaviour as
per C language standard.

Limit the maximum usable 'rate' value to 7 to prevent undefined
behaviour in calculations.

Found by Linux Verification Center (linuxtesting.org) with Svace.

Note (groeck):
    This does not matter in practice unless someone overwrites the chip
    configuration from outside the driver while the driver is loaded.
    The conversion time register is initialized with a value of 5 (500ms)
    when the driver is loaded, and the driver never writes a bad value.

Fixes: ca53e7640de7 ("hwmon: (tmp401) Convert to _info API")
Signed-off-by: Alexey Simakov <bigalex934@gmail.com>
Link: https://lore.kernel.org/r/20251211164342.6291-1-bigalex934@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/tmp401.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/tmp401.c b/drivers/hwmon/tmp401.c
index fbaa34973694f2..07f596581c6eb7 100644
--- a/drivers/hwmon/tmp401.c
+++ b/drivers/hwmon/tmp401.c
@@ -397,7 +397,7 @@ static int tmp401_chip_read(struct device *dev, u32 attr, int channel, long *val
 		ret = regmap_read(data->regmap, TMP401_CONVERSION_RATE, &regval);
 		if (ret < 0)
 			return ret;
-		*val = (1 << (7 - regval)) * 125;
+		*val = (1 << (7 - min(regval, 7))) * 125;
 		break;
 	case hwmon_chip_temp_reset_history:
 		*val = 0;

From c4b502d60a71cf0c0c938f133dc4c0e2adc17b44 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 9 Dec 2025 06:48:49 +0100
Subject: [PATCH 109/258] arm64/simd: Avoid pointless clearing of FP/SIMD
 buffer

The buffer provided to kernel_neon_begin() is only used if the task is
scheduled out while the FP/SIMD is in use by the kernel, or when such a
section is interrupted by a softirq that also uses the FP/SIMD.

IOW, this happens rarely, and even if it happened often, there is still
no reason for this buffer to be cleared beforehand, which happens
unconditionally, due to the use of a compound literal expression.

So define that buffer variable explicitly, and mark it as
__uninitialized so that it will not get cleared, even when
-ftrivial-auto-var-init is in effect.

This requires some preprocessor gymnastics, due to the fact that the
variable must be defined throughout the entire guarded scope, and the
expression

  ({ struct user_fpsimd_state __uninitialized st; &st; })

is problematic in that regard, even though the compilers seem to
permit it. So instead, repeat the 'for ()' trick that is also used in
the implementation of the guarded scope helpers.

Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Fixes: 4fa617cc6851 ("arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack")
Link: https://lore.kernel.org/r/20251209054848.998878-2-ardb@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 arch/arm64/include/asm/simd.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 0941f6f58a1463..69ecbd69ca8cc7 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -48,6 +48,13 @@ DEFINE_LOCK_GUARD_1(ksimd,
 		    kernel_neon_begin(_T->lock),
 		    kernel_neon_end(_T->lock))
 
-#define scoped_ksimd()	scoped_guard(ksimd, &(struct user_fpsimd_state){})
+#define __scoped_ksimd(_label)					\
+	for (struct user_fpsimd_state __uninitialized __st;	\
+	     true; ({ goto _label; }))				\
+		if (0) {					\
+_label:			break;					\
+		} else scoped_guard(ksimd, &__st)
+
+#define scoped_ksimd()	__scoped_ksimd(__UNIQUE_ID(label))
 
 #endif

From 5a0b1882506858b12cc77f0e2439a5f3c5052761 Mon Sep 17 00:00:00 2001
From: Charles Mirabile <cmirabil@redhat.com>
Date: Fri, 12 Dec 2025 13:47:17 -0500
Subject: [PATCH 110/258] lib/crypto: riscv: Add poly1305-core.S to .gitignore

poly1305-core.S is an auto-generated file, so it should be ignored.

Fixes: bef9c7559869 ("lib/crypto: riscv/poly1305: Import OpenSSL/CRYPTOGAMS implementation")
Cc: stable@vger.kernel.org
Signed-off-by: Charles Mirabile <cmirabil@redhat.com>
Link: https://lore.kernel.org/r/20251212184717.133701-1-cmirabil@redhat.com
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 lib/crypto/riscv/.gitignore | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 lib/crypto/riscv/.gitignore

diff --git a/lib/crypto/riscv/.gitignore b/lib/crypto/riscv/.gitignore
new file mode 100644
index 00000000000000..0d47d4f21c6de9
--- /dev/null
+++ b/lib/crypto/riscv/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+poly1305-core.S

From 635bc4def026a24e071436f4f356ea08c0eed6ff Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sun, 7 Dec 2025 11:44:55 +0100
Subject: [PATCH 111/258] fsnotify: do not generate ACCESS/MODIFY events on
 child for special files

inotify/fanotify do not allow users with no read access to a file to
subscribe to events (e.g. IN_ACCESS/IN_MODIFY), but they do allow the
same user to subscribe for watching events on children when the user
has access to the parent directory (e.g. /dev).

Users with no read access to a file but with read access to its parent
directory can still stat the file and see if it was accessed/modified
via atime/mtime change.

The same is not true for special files (e.g. /dev/null). Users will not
generally observe atime/mtime changes when other users read/write to
special files, only when someone sets atime/mtime via utimensat().

Align fsnotify events with this stat behavior and do not generate
ACCESS/MODIFY events to parent watchers on read/write of special files.
The events are still generated to parent watchers on utimensat(). This
closes some side-channels that could be possibly used for information
exfiltration [1].

[1] https://snee.la/pdf/pubs/file-notification-attacks.pdf

Reported-by: Sudheendra Raghav Neela <sneela@tugraz.at>
CC: stable@vger.kernel.org
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fsnotify.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 46bfc543f9467c..63dd44931989d4 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -270,8 +270,15 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
 	/*
 	 * Include parent/name in notification either if some notification
 	 * groups require parent info or the parent is interested in this event.
+	 * The parent interest in ACCESS/MODIFY events does not apply to special
+	 * files, where read/write are not on the filesystem of the parent and
+	 * events can provide an undesirable side-channel for information
+	 * exfiltration.
 	 */
-	parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS;
+	parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS &&
+			    !(data_type == FSNOTIFY_EVENT_PATH &&
+			      d_is_special(dentry) &&
+			      (mask & (FS_ACCESS | FS_MODIFY)));
 	if (parent_needed || parent_interested) {
 		/* When notifying parent, child should be passed as data */
 		WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type));

From 6f7c877cc397ba3c6d8ba44d4a604df3d4182eec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ahelenia=20Ziemia=C5=84ska?=
 <nabijaczleweli@nabijaczleweli.xyz>
Date: Mon, 8 Dec 2025 23:20:24 +0100
Subject: [PATCH 112/258] fs: send fsnotify_xattr()/IN_ATTRIB from
 vfs_fileattr_set()/chattr(1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently it seems impossible to observe these changes to the file's
attributes. It's useful to be able to do this to see when the file
becomes immutable, for example, so emit IN_ATTRIB via fsnotify_xattr(),
like when changing other inode attributes.

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Link: https://patch.msgid.link/iyvn6qjotpu6cei5jdtsoibfcp6l6rgvn47cwgaucgtucpfy2s@tarta.nabijaczleweli.xyz
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/file_attr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/file_attr.c b/fs/file_attr.c
index 1dcec88c068050..fac41048f7bc89 100644
--- a/fs/file_attr.c
+++ b/fs/file_attr.c
@@ -2,6 +2,7 @@
 #include <linux/fs.h>
 #include <linux/security.h>
 #include <linux/fscrypt.h>
+#include <linux/fsnotify.h>
 #include <linux/fileattr.h>
 #include <linux/export.h>
 #include <linux/syscalls.h>
@@ -298,6 +299,7 @@ int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry,
 		err = inode->i_op->fileattr_set(idmap, dentry, fa);
 		if (err)
 			goto out;
+		fsnotify_xattr(dentry);
 	}
 
 out:

From ed724ea1b82a800af4704311cb89e5ef1b4ea7ac Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@codeconstruct.com.au>
Date: Thu, 11 Dec 2025 17:45:48 +0900
Subject: [PATCH 113/258] dt-bindings: mmc: sdhci-of-aspeed: Switch ref to
 sdhci-common.yaml

Enable use of common SDHCI-related properties such as sdhci-caps-mask as
found in the AST2600 EVB DTS.

Cc: stable@vger.kernel.org # v6.2+
Signed-off-by: Andrew Jeffery <andrew@codeconstruct.com.au>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/aspeed,sdhci.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/mmc/aspeed,sdhci.yaml b/Documentation/devicetree/bindings/mmc/aspeed,sdhci.yaml
index 9fce8cd7b0b62b..d24950ccea9522 100644
--- a/Documentation/devicetree/bindings/mmc/aspeed,sdhci.yaml
+++ b/Documentation/devicetree/bindings/mmc/aspeed,sdhci.yaml
@@ -41,7 +41,7 @@ properties:
 patternProperties:
   "^sdhci@[0-9a-f]+$":
     type: object
-    $ref: mmc-controller.yaml
+    $ref: sdhci-common.yaml
     unevaluatedProperties: false
 
     properties:

From 7bda1910c4bccd4b8d4726620bb3d6bbfb62286e Mon Sep 17 00:00:00 2001
From: Sumeet Pawnikar <sumeet4linux@gmail.com>
Date: Sat, 6 Dec 2025 00:32:16 +0530
Subject: [PATCH 114/258] powercap: fix race condition in
 register_control_type()

The device becomes visible to userspace via device_register()
even before it fully initialized by idr_init(). If userspace
or another thread tries to register a zone immediately after
device_register(), the control_type_valid() will fail because
the control_type is not yet in the list. The IDR is not yet
initialized, so this race condition causes zone registration
failure.

Move idr_init() and list addition before device_register()
fix the race condition.

Signed-off-by: Sumeet Pawnikar <sumeet4linux@gmail.com>
[ rjw: Subject adjustment, empty line added ]
Link: https://patch.msgid.link/20251205190216.5032-1-sumeet4linux@gmail.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/powercap_sys.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c
index 4112a009733826..d14b36b75189d7 100644
--- a/drivers/powercap/powercap_sys.c
+++ b/drivers/powercap/powercap_sys.c
@@ -625,17 +625,23 @@ struct powercap_control_type *powercap_register_control_type(
 	INIT_LIST_HEAD(&control_type->node);
 	control_type->dev.class = &powercap_class;
 	dev_set_name(&control_type->dev, "%s", name);
-	result = device_register(&control_type->dev);
-	if (result) {
-		put_device(&control_type->dev);
-		return ERR_PTR(result);
-	}
 	idr_init(&control_type->idr);
 
 	mutex_lock(&powercap_cntrl_list_lock);
 	list_add_tail(&control_type->node, &powercap_cntrl_list);
 	mutex_unlock(&powercap_cntrl_list_lock);
 
+	result = device_register(&control_type->dev);
+	if (result) {
+		mutex_lock(&powercap_cntrl_list_lock);
+		list_del(&control_type->node);
+		mutex_unlock(&powercap_cntrl_list_lock);
+
+		idr_destroy(&control_type->idr);
+		put_device(&control_type->dev);
+		return ERR_PTR(result);
+	}
+
 	return control_type;
 }
 EXPORT_SYMBOL_GPL(powercap_register_control_type);

From efc4c35b741af973de90f6826bf35d3b3ac36bf1 Mon Sep 17 00:00:00 2001
From: Sumeet Pawnikar <sumeet4linux@gmail.com>
Date: Sun, 7 Dec 2025 20:45:48 +0530
Subject: [PATCH 115/258] powercap: fix sscanf() error return value handling

Fix inconsistent error handling for sscanf() return value check.

Implicit boolean conversion is used instead of explicit return
value checks. The code checks if (!sscanf(...)) which is incorrect
because:
 1. sscanf returns the number of successfully parsed items
 2. On success, it returns 1 (one item passed)
 3. On failure, it returns 0 or EOF
 4. The check 'if (!sscanf(...))' is wrong because it treats
    success (1) as failure

All occurrences of sscanf() now uses explicit return value check.
With this behavior it returns '-EINVAL' when parsing fails (returns
0 or EOF), and continues when parsing succeeds (returns 1).

Signed-off-by: Sumeet Pawnikar <sumeet4linux@gmail.com>
[ rjw: Subject and changelog edits ]
Link: https://patch.msgid.link/20251207151549.202452-1-sumeet4linux@gmail.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/powercap_sys.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c
index d14b36b75189d7..1ff369880beb28 100644
--- a/drivers/powercap/powercap_sys.c
+++ b/drivers/powercap/powercap_sys.c
@@ -68,7 +68,7 @@ static ssize_t show_constraint_##_attr(struct device *dev, \
 	int id; \
 	struct powercap_zone_constraint *pconst;\
 	\
-	if (!sscanf(dev_attr->attr.name, "constraint_%d_", &id)) \
+	if (sscanf(dev_attr->attr.name, "constraint_%d_", &id) != 1) \
 		return -EINVAL; \
 	if (id >= power_zone->const_id_cnt)	\
 		return -EINVAL; \
@@ -93,7 +93,7 @@ static ssize_t store_constraint_##_attr(struct device *dev,\
 	int id; \
 	struct powercap_zone_constraint *pconst;\
 	\
-	if (!sscanf(dev_attr->attr.name, "constraint_%d_", &id)) \
+	if (sscanf(dev_attr->attr.name, "constraint_%d_", &id) != 1) \
 		return -EINVAL; \
 	if (id >= power_zone->const_id_cnt)	\
 		return -EINVAL; \
@@ -162,7 +162,7 @@ static ssize_t show_constraint_name(struct device *dev,
 	ssize_t len = -ENODATA;
 	struct powercap_zone_constraint *pconst;
 
-	if (!sscanf(dev_attr->attr.name, "constraint_%d_", &id))
+	if (sscanf(dev_attr->attr.name, "constraint_%d_", &id) != 1)
 		return -EINVAL;
 	if (id >= power_zone->const_id_cnt)
 		return -EINVAL;

From 450f9cde66a92fd6c7f6870b3501e8debe9f69cc Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 5 Dec 2025 15:00:07 -0800
Subject: [PATCH 116/258] thermal: intel: int340x: Enable power slider
 interface for Wildcat Lake

Set the PROC_THERMAL_FEATURE_SOC_POWER_SLIDER feature flag in
proc_thermal_pci_ids[] for Wildcat Lake to enable power slider interface.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://patch.msgid.link/20251205230007.2218533-1-srinivas.pandruvada@linux.intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../intel/int340x_thermal/processor_thermal_device_pci.c       | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
index 0d4dcc66e097e5..c693d934103afa 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
@@ -503,7 +503,8 @@ static const struct pci_device_id proc_thermal_pci_ids[] = {
 	{ PCI_DEVICE_DATA(INTEL, WCL_THERMAL, PROC_THERMAL_FEATURE_MSI_SUPPORT |
 	  PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_DLVR |
 	  PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_WT_HINT |
-	  PROC_THERMAL_FEATURE_POWER_FLOOR | PROC_THERMAL_FEATURE_PTC) },
+	  PROC_THERMAL_FEATURE_POWER_FLOOR | PROC_THERMAL_FEATURE_PTC |
+	  PROC_THERMAL_FEATURE_SOC_POWER_SLIDER) },
 	{ PCI_DEVICE_DATA(INTEL, NVL_H_THERMAL, PROC_THERMAL_FEATURE_RAPL |
 	  PROC_THERMAL_FEATURE_DLVR | PROC_THERMAL_FEATURE_DVFS |
 	  PROC_THERMAL_FEATURE_MSI_SUPPORT | PROC_THERMAL_FEATURE_WT_HINT |

From d113735421da322ea144c9778c433de6ff6bc57b Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@linux.dev>
Date: Sat, 6 Dec 2025 18:42:45 +0100
Subject: [PATCH 117/258] thermal: core: Fix typo and indentation in comments

s/tmperature/temperature/ and adjust the indentation of the @ops
parameter description to improve readability.

Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Link: https://patch.msgid.link/20251206174245.116391-2-thorsten.blum@linux.dev
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/thermal_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 17ca5c08264359..89758c9934ec67 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -500,7 +500,7 @@ void thermal_zone_set_trip_hyst(struct thermal_zone_device *tz,
 	WRITE_ONCE(trip->hysteresis, hyst);
 	thermal_notify_tz_trip_change(tz, trip);
 	/*
-	 * If the zone temperature is above or at the trip tmperature, the trip
+	 * If the zone temperature is above or at the trip temperature, the trip
 	 * is in the trips_reached list and its threshold is equal to its low
 	 * temperature.  It needs to stay in that list, but its threshold needs
 	 * to be updated and the list ordering may need to be restored.
@@ -1043,7 +1043,7 @@ static void thermal_cooling_device_init_complete(struct thermal_cooling_device *
  * @np:		a pointer to a device tree node.
  * @type:	the thermal cooling device type.
  * @devdata:	device private data.
- * @ops:		standard thermal cooling devices callbacks.
+ * @ops:	standard thermal cooling devices callbacks.
  *
  * This interface function adds a new thermal cooling device (fan/processor/...)
  * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself

From 6ea3a44cef28add2d93b1ef119d84886cb1e3c9b Mon Sep 17 00:00:00 2001
From: Pengjie Zhang <zhangpengjie2@huawei.com>
Date: Wed, 10 Dec 2025 21:22:27 +0800
Subject: [PATCH 118/258] ACPI: CPPC: Fix missing PCC check for guaranteed_perf

The current implementation overlooks the 'guaranteed_perf'
register in this check.

If the Guaranteed Performance register is located in the PCC
subspace, the function currently attempts to read it without
acquiring the lock and without sending the CMD_READ doorbell
to the firmware. This can result in reading stale data.

Fixes: 29523f095397 ("ACPI / CPPC: Add support for guaranteed performance")
Signed-off-by: Pengjie Zhang <zhangpengjie2@huawei.com>
Cc: 4.20+ <stable@vger.kernel.org> # 4.20+
[ rjw: Subject and changelog edits ]
Link: https://patch.msgid.link/20251210132227.1988380-1-zhangpengjie2@huawei.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/cppc_acpi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 3bdeeee3414e68..e66e20d1f31b76 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1366,7 +1366,8 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps)
 	/* Are any of the regs PCC ?*/
 	if (CPC_IN_PCC(highest_reg) || CPC_IN_PCC(lowest_reg) ||
 		CPC_IN_PCC(lowest_non_linear_reg) || CPC_IN_PCC(nominal_reg) ||
-		CPC_IN_PCC(low_freq_reg) || CPC_IN_PCC(nom_freq_reg)) {
+		CPC_IN_PCC(low_freq_reg) || CPC_IN_PCC(nom_freq_reg) ||
+		CPC_IN_PCC(guaranteed_reg)) {
 		if (pcc_ss_id < 0) {
 			pr_debug("Invalid pcc_ss_id\n");
 			return -ENODEV;

From f103fa127c93016bcd89b05d8e11dc1a84f6990d Mon Sep 17 00:00:00 2001
From: Pengjie Zhang <zhangpengjie2@huawei.com>
Date: Wed, 10 Dec 2025 21:26:34 +0800
Subject: [PATCH 119/258] ACPI: PCC: Fix race condition by removing static
 qualifier

Local variable 'ret' in acpi_pcc_address_space_setup() is currently
declared as 'static'. This can lead to race conditions in a
multithreaded environment.

Remove the 'static' qualifier to ensure that 'ret' will be allocated
directly on the stack as a local variable.

Fixes: a10b1c99e2dc ("ACPI: PCC: Setup PCC Opregion handler only if platform interrupt is available")
Signed-off-by: Pengjie Zhang <zhangpengjie2@huawei.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: lihuisong@huawei.com
Cc: 6.2+ <stable@vger.kernel.org> # 6.2+
[ rjw: Changelog edits ]
Link: https://patch.msgid.link/20251210132634.2050033-1-zhangpengjie2@huawei.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_pcc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/acpi_pcc.c b/drivers/acpi/acpi_pcc.c
index 97064e943768ad..e3f302b9dee5f5 100644
--- a/drivers/acpi/acpi_pcc.c
+++ b/drivers/acpi/acpi_pcc.c
@@ -52,7 +52,7 @@ acpi_pcc_address_space_setup(acpi_handle region_handle, u32 function,
 	struct pcc_data *data;
 	struct acpi_pcc_info *ctx = handler_context;
 	struct pcc_mbox_chan *pcc_chan;
-	static acpi_status ret;
+	acpi_status ret;
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)

From b7737c38e7cb611c2fbd87af3b09afeb92c96fe7 Mon Sep 17 00:00:00 2001
From: Kevin Brodsky <kevin.brodsky@arm.com>
Date: Wed, 19 Nov 2025 13:00:16 +0000
Subject: [PATCH 120/258] arm64: mm: Simplify check in arch_kfence_init_pool()

TL;DR: checking force_pte_mapping() in arch_kfence_init_pool() is
sufficient

Commit ce2b3a50ad92 ("arm64: mm: Don't sleep in
split_kernel_leaf_mapping() when in atomic context") recently added
an arm64 implementation of arch_kfence_init_pool() to ensure that
the KFENCE pool is PTE-mapped. Assuming that the pool was not
initialised early, block splitting is necessary if the linear
mapping is not fully PTE-mapped, in other words if
force_pte_mapping() is false.

arch_kfence_init_pool() currently makes another check: whether
BBML2-noabort is supported, i.e. whether we are *able* to split
block mappings. This check is however unnecessary, because
force_pte_mapping() is always true if KFENCE is enabled and
BBML2-noabort is not supported. This must be the case by design,
since KFENCE requires PTE-mapped pages in all cases. We can
therefore remove that check.

The situation is different in split_kernel_leaf_mapping(), as that
function is called unconditionally regardless of the configuration.
If BBML2-noabort is not supported, it cannot do anything and bails
out. If force_pte_mapping() is true, there is nothing to do and it
also bails out, but these are independent checks.

Commit 53357f14f924 ("arm64: mm: Tidy up force_pte_mapping()")
grouped these checks into a helper, split_leaf_mapping_possible().
This isn't so helpful as only split_kernel_leaf_mapping() should
check both. Revert the parts of that commit that introduced the
helper, reintroducing the more accurate comments in
split_kernel_leaf_mapping().

Signed-off-by: Kevin Brodsky <kevin.brodsky@arm.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/mmu.c | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 9ae7ce00a7ef28..8e1d80a7033e34 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -767,18 +767,6 @@ static inline bool force_pte_mapping(void)
 	return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world();
 }
 
-static inline bool split_leaf_mapping_possible(void)
-{
-	/*
-	 * !BBML2_NOABORT systems should never run into scenarios where we would
-	 * have to split. So exit early and let calling code detect it and raise
-	 * a warning.
-	 */
-	if (!system_supports_bbml2_noabort())
-		return false;
-	return !force_pte_mapping();
-}
-
 static DEFINE_MUTEX(pgtable_split_lock);
 
 int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
@@ -786,11 +774,22 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
 	int ret;
 
 	/*
-	 * Exit early if the region is within a pte-mapped area or if we can't
-	 * split. For the latter case, the permission change code will raise a
-	 * warning if not already pte-mapped.
+	 * !BBML2_NOABORT systems should not be trying to change permissions on
+	 * anything that is not pte-mapped in the first place. Just return early
+	 * and let the permission change code raise a warning if not already
+	 * pte-mapped.
 	 */
-	if (!split_leaf_mapping_possible() || is_kfence_address((void *)start))
+	if (!system_supports_bbml2_noabort())
+		return 0;
+
+	/*
+	 * If the region is within a pte-mapped area, there is no need to try to
+	 * split. Additionally, CONFIG_DEBUG_PAGEALLOC and CONFIG_KFENCE may
+	 * change permissions from atomic context so for those cases (which are
+	 * always pte-mapped), we must not go any further because taking the
+	 * mutex below may sleep.
+	 */
+	if (force_pte_mapping() || is_kfence_address((void *)start))
 		return 0;
 
 	/*
@@ -1089,7 +1088,7 @@ bool arch_kfence_init_pool(void)
 	int ret;
 
 	/* Exit early if we know the linear map is already pte-mapped. */
-	if (!split_leaf_mapping_possible())
+	if (force_pte_mapping())
 		return true;
 
 	/* Kfence pool is already pte-mapped for the early init case. */

From eb972eab0794dedeef5b3b1845e5f9a78793f184 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sat, 6 Dec 2025 20:01:16 +0100
Subject: [PATCH 121/258] lkdtm/bugs: Add cases for BUG and PANIC occurring in
 hardirq context

Add lkdtm cases to trigger a BUG() or panic() from hardirq context. This
is useful for testing pstore behavior being invoked from such contexts.

Reviewed-by: Kees Cook <kees@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 drivers/misc/lkdtm/bugs.c               | 53 +++++++++++++++++++++++++
 tools/testing/selftests/lkdtm/tests.txt |  2 +
 2 files changed, 55 insertions(+)

diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index 376047beea3d64..fa05d77acb558d 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -8,6 +8,7 @@
 #include "lkdtm.h"
 #include <linux/cpu.h>
 #include <linux/list.h>
+#include <linux/hrtimer.h>
 #include <linux/sched.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/task_stack.h>
@@ -100,11 +101,61 @@ static void lkdtm_PANIC_STOP_IRQOFF(void)
 	stop_machine(panic_stop_irqoff_fn, &v, cpu_online_mask);
 }
 
+static bool wait_for_panic;
+
+static enum hrtimer_restart panic_in_hardirq(struct hrtimer *timer)
+{
+	panic("from hard IRQ context");
+
+	wait_for_panic = false;
+	return HRTIMER_NORESTART;
+}
+
+static void lkdtm_PANIC_IN_HARDIRQ(void)
+{
+	struct hrtimer timer;
+
+	wait_for_panic = true;
+	hrtimer_setup_on_stack(&timer, panic_in_hardirq,
+			       CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
+	hrtimer_start(&timer, us_to_ktime(100), HRTIMER_MODE_REL_HARD);
+
+	while (wait_for_panic)
+		;
+
+	hrtimer_cancel(&timer);
+}
+
 static void lkdtm_BUG(void)
 {
 	BUG();
 }
 
+static bool wait_for_bug;
+
+static enum hrtimer_restart bug_in_hardirq(struct hrtimer *timer)
+{
+	BUG();
+
+	wait_for_bug = false;
+	return HRTIMER_NORESTART;
+}
+
+static void lkdtm_BUG_IN_HARDIRQ(void)
+{
+	struct hrtimer timer;
+
+	wait_for_bug = true;
+	hrtimer_setup_on_stack(&timer, bug_in_hardirq,
+			       CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
+	hrtimer_start(&timer, us_to_ktime(100), HRTIMER_MODE_REL_HARD);
+
+	while (wait_for_bug)
+		;
+
+	hrtimer_cancel(&timer);
+}
+
 static int warn_counter;
 
 static void lkdtm_WARNING(void)
@@ -696,7 +747,9 @@ static noinline void lkdtm_CORRUPT_PAC(void)
 static struct crashtype crashtypes[] = {
 	CRASHTYPE(PANIC),
 	CRASHTYPE(PANIC_STOP_IRQOFF),
+	CRASHTYPE(PANIC_IN_HARDIRQ),
 	CRASHTYPE(BUG),
+	CRASHTYPE(BUG_IN_HARDIRQ),
 	CRASHTYPE(WARNING),
 	CRASHTYPE(WARNING_MESSAGE),
 	CRASHTYPE(EXCEPTION),
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index cff124c1eddd3e..67cd53715d9323 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -1,6 +1,8 @@
 #PANIC
 #PANIC_STOP_IRQOFF Crashes entire system
+#PANIC_IN_HARDIRQ Crashes entire system
 BUG kernel BUG at
+#BUG_IN_HARDIRQ Crashes entire system
 WARNING WARNING:
 WARNING_MESSAGE message trigger
 EXCEPTION

From 63de2b3859ba1def9f43ed0a9c25a68810208e5c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sat, 6 Dec 2025 20:01:17 +0100
Subject: [PATCH 122/258] arm64/efi: Remove unneeded SVE/SME fallback
 preserve/store handling

Since commit 7137a203b251 ("arm64/fpsimd: Permit kernel mode NEON with
IRQs off"), the only condition under which the fallback path is taken
for FP/SIMD preserve/restore across a EFI runtime call is when it is
called from hardirq or NMI context.

In practice, this only happens when the EFI pstore driver is called to
dump the kernel log buffer into a EFI variable under a panic, oops or
emergency_restart() condition, and none of these can be expected to
result in a return to user space for the task in question.

This means that the existing EFI-specific logic for preserving and
restoring SVE/SME state is pointless, and can be removed.

Instead, kill the task, so that an exceedingly unlikely inadvertent
return to user space does not proceed with a corrupted FP/SIMD state.
Also, retain the preserve and restore of the base FP/SIMD state, as that
might belong to kernel mode use of FP/SIMD. (Note that EFI runtime calls
are never invoked reentrantly, even in this case, and so any interrupted
kernel mode FP/SIMD usage will be unrelated to EFI)

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/fpsimd.c | 130 ++++++-------------------------------
 1 file changed, 20 insertions(+), 110 deletions(-)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index c154f72634e028..9de1d8a604cbf2 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -180,13 +180,6 @@ static inline void set_sve_default_vl(int val)
 	set_default_vl(ARM64_VEC_SVE, val);
 }
 
-static u8 *efi_sve_state;
-
-#else /* ! CONFIG_ARM64_SVE */
-
-/* Dummy declaration for code that will be optimised out: */
-extern u8 *efi_sve_state;
-
 #endif /* ! CONFIG_ARM64_SVE */
 
 #ifdef CONFIG_ARM64_SME
@@ -1095,36 +1088,6 @@ int vec_verify_vq_map(enum vec_type type)
 	return 0;
 }
 
-static void __init sve_efi_setup(void)
-{
-	int max_vl = 0;
-	int i;
-
-	if (!IS_ENABLED(CONFIG_EFI))
-		return;
-
-	for (i = 0; i < ARRAY_SIZE(vl_info); i++)
-		max_vl = max(vl_info[i].max_vl, max_vl);
-
-	/*
-	 * alloc_percpu() warns and prints a backtrace if this goes wrong.
-	 * This is evidence of a crippled system and we are returning void,
-	 * so no attempt is made to handle this situation here.
-	 */
-	if (!sve_vl_valid(max_vl))
-		goto fail;
-
-	efi_sve_state = kmalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)),
-				GFP_KERNEL);
-	if (!efi_sve_state)
-		goto fail;
-
-	return;
-
-fail:
-	panic("Cannot allocate memory for EFI SVE save/restore");
-}
-
 void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
 {
 	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
@@ -1185,8 +1148,6 @@ void __init sve_setup(void)
 	if (sve_max_virtualisable_vl() < sve_max_vl())
 		pr_warn("%s: unvirtualisable vector lengths present\n",
 			info->name);
-
-	sve_efi_setup();
 }
 
 /*
@@ -1947,9 +1908,6 @@ EXPORT_SYMBOL_GPL(kernel_neon_end);
 #ifdef CONFIG_EFI
 
 static struct user_fpsimd_state efi_fpsimd_state;
-static bool efi_fpsimd_state_used;
-static bool efi_sve_state_used;
-static bool efi_sm_state;
 
 /*
  * EFI runtime services support functions
@@ -1976,43 +1934,26 @@ void __efi_fpsimd_begin(void)
 	if (may_use_simd()) {
 		kernel_neon_begin(&efi_fpsimd_state);
 	} else {
-		WARN_ON(preemptible());
-
 		/*
-		 * If !efi_sve_state, SVE can't be in use yet and doesn't need
-		 * preserving:
+		 * We are running in hardirq or NMI context, and the only
+		 * legitimate case where this might happen is when EFI pstore
+		 * is attempting to record the system's dying gasps into EFI
+		 * variables. This could be due to an oops, a panic or a call
+		 * to emergency_restart(), and in none of those cases, we can
+		 * expect the current task to ever return to user space again,
+		 * or for the kernel to resume any normal execution, for that
+		 * matter (an oops in hardirq context triggers a panic too).
+		 *
+		 * Therefore, there is no point in attempting to preserve any
+		 * SVE/SME state here. On the off chance that we might have
+		 * ended up here for a different reason inadvertently, kill the
+		 * task and preserve/restore the base FP/SIMD state, which
+		 * might belong to kernel mode FP/SIMD.
 		 */
-		if (system_supports_sve() && efi_sve_state != NULL) {
-			bool ffr = true;
-			u64 svcr;
-
-			efi_sve_state_used = true;
-
-			if (system_supports_sme()) {
-				svcr = read_sysreg_s(SYS_SVCR);
-
-				efi_sm_state = svcr & SVCR_SM_MASK;
-
-				/*
-				 * Unless we have FA64 FFR does not
-				 * exist in streaming mode.
-				 */
-				if (!system_supports_fa64())
-					ffr = !(svcr & SVCR_SM_MASK);
-			}
-
-			sve_save_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
-				       &efi_fpsimd_state.fpsr, ffr);
-
-			if (system_supports_sme())
-				sysreg_clear_set_s(SYS_SVCR,
-						   SVCR_SM_MASK, 0);
-
-		} else {
-			fpsimd_save_state(&efi_fpsimd_state);
-		}
-
-		efi_fpsimd_state_used = true;
+		pr_warn_ratelimited("Calling EFI runtime from %s context\n",
+				    in_nmi() ? "NMI" : "hardirq");
+		force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
+		fpsimd_save_state(&efi_fpsimd_state);
 	}
 }
 
@@ -2024,41 +1965,10 @@ void __efi_fpsimd_end(void)
 	if (!system_supports_fpsimd())
 		return;
 
-	if (!efi_fpsimd_state_used) {
+	if (may_use_simd()) {
 		kernel_neon_end(&efi_fpsimd_state);
 	} else {
-		if (system_supports_sve() && efi_sve_state_used) {
-			bool ffr = true;
-
-			/*
-			 * Restore streaming mode; EFI calls are
-			 * normal function calls so should not return in
-			 * streaming mode.
-			 */
-			if (system_supports_sme()) {
-				if (efi_sm_state) {
-					sysreg_clear_set_s(SYS_SVCR,
-							   0,
-							   SVCR_SM_MASK);
-
-					/*
-					 * Unless we have FA64 FFR does not
-					 * exist in streaming mode.
-					 */
-					if (!system_supports_fa64())
-						ffr = false;
-				}
-			}
-
-			sve_load_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
-				       &efi_fpsimd_state.fpsr, ffr);
-
-			efi_sve_state_used = false;
-		} else {
-			fpsimd_load_state(&efi_fpsimd_state);
-		}
-
-		efi_fpsimd_state_used = false;
+		fpsimd_load_state(&efi_fpsimd_state);
 	}
 }
 

From 98a97bf41528ef738b06eb07ec2b2eb1cfde6ce6 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Sat, 29 Nov 2025 00:48:45 +0000
Subject: [PATCH 123/258] arm64/gcs: Flush the GCS locking state on exec

When we exec a new task we forget to flush the set of locked GCS mode bits.
Since we do flush the rest of the state this means that if GCS is locked
the new task will be unable to enable GCS, it will be locked as being
disabled. Add the expected flush.

Fixes: fc84bc5378a8 ("arm64/gcs: Context switch GCS state for EL0")
Cc: <stable@vger.kernel.org> # 6.13.x
Reported-by: Yury Khrustalev <Yury.Khrustalev@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Tested-by: Yury Khrustalev <yury.khrustalev@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/process.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index fba7ca102a8c42..489554931231e6 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -292,6 +292,7 @@ static void flush_gcs(void)
 	current->thread.gcs_base = 0;
 	current->thread.gcs_size = 0;
 	current->thread.gcs_el0_mode = 0;
+	current->thread.gcs_el0_locked = 0;
 	write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1);
 	write_sysreg_s(0, SYS_GCSPR_EL0);
 }

From b32045d73bb4333a2cebc5d3c005807adb03ab58 Mon Sep 17 00:00:00 2001
From: Shuicheng Lin <shuicheng.lin@intel.com>
Date: Fri, 14 Nov 2025 20:56:39 +0000
Subject: [PATCH 124/258] drm/xe: Fix freq kobject leak on sysfs_create_files
 failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ensure gt->freq is released when sysfs_create_files() fails
in xe_gt_freq_init(). Without this, the kobject would leak.
Add kobject_put() before returning the error.

Fixes: fdc81c43f0c1 ("drm/xe: use devm_add_action_or_reset() helper")
Signed-off-by: Shuicheng Lin <shuicheng.lin@intel.com>
Reviewed-by: Alex Zuo <alex.zuo@intel.com>
Reviewed-by: Xin Wang <x.wang@intel.com>
Link: https://patch.msgid.link/20251114205638.2184529-2-shuicheng.lin@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit 251be5fb4982ebb0f5a81b62d975bd770f3ad5c2)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_gt_freq.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index 849ea6c86e8e2a..ce3c7810469f7b 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -293,8 +293,10 @@ int xe_gt_freq_init(struct xe_gt *gt)
 		return -ENOMEM;
 
 	err = sysfs_create_files(gt->freq, freq_attrs);
-	if (err)
+	if (err) {
+		kobject_put(gt->freq);
 		return err;
+	}
 
 	err = devm_add_action_or_reset(xe->drm.dev, freq_fini, gt->freq);
 	if (err)

From c88a0731ed95f9705deb127a7f1927fa59aa742b Mon Sep 17 00:00:00 2001
From: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Date: Fri, 28 Nov 2025 21:25:48 -0800
Subject: [PATCH 125/258] drm/xe: Apply Wa_14020316580 in
 xe_gt_idle_enable_pg()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wa_14020316580 was getting clobbered by power gating init code
later in the driver load sequence. Move the Wa so that
it applies correctly.

Fixes: 7cd05ef89c9d ("drm/xe/xe2hpm: Add initial set of workarounds")
Suggested-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Reviewed-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patch.msgid.link/20251129052548.70766-1-vinay.belgaumkar@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit 8b5502145351bde87f522df082b9e41356898ba3)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_gt_idle.c    | 8 ++++++++
 drivers/gpu/drm/xe/xe_wa.c         | 8 --------
 drivers/gpu/drm/xe/xe_wa_oob.rules | 1 +
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index bdc9d9877ec490..3e3d1d52f63026 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -5,6 +5,7 @@
 
 #include <drm/drm_managed.h>
 
+#include <generated/xe_wa_oob.h>
 #include "xe_force_wake.h"
 #include "xe_device.h"
 #include "xe_gt.h"
@@ -16,6 +17,7 @@
 #include "xe_mmio.h"
 #include "xe_pm.h"
 #include "xe_sriov.h"
+#include "xe_wa.h"
 
 /**
  * DOC: Xe GT Idle
@@ -145,6 +147,12 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
 		xe_mmio_write32(mmio, RENDER_POWERGATE_IDLE_HYSTERESIS, 25);
 	}
 
+	if (XE_GT_WA(gt, 14020316580))
+		gtidle->powergate_enable &= ~(VDN_HCP_POWERGATE_ENABLE(0) |
+					      VDN_MFXVDENC_POWERGATE_ENABLE(0) |
+					      VDN_HCP_POWERGATE_ENABLE(2) |
+					      VDN_MFXVDENC_POWERGATE_ENABLE(2));
+
 	xe_mmio_write32(mmio, POWERGATE_ENABLE, gtidle->powergate_enable);
 	xe_force_wake_put(gt_to_fw(gt), fw_ref);
 }
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 3764abca3d4f27..e32dd2fde6f1c5 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -270,14 +270,6 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
 	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
 	},
-	{ XE_RTP_NAME("14020316580"),
-	  XE_RTP_RULES(MEDIA_VERSION(1301)),
-	  XE_RTP_ACTIONS(CLR(POWERGATE_ENABLE,
-			     VDN_HCP_POWERGATE_ENABLE(0) |
-			     VDN_MFXVDENC_POWERGATE_ENABLE(0) |
-			     VDN_HCP_POWERGATE_ENABLE(2) |
-			     VDN_MFXVDENC_POWERGATE_ENABLE(2))),
-	},
 	{ XE_RTP_NAME("14019449301"),
 	  XE_RTP_RULES(MEDIA_VERSION(1301), ENGINE_CLASS(VIDEO_DECODE)),
 	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)),
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index fb38eb3d6e9a3b..7ca7258eb5d82a 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -76,3 +76,4 @@
 
 15015404425_disable	PLATFORM(PANTHERLAKE), MEDIA_STEP(B0, FOREVER)
 16026007364    MEDIA_VERSION(3000)
+14020316580    MEDIA_VERSION(1301)

From 224a6ac0808d0f58e51df2f923332adcb80fd930 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 2 Dec 2025 17:18:09 -0800
Subject: [PATCH 126/258] drm/xe: Do not reference loop variable directly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Do not reference the loop variable job after the loop has exited.
Instead, save the job from the last iteration of the loop.

Fixes: 3d98a7164da6 ("drm/xe/vf: Start re-emission from first unsignaled job during VF migration")
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/r/202511291102.jnnKP6IB-lkp@intel.com/
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Link: https://patch.msgid.link/20251203011809.968893-1-matthew.brost@intel.com
(cherry picked from commit 76ce2313709f13a6adbcaa1a43a8539c8f509f6a)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_guc_submit.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index ed7be50b2f7208..c0819377ce6e43 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -2253,10 +2253,11 @@ static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
 					   struct xe_exec_queue *q)
 {
 	struct xe_gpu_scheduler *sched = &q->guc->sched;
-	struct xe_sched_job *job = NULL;
+	struct xe_sched_job *job = NULL, *__job;
 	bool restore_replay = false;
 
-	list_for_each_entry(job, &sched->base.pending_list, drm.list) {
+	list_for_each_entry(__job, &sched->base.pending_list, drm.list) {
+		job = __job;
 		restore_replay |= job->restore_replay;
 		if (restore_replay) {
 			xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",

From 9acc3295813b9b846791fd3eab0a78a3144af560 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 4 Dec 2025 10:46:58 +0100
Subject: [PATCH 127/258] drm/xe: fix drm_gpusvm_init() arguments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Xe driver fails to build when CONFIG_DRM_XE_GPUSVM is disabled
but CONFIG_DRM_GPUSVM is turned on, due to the clash of two commits:

In file included from drivers/gpu/drm/xe/xe_vm_madvise.c:8:
drivers/gpu/drm/xe/xe_svm.h: In function 'xe_svm_init':
include/linux/stddef.h:8:14: error: passing argument 5 of 'drm_gpusvm_init' makes integer from pointer without a cast [-Wint-conversion]
drivers/gpu/drm/xe/xe_svm.h:217:38: note: in expansion of macro 'NULL'
  217 |                                NULL, NULL, 0, 0, 0, NULL, NULL, 0);
      |                                      ^~~~
In file included from drivers/gpu/drm/xe/xe_bo_types.h:11,
                 from drivers/gpu/drm/xe/xe_bo.h:11,
                 from drivers/gpu/drm/xe/xe_vm_madvise.c:11:
include/drm/drm_gpusvm.h:254:35: note: expected 'long unsigned int' but argument is of type 'void *'
  254 |                     unsigned long mm_start, unsigned long mm_range,
      |                     ~~~~~~~~~~~~~~^~~~~~~~
In file included from drivers/gpu/drm/xe/xe_vm_madvise.c:14:
drivers/gpu/drm/xe/xe_svm.h:216:16: error: too many arguments to function 'drm_gpusvm_init'; expected 10, have 11
  216 |         return drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", &vm->xe->drm,
      |                ^~~~~~~~~~~~~~~
  217 |                                NULL, NULL, 0, 0, 0, NULL, NULL, 0);
      |                                                                 ~
include/drm/drm_gpusvm.h:251:5: note: declared here

Adapt the caller to the new argument list by removing the extraneous
NULL argument.

Fixes: 9e9787414882 ("drm/xe/userptr: replace xe_hmm with gpusvm")
Fixes: 10aa5c806030 ("drm/gpusvm, drm/xe: Fix userptr to not allow device private pages")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/20251204094704.1030933-1-arnd@kernel.org
(cherry picked from commit 29bce9c8b41d5c378263a927acb9a9074d0e7a0e)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_svm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 0955d2ac8d7448..fa757dd07954d9 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -214,7 +214,7 @@ int xe_svm_init(struct xe_vm *vm)
 {
 #if IS_ENABLED(CONFIG_DRM_GPUSVM)
 	return drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", &vm->xe->drm,
-			       NULL, NULL, 0, 0, 0, NULL, NULL, 0);
+			       NULL, 0, 0, 0, NULL, NULL, 0);
 #else
 	return 0;
 #endif

From 17d52ab2a6ec8b91bbfc577d397d42d0776ef01f Mon Sep 17 00:00:00 2001
From: Raag Jadav <raag.jadav@intel.com>
Date: Wed, 3 Dec 2025 18:03:55 +0530
Subject: [PATCH 128/258] drm/xe/throttle: Skip reason prefix while emitting
 array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The newly introduced "reasons" attribute already signifies possible
reasons for throttling and makes the prefix in individual attribute
names redundant while emitting them as an array. Skip the prefix.

Fixes: 83ccde67a3f7 ("drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons")
Signed-off-by: Raag Jadav <raag.jadav@intel.com>
Reviewed-by: Sk Anirban <sk.anirban@intel.com>
Link: https://patch.msgid.link/20251203123355.571606-1-raag.jadav@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
(cherry picked from commit b64a14334ef3ebbcf70d11bc67d0934bdc0e390d)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_gt_throttle.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index 82c5fbcdfbe3e2..01477fc7b37b9f 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -140,7 +140,7 @@ static ssize_t reasons_show(struct kobject *kobj,
 		struct throttle_attribute *other_ta = kobj_attribute_to_throttle(kattr);
 
 		if (other_ta->mask != U32_MAX && reasons & other_ta->mask)
-			ret += sysfs_emit_at(buff, ret, "%s ", (*pother)->name);
+			ret += sysfs_emit_at(buff, ret, "%s ", (*pother)->name + strlen("reason_"));
 	}
 
 	if (drm_WARN_ONCE(&xe->drm, !ret, "Unknown reason: %#x\n", reasons))

From 61e6b711c30fc1ca690502f824c067caaf7d1a34 Mon Sep 17 00:00:00 2001
From: Tomasz Lis <tomasz.lis@intel.com>
Date: Thu, 4 Dec 2025 21:08:20 +0100
Subject: [PATCH 129/258] drm/xe/vf: Stop waiting for ring space on VF post
 migration recovery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If wait for ring space started just before migration, it can delay
the recovery process, by waiting without bailout path for up to 2
seconds.

Two second wait for recovery is not acceptable, and if the ring was
completely filled even without the migration temporarily stopping
execution, then such a wait will result in up to a thousand new jobs
(assuming constant flow) being added while the wait is happening.

While this will not cause data corruption, it will lead to warning
messages getting logged due to reset being scheduled on a GT under
recovery. Also several seconds of unresponsiveness, as the backlog
of jobs gets progressively executed.

Add a bailout condition, to make sure the recovery starts without
much delay. The recovery is expected to finish in about 100 ms when
under moderate stress, so the condition verification period needs to be
below that - settling at 64 ms.

The theoretical max time which the recovery can take depends on how
many requests can be emitted to engine rings and be pending execution.
While stress testing, it was possible to reach 10k pending requests
on rings when a platform with two GTs was used. This resulted in max
recovery time of 5 seconds. But in real life situations, it is very
unlikely that the amount of pending requests will ever exceed 100,
and for that the recovery time will be around 50 ms - well within our
claimed limit of 100ms.

Fixes: a4dae94aad6a ("drm/xe/vf: Wakeup in GuC backend on VF post migration recovery")
Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://patch.msgid.link/20251204200820.2206168-1-tomasz.lis@intel.com
(cherry picked from commit a00e305fba02a915cf2745bf6ef3f55537e65d57)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_guc_submit.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index c0819377ce6e43..311cd047911a44 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -722,21 +722,23 @@ static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
 	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_device *xe = guc_to_xe(guc);
 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
-	unsigned int sleep_period_ms = 1;
+	unsigned int sleep_period_ms = 1, sleep_total_ms = 0;
 
 #define AVAILABLE_SPACE \
 	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
 	if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
 try_again:
 		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
-		if (wqi_size > AVAILABLE_SPACE) {
-			if (sleep_period_ms == 1024) {
+		if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
+			if (sleep_total_ms > 2000) {
 				xe_gt_reset_async(q->gt);
 				return -ENODEV;
 			}
 
 			msleep(sleep_period_ms);
-			sleep_period_ms <<= 1;
+			sleep_total_ms += sleep_period_ms;
+			if (sleep_period_ms < 64)
+				sleep_period_ms <<= 1;
 			goto try_again;
 		}
 	}

From 17445af7dcc7d645b6fb8951fd10c8b72cc7f23f Mon Sep 17 00:00:00 2001
From: Junxiao Chang <junxiao.chang@intel.com>
Date: Fri, 7 Nov 2025 11:31:52 +0800
Subject: [PATCH 130/258] drm/me/gsc: mei interrupt top half should be in irq
 disabled context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MEI GSC interrupt comes from i915 or xe driver. It has top half and
bottom half. Top half is called from i915/xe interrupt handler. It
should be in irq disabled context.

With RT kernel(PREEMPT_RT enabled), by default IRQ handler is in
threaded IRQ. MEI GSC top half might be in threaded IRQ context.
generic_handle_irq_safe API could be called from either IRQ or
process context, it disables local IRQ then calls MEI GSC interrupt
top half.

This change fixes B580 GPU boot issue with RT enabled.

Fixes: e02cea83d32d ("drm/xe/gsc: add Battlemage support")
Tested-by: Baoli Zhang <baoli.zhang@intel.com>
Signed-off-by: Junxiao Chang <junxiao.chang@intel.com>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251107033152.834960-1-junxiao.chang@intel.com
Signed-off-by: Maarten Lankhorst <dev@lankhorst.se>
(cherry picked from commit 3efadf028783a49ab2941294187c8b6dd86bf7da)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_heci_gsc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c
index 2b3d49dd394c0e..495cdd4f948d5d 100644
--- a/drivers/gpu/drm/xe/xe_heci_gsc.c
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.c
@@ -223,7 +223,7 @@ void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir)
 	if (xe->heci_gsc.irq < 0)
 		return;
 
-	ret = generic_handle_irq(xe->heci_gsc.irq);
+	ret = generic_handle_irq_safe(xe->heci_gsc.irq);
 	if (ret)
 		drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret);
 }
@@ -243,7 +243,7 @@ void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir)
 	if (xe->heci_gsc.irq < 0)
 		return;
 
-	ret = generic_handle_irq(xe->heci_gsc.irq);
+	ret = generic_handle_irq_safe(xe->heci_gsc.irq);
 	if (ret)
 		drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret);
 }

From 449bcd5d45eb4ce26740f11f8601082fe734bed2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Tue, 9 Dec 2025 21:49:20 +0100
Subject: [PATCH 131/258] drm/xe/bo: Don't include the CCS metadata in the
 dma-buf sg-table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some Xe bos are allocated with extra backing-store for the CCS
metadata. It's never been the intention to share the CCS metadata
when exporting such bos as dma-buf. Don't include it in the
dma-buf sg-table.

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: <stable@vger.kernel.org> # v6.8+
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Link: https://patch.msgid.link/20251209204920.224374-1-thomas.hellstrom@linux.intel.com
(cherry picked from commit a4ebfb9d95d78a12512b435a698ee6886d712571)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_dma_buf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 54e42960daadc0..7c74a31d448602 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -124,7 +124,7 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach,
 	case XE_PL_TT:
 		sgt = drm_prime_pages_to_sg(obj->dev,
 					    bo->ttm.ttm->pages,
-					    bo->ttm.ttm->num_pages);
+					    obj->size >> PAGE_SHIFT);
 		if (IS_ERR(sgt))
 			return sgt;
 

From c770467d28bd61391f2d2b17feadafd58af731ab Mon Sep 17 00:00:00 2001
From: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Date: Wed, 10 Dec 2025 05:25:48 +0000
Subject: [PATCH 132/258] drm/xe/vf: Fix queuing of recovery work
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ensure VF migration recovery work is only queued when no recovery is
already queued and teardown is not in progress.

Fixes: b47c0c07c350 ("drm/xe/vf: Teardown VF post migration worker on driver unload")
Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Tomasz Lis <tomasz.lis@intel.com>
Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://patch.msgid.link/20251210052546.622809-5-satyanarayana.k.v.p@intel.com
(cherry picked from commit 8d8cf42b03f149dcb545b547906306f3b474565e)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 4c73a077d314fa..033eae2d03d338 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -733,7 +733,7 @@ static void vf_start_migration_recovery(struct xe_gt *gt)
 
 	spin_lock(&gt->sriov.vf.migration.lock);
 
-	if (!gt->sriov.vf.migration.recovery_queued ||
+	if (!gt->sriov.vf.migration.recovery_queued &&
 	    !gt->sriov.vf.migration.recovery_teardown) {
 		gt->sriov.vf.migration.recovery_queued = true;
 		WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, true);

From eafb6f62093f756535a7be1fc4559374a511e460 Mon Sep 17 00:00:00 2001
From: Jagmeet Randhawa <jagmeet.randhawa@intel.com>
Date: Fri, 12 Dec 2025 05:21:46 +0800
Subject: [PATCH 133/258] drm/xe: Increase TDF timeout
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are some corner cases where flushing transient
data may take slightly longer than the 150us timeout
we currently allow.  Update the driver to use a 300us
timeout instead based on the latest guidance from
the hardware team. An update to the bspec to formally
document this is expected to arrive soon.

Fixes: c01c6066e6fa ("drm/xe/device: implement transient flush")
Signed-off-by: Jagmeet Randhawa <jagmeet.randhawa@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patch.msgid.link/0201b1d6ec64d3651fcbff1ea21026efa915126a.1765487866.git.jagmeet.randhawa@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit d69d3636f5f7a84bae7cd43473b3701ad9b7d544)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index c7d373c70f0fbb..cf29e259861f95 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -1056,7 +1056,7 @@ static void tdf_request_sync(struct xe_device *xe)
 		 * transient and need to be flushed..
 		 */
 		if (xe_mmio_wait32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
-				   150, NULL, false))
+				   300, NULL, false))
 			xe_gt_err_once(gt, "TD flush timeout\n");
 
 		xe_force_wake_put(gt_to_fw(gt), fw_ref);

From eed5b815fa49c17d513202f54e980eb91955d3ed Mon Sep 17 00:00:00 2001
From: Jan Maslak <jan.maslak@intel.com>
Date: Wed, 10 Dec 2025 15:56:18 +0100
Subject: [PATCH 134/258] drm/xe: Restore engine registers before restarting
 schedulers after GT reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During GT reset recovery in do_gt_restart(), xe_uc_start() was called
before xe_reg_sr_apply_mmio() restored engine-specific registers. This
created a race window where the scheduler could run jobs before hardware
state was fully restored.

This caused failures in eudebug tests (xe_exec_sip_eudebug@breakpoint-
waitsip-*) where TD_CTL register (containing TD_CTL_GLOBAL_DEBUG_ENABLE)
wasn't restored before jobs started executing. Breakpoints would fail to
trigger SIP entry because the debug enable bit wasn't set yet.

Fix by moving xe_uc_start() after all MMIO register restoration,
including engine registers and CCS mode configuration, ensuring all
hardware state is fully restored before any jobs can be scheduled.

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: Jan Maslak <jan.maslak@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251210145618.169625-2-jan.maslak@intel.com
(cherry picked from commit 825aed0328588b2837636c1c5a0c48795d724617)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index dbb5e7a9bc6a96..cdce210e36f25c 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -797,9 +797,6 @@ static int do_gt_restart(struct xe_gt *gt)
 		xe_gt_sriov_pf_init_hw(gt);
 
 	xe_mocs_init(gt);
-	err = xe_uc_start(&gt->uc);
-	if (err)
-		return err;
 
 	for_each_hw_engine(hwe, gt, id)
 		xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
@@ -807,6 +804,10 @@ static int do_gt_restart(struct xe_gt *gt)
 	/* Get CCS mode in sync between sw/hw */
 	xe_gt_apply_ccs_mode(gt);
 
+	err = xe_uc_start(&gt->uc);
+	if (err)
+		return err;
+
 	/* Restore GT freq to expected values */
 	xe_gt_sanitize_freq(gt);
 

From 8e1a1bc4f5a42747c08130b8242ebebd1210b32f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 7 Jul 2024 01:18:25 +0200
Subject: [PATCH 135/258] netfilter: nf_tables: avoid chain re-validation if
 possible

Hamza Mahfooz reports cpu soft lock-ups in
nft_chain_validate():

 watchdog: BUG: soft lockup - CPU#1 stuck for 27s! [iptables-nft-re:37547]
[..]
 RIP: 0010:nft_chain_validate+0xcb/0x110 [nf_tables]
[..]
  nft_immediate_validate+0x36/0x50 [nf_tables]
  nft_chain_validate+0xc9/0x110 [nf_tables]
  nft_immediate_validate+0x36/0x50 [nf_tables]
  nft_chain_validate+0xc9/0x110 [nf_tables]
  nft_immediate_validate+0x36/0x50 [nf_tables]
  nft_chain_validate+0xc9/0x110 [nf_tables]
  nft_immediate_validate+0x36/0x50 [nf_tables]
  nft_chain_validate+0xc9/0x110 [nf_tables]
  nft_immediate_validate+0x36/0x50 [nf_tables]
  nft_chain_validate+0xc9/0x110 [nf_tables]
  nft_immediate_validate+0x36/0x50 [nf_tables]
  nft_chain_validate+0xc9/0x110 [nf_tables]
  nft_table_validate+0x6b/0xb0 [nf_tables]
  nf_tables_validate+0x8b/0xa0 [nf_tables]
  nf_tables_commit+0x1df/0x1eb0 [nf_tables]
[..]

Currently nf_tables will traverse the entire table (chain graph), starting
from the entry points (base chains), exploring all possible paths
(chain jumps).  But there are cases where we could avoid revalidation.

Consider:
1  input -> j2 -> j3
2  input -> j2 -> j3
3  input -> j1 -> j2 -> j3

Then the second rule does not need to revalidate j2, and, by extension j3,
because this was already checked during validation of the first rule.
We need to validate it only for rule 3.

This is needed because chain loop detection also ensures we do not exceed
the jump stack: Just because we know that j2 is cycle free, its last jump
might now exceed the allowed stack size.  We also need to update all
reachable chains with the new largest observed call depth.

Care has to be taken to revalidate even if the chain depth won't be an
issue: chain validation also ensures that expressions are not called from
invalid base chains.  For example, the masquerade expression can only be
called from NAT postrouting base chains.

Therefore we also need to keep record of the base chain context (type,
hooknum) and revalidate if the chain becomes reachable from a different
hook location.

Reported-by: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Closes: https://lore.kernel.org/netfilter-devel/20251118221735.GA5477@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net/
Tested-by: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_tables.h | 34 +++++++++++----
 net/netfilter/nf_tables_api.c     | 69 +++++++++++++++++++++++++++++--
 2 files changed, 91 insertions(+), 12 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index fab7dc73f738cb..0e266c2d0e7f07 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1091,6 +1091,29 @@ struct nft_rule_blob {
 		__attribute__((aligned(__alignof__(struct nft_rule_dp))));
 };
 
+enum nft_chain_types {
+	NFT_CHAIN_T_DEFAULT = 0,
+	NFT_CHAIN_T_ROUTE,
+	NFT_CHAIN_T_NAT,
+	NFT_CHAIN_T_MAX
+};
+
+/**
+ *	struct nft_chain_validate_state - validation state
+ *
+ *	If a chain is encountered again during table validation it is
+ *	possible to avoid revalidation provided the calling context is
+ *	compatible.  This structure stores relevant calling context of
+ *	previous validations.
+ *
+ *	@hook_mask: the hook numbers and locations the chain is linked to
+ *	@depth: the deepest call chain level the chain is linked to
+ */
+struct nft_chain_validate_state {
+	u8			hook_mask[NFT_CHAIN_T_MAX];
+	u8			depth;
+};
+
 /**
  *	struct nft_chain - nf_tables chain
  *
@@ -1109,6 +1132,7 @@ struct nft_rule_blob {
  *	@udlen: user data length
  *	@udata: user data in the chain
  *	@blob_next: rule blob pointer to the next in the chain
+ *	@vstate: validation state
  */
 struct nft_chain {
 	struct nft_rule_blob		__rcu *blob_gen_0;
@@ -1128,9 +1152,10 @@ struct nft_chain {
 
 	/* Only used during control plane commit phase: */
 	struct nft_rule_blob		*blob_next;
+	struct nft_chain_validate_state vstate;
 };
 
-int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
+int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain);
 int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
 			 const struct nft_set_iter *iter,
 			 struct nft_elem_priv *elem_priv);
@@ -1138,13 +1163,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
 int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
 void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
 
-enum nft_chain_types {
-	NFT_CHAIN_T_DEFAULT = 0,
-	NFT_CHAIN_T_ROUTE,
-	NFT_CHAIN_T_NAT,
-	NFT_CHAIN_T_MAX
-};
-
 /**
  * 	struct nft_chain_type - nf_tables chain type info
  *
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c46b1bb0efe0f1..a9f6babcc781b7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -123,6 +123,29 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
 
 	table->validate_state = new_validate_state;
 }
+
+static bool nft_chain_vstate_valid(const struct nft_ctx *ctx,
+				   const struct nft_chain *chain)
+{
+	const struct nft_base_chain *base_chain;
+	enum nft_chain_types type;
+	u8 hooknum;
+
+	if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain)))
+		return false;
+
+	base_chain = nft_base_chain(ctx->chain);
+	hooknum = base_chain->ops.hooknum;
+	type = base_chain->type->type;
+
+	/* chain is already validated for this call depth */
+	if (chain->vstate.depth >= ctx->level &&
+	    chain->vstate.hook_mask[type] & BIT(hooknum))
+		return true;
+
+	return false;
+}
+
 static void nf_tables_trans_destroy_work(struct work_struct *w);
 
 static void nft_trans_gc_work(struct work_struct *work);
@@ -4079,6 +4102,29 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
 	nf_tables_rule_destroy(ctx, rule);
 }
 
+static void nft_chain_vstate_update(const struct nft_ctx *ctx, struct nft_chain *chain)
+{
+	const struct nft_base_chain *base_chain;
+	enum nft_chain_types type;
+	u8 hooknum;
+
+	/* ctx->chain must hold the calling base chain. */
+	if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) {
+		memset(&chain->vstate, 0, sizeof(chain->vstate));
+		return;
+	}
+
+	base_chain = nft_base_chain(ctx->chain);
+	hooknum = base_chain->ops.hooknum;
+	type = base_chain->type->type;
+
+	BUILD_BUG_ON(BIT(NF_INET_NUMHOOKS) > U8_MAX);
+
+	chain->vstate.hook_mask[type] |= BIT(hooknum);
+	if (chain->vstate.depth < ctx->level)
+		chain->vstate.depth = ctx->level;
+}
+
 /** nft_chain_validate - loop detection and hook validation
  *
  * @ctx: context containing call depth and base chain
@@ -4088,15 +4134,25 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
  * and set lookups until either the jump limit is hit or all reachable
  * chains have been validated.
  */
-int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
+int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain)
 {
 	struct nft_expr *expr, *last;
 	struct nft_rule *rule;
 	int err;
 
+	BUILD_BUG_ON(NFT_JUMP_STACK_SIZE > 255);
 	if (ctx->level == NFT_JUMP_STACK_SIZE)
 		return -EMLINK;
 
+	if (ctx->level > 0) {
+		/* jumps to base chains are not allowed. */
+		if (nft_is_base_chain(chain))
+			return -ELOOP;
+
+		if (nft_chain_vstate_valid(ctx, chain))
+			return 0;
+	}
+
 	list_for_each_entry(rule, &chain->rules, list) {
 		if (fatal_signal_pending(current))
 			return -EINTR;
@@ -4117,6 +4173,7 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
 		}
 	}
 
+	nft_chain_vstate_update(ctx, chain);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nft_chain_validate);
@@ -4128,7 +4185,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
 		.net	= net,
 		.family	= table->family,
 	};
-	int err;
+	int err = 0;
 
 	list_for_each_entry(chain, &table->chains, list) {
 		if (!nft_is_base_chain(chain))
@@ -4137,12 +4194,16 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
 		ctx.chain = chain;
 		err = nft_chain_validate(&ctx, chain);
 		if (err < 0)
-			return err;
+			goto err;
 
 		cond_resched();
 	}
 
-	return 0;
+err:
+	list_for_each_entry(chain, &table->chains, list)
+		memset(&chain->vstate, 0, sizeof(chain->vstate));
+
+	return err;
 }
 
 int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,

From 7e7a817f2dfd79098a706ee5581ea9518b2de878 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 11 Dec 2025 12:55:19 +0100
Subject: [PATCH 136/258] netfilter: nf_tables: avoid softlockup warnings in
 nft_chain_validate

This reverts commit
314c82841602 ("netfilter: nf_tables: can't schedule in nft_chain_validate"):
Since commit a60a5abe19d6 ("netfilter: nf_tables: allow iter callbacks to sleep")
the iterator callback is invoked without rcu read lock held, so this
cond_resched() is now valid.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_tables_api.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a9f6babcc781b7..618af6e90773ff 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4171,6 +4171,8 @@ int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain)
 			if (err < 0)
 				return err;
 		}
+
+		cond_resched();
 	}
 
 	nft_chain_vstate_update(ctx, chain);
@@ -4195,8 +4197,6 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
 		err = nft_chain_validate(&ctx, chain);
 		if (err < 0)
 			goto err;
-
-		cond_resched();
 	}
 
 err:

From fec7b0795548b43e2c3c46e3143c34ef6070341c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 11 Dec 2025 13:16:49 +0100
Subject: [PATCH 137/258] selftests: netfilter: packetdrill: avoid failure on
 HZ=100 kernel

packetdrill --ip_version=ipv4 --mtu=1500 --tolerance_usecs=1000000 --non_fatal packet conntrack_syn_challenge_ack.pkt
conntrack v1.4.8 (conntrack-tools): 1 flow entries have been shown.
conntrack_syn_challenge_ack.pkt:32: error executing `conntrack -f $NFCT_IP_VERSION \
-L -p tcp --dport 8080 | grep UNREPLIED | grep -q SYN_SENT` command: non-zero status 1

Affected kernel had CONFIG_HZ=100; reset packet was still sitting in
backlog.

Reported-by: Yi Chen <yiche@redhat.com>
Fixes: a8a388c2aae4 ("selftests: netfilter: add packetdrill based conntrack tests")
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 .../net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
index 3442cd29bc9320..cdb3910af95b4d 100644
--- a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
@@ -26,7 +26,7 @@
 
 +0.01 > R 643160523:643160523(0) win 0
 
-+0.01 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
++0.1 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
 
 // Must go through.
 +0.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>

From bd94fbe8b55f38c24a63cca2854ff74b62780d77 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 29 Oct 2025 16:03:16 +0100
Subject: [PATCH 138/258] MIPS: Alchemy: Remove bogus static/inline specifiers

The recent io_remap_pfn_range() rework applied the static and inline
specifiers to the implementation of io_remap_pfn_range_pfn() on MIPS
Alchemy, mirroring the same change on other platforms. However, this
function is defined in a source file and that definition causes a
conflict with its declaration. Fix this by dropping the specifiers.

Fixes: c707a68f9468 ("mm: abstract io_remap_pfn_range() based on PFN")
Signed-off-by: Thierry Reding <treding@nvidia.com>
Acked-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/alchemy/common/setup.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c
index c35b4f809d5123..992134a8c23ae0 100644
--- a/arch/mips/alchemy/common/setup.c
+++ b/arch/mips/alchemy/common/setup.c
@@ -94,8 +94,7 @@ phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size)
 	return phys_addr;
 }
 
-static inline unsigned long io_remap_pfn_range_pfn(unsigned long pfn,
-		unsigned long size)
+unsigned long io_remap_pfn_range_pfn(unsigned long pfn, unsigned long size)
 {
 	phys_addr_t phys_addr = fixup_bigphys_addr(pfn << PAGE_SHIFT, size);
 

From 680ad315caaa2860df411cb378bf3614d96c7648 Mon Sep 17 00:00:00 2001
From: Haoxiang Li <haoxiang_li2024@163.com>
Date: Thu, 4 Dec 2025 18:36:18 +0800
Subject: [PATCH 139/258] MIPS: Fix a reference leak bug in ip22_check_gio()

If gio_device_register fails, gio_dev_put() is required to
drop the gio_dev device reference.

Fixes: e84de0c61905 ("MIPS: GIO bus support for SGI IP22/28")
Signed-off-by: Haoxiang Li <haoxiang_li2024@163.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/sgi-ip22/ip22-gio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c
index 5893ea4e382cae..19b70928d6dc38 100644
--- a/arch/mips/sgi-ip22/ip22-gio.c
+++ b/arch/mips/sgi-ip22/ip22-gio.c
@@ -372,7 +372,8 @@ static void ip22_check_gio(int slotno, unsigned long addr, int irq)
 		gio_dev->resource.flags = IORESOURCE_MEM;
 		gio_dev->irq = irq;
 		dev_set_name(&gio_dev->dev, "%d", slotno);
-		gio_device_register(gio_dev);
+		if (gio_device_register(gio_dev))
+			gio_dev_put(gio_dev);
 	} else
 		printk(KERN_INFO "GIO: slot %d : Empty\n", slotno);
 }

From 2b973ca48ff3ef1952091c8f988d7796781836c8 Mon Sep 17 00:00:00 2001
From: Marijn Suijten <marijn.suijten@somainline.org>
Date: Sun, 30 Nov 2025 23:40:05 +0100
Subject: [PATCH 140/258] drm/panel: sony-td4353-jdi: Enable prepare_prev_first

The DSI host must be enabled before our prepare function can run, which
has to send its init sequence over DSI.  Without enabling the host first
the panel will not probe.

Fixes: 9e15123eca79 ("drm/msm/dsi: Stop unconditionally powering up DSI hosts at modeset")
Signed-off-by: Marijn Suijten <marijn.suijten@somainline.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Martin Botka <martin.botka@somainline.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patch.msgid.link/20251130-sony-akari-fix-panel-v1-1-1d27c60a55f5@somainline.org
---
 drivers/gpu/drm/panel/panel-sony-td4353-jdi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-sony-td4353-jdi.c b/drivers/gpu/drm/panel/panel-sony-td4353-jdi.c
index 7c989b70ab5130..a14c86c60d19d7 100644
--- a/drivers/gpu/drm/panel/panel-sony-td4353-jdi.c
+++ b/drivers/gpu/drm/panel/panel-sony-td4353-jdi.c
@@ -212,6 +212,8 @@ static int sony_td4353_jdi_probe(struct mipi_dsi_device *dsi)
 	if (ret)
 		return dev_err_probe(dev, ret, "Failed to get backlight\n");
 
+	ctx->panel.prepare_prev_first = true;
+
 	drm_panel_add(&ctx->panel);
 
 	ret = mipi_dsi_attach(dsi);

From 726c93b0408f3aedc87349e97fb9b4741131908b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@baylibre.com>
Date: Wed, 10 Dec 2025 07:58:39 +0100
Subject: [PATCH 141/258] kunit: Drop unused parameter from
 kunit_device_register_internal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The passed driver isn't used, so just drop this parameter.

Link: https://lore.kernel.org/r/20251210065839.482608-2-u.kleine-koenig@baylibre.com
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 lib/kunit/device.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/lib/kunit/device.c b/lib/kunit/device.c
index 520c1fccee8a54..f201aaacd4cf46 100644
--- a/lib/kunit/device.c
+++ b/lib/kunit/device.c
@@ -106,8 +106,7 @@ EXPORT_SYMBOL_GPL(kunit_driver_create);
 
 /* Helper which creates a kunit_device, attaches it to the kunit_bus*/
 static struct kunit_device *kunit_device_register_internal(struct kunit *test,
-							   const char *name,
-							   const struct device_driver *drv)
+							   const char *name)
 {
 	struct kunit_device *kunit_dev;
 	int err = -ENOMEM;
@@ -150,7 +149,7 @@ struct device *kunit_device_register_with_driver(struct kunit *test,
 						 const char *name,
 						 const struct device_driver *drv)
 {
-	struct kunit_device *kunit_dev = kunit_device_register_internal(test, name, drv);
+	struct kunit_device *kunit_dev = kunit_device_register_internal(test, name);
 
 	if (IS_ERR_OR_NULL(kunit_dev))
 		return ERR_CAST(kunit_dev);
@@ -172,7 +171,7 @@ struct device *kunit_device_register(struct kunit *test, const char *name)
 	if (IS_ERR(drv))
 		return ERR_CAST(drv);
 
-	dev = kunit_device_register_internal(test, name, drv);
+	dev = kunit_device_register_internal(test, name);
 	if (IS_ERR(dev)) {
 		kunit_release_action(test, driver_unregister_wrapper, (void *)drv);
 		return ERR_CAST(dev);

From c33b68801fbe9d5ee8a9178beb5747ec65873530 Mon Sep 17 00:00:00 2001
From: Brendan Jackman <jackmanb@google.com>
Date: Sun, 7 Dec 2025 02:17:10 +0000
Subject: [PATCH 142/258] kunit: make FAULT_TEST default to n when
 PANIC_ON_OOPS

As describe in the help string, the user might want to disable these
tests if they don't like to see stacktraces/BUG etc in their kernel log.

However, if they enable PANIC_ON_OOPS, these tests also crash the
machine, which it's safe to assume _almost_ nobody wants.

One might argue that _absolutely_ nobody ever wants their kernel to
crash so this should just be a hard dependency instead of a default.
However, since this is rather special code that's anyway concerned with
deliberately doing "bad" things, the normal rules don't seem to apply,
hence prefer flexibility and allow users to set up a crashing Kconfig if
they so choose.

Link: https://lore.kernel.org/r/20251207-kunit-fault-no-panic-v1-1-2ac932f26864@google.com
Signed-off-by: Brendan Jackman <jackmanb@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 lib/kunit/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig
index 50ecf55d2b9c8a..498cc51e493dc9 100644
--- a/lib/kunit/Kconfig
+++ b/lib/kunit/Kconfig
@@ -28,7 +28,7 @@ config KUNIT_FAULT_TEST
 	bool "Enable KUnit tests which print BUG stacktraces"
 	depends on KUNIT_TEST
 	depends on !UML
-	default y
+	default !PANIC_ON_OOPS
 	help
 	  Enables fault handling tests for the KUnit framework. These tests may
 	  trigger a kernel BUG(), and the associated stack trace, even when they

From 54891a96b7a90d77c32bd0a7d6c9987e5479a314 Mon Sep 17 00:00:00 2001
From: Yongpeng Yang <yangyongpeng@xiaomi.com>
Date: Mon, 15 Dec 2025 23:21:04 +0800
Subject: [PATCH 143/258] loop: use READ_ONCE() to read lo->lo_state without
 locking

When lo->lo_mutex is not held, direct access may read stale data. This
patch uses READ_ONCE() to read lo->lo_state and data_race() to silence
code checkers, and changes all assignments to use WRITE_ONCE().

Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 272bc608e52824..32a3a5b138029e 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1082,7 +1082,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
 	/* Order wrt reading lo_state in loop_validate_file(). */
 	wmb();
 
-	lo->lo_state = Lo_bound;
+	WRITE_ONCE(lo->lo_state, Lo_bound);
 	if (part_shift)
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
 	partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
@@ -1179,7 +1179,7 @@ static void __loop_clr_fd(struct loop_device *lo)
 	if (!part_shift)
 		set_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
 	mutex_lock(&lo->lo_mutex);
-	lo->lo_state = Lo_unbound;
+	WRITE_ONCE(lo->lo_state, Lo_unbound);
 	mutex_unlock(&lo->lo_mutex);
 
 	/*
@@ -1218,7 +1218,7 @@ static int loop_clr_fd(struct loop_device *lo)
 
 	lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
 	if (disk_openers(lo->lo_disk) == 1)
-		lo->lo_state = Lo_rundown;
+		WRITE_ONCE(lo->lo_state, Lo_rundown);
 	loop_global_unlock(lo, true);
 
 	return 0;
@@ -1743,7 +1743,7 @@ static void lo_release(struct gendisk *disk)
 
 	mutex_lock(&lo->lo_mutex);
 	if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR))
-		lo->lo_state = Lo_rundown;
+		WRITE_ONCE(lo->lo_state, Lo_rundown);
 
 	need_clear = (lo->lo_state == Lo_rundown);
 	mutex_unlock(&lo->lo_mutex);
@@ -1858,7 +1858,7 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(rq);
 
-	if (lo->lo_state != Lo_bound)
+	if (data_race(READ_ONCE(lo->lo_state)) != Lo_bound)
 		return BLK_STS_IOERR;
 
 	switch (req_op(rq)) {
@@ -2016,7 +2016,7 @@ static int loop_add(int i)
 	lo->worker_tree = RB_ROOT;
 	INIT_LIST_HEAD(&lo->idle_worker_list);
 	timer_setup(&lo->timer, loop_free_idle_workers_timer, TIMER_DEFERRABLE);
-	lo->lo_state = Lo_unbound;
+	WRITE_ONCE(lo->lo_state, Lo_unbound);
 
 	err = mutex_lock_killable(&loop_ctl_mutex);
 	if (err)
@@ -2174,7 +2174,7 @@ static int loop_control_remove(int idx)
 		goto mark_visible;
 	}
 	/* Mark this loop device as no more bound, but not quite unbound yet */
-	lo->lo_state = Lo_deleting;
+	WRITE_ONCE(lo->lo_state, Lo_deleting);
 	mutex_unlock(&lo->lo_mutex);
 
 	loop_remove(lo);
@@ -2197,8 +2197,12 @@ static int loop_control_get_free(int idx)
 	if (ret)
 		return ret;
 	idr_for_each_entry(&loop_index_idr, lo, id) {
-		/* Hitting a race results in creating a new loop device which is harmless. */
-		if (lo->idr_visible && data_race(lo->lo_state) == Lo_unbound)
+		/*
+		 * Hitting a race results in creating a new loop device
+		 * which is harmless.
+		 */
+		if (lo->idr_visible &&
+		    data_race(READ_ONCE(lo->lo_state)) == Lo_unbound)
 			goto found;
 	}
 	mutex_unlock(&loop_ctl_mutex);

From 4b2b03151e2e3c11a3caae6e0223964dc771b003 Mon Sep 17 00:00:00 2001
From: Yongpeng Yang <yangyongpeng@xiaomi.com>
Date: Mon, 15 Dec 2025 23:21:06 +0800
Subject: [PATCH 144/258] zloop: use READ_ONCE() to read lo->lo_state in
 queue_rq path

In the queue_rq path, zlo->state is accessed without locking, and direct
access may read stale data. This patch uses READ_ONCE() to read
zlo->state and data_race() to silence code checkers, and changes all
assignments to use WRITE_ONCE().

Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/zloop.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c
index 77bd6081b24452..8e334f5025fc0a 100644
--- a/drivers/block/zloop.c
+++ b/drivers/block/zloop.c
@@ -697,7 +697,7 @@ static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
 	struct zloop_device *zlo = rq->q->queuedata;
 
-	if (zlo->state == Zlo_deleting)
+	if (data_race(READ_ONCE(zlo->state)) == Zlo_deleting)
 		return BLK_STS_IOERR;
 
 	/*
@@ -1002,7 +1002,7 @@ static int zloop_ctl_add(struct zloop_options *opts)
 		ret = -ENOMEM;
 		goto out;
 	}
-	zlo->state = Zlo_creating;
+	WRITE_ONCE(zlo->state, Zlo_creating);
 
 	ret = mutex_lock_killable(&zloop_ctl_mutex);
 	if (ret)
@@ -1113,7 +1113,7 @@ static int zloop_ctl_add(struct zloop_options *opts)
 	}
 
 	mutex_lock(&zloop_ctl_mutex);
-	zlo->state = Zlo_live;
+	WRITE_ONCE(zlo->state, Zlo_live);
 	mutex_unlock(&zloop_ctl_mutex);
 
 	pr_info("zloop: device %d, %u zones of %llu MiB, %u B block size\n",
@@ -1177,7 +1177,7 @@ static int zloop_ctl_remove(struct zloop_options *opts)
 		ret = -EINVAL;
 	} else {
 		idr_remove(&zloop_index_idr, zlo->id);
-		zlo->state = Zlo_deleting;
+		WRITE_ONCE(zlo->state, Zlo_deleting);
 	}
 
 	mutex_unlock(&zloop_ctl_mutex);

From 67d85b062dcb49af9c903a58842a4ed7281f57b8 Mon Sep 17 00:00:00 2001
From: Yongpeng Yang <yangyongpeng@xiaomi.com>
Date: Mon, 15 Dec 2025 17:58:17 +0800
Subject: [PATCH 145/258] Documentation: admin-guide: blockdev: replace
 zone_capacity with zone_capacity_mb when creating devices

The "zone_capacity=%umb" option is no longer used. The effective option
is now "zone_capacity_mb=%u", so update the documentation accordingly.

Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/admin-guide/blockdev/zoned_loop.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/blockdev/zoned_loop.rst b/Documentation/admin-guide/blockdev/zoned_loop.rst
index 806adde664dbf9..6aa865424ac383 100644
--- a/Documentation/admin-guide/blockdev/zoned_loop.rst
+++ b/Documentation/admin-guide/blockdev/zoned_loop.rst
@@ -134,7 +134,7 @@ MB and a zone capacity of 63 MB::
 
         $ modprobe zloop
         $ mkdir -p /var/local/zloop/0
-        $ echo "add capacity_mb=2048,zone_size_mb=64,zone_capacity=63MB" > /dev/zloop-control
+        $ echo "add capacity_mb=2048,zone_size_mb=64,zone_capacity_mb=63" > /dev/zloop-control
 
 For the device created (/dev/zloop0), the zone backing files are all created
 under the default base directory (/var/local/zloop)::

From 2aaf33c6e1e82561d7dce2345298a985a2483266 Mon Sep 17 00:00:00 2001
From: Cryolitia PukNgae <cryolitia.pukngae@linux.dev>
Date: Thu, 23 Oct 2025 12:42:25 -0700
Subject: [PATCH 146/258] Input: atkbd - skip deactivate for HONOR FMB-P's
 internal keyboard

After commit 9cf6e24c9fbf17e52de9fff07f12be7565ea6d61 ("Input: atkbd -
do not skip atkbd_deactivate() when skipping ATKBD_CMD_GETID"), HONOR
FMB-P, aka HONOR MagicBook Pro 14 2025's internal keyboard stops
working. Adding the atkbd_deactivate_fixup quirk fixes it.

DMI: HONOR FMB-P/FMB-P-PCB, BIOS 1.13 05/08/2025

Fixes: 9cf6e24c9fbf17e52de9fff07f12be7565ea6d61 ("Input: atkbd - do not skip atkbd_deactivate() when skipping ATKBD_CMD_GETID")
Reported-by: Mikura Kyouka <mikurakyouka@aosc.io>
Reported-by: foad.elkhattabi <foad.elkhattabi@gmail.com>
Signed-off-by: Cryolitia PukNgae <cryolitia.pukngae@linux.dev>
Reviewed-by: Hans de Goede <hansg@kernel.org>
Link: https://patch.msgid.link/20251022-honor-v1-1-ff894ed271a9@linux.dev
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/atkbd.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 6c999d89ee4bd0..422e28ad1e8e2b 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -1937,6 +1937,13 @@ static const struct dmi_system_id atkbd_dmi_quirk_table[] __initconst = {
 		},
 		.callback = atkbd_deactivate_fixup,
 	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "HONOR"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "FMB-P"),
+		},
+		.callback = atkbd_deactivate_fixup,
+	},
 	{ }
 };
 

From aed3716db7fff74919cc5775ca3a80c8bb246489 Mon Sep 17 00:00:00 2001
From: Christoffer Sandberg <cs@tuxedo.de>
Date: Mon, 24 Nov 2025 21:31:34 +0100
Subject: [PATCH 147/258] Input: i8042 - add TUXEDO InfinityBook Max Gen10 AMD
 to i8042 quirk table

The device occasionally wakes up from suspend with missing input on the
internal keyboard and the following suspend attempt results in an instant
wake-up. The quirks fix both issues for this device.

Signed-off-by: Christoffer Sandberg <cs@tuxedo.de>
Signed-off-by: Werner Sembach <wse@tuxedocomputers.com>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20251124203336.64072-1-wse@tuxedocomputers.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/serio/i8042-acpipnpio.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h
index 1caa6c4ca435c7..654771275ce878 100644
--- a/drivers/input/serio/i8042-acpipnpio.h
+++ b/drivers/input/serio/i8042-acpipnpio.h
@@ -1169,6 +1169,13 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
 		.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
 					SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
 	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_BOARD_NAME, "X5KK45xS_X5SP45xS"),
+		},
+		.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+					SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+	},
 	/*
 	 * A lot of modern Clevo barebones have touchpad and/or keyboard issues
 	 * after suspend fixable with the forcenorestore quirk.

From 7bbf6d15e935abbb3d604c1fa157350e84a26f98 Mon Sep 17 00:00:00 2001
From: "Mario Limonciello (AMD)" <superm1@kernel.org>
Date: Fri, 12 Dec 2025 23:44:47 -0600
Subject: [PATCH 148/258] accel/amdxdna: Block running under a hypervisor

SVA support is required, which isn't configured by hypervisor
solutions.

Closes: https://github.com/QubesOS/qubes-issues/issues/10275
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4656
Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20251213054513.87925-1-superm1@kernel.org
Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
---
 drivers/accel/amdxdna/aie2_pci.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index ceef1c502e9e2c..8141d8e5163605 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -17,6 +17,7 @@
 #include <linux/iopoll.h>
 #include <linux/pci.h>
 #include <linux/xarray.h>
+#include <asm/hypervisor.h>
 
 #include "aie2_msg_priv.h"
 #include "aie2_pci.h"
@@ -508,6 +509,11 @@ static int aie2_init(struct amdxdna_dev *xdna)
 	unsigned long bars = 0;
 	int i, nvec, ret;
 
+	if (!hypervisor_is_type(X86_HYPER_NATIVE)) {
+		XDNA_ERR(xdna, "Running under hypervisor not supported");
+		return -EINVAL;
+	}
+
 	ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL);
 	if (!ndev)
 		return -ENOMEM;

From 69dc538a4f5a57dcc5ea4893c769d567f539a1b1 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 4 Dec 2025 11:03:29 +0100
Subject: [PATCH 149/258] iommufd: Fix building without dmabuf

When DMABUF is disabled, trying to use it causes a link failure:

x86_64-linux-ld: drivers/iommu/iommufd/io_pagetable.o: in function `iopt_map_file_pages':
io_pagetable.c:(.text+0x1735): undefined reference to `dma_buf_get'
x86_64-linux-ld: io_pagetable.c:(.text+0x1775): undefined reference to `dma_buf_put'

Fixes: 44ebaa1744fd ("iommufd: Accept a DMABUF through IOMMU_IOAS_MAP_FILE")
Link: https://patch.msgid.link/r/20251204100333.1034767-1-arnd@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/io_pagetable.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 54cf4d856179b9..436992331111c6 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -495,7 +495,11 @@ int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
 		return -EOVERFLOW;
 
 	start_byte = start - ALIGN_DOWN(start, PAGE_SIZE);
-	dmabuf = dma_buf_get(fd);
+	if (IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
+		dmabuf = dma_buf_get(fd);
+	else
+		dmabuf = ERR_PTR(-ENXIO);
+
 	if (!IS_ERR(dmabuf)) {
 		pages = iopt_alloc_dmabuf_pages(ictx, dmabuf, start_byte, start,
 						length,

From 5b244b077c0b0e76573fbb9542cf038e42368901 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Fri, 5 Dec 2025 14:56:12 -0400
Subject: [PATCH 150/258] iommufd/selftest: Make it clearer to gcc that the
 access is not out of bounds

GCC gets a bit confused and reports:

   In function '_test_cmd_get_hw_info',
       inlined from 'iommufd_ioas_get_hw_info' at iommufd.c:779:3,
       inlined from 'wrapper_iommufd_ioas_get_hw_info' at iommufd.c:752:1:
>> iommufd_utils.h:804:37: warning: array subscript 'struct iommu_test_hw_info[0]' is partly outside array bounds of 'struct iommu_test_hw_info_buffer_smaller[1]' [-Warray-bounds=]
     804 |                         assert(!info->flags);
         |                                 ~~~~^~~~~~~
   iommufd.c: In function 'wrapper_iommufd_ioas_get_hw_info':
   iommufd.c:761:11: note: object 'buffer_smaller' of size 4
     761 |         } buffer_smaller;
         |           ^~~~~~~~~~~~~~

While it is true that "struct iommu_test_hw_info[0]" is partly out of
bounds of the input pointer, it is not true that info->flags is out of
bounds. Unclear why it warns on this.

Reuse an existing properly sized stack buffer and pass a truncated length
instead to test the same thing.

Fixes: af4fde93c319 ("iommufd/selftest: Add coverage for IOMMU_GET_HW_INFO ioctl")
Link: https://patch.msgid.link/r/0-v1-63a2cffb09da+4486-iommufd_gcc_bounds_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202512032344.kaAcKFIM-lkp@intel.com/
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 tools/testing/selftests/iommu/iommufd.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 10e051b6f592df..dadad277f4eb2e 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -755,9 +755,6 @@ TEST_F(iommufd_ioas, get_hw_info)
 		struct iommu_test_hw_info info;
 		uint64_t trailing_bytes;
 	} buffer_larger;
-	struct iommu_test_hw_info_buffer_smaller {
-		__u32 flags;
-	} buffer_smaller;
 
 	if (self->device_id) {
 		uint8_t max_pasid = 0;
@@ -789,8 +786,9 @@ TEST_F(iommufd_ioas, get_hw_info)
 		 * the fields within the size range still gets updated.
 		 */
 		test_cmd_get_hw_info(self->device_id,
-				     IOMMU_HW_INFO_TYPE_DEFAULT,
-				     &buffer_smaller, sizeof(buffer_smaller));
+				     IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_exact,
+				     offsetofend(struct iommu_test_hw_info,
+						 flags));
 		test_cmd_get_hw_info_pasid(self->device_id, &max_pasid);
 		ASSERT_EQ(0, max_pasid);
 		if (variant->pasid_capable) {

From b80fab281349f107a07e841eb412a86e2877ae88 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Fri, 5 Dec 2025 15:42:47 -0400
Subject: [PATCH 151/258] iommufd/selftest: Do not leak the hwpt if
 IOMMU_TEST_OP_MD_CHECK_MAP fails

If the input validation fails it returned without freeing the hwpt
refcount causing a leak. This triggers a WARN_ON when closing the fd:

  WARNING: drivers/iommu/iommufd/main.c:369 at iommufd_fops_release+0x385/0x430, CPU#1: repro/724

Found by szykaller.

Fixes: e93d5945ed5b ("iommufd: Change the selftest to use iommupt instead of xarray")
Link: https://patch.msgid.link/r/0-v1-c8ed57e24380+44ae-iommufd_selftest_hwpt_leak_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reported-by: "Lai, Yi" <yi1.lai@linux.intel.com>
Closes: https://lore.kernel.org/r/aTJGMaqwQK0ASj0G@ly-workstation
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/selftest.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index c4322fd26f93e5..86446e1537949a 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -1215,8 +1215,10 @@ static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd,
 	page_size = 1 << __ffs(mock->domain.pgsize_bitmap);
 	if (iova % page_size || length % page_size ||
 	    (uintptr_t)uptr % page_size ||
-	    check_add_overflow((uintptr_t)uptr, (uintptr_t)length, &end))
-		return -EINVAL;
+	    check_add_overflow((uintptr_t)uptr, (uintptr_t)length, &end)) {
+		rc = -EINVAL;
+		goto out_put;
+	}
 
 	for (; length; length -= page_size) {
 		struct page *pages[1];

From ba624ba88d9f5c3e2ace9bb6697dbeb05b2dbc44 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Tue, 9 Dec 2025 05:24:00 +0100
Subject: [PATCH 152/258] ata: libata-core: Disable LPM on ST2000DM008-2FR102

According to a user report, the ST2000DM008-2FR102 has problems with LPM.

Reported-by: Emerson Pinter <e@pinter.dev>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220693
Signed-off-by: Niklas Cassel <cassel@kernel.org>
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
---
 drivers/ata/libata-core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 0b24bd169d61dd..09d8c035fcdf99 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4143,6 +4143,9 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = {
 	{ "ST3320[68]13AS",	"SD1[5-9]",	ATA_QUIRK_NONCQ |
 						ATA_QUIRK_FIRMWARE_WARN },
 
+	/* Seagate disks with LPM issues */
+	{ "ST2000DM008-2FR102",	NULL,		ATA_QUIRK_NOLPM },
+
 	/* drives which fail FPDMA_AA activation (some may freeze afterwards)
 	   the ST disks also have LPM issues */
 	{ "ST1000LM024 HN-M101MBB", NULL,	ATA_QUIRK_BROKEN_FPDMA_AA |

From e5aff444e3a7bdeef5ea796a2099fc3c60a070fa Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Mon, 15 Dec 2025 12:51:12 +0100
Subject: [PATCH 153/258] x86/xen: Fix sparse warning in enlighten_pv.c

The sparse tool issues a warning for arch/x76/xen/enlighten_pv.c:

   arch/x86/xen/enlighten_pv.c:120:9: sparse: sparse: incorrect type
     in initializer (different address spaces)
     expected void const [noderef] __percpu *__vpp_verify
     got bool *

This is due to the percpu variable xen_in_preemptible_hcall being
exported via EXPORT_SYMBOL_GPL() instead of EXPORT_PER_CPU_SYMBOL_GPL().

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202512140856.Ic6FetG6-lkp@intel.com/
Fixes: fdfd811ddde3 ("x86/xen: allow privcmd hypercalls to be preempted")
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Message-ID: <20251215115112.15072-1-jgross@suse.com>
---
 arch/x86/xen/enlighten_pv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 4806cc28d7ca77..b74ff8bc7f2a8d 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -108,7 +108,7 @@ static int xen_cpu_dead_pv(unsigned int cpu);
  * calls.
  */
 DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
-EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
+EXPORT_PER_CPU_SYMBOL_GPL(xen_in_preemptible_hcall);
 
 /*
  * In case of scheduling the flag must be cleared and restored after

From 2145f447b79ab522667cbdbdab4525c903759f7c Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <ckulkarnilinux@gmail.com>
Date: Mon, 24 Nov 2025 15:48:06 -0800
Subject: [PATCH 154/258] xfs: ignore discard return value

__blkdev_issue_discard() always returns 0, making all error checking
in XFS discard functions dead code.

Change xfs_discard_extents() return type to void, remove error variable,
error checking, and error logging for the __blkdev_issue_discard() call
in same function.

Update xfs_trim_perag_extents() and xfs_trim_rtgroup_extents() to
ignore the xfs_discard_extents() return value and error checking
code.

Update xfs_discard_rtdev_extents() to ignore __blkdev_issue_discard()
return value and error checking code.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chaitanya Kulkarni <ckulkarnilinux@gmail.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
 fs/xfs/xfs_discard.c | 27 +++++----------------------
 fs/xfs/xfs_discard.h |  2 +-
 2 files changed, 6 insertions(+), 23 deletions(-)

diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 6917de8321915d..b6ffe4807a1111 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -108,7 +108,7 @@ xfs_discard_endio(
  * list. We plug and chain the bios so that we only need a single completion
  * call to clear all the busy extents once the discards are complete.
  */
-int
+void
 xfs_discard_extents(
 	struct xfs_mount	*mp,
 	struct xfs_busy_extents	*extents)
@@ -116,7 +116,6 @@ xfs_discard_extents(
 	struct xfs_extent_busy	*busyp;
 	struct bio		*bio = NULL;
 	struct blk_plug		plug;
-	int			error = 0;
 
 	blk_start_plug(&plug);
 	list_for_each_entry(busyp, &extents->extent_list, list) {
@@ -126,18 +125,10 @@ xfs_discard_extents(
 
 		trace_xfs_discard_extent(xg, busyp->bno, busyp->length);
 
-		error = __blkdev_issue_discard(btp->bt_bdev,
+		__blkdev_issue_discard(btp->bt_bdev,
 				xfs_gbno_to_daddr(xg, busyp->bno),
 				XFS_FSB_TO_BB(mp, busyp->length),
 				GFP_KERNEL, &bio);
-		if (error && error != -EOPNOTSUPP) {
-			xfs_info(mp,
-	 "discard failed for extent [0x%llx,%u], error %d",
-				 (unsigned long long)busyp->bno,
-				 busyp->length,
-				 error);
-			break;
-		}
 	}
 
 	if (bio) {
@@ -148,8 +139,6 @@ xfs_discard_extents(
 		xfs_discard_endio_work(&extents->endio_work);
 	}
 	blk_finish_plug(&plug);
-
-	return error;
 }
 
 /*
@@ -385,9 +374,7 @@ xfs_trim_perag_extents(
 		 * list  after this function call, as it may have been freed by
 		 * the time control returns to us.
 		 */
-		error = xfs_discard_extents(pag_mount(pag), extents);
-		if (error)
-			break;
+		xfs_discard_extents(pag_mount(pag), extents);
 
 		if (xfs_trim_should_stop())
 			break;
@@ -496,12 +483,10 @@ xfs_discard_rtdev_extents(
 
 		trace_xfs_discard_rtextent(mp, busyp->bno, busyp->length);
 
-		error = __blkdev_issue_discard(bdev,
+		__blkdev_issue_discard(bdev,
 				xfs_rtb_to_daddr(mp, busyp->bno),
 				XFS_FSB_TO_BB(mp, busyp->length),
 				GFP_NOFS, &bio);
-		if (error)
-			break;
 	}
 	xfs_discard_free_rtdev_extents(tr);
 
@@ -741,9 +726,7 @@ xfs_trim_rtgroup_extents(
 		 * list  after this function call, as it may have been freed by
 		 * the time control returns to us.
 		 */
-		error = xfs_discard_extents(rtg_mount(rtg), tr.extents);
-		if (error)
-			break;
+		xfs_discard_extents(rtg_mount(rtg), tr.extents);
 
 		low = tr.restart_rtx;
 	} while (!xfs_trim_should_stop() && low <= high);
diff --git a/fs/xfs/xfs_discard.h b/fs/xfs/xfs_discard.h
index 2b1a85223a56c6..8c5cc4af6a0787 100644
--- a/fs/xfs/xfs_discard.h
+++ b/fs/xfs/xfs_discard.h
@@ -6,7 +6,7 @@ struct fstrim_range;
 struct xfs_mount;
 struct xfs_busy_extents;
 
-int xfs_discard_extents(struct xfs_mount *mp, struct xfs_busy_extents *busy);
+void xfs_discard_extents(struct xfs_mount *mp, struct xfs_busy_extents *busy);
 int xfs_ioc_trim(struct xfs_mount *mp, struct fstrim_range __user *fstrim);
 
 #endif /* XFS_DISCARD_H */

From 5990fd756943836978ad184aac980e2b36ab7e01 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Thu, 4 Dec 2025 13:43:50 -0800
Subject: [PATCH 155/258] xfs: fix a UAF problem in xattr repair

The xchk_setup_xattr_buf function can allocate a new value buffer, which
means that any reference to ab->value before the call could become a
dangling pointer.  Fix this by moving an assignment to after the buffer
setup.

Cc: stable@vger.kernel.org # v6.10
Fixes: e47dcf113ae348 ("xfs: repair extended attributes")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
 fs/xfs/scrub/attr_repair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c
index c7eb94069cafcd..09d63aa10314b0 100644
--- a/fs/xfs/scrub/attr_repair.c
+++ b/fs/xfs/scrub/attr_repair.c
@@ -333,7 +333,6 @@ xrep_xattr_salvage_remote_attr(
 		.attr_filter		= ent->flags & XFS_ATTR_NSP_ONDISK_MASK,
 		.namelen		= rentry->namelen,
 		.name			= rentry->name,
-		.value			= ab->value,
 		.valuelen		= be32_to_cpu(rentry->valuelen),
 	};
 	unsigned int			namesize;
@@ -363,6 +362,7 @@ xrep_xattr_salvage_remote_attr(
 		error = -EDEADLOCK;
 	if (error)
 		return error;
+	args.value = ab->value;
 
 	/* Look up the remote value and stash it for reconstruction. */
 	error = xfs_attr3_leaf_getvalue(leaf_bp, &args);

From f06725052098d7b1133ac3846d693c383dc427a2 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Thu, 4 Dec 2025 13:44:15 -0800
Subject: [PATCH 156/258] xfs: fix stupid compiler warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gcc 14.2 warns about:

xfs_attr_item.c: In function ‘xfs_attr_recover_work’:
xfs_attr_item.c:785:9: warning: ‘ip’ may be used uninitialized [-Wmaybe-uninitialized]
  785 |         xfs_trans_ijoin(tp, ip, 0);
      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~
xfs_attr_item.c:740:42: note: ‘ip’ was declared here
  740 |         struct xfs_inode                *ip;
      |                                          ^~

I think this is bogus since xfs_attri_recover_work either returns a real
pointer having initialized ip or an ERR_PTR having not touched it, but
the tools are smarter than me so let's just null-init the variable
anyway.

Cc: stable@vger.kernel.org # v6.8
Fixes: e70fb328d52772 ("xfs: recreate work items when recovering intent items")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
 fs/xfs/xfs_attr_item.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index c3a593319bee71..e8fa326ac995bc 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -737,7 +737,7 @@ xfs_attr_recover_work(
 	struct xfs_attri_log_item	*attrip = ATTRI_ITEM(lip);
 	struct xfs_attr_intent		*attr;
 	struct xfs_mount		*mp = lip->li_log->l_mp;
-	struct xfs_inode		*ip;
+	struct xfs_inode		*ip = NULL;
 	struct xfs_da_args		*args;
 	struct xfs_trans		*tp;
 	struct xfs_trans_res		resv;

From fc40459de82543b565ebc839dca8f7987f16f62e Mon Sep 17 00:00:00 2001
From: Haoxiang Li <lihaoxiang@isrc.iscas.ac.cn>
Date: Wed, 10 Dec 2025 17:06:01 +0800
Subject: [PATCH 157/258] xfs: fix a memory leak in xfs_buf_item_init()

xfs_buf_item_get_format() may allocate memory for bip->bli_formats,
free the memory in the error path.

Fixes: c3d5f0c2fb85 ("xfs: complain if anyone tries to create a too-large buffer log item")
Cc: stable@vger.kernel.org
Signed-off-by: Haoxiang Li <lihaoxiang@isrc.iscas.ac.cn>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
 fs/xfs/xfs_buf_item.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 8d85b5eee44441..f4c5be67826e22 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -896,6 +896,7 @@ xfs_buf_item_init(
 		map_size = DIV_ROUND_UP(chunks, NBWORD);
 
 		if (map_size > XFS_BLF_DATAMAP_SIZE) {
+			xfs_buf_item_free_format(bip);
 			kmem_cache_free(xfs_buf_item_cache, bip);
 			xfs_err(mp,
 	"buffer item dirty bitmap (%u uints) too small to reflect %u bytes!",

From 8dc15b7a6e5918bad2b0583cf63d170f94a212df Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 15 Dec 2025 07:05:46 +0100
Subject: [PATCH 158/258] xfs: fix XFS_ERRTAG_FORCE_ZERO_RANGE for zoned file
 system

The new XFS_ERRTAG_FORCE_ZERO_RANGE error tag added by commit
ea9989668081 ("xfs: error tag to force zeroing on debug kernels") fails
to account for the zoned space reservation rules and this reliably fails
xfs/131 because the zeroing operation returns -EIO.

Fix this by reserving enough space to zero the entire range, which
requires a bit of (fairly ugly) reshuffling to do the error injection
early enough to affect the space reservation.

Fixes: ea9989668081 ("xfs: error tag to force zeroing on debug kernels")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
 fs/xfs/xfs_file.c | 58 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 6108612182e2fb..7874cf745af372 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1240,6 +1240,38 @@ xfs_falloc_insert_range(
 	return xfs_insert_file_space(XFS_I(inode), offset, len);
 }
 
+/*
+ * For various operations we need to zero up to one block at each end of
+ * the affected range.  For zoned file systems this will require a space
+ * allocation, for which we need a reservation ahead of time.
+ */
+#define XFS_ZONED_ZERO_EDGE_SPACE_RES		2
+
+/*
+ * Zero range implements a full zeroing mechanism but is only used in limited
+ * situations. It is more efficient to allocate unwritten extents than to
+ * perform zeroing here, so use an errortag to randomly force zeroing on DEBUG
+ * kernels for added test coverage.
+ *
+ * On zoned file systems, the error is already injected by
+ * xfs_file_zoned_fallocate, which then reserves the additional space needed.
+ * We only check for this extra space reservation here.
+ */
+static inline bool
+xfs_falloc_force_zero(
+	struct xfs_inode		*ip,
+	struct xfs_zone_alloc_ctx	*ac)
+{
+	if (xfs_is_zoned_inode(ip)) {
+		if (ac->reserved_blocks > XFS_ZONED_ZERO_EDGE_SPACE_RES) {
+			ASSERT(IS_ENABLED(CONFIG_XFS_DEBUG));
+			return true;
+		}
+		return false;
+	}
+	return XFS_TEST_ERROR(ip->i_mount, XFS_ERRTAG_FORCE_ZERO_RANGE);
+}
+
 /*
  * Punch a hole and prealloc the range.  We use a hole punch rather than
  * unwritten extent conversion for two reasons:
@@ -1268,14 +1300,7 @@ xfs_falloc_zero_range(
 	if (error)
 		return error;
 
-	/*
-	 * Zero range implements a full zeroing mechanism but is only used in
-	 * limited situations. It is more efficient to allocate unwritten
-	 * extents than to perform zeroing here, so use an errortag to randomly
-	 * force zeroing on DEBUG kernels for added test coverage.
-	 */
-	if (XFS_TEST_ERROR(ip->i_mount,
-			   XFS_ERRTAG_FORCE_ZERO_RANGE)) {
+	if (xfs_falloc_force_zero(ip, ac)) {
 		error = xfs_zero_range(ip, offset, len, ac, NULL);
 	} else {
 		error = xfs_free_file_space(ip, offset, len, ac);
@@ -1423,13 +1448,26 @@ xfs_file_zoned_fallocate(
 {
 	struct xfs_zone_alloc_ctx ac = { };
 	struct xfs_inode	*ip = XFS_I(file_inode(file));
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_filblks_t		count_fsb;
 	int			error;
 
-	error = xfs_zoned_space_reserve(ip->i_mount, 2, XFS_ZR_RESERVED, &ac);
+	/*
+	 * If full zeroing is forced by the error injection knob, we need a
+	 * space reservation that covers the entire range.  See the comment in
+	 * xfs_zoned_write_space_reserve for the rationale for the calculation.
+	 * Otherwise just reserve space for the two boundary blocks.
+	 */
+	count_fsb = XFS_ZONED_ZERO_EDGE_SPACE_RES;
+	if ((mode & FALLOC_FL_MODE_MASK) == FALLOC_FL_ZERO_RANGE &&
+	    XFS_TEST_ERROR(mp, XFS_ERRTAG_FORCE_ZERO_RANGE))
+		count_fsb += XFS_B_TO_FSB(mp, len) + 1;
+
+	error = xfs_zoned_space_reserve(mp, count_fsb, XFS_ZR_RESERVED, &ac);
 	if (error)
 		return error;
 	error = __xfs_file_fallocate(file, mode, offset, len, &ac);
-	xfs_zoned_space_unreserve(ip->i_mount, &ac);
+	xfs_zoned_space_unreserve(mp, &ac);
 	return error;
 }
 

From 2bfca4fe1f36eb6618e5712a8b5b41433bb6f10b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20G=C3=BCnther?= <agx@sigxcpu.org>
Date: Fri, 17 Oct 2025 10:27:59 +0200
Subject: [PATCH 159/258] drm/panel: visionox-rm69299: Depend on
 BACKLIGHT_CLASS_DEVICE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We handle backlight so need that dependency.

Fixes: 7911d8cab554 ("drm/panel: visionox-rm69299: Add backlight support")
Reported-by: kernelci.org bot <bot@kernelci.org>
Signed-off-by: Guido Günther <agx@sigxcpu.org>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: David Heidelberg <david@ixit.cz>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patch.msgid.link/20251017-visionox-rm69299-bl-v2-1-9dfa06606754@sigxcpu.org
---
 drivers/gpu/drm/panel/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index 76f6af81903765..7a83804fedca1b 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -1165,6 +1165,7 @@ config DRM_PANEL_VISIONOX_RM69299
 	tristate "Visionox RM69299"
 	depends on OF
 	depends on DRM_MIPI_DSI
+	depends on BACKLIGHT_CLASS_DEVICE
 	help
 	  Say Y here if you want to enable support for Visionox
 	  RM69299  DSI Video Mode panel.

From 359afc8eb02a518fbdd0cbd462c8c2827c6cbec2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 15 Dec 2025 15:21:34 +0100
Subject: [PATCH 160/258] PM: runtime: Do not clear needs_force_resume with
 enabled runtime PM

Commit 89d9cec3b1e9 ("PM: runtime: Clear power.needs_force_resume in
pm_runtime_reinit()") added provisional clearing of power.needs_force_resume
to pm_runtime_reinit(), but it is done unconditionally which is a
mistake because pm_runtime_reinit() may race with driver probing
and removal [1].

To address this, notice that power.needs_force_resume should never
be set when runtime PM is enabled and so it only needs to be cleared
when runtime PM is disabled, and update pm_runtime_init() to only
clear that flag when runtime PM is disabled.

Fixes: 89d9cec3b1e9 ("PM: runtime: Clear power.needs_force_resume in pm_runtime_reinit()")
Reported-by: Ed Tsai <ed.tsai@mediatek.com>
Closes: https://lore.kernel.org/linux-pm/20251215122154.3180001-1-ed.tsai@mediatek.com/ [1]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: 6.17+ <stable@vger.kernel.org> # 6.17+
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://patch.msgid.link/12807571.O9o76ZdvQC@rafael.j.wysocki
---
 drivers/base/power/runtime.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 84676cc2422147..0ee8ea971aa468 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1868,16 +1868,18 @@ void pm_runtime_init(struct device *dev)
  */
 void pm_runtime_reinit(struct device *dev)
 {
-	if (!pm_runtime_enabled(dev)) {
-		if (dev->power.runtime_status == RPM_ACTIVE)
-			pm_runtime_set_suspended(dev);
-		if (dev->power.irq_safe) {
-			spin_lock_irq(&dev->power.lock);
-			dev->power.irq_safe = 0;
-			spin_unlock_irq(&dev->power.lock);
-			if (dev->parent)
-				pm_runtime_put(dev->parent);
-		}
+	if (pm_runtime_enabled(dev))
+		return;
+
+	if (dev->power.runtime_status == RPM_ACTIVE)
+		pm_runtime_set_suspended(dev);
+
+	if (dev->power.irq_safe) {
+		spin_lock_irq(&dev->power.lock);
+		dev->power.irq_safe = 0;
+		spin_unlock_irq(&dev->power.lock);
+		if (dev->parent)
+			pm_runtime_put(dev->parent);
 	}
 	/*
 	 * Clear power.needs_force_resume in case it has been set by

From 3e8ade58b71b48913d21b647b2089e03e81f117e Mon Sep 17 00:00:00 2001
From: Jianpeng Chang <jianpeng.chang.cn@windriver.com>
Date: Fri, 5 Dec 2025 09:59:34 +0800
Subject: [PATCH 161/258] arm64: kdump: Fix elfcorehdr overlap caused by
 reserved memory processing reorder

Commit 8a6e02d0c00e ("of: reserved_mem: Restructure how the reserved
memory regions are processed") changed the processing order of reserved
memory regions, causing elfcorehdr to overlap with dynamically allocated
reserved memory regions during kdump kernel boot.

The issue occurs because:
1. kexec-tools allocates elfcorehdr in the last crashkernel reserved
   memory region and passes it to the second kernel
2. The problematic commit moved dynamic reserved memory allocation
   (like bman-fbpr) to occur during fdt_scan_reserved_mem(), before
   elfcorehdr reservation in fdt_reserve_elfcorehdr()
3. bman-fbpr with 16MB alignment requirement can get allocated at
   addresses that overlap with the elfcorehdr location
4. When fdt_reserve_elfcorehdr() tries to reserve elfcorehdr memory,
   overlap detection identifies the conflict and skips reservation
5. kdump kernel fails with "Unable to handle kernel paging request"
   because elfcorehdr memory is not properly reserved

The boot log:
Before 8a6e02d0c00e:
  OF: fdt: Reserving 1 KiB of memory at 0xf4fff000 for elfcorehdr
  OF: reserved mem: 0xf3000000..0xf3ffffff bman-fbpr

After 8a6e02d0c00e:
  OF: reserved mem: 0xf4000000..0xf4ffffff bman-fbpr
  OF: fdt: elfcorehdr is overlapped

Fix this by ensuring elfcorehdr reservation occurs before dynamic
reserved memory allocation.

Fixes: 8a6e02d0c00e ("of: reserved_mem: Restructure how the reserved memory regions are processed")
Signed-off-by: Jianpeng Chang <jianpeng.chang.cn@windriver.com>
Link: https://patch.msgid.link/20251205015934.700016-1-jianpeng.chang.cn@windriver.com
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 drivers/of/fdt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index d378d4b4109f5c..331646d667b9be 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -503,8 +503,8 @@ void __init early_init_fdt_scan_reserved_mem(void)
 	if (!initial_boot_params)
 		return;
 
-	fdt_scan_reserved_mem();
 	fdt_reserve_elfcorehdr();
+	fdt_scan_reserved_mem();
 
 	/* Process header /memreserve/ fields */
 	for (n = 0; ; n++) {

From 7fff398df4c44529bde2183a959bd77123fbac98 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Sat, 15 Nov 2025 13:21:21 +0100
Subject: [PATCH 162/258] dt-bindings: display/ti: Simplify dma-coherent
 property

Common boolean properties need to be only allowed in the binding
(":true"), because their type is already defined by core DT schema.
Simplify dma-coherent property to match common syntax.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Link: https://patch.msgid.link/20251115122120.35315-4-krzk@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml | 3 +--
 Documentation/devicetree/bindings/display/ti/ti,j721e-dss.yaml | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml
index 361e9cae6896c1..38fcee91211e8c 100644
--- a/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml
+++ b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml
@@ -84,8 +84,7 @@ properties:
     maxItems: 1
     description: phandle to the associated power domain
 
-  dma-coherent:
-    type: boolean
+  dma-coherent: true
 
   ports:
     $ref: /schemas/graph.yaml#/properties/ports
diff --git a/Documentation/devicetree/bindings/display/ti/ti,j721e-dss.yaml b/Documentation/devicetree/bindings/display/ti/ti,j721e-dss.yaml
index fad7cba58d39ac..65ae8a1c399862 100644
--- a/Documentation/devicetree/bindings/display/ti/ti,j721e-dss.yaml
+++ b/Documentation/devicetree/bindings/display/ti/ti,j721e-dss.yaml
@@ -103,8 +103,7 @@ properties:
     maxItems: 1
     description: phandle to the associated power domain
 
-  dma-coherent:
-    type: boolean
+  dma-coherent: true
 
   ports:
     $ref: /schemas/graph.yaml#/properties/ports

From 512e1568562b2a19f4b175607062c8e97c9203ec Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Wed, 29 Oct 2025 10:56:13 -0500
Subject: [PATCH 163/258] dt-bindings: clock: sprd,sc9860-clk: Allow "reg" for
 gate clocks

The gate bindings have an artificial split between a "syscon" and clock
provider node. Allow "reg" properties so this split can be removed.

Reviewed-by: Chunyan Zhang <zhang.lyra@gmail.com>
Link: https://patch.msgid.link/20251029155615.1167903-1-robh@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 .../bindings/clock/sprd,sc9860-clk.yaml       | 26 -------------------
 1 file changed, 26 deletions(-)

diff --git a/Documentation/devicetree/bindings/clock/sprd,sc9860-clk.yaml b/Documentation/devicetree/bindings/clock/sprd,sc9860-clk.yaml
index 502cd723511fa4..b131390207d6de 100644
--- a/Documentation/devicetree/bindings/clock/sprd,sc9860-clk.yaml
+++ b/Documentation/devicetree/bindings/clock/sprd,sc9860-clk.yaml
@@ -114,25 +114,6 @@ allOf:
         - reg
       properties:
         sprd,syscon: false
-  - if:
-      properties:
-        compatible:
-          contains:
-            enum:
-              - sprd,sc9860-agcp-gate
-              - sprd,sc9860-aon-gate
-              - sprd,sc9860-apahb-gate
-              - sprd,sc9860-apapb-gate
-              - sprd,sc9860-cam-gate
-              - sprd,sc9860-disp-gate
-              - sprd,sc9860-pll
-              - sprd,sc9860-pmu-gate
-              - sprd,sc9860-vsp-gate
-    then:
-      required:
-        - sprd,syscon
-      properties:
-        reg: false
 
 additionalProperties: false
 
@@ -142,13 +123,6 @@ examples:
       #address-cells = <2>;
       #size-cells = <2>;
 
-      pmu-gate {
-        compatible = "sprd,sc9860-pmu-gate";
-        clocks = <&ext_26m>;
-        #clock-cells = <1>;
-        sprd,syscon = <&pmu_regs>;
-      };
-
       clock-controller@20000000 {
         compatible = "sprd,sc9860-ap-clk";
         reg = <0 0x20000000 0 0x400>;

From 0f5796dac1bb7e3b8a36eec54e3a2c6bf70aa414 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Date: Wed, 10 Dec 2025 06:17:19 +0100
Subject: [PATCH 164/258] cpufreq: dt-platdev: Fix creating device on OPPv1
 platforms

Commit 6ea891a6dd37 ("cpufreq: dt-platdev: Simplify with
of_machine_get_match_data()") broke several platforms which did not have
OPPv2 proprety, because it incorrectly checked for device match data
after first matching from "allowlist".  Almost all of "allowlist" match
entries do not have match data and it is expected to create platform
device for them with empty data.

Fix this by first checking if platform is on the allowlist with
of_machine_device_match() and only then taking the match data.  This
duplicates the number of checks (we match against the allowlist twice),
but makes the code here much smaller.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Closes: https://lore.kernel.org/all/CAMuHMdVJD4+J9QpUUs-sX0feKfuPD72CO0dcqN7shvF_UYpZ3Q@mail.gmail.com/
Reported-by: Pavel Pisa <pisa@fel.cvut.cz>
Closes: https://lore.kernel.org/all/6hnk7llbwdezh74h74fhvofbx4t4jihel5kvr6qwx2xuxxbjys@rmwbd7lkhrdz/
Fixes: 6ea891a6dd37 ("cpufreq: dt-platdev: Simplify with of_machine_get_match_data()")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Tested-by: Pavel Pisa <pisa@fel.cvut.cz>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://patch.msgid.link/20251210051718.132795-2-krzysztof.kozlowski@oss.qualcomm.com
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 drivers/cpufreq/cpufreq-dt-platdev.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index a1d11ecd1ac863..b06a43143d23c6 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -219,11 +219,12 @@ static bool __init cpu0_node_has_opp_v2_prop(void)
 
 static int __init cpufreq_dt_platdev_init(void)
 {
-	const void *data;
+	const void *data = NULL;
 
-	data = of_machine_get_match_data(allowlist);
-	if (data)
+	if (of_machine_device_match(allowlist)) {
+		data = of_machine_get_match_data(allowlist);
 		goto create_pdev;
+	}
 
 	if (cpu0_node_has_opp_v2_prop() && !of_machine_device_match(blocklist))
 		goto create_pdev;

From 67549b73f10b85172a0cbb1109904542b97e6177 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?=
 <niklas.soderlund+renesas@ragnatech.se>
Date: Thu, 6 Nov 2025 22:23:41 +0100
Subject: [PATCH 165/258] dt-bindings: gpu: img,powervr-rogue: Document GE7800
 GPU in Renesas R-Car V3U
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Document Imagination Technologies PowerVR Rogue GE7800 BNVC 15.5.1.64
present in Renesas R-Car R8A779A0 V3U SoC.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Marek Vasut <marek.vasut+renesas@mailbox.org>
Reviewed-by: Matt Coster <matt.coster@imgtec.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://patch.msgid.link/20251106212342.2771579-2-niklas.soderlund+renesas@ragnatech.se
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 Documentation/devicetree/bindings/gpu/img,powervr-rogue.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/gpu/img,powervr-rogue.yaml b/Documentation/devicetree/bindings/gpu/img,powervr-rogue.yaml
index 225a6e1b7fcd3e..86ef689853177d 100644
--- a/Documentation/devicetree/bindings/gpu/img,powervr-rogue.yaml
+++ b/Documentation/devicetree/bindings/gpu/img,powervr-rogue.yaml
@@ -20,7 +20,9 @@ properties:
           - const: img,img-gx6250
           - const: img,img-rogue
       - items:
-          - const: renesas,r8a77965-gpu
+          - enum:
+              - renesas,r8a77965-gpu
+              - renesas,r8a779a0-gpu
           - const: img,img-ge7800
           - const: img,img-rogue
       - items:

From e6a973af11135439de32ece3b9cbe3bfc043bea8 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 16 Dec 2025 11:53:40 -0400
Subject: [PATCH 166/258] iommufd/selftest: Check for overflow in
 IOMMU_TEST_OP_ADD_RESERVED

syzkaller found it could overflow math in the test infrastructure and
cause a WARN_ON by corrupting the reserved interval tree. This only
effects test kernels with CONFIG_IOMMUFD_TEST.

Validate the user input length in the test ioctl.

Fixes: f4b20bb34c83 ("iommufd: Add kernel support for testing iommufd")
Link: https://patch.msgid.link/r/0-v1-cd99f6049ba5+51-iommufd_syz_add_resv_jgg@nvidia.com
Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Reported-by: syzbot+57fdb0cf6a0c5d1f15a2@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/69368129.a70a0220.38f243.008f.GAE@google.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/selftest.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index 86446e1537949a..550ff36dec3a35 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -1184,14 +1184,20 @@ static int iommufd_test_add_reserved(struct iommufd_ucmd *ucmd,
 				     unsigned int mockpt_id,
 				     unsigned long start, size_t length)
 {
+	unsigned long last;
 	struct iommufd_ioas *ioas;
 	int rc;
 
+	if (!length)
+		return -EINVAL;
+	if (check_add_overflow(start, length - 1, &last))
+		return -EOVERFLOW;
+
 	ioas = iommufd_get_ioas(ucmd->ictx, mockpt_id);
 	if (IS_ERR(ioas))
 		return PTR_ERR(ioas);
 	down_write(&ioas->iopt.iova_rwsem);
-	rc = iopt_reserve_iova(&ioas->iopt, start, start + length - 1, NULL);
+	rc = iopt_reserve_iova(&ioas->iopt, start, last, NULL);
 	up_write(&ioas->iopt.iova_rwsem);
 	iommufd_put_object(ucmd->ictx, &ioas->obj);
 	return rc;

From 54de247a0efa4c6176ba6840a58e2fb0b2130e2d Mon Sep 17 00:00:00 2001
From: Linus Walleij <linusw@kernel.org>
Date: Tue, 16 Dec 2025 10:52:10 +0100
Subject: [PATCH 167/258] dt-bindings: Updates Linus Walleij's mail address

My name is stamped into maintainership for a big slew of DT
bindings. Now that it is changing, switch it over to my
kernel.org mail address, which will hopefully be stable for the
rest of my life.

Signed-off-by: Linus Walleij <linusw@kernel.org>
Link: https://patch.msgid.link/20251216-maintainers-dt-v1-1-0b5ab102c9bb@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/arm,integrator.yaml       | 2 +-
 Documentation/devicetree/bindings/arm/arm,realview.yaml         | 2 +-
 Documentation/devicetree/bindings/arm/arm,scu.yaml              | 2 +-
 Documentation/devicetree/bindings/arm/arm,versatile-sysreg.yaml | 2 +-
 Documentation/devicetree/bindings/arm/arm,versatile.yaml        | 2 +-
 Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml    | 2 +-
 Documentation/devicetree/bindings/arm/gemini.yaml               | 2 +-
 Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml         | 2 +-
 Documentation/devicetree/bindings/arm/ux500.yaml                | 2 +-
 Documentation/devicetree/bindings/ata/ata-generic.yaml          | 2 +-
 .../devicetree/bindings/ata/cortina,gemini-sata-bridge.yaml     | 2 +-
 Documentation/devicetree/bindings/ata/faraday,ftide010.yaml     | 2 +-
 .../devicetree/bindings/ata/intel,ixp4xx-compact-flash.yaml     | 2 +-
 Documentation/devicetree/bindings/ata/pata-common.yaml          | 2 +-
 Documentation/devicetree/bindings/ata/sata-common.yaml          | 2 +-
 .../devicetree/bindings/auxdisplay/arm,versatile-lcd.yaml       | 2 +-
 .../devicetree/bindings/clock/stericsson,u8500-clks.yaml        | 2 +-
 .../devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml         | 2 +-
 Documentation/devicetree/bindings/display/dsi-controller.yaml   | 2 +-
 Documentation/devicetree/bindings/display/faraday,tve200.yaml   | 2 +-
 .../devicetree/bindings/display/panel/arm,rtsm-display.yaml     | 2 +-
 .../bindings/display/panel/arm,versatile-tft-panel.yaml         | 2 +-
 .../devicetree/bindings/display/panel/ilitek,ili9322.yaml       | 2 +-
 .../devicetree/bindings/display/panel/novatek,nt35510.yaml      | 2 +-
 .../devicetree/bindings/display/panel/samsung,lms380kf01.yaml   | 2 +-
 .../devicetree/bindings/display/panel/samsung,lms397kf04.yaml   | 2 +-
 .../devicetree/bindings/display/panel/samsung,s6d16d0.yaml      | 2 +-
 .../devicetree/bindings/display/panel/sony,acx424akp.yaml       | 2 +-
 Documentation/devicetree/bindings/display/panel/ti,nspire.yaml  | 2 +-
 Documentation/devicetree/bindings/display/panel/tpo,tpg110.yaml | 2 +-
 Documentation/devicetree/bindings/display/ste,mcde.yaml         | 2 +-
 Documentation/devicetree/bindings/dma/stericsson,dma40.yaml     | 2 +-
 Documentation/devicetree/bindings/extcon/fcs,fsa880.yaml        | 2 +-
 .../firmware/intel,ixp4xx-network-processing-engine.yaml        | 2 +-
 Documentation/devicetree/bindings/gnss/brcm,bcm4751.yaml        | 2 +-
 Documentation/devicetree/bindings/gpio/faraday,ftgpio010.yaml   | 2 +-
 .../devicetree/bindings/gpio/gpio-consumer-common.yaml          | 2 +-
 Documentation/devicetree/bindings/gpio/gpio-ep9301.yaml         | 2 +-
 Documentation/devicetree/bindings/gpio/gpio-mmio.yaml           | 2 +-
 Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.yaml   | 2 +-
 Documentation/devicetree/bindings/gpio/mrvl-gpio.yaml           | 2 +-
 Documentation/devicetree/bindings/gpio/pl061-gpio.yaml          | 2 +-
 Documentation/devicetree/bindings/gpio/st,nomadik-gpio.yaml     | 2 +-
 Documentation/devicetree/bindings/gpio/st,stmpe-gpio.yaml       | 2 +-
 Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml     | 2 +-
 Documentation/devicetree/bindings/hwmon/winbond,w83781d.yaml    | 2 +-
 Documentation/devicetree/bindings/i2c/arm,i2c-versatile.yaml    | 2 +-
 Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml       | 2 +-
 Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml   | 2 +-
 Documentation/devicetree/bindings/iio/adc/qcom,pm8018-adc.yaml  | 2 +-
 .../devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml   | 2 +-
 Documentation/devicetree/bindings/iio/light/capella,cm3605.yaml | 2 +-
 Documentation/devicetree/bindings/iio/light/sharp,gp2ap002.yaml | 2 +-
 .../bindings/iio/magnetometer/asahi-kasei,ak8974.yaml           | 2 +-
 .../devicetree/bindings/iio/magnetometer/yamaha,yas530.yaml     | 2 +-
 Documentation/devicetree/bindings/iio/st,st-sensors.yaml        | 2 +-
 Documentation/devicetree/bindings/input/atmel,maxtouch.yaml     | 2 +-
 .../bindings/input/touchscreen/cypress,cy8ctma140.yaml          | 2 +-
 .../bindings/input/touchscreen/cypress,cy8ctma340.yaml          | 2 +-
 .../devicetree/bindings/input/touchscreen/melfas,mms114.yaml    | 2 +-
 .../devicetree/bindings/input/touchscreen/zinitix,bt400.yaml    | 2 +-
 .../bindings/interrupt-controller/arm,versatile-fpga-irq.yaml   | 2 +-
 .../bindings/interrupt-controller/faraday,ftintc010.yaml        | 2 +-
 .../bindings/interrupt-controller/intel,ixp4xx-interrupt.yaml   | 2 +-
 .../devicetree/bindings/leds/backlight/kinetic,ktd253.yaml      | 2 +-
 Documentation/devicetree/bindings/leds/register-bit-led.yaml    | 2 +-
 Documentation/devicetree/bindings/leds/regulator-led.yaml       | 2 +-
 Documentation/devicetree/bindings/leds/richtek,rt8515.yaml      | 2 +-
 .../intel,ixp4xx-expansion-bus-controller.yaml                  | 2 +-
 .../intel,ixp4xx-expansion-peripheral-props.yaml                | 2 +-
 .../devicetree/bindings/mfd/arm,dev-platforms-syscon.yaml       | 2 +-
 Documentation/devicetree/bindings/mfd/st,stmpe.yaml             | 2 +-
 Documentation/devicetree/bindings/mfd/stericsson,ab8500.yaml    | 2 +-
 .../devicetree/bindings/mfd/stericsson,db8500-prcmu.yaml        | 2 +-
 .../bindings/misc/intel,ixp4xx-ahb-queue-manager.yaml           | 2 +-
 Documentation/devicetree/bindings/mmc/arm,pl18x.yaml            | 2 +-
 .../bindings/mtd/partitions/arm,arm-firmware-suite.yaml         | 2 +-
 .../devicetree/bindings/mtd/partitions/redboot-fis.yaml         | 2 +-
 Documentation/devicetree/bindings/mtd/partitions/seama.yaml     | 2 +-
 .../devicetree/bindings/net/bluetooth/brcm,bluetooth.yaml       | 2 +-
 .../devicetree/bindings/net/cortina,gemini-ethernet.yaml        | 2 +-
 Documentation/devicetree/bindings/net/dsa/micrel,ks8995.yaml    | 2 +-
 Documentation/devicetree/bindings/net/dsa/realtek.yaml          | 2 +-
 Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.yaml  | 2 +-
 .../devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml         | 2 +-
 .../devicetree/bindings/net/intel,ixp4xx-ethernet.yaml          | 2 +-
 Documentation/devicetree/bindings/net/intel,ixp4xx-hss.yaml     | 2 +-
 Documentation/devicetree/bindings/pci/faraday,ftpci100.yaml     | 2 +-
 Documentation/devicetree/bindings/pci/intel,ixp4xx-pci.yaml     | 2 +-
 Documentation/devicetree/bindings/pci/v3,v360epc-pci.yaml       | 2 +-
 Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml      | 2 +-
 Documentation/devicetree/bindings/pinctrl/pinctrl.yaml          | 2 +-
 Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml      | 2 +-
 .../devicetree/bindings/power/supply/samsung,battery.yaml       | 2 +-
 Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml     | 2 +-
 Documentation/devicetree/bindings/rtc/faraday,ftrtc010.yaml     | 2 +-
 .../devicetree/bindings/spi/arm,pl022-peripheral-props.yaml     | 2 +-
 Documentation/devicetree/bindings/spi/spi-pl022.yaml            | 2 +-
 Documentation/devicetree/bindings/timer/faraday,fttmr010.yaml   | 2 +-
 Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml | 2 +-
 Documentation/devicetree/bindings/timer/st,nomadik-mtu.yaml     | 2 +-
 Documentation/devicetree/bindings/usb/faraday,fotg210.yaml      | 2 +-
 Documentation/devicetree/bindings/usb/intel,ixp4xx-udc.yaml     | 2 +-
 .../devicetree/bindings/watchdog/faraday,ftwdt010.yaml          | 2 +-
 Documentation/devicetree/bindings/watchdog/maxim,max63xx.yaml   | 2 +-
 105 files changed, 105 insertions(+), 105 deletions(-)

diff --git a/Documentation/devicetree/bindings/arm/arm,integrator.yaml b/Documentation/devicetree/bindings/arm/arm,integrator.yaml
index 1bdbd1b7ee3815..8fe22185a33762 100644
--- a/Documentation/devicetree/bindings/arm/arm,integrator.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,integrator.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ARM Integrator Boards
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |+
   These were the first ARM platforms officially supported by ARM Ltd.
diff --git a/Documentation/devicetree/bindings/arm/arm,realview.yaml b/Documentation/devicetree/bindings/arm/arm,realview.yaml
index 3c5f1688dbd787..0b3133ecddac10 100644
--- a/Documentation/devicetree/bindings/arm/arm,realview.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,realview.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ARM RealView Boards
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |+
   The ARM RealView series of reference designs were built to explore the Arm11,
diff --git a/Documentation/devicetree/bindings/arm/arm,scu.yaml b/Documentation/devicetree/bindings/arm/arm,scu.yaml
index dae2aa27e641b7..f735b7fb8e1cc2 100644
--- a/Documentation/devicetree/bindings/arm/arm,scu.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,scu.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ARM Snoop Control Unit (SCU)
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   As part of the MPCore complex, Cortex-A5 and Cortex-A9 are provided
diff --git a/Documentation/devicetree/bindings/arm/arm,versatile-sysreg.yaml b/Documentation/devicetree/bindings/arm/arm,versatile-sysreg.yaml
index 3b060c36b90cdd..e72dc45c1afa79 100644
--- a/Documentation/devicetree/bindings/arm/arm,versatile-sysreg.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,versatile-sysreg.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Arm Versatile system registers
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   This is a system control registers block, providing multiple low level
diff --git a/Documentation/devicetree/bindings/arm/arm,versatile.yaml b/Documentation/devicetree/bindings/arm/arm,versatile.yaml
index 7a3caf6af200a1..c777e455d0388d 100644
--- a/Documentation/devicetree/bindings/arm/arm,versatile.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,versatile.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ARM Versatile Boards
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |+
   The ARM Versatile boards are two variants of ARM926EJ-S evaluation boards
diff --git a/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml b/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml
index 4cdca532054440..6430218ba1ceac 100644
--- a/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml
@@ -8,7 +8,7 @@ title: ARM Versatile Express and Juno Boards
 
 maintainers:
   - Sudeep Holla <sudeep.holla@arm.com>
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |+
   ARM's Versatile Express platform were built as reference designs for exploring
diff --git a/Documentation/devicetree/bindings/arm/gemini.yaml b/Documentation/devicetree/bindings/arm/gemini.yaml
index f6a0b675830fbb..fc092962ab5653 100644
--- a/Documentation/devicetree/bindings/arm/gemini.yaml
+++ b/Documentation/devicetree/bindings/arm/gemini.yaml
@@ -20,7 +20,7 @@ description: |
   Many of the IP blocks used in the SoC comes from Faraday Technology.
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   $nodename:
diff --git a/Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml b/Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml
index b7b430896596aa..0f1bf634a98a1a 100644
--- a/Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml
+++ b/Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Intel IXP4xx
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   $nodename:
diff --git a/Documentation/devicetree/bindings/arm/ux500.yaml b/Documentation/devicetree/bindings/arm/ux500.yaml
index b42d20fa435968..3a8611e5786e5b 100644
--- a/Documentation/devicetree/bindings/arm/ux500.yaml
+++ b/Documentation/devicetree/bindings/arm/ux500.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Ux500 platforms
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   $nodename:
diff --git a/Documentation/devicetree/bindings/ata/ata-generic.yaml b/Documentation/devicetree/bindings/ata/ata-generic.yaml
index 0697927f3d7e6f..9da341ea091e23 100644
--- a/Documentation/devicetree/bindings/ata/ata-generic.yaml
+++ b/Documentation/devicetree/bindings/ata/ata-generic.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Generic Parallel ATA Controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   Generic Parallel ATA controllers supporting PIO modes only.
diff --git a/Documentation/devicetree/bindings/ata/cortina,gemini-sata-bridge.yaml b/Documentation/devicetree/bindings/ata/cortina,gemini-sata-bridge.yaml
index 5290936665084a..66de6d4769c120 100644
--- a/Documentation/devicetree/bindings/ata/cortina,gemini-sata-bridge.yaml
+++ b/Documentation/devicetree/bindings/ata/cortina,gemini-sata-bridge.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Cortina Systems Gemini SATA Bridge
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
     The Gemini SATA bridge in a SoC-internal PATA to SATA bridge that
diff --git a/Documentation/devicetree/bindings/ata/faraday,ftide010.yaml b/Documentation/devicetree/bindings/ata/faraday,ftide010.yaml
index fa16f3767c6a56..32e11d8a0a3b09 100644
--- a/Documentation/devicetree/bindings/ata/faraday,ftide010.yaml
+++ b/Documentation/devicetree/bindings/ata/faraday,ftide010.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Faraday Technology FTIDE010 PATA controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   This controller is the first Faraday IDE interface block, used in the
diff --git a/Documentation/devicetree/bindings/ata/intel,ixp4xx-compact-flash.yaml b/Documentation/devicetree/bindings/ata/intel,ixp4xx-compact-flash.yaml
index 378692010c5614..894a8b9eb910be 100644
--- a/Documentation/devicetree/bindings/ata/intel,ixp4xx-compact-flash.yaml
+++ b/Documentation/devicetree/bindings/ata/intel,ixp4xx-compact-flash.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Intel IXP4xx CompactFlash Card Controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   The IXP4xx network processors have a CompactFlash interface that presents
diff --git a/Documentation/devicetree/bindings/ata/pata-common.yaml b/Documentation/devicetree/bindings/ata/pata-common.yaml
index 4e867dd4d402b2..cee4bb7eb0b9c5 100644
--- a/Documentation/devicetree/bindings/ata/pata-common.yaml
+++ b/Documentation/devicetree/bindings/ata/pata-common.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Common Properties for Parallel AT attachment (PATA) controllers
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   This document defines device tree properties common to most Parallel
diff --git a/Documentation/devicetree/bindings/ata/sata-common.yaml b/Documentation/devicetree/bindings/ata/sata-common.yaml
index 58c9342b992558..667f48c3319597 100644
--- a/Documentation/devicetree/bindings/ata/sata-common.yaml
+++ b/Documentation/devicetree/bindings/ata/sata-common.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Common Properties for Serial AT attachment (SATA) controllers
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   This document defines device tree properties common to most Serial
diff --git a/Documentation/devicetree/bindings/auxdisplay/arm,versatile-lcd.yaml b/Documentation/devicetree/bindings/auxdisplay/arm,versatile-lcd.yaml
index 439f7b811a94a4..51d68a778b5cb6 100644
--- a/Documentation/devicetree/bindings/auxdisplay/arm,versatile-lcd.yaml
+++ b/Documentation/devicetree/bindings/auxdisplay/arm,versatile-lcd.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ARM Versatile Character LCD
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
   - Rob Herring <robh@kernel.org>
 
 description:
diff --git a/Documentation/devicetree/bindings/clock/stericsson,u8500-clks.yaml b/Documentation/devicetree/bindings/clock/stericsson,u8500-clks.yaml
index 2150307219a0c1..4ebfa5a8d5242b 100644
--- a/Documentation/devicetree/bindings/clock/stericsson,u8500-clks.yaml
+++ b/Documentation/devicetree/bindings/clock/stericsson,u8500-clks.yaml
@@ -8,7 +8,7 @@ title: ST-Ericsson DB8500 (U8500) clocks
 
 maintainers:
   - Ulf Hansson <ulf.hansson@linaro.org>
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: While named "U8500 clocks" these clocks are inside the
   DB8500 digital baseband system-on-chip and its siblings such as
diff --git a/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml b/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml
index a4006237aa89fd..fd20b8197207a0 100644
--- a/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml
+++ b/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Intel IXP4xx cryptographic engine
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   The Intel IXP4xx cryptographic engine makes use of the IXP4xx NPE
diff --git a/Documentation/devicetree/bindings/display/dsi-controller.yaml b/Documentation/devicetree/bindings/display/dsi-controller.yaml
index bb4d6e9e7d0cae..850b86fe03ccb1 100644
--- a/Documentation/devicetree/bindings/display/dsi-controller.yaml
+++ b/Documentation/devicetree/bindings/display/dsi-controller.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Common Properties for DSI Display Panels
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   This document defines device tree properties common to DSI, Display
diff --git a/Documentation/devicetree/bindings/display/faraday,tve200.yaml b/Documentation/devicetree/bindings/display/faraday,tve200.yaml
index e2ee7776732112..b09628b69177c6 100644
--- a/Documentation/devicetree/bindings/display/faraday,tve200.yaml
+++ b/Documentation/devicetree/bindings/display/faraday,tve200.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Faraday TV Encoder TVE200
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/arm,rtsm-display.yaml b/Documentation/devicetree/bindings/display/panel/arm,rtsm-display.yaml
index 4ad484f09ba3a0..fc04558fcc8ddf 100644
--- a/Documentation/devicetree/bindings/display/panel/arm,rtsm-display.yaml
+++ b/Documentation/devicetree/bindings/display/panel/arm,rtsm-display.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Arm RTSM Virtual Platforms Display
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: panel-common.yaml#
diff --git a/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.yaml b/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.yaml
index c9958f824d9ab3..b6c18e7283cd91 100644
--- a/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.yaml
+++ b/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ARM Versatile TFT Panels
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   These panels are connected to the daughterboards found on the
diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml
index 44423465f6e35b..4bdc33d12306b2 100644
--- a/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml
+++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Ilitek ILI9322 TFT panel driver with SPI control bus
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   This is a driver for 320x240 TFT panels, accepting a variety of input
diff --git a/Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml b/Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml
index bb50fd5506c3d8..b39fd0c5a48ad1 100644
--- a/Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml
+++ b/Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Novatek NT35510-based display panels
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: panel-common.yaml#
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml b/Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml
index 7ce8540551f9eb..74c2a617c2ff30 100644
--- a/Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml
+++ b/Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml
@@ -11,7 +11,7 @@ description: The LMS380KF01 is a 480x800 DPI display panel from Samsung Mobile
   used with internal or external backlight control.
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: panel-common.yaml#
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,lms397kf04.yaml b/Documentation/devicetree/bindings/display/panel/samsung,lms397kf04.yaml
index 9363032883de45..4cecf502a1506d 100644
--- a/Documentation/devicetree/bindings/display/panel/samsung,lms397kf04.yaml
+++ b/Documentation/devicetree/bindings/display/panel/samsung,lms397kf04.yaml
@@ -10,7 +10,7 @@ description: The datasheet claims this is based around a display controller
   named DB7430 with a separate backlight controller.
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: panel-common.yaml#
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,s6d16d0.yaml b/Documentation/devicetree/bindings/display/panel/samsung,s6d16d0.yaml
index 2af5bc47323f50..0872476a8ac9ae 100644
--- a/Documentation/devicetree/bindings/display/panel/samsung,s6d16d0.yaml
+++ b/Documentation/devicetree/bindings/display/panel/samsung,s6d16d0.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S6D16D0 4" 864x480 AMOLED panel
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: panel-common.yaml#
diff --git a/Documentation/devicetree/bindings/display/panel/sony,acx424akp.yaml b/Documentation/devicetree/bindings/display/panel/sony,acx424akp.yaml
index fd778a20f76099..64fa086730b05a 100644
--- a/Documentation/devicetree/bindings/display/panel/sony,acx424akp.yaml
+++ b/Documentation/devicetree/bindings/display/panel/sony,acx424akp.yaml
@@ -12,7 +12,7 @@ description: The Sony ACX424AKP and ACX424AKM are panels built around
   AKP.
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: panel-common.yaml#
diff --git a/Documentation/devicetree/bindings/display/panel/ti,nspire.yaml b/Documentation/devicetree/bindings/display/panel/ti,nspire.yaml
index 5c5a3b519e314c..fc722f706ad71b 100644
--- a/Documentation/devicetree/bindings/display/panel/ti,nspire.yaml
+++ b/Documentation/devicetree/bindings/display/panel/ti,nspire.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Texas Instruments NSPIRE Display Panels
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: panel-common.yaml#
diff --git a/Documentation/devicetree/bindings/display/panel/tpo,tpg110.yaml b/Documentation/devicetree/bindings/display/panel/tpo,tpg110.yaml
index 99db268eb9b3ab..e5f3108cde5a6d 100644
--- a/Documentation/devicetree/bindings/display/panel/tpo,tpg110.yaml
+++ b/Documentation/devicetree/bindings/display/panel/tpo,tpg110.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: TPO TPG110 Panel
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
   - Thierry Reding <thierry.reding@gmail.com>
 
 description: |+
diff --git a/Documentation/devicetree/bindings/display/ste,mcde.yaml b/Documentation/devicetree/bindings/display/ste,mcde.yaml
index 564ea845c82e05..7a12d0b817e68a 100644
--- a/Documentation/devicetree/bindings/display/ste,mcde.yaml
+++ b/Documentation/devicetree/bindings/display/ste,mcde.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ST-Ericsson Multi Channel Display Engine MCDE
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/dma/stericsson,dma40.yaml b/Documentation/devicetree/bindings/dma/stericsson,dma40.yaml
index 8b42d988040037..607da11e7baa93 100644
--- a/Documentation/devicetree/bindings/dma/stericsson,dma40.yaml
+++ b/Documentation/devicetree/bindings/dma/stericsson,dma40.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ST-Ericsson DMA40 DMA Engine
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: dma-controller.yaml#
diff --git a/Documentation/devicetree/bindings/extcon/fcs,fsa880.yaml b/Documentation/devicetree/bindings/extcon/fcs,fsa880.yaml
index ef6a246a133781..bff3fd5f7f4ebf 100644
--- a/Documentation/devicetree/bindings/extcon/fcs,fsa880.yaml
+++ b/Documentation/devicetree/bindings/extcon/fcs,fsa880.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Fairchild Semiconductor FSA880, FSA9480 and compatibles
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   The FSA880 and FSA9480 are USB port accessory detectors and switches.
diff --git a/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml b/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml
index 50f1f08744a1da..4d66ef4835223a 100644
--- a/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml
+++ b/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Intel IXP4xx Network Processing Engine
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   On the IXP4xx SoCs, the Network Processing Engine (NPE) is a small
diff --git a/Documentation/devicetree/bindings/gnss/brcm,bcm4751.yaml b/Documentation/devicetree/bindings/gnss/brcm,bcm4751.yaml
index 089166089498d1..c34b86bb7f6ff9 100644
--- a/Documentation/devicetree/bindings/gnss/brcm,bcm4751.yaml
+++ b/Documentation/devicetree/bindings/gnss/brcm,bcm4751.yaml
@@ -8,7 +8,7 @@ title: Broadcom BCM4751 family GNSS Receiver
 
 maintainers:
   - Johan Hovold <johan@kernel.org>
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   Broadcom GPS chips can be used over the UART or I2C bus. The UART
diff --git a/Documentation/devicetree/bindings/gpio/faraday,ftgpio010.yaml b/Documentation/devicetree/bindings/gpio/faraday,ftgpio010.yaml
index 640da5b9b0cc18..3a6a47f12982ce 100644
--- a/Documentation/devicetree/bindings/gpio/faraday,ftgpio010.yaml
+++ b/Documentation/devicetree/bindings/gpio/faraday,ftgpio010.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Faraday Technology FTGPIO010 GPIO Controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/gpio/gpio-consumer-common.yaml b/Documentation/devicetree/bindings/gpio/gpio-consumer-common.yaml
index 40d0be31e2000c..fa0148758b4b2b 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-consumer-common.yaml
+++ b/Documentation/devicetree/bindings/gpio/gpio-consumer-common.yaml
@@ -8,7 +8,7 @@ title: Common GPIO lines
 
 maintainers:
   - Bartosz Golaszewski <brgl@bgdev.pl>
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   Pay attention to using proper GPIO flag (e.g. GPIO_ACTIVE_LOW) for the GPIOs
diff --git a/Documentation/devicetree/bindings/gpio/gpio-ep9301.yaml b/Documentation/devicetree/bindings/gpio/gpio-ep9301.yaml
index 3a1079d6ee200c..ebdb7ee5b790d3 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-ep9301.yaml
+++ b/Documentation/devicetree/bindings/gpio/gpio-ep9301.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: EP93xx GPIO controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
   - Bartosz Golaszewski <brgl@bgdev.pl>
   - Nikita Shubin <nikita.shubin@maquefel.me>
 
diff --git a/Documentation/devicetree/bindings/gpio/gpio-mmio.yaml b/Documentation/devicetree/bindings/gpio/gpio-mmio.yaml
index b4d55bf6a28548..ee5d5d25ae82fb 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-mmio.yaml
+++ b/Documentation/devicetree/bindings/gpio/gpio-mmio.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Generic MMIO GPIO
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
   - Bartosz Golaszewski <brgl@bgdev.pl>
 
 description:
diff --git a/Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.yaml b/Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.yaml
index bfcb1f364c3aa0..2a980c0ed86f64 100644
--- a/Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.yaml
@@ -22,7 +22,7 @@ description: |
   and this can be enabled by a special flag.
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/gpio/mrvl-gpio.yaml b/Documentation/devicetree/bindings/gpio/mrvl-gpio.yaml
index 65155bb701a9fb..7f420b9c048082 100644
--- a/Documentation/devicetree/bindings/gpio/mrvl-gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/mrvl-gpio.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Marvell PXA GPIO controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
   - Bartosz Golaszewski <bgolaszewski@baylibre.com>
   - Rob Herring <robh@kernel.org>
 
diff --git a/Documentation/devicetree/bindings/gpio/pl061-gpio.yaml b/Documentation/devicetree/bindings/gpio/pl061-gpio.yaml
index c51e10680c0a53..4d970e55104bb5 100644
--- a/Documentation/devicetree/bindings/gpio/pl061-gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/pl061-gpio.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ARM PL061 GPIO controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
   - Rob Herring <robh@kernel.org>
 
 # We need a select here so we don't match all nodes with 'arm,primecell'
diff --git a/Documentation/devicetree/bindings/gpio/st,nomadik-gpio.yaml b/Documentation/devicetree/bindings/gpio/st,nomadik-gpio.yaml
index b3e8951959b52e..40b4a755144960 100644
--- a/Documentation/devicetree/bindings/gpio/st,nomadik-gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/st,nomadik-gpio.yaml
@@ -12,7 +12,7 @@ description:
   with pinctrl-nomadik.
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   $nodename:
diff --git a/Documentation/devicetree/bindings/gpio/st,stmpe-gpio.yaml b/Documentation/devicetree/bindings/gpio/st,stmpe-gpio.yaml
index 4555f1644a4dfe..66dd602e797ddc 100644
--- a/Documentation/devicetree/bindings/gpio/st,stmpe-gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/st,stmpe-gpio.yaml
@@ -14,7 +14,7 @@ description:
   GPIO portions of these expanders.
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml b/Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml
index dc8bc4c6df34df..efd10bcfb0820e 100644
--- a/Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml
+++ b/Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml
@@ -6,7 +6,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: NTC thermistor temperature sensors
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   Thermistors with negative temperature coefficient (NTC) are resistors that
diff --git a/Documentation/devicetree/bindings/hwmon/winbond,w83781d.yaml b/Documentation/devicetree/bindings/hwmon/winbond,w83781d.yaml
index 6971ecb314ebc9..d97b0e69847761 100644
--- a/Documentation/devicetree/bindings/hwmon/winbond,w83781d.yaml
+++ b/Documentation/devicetree/bindings/hwmon/winbond,w83781d.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Winbond W83781 and compatible hardware monitor IC
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/i2c/arm,i2c-versatile.yaml b/Documentation/devicetree/bindings/i2c/arm,i2c-versatile.yaml
index e58465d1b0c881..26026dfd788a45 100644
--- a/Documentation/devicetree/bindings/i2c/arm,i2c-versatile.yaml
+++ b/Documentation/devicetree/bindings/i2c/arm,i2c-versatile.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: I2C Controller on ARM Ltd development platforms
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: /schemas/i2c/i2c-controller.yaml#
diff --git a/Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml b/Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml
index 012402debfeb24..63a459c63f6a7e 100644
--- a/Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml
@@ -12,7 +12,7 @@ description: The Nomadik I2C host controller began its life in the ST
   DB8500 after the merge of these two companies wireless divisions.
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 # Need a custom select here or 'arm,primecell' will match on lots of nodes
 select:
diff --git a/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml b/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml
index 85c9537f1f0299..c1387e02eb8260 100644
--- a/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml
+++ b/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Bosch BMA255 and Similar Accelerometers
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
   - Stephan Gerhold <stephan@gerhold.net>
 
 description:
diff --git a/Documentation/devicetree/bindings/iio/adc/qcom,pm8018-adc.yaml b/Documentation/devicetree/bindings/iio/adc/qcom,pm8018-adc.yaml
index 58ea1ca4a5ee80..c978c3a3e31af2 100644
--- a/Documentation/devicetree/bindings/iio/adc/qcom,pm8018-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/qcom,pm8018-adc.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Qualcomm's PM8xxx voltage XOADC
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   The Qualcomm PM8xxx PMICs contain a HK/XO ADC (Housekeeping/Crystal
diff --git a/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml b/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml
index f3242dc0e7e64f..3a307ac50aa7f4 100644
--- a/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml
+++ b/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Invensense MPU-3050 Gyroscope
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/iio/light/capella,cm3605.yaml b/Documentation/devicetree/bindings/iio/light/capella,cm3605.yaml
index c63b79c3351bfb..01376c386a0342 100644
--- a/Documentation/devicetree/bindings/iio/light/capella,cm3605.yaml
+++ b/Documentation/devicetree/bindings/iio/light/capella,cm3605.yaml
@@ -8,7 +8,7 @@ title:
   Capella Microsystems CM3605 Ambient Light and Short Distance Proximity Sensor
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
   - Kevin Tsai <ktsai@capellamicro.com>
 
 description: |
diff --git a/Documentation/devicetree/bindings/iio/light/sharp,gp2ap002.yaml b/Documentation/devicetree/bindings/iio/light/sharp,gp2ap002.yaml
index f8a932be0d1037..99bddf31cbed2b 100644
--- a/Documentation/devicetree/bindings/iio/light/sharp,gp2ap002.yaml
+++ b/Documentation/devicetree/bindings/iio/light/sharp,gp2ap002.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Sharp GP2AP002A00F and GP2AP002S00F proximity and ambient light sensors
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   Proximity and ambient light sensor with IR LED for the proximity
diff --git a/Documentation/devicetree/bindings/iio/magnetometer/asahi-kasei,ak8974.yaml b/Documentation/devicetree/bindings/iio/magnetometer/asahi-kasei,ak8974.yaml
index cefb70def1886b..f6b4d987419049 100644
--- a/Documentation/devicetree/bindings/iio/magnetometer/asahi-kasei,ak8974.yaml
+++ b/Documentation/devicetree/bindings/iio/magnetometer/asahi-kasei,ak8974.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Asahi Kasei AK8974 magnetometer sensor
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/iio/magnetometer/yamaha,yas530.yaml b/Documentation/devicetree/bindings/iio/magnetometer/yamaha,yas530.yaml
index 877226e9219bac..5cbf60f3b08b5b 100644
--- a/Documentation/devicetree/bindings/iio/magnetometer/yamaha,yas530.yaml
+++ b/Documentation/devicetree/bindings/iio/magnetometer/yamaha,yas530.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Yamaha YAS530 family of magnetometer sensors
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   The Yamaha YAS530 magnetometers is a line of 3-axis magnetometers
diff --git a/Documentation/devicetree/bindings/iio/st,st-sensors.yaml b/Documentation/devicetree/bindings/iio/st,st-sensors.yaml
index e955eb8e879795..a1a958215cdb7e 100644
--- a/Documentation/devicetree/bindings/iio/st,st-sensors.yaml
+++ b/Documentation/devicetree/bindings/iio/st,st-sensors.yaml
@@ -14,7 +14,7 @@ description: The STMicroelectronics sensor devices are pretty straight-forward
 
 maintainers:
   - Denis Ciocca <denis.ciocca@st.com>
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/input/atmel,maxtouch.yaml b/Documentation/devicetree/bindings/input/atmel,maxtouch.yaml
index d79b254f1cde40..9bf07acea59994 100644
--- a/Documentation/devicetree/bindings/input/atmel,maxtouch.yaml
+++ b/Documentation/devicetree/bindings/input/atmel,maxtouch.yaml
@@ -8,7 +8,7 @@ title: Atmel maXTouch touchscreen/touchpad
 
 maintainers:
   - Nick Dyer <nick@shmanahar.org>
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   Atmel maXTouch touchscreen or touchpads such as the mXT244
diff --git a/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml b/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml
index 86a6d18f952a0c..afeab49a9544f1 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Cypress CY8CTMA140 series touchscreen controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: touchscreen.yaml#
diff --git a/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma340.yaml b/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma340.yaml
index 4dfbb93678b564..a0b8c12977a193 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma340.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma340.yaml
@@ -12,7 +12,7 @@ description: The Cypress CY8CTMA340 series (also known as "CYTTSP" after
 
 maintainers:
   - Javier Martinez Canillas <javier@dowhile0.org>
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: touchscreen.yaml#
diff --git a/Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml b/Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml
index 90ebd4f8354c27..a8a93f755458b3 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Melfas MMS114 family touchscreen controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: touchscreen.yaml#
diff --git a/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml b/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml
index 3f663ce3e44ece..f1ce837b16dfa8 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml
@@ -12,7 +12,7 @@ description: The Zinitix BT4xx and BT5xx series of touchscreen controllers
 
 maintainers:
   - Michael Srba <Michael.Srba@seznam.cz>
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: touchscreen.yaml#
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,versatile-fpga-irq.yaml b/Documentation/devicetree/bindings/interrupt-controller/arm,versatile-fpga-irq.yaml
index 8d581b3aac3a1b..42ab873665e1e5 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,versatile-fpga-irq.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,versatile-fpga-irq.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ARM Versatile FPGA IRQ Controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   One or more FPGA IRQ controllers can be synthesized in an ARM reference board
diff --git a/Documentation/devicetree/bindings/interrupt-controller/faraday,ftintc010.yaml b/Documentation/devicetree/bindings/interrupt-controller/faraday,ftintc010.yaml
index 980e5c45f25b1d..e6495acea038fa 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/faraday,ftintc010.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/faraday,ftintc010.yaml
@@ -6,7 +6,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Faraday Technology FTINTC010 interrupt controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   This interrupt controller is a stock IP block from Faraday Technology found
diff --git a/Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt.yaml b/Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt.yaml
index a02a6b5af20562..c375e08ba4104b 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt.yaml
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Intel IXP4xx XScale Networking Processors Interrupt Controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   This interrupt controller is found in the Intel IXP4xx processors.
diff --git a/Documentation/devicetree/bindings/leds/backlight/kinetic,ktd253.yaml b/Documentation/devicetree/bindings/leds/backlight/kinetic,ktd253.yaml
index 73fa59e6218164..e7207eb2658425 100644
--- a/Documentation/devicetree/bindings/leds/backlight/kinetic,ktd253.yaml
+++ b/Documentation/devicetree/bindings/leds/backlight/kinetic,ktd253.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Kinetic Technologies KTD253 and KTD259 one-wire backlight
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   The Kinetic Technologies KTD253 and KTD259 are white LED backlights
diff --git a/Documentation/devicetree/bindings/leds/register-bit-led.yaml b/Documentation/devicetree/bindings/leds/register-bit-led.yaml
index 20930d327ae999..a6bafc96bd0c36 100644
--- a/Documentation/devicetree/bindings/leds/register-bit-led.yaml
+++ b/Documentation/devicetree/bindings/leds/register-bit-led.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Register Bit LEDs
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |+
   Register bit leds are used with syscon multifunctional devices where single
diff --git a/Documentation/devicetree/bindings/leds/regulator-led.yaml b/Documentation/devicetree/bindings/leds/regulator-led.yaml
index 4ef7b96e9a086b..75ee87d4a78699 100644
--- a/Documentation/devicetree/bindings/leds/regulator-led.yaml
+++ b/Documentation/devicetree/bindings/leds/regulator-led.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Regulator LEDs
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   Regulator LEDs are powered by a single regulator such that they can
diff --git a/Documentation/devicetree/bindings/leds/richtek,rt8515.yaml b/Documentation/devicetree/bindings/leds/richtek,rt8515.yaml
index 68c328eec03bec..0356371a6b014a 100644
--- a/Documentation/devicetree/bindings/leds/richtek,rt8515.yaml
+++ b/Documentation/devicetree/bindings/leds/richtek,rt8515.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Richtek RT8515 1.5A dual channel LED driver
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   The Richtek RT8515 is a dual channel (two mode) LED driver that
diff --git a/Documentation/devicetree/bindings/memory-controllers/intel,ixp4xx-expansion-bus-controller.yaml b/Documentation/devicetree/bindings/memory-controllers/intel,ixp4xx-expansion-bus-controller.yaml
index 3049d6bb0b1fe6..2a4bf905a36984 100644
--- a/Documentation/devicetree/bindings/memory-controllers/intel,ixp4xx-expansion-bus-controller.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/intel,ixp4xx-expansion-bus-controller.yaml
@@ -12,7 +12,7 @@ description: |
   including IXP42x, IXP43x, IXP45x and IXP46x.
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   $nodename:
diff --git a/Documentation/devicetree/bindings/memory-controllers/intel,ixp4xx-expansion-peripheral-props.yaml b/Documentation/devicetree/bindings/memory-controllers/intel,ixp4xx-expansion-peripheral-props.yaml
index d1479a7b9c8df4..020fa49c345441 100644
--- a/Documentation/devicetree/bindings/memory-controllers/intel,ixp4xx-expansion-peripheral-props.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/intel,ixp4xx-expansion-peripheral-props.yaml
@@ -12,7 +12,7 @@ description:
   including IXP42x, IXP43x, IXP45x and IXP46x.
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   intel,ixp4xx-eb-t1:
diff --git a/Documentation/devicetree/bindings/mfd/arm,dev-platforms-syscon.yaml b/Documentation/devicetree/bindings/mfd/arm,dev-platforms-syscon.yaml
index 46b164ae083157..7f3b1b77293c5b 100644
--- a/Documentation/devicetree/bindings/mfd/arm,dev-platforms-syscon.yaml
+++ b/Documentation/devicetree/bindings/mfd/arm,dev-platforms-syscon.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Arm Ltd Developer Platforms System Controllers
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   The Arm Ltd Integrator, Realview, and Versatile families of developer
diff --git a/Documentation/devicetree/bindings/mfd/st,stmpe.yaml b/Documentation/devicetree/bindings/mfd/st,stmpe.yaml
index b77cc3f3075d79..df43878fbe18f3 100644
--- a/Documentation/devicetree/bindings/mfd/st,stmpe.yaml
+++ b/Documentation/devicetree/bindings/mfd/st,stmpe.yaml
@@ -12,7 +12,7 @@ description: STMicroelectronics Port Expander (STMPE) is a series of slow
   peripherals connected to SPI or I2C.
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: /schemas/spi/spi-peripheral-props.yaml#
diff --git a/Documentation/devicetree/bindings/mfd/stericsson,ab8500.yaml b/Documentation/devicetree/bindings/mfd/stericsson,ab8500.yaml
index ce5e845ab5c52c..0fdfbfdfe88a36 100644
--- a/Documentation/devicetree/bindings/mfd/stericsson,ab8500.yaml
+++ b/Documentation/devicetree/bindings/mfd/stericsson,ab8500.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ST-Ericsson Analog Baseband AB8500 and AB8505
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   the AB8500 "Analog Baseband" is the mixed-signals integrated circuit
diff --git a/Documentation/devicetree/bindings/mfd/stericsson,db8500-prcmu.yaml b/Documentation/devicetree/bindings/mfd/stericsson,db8500-prcmu.yaml
index d6c13779d44e93..4edd4a3bab880d 100644
--- a/Documentation/devicetree/bindings/mfd/stericsson,db8500-prcmu.yaml
+++ b/Documentation/devicetree/bindings/mfd/stericsson,db8500-prcmu.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ST-Ericsson DB8500 PRCMU - Power Reset and Control Management Unit
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   The DB8500 Power Reset and Control Management Unit is an XP70 8-bit
diff --git a/Documentation/devicetree/bindings/misc/intel,ixp4xx-ahb-queue-manager.yaml b/Documentation/devicetree/bindings/misc/intel,ixp4xx-ahb-queue-manager.yaml
index aab89946b04fbd..1198d87d0ab67a 100644
--- a/Documentation/devicetree/bindings/misc/intel,ixp4xx-ahb-queue-manager.yaml
+++ b/Documentation/devicetree/bindings/misc/intel,ixp4xx-ahb-queue-manager.yaml
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Intel IXP4xx AHB Queue Manager
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   The IXP4xx AHB Queue Manager maintains queues as circular buffers in
diff --git a/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml b/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml
index 8f62e2c7fa6414..f90fd73904a246 100644
--- a/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml
+++ b/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ARM PrimeCell MultiMedia Card Interface (MMCI) PL180 and PL181
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
   - Ulf Hansson <ulf.hansson@linaro.org>
 
 description:
diff --git a/Documentation/devicetree/bindings/mtd/partitions/arm,arm-firmware-suite.yaml b/Documentation/devicetree/bindings/mtd/partitions/arm,arm-firmware-suite.yaml
index 97618847ee3548..e9b1a6869910cd 100644
--- a/Documentation/devicetree/bindings/mtd/partitions/arm,arm-firmware-suite.yaml
+++ b/Documentation/devicetree/bindings/mtd/partitions/arm,arm-firmware-suite.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ARM Firmware Suite (AFS) Partitions
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 select: false
 
diff --git a/Documentation/devicetree/bindings/mtd/partitions/redboot-fis.yaml b/Documentation/devicetree/bindings/mtd/partitions/redboot-fis.yaml
index ba7445cd69e8f4..e3978d2bc056f4 100644
--- a/Documentation/devicetree/bindings/mtd/partitions/redboot-fis.yaml
+++ b/Documentation/devicetree/bindings/mtd/partitions/redboot-fis.yaml
@@ -14,7 +14,7 @@ description: The FLASH Image System (FIS) directory is a flash description
     32 KB in size.
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 select: false
 
diff --git a/Documentation/devicetree/bindings/mtd/partitions/seama.yaml b/Documentation/devicetree/bindings/mtd/partitions/seama.yaml
index 4c1cbf43e81a64..4af185204b4b93 100644
--- a/Documentation/devicetree/bindings/mtd/partitions/seama.yaml
+++ b/Documentation/devicetree/bindings/mtd/partitions/seama.yaml
@@ -18,7 +18,7 @@ allOf:
   - $ref: partition.yaml#
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/net/bluetooth/brcm,bluetooth.yaml b/Documentation/devicetree/bindings/net/bluetooth/brcm,bluetooth.yaml
index 3c410cadff2304..95501e858e6f82 100644
--- a/Documentation/devicetree/bindings/net/bluetooth/brcm,bluetooth.yaml
+++ b/Documentation/devicetree/bindings/net/bluetooth/brcm,bluetooth.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Broadcom Bluetooth Chips
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   This binding describes Broadcom UART-attached bluetooth chips.
diff --git a/Documentation/devicetree/bindings/net/cortina,gemini-ethernet.yaml b/Documentation/devicetree/bindings/net/cortina,gemini-ethernet.yaml
index a930358f6a66b6..f0b5bea2458d59 100644
--- a/Documentation/devicetree/bindings/net/cortina,gemini-ethernet.yaml
+++ b/Documentation/devicetree/bindings/net/cortina,gemini-ethernet.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Cortina Systems Gemini Ethernet Controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   This ethernet controller is found in the Gemini SoC family:
diff --git a/Documentation/devicetree/bindings/net/dsa/micrel,ks8995.yaml b/Documentation/devicetree/bindings/net/dsa/micrel,ks8995.yaml
index 854808ff5ad5d1..e9ce3606703317 100644
--- a/Documentation/devicetree/bindings/net/dsa/micrel,ks8995.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/micrel,ks8995.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Micrel KS8995 Family DSA Switches
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   The Micrel KS8995 DSA Switches are 100 Mbit switches that were produced in
diff --git a/Documentation/devicetree/bindings/net/dsa/realtek.yaml b/Documentation/devicetree/bindings/net/dsa/realtek.yaml
index f348e66fb51583..473facd87a622f 100644
--- a/Documentation/devicetree/bindings/net/dsa/realtek.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/realtek.yaml
@@ -10,7 +10,7 @@ allOf:
   - $ref: dsa.yaml#/$defs/ethernet-ports
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   Realtek advertises these chips as fast/gigabit switches or unmanaged
diff --git a/Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.yaml b/Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.yaml
index 51cf574249becd..c41f479bdee94f 100644
--- a/Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Vitesse VSC73xx DSA Switches
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   The Vitesse DSA Switches were produced in the early-to-mid 2000s.
diff --git a/Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml b/Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml
index f92730b1d2fad2..80336b7e64eccd 100644
--- a/Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml
+++ b/Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Intel IXP46x PTP Timer (TSYNC)
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   The Intel IXP46x PTP timer is known in the manual as IEEE1588 Hardware
diff --git a/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml b/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml
index 8689de1aaea15f..3b8f83b7099d3f 100644
--- a/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml
+++ b/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml
@@ -11,7 +11,7 @@ allOf:
   - $ref: ethernet-controller.yaml#
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   The Intel IXP4xx ethernet makes use of the IXP4xx NPE (Network
diff --git a/Documentation/devicetree/bindings/net/intel,ixp4xx-hss.yaml b/Documentation/devicetree/bindings/net/intel,ixp4xx-hss.yaml
index 7a405e9b37b2c8..1d952735c81b6a 100644
--- a/Documentation/devicetree/bindings/net/intel,ixp4xx-hss.yaml
+++ b/Documentation/devicetree/bindings/net/intel,ixp4xx-hss.yaml
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Intel IXP4xx V.35 WAN High Speed Serial Link (HSS)
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   The Intel IXP4xx HSS makes use of the IXP4xx NPE (Network
diff --git a/Documentation/devicetree/bindings/pci/faraday,ftpci100.yaml b/Documentation/devicetree/bindings/pci/faraday,ftpci100.yaml
index 378dd1c8e2ee2e..fed393a8956337 100644
--- a/Documentation/devicetree/bindings/pci/faraday,ftpci100.yaml
+++ b/Documentation/devicetree/bindings/pci/faraday,ftpci100.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Faraday Technology FTPCI100 PCI Host Bridge
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
     This PCI bridge is found inside that Cortina Systems Gemini SoC platform and
diff --git a/Documentation/devicetree/bindings/pci/intel,ixp4xx-pci.yaml b/Documentation/devicetree/bindings/pci/intel,ixp4xx-pci.yaml
index 3cae2e0f7f5e26..c1806aef7bac4d 100644
--- a/Documentation/devicetree/bindings/pci/intel,ixp4xx-pci.yaml
+++ b/Documentation/devicetree/bindings/pci/intel,ixp4xx-pci.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Intel IXP4xx PCI controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: PCI host controller found in the Intel IXP4xx SoC series.
 
diff --git a/Documentation/devicetree/bindings/pci/v3,v360epc-pci.yaml b/Documentation/devicetree/bindings/pci/v3,v360epc-pci.yaml
index 38cac88f17bfd4..0e2ac2f8faed3a 100644
--- a/Documentation/devicetree/bindings/pci/v3,v360epc-pci.yaml
+++ b/Documentation/devicetree/bindings/pci/v3,v360epc-pci.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: V3 Semiconductor V360 EPC PCI bridge
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   This bridge is found in the ARM Integrator/AP (Application Platform)
diff --git a/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml b/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml
index d1bc389e0a6d18..a916d0fc79a992 100644
--- a/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Generic Pin Configuration Node
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   Many data items that are represented in a pin configuration node are common
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/pinctrl.yaml
index d471563119a98e..290438826c507e 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Pin controller device
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
   - Rafał Miłecki <rafal@milecki.pl>
 
 description: |
diff --git a/Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml b/Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml
index ca9d246d46fe4f..7ba26271c4d65f 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Generic Pin Multiplexing Node
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   The contents of the pin configuration child nodes are defined by the binding
diff --git a/Documentation/devicetree/bindings/power/supply/samsung,battery.yaml b/Documentation/devicetree/bindings/power/supply/samsung,battery.yaml
index 40292d581b1055..fa1ccff043bedb 100644
--- a/Documentation/devicetree/bindings/power/supply/samsung,battery.yaml
+++ b/Documentation/devicetree/bindings/power/supply/samsung,battery.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung SDI Batteries
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   Samsung SDI (Samsung Digital Interface) batteries are all different versions
diff --git a/Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml b/Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml
index 9f7590ce6b3d6e..146593a669d667 100644
--- a/Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml
+++ b/Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml
@@ -12,7 +12,7 @@ description: |
   32 bit random number.
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/rtc/faraday,ftrtc010.yaml b/Documentation/devicetree/bindings/rtc/faraday,ftrtc010.yaml
index b1c1a0e2131881..2b1215b4958075 100644
--- a/Documentation/devicetree/bindings/rtc/faraday,ftrtc010.yaml
+++ b/Documentation/devicetree/bindings/rtc/faraday,ftrtc010.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Faraday Technology FTRTC010 Real Time Clock
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: |
   This RTC appears in for example the Storlink Gemini family of SoCs.
diff --git a/Documentation/devicetree/bindings/spi/arm,pl022-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/arm,pl022-peripheral-props.yaml
index bb8b6863b10909..f976e416395b7d 100644
--- a/Documentation/devicetree/bindings/spi/arm,pl022-peripheral-props.yaml
+++ b/Documentation/devicetree/bindings/spi/arm,pl022-peripheral-props.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Peripheral-specific properties for Arm PL022 SPI controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 select: false
 
diff --git a/Documentation/devicetree/bindings/spi/spi-pl022.yaml b/Documentation/devicetree/bindings/spi/spi-pl022.yaml
index 7f174b7d0a26f6..680fdfa184d0c8 100644
--- a/Documentation/devicetree/bindings/spi/spi-pl022.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-pl022.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ARM PL022 SPI controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: spi-controller.yaml#
diff --git a/Documentation/devicetree/bindings/timer/faraday,fttmr010.yaml b/Documentation/devicetree/bindings/timer/faraday,fttmr010.yaml
index 39506323556c57..e93c20243dba14 100644
--- a/Documentation/devicetree/bindings/timer/faraday,fttmr010.yaml
+++ b/Documentation/devicetree/bindings/timer/faraday,fttmr010.yaml
@@ -8,7 +8,7 @@ title: Faraday FTTMR010 timer
 
 maintainers:
   - Joel Stanley <joel@jms.id.au>
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description:
   This timer is a generic IP block from Faraday Technology, embedded in the
diff --git a/Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml b/Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml
index 526b8db4d57590..c92e6b9cd5e2fb 100644
--- a/Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml
+++ b/Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Intel IXP4xx XScale Networking Processors Timers
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: This timer is found in the Intel IXP4xx processors.
 
diff --git a/Documentation/devicetree/bindings/timer/st,nomadik-mtu.yaml b/Documentation/devicetree/bindings/timer/st,nomadik-mtu.yaml
index fa65878b357198..873a01c287f47b 100644
--- a/Documentation/devicetree/bindings/timer/st,nomadik-mtu.yaml
+++ b/Documentation/devicetree/bindings/timer/st,nomadik-mtu.yaml
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ST Microelectronics Nomadik Multi-Timer Unit MTU Timer
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 description: This timer is found in the ST Microelectronics Nomadik
   SoCs STn8800, STn8810 and STn8815 as well as in ST-Ericsson DB8500.
diff --git a/Documentation/devicetree/bindings/usb/faraday,fotg210.yaml b/Documentation/devicetree/bindings/usb/faraday,fotg210.yaml
index 3fe4d1564dfed7..b97ba535087c9d 100644
--- a/Documentation/devicetree/bindings/usb/faraday,fotg210.yaml
+++ b/Documentation/devicetree/bindings/usb/faraday,fotg210.yaml
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Faraday Technology FOTG200 series HS OTG USB 2.0 controller
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: usb-drd.yaml#
diff --git a/Documentation/devicetree/bindings/usb/intel,ixp4xx-udc.yaml b/Documentation/devicetree/bindings/usb/intel,ixp4xx-udc.yaml
index 4ed60274689725..91a149ad3ad6e7 100644
--- a/Documentation/devicetree/bindings/usb/intel,ixp4xx-udc.yaml
+++ b/Documentation/devicetree/bindings/usb/intel,ixp4xx-udc.yaml
@@ -10,7 +10,7 @@ description: The IXP4xx SoCs has a full-speed USB Device
   Controller with 16 endpoints and a built-in transceiver.
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/watchdog/faraday,ftwdt010.yaml b/Documentation/devicetree/bindings/watchdog/faraday,ftwdt010.yaml
index 726dc872ad02dd..3eb35f325f4c5a 100644
--- a/Documentation/devicetree/bindings/watchdog/faraday,ftwdt010.yaml
+++ b/Documentation/devicetree/bindings/watchdog/faraday,ftwdt010.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Faraday Technology FTWDT010 watchdog
 
 maintainers:
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
   - Corentin Labbe <clabbe@baylibre.com>
 
 description: |
diff --git a/Documentation/devicetree/bindings/watchdog/maxim,max63xx.yaml b/Documentation/devicetree/bindings/watchdog/maxim,max63xx.yaml
index 442c21f12a3b27..defe0401ded0b4 100644
--- a/Documentation/devicetree/bindings/watchdog/maxim,max63xx.yaml
+++ b/Documentation/devicetree/bindings/watchdog/maxim,max63xx.yaml
@@ -8,7 +8,7 @@ title: Maxim 63xx Watchdog Timers
 
 maintainers:
   - Marc Zyngier <maz@kernel.org>
-  - Linus Walleij <linus.walleij@linaro.org>
+  - Linus Walleij <linusw@kernel.org>
 
 allOf:
   - $ref: watchdog.yaml#

From 244a07c4862d5a2e3efd56241dc979ebf2b798a4 Mon Sep 17 00:00:00 2001
From: "Mario Limonciello (AMD)" <superm1@kernel.org>
Date: Tue, 9 Dec 2025 16:00:29 -0600
Subject: [PATCH 168/258] drm/amd: Resume the device in thaw() callback when
 console suspend is disabled

If console suspend has been disabled using `no_console_suspend` also
wake up during thaw() so that some messages can be seen for debugging.

Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/4191
Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 63387cbbb714d9f0d179d9d4560de1408d0906de)
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 2dfbddcef9ab3c..848e6b7db482d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -33,6 +33,7 @@
 #include <drm/drm_vblank.h>
 
 #include <linux/cc_platform.h>
+#include <linux/console.h>
 #include <linux/dynamic_debug.h>
 #include <linux/module.h>
 #include <linux/mmu_notifier.h>
@@ -2704,7 +2705,9 @@ static int amdgpu_pmops_thaw(struct device *dev)
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
 
 	/* do not resume device if it's normal hibernation */
-	if (!pm_hibernate_is_recovering() && !pm_hibernation_mode_is_suspend())
+	if (console_suspend_enabled &&
+	    !pm_hibernate_is_recovering() &&
+	    !pm_hibernation_mode_is_suspend())
 		return 0;
 
 	return amdgpu_device_resume(drm_dev, true);

From 69741d9ccc7222e6b6f138db67b012ecc0d72542 Mon Sep 17 00:00:00 2001
From: Ray Wu <ray.wu@amd.com>
Date: Fri, 28 Nov 2025 08:58:13 +0800
Subject: [PATCH 169/258] drm/amd/display: Fix scratch registers offsets for
 DCN35

[Why]
Different platforms use differnet NBIO header files,
causing display code to use differnt offset and read
wrong accelerated status.

[How]
- Unified NBIO offset header file across platform.
- Correct scratch registers offsets to proper locations.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4667
Cc: Mario Limonciello <mario.limonciello@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Ray Wu <ray.wu@amd.com>
Signed-off-by: Chenyu Chen <chen-yu.chen@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 49a63bc8eda0304ba307f5ba68305f936174f72d)
Cc: stable@vger.kernel.org
---
 .../drm/amd/display/dc/resource/dcn35/dcn35_resource.c    | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index ef69898d2cc5d2..d056e5fd545871 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -203,12 +203,12 @@ enum dcn35_clk_src_array_id {
 	NBIO_BASE_INNER(seg)
 
 #define NBIO_SR(reg_name)\
-	REG_STRUCT.reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \
-				regBIF_BX2_ ## reg_name
+	REG_STRUCT.reg_name = NBIO_BASE(regBIF_BX1_ ## reg_name ## _BASE_IDX) + \
+				regBIF_BX1_ ## reg_name
 
 #define NBIO_SR_ARR(reg_name, id)\
-	REG_STRUCT[id].reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \
-		regBIF_BX2_ ## reg_name
+	REG_STRUCT[id].reg_name = NBIO_BASE(regBIF_BX1_ ## reg_name ## _BASE_IDX) + \
+		regBIF_BX1_ ## reg_name
 
 #define bios_regs_init() \
 		( \

From fd62aa13d3ee0f21c756a40a7c2f900f98992d6a Mon Sep 17 00:00:00 2001
From: Ray Wu <ray.wu@amd.com>
Date: Fri, 28 Nov 2025 09:14:09 +0800
Subject: [PATCH 170/258] drm/amd/display: Fix scratch registers offsets for
 DCN351

[Why]
Different platforms use different NBIO header files,
causing display code to use differnt offset and read
wrong accelerated status.

[How]
- Unified NBIO offset header file across platform.
- Correct scratch registers offsets to proper locations.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4667
Cc: Mario Limonciello <mario.limonciello@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Ray Wu <ray.wu@amd.com>
Signed-off-by: Chenyu Chen <chen-yu.chen@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 576e032e909c8a6bb3d907b4ef5f6abe0f644199)
Cc: stable@vger.kernel.org
---
 .../drm/amd/display/dc/resource/dcn351/dcn351_resource.c  | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
index f3c614c4490ce2..9fab3169069c40 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
@@ -183,12 +183,12 @@ enum dcn351_clk_src_array_id {
 	NBIO_BASE_INNER(seg)
 
 #define NBIO_SR(reg_name)\
-	REG_STRUCT.reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \
-				regBIF_BX2_ ## reg_name
+	REG_STRUCT.reg_name = NBIO_BASE(regBIF_BX1_ ## reg_name ## _BASE_IDX) + \
+				regBIF_BX1_ ## reg_name
 
 #define NBIO_SR_ARR(reg_name, id)\
-	REG_STRUCT[id].reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \
-		regBIF_BX2_ ## reg_name
+	REG_STRUCT[id].reg_name = NBIO_BASE(regBIF_BX1_ ## reg_name ## _BASE_IDX) + \
+		regBIF_BX1_ ## reg_name
 
 #define bios_regs_init() \
 		( \

From 3886b198bd6e49c801fe9552fcfbfc387a49fbbc Mon Sep 17 00:00:00 2001
From: Charlene Liu <Charlene.Liu@amd.com>
Date: Fri, 28 Nov 2025 19:38:31 -0500
Subject: [PATCH 171/258] drm/amd/display: Fix DP no audio issue

[why]
need to enable APG_CLOCK_ENABLE enable first
also need to wake up az from D3 before access az block

Reviewed-by: Swapnil Patel <swapnil.patel@amd.com>
Signed-off-by: Charlene Liu <Charlene.Liu@amd.com>
Signed-off-by: Chenyu Chen <chen-yu.chen@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit bf5e396957acafd46003318965500914d5f4edfa)
---
 drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index 4986f12dc9dfd3..0cdd8c74abdfa1 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -1118,13 +1118,13 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx)
 			if (dc->current_state->res_ctx.pipe_ctx[i].stream_res.audio != NULL)
 				num_audio++;
 		}
+		if (num_audio >= 1 && clk_mgr->funcs->enable_pme_wa) {
+			/*wake AZ from D3 first before access az endpoint*/
+			clk_mgr->funcs->enable_pme_wa(clk_mgr);
+		}
 
 		pipe_ctx->stream_res.audio->funcs->az_enable(pipe_ctx->stream_res.audio);
 
-		if (num_audio >= 1 && clk_mgr->funcs->enable_pme_wa)
-			/*this is the first audio. apply the PME w/a in order to wake AZ from D3*/
-			clk_mgr->funcs->enable_pme_wa(clk_mgr);
-
 		link_hwss->enable_audio_packet(pipe_ctx);
 
 		if (pipe_ctx->stream_res.audio)

From 77f73253015cbc7893fca1821ac3eae9eb4bc943 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 10 Dec 2025 11:02:30 -0500
Subject: [PATCH 172/258] drm/amdgpu: fix a job->pasid access race in gpu
 recovery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid a possible UAF in GPU recovery due to a race between
the sched timeout callback and the tdr work queue.

The gpu recovery function calls drm_sched_stop() and
later drm_sched_start().  drm_sched_start() restarts
the tdr queue which will eventually free the job.  If
the tdr queue frees the job before time out callback
completes, the job will be freed and we'll get a UAF
when accessing the pasid.  Cache it early to avoid the
UAF.

Example KASAN trace:
[  493.058141] BUG: KASAN: slab-use-after-free in amdgpu_device_gpu_recover+0x968/0x990 [amdgpu]
[  493.067530] Read of size 4 at addr ffff88b0ce3f794c by task kworker/u128:1/323
[  493.074892]
[  493.076485] CPU: 9 UID: 0 PID: 323 Comm: kworker/u128:1 Tainted: G            E       6.16.0-1289896.2.zuul.bf4f11df81c1410bbe901c4373305a31 #1 PREEMPT(voluntary)
[  493.076493] Tainted: [E]=UNSIGNED_MODULE
[  493.076495] Hardware name: TYAN B8021G88V2HR-2T/S8021GM2NR-2T, BIOS V1.03.B10 04/01/2019
[  493.076500] Workqueue: amdgpu-reset-dev drm_sched_job_timedout [gpu_sched]
[  493.076512] Call Trace:
[  493.076515]  <TASK>
[  493.076518]  dump_stack_lvl+0x64/0x80
[  493.076529]  print_report+0xce/0x630
[  493.076536]  ? _raw_spin_lock_irqsave+0x86/0xd0
[  493.076541]  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
[  493.076545]  ? amdgpu_device_gpu_recover+0x968/0x990 [amdgpu]
[  493.077253]  kasan_report+0xb8/0xf0
[  493.077258]  ? amdgpu_device_gpu_recover+0x968/0x990 [amdgpu]
[  493.077965]  amdgpu_device_gpu_recover+0x968/0x990 [amdgpu]
[  493.078672]  ? __pfx_amdgpu_device_gpu_recover+0x10/0x10 [amdgpu]
[  493.079378]  ? amdgpu_coredump+0x1fd/0x4c0 [amdgpu]
[  493.080111]  amdgpu_job_timedout+0x642/0x1400 [amdgpu]
[  493.080903]  ? pick_task_fair+0x24e/0x330
[  493.080910]  ? __pfx_amdgpu_job_timedout+0x10/0x10 [amdgpu]
[  493.081702]  ? _raw_spin_lock+0x75/0xc0
[  493.081708]  ? __pfx__raw_spin_lock+0x10/0x10
[  493.081712]  drm_sched_job_timedout+0x1b0/0x4b0 [gpu_sched]
[  493.081721]  ? __pfx__raw_spin_lock_irq+0x10/0x10
[  493.081725]  process_one_work+0x679/0xff0
[  493.081732]  worker_thread+0x6ce/0xfd0
[  493.081736]  ? __pfx_worker_thread+0x10/0x10
[  493.081739]  kthread+0x376/0x730
[  493.081744]  ? __pfx_kthread+0x10/0x10
[  493.081748]  ? __pfx__raw_spin_lock_irq+0x10/0x10
[  493.081751]  ? __pfx_kthread+0x10/0x10
[  493.081755]  ret_from_fork+0x247/0x330
[  493.081761]  ? __pfx_kthread+0x10/0x10
[  493.081764]  ret_from_fork_asm+0x1a/0x30
[  493.081771]  </TASK>

Fixes: a72002cb181f ("drm/amdgpu: Make use of drm_wedge_task_info")
Link: https://github.com/HansKristian-Work/vkd3d-proton/pull/2670
Cc: SRINIVASAN.SHANMUGAM@amd.com
Cc: vitaly.prosyak@amd.com
Cc: christian.koenig@amd.com
Suggested-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 20880a3fd5dd7bca1a079534cf6596bda92e107d)
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 58c3ffe707d1d3..12201b8e99b3fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -6613,6 +6613,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	struct amdgpu_hive_info *hive = NULL;
 	int r = 0;
 	bool need_emergency_restart = false;
+	/* save the pasid here as the job may be freed before the end of the reset */
+	int pasid = job ? job->pasid : -EINVAL;
 
 	/*
 	 * If it reaches here because of hang/timeout and a RAS error is
@@ -6713,8 +6715,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	if (!r) {
 		struct amdgpu_task_info *ti = NULL;
 
-		if (job)
-			ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid);
+		/*
+		 * The job may already be freed at this point via the sched tdr workqueue so
+		 * use the cached pasid.
+		 */
+		if (pasid >= 0)
+			ti = amdgpu_vm_get_task_info_pasid(adev, pasid);
 
 		drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
 				     ti ? &ti->task : NULL);

From 7a372e214f6b814253e940334ab6eabb5181fc6f Mon Sep 17 00:00:00 2001
From: mythilam <mythilam@amd.com>
Date: Thu, 4 Dec 2025 11:04:12 +0530
Subject: [PATCH 173/258] drm/amd/pm: restore SCLK settings after S0ix resume

User-configured SCLK(GPU core clock)frequencies were not persisting
across S0ix suspend/resume cycles on smu v14 hardware.
The issue occurred because of the code resetting clock frequency
to zero during resume.

This patch addresses the problem by:
- Preserving user-configured values in driver and sets the
  clock frequency across resume
- Preserved settings are sent to the hardware during resume

Signed-off-by: mythilam <mythilam@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 20ba98326f4c69e6bf8d1f42942ece485a675b27)
---
 .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c    |  5 +++
 .../drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c  | 37 ++++++++++++++++---
 2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
index f9b0938c57ea71..f2a16dfee59981 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
@@ -1939,6 +1939,11 @@ int smu_v14_0_od_edit_dpm_table(struct smu_context *smu,
 			dev_err(smu->adev->dev, "Set soft max sclk failed!");
 			return ret;
 		}
+		if (smu->gfx_actual_hard_min_freq != smu->gfx_default_hard_min_freq ||
+		    smu->gfx_actual_soft_max_freq != smu->gfx_default_soft_max_freq)
+			smu->user_dpm_profile.user_od = true;
+		else
+			smu->user_dpm_profile.user_od = false;
 		break;
 	default:
 		return -ENOSYS;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
index b1bd946d8e3091..97414bc3976419 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
@@ -1514,9 +1514,10 @@ static int smu_v14_0_1_set_fine_grain_gfx_freq_parameters(struct smu_context *sm
 
 	smu->gfx_default_hard_min_freq = clk_table->MinGfxClk;
 	smu->gfx_default_soft_max_freq = clk_table->MaxGfxClk;
-	smu->gfx_actual_hard_min_freq = 0;
-	smu->gfx_actual_soft_max_freq = 0;
-
+	if (smu->gfx_actual_hard_min_freq == 0)
+		smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq;
+	if (smu->gfx_actual_soft_max_freq == 0)
+		smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq;
 	return 0;
 }
 
@@ -1526,8 +1527,10 @@ static int smu_v14_0_0_set_fine_grain_gfx_freq_parameters(struct smu_context *sm
 
 	smu->gfx_default_hard_min_freq = clk_table->MinGfxClk;
 	smu->gfx_default_soft_max_freq = clk_table->MaxGfxClk;
-	smu->gfx_actual_hard_min_freq = 0;
-	smu->gfx_actual_soft_max_freq = 0;
+	if (smu->gfx_actual_hard_min_freq == 0)
+		smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq;
+	if (smu->gfx_actual_soft_max_freq == 0)
+		smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq;
 
 	return 0;
 }
@@ -1665,6 +1668,29 @@ static int smu_v14_0_common_set_mall_enable(struct smu_context *smu)
 	return ret;
 }
 
+static int smu_v14_0_0_restore_user_od_settings(struct smu_context *smu)
+{
+	int ret;
+
+	ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinGfxClk,
+					      smu->gfx_actual_hard_min_freq,
+					      NULL);
+	if (ret) {
+		dev_err(smu->adev->dev, "Failed to restore hard min sclk!\n");
+		return ret;
+	}
+
+	ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk,
+					      smu->gfx_actual_soft_max_freq,
+					      NULL);
+	if (ret) {
+		dev_err(smu->adev->dev, "Failed to restore soft max sclk!\n");
+		return ret;
+	}
+
+	return 0;
+}
+
 static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
 	.check_fw_status = smu_v14_0_check_fw_status,
 	.check_fw_version = smu_v14_0_check_fw_version,
@@ -1688,6 +1714,7 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
 	.mode2_reset = smu_v14_0_0_mode2_reset,
 	.get_dpm_ultimate_freq = smu_v14_0_common_get_dpm_ultimate_freq,
 	.set_soft_freq_limited_range = smu_v14_0_0_set_soft_freq_limited_range,
+	.restore_user_od_settings = smu_v14_0_0_restore_user_od_settings,
 	.od_edit_dpm_table = smu_v14_0_od_edit_dpm_table,
 	.print_clk_levels = smu_v14_0_0_print_clk_levels,
 	.force_clk_levels = smu_v14_0_0_force_clk_levels,

From 969faea4e9d01787c58bab4d945f7ad82dad222d Mon Sep 17 00:00:00 2001
From: Brian Kocoloski <brian.kocoloski@amd.com>
Date: Thu, 20 Nov 2025 13:57:19 -0500
Subject: [PATCH 174/258] drm/amdkfd: Fix improper NULL termination of queue
 restore SMI event string

Pass character "0" rather than NULL terminator to properly format
queue restoration SMI events. Currently, the NULL terminator precedes
the newline character that is intended to delineate separate events
in the SMI event buffer, which can break userspace parsers.

Signed-off-by: Brian Kocoloski <brian.kocoloski@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 6e7143e5e6e21f9d5572e0390f7089e6d53edf3c)
---
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index a499449fcb068a..d2bc169e84b0b6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -312,7 +312,7 @@ void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid)
 {
 	kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_RESTORE,
 			  KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), pid,
-			  node->id, 0));
+			  node->id, '0'));
 }
 
 void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)

From 60d7f6031b94b6dae9e7d95b49f5c7045f6c8edb Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Date: Wed, 26 Nov 2025 19:22:58 +0100
Subject: [PATCH 175/258] i2c: bcm-iproc: Fix Wvoid-pointer-to-enum-cast
 warning

'type' is an enum, thus cast of pointer on 64-bit compile test with
clang and W=1 causes:

  i2c-bcm-iproc.c:1102:3: error: cast to smaller integer type 'enum bcm_iproc_i2c_type' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

One of the discussions in 2023 on LKML suggested warning is not suitable
for kernel.  Nothing changed in this regard since that time, so assume
the warning will stay and we want to have warnings-free builds.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
Link: https://lore.kernel.org/r/20251126182257.157439-4-krzysztof.kozlowski@oss.qualcomm.com
---
 drivers/i2c/busses/i2c-bcm-iproc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c
index e418a4f23f156f..b5629cffe99b58 100644
--- a/drivers/i2c/busses/i2c-bcm-iproc.c
+++ b/drivers/i2c/busses/i2c-bcm-iproc.c
@@ -1098,8 +1098,7 @@ static int bcm_iproc_i2c_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, iproc_i2c);
 	iproc_i2c->device = &pdev->dev;
-	iproc_i2c->type =
-		(enum bcm_iproc_i2c_type)of_device_get_match_data(&pdev->dev);
+	iproc_i2c->type = (kernel_ulong_t)of_device_get_match_data(&pdev->dev);
 	init_completion(&iproc_i2c->done);
 
 	iproc_i2c->base = devm_platform_ioremap_resource(pdev, 0);

From 3ff79e76d31f32575fbd8a8ad6ce9108ca916d1a Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Date: Wed, 26 Nov 2025 19:22:59 +0100
Subject: [PATCH 176/258] i2c: pxa: Fix Wvoid-pointer-to-enum-cast warning

'i2c_types' is an enum, thus cast of pointer on 64-bit compile test with
clang and W=1 causes:

  i2c-pxa.c:1269:15: error: cast to smaller integer type 'enum pxa_i2c_types' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

One of the discussions in 2023 on LKML suggested warning is not suitable
for kernel.  Nothing changed in this regard since that time, so assume
the warning will stay and we want to have warnings-free builds.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
Link: https://lore.kernel.org/r/20251126182257.157439-5-krzysztof.kozlowski@oss.qualcomm.com
---
 drivers/i2c/busses/i2c-pxa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index 968a8b8794dac3..09af3b3625f110 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -1266,7 +1266,7 @@ static int i2c_pxa_probe_dt(struct platform_device *pdev, struct pxa_i2c *i2c,
 	i2c->use_pio = of_property_read_bool(np, "mrvl,i2c-polling");
 	i2c->fast_mode = of_property_read_bool(np, "mrvl,i2c-fast-mode");
 
-	*i2c_types = (enum pxa_i2c_types)device_get_match_data(&pdev->dev);
+	*i2c_types = (kernel_ulong_t)device_get_match_data(&pdev->dev);
 
 	return 0;
 }

From 4c544cd6556d9193baad1a0f183e8d3b5c7baf02 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Date: Wed, 26 Nov 2025 19:23:00 +0100
Subject: [PATCH 177/258] i2c: rcar: Fix Wvoid-pointer-to-enum-cast warning

'i2c_types' is an enum, thus cast of pointer on 64-bit compile test with
clang and W=1 causes:

  i2c-rcar.c:1144:18: error: cast to smaller integer type 'enum rcar_i2c_type' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

One of the discussions in 2023 on LKML suggested warning is not suitable
for kernel.  Nothing changed in this regard since that time, so assume
the warning will stay and we want to have warnings-free builds.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
Link: https://lore.kernel.org/r/20251126182257.157439-6-krzysztof.kozlowski@oss.qualcomm.com
---
 drivers/i2c/busses/i2c-rcar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index d51884ab99f4dd..5ce8f8e4856fb2 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -1141,7 +1141,7 @@ static int rcar_i2c_probe(struct platform_device *pdev)
 	if (IS_ERR(priv->io))
 		return PTR_ERR(priv->io);
 
-	priv->devtype = (enum rcar_i2c_type)of_device_get_match_data(dev);
+	priv->devtype = (kernel_ulong_t)of_device_get_match_data(dev);
 	init_waitqueue_head(&priv->wait);
 
 	adap = &priv->adap;

From d9b85d296f3accd8957a98d78810a4ecdbdfe557 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Beno=C3=AEt=20Monin?= <benoit.monin@bootlin.com>
Date: Wed, 26 Nov 2025 11:46:24 +0100
Subject: [PATCH 178/258] dt-bindings: i2c: dw: Add Mobileye I2C controllers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add compatible string for the DesignWare-based I2C controllers present
in Mobileye Eyeq6Lplus SoC, with a fallback to the default compatible.
The same controllers are also present in the EyeQ7H, so add a compatible
for those with a fallback to the Eyeq6Lplus compatible.

Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Benoît Monin <benoit.monin@bootlin.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
Link: https://lore.kernel.org/r/20251126-i2c-dw-v4-1-b0654598e7c5@bootlin.com
---
 .../devicetree/bindings/i2c/snps,designware-i2c.yaml       | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml b/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml
index d904191bb0c6ea..91420018880958 100644
--- a/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml
@@ -34,8 +34,15 @@ properties:
           - const: snps,designware-i2c
       - description: Baikal-T1 SoC System I2C controller
         const: baikal,bt1-sys-i2c
+      - description: Mobileye EyeQ DesignWare I2C controller
+        items:
+          - enum:
+              - mobileye,eyeq7h-i2c
+          - const: mobileye,eyeq6lplus-i2c
+          - const: snps,designware-i2c
       - items:
           - enum:
+              - mobileye,eyeq6lplus-i2c
               - mscc,ocelot-i2c
               - sophgo,sg2044-i2c
               - thead,th1520-i2c

From dde7e21311004a6d227b628f14c582313da90bde Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Mon, 24 Nov 2025 14:28:15 +0100
Subject: [PATCH 179/258] i2c: i801: Add support for Intel Nova Lake-S

Add SMBus PCI IDs on Intel Nova Lake-S.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
Link: https://lore.kernel.org/r/20251124132816.470599-1-heikki.krogerus@linux.intel.com
---
 Documentation/i2c/busses/i2c-i801.rst | 1 +
 drivers/i2c/busses/Kconfig            | 1 +
 drivers/i2c/busses/i2c-i801.c         | 3 +++
 3 files changed, 5 insertions(+)

diff --git a/Documentation/i2c/busses/i2c-i801.rst b/Documentation/i2c/busses/i2c-i801.rst
index c939a5bfc8d008..bbbce90eb7d843 100644
--- a/Documentation/i2c/busses/i2c-i801.rst
+++ b/Documentation/i2c/busses/i2c-i801.rst
@@ -52,6 +52,7 @@ Supported adapters:
   * Intel Panther Lake (SOC)
   * Intel Wildcat Lake (SOC)
   * Intel Diamond Rapids (SOC)
+  * Intel Nova Lake (PCH)
 
    Datasheets: Publicly available at the Intel website
 
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index cea87fcb4a1a94..09ba55bae1fac0 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -167,6 +167,7 @@ config I2C_I801
 	    Panther Lake (SOC)
 	    Wildcat Lake (SOC)
 	    Diamond Rapids (SOC)
+	    Nova Lake (PCH)
 
 	  This driver can also be built as a module.  If so, the module
 	  will be called i2c-i801.
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 81e6e2d7ad3dcc..9e1789725edf7e 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -85,6 +85,7 @@
  * Panther Lake-P (SOC)		0xe422	32	hard	yes	yes	yes
  * Wildcat Lake-U (SOC)		0x4d22	32	hard	yes	yes	yes
  * Diamond Rapids (SOC)		0x5827	32	hard	yes	yes	yes
+ * Nova Lake-S (PCH)		0x6e23	32	hard	yes	yes	yes
  *
  * Features supported by this driver:
  * Software PEC				no
@@ -245,6 +246,7 @@
 #define PCI_DEVICE_ID_INTEL_BIRCH_STREAM_SMBUS		0x5796
 #define PCI_DEVICE_ID_INTEL_DIAMOND_RAPIDS_SMBUS	0x5827
 #define PCI_DEVICE_ID_INTEL_BROXTON_SMBUS		0x5ad4
+#define PCI_DEVICE_ID_INTEL_NOVA_LAKE_S_SMBUS		0x6e23
 #define PCI_DEVICE_ID_INTEL_ARROW_LAKE_H_SMBUS		0x7722
 #define PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_S_SMBUS		0x7a23
 #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_S_SMBUS		0x7aa3
@@ -1061,6 +1063,7 @@ static const struct pci_device_id i801_ids[] = {
 	{ PCI_DEVICE_DATA(INTEL, PANTHER_LAKE_H_SMBUS,		FEATURES_ICH5 | FEATURE_TCO_CNL) },
 	{ PCI_DEVICE_DATA(INTEL, PANTHER_LAKE_P_SMBUS,		FEATURES_ICH5 | FEATURE_TCO_CNL) },
 	{ PCI_DEVICE_DATA(INTEL, WILDCAT_LAKE_U_SMBUS,		FEATURES_ICH5 | FEATURE_TCO_CNL) },
+	{ PCI_DEVICE_DATA(INTEL, NOVA_LAKE_S_SMBUS,		FEATURES_ICH5 | FEATURE_TCO_CNL) },
 	{ 0, }
 };
 

From 880977fdc7f67923d1904ee23ca75fa1e375ea46 Mon Sep 17 00:00:00 2001
From: Hangxiang Ma <hangxiang.ma@oss.qualcomm.com>
Date: Wed, 26 Nov 2025 01:38:34 -0800
Subject: [PATCH 180/258] dt-bindings: i2c: qcom-cci: Document SM8750
 compatible

Add SM8750 compatible consistent with CAMSS CCI interfaces.

Signed-off-by: Hangxiang Ma <hangxiang.ma@oss.qualcomm.com>
Reviewed-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20251126-add-support-for-camss-on-sm8750-v1-1-646fee2eb720@oss.qualcomm.com
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
---
 Documentation/devicetree/bindings/i2c/qcom,i2c-cci.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/i2c/qcom,i2c-cci.yaml b/Documentation/devicetree/bindings/i2c/qcom,i2c-cci.yaml
index 33852a5ffca8fe..a3fe1eea6aece9 100644
--- a/Documentation/devicetree/bindings/i2c/qcom,i2c-cci.yaml
+++ b/Documentation/devicetree/bindings/i2c/qcom,i2c-cci.yaml
@@ -38,6 +38,7 @@ properties:
               - qcom,sm8450-cci
               - qcom,sm8550-cci
               - qcom,sm8650-cci
+              - qcom,sm8750-cci
               - qcom,x1e80100-cci
           - const: qcom,msm8996-cci # CCI v2
 
@@ -132,6 +133,7 @@ allOf:
             enum:
               - qcom,kaanapali-cci
               - qcom,qcm2290-cci
+              - qcom,sm8750-cci
     then:
       properties:
         clocks:

From 05f5e355cf783b30bd6eb3dec17ed1a8b3cfa95c Mon Sep 17 00:00:00 2001
From: Bharath SM <bharathsm@microsoft.com>
Date: Tue, 16 Dec 2025 21:26:05 +0530
Subject: [PATCH 181/258] smb: align durable reconnect v2 context to 8 byte
 boundary

Add a 4-byte Pad to create_durable_handle_reconnect_v2 so the DH2C
create context is 8 byte aligned.
This avoids malformed CREATE contexts on reconnect.
Recent change removed this Padding, adding it back.

Fixes: 81a45de432c6 ("smb: move create_durable_handle_reconnect_v2 to common/smb2pdu.h")

Signed-off-by: Bharath SM <bharathsm@microsoft.com>
Reviewed-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/common/smb2pdu.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 3c8d8a4e743935..95323df7274b6e 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -1293,6 +1293,7 @@ struct create_durable_handle_reconnect_v2 {
 	struct create_context_hdr ccontext;
 	__u8   Name[8];
 	struct durable_reconnect_context_v2 dcontext;
+	__u8 Pad[4];
 } __packed;
 
 /* See MS-SMB2 2.2.14.2.12 */

From 94d5b8dbc5d9caa8e01c8fab8d5ed56e843ff40e Mon Sep 17 00:00:00 2001
From: ZhangGuoDong <zhangguodong@kylinos.cn>
Date: Tue, 2 Dec 2025 15:14:17 +0800
Subject: [PATCH 182/258] smb: move some SMB1 definitions into common/smb1pdu.h

These definitions are only used by SMB1, so move them into the new
common/smb1pdu.h.

KSMBD only implements SMB_COM_NEGOTIATE, see MS-SMB2 3.3.5.2.

Co-developed-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Signed-off-by: ChenXiaoSong <chenxiaosong@kylinos.cn>
Signed-off-by: ZhangGuoDong <zhangguodong@kylinos.cn>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cifspdu.h    |  2 +-
 fs/smb/common/smb1pdu.h    | 56 ++++++++++++++++++++++++++++++++++++++
 fs/smb/common/smb2pdu.h    | 40 ---------------------------
 fs/smb/common/smbglob.h    |  2 --
 fs/smb/server/smb_common.h |  1 +
 5 files changed, 58 insertions(+), 43 deletions(-)
 create mode 100644 fs/smb/common/smb1pdu.h

diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h
index eeb4011cb217df..fdd84369e7b8b7 100644
--- a/fs/smb/client/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -12,7 +12,7 @@
 #include <net/sock.h>
 #include <linux/unaligned.h>
 #include "../common/smbfsctl.h"
-#include "../common/smb2pdu.h"
+#include "../common/smb1pdu.h"
 
 #define CIFS_PROT   0
 #define POSIX_PROT  (CIFS_PROT+1)
diff --git a/fs/smb/common/smb1pdu.h b/fs/smb/common/smb1pdu.h
new file mode 100644
index 00000000000000..df6d4e11ae929a
--- /dev/null
+++ b/fs/smb/common/smb1pdu.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ *
+ *   Copyright (C) International Business Machines  Corp., 2002,2009
+ *                 2018 Samsung Electronics Co., Ltd.
+ *   Author(s): Steve French <sfrench@us.ibm.com>
+ *              Namjae Jeon <linkinjeon@kernel.org>
+ *
+ */
+
+#ifndef _COMMON_SMB1_PDU_H
+#define _COMMON_SMB1_PDU_H
+
+#define SMB1_PROTO_NUMBER		cpu_to_le32(0x424d53ff)
+
+/*
+ * See MS-CIFS 2.2.3.1
+ *     MS-SMB 2.2.3.1
+ */
+struct smb_hdr {
+	__u8 Protocol[4];
+	__u8 Command;
+	union {
+		struct {
+			__u8 ErrorClass;
+			__u8 Reserved;
+			__le16 Error;
+		} __packed DosError;
+		__le32 CifsError;
+	} __packed Status;
+	__u8 Flags;
+	__le16 Flags2;		/* note: le */
+	__le16 PidHigh;
+	union {
+		struct {
+			__le32 SequenceNumber;  /* le */
+			__u32 Reserved; /* zero */
+		} __packed Sequence;
+		__u8 SecuritySignature[8];	/* le */
+	} __packed Signature;
+	__u8 pad[2];
+	__u16 Tid;
+	__le16 Pid;
+	__u16 Uid;
+	__le16 Mid;
+	__u8 WordCount;
+} __packed;
+
+/* See MS-CIFS 2.2.4.52.1 */
+typedef struct smb_negotiate_req {
+	struct smb_hdr hdr;	/* wct = 0 */
+	__le16 ByteCount;
+	unsigned char DialectsArray[];
+} __packed SMB_NEGOTIATE_REQ;
+
+#endif /* _COMMON_SMB1_PDU_H */
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 95323df7274b6e..f5ebbe31384aea 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -1986,39 +1986,6 @@ struct smb2_lease_ack {
 	__le64 LeaseDuration;
 } __packed;
 
-/*
- * See MS-CIFS 2.2.3.1
- *     MS-SMB 2.2.3.1
- */
-struct smb_hdr {
-	__u8 Protocol[4];
-	__u8 Command;
-	union {
-		struct {
-			__u8 ErrorClass;
-			__u8 Reserved;
-			__le16 Error;
-		} __packed DosError;
-		__le32 CifsError;
-	} __packed Status;
-	__u8 Flags;
-	__le16 Flags2;		/* note: le */
-	__le16 PidHigh;
-	union {
-		struct {
-			__le32 SequenceNumber;  /* le */
-			__u32 Reserved; /* zero */
-		} __packed Sequence;
-		__u8 SecuritySignature[8];	/* le */
-	} __packed Signature;
-	__u8 pad[2];
-	__u16 Tid;
-	__le16 Pid;
-	__u16 Uid;
-	__le16 Mid;
-	__u8 WordCount;
-} __packed;
-
 #define OP_BREAK_STRUCT_SIZE_20		24
 #define OP_BREAK_STRUCT_SIZE_21		36
 
@@ -2123,11 +2090,4 @@ struct smb_hdr {
 #define SET_MINIMUM_RIGHTS (FILE_READ_EA | FILE_READ_ATTRIBUTES \
 				| READ_CONTROL | SYNCHRONIZE)
 
-/* See MS-CIFS 2.2.4.52.1 */
-typedef struct smb_negotiate_req {
-	struct smb_hdr hdr;	/* wct = 0 */
-	__le16 ByteCount;
-	unsigned char DialectsArray[];
-} __packed SMB_NEGOTIATE_REQ;
-
 #endif				/* _COMMON_SMB2PDU_H */
diff --git a/fs/smb/common/smbglob.h b/fs/smb/common/smbglob.h
index 9562845a561757..4e33d91cdc9dbe 100644
--- a/fs/smb/common/smbglob.h
+++ b/fs/smb/common/smbglob.h
@@ -11,8 +11,6 @@
 #ifndef _COMMON_SMB_GLOB_H
 #define _COMMON_SMB_GLOB_H
 
-#define SMB1_PROTO_NUMBER		cpu_to_le32(0x424d53ff)
-
 struct smb_version_values {
 	char		*version_string;
 	__u16		protocol_id;
diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h
index 067b45048c732b..95bf1465387b9f 100644
--- a/fs/smb/server/smb_common.h
+++ b/fs/smb/server/smb_common.h
@@ -10,6 +10,7 @@
 
 #include "glob.h"
 #include "../common/smbglob.h"
+#include "../common/smb1pdu.h"
 #include "../common/smb2pdu.h"
 #include "../common/fscc.h"
 #include "smb2pdu.h"

From d8a4af8f3d9d3367b2c49b0d9dee529556bdd2f4 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sat, 13 Dec 2025 12:48:49 -0600
Subject: [PATCH 183/258] cifs: update internal module version number

   to 2.58

Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cifsfs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h
index e9534258d1efd0..75d372ceb65539 100644
--- a/fs/smb/client/cifsfs.h
+++ b/fs/smb/client/cifsfs.h
@@ -145,6 +145,6 @@ extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
 /* when changing internal version - update following two lines at same time */
-#define SMB3_PRODUCT_BUILD 57
-#define CIFS_VERSION   "2.57"
+#define SMB3_PRODUCT_BUILD 58
+#define CIFS_VERSION   "2.58"
 #endif				/* _CIFSFS_H */

From 982d2616a2906113e433fdc0cfcc122f8d1bb60a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 16 Dec 2025 18:30:08 +0100
Subject: [PATCH 184/258] xfs: validate that zoned RT devices are zone aligned

Garbage collection assumes all zones contain the full amount of blocks.
Mkfs already ensures this happens, but make the kernel check it as well
to avoid getting into trouble due to fuzzers or mkfs bugs.

Fixes: 2167eaabe2fa ("xfs: define the zoned on-disk format")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Cc: stable@vger.kernel.org # v6.15
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
 fs/xfs/libxfs/xfs_sb.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index cdd16dd805d77c..94c272a2ae2622 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -301,6 +301,21 @@ xfs_validate_rt_geometry(
 	    sbp->sb_rbmblocks != xfs_expected_rbmblocks(sbp))
 		return false;
 
+	if (xfs_sb_is_v5(sbp) &&
+	    (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED)) {
+		uint32_t		mod;
+
+		/*
+		 * Zoned RT devices must be aligned to the RT group size,
+		 * because garbage collection assumes that all zones have the
+		 * same size to avoid insane complexity if that weren't the
+		 * case.
+		 */
+		div_u64_rem(sbp->sb_rextents, sbp->sb_rgextents, &mod);
+		if (mod)
+			return false;
+	}
+
 	return true;
 }
 

From dc68c0f601691010dd5ae53442f8523f41a53131 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 16 Dec 2025 18:30:09 +0100
Subject: [PATCH 185/258] xfs: fix the zoned RT growfs check for zone alignment

The grofs code for zoned RT subvolums already tries to check for zone
alignment, but gets it wrong by using the old instead of the new mount
structure.

Fixes: 01b71e64bb87 ("xfs: support growfs on zoned file systems")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Cc: stable@vger.kernel.org # v6.15
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
 fs/xfs/xfs_rtalloc.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 6907e871fa1511..e063f4f2f2e617 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1255,12 +1255,10 @@ xfs_growfs_check_rtgeom(
 	min_logfsbs = min_t(xfs_extlen_t, xfs_log_calc_minimum_size(nmp),
 			nmp->m_rsumblocks * 2);
 
-	kfree(nmp);
-
 	trace_xfs_growfs_check_rtgeom(mp, min_logfsbs);
 
 	if (min_logfsbs > mp->m_sb.sb_logblocks)
-		return -EINVAL;
+		goto out_inval;
 
 	if (xfs_has_zoned(mp)) {
 		uint32_t	gblocks = mp->m_groups[XG_TYPE_RTG].blocks;
@@ -1268,16 +1266,20 @@ xfs_growfs_check_rtgeom(
 
 		if (rextsize != 1)
 			return -EINVAL;
-		div_u64_rem(mp->m_sb.sb_rblocks, gblocks, &rem);
+		div_u64_rem(nmp->m_sb.sb_rblocks, gblocks, &rem);
 		if (rem) {
 			xfs_warn(mp,
 "new RT volume size (%lld) not aligned to RT group size (%d)",
-				mp->m_sb.sb_rblocks, gblocks);
-			return -EINVAL;
+				nmp->m_sb.sb_rblocks, gblocks);
+			goto out_inval;
 		}
 	}
 
+	kfree(nmp);
 	return 0;
+out_inval:
+	kfree(nmp);
+	return -EINVAL;
 }
 
 /*

From 5d5602236f5db19e8b337a2cd87a90ace5ea776d Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Tue, 25 Nov 2025 22:39:59 +0900
Subject: [PATCH 186/258] can: j1939: make j1939_session_activate() fail if
 device is no longer registered

syzbot is still reporting

  unregister_netdevice: waiting for vcan0 to become free. Usage count = 2

even after commit 93a27b5891b8 ("can: j1939: add missing calls in
NETDEV_UNREGISTER notification handler") was added. A debug printk() patch
found that j1939_session_activate() can succeed even after
j1939_cancel_active_session() from j1939_netdev_notify(NETDEV_UNREGISTER)
has completed.

Since j1939_cancel_active_session() is processed with the session list lock
held, checking ndev->reg_state in j1939_session_activate() with the session
list lock held can reliably close the race window.

Reported-by: syzbot <syzbot+881d65229ca4f9ae8c84@syzkaller.appspotmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://patch.msgid.link/b9653191-d479-4c8b-8536-1326d028db5c@I-love.SAKURA.ne.jp
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/transport.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index fbf5c8001c9df3..613a911dda100b 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1567,6 +1567,8 @@ int j1939_session_activate(struct j1939_session *session)
 	if (active) {
 		j1939_session_put(active);
 		ret = -EAGAIN;
+	} else if (priv->ndev->reg_state != NETREG_REGISTERED) {
+		ret = -ENODEV;
 	} else {
 		WARN_ON_ONCE(session->state != J1939_SESSION_NEW);
 		list_add_tail(&session->active_session_list_entry,

From 46cea215dc9444ec32a76b1b6a9cb809e17b64d5 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Tue, 25 Nov 2025 22:43:12 +0900
Subject: [PATCH 187/258] can: j1939: make j1939_sk_bind() fail if device is no
 longer registered

There is a theoretical race window in j1939_sk_netdev_event_unregister()
where two j1939_sk_bind() calls jump in between read_unlock_bh() and
lock_sock().

The assumption jsk->priv == priv can fail if the first j1939_sk_bind()
call once made jsk->priv == NULL due to failed j1939_local_ecu_get() call
and the second j1939_sk_bind() call again made jsk->priv != NULL due to
successful j1939_local_ecu_get() call.

Since the socket lock is held by both j1939_sk_netdev_event_unregister()
and j1939_sk_bind(), checking ndev->reg_state with the socket lock held can
reliably make the second j1939_sk_bind() call fail (and close this race
window).

Fixes: 7fcbe5b2c6a4 ("can: j1939: implement NETDEV_UNREGISTER notification handler")
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://patch.msgid.link/5732921e-247e-4957-a364-da74bd7031d7@I-love.SAKURA.ne.jp
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/socket.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 6272326dd614a9..ff9c4fd7b4337a 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -482,6 +482,12 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr_unsized *uaddr, in
 			goto out_release_sock;
 		}
 
+		if (ndev->reg_state != NETREG_REGISTERED) {
+			dev_put(ndev);
+			ret = -ENODEV;
+			goto out_release_sock;
+		}
+
 		can_ml = can_get_ml_priv(ndev);
 		if (!can_ml) {
 			dev_put(ndev);

From b1f54d7143e0f527cca1091857a786e278d72184 Mon Sep 17 00:00:00 2001
From: Anurag Dutta <a-dutta@ti.com>
Date: Fri, 12 Dec 2025 12:53:11 +0530
Subject: [PATCH 188/258] spi: cadence-quadspi: Add error logging for DMA
 request failure

Add dev_err_probe() to log DMA request failures. This properly handles
-EPROBE_DEFER at debug level, reducing log spam during deferred probing.

Signed-off-by: Anurag Dutta <a-dutta@ti.com>
Link: https://patch.msgid.link/20251212072312.2711806-2-a-dutta@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-cadence-quadspi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index af6d050da1c8ac..7c1f742d95a684 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -2001,8 +2001,10 @@ static int cqspi_probe(struct platform_device *pdev)
 
 	if (cqspi->use_direct_mode) {
 		ret = cqspi_request_mmap_dma(cqspi);
-		if (ret == -EPROBE_DEFER)
+		if (ret == -EPROBE_DEFER) {
+			dev_err_probe(&pdev->dev, ret, "Failed to request mmap DMA\n");
 			goto probe_setup_failed;
+		}
 	}
 
 	ret = spi_register_controller(host);

From 1889dd2081975ce1f6275b06cdebaa8d154847a9 Mon Sep 17 00:00:00 2001
From: Anurag Dutta <a-dutta@ti.com>
Date: Fri, 12 Dec 2025 12:53:12 +0530
Subject: [PATCH 189/258] spi: cadence-quadspi: Fix clock disable on probe
 failure path

When cqspi_request_mmap_dma() returns -EPROBE_DEFER after runtime PM
is enabled, the error path calls clk_disable_unprepare() on an already
disabled clock, causing an imbalance.

Use pm_runtime_get_sync() to increment the usage counter and resume the
device. This prevents runtime_suspend() from being invoked and causing
a double clock disable.

Fixes: 140623410536 ("mtd: spi-nor: Add driver for Cadence Quad SPI Flash Controller")
Signed-off-by: Anurag Dutta <a-dutta@ti.com>
Tested-by: Nishanth Menon <nm@ti.com>
Link: https://patch.msgid.link/20251212072312.2711806-3-a-dutta@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-cadence-quadspi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index 7c1f742d95a684..f8823e83a62263 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -2026,7 +2026,9 @@ static int cqspi_probe(struct platform_device *pdev)
 probe_reset_failed:
 	if (cqspi->is_jh7110)
 		cqspi_jh7110_disable_clk(pdev, cqspi);
-	clk_disable_unprepare(cqspi->clk);
+
+	if (pm_runtime_get_sync(&pdev->dev) >= 0)
+		clk_disable_unprepare(cqspi->clk);
 probe_clk_failed:
 	return ret;
 }

From 1d24636a9c87c32ec626a56593c98544e6c49fef Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Mon, 15 Dec 2025 17:03:22 -0600
Subject: [PATCH 190/258] spi: dt-bindings: snps,dw-abp-ssi: Allow up to 16
 chip-selects

At least the Microchip Sparx5 supports up to 16 chip-selects, so
increase the maximum. The pattern for the child unit-address was
unconstrained, so update it to match the maximum number of
chip-selects.

Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
Link: https://patch.msgid.link/20251215230323.3634112-1-robh@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
index 5c87fc8a845dfe..81838577cf9cdb 100644
--- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
+++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
@@ -121,7 +121,7 @@ properties:
   num-cs:
     default: 4
     minimum: 1
-    maximum: 4
+    maximum: 16
 
   dmas:
     items:
@@ -153,14 +153,14 @@ properties:
       provides an interface to override the native DWC SSI CS control.
 
 patternProperties:
-  "@[0-9a-f]+$":
+  "@[0-9a-f]$":
     type: object
     additionalProperties: true
 
     properties:
       reg:
         minimum: 0
-        maximum: 3
+        maximum: 0xf
 
 unevaluatedProperties: false
 

From 8c04b77f87e6e321ae6acd28ce1de5553916153f Mon Sep 17 00:00:00 2001
From: Fei Shao <fshao@chromium.org>
Date: Wed, 17 Dec 2025 18:10:47 +0800
Subject: [PATCH 191/258] spi: mt65xx: Use IRQF_ONESHOT with threaded IRQ

This driver is migrated to use threaded IRQ since commit 5972eb05ca32
("spi: spi-mt65xx: Use threaded interrupt for non-SPIMEM transfer"), and
we almost always want to disable the interrupt line to avoid excess
interrupts while the threaded handler is processing SPI transfer.
Use IRQF_ONESHOT for that purpose.

In practice, we see MediaTek devices show SPI transfer timeout errors
when communicating with ChromeOS EC in certain scenarios, and with
IRQF_ONESHOT, the issue goes away.

Signed-off-by: Fei Shao <fshao@chromium.org>
Link: https://patch.msgid.link/20251217101131.1975131-1-fshao@chromium.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mt65xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index 4b40985af1eaf0..90e5813cfdc332 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -1320,7 +1320,7 @@ static int mtk_spi_probe(struct platform_device *pdev)
 
 	ret = devm_request_threaded_irq(dev, irq, mtk_spi_interrupt,
 					mtk_spi_interrupt_thread,
-					IRQF_TRIGGER_NONE, dev_name(dev), host);
+					IRQF_ONESHOT, dev_name(dev), host);
 	if (ret)
 		return dev_err_probe(dev, ret, "failed to register irq\n");
 

From a9c4c9085ec8ce3ce01be21b75184789e74f5f19 Mon Sep 17 00:00:00 2001
From: Sai Krishna Potthuri <sai.krishna.potthuri@amd.com>
Date: Fri, 12 Dec 2025 12:05:09 +0530
Subject: [PATCH 192/258] mmc: sdhci-of-arasan: Increase CD stable timeout to 2
 seconds

On Xilinx/AMD platforms, the CD stable bit take slightly longer than
one second(about an additional 100ms) to assert after a host
controller reset. Although no functional failure observed with the
existing one second delay but to ensure reliable initialization, increase
the CD stable timeout to 2 seconds.

Fixes: e251709aaddb ("mmc: sdhci-of-arasan: Ensure CD logic stabilization before power-up")
Cc: stable@vger.kernel.org
Signed-off-by: Sai Krishna Potthuri <sai.krishna.potthuri@amd.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-arasan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index b97d042897add5..ab7f0ffe7b4f00 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -99,7 +99,7 @@
 #define HIWORD_UPDATE(val, mask, shift) \
 		((val) << (shift) | (mask) << ((shift) + 16))
 
-#define CD_STABLE_TIMEOUT_US		1000000
+#define CD_STABLE_TIMEOUT_US		2000000
 #define CD_STABLE_MAX_SLEEP_US		10
 
 /**

From d3ecb12e2e04ce53c95f933c462f2d8b150b965b Mon Sep 17 00:00:00 2001
From: Jared Kangas <jkangas@redhat.com>
Date: Fri, 12 Dec 2025 07:03:17 -0800
Subject: [PATCH 193/258] mmc: sdhci-esdhc-imx: add alternate ARCH_S32
 dependency to Kconfig

MMC_SDHCI_ESDHC_IMX requires ARCH_MXC despite also being used on
ARCH_S32, which results in unmet dependencies when compiling strictly
for ARCH_S32. Resolve this by adding ARCH_S32 as an alternative to
ARCH_MXC in the driver's dependencies.

Fixes: 5c4f00627c9a ("mmc: sdhci-esdhc-imx: add NXP S32G2 support")
Cc: stable@bvger.kernel.org
Signed-off-by: Jared Kangas <jkangas@redhat.com>
Reviewed-by: Haibo Chen <haibo.chen@nxp.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 24f07df32a1a58..6d79cc9a79e22d 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -315,14 +315,14 @@ config MMC_SDHCI_ESDHC_MCF
 
 config MMC_SDHCI_ESDHC_IMX
 	tristate "SDHCI support for the Freescale eSDHC/uSDHC i.MX controller"
-	depends on ARCH_MXC || COMPILE_TEST
+	depends on ARCH_MXC || ARCH_S32 || COMPILE_TEST
 	depends on MMC_SDHCI_PLTFM
 	depends on OF
 	select MMC_SDHCI_IO_ACCESSORS
 	select MMC_CQHCI
 	help
 	  This selects the Freescale eSDHC/uSDHC controller support
-	  found on i.MX25, i.MX35 i.MX5x and i.MX6x.
+	  found on i.MX25, i.MX35, i.MX5x, i.MX6x, and S32G.
 
 	  If you have a controller with this interface, say Y or M here.
 

From a58383fa45c706bda3bf4a1955c3a0327dbec7e7 Mon Sep 17 00:00:00 2001
From: Deepanshu Kartikey <kartikey406@gmail.com>
Date: Wed, 17 Dec 2025 07:17:12 +0530
Subject: [PATCH 194/258] block: add allocation size check in
 blkdev_pr_read_keys()

blkdev_pr_read_keys() takes num_keys from userspace and uses it to
calculate the allocation size for keys_info via struct_size(). While
there is a check for SIZE_MAX (integer overflow), there is no upper
bound validation on the allocation size itself.

A malicious or buggy userspace can pass a large num_keys value that
doesn't trigger overflow but still results in an excessive allocation
attempt, causing a warning in the page allocator when the order exceeds
MAX_PAGE_ORDER.

Fix this by introducing PR_KEYS_MAX to limit the number of keys to
a sane value. This makes the SIZE_MAX check redundant, so remove it.
Also switch to kvzalloc/kvfree to handle larger allocations gracefully.

Fixes: 22a1ffea5f80 ("block: add IOC_PR_READ_KEYS ioctl")
Tested-by: syzbot+660d079d90f8a1baf54d@syzkaller.appspotmail.com
Reported-by: syzbot+660d079d90f8a1baf54d@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=660d079d90f8a1baf54d
Link: https://lore.kernel.org/all/20251212013510.3576091-1-kartikey406@gmail.com/T/ [v1]
Signed-off-by: Deepanshu Kartikey <kartikey406@gmail.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/ioctl.c           | 9 +++++----
 include/uapi/linux/pr.h | 2 ++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/block/ioctl.c b/block/ioctl.c
index 61feed686418a6..344478348a54e3 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -442,11 +442,12 @@ static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode,
 	if (copy_from_user(&read_keys, arg, sizeof(read_keys)))
 		return -EFAULT;
 
-	keys_info_len = struct_size(keys_info, keys, read_keys.num_keys);
-	if (keys_info_len == SIZE_MAX)
+	if (read_keys.num_keys > PR_KEYS_MAX)
 		return -EINVAL;
 
-	keys_info = kzalloc(keys_info_len, GFP_KERNEL);
+	keys_info_len = struct_size(keys_info, keys, read_keys.num_keys);
+
+	keys_info = kvzalloc(keys_info_len, GFP_KERNEL);
 	if (!keys_info)
 		return -ENOMEM;
 
@@ -473,7 +474,7 @@ static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode,
 	if (copy_to_user(arg, &read_keys, sizeof(read_keys)))
 		ret = -EFAULT;
 out:
-	kfree(keys_info);
+	kvfree(keys_info);
 	return ret;
 }
 
diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h
index 847f3051057af7..f0ecb1677317d2 100644
--- a/include/uapi/linux/pr.h
+++ b/include/uapi/linux/pr.h
@@ -79,4 +79,6 @@ struct pr_read_reservation {
 #define IOC_PR_READ_KEYS	_IOWR('p', 206, struct pr_read_keys)
 #define IOC_PR_READ_RESERVATION	_IOR('p', 207, struct pr_read_reservation)
 
+#define PR_KEYS_MAX		(1u << 16)
+
 #endif /* _UAPI_PR_H */

From 114ea9bbaf7681c4d363e13b7916e6fef6a4963a Mon Sep 17 00:00:00 2001
From: huang-jl <huang-jl@deepseek.com>
Date: Wed, 17 Dec 2025 14:26:32 +0800
Subject: [PATCH 195/258] io_uring: fix nr_segs calculation in io_import_kbuf

io_import_kbuf() calculates nr_segs incorrectly when iov_offset is
non-zero after iov_iter_advance(). It doesn't account for the partial
consumption of the first bvec.

The problem comes when meet the following conditions:
1. Use UBLK_F_AUTO_BUF_REG feature of ublk.
2. The kernel will help to register the buffer, into the io uring.
3. Later, the ublk server try to send IO request using the registered
   buffer in the io uring, to read/write to fuse-based filesystem, with
O_DIRECT.

>From a userspace perspective, the ublk server thread is blocked in the
kernel, and will see "soft lockup" in the kernel dmesg.

When ublk registers a buffer with mixed-size bvecs like [4K]*6 + [12K]
and a request partially consumes a bvec, the next request's nr_segs
calculation uses bvec->bv_len instead of (bv_len - iov_offset).

This causes fuse_get_user_pages() to loop forever because nr_segs
indicates fewer pages than actually needed.

Specifically, the infinite loop happens at:
fuse_get_user_pages()
  -> iov_iter_extract_pages()
    -> iov_iter_extract_bvec_pages()
Since the nr_segs is miscalculated, the iov_iter_extract_bvec_pages
returns when finding that i->nr_segs is zero. Then
iov_iter_extract_pages returns zero. However, fuse_get_user_pages does
still not get enough data/pages, causing infinite loop.

Example:
  - Bvecs: [4K, 4K, 4K, 4K, 4K, 4K, 12K, ...]
  - Request 1: 32K at offset 0, uses 6*4K + 8K of the 12K bvec
  - Request 2: 32K at offset 32K
    - iov_offset = 8K (8K already consumed from 12K bvec)
    - Bug: calculates using 12K, not (12K - 8K) = 4K
    - Result: nr_segs too small, infinite loop in fuse_get_user_pages.

Fix by accounting for iov_offset when calculating the first segment's
available length.

Fixes: b419bed4f0a6 ("io_uring/rsrc: ensure segments counts are correct on kbuf buffers")
Signed-off-by: huang-jl <huang-jl@deepseek.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/rsrc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index a63474b331bf89..41c89f5c616da2 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -1059,6 +1059,7 @@ static int io_import_kbuf(int ddir, struct iov_iter *iter,
 	if (count < imu->len) {
 		const struct bio_vec *bvec = iter->bvec;
 
+		len += iter->iov_offset;
 		while (len > bvec->bv_len) {
 			len -= bvec->bv_len;
 			bvec++;

From dcd0b625fe440d68bb4b97c71d18ca48ecd6e594 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 17 Dec 2025 07:34:55 -0800
Subject: [PATCH 196/258] powercap: intel_rapl: Fix possible recursive lock
 warning

With the RAPL PMU addition, there is a recursive locking when CPU online
callback function calls rapl_package_add_pmu(). Here cpu_hotplug_lock
is already acquired by cpuhp_thread_fun() and rapl_package_add_pmu()
tries to acquire again.

<4>[ 8.197433] ============================================
<4>[ 8.197437] WARNING: possible recursive locking detected
<4>[ 8.197440] 6.19.0-rc1-lgci-xe-xe-4242-05b7c58b3367dca84+ #1 Not tainted
<4>[ 8.197444] --------------------------------------------
<4>[ 8.197447] cpuhp/0/20 is trying to acquire lock:
<4>[ 8.197450] ffffffff83487870 (cpu_hotplug_lock){++++}-{0:0}, at:
rapl_package_add_pmu+0x37/0x370 [intel_rapl_common]
<4>[ 8.197463]
but task is already holding lock:
<4>[ 8.197466] ffffffff83487870 (cpu_hotplug_lock){++++}-{0:0}, at:
cpuhp_thread_fun+0x6d/0x290
<4>[ 8.197477]
other info that might help us debug this:
<4>[ 8.197480] Possible unsafe locking scenario:

<4>[ 8.197483] CPU0
<4>[ 8.197485] ----
<4>[ 8.197487] lock(cpu_hotplug_lock);
<4>[ 8.197490] lock(cpu_hotplug_lock);
<4>[ 8.197493]
*** DEADLOCK ***
..
..
<4>[ 8.197542] __lock_acquire+0x146e/0x2790
<4>[ 8.197548] lock_acquire+0xc4/0x2c0
<4>[ 8.197550] ? rapl_package_add_pmu+0x37/0x370 [intel_rapl_common]
<4>[ 8.197556] cpus_read_lock+0x41/0x110
<4>[ 8.197558] ? rapl_package_add_pmu+0x37/0x370 [intel_rapl_common]
<4>[ 8.197561] rapl_package_add_pmu+0x37/0x370 [intel_rapl_common]
<4>[ 8.197565] rapl_cpu_online+0x85/0x87 [intel_rapl_msr]
<4>[ 8.197568] ? __pfx_rapl_cpu_online+0x10/0x10 [intel_rapl_msr]
<4>[ 8.197570] cpuhp_invoke_callback+0x41f/0x6c0
<4>[ 8.197573] ? cpuhp_thread_fun+0x6d/0x290
<4>[ 8.197575] cpuhp_thread_fun+0x1e2/0x290
<4>[ 8.197578] ? smpboot_thread_fn+0x26/0x290
<4>[ 8.197581] smpboot_thread_fn+0x12f/0x290
<4>[ 8.197584] ? __pfx_smpboot_thread_fn+0x10/0x10
<4>[ 8.197586] kthread+0x11f/0x250
<4>[ 8.197589] ? __pfx_kthread+0x10/0x10
<4>[ 8.197592] ret_from_fork+0x344/0x3a0
<4>[ 8.197595] ? __pfx_kthread+0x10/0x10
<4>[ 8.197597] ret_from_fork_asm+0x1a/0x30
<4>[ 8.197604] </TASK>

Fix this issue in the same way as rapl powercap package domain is added
from the same CPU online callback by introducing another interface which
doesn't call cpus_read_lock(). Add rapl_package_add_pmu_locked() and
rapl_package_remove_pmu_locked() which don't call cpus_read_lock().

Fixes: 748d6ba43afd ("powercap: intel_rapl: Enable MSR-based RAPL PMU support")
Reported-by: Borah, Chaitanya Kumar <chaitanya.kumar.borah@intel.com>
Closes: https://lore.kernel.org/linux-pm/5427ede1-57a0-43d1-99f3-8ca4b0643e82@intel.com/T/#u
Tested-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Tested-by: RavitejaX Veesam <ravitejax.veesam@intel.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://patch.msgid.link/20251217153455.3560176-1-srinivas.pandruvada@linux.intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/intel_rapl_common.c | 24 ++++++++++++++++++------
 drivers/powercap/intel_rapl_msr.c    |  4 ++--
 include/linux/intel_rapl.h           |  4 ++++
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index b9d87e56cbbc8a..3ff6da3bf4e630 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -2032,7 +2032,7 @@ static int rapl_pmu_update(struct rapl_package *rp)
 	return ret;
 }
 
-int rapl_package_add_pmu(struct rapl_package *rp)
+int rapl_package_add_pmu_locked(struct rapl_package *rp)
 {
 	struct rapl_package_pmu_data *data = &rp->pmu_data;
 	int idx;
@@ -2040,8 +2040,6 @@ int rapl_package_add_pmu(struct rapl_package *rp)
 	if (rp->has_pmu)
 		return -EEXIST;
 
-	guard(cpus_read_lock)();
-
 	for (idx = 0; idx < rp->nr_domains; idx++) {
 		struct rapl_domain *rd = &rp->domains[idx];
 		int domain = rd->id;
@@ -2091,17 +2089,23 @@ int rapl_package_add_pmu(struct rapl_package *rp)
 
 	return rapl_pmu_update(rp);
 }
+EXPORT_SYMBOL_GPL(rapl_package_add_pmu_locked);
+
+int rapl_package_add_pmu(struct rapl_package *rp)
+{
+	guard(cpus_read_lock)();
+
+	return rapl_package_add_pmu_locked(rp);
+}
 EXPORT_SYMBOL_GPL(rapl_package_add_pmu);
 
-void rapl_package_remove_pmu(struct rapl_package *rp)
+void rapl_package_remove_pmu_locked(struct rapl_package *rp)
 {
 	struct rapl_package *pos;
 
 	if (!rp->has_pmu)
 		return;
 
-	guard(cpus_read_lock)();
-
 	list_for_each_entry(pos, &rapl_packages, plist) {
 		/* PMU is still needed */
 		if (pos->has_pmu && pos != rp)
@@ -2111,6 +2115,14 @@ void rapl_package_remove_pmu(struct rapl_package *rp)
 	perf_pmu_unregister(&rapl_pmu.pmu);
 	memset(&rapl_pmu, 0, sizeof(struct rapl_pmu));
 }
+EXPORT_SYMBOL_GPL(rapl_package_remove_pmu_locked);
+
+void rapl_package_remove_pmu(struct rapl_package *rp)
+{
+	guard(cpus_read_lock)();
+
+	rapl_package_remove_pmu_locked(rp);
+}
 EXPORT_SYMBOL_GPL(rapl_package_remove_pmu);
 #endif
 
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index 0ce1096b63145d..9a7e150b3536b9 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -82,7 +82,7 @@ static int rapl_cpu_online(unsigned int cpu)
 		if (IS_ERR(rp))
 			return PTR_ERR(rp);
 		if (rapl_msr_pmu)
-			rapl_package_add_pmu(rp);
+			rapl_package_add_pmu_locked(rp);
 	}
 	cpumask_set_cpu(cpu, &rp->cpumask);
 	return 0;
@@ -101,7 +101,7 @@ static int rapl_cpu_down_prep(unsigned int cpu)
 	lead_cpu = cpumask_first(&rp->cpumask);
 	if (lead_cpu >= nr_cpu_ids) {
 		if (rapl_msr_pmu)
-			rapl_package_remove_pmu(rp);
+			rapl_package_remove_pmu_locked(rp);
 		rapl_remove_package_cpuslocked(rp);
 	} else if (rp->lead_cpu == cpu) {
 		rp->lead_cpu = lead_cpu;
diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h
index e9ade2ff4af664..f479ef5b3341cf 100644
--- a/include/linux/intel_rapl.h
+++ b/include/linux/intel_rapl.h
@@ -214,10 +214,14 @@ void rapl_remove_package(struct rapl_package *rp);
 
 #ifdef CONFIG_PERF_EVENTS
 int rapl_package_add_pmu(struct rapl_package *rp);
+int rapl_package_add_pmu_locked(struct rapl_package *rp);
 void rapl_package_remove_pmu(struct rapl_package *rp);
+void rapl_package_remove_pmu_locked(struct rapl_package *rp);
 #else
 static inline int rapl_package_add_pmu(struct rapl_package *rp) { return 0; }
+static inline int rapl_package_add_pmu_locked(struct rapl_package *rp) { return 0; }
 static inline void rapl_package_remove_pmu(struct rapl_package *rp) { }
+static inline void rapl_package_remove_pmu_locked(struct rapl_package *rp) { }
 #endif
 
 #endif /* __INTEL_RAPL_H__ */

From 0edc78b82bea85e1b2165d8e870a5c3535919695 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 25 Nov 2025 22:50:45 +0100
Subject: [PATCH 197/258] x86/msi: Make irq_retrigger() functional for posted
 MSI

Luigi reported that retriggering a posted MSI interrupt does not work
correctly.

The reason is that the retrigger happens at the vector domain by sending an
IPI to the actual vector on the target CPU. That works correctly exactly
once because the posted MSI interrupt chip does not issue an EOI as that's
only required for the posted MSI notification vector itself.

As a consequence the vector becomes stale in the ISR, which not only
affects this vector but also any lower priority vector in the affected
APIC because the ISR bit is not cleared.

Luigi proposed to set the vector in the remap PIR bitmap and raise the
posted MSI notification vector. That works, but that still does not cure a
related problem:

  If there is ever a stray interrupt on such a vector, then the related
  APIC ISR bit becomes stale due to the lack of EOI as described above.
  Unlikely to happen, but if it happens it's not debuggable at all.

So instead of playing games with the PIR, this can be actually solved
for both cases by:

 1) Keeping track of the posted interrupt vector handler state

 2) Implementing a posted MSI specific irq_ack() callback which checks that
    state. If the posted vector handler is inactive it issues an EOI,
    otherwise it delegates that to the posted handler.

This is correct versus affinity changes and concurrent events on the posted
vector as the actual handler invocation is serialized through the interrupt
descriptor lock.

Fixes: ed1e48ea4370 ("iommu/vt-d: Enable posted mode for device MSIs")
Reported-by: Luigi Rizzo <lrizzo@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Luigi Rizzo <lrizzo@google.com>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20251125214631.044440658@linutronix.de
Closes: https://lore.kernel.org/lkml/20251124104836.3685533-1-lrizzo@google.com
---
 arch/x86/include/asm/irq_remapping.h |  7 +++++++
 arch/x86/kernel/irq.c                | 23 +++++++++++++++++++++++
 drivers/iommu/intel/irq_remapping.c  |  8 ++++----
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5a0d42464d4424..4e55d17558465d 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -87,4 +87,11 @@ static inline void panic_if_irq_remap(const char *msg)
 }
 
 #endif /* CONFIG_IRQ_REMAP */
+
+#ifdef CONFIG_X86_POSTED_MSI
+void intel_ack_posted_msi_irq(struct irq_data *irqd);
+#else
+#define intel_ack_posted_msi_irq	NULL
+#endif
+
 #endif /* __X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 86f4e574de0267..b2fe6181960c3f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -397,6 +397,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi)
 
 /* Posted Interrupt Descriptors for coalesced MSIs to be posted */
 DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
+static DEFINE_PER_CPU_CACHE_HOT(bool, posted_msi_handler_active);
 
 void intel_posted_msi_init(void)
 {
@@ -414,6 +415,25 @@ void intel_posted_msi_init(void)
 	this_cpu_write(posted_msi_pi_desc.ndst, destination);
 }
 
+void intel_ack_posted_msi_irq(struct irq_data *irqd)
+{
+	irq_move_irq(irqd);
+
+	/*
+	 * Handle the rare case that irq_retrigger() raised the actual
+	 * assigned vector on the target CPU, which means that it was not
+	 * invoked via the posted MSI handler below. In that case APIC EOI
+	 * is required as otherwise the ISR entry becomes stale and lower
+	 * priority interrupts are never going to be delivered after that.
+	 *
+	 * If the posted handler invoked the device interrupt handler then
+	 * the EOI would be premature because it would acknowledge the
+	 * posted vector.
+	 */
+	if (unlikely(!__this_cpu_read(posted_msi_handler_active)))
+		apic_eoi();
+}
+
 static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs)
 {
 	unsigned long pir_copy[NR_PIR_WORDS];
@@ -446,6 +466,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
 
 	pid = this_cpu_ptr(&posted_msi_pi_desc);
 
+	/* Mark the handler active for intel_ack_posted_msi_irq() */
+	__this_cpu_write(posted_msi_handler_active, true);
 	inc_irq_stat(posted_msi_notification_count);
 	irq_enter();
 
@@ -474,6 +496,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
 
 	apic_eoi();
 	irq_exit();
+	__this_cpu_write(posted_msi_handler_active, false);
 	set_irq_regs(old_regs);
 }
 #endif /* X86_POSTED_MSI */
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 4f9b01dc91e86f..8bcbfe3d9c7221 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -1303,17 +1303,17 @@ static struct irq_chip intel_ir_chip = {
  *	irq_enter();
  *		handle_edge_irq()
  *			irq_chip_ack_parent()
- *				irq_move_irq(); // No EOI
+ *				intel_ack_posted_msi_irq(); // No EOI
  *			handle_irq_event()
  *				driver_handler()
  *		handle_edge_irq()
  *			irq_chip_ack_parent()
- *				irq_move_irq(); // No EOI
+ *				intel_ack_posted_msi_irq(); // No EOI
  *			handle_irq_event()
  *				driver_handler()
  *		handle_edge_irq()
  *			irq_chip_ack_parent()
- *				irq_move_irq(); // No EOI
+ *				intel_ack_posted_msi_irq(); // No EOI
  *			handle_irq_event()
  *				driver_handler()
  *	apic_eoi()
@@ -1322,7 +1322,7 @@ static struct irq_chip intel_ir_chip = {
  */
 static struct irq_chip intel_ir_chip_post_msi = {
 	.name			= "INTEL-IR-POST",
-	.irq_ack		= irq_move_irq,
+	.irq_ack		= intel_ack_posted_msi_irq,
 	.irq_set_affinity	= intel_ir_set_affinity,
 	.irq_compose_msi_msg	= intel_ir_compose_msi_msg,
 	.irq_set_vcpu_affinity	= intel_ir_set_vcpu_affinity,

From bf40644ef8c8a288742fa45580897ed0e0289474 Mon Sep 17 00:00:00 2001
From: Duoming Zhou <duoming@zju.edu.cn>
Date: Wed, 17 Dec 2025 11:00:17 +0800
Subject: [PATCH 198/258] Input: alps - fix use-after-free bugs caused by
 dev3_register_work

The dev3_register_work delayed work item is initialized within
alps_reconnect() and scheduled upon receipt of the first bare
PS/2 packet from an external PS/2 device connected to the ALPS
touchpad. During device detachment, the original implementation
calls flush_workqueue() in psmouse_disconnect() to ensure
completion of dev3_register_work. However, the flush_workqueue()
in psmouse_disconnect() only blocks and waits for work items that
were already queued to the workqueue prior to its invocation. Any
work items submitted after flush_workqueue() is called are not
included in the set of tasks that the flush operation awaits.
This means that after flush_workqueue() has finished executing,
the dev3_register_work could still be scheduled. Although the
psmouse state is set to PSMOUSE_CMD_MODE in psmouse_disconnect(),
the scheduling of dev3_register_work remains unaffected.

The race condition can occur as follows:

CPU 0 (cleanup path)     | CPU 1 (delayed work)
psmouse_disconnect()     |
  psmouse_set_state()    |
  flush_workqueue()      | alps_report_bare_ps2_packet()
  alps_disconnect()      |   psmouse_queue_work()
    kfree(priv); // FREE | alps_register_bare_ps2_mouse()
                         |   priv = container_of(work...); // USE
                         |   priv->dev3 // USE

Add disable_delayed_work_sync() in alps_disconnect() to ensure
that dev3_register_work is properly canceled and prevented from
executing after the alps_data structure has been deallocated.

This bug is identified by static analysis.

Fixes: 04aae283ba6a ("Input: ALPS - do not mix trackstick and external PS/2 mouse data")
Cc: stable@kernel.org
Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
Link: https://patch.msgid.link/b57b0a9ccca51a3f06be141bfc02b9ffe69d1845.1765939397.git.duoming@zju.edu.cn
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/alps.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index d0cb9fb9482185..df8953a5196e1b 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -2975,6 +2975,7 @@ static void alps_disconnect(struct psmouse *psmouse)
 
 	psmouse_reset(psmouse);
 	timer_shutdown_sync(&priv->timer);
+	disable_delayed_work_sync(&priv->dev3_register_work);
 	if (priv->dev2)
 		input_unregister_device(priv->dev2);
 	if (!IS_ERR_OR_NULL(priv->dev3))

From 09879758d8ac345d7f08709bfcd2fbe29e96cae4 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@redhat.com>
Date: Wed, 5 Nov 2025 10:24:28 +0100
Subject: [PATCH 199/258] MAINTAINERS: add tracepoint core-api doc files to
 TRACING

The files in Documentation/core-api/ are by virtue of their top-level
directory part of the Documentation section in MAINTAINERS. Each file in
Documentation/core-api/ should however also have a further section in
MAINTAINERS it belongs to, which fits to the technical area of the
documented API in that file.

The tracepoint.rst provides some explanation to tracepoints defined in
selected files under include/trace/events/, which itself is part of the
TRACING section.

So, add this core-api document to TRACING.

Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://patch.msgid.link/20251105092428.153378-1-lukas.bulwahn@redhat.com
Signed-off-by: Lukas Bulwahn <lukas.bulwahn@redhat.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5b11839cba9de1..8e90454bb817e5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -26464,6 +26464,7 @@ L:	linux-trace-kernel@vger.kernel.org
 S:	Maintained
 Q:	https://patchwork.kernel.org/project/linux-trace-kernel/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
+F:	Documentation/core-api/tracepoint.rst
 F:	Documentation/trace/*
 F:	fs/tracefs/
 F:	include/linux/trace*.h

From ef7f38df890f5dcd2ae62f8dbde191d72f3bebae Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 16 Dec 2025 18:24:40 -0500
Subject: [PATCH 200/258] tracing: Do not register unsupported perf events

Synthetic events currently do not have a function to register perf events.
This leads to calling the tracepoint register functions with a NULL
function pointer which triggers:

 ------------[ cut here ]------------
 WARNING: kernel/tracepoint.c:175 at tracepoint_add_func+0x357/0x370, CPU#2: perf/2272
 Modules linked in: kvm_intel kvm irqbypass
 CPU: 2 UID: 0 PID: 2272 Comm: perf Not tainted 6.18.0-ftest-11964-ge022764176fc-dirty #323 PREEMPTLAZY
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.17.0-debian-1.17.0-1 04/01/2014
 RIP: 0010:tracepoint_add_func+0x357/0x370
 Code: 28 9c e8 4c 0b f5 ff eb 0f 4c 89 f7 48 c7 c6 80 4d 28 9c e8 ab 89 f4 ff 31 c0 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc cc <0f> 0b 49 c7 c6 ea ff ff ff e9 ee fe ff ff 0f 0b e9 f9 fe ff ff 0f
 RSP: 0018:ffffabc0c44d3c40 EFLAGS: 00010246
 RAX: 0000000000000001 RBX: ffff9380aa9e4060 RCX: 0000000000000000
 RDX: 000000000000000a RSI: ffffffff9e1d4a98 RDI: ffff937fcf5fd6c8
 RBP: 0000000000000001 R08: 0000000000000007 R09: ffff937fcf5fc780
 R10: 0000000000000003 R11: ffffffff9c193910 R12: 000000000000000a
 R13: ffffffff9e1e5888 R14: 0000000000000000 R15: ffffabc0c44d3c78
 FS:  00007f6202f5f340(0000) GS:ffff93819f00f000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 000055d3162281a8 CR3: 0000000106a56003 CR4: 0000000000172ef0
 Call Trace:
  <TASK>
  tracepoint_probe_register+0x5d/0x90
  synth_event_reg+0x3c/0x60
  perf_trace_event_init+0x204/0x340
  perf_trace_init+0x85/0xd0
  perf_tp_event_init+0x2e/0x50
  perf_try_init_event+0x6f/0x230
  ? perf_event_alloc+0x4bb/0xdc0
  perf_event_alloc+0x65a/0xdc0
  __se_sys_perf_event_open+0x290/0x9f0
  do_syscall_64+0x93/0x7b0
  ? entry_SYSCALL_64_after_hwframe+0x76/0x7e
  ? trace_hardirqs_off+0x53/0xc0
  entry_SYSCALL_64_after_hwframe+0x76/0x7e

Instead, have the code return -ENODEV, which doesn't warn and has perf
error out with:

 # perf record -e synthetic:futex_wait
Error:
The sys_perf_event_open() syscall returned with 19 (No such device) for event (synthetic:futex_wait).
"dmesg | grep -i perf" may provide additional information.

Ideally perf should support synthetic events, but for now just fix the
warning. The support can come later.

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://patch.msgid.link/20251216182440.147e4453@gandalf.local.home
Fixes: 4b147936fa509 ("tracing: Add support for 'synthetic' events")
Reported-by: Ian Rogers <irogers@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index b16a5a1580401b..76067529db61bc 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -700,6 +700,8 @@ int trace_event_reg(struct trace_event_call *call,
 
 #ifdef CONFIG_PERF_EVENTS
 	case TRACE_REG_PERF_REGISTER:
+		if (!call->class->perf_probe)
+			return -ENODEV;
 		return tracepoint_probe_register(call->tp,
 						 call->class->perf_probe,
 						 call);

From 74bf97e9a8b6443ba2119dc884940e9364c91bde Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Tue, 16 Dec 2025 09:49:50 -0800
Subject: [PATCH 201/258] tracing: Fix UBSAN warning in __remove_instance()

xfs/558 triggers the following UBSAN warning:

 ------------[ cut here ]------------
 UBSAN: shift-out-of-bounds in kernel/trace/trace.c:10510:10
 shift exponent 32 is too large for 32-bit type 'int'
 CPU: 1 UID: 0 PID: 888674 Comm: rmdir Not tainted 6.19.0-rc1-xfsx #rc1 PREEMPT(lazy)  dbf607ef4c142c563f76d706e71af9731d7b9c90
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-4.module+el8.8.0+21164+ed375313 04/01/2014
 Call Trace:
  <TASK>
  dump_stack_lvl+0x4a/0x70
  ubsan_epilogue+0x5/0x2b
  __ubsan_handle_shift_out_of_bounds.cold+0x5e/0x113
  __remove_instance.part.0.constprop.0.cold+0x18/0x26f
  instance_rmdir+0xf3/0x110
  tracefs_syscall_rmdir+0x4d/0x90
  vfs_rmdir+0x139/0x230
  do_rmdir+0x143/0x230
  __x64_sys_rmdir+0x1d/0x20
  do_syscall_64+0x44/0x230
  entry_SYSCALL_64_after_hwframe+0x4b/0x53
 RIP: 0033:0x7f7ae8e51f17
 Code: f0 ff ff 73 01 c3 48 8b 0d de 2e 0e 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 54 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 01 c3 48 8b 15 b1 2e 0e 00 f7 d8 64 89 02 b8
 RSP: 002b:00007ffd90743f08 EFLAGS: 00000246 ORIG_RAX: 0000000000000054
 RAX: ffffffffffffffda RBX: 00007ffd907440f8 RCX: 00007f7ae8e51f17
 RDX: 00007f7ae8f3c5c0 RSI: 00007ffd90744a21 RDI: 00007ffd90744a21
 RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000000
 R10: 00007f7ae8f35ac0 R11: 0000000000000246 R12: 00007ffd90744a21
 R13: 0000000000000001 R14: 00007f7ae8f8b000 R15: 000055e5283e6a98
  </TASK>
 ---[ end trace ]---

whilst tearing down an ftrace instance.  TRACE_FLAGS_MAX_SIZE is now 64bit,
so the mask comparison expression must be typecast to a u64 value to
avoid an overflow.  AFAICT, ZEROED_TRACE_FLAGS is already cast to ULL
so this is ok.

Link: https://patch.msgid.link/20251216174950.GA7705@frogsfrogsfrogs
Fixes: bbec8e28cac592 ("tracing: Allow tracer to add more than 32 options")
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e575956ef9b5a2..6f2148df14d966 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -10507,7 +10507,7 @@ static int __remove_instance(struct trace_array *tr)
 
 	/* Disable all the flags that were enabled coming in */
 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
-		if ((1 << i) & ZEROED_TRACE_FLAGS)
+		if ((1ULL << i) & ZEROED_TRACE_FLAGS)
 			set_tracer_flag(tr, 1ULL << i, 0);
 	}
 

From 39263f986da55c5b7bc328c757fe378a6a41799d Mon Sep 17 00:00:00 2001
From: Menglong Dong <menglong8.dong@gmail.com>
Date: Wed, 17 Dec 2025 11:00:53 +0800
Subject: [PATCH 202/258] ftrace: Fix address for jmp mode in t_show()

The address from ftrace_find_rec_direct() is printed directly in t_show().
This can mislead symbol offsets if it has the "jmp" bit in the last bit.

Fix this by printing the address that returned by ftrace_jmp_get().

Link: https://patch.msgid.link/20251217030053.80343-1-dongml2@chinatelecom.cn
Fixes: 25e4e3565d45 ("ftrace: Introduce FTRACE_OPS_FL_JMP")
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3ec2033c077436..ef2d5dca6f70c6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4518,8 +4518,11 @@ static int t_show(struct seq_file *m, void *v)
 			unsigned long direct;
 
 			direct = ftrace_find_rec_direct(rec->ip);
-			if (direct)
-				seq_printf(m, "\n\tdirect-->%pS", (void *)direct);
+			if (direct) {
+				seq_printf(m, "\n\tdirect%s-->%pS",
+					   ftrace_is_jmp(direct) ? "(jmp)" : "",
+					   (void *)ftrace_jmp_get(direct));
+			}
 		}
 	}
 

From c258f5c4502c9667bccf5d76fa731ab9c96687c1 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 12 Dec 2025 22:34:15 +0800
Subject: [PATCH 203/258] ublk: fix deadlock when reading partition table

When one process(such as udev) opens ublk block device (e.g., to read
the partition table via bdev_open()), a deadlock[1] can occur:

1. bdev_open() grabs disk->open_mutex
2. The process issues read I/O to ublk backend to read partition table
3. In __ublk_complete_rq(), blk_update_request() or blk_mq_end_request()
   runs bio->bi_end_io() callbacks
4. If this triggers fput() on file descriptor of ublk block device, the
   work may be deferred to current task's task work (see fput() implementation)
5. This eventually calls blkdev_release() from the same context
6. blkdev_release() tries to grab disk->open_mutex again
7. Deadlock: same task waiting for a mutex it already holds

The fix is to run blk_update_request() and blk_mq_end_request() with bottom
halves disabled. This forces blkdev_release() to run in kernel work-queue
context instead of current task work context, and allows ublk server to make
forward progress, and avoids the deadlock.

Fixes: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver")
Link: https://github.com/ublk-org/ublksrv/issues/170 [1]
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
[axboe: rewrite comment in ublk]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/ublk_drv.c | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index df9831783a1339..cfd2132410dd74 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1080,12 +1080,20 @@ static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu(
 	return io_uring_cmd_to_pdu(ioucmd, struct ublk_uring_cmd_pdu);
 }
 
+static void ublk_end_request(struct request *req, blk_status_t error)
+{
+	local_bh_disable();
+	blk_mq_end_request(req, error);
+	local_bh_enable();
+}
+
 /* todo: handle partial completion */
 static inline void __ublk_complete_rq(struct request *req, struct ublk_io *io,
 				      bool need_map)
 {
 	unsigned int unmapped_bytes;
 	blk_status_t res = BLK_STS_OK;
+	bool requeue;
 
 	/* failed read IO if nothing is read */
 	if (!io->res && req_op(req) == REQ_OP_READ)
@@ -1117,14 +1125,30 @@ static inline void __ublk_complete_rq(struct request *req, struct ublk_io *io,
 	if (unlikely(unmapped_bytes < io->res))
 		io->res = unmapped_bytes;
 
-	if (blk_update_request(req, BLK_STS_OK, io->res))
+	/*
+	 * Run bio->bi_end_io() with softirqs disabled. If the final fput
+	 * happens off this path, then that will prevent ublk's blkdev_release()
+	 * from being called on current's task work, see fput() implementation.
+	 *
+	 * Otherwise, ublk server may not provide forward progress in case of
+	 * reading the partition table from bdev_open() with disk->open_mutex
+	 * held, and causes dead lock as we could already be holding
+	 * disk->open_mutex here.
+	 *
+	 * Preferably we would not be doing IO with a mutex held that is also
+	 * used for release, but this work-around will suffice for now.
+	 */
+	local_bh_disable();
+	requeue = blk_update_request(req, BLK_STS_OK, io->res);
+	local_bh_enable();
+	if (requeue)
 		blk_mq_requeue_request(req, true);
 	else if (likely(!blk_should_fake_timeout(req->q)))
 		__blk_mq_end_request(req, BLK_STS_OK);
 
 	return;
 exit:
-	blk_mq_end_request(req, res);
+	ublk_end_request(req, res);
 }
 
 static struct io_uring_cmd *__ublk_prep_compl_io_cmd(struct ublk_io *io,
@@ -1164,7 +1188,7 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
 	if (ublk_nosrv_dev_should_queue_io(ubq->dev))
 		blk_mq_requeue_request(rq, false);
 	else
-		blk_mq_end_request(rq, BLK_STS_IOERR);
+		ublk_end_request(rq, BLK_STS_IOERR);
 }
 
 static void
@@ -1209,7 +1233,7 @@ __ublk_do_auto_buf_reg(const struct ublk_queue *ubq, struct request *req,
 			ublk_auto_buf_reg_fallback(ubq, req->tag);
 			return AUTO_BUF_REG_FALLBACK;
 		}
-		blk_mq_end_request(req, BLK_STS_IOERR);
+		ublk_end_request(req, BLK_STS_IOERR);
 		return AUTO_BUF_REG_FAIL;
 	}
 

From 5a5aff6338c0f4164a6a8d8a7eb400c4054df256 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 17 Dec 2025 10:45:53 +0100
Subject: [PATCH 204/258] can: fix build dependency

Arnd Bergmann's patch [1] fixed the build dependency problem introduced by
bugfix commit cb2dc6d2869a ("can: Kconfig: select CAN driver infrastructure
by default"). This ended up as commit 6abd4577bccc ("can: fix build
dependency"), but I broke Arnd's fix by removing a dependency that we
thought was superfluous.

[1] https://lore.kernel.org/all/20251204100015.1033688-1-arnd@kernel.org/

Meanwhile the problem was also found by intel's kernel test robot,
complaining about undefined symbols:

| ERROR: modpost: "m_can_class_unregister" [drivers/net/can/m_can/m_can_platform.ko] undefined!
| ERROR: modpost: "m_can_class_free_dev" [drivers/net/can/m_can/m_can_platform.ko] undefined!
| ERROR: modpost: "m_can_class_allocate_dev" [drivers/net/can/m_can/m_can_platform.ko] undefined!
| ERROR: modpost: "m_can_class_get_clocks" [drivers/net/can/m_can/m_can_platform.ko] undefined!
| ERROR: modpost: "m_can_class_register" [drivers/net/can/m_can/m_can_platform.ko] undefined!

To fix this problem, add the missing dependency again.

Cc: Vincent Mailhol <mailhol@kernel.org>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202512132253.vO9WFDJK-lkp@intel.com/
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202512180808.fTAUQ2XN-lkp@intel.com/
Reported-by: Arnd Bergmann <arnd@arndb.de>
Closes: https://lore.kernel.org/all/7427949a-ea7d-4854-9fe4-e01db7d878c7@app.fastmail.com/
Fixes: 6abd4577bccc ("can: fix build dependency")
Fixes: cb2dc6d2869a ("can: Kconfig: select CAN driver infrastructure by default")
Acked-by: Vincent Mailhol <mailhol@kernel.org>
Link: https://patch.msgid.link/20251217-can-fix-dependency-v1-1-fd2d4f2a2bf5@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index 460a74ae692330..cfaea6178a7196 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -17,7 +17,7 @@ menuconfig CAN_DEV
 	  virtual ones. If you own such devices or plan to use the virtual CAN
 	  interfaces to develop applications, say Y here.
 
-if CAN_DEV
+if CAN_DEV && CAN
 
 config CAN_VCAN
 	tristate "Virtual Local CAN Interface (vcan)"

From 7a089c5d35aa307147e78c5cbeeb1352b92790b1 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Wed, 17 Dec 2025 13:43:04 -0400
Subject: [PATCH 205/258] iommupt: Return ERR_PTR from _table_alloc()

syzkaller noticed that with fault injection a failure inside
iommu_alloc_pages_node_sz() oops's in PT_FEAT_DMA_INCOHERENT because it goes
on to make NULL incoherent. Closer inspection shows the return value has
become confused, the alloc routines on the iommupt side expect ERR_PTR while
iommu_alloc_pages_node_sz() returns NULL.

Error out early to fix both issues.

Fixes: aefd967dab64 ("iommupt: Use the incoherent start/stop functions for PT_FEAT_DMA_INCOHERENT")
Fixes: dcd6a011a8d5 ("iommupt: Add map_pages op")
Fixes: cdb39d918579 ("iommupt: Add the basic structure of the iommu implementation")
Reported-by: syzbot+e06bb7478e687f235ad7@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/693a39de.050a0220.4004e.02ce.GAE@google.com/
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/generic_pt/iommu_pt.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h
index 97aeda1ad01cca..3327116a441cac 100644
--- a/drivers/iommu/generic_pt/iommu_pt.h
+++ b/drivers/iommu/generic_pt/iommu_pt.h
@@ -372,6 +372,9 @@ static inline struct pt_table_p *_table_alloc(struct pt_common *common,
 
 	table_mem = iommu_alloc_pages_node_sz(iommu_table->nid, gfp,
 					      log2_to_int(lg2sz));
+	if (!table_mem)
+		return ERR_PTR(-ENOMEM);
+
 	if (pt_feature(common, PT_FEAT_DMA_INCOHERENT) &&
 	    mode == ALLOC_NORMAL) {
 		int ret = iommu_pages_start_incoherent(

From c56a12c71ad38f381105f6e5036dede64ad2dfee Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 18 Dec 2025 11:47:38 +0100
Subject: [PATCH 206/258] x86/bug: Fix old GCC compile fails

For some mysterious reasons the GCC 8 and 9 preprocessor manages to
sporadically fumble _ASM_BYTES(0x0f, 0x0b):

$ grep ".byte[ ]*0x0f" defconfig-build/drivers/net/wireless/realtek/rtlwifi/base.s
        1:       .byte0x0f,0x0b ;
        1:       .byte 0x0f,0x0b ;

which makes the assembler upset and all that. While there are more
_ASM_BYTES() users (notably the NOP instructions), those don't seem
affected. Therefore replace the offending ASM_UD2 with one using the
ud2 mnemonic.

Reported-by: Jean Delvare <jdelvare@suse.de>
Suggested-by: Uros Bizjak <ubizjak@gmail.com>
Fixes: 85a2d4a890dc ("x86,ibt: Use UDB instead of 0xEA")
Cc: stable@kernel.org
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251218104659.GT3911114@noisy.programming.kicks-ass.net
---
 arch/x86/include/asm/bug.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index ee23b98353d735..40de5796adb52b 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -15,7 +15,7 @@ extern void __WARN_trap(struct bug_entry *bug, ...);
 /*
  * Despite that some emulators terminate on UD2, we use it for WARN().
  */
-#define ASM_UD2		_ASM_BYTES(0x0f, 0x0b)
+#define ASM_UD2		__ASM_FORM(ud2)
 #define INSN_UD2	0x0b0f
 #define LEN_UD2		2
 

From 7b07be1ff1cb6c49869910518650e8d0abc7d25f Mon Sep 17 00:00:00 2001
From: Gal Pressman <gal@nvidia.com>
Date: Mon, 8 Dec 2025 14:19:01 +0200
Subject: [PATCH 207/258] ethtool: Avoid overflowing userspace buffer on stats
 query

The ethtool -S command operates across three ioctl calls:
ETHTOOL_GSSET_INFO for the size, ETHTOOL_GSTRINGS for the names, and
ETHTOOL_GSTATS for the values.

If the number of stats changes between these calls (e.g., due to device
reconfiguration), userspace's buffer allocation will be incorrect,
potentially leading to buffer overflow.

Drivers are generally expected to maintain stable stat counts, but some
drivers (e.g., mlx5, bnx2x, bna, ksz884x) use dynamic counters, making
this scenario possible.

Some drivers try to handle this internally:
- bnad_get_ethtool_stats() returns early in case stats.n_stats is not
  equal to the driver's stats count.
- micrel/ksz884x also makes sure not to write anything beyond
  stats.n_stats and overflow the buffer.

However, both use stats.n_stats which is already assigned with the value
returned from get_sset_count(), hence won't solve the issue described
here.

Change ethtool_get_strings(), ethtool_get_stats(),
ethtool_get_phy_stats() to not return anything in case of a mismatch
between userspace's size and get_sset_size(), to prevent buffer
overflow.
The returned n_stats value will be equal to zero, to reflect that
nothing has been returned.

This could result in one of two cases when using upstream ethtool,
depending on when the size change is detected:
1. When detected in ethtool_get_strings():
    # ethtool -S eth2
    no stats available

2. When detected in get stats, all stats will be reported as zero.

Both cases are presumably transient, and a subsequent ethtool call
should succeed.

Other than the overflow avoidance, these two cases are very evident (no
output/cleared stats), which is arguably better than presenting
incorrect/shifted stats.
I also considered returning an error instead of a "silent" response, but
that seems more destructive towards userspace apps.

Notes:
- This patch does not claim to fix the inherent race, it only makes sure
  that we do not overflow the userspace buffer, and makes for a more
  predictable behavior.

- RTNL lock is held during each ioctl, the race window exists between
  the separate ioctl calls when the lock is released.

- Userspace ethtool always fills stats.n_stats, but it is likely that
  these stats ioctls are implemented in other userspace applications
  which might not fill it. The added code checks that it's not zero,
  to prevent any regressions.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Gal Pressman <gal@nvidia.com>
Link: https://patch.msgid.link/20251208121901.3203692-1-gal@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/ethtool/ioctl.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index fa83ddade4f817..9431e305b2333f 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -2383,7 +2383,10 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
 		return -ENOMEM;
 	WARN_ON_ONCE(!ret);
 
-	gstrings.len = ret;
+	if (gstrings.len && gstrings.len != ret)
+		gstrings.len = 0;
+	else
+		gstrings.len = ret;
 
 	if (gstrings.len) {
 		data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN));
@@ -2509,10 +2512,13 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
 	if (copy_from_user(&stats, useraddr, sizeof(stats)))
 		return -EFAULT;
 
-	stats.n_stats = n_stats;
+	if (stats.n_stats && stats.n_stats != n_stats)
+		stats.n_stats = 0;
+	else
+		stats.n_stats = n_stats;
 
-	if (n_stats) {
-		data = vzalloc(array_size(n_stats, sizeof(u64)));
+	if (stats.n_stats) {
+		data = vzalloc(array_size(stats.n_stats, sizeof(u64)));
 		if (!data)
 			return -ENOMEM;
 		ops->get_ethtool_stats(dev, &stats, data);
@@ -2524,7 +2530,9 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
 	if (copy_to_user(useraddr, &stats, sizeof(stats)))
 		goto out;
 	useraddr += sizeof(stats);
-	if (n_stats && copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64))))
+	if (stats.n_stats &&
+	    copy_to_user(useraddr, data,
+			 array_size(stats.n_stats, sizeof(u64))))
 		goto out;
 	ret = 0;
 
@@ -2560,6 +2568,10 @@ static int ethtool_get_phy_stats_phydev(struct phy_device *phydev,
 		return -EOPNOTSUPP;
 
 	n_stats = phy_ops->get_sset_count(phydev);
+	if (stats->n_stats && stats->n_stats != n_stats) {
+		stats->n_stats = 0;
+		return 0;
+	}
 
 	ret = ethtool_vzalloc_stats_array(n_stats, data);
 	if (ret)
@@ -2580,6 +2592,10 @@ static int ethtool_get_phy_stats_ethtool(struct net_device *dev,
 		return -EOPNOTSUPP;
 
 	n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
+	if (stats->n_stats && stats->n_stats != n_stats) {
+		stats->n_stats = 0;
+		return 0;
+	}
 
 	ret = ethtool_vzalloc_stats_array(n_stats, data);
 	if (ret)
@@ -2616,7 +2632,9 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
 	}
 
 	useraddr += sizeof(stats);
-	if (copy_to_user(useraddr, data, array_size(stats.n_stats, sizeof(u64))))
+	if (stats.n_stats &&
+	    copy_to_user(useraddr, data,
+			 array_size(stats.n_stats, sizeof(u64))))
 		ret = -EFAULT;
 
  out:

From 377d66fa86654085be1f48906c1d88b7ca721c78 Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Tue, 9 Dec 2025 01:28:20 +0000
Subject: [PATCH 208/258] net: dsa: lantiq_gswip: fix order in .remove
 operation

Russell King pointed out that disabling the switch by clearing
GSWIP_MDIO_GLOB_ENABLE before calling dsa_unregister_switch() is
problematic, as it violates a Golden Rule of driver development to
always first unpublish userspace interfaces and then disable the
hardware.

Fix this, and also simplify the probe() function, by introducing a
dsa_switch_ops teardown() operation which takes care of clearing the
GSWIP_MDIO_GLOB_ENABLE bit.

Fixes: 14fceff4771e5 ("net: dsa: Add Lantiq / Intel DSA driver for vrx200")
Suggested-by: "Russell King (Oracle)" <linux@armlinux.org.uk>
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Link: https://patch.msgid.link/4ebd72a29edc1e4059b9666a26a0bb5d906a829a.1765241054.git.daniel@makrotopia.org
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/dsa/lantiq/lantiq_gswip.c        |  3 ---
 drivers/net/dsa/lantiq/lantiq_gswip_common.c | 13 ++++++++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c
index 57dd063c074035..b094001a7c8057 100644
--- a/drivers/net/dsa/lantiq/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq/lantiq_gswip.c
@@ -444,9 +444,6 @@ static void gswip_remove(struct platform_device *pdev)
 	if (!priv)
 		return;
 
-	/* disable the switch */
-	gswip_disable_switch(priv);
-
 	dsa_unregister_switch(priv->ds);
 
 	for (i = 0; i < priv->num_gphy_fw; i++)
diff --git a/drivers/net/dsa/lantiq/lantiq_gswip_common.c b/drivers/net/dsa/lantiq/lantiq_gswip_common.c
index 9da39edf8f5742..6b171d58e18625 100644
--- a/drivers/net/dsa/lantiq/lantiq_gswip_common.c
+++ b/drivers/net/dsa/lantiq/lantiq_gswip_common.c
@@ -752,6 +752,13 @@ static int gswip_setup(struct dsa_switch *ds)
 	return 0;
 }
 
+static void gswip_teardown(struct dsa_switch *ds)
+{
+	struct gswip_priv *priv = ds->priv;
+
+	regmap_clear_bits(priv->mdio, GSWIP_MDIO_GLOB, GSWIP_MDIO_GLOB_ENABLE);
+}
+
 static enum dsa_tag_protocol gswip_get_tag_protocol(struct dsa_switch *ds,
 						    int port,
 						    enum dsa_tag_protocol mp)
@@ -1629,6 +1636,7 @@ static const struct phylink_mac_ops gswip_phylink_mac_ops = {
 static const struct dsa_switch_ops gswip_switch_ops = {
 	.get_tag_protocol	= gswip_get_tag_protocol,
 	.setup			= gswip_setup,
+	.teardown		= gswip_teardown,
 	.port_setup		= gswip_port_setup,
 	.port_enable		= gswip_port_enable,
 	.port_disable		= gswip_port_disable,
@@ -1718,15 +1726,14 @@ int gswip_probe_common(struct gswip_priv *priv, u32 version)
 
 	err = gswip_validate_cpu_port(priv->ds);
 	if (err)
-		goto disable_switch;
+		goto unregister_switch;
 
 	dev_info(priv->dev, "probed GSWIP version %lx mod %lx\n",
 		 GSWIP_VERSION_REV(version), GSWIP_VERSION_MOD(version));
 
 	return 0;
 
-disable_switch:
-	gswip_disable_switch(priv);
+unregister_switch:
 	dsa_unregister_switch(priv->ds);
 
 	return err;

From 8e4c0f08f6bedeb885515c5ec5a6388a45d768ec Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Tue, 9 Dec 2025 01:28:49 +0000
Subject: [PATCH 209/258] net: dsa: mxl-gsw1xx: fix order in .remove operation

The driver's .remove operation was calling gswip_disable_switch() which
clears the GSWIP_MDIO_GLOB_ENABLE bit before calling
dsa_unregister_switch() and thereby violating a Golden Rule of driver
development to always unpublish userspace interfaces before disabling
hardware, as pointed out by Russell King.

Fix this by relying in GSWIP_MDIO_GLOB_ENABLE being cleared by the
.teardown operation introduced by the previous commit
("net: dsa: lantiq_gswip: fix teardown order").

Fixes: 22335939ec907 ("net: dsa: add driver for MaxLinear GSW1xx switch family")
Suggested-by: "Russell King (Oracle)" <linux@armlinux.org.uk>
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Link: https://patch.msgid.link/63f882eeb910cf24503c35a443b541cc54a930f2.1765241054.git.daniel@makrotopia.org
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/dsa/lantiq/mxl-gsw1xx.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/dsa/lantiq/mxl-gsw1xx.c b/drivers/net/dsa/lantiq/mxl-gsw1xx.c
index cf33a16fd183b4..cda966d71e889c 100644
--- a/drivers/net/dsa/lantiq/mxl-gsw1xx.c
+++ b/drivers/net/dsa/lantiq/mxl-gsw1xx.c
@@ -652,8 +652,6 @@ static void gsw1xx_remove(struct mdio_device *mdiodev)
 	if (!priv)
 		return;
 
-	gswip_disable_switch(priv);
-
 	dsa_unregister_switch(priv->ds);
 }
 

From 651b253b80379b0eb3669405fcf50d4039dc7a0e Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Tue, 9 Dec 2025 01:29:05 +0000
Subject: [PATCH 210/258] net: dsa: mxl-gsw1xx: fix .shutdown driver operation

The .shutdown operation should call dsa_switch_shutdown() just like
it is done also by the sibling lantiq_gswip driver. Not doing that
results in shutdown or reboot hanging and waiting for the CPU port
becoming free, which introduces a longer delay and a WARNING before
shutdown or reboot in case the driver is built-into the kernel.
Fix this by calling dsa_switch_shutdown() in the driver's shutdown
operation, harmonizing it with what is done in the lantiq_gswip
driver. As a side-effect this now allows to remove the previously
exported gswip_disable_switch() function which no longer got any
users.

Fixes: 22335939ec907 ("net: dsa: add driver for MaxLinear GSW1xx switch family")
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Link: https://patch.msgid.link/77ed91a5206e5dbf5d3e83d7e364ebfda90d31fd.1765241054.git.daniel@makrotopia.org
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/dsa/lantiq/lantiq_gswip.h        | 2 --
 drivers/net/dsa/lantiq/lantiq_gswip_common.c | 6 ------
 drivers/net/dsa/lantiq/mxl-gsw1xx.c          | 4 ++--
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.h b/drivers/net/dsa/lantiq/lantiq_gswip.h
index 9c38e51a75e806..2e0f2afbadbbc9 100644
--- a/drivers/net/dsa/lantiq/lantiq_gswip.h
+++ b/drivers/net/dsa/lantiq/lantiq_gswip.h
@@ -294,8 +294,6 @@ struct gswip_priv {
 	u16 version;
 };
 
-void gswip_disable_switch(struct gswip_priv *priv);
-
 int gswip_probe_common(struct gswip_priv *priv, u32 version);
 
 #endif /* __LANTIQ_GSWIP_H */
diff --git a/drivers/net/dsa/lantiq/lantiq_gswip_common.c b/drivers/net/dsa/lantiq/lantiq_gswip_common.c
index 6b171d58e18625..e790f2ef758846 100644
--- a/drivers/net/dsa/lantiq/lantiq_gswip_common.c
+++ b/drivers/net/dsa/lantiq/lantiq_gswip_common.c
@@ -1664,12 +1664,6 @@ static const struct dsa_switch_ops gswip_switch_ops = {
 	.port_hsr_leave		= dsa_port_simple_hsr_leave,
 };
 
-void gswip_disable_switch(struct gswip_priv *priv)
-{
-	regmap_clear_bits(priv->mdio, GSWIP_MDIO_GLOB, GSWIP_MDIO_GLOB_ENABLE);
-}
-EXPORT_SYMBOL_GPL(gswip_disable_switch);
-
 static int gswip_validate_cpu_port(struct dsa_switch *ds)
 {
 	struct gswip_priv *priv = ds->priv;
diff --git a/drivers/net/dsa/lantiq/mxl-gsw1xx.c b/drivers/net/dsa/lantiq/mxl-gsw1xx.c
index cda966d71e889c..4dc287ad141e19 100644
--- a/drivers/net/dsa/lantiq/mxl-gsw1xx.c
+++ b/drivers/net/dsa/lantiq/mxl-gsw1xx.c
@@ -662,9 +662,9 @@ static void gsw1xx_shutdown(struct mdio_device *mdiodev)
 	if (!priv)
 		return;
 
-	dev_set_drvdata(&mdiodev->dev, NULL);
+	dsa_switch_shutdown(priv->ds);
 
-	gswip_disable_switch(priv);
+	dev_set_drvdata(&mdiodev->dev, NULL);
 }
 
 static const struct gswip_hw_info gsw12x_data = {

From 7b103aaf0d564b83ee1d4bb532ee7ae36ed001ed Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Tue, 9 Dec 2025 01:29:34 +0000
Subject: [PATCH 211/258] net: dsa: mxl-gsw1xx: manually clear RANEG bit

Despite being documented as self-clearing, the RANEG bit sometimes
remains set, preventing auto-negotiation from happening.

Manually clear the RANEG bit after 10ms as advised by MaxLinear.
In order to not hold RTNL during the 10ms of waiting schedule
delayed work to take care of clearing the bit asynchronously, which
is similar to the self-clearing behavior.

Fixes: 22335939ec90 ("net: dsa: add driver for MaxLinear GSW1xx switch family")
Reported-by: Rasmus Villemoes <ravi@prevas.dk>
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Link: https://patch.msgid.link/76745fceb5a3f53088110fb7a96acf88434088ca.1765241054.git.daniel@makrotopia.org
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/dsa/lantiq/mxl-gsw1xx.c | 34 ++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/lantiq/mxl-gsw1xx.c b/drivers/net/dsa/lantiq/mxl-gsw1xx.c
index 4dc287ad141e19..f8ff8a604bf535 100644
--- a/drivers/net/dsa/lantiq/mxl-gsw1xx.c
+++ b/drivers/net/dsa/lantiq/mxl-gsw1xx.c
@@ -11,10 +11,12 @@
 
 #include <linux/bits.h>
 #include <linux/delay.h>
+#include <linux/jiffies.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/of_mdio.h>
 #include <linux/regmap.h>
+#include <linux/workqueue.h>
 #include <net/dsa.h>
 
 #include "lantiq_gswip.h"
@@ -29,6 +31,7 @@ struct gsw1xx_priv {
 	struct			regmap *clk;
 	struct			regmap *shell;
 	struct			phylink_pcs pcs;
+	struct delayed_work	clear_raneg;
 	phy_interface_t		tbi_interface;
 	struct gswip_priv	gswip;
 };
@@ -145,7 +148,9 @@ static void gsw1xx_pcs_disable(struct phylink_pcs *pcs)
 {
 	struct gsw1xx_priv *priv = pcs_to_gsw1xx(pcs);
 
-	/* Assert SGMII shell reset */
+	cancel_delayed_work_sync(&priv->clear_raneg);
+
+	/* Assert SGMII shell reset (will also clear RANEG bit) */
 	regmap_set_bits(priv->shell, GSW1XX_SHELL_RST_REQ,
 			GSW1XX_RST_REQ_SGMII_SHELL);
 
@@ -428,12 +433,29 @@ static int gsw1xx_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode,
 	return 0;
 }
 
+static void gsw1xx_pcs_clear_raneg(struct work_struct *work)
+{
+	struct gsw1xx_priv *priv =
+		container_of(work, struct gsw1xx_priv, clear_raneg.work);
+
+	regmap_clear_bits(priv->sgmii, GSW1XX_SGMII_TBI_ANEGCTL,
+			  GSW1XX_SGMII_TBI_ANEGCTL_RANEG);
+}
+
 static void gsw1xx_pcs_an_restart(struct phylink_pcs *pcs)
 {
 	struct gsw1xx_priv *priv = pcs_to_gsw1xx(pcs);
 
+	cancel_delayed_work_sync(&priv->clear_raneg);
+
 	regmap_set_bits(priv->sgmii, GSW1XX_SGMII_TBI_ANEGCTL,
 			GSW1XX_SGMII_TBI_ANEGCTL_RANEG);
+
+	/* despite being documented as self-clearing, the RANEG bit
+	 * sometimes remains set, preventing auto-negotiation from happening.
+	 * MaxLinear advises to manually clear the bit after 10ms.
+	 */
+	schedule_delayed_work(&priv->clear_raneg, msecs_to_jiffies(10));
 }
 
 static void gsw1xx_pcs_link_up(struct phylink_pcs *pcs,
@@ -636,6 +658,8 @@ static int gsw1xx_probe(struct mdio_device *mdiodev)
 	if (ret)
 		return ret;
 
+	INIT_DELAYED_WORK(&priv->clear_raneg, gsw1xx_pcs_clear_raneg);
+
 	ret = gswip_probe_common(&priv->gswip, version);
 	if (ret)
 		return ret;
@@ -648,16 +672,21 @@ static int gsw1xx_probe(struct mdio_device *mdiodev)
 static void gsw1xx_remove(struct mdio_device *mdiodev)
 {
 	struct gswip_priv *priv = dev_get_drvdata(&mdiodev->dev);
+	struct gsw1xx_priv *gsw1xx_priv;
 
 	if (!priv)
 		return;
 
 	dsa_unregister_switch(priv->ds);
+
+	gsw1xx_priv = container_of(priv, struct gsw1xx_priv, gswip);
+	cancel_delayed_work_sync(&gsw1xx_priv->clear_raneg);
 }
 
 static void gsw1xx_shutdown(struct mdio_device *mdiodev)
 {
 	struct gswip_priv *priv = dev_get_drvdata(&mdiodev->dev);
+	struct gsw1xx_priv *gsw1xx_priv;
 
 	if (!priv)
 		return;
@@ -665,6 +694,9 @@ static void gsw1xx_shutdown(struct mdio_device *mdiodev)
 	dsa_switch_shutdown(priv->ds);
 
 	dev_set_drvdata(&mdiodev->dev, NULL);
+
+	gsw1xx_priv = container_of(priv, struct gsw1xx_priv, gswip);
+	cancel_delayed_work_sync(&gsw1xx_priv->clear_raneg);
 }
 
 static const struct gswip_hw_info gsw12x_data = {

From 89a898d63f6f588acf5c104c65c94a38b68c69a6 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@nvidia.com>
Date: Tue, 9 Dec 2025 14:56:09 +0200
Subject: [PATCH 212/258] net/mlx5: fw reset, clear reset requested on
 drain_fw_reset

drain_fw_reset() waits for ongoing firmware reset events and blocks new
event handling, but does not clear the reset requested flag, and may
keep sync reset polling.

To fix it, call mlx5_sync_reset_clear_reset_requested() to clear the
flag, stop sync reset polling, and resume health polling, ensuring
health issues are still detected after the firmware reset drain.

Fixes: 16d42d313350 ("net/mlx5: Drain fw_reset when removing device")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Shay Drori <shayd@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1765284977-1363052-2-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index 2bceb42c98cc2e..b81de792c181a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -844,7 +844,8 @@ void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
 	cancel_work_sync(&fw_reset->reset_reload_work);
 	cancel_work_sync(&fw_reset->reset_now_work);
 	cancel_work_sync(&fw_reset->reset_abort_work);
-	cancel_delayed_work(&fw_reset->reset_timeout_work);
+	if (test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
+		mlx5_sync_reset_clear_reset_requested(dev, true);
 }
 
 static const struct devlink_param mlx5_fw_reset_devlink_params[] = {

From 5846a365fc6476b02d6766963cf0985520f0385f Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@nvidia.com>
Date: Tue, 9 Dec 2025 14:56:10 +0200
Subject: [PATCH 213/258] net/mlx5: Drain firmware reset in shutdown callback

Invoke drain_fw_reset() in the shutdown callback to ensure all
firmware reset handling is completed before shutdown proceeds.

Fixes: 16d42d313350 ("net/mlx5: Drain fw_reset when removing device")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Shay Drori <shayd@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1765284977-1363052-3-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 024339ce41f19e..cf53affe61ce22 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -2232,6 +2232,7 @@ static void shutdown(struct pci_dev *pdev)
 
 	mlx5_core_info(dev, "Shutdown was called\n");
 	set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
+	mlx5_drain_fw_reset(dev);
 	mlx5_drain_health_wq(dev);
 	err = mlx5_try_fast_unload(dev);
 	if (err)

From b35966042d20b14e2d83330049f77deec5229749 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 9 Dec 2025 14:56:11 +0200
Subject: [PATCH 214/258] net/mlx5: fw_tracer, Validate format string
 parameters

Add validation for format string parameters in the firmware tracer to
prevent potential security vulnerabilities and crashes from malformed
format strings received from firmware.

The firmware tracer receives format strings from the device firmware and
uses them to format trace messages. Without proper validation, bad
firmware could provide format strings with invalid format specifiers
(e.g., %s, %p, %n) that could lead to crashes, or other undefined
behavior.

Add mlx5_tracer_validate_params() to validate that all format specifiers
in trace strings are limited to safe integer/hex formats (%x, %d, %i,
%u, %llx, %lx, etc.). Reject strings containing other format types that
could be used to access arbitrary memory or cause crashes.
Invalid format strings are added to the trace output for visibility with
"BAD_FORMAT: " prefix.

Fixes: 70dd6fdb8987 ("net/mlx5: FW tracer, parse traces and kernel tracing support")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Reported-by: Breno Leitao <leitao@debian.org>
Closes: https://lore.kernel.org/netdev/hanz6rzrb2bqbplryjrakvkbmv4y5jlmtthnvi3thg5slqvelp@t3s3erottr6s/
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1765284977-1363052-4-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../mellanox/mlx5/core/diag/fw_tracer.c       | 83 ++++++++++++++++---
 .../mellanox/mlx5/core/diag/fw_tracer.h       |  1 +
 2 files changed, 74 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index 7bcf822a89f9f1..b415dfe5de45fc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -33,6 +33,7 @@
 #include "lib/eq.h"
 #include "fw_tracer.h"
 #include "fw_tracer_tracepoint.h"
+#include <linux/ctype.h>
 
 static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer)
 {
@@ -358,6 +359,43 @@ static const char *VAL_PARM		= "%llx";
 static const char *REPLACE_64_VAL_PARM	= "%x%x";
 static const char *PARAM_CHAR		= "%";
 
+static bool mlx5_is_valid_spec(const char *str)
+{
+	/* Parse format specifiers to find the actual type.
+	 * Structure: %[flags][width][.precision][length]type
+	 * Skip flags, width, precision & length.
+	 */
+	while (isdigit(*str) || *str == '#' || *str == '.' || *str == 'l')
+		str++;
+
+	/* Check if it's a valid integer/hex specifier:
+	 * Valid formats: %x, %d, %i, %u, etc.
+	 */
+	if (*str != 'x' && *str != 'X' && *str != 'd' && *str != 'i' &&
+	    *str != 'u' && *str != 'c')
+		return false;
+
+	return true;
+}
+
+static bool mlx5_tracer_validate_params(const char *str)
+{
+	const char *substr = str;
+
+	if (!str)
+		return false;
+
+	substr = strstr(substr, PARAM_CHAR);
+	while (substr) {
+		if (!mlx5_is_valid_spec(substr + 1))
+			return false;
+
+		substr = strstr(substr + 1, PARAM_CHAR);
+	}
+
+	return true;
+}
+
 static int mlx5_tracer_message_hash(u32 message_id)
 {
 	return jhash_1word(message_id, 0) & (MESSAGE_HASH_SIZE - 1);
@@ -419,6 +457,10 @@ static int mlx5_tracer_get_num_of_params(char *str)
 	char *substr, *pstr = str;
 	int num_of_params = 0;
 
+	/* Validate that all parameters are valid before processing */
+	if (!mlx5_tracer_validate_params(str))
+		return -EINVAL;
+
 	/* replace %llx with %x%x */
 	substr = strstr(pstr, VAL_PARM);
 	while (substr) {
@@ -570,14 +612,17 @@ void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
 {
 	char	tmp[512];
 
-	snprintf(tmp, sizeof(tmp), str_frmt->string,
-		 str_frmt->params[0],
-		 str_frmt->params[1],
-		 str_frmt->params[2],
-		 str_frmt->params[3],
-		 str_frmt->params[4],
-		 str_frmt->params[5],
-		 str_frmt->params[6]);
+	if (str_frmt->invalid_string)
+		snprintf(tmp, sizeof(tmp), "BAD_FORMAT: %s", str_frmt->string);
+	else
+		snprintf(tmp, sizeof(tmp), str_frmt->string,
+			 str_frmt->params[0],
+			 str_frmt->params[1],
+			 str_frmt->params[2],
+			 str_frmt->params[3],
+			 str_frmt->params[4],
+			 str_frmt->params[5],
+			 str_frmt->params[6]);
 
 	trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost,
 		      str_frmt->event_id, tmp);
@@ -609,6 +654,13 @@ static int mlx5_tracer_handle_raw_string(struct mlx5_fw_tracer *tracer,
 	return 0;
 }
 
+static void mlx5_tracer_handle_bad_format_string(struct mlx5_fw_tracer *tracer,
+						 struct tracer_string_format *cur_string)
+{
+	cur_string->invalid_string = true;
+	list_add_tail(&cur_string->list, &tracer->ready_strings_list);
+}
+
 static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer,
 					   struct tracer_event *tracer_event)
 {
@@ -619,12 +671,18 @@ static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer,
 		if (!cur_string)
 			return mlx5_tracer_handle_raw_string(tracer, tracer_event);
 
-		cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string);
-		cur_string->last_param_num = 0;
 		cur_string->event_id = tracer_event->event_id;
 		cur_string->tmsn = tracer_event->string_event.tmsn;
 		cur_string->timestamp = tracer_event->string_event.timestamp;
 		cur_string->lost = tracer_event->lost_event;
+		cur_string->last_param_num = 0;
+		cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string);
+		if (cur_string->num_of_params < 0) {
+			pr_debug("%s Invalid format string parameters\n",
+				 __func__);
+			mlx5_tracer_handle_bad_format_string(tracer, cur_string);
+			return 0;
+		}
 		if (cur_string->num_of_params == 0) /* trace with no params */
 			list_add_tail(&cur_string->list, &tracer->ready_strings_list);
 	} else {
@@ -634,6 +692,11 @@ static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer,
 				 __func__, tracer_event->string_event.tmsn);
 			return mlx5_tracer_handle_raw_string(tracer, tracer_event);
 		}
+		if (cur_string->num_of_params < 0) {
+			pr_debug("%s string parameter of invalid string, dumping\n",
+				 __func__);
+			return 0;
+		}
 		cur_string->last_param_num += 1;
 		if (cur_string->last_param_num > TRACER_MAX_PARAMS) {
 			pr_debug("%s Number of params exceeds the max (%d)\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
index 5c548bb74f07b6..30d0bcba884791 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -125,6 +125,7 @@ struct tracer_string_format {
 	struct list_head list;
 	u32 timestamp;
 	bool lost;
+	bool invalid_string;
 };
 
 enum mlx5_fw_tracer_ownership_state {

From c0289f67f7d6a0dfba0e92cfe661a5c70c8c6e92 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 9 Dec 2025 14:56:12 +0200
Subject: [PATCH 215/258] net/mlx5: fw_tracer, Handle escaped percent properly

The firmware tracer's format string validation and parameter counting
did not properly handle escaped percent signs (%%). This caused
fw_tracer to count more parameters when trace format strings contained
literal percent characters.

To fix it, allow %% to pass string validation and skip %% sequences when
counting parameters since they represent literal percent signs rather
than format specifiers.

Fixes: 70dd6fdb8987 ("net/mlx5: FW tracer, parse traces and kernel tracing support")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reported-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Closes: https://lore.kernel.org/netdev/hanz6rzrb2bqbplryjrakvkbmv4y5jlmtthnvi3thg5slqvelp@t3s3erottr6s/
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1765284977-1363052-5-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../mellanox/mlx5/core/diag/fw_tracer.c       | 20 +++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index b415dfe5de45fc..6b4ec457ce2274 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -368,11 +368,11 @@ static bool mlx5_is_valid_spec(const char *str)
 	while (isdigit(*str) || *str == '#' || *str == '.' || *str == 'l')
 		str++;
 
-	/* Check if it's a valid integer/hex specifier:
+	/* Check if it's a valid integer/hex specifier or %%:
 	 * Valid formats: %x, %d, %i, %u, etc.
 	 */
 	if (*str != 'x' && *str != 'X' && *str != 'd' && *str != 'i' &&
-	    *str != 'u' && *str != 'c')
+	    *str != 'u' && *str != 'c' && *str != '%')
 		return false;
 
 	return true;
@@ -390,7 +390,11 @@ static bool mlx5_tracer_validate_params(const char *str)
 		if (!mlx5_is_valid_spec(substr + 1))
 			return false;
 
-		substr = strstr(substr + 1, PARAM_CHAR);
+		if (*(substr + 1) == '%')
+			substr = strstr(substr + 2, PARAM_CHAR);
+		else
+			substr = strstr(substr + 1, PARAM_CHAR);
+
 	}
 
 	return true;
@@ -469,11 +473,15 @@ static int mlx5_tracer_get_num_of_params(char *str)
 		substr = strstr(pstr, VAL_PARM);
 	}
 
-	/* count all the % characters */
+	/* count all the % characters, but skip %% (escaped percent) */
 	substr = strstr(str, PARAM_CHAR);
 	while (substr) {
-		num_of_params += 1;
-		str = substr + 1;
+		if (*(substr + 1) != '%') {
+			num_of_params += 1;
+			str = substr + 1;
+		} else {
+			str = substr + 2;
+		}
 		substr = strstr(str, PARAM_CHAR);
 	}
 

From 367e501f8b095eca08d2eb0ba4ccea5b5e82c169 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 9 Dec 2025 14:56:13 +0200
Subject: [PATCH 216/258] net/mlx5: Serialize firmware reset with devlink

The firmware reset mechanism can be triggered by asynchronous events,
which may race with other devlink operations like devlink reload or
devlink dev eswitch set, potentially leading to inconsistent states.

This patch addresses the race by using the devl_lock to serialize the
firmware reset against other devlink operations. When a reset is
requested, the driver attempts to acquire the lock. If successful, it
sets a flag to block devlink reload or eswitch changes, ACKs the reset
to firmware and then releases the lock. If the lock is already held by
another operation, the driver NACKs the firmware reset request,
indicating that the reset cannot proceed.

Firmware reset does not keep the devl_lock and instead uses an internal
firmware reset bit. This is because firmware resets can be triggered by
asynchronous events, and processed in different threads. It is illegal
and unsafe to acquire a lock in one thread and attempt to release it in
another, as lock ownership is intrinsically thread-specific.

This change ensures that firmware resets and other devlink operations
are mutually exclusive during the critical reset request phase,
preventing race conditions.

Fixes: 38b9f903f22b ("net/mlx5: Handle sync reset request event")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mateusz Berezecki <mberezecki@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1765284977-1363052-6-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../net/ethernet/mellanox/mlx5/core/devlink.c |  5 +++
 .../mellanox/mlx5/core/eswitch_offloads.c     |  6 +++
 .../ethernet/mellanox/mlx5/core/fw_reset.c    | 45 +++++++++++++++++--
 .../ethernet/mellanox/mlx5/core/fw_reset.h    |  1 +
 4 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 887adf4807d164..ea77fbd98396a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -197,6 +197,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
 	struct pci_dev *pdev = dev->pdev;
 	int ret = 0;
 
+	if (mlx5_fw_reset_in_progress(dev)) {
+		NL_SET_ERR_MSG_MOD(extack, "Can't reload during firmware reset");
+		return -EBUSY;
+	}
+
 	if (mlx5_dev_is_lightweight(dev)) {
 		if (action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT)
 			return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 8de6c7f6c2944d..ea94a727633f1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -52,6 +52,7 @@
 #include "devlink.h"
 #include "lag/lag.h"
 #include "en/tc/post_meter.h"
+#include "fw_reset.h"
 
 /* There are two match-all miss flows, one for unicast dst mac and
  * one for multicast.
@@ -3991,6 +3992,11 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
 	if (IS_ERR(esw))
 		return PTR_ERR(esw);
 
+	if (mlx5_fw_reset_in_progress(esw->dev)) {
+		NL_SET_ERR_MSG_MOD(extack, "Can't change eswitch mode during firmware reset");
+		return -EBUSY;
+	}
+
 	if (esw_mode_from_devlink(mode, &mlx5_mode))
 		return -EINVAL;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index b81de792c181a3..ae10665c53f32d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -15,6 +15,7 @@ enum {
 	MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS,
 	MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED,
 	MLX5_FW_RESET_FLAGS_UNLOAD_EVENT,
+	MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS,
 };
 
 struct mlx5_fw_reset {
@@ -128,6 +129,16 @@ int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_ty
 	return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL, NULL);
 }
 
+bool mlx5_fw_reset_in_progress(struct mlx5_core_dev *dev)
+{
+	struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+	if (!fw_reset)
+		return false;
+
+	return test_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags);
+}
+
 static int mlx5_fw_reset_get_reset_method(struct mlx5_core_dev *dev,
 					  u8 *reset_method)
 {
@@ -243,6 +254,8 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
 							BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
 		devl_unlock(devlink);
 	}
+
+	clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags);
 }
 
 static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
@@ -462,27 +475,48 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
 	struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
 						      reset_request_work);
 	struct mlx5_core_dev *dev = fw_reset->dev;
+	bool nack_request = false;
+	struct devlink *devlink;
 	int err;
 
 	err = mlx5_fw_reset_get_reset_method(dev, &fw_reset->reset_method);
-	if (err)
+	if (err) {
+		nack_request = true;
 		mlx5_core_warn(dev, "Failed reading MFRL, err %d\n", err);
+	} else if (!mlx5_is_reset_now_capable(dev, fw_reset->reset_method) ||
+		   test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
+			    &fw_reset->reset_flags)) {
+		nack_request = true;
+	}
 
-	if (err || test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) ||
-	    !mlx5_is_reset_now_capable(dev, fw_reset->reset_method)) {
+	devlink = priv_to_devlink(dev);
+	/* For external resets, try to acquire devl_lock. Skip if devlink reset is
+	 * pending (lock already held)
+	 */
+	if (nack_request ||
+	    (!test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP,
+		       &fw_reset->reset_flags) &&
+	     !devl_trylock(devlink))) {
 		err = mlx5_fw_reset_set_reset_sync_nack(dev);
 		mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s",
 			       err ? "Failed" : "Sent");
 		return;
 	}
+
 	if (mlx5_sync_reset_set_reset_requested(dev))
-		return;
+		goto unlock;
+
+	set_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags);
 
 	err = mlx5_fw_reset_set_reset_sync_ack(dev);
 	if (err)
 		mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
 	else
 		mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n");
+
+unlock:
+	if (!test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags))
+		devl_unlock(devlink);
 }
 
 static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev, u16 dev_id)
@@ -722,6 +756,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work)
 
 	if (mlx5_sync_reset_clear_reset_requested(dev, true))
 		return;
+
+	clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags);
 	mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
 }
 
@@ -758,6 +794,7 @@ static void mlx5_sync_reset_timeout_work(struct work_struct *work)
 
 	if (mlx5_sync_reset_clear_reset_requested(dev, true))
 		return;
+	clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags);
 	mlx5_core_warn(dev, "PCI Sync FW Update Reset Timeout.\n");
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
index d5b28525c960dc..2d96b2adc1cdf1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
@@ -10,6 +10,7 @@ int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_ty
 int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
 				 struct netlink_ext_ack *extack);
 int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
+bool mlx5_fw_reset_in_progress(struct mlx5_core_dev *dev);
 
 int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
 void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked);

From e35d7da8dd9e55b37c3e8ab548f6793af0c2ab49 Mon Sep 17 00:00:00 2001
From: Jianbo Liu <jianbol@nvidia.com>
Date: Tue, 9 Dec 2025 14:56:14 +0200
Subject: [PATCH 217/258] net/mlx5e: Use ip6_dst_lookup instead of
 ipv6_dst_lookup_flow for MAC init

Replace ipv6_stub->ipv6_dst_lookup_flow() with ip6_dst_lookup() in
mlx5e_ipsec_init_macs() since IPsec transformations are not needed
during Security Association setup - only basic routing information is
required for nexthop MAC address resolution.

This resolves an issue where XfrmOutNoStates error counter would be
incremented when xfrm policy is configured before xfrm state, as the
IPsec-aware routing function would attempt policy checks during SA
initialization.

Fixes: 71670f766b8f ("net/mlx5e: Support routed networks during IPsec MACs initialization")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1765284977-1363052-7-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 35d9530037a655..6c79b9cea2efbc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -342,9 +342,8 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
 		rt_dst_entry = &rt->dst;
 		break;
 	case AF_INET6:
-		rt_dst_entry = ipv6_stub->ipv6_dst_lookup_flow(
-			dev_net(netdev), NULL, &fl6, NULL);
-		if (IS_ERR(rt_dst_entry))
+		if (!IS_ENABLED(CONFIG_IPV6) ||
+		    ip6_dst_lookup(dev_net(netdev), NULL, &rt_dst_entry, &fl6))
 			goto neigh;
 		break;
 	default:

From 9ab89bde13e5251e1d0507e1cc426edcdfe19142 Mon Sep 17 00:00:00 2001
From: Jianbo Liu <jianbol@nvidia.com>
Date: Tue, 9 Dec 2025 14:56:15 +0200
Subject: [PATCH 218/258] net/mlx5e: Trigger neighbor resolution for unresolved
 destinations

When initializing the MAC addresses for an outbound IPsec packet offload
rule in mlx5e_ipsec_init_macs, the call to dst_neigh_lookup is used to
find the next-hop neighbor (typically the gateway in tunnel mode).
This call might create a new neighbor entry if one doesn't already
exist. This newly created entry starts in the INCOMPLETE state, as the
kernel hasn't yet sent an ARP or NDISC probe to resolve the MAC
address. In this case, neigh_ha_snapshot will correctly return an
all-zero MAC address.

IPsec packet offload requires the actual next-hop MAC address to
program the rule correctly. If the neighbor state is INCOMPLETE when
the rule is created, the hardware rule is programmed with an all-zero
destination MAC address. Packets sent using this rule will be
subsequently dropped by the receiving network infrastructure or host.

This patch adds a check specifically for the outbound offload path. If
neigh_ha_snapshot returns an all-zero MAC address, it proactively
calls neigh_event_send(n, NULL). This ensures the kernel immediately
sends the initial ARP or NDISC probe if one isn't already pending,
accelerating the resolution process. This helps prevent the hardware
rule from being programmed with an invalid MAC address and avoids
packet drops due to unresolved neighbors.

Fixes: 71670f766b8f ("net/mlx5e: Support routed networks during IPsec MACs initialization")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1765284977-1363052-8-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 6c79b9cea2efbc..a8fb4bec369cf4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -358,6 +358,9 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
 
 	neigh_ha_snapshot(addr, n, netdev);
 	ether_addr_copy(dst, addr);
+	if (attrs->dir == XFRM_DEV_OFFLOAD_OUT &&
+	    is_zero_ether_addr(addr))
+		neigh_event_send(n, NULL);
 	dst_release(rt_dst_entry);
 	neigh_release(n);
 	return;

From c8591decd9dbf395cb8ae398e70b0438fdd24aee Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Tue, 9 Dec 2025 14:56:16 +0200
Subject: [PATCH 219/258] net/mlx5e: Do not update BQL of old txqs during
 channel reconfiguration

During channel reconfiguration (e.g., ethtool private flags changes),
the driver can trigger a kernel BUG_ON in dql_completed() with the error
"kernel BUG at lib/dynamic_queue_limits.c:99".

The issue occurs in the following sequence:

During mlx5e_safe_switch_params(), old channels are deactivated via
mlx5e_deactivate_txqsq(). New channels are created and activated, taking
ownership of the netdev_queues and their BQL state.

When old channels are closed via mlx5e_close_txqsq(), there may be
pending TX descriptors (sq->cc != sq->pc) that were in-flight during the
deactivation.

mlx5e_free_txqsq_descs() frees these pending descriptors and attempts to
complete them via netdev_tx_completed_queue().

However, the BQL state (dql->num_queued and dql->num_completed) have
been reset in mlx5e_activate_txqsq and belong to the new queue owner,
leading to dql->num_queued - dql->num_completed < nbytes.

This triggers BUG_ON(count > num_queued - num_completed) in
dql_completed().

Fixes: 3b88a535a8e1 ("net/mlx5e: Defer channels closure to reduce interface down time")
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: William Tu <witu@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Link: https://patch.msgid.link/1765284977-1363052-9-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 14884b9ea7f396..a01ee656a1e7f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -939,7 +939,11 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
 	sq->dma_fifo_cc = dma_fifo_cc;
 	sq->cc = sqcc;
 
-	netdev_tx_completed_queue(sq->txq, npkts, nbytes);
+	/* Do not update BQL for TXQs that got replaced by new active ones, as
+	 * netdev_tx_reset_queue() is called for them in mlx5e_activate_txqsq().
+	 */
+	if (sq == sq->priv->txq2sq[sq->txq_ix])
+		netdev_tx_completed_queue(sq->txq, npkts, nbytes);
 }
 
 #ifdef CONFIG_MLX5_CORE_IPOIB

From 4198a14c8c6252fd1191afaa742dd515dcaf3487 Mon Sep 17 00:00:00 2001
From: Cosmin Ratiu <cratiu@nvidia.com>
Date: Tue, 9 Dec 2025 14:56:17 +0200
Subject: [PATCH 220/258] net/mlx5e: Don't include PSP in the hard MTU
 calculations

Commit [1] added the 40 bytes required by the PSP header+trailer and the
UDP header to MLX5E_ETH_HARD_MTU, which limits the device-wide max
software MTU that could be set. This is not okay, because most packets
are not PSP packets and it doesn't make sense to always reserve space
for headers which won't get added in most cases.

As it turns out, for TCP connections, PSP overhead is already taken into
account in the TCP MSS calculations via inet_csk(sk)->icsk_ext_hdr_len.
This was added in commit [2]. This means that the extra space reserved
in the hard MTU for mlx5 ends up unused and wasted.

Remove the unnecessary 40 byte reservation from hard MTU.

[1] commit e5a1861a298e ("net/mlx5e: Implement PSP Tx data path")
[2] commit e97269257fe4 ("net: psp: update the TCP MSS to reflect PSP
packet overhead")

Fixes: e5a1861a298e ("net/mlx5e: Implement PSP Tx data path")
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Shahar Shitrit <shshitrit@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1765284977-1363052-10-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 811178d8976cf3..262dc032e276a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -69,7 +69,7 @@ struct page_pool;
 #define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
 #define MLX5E_METADATA_ETHER_LEN 8
 
-#define MLX5E_ETH_HARD_MTU (ETH_HLEN + PSP_ENCAP_HLEN + PSP_TRL_SIZE + VLAN_HLEN + ETH_FCS_LEN)
+#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
 
 #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
 #define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu))

From 15564bd67e2975002f2a8e9defee33e321d3183f Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Tue, 9 Dec 2025 14:30:15 -0500
Subject: [PATCH 221/258] net/handshake: duplicate handshake cancellations leak
 socket

When a handshake request is cancelled it is removed from the
handshake_net->hn_requests list, but it is still present in the
handshake_rhashtbl until it is destroyed.

If a second cancellation request arrives for the same handshake request,
then remove_pending() will return false... and assuming
HANDSHAKE_F_REQ_COMPLETED isn't set in req->hr_flags, we'll continue
processing through the out_true label, where we put another reference on
the sock and a refcount underflow occurs.

This can happen for example if a handshake times out - particularly if
the SUNRPC client sends the AUTH_TLS probe to the server but doesn't
follow it up with the ClientHello due to a problem with tlshd.  When the
timeout is hit on the server, the server will send a FIN, which triggers
a cancellation request via xs_reset_transport().  When the timeout is
hit on the client, another cancellation request happens via
xs_tls_handshake_sync().

Add a test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED) in the pending cancel
path so duplicate cancels can be detected.

Fixes: 3b3009ea8abb ("net/handshake: Create a NETLINK service for handling handshake requests")
Suggested-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Link: https://patch.msgid.link/20251209193015.3032058-1-smayhew@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/handshake/request.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/handshake/request.c b/net/handshake/request.c
index 89435ed755cd00..6b7e3e0bf3996e 100644
--- a/net/handshake/request.c
+++ b/net/handshake/request.c
@@ -326,7 +326,11 @@ bool handshake_req_cancel(struct sock *sk)
 
 	hn = handshake_pernet(net);
 	if (hn && remove_pending(hn, req)) {
-		/* Request hadn't been accepted */
+		/* Request hadn't been accepted - mark cancelled */
+		if (test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) {
+			trace_handshake_cancel_busy(net, req, sk);
+			return false;
+		}
 		goto out_true;
 	}
 	if (test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) {

From c9b5645fd8ca10f310e41b07540f98e6a9720f40 Mon Sep 17 00:00:00 2001
From: Thomas Fourier <fourier.thomas@gmail.com>
Date: Wed, 17 Dec 2025 10:36:48 +0100
Subject: [PATCH 222/258] block: rnbd-clt: Fix leaked ID in init_dev()

If kstrdup() fails in init_dev(), then the newly allocated ID is lost.

Fixes: 64e8a6ece1a5 ("block/rnbd-clt: Dynamically alloc buffer for pathname & blk_symlink_name")
Signed-off-by: Thomas Fourier <fourier.thomas@gmail.com>
Acked-by: Jack Wang <jinpu.wang@ionos.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rnbd/rnbd-clt.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index f1409e54010a67..d1c354636315d2 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1423,9 +1423,11 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
 		goto out_alloc;
 	}
 
-	ret = ida_alloc_max(&index_ida, (1 << (MINORBITS - RNBD_PART_BITS)) - 1,
-			    GFP_KERNEL);
-	if (ret < 0) {
+	dev->clt_device_id = ida_alloc_max(&index_ida,
+					   (1 << (MINORBITS - RNBD_PART_BITS)) - 1,
+					   GFP_KERNEL);
+	if (dev->clt_device_id < 0) {
+		ret = dev->clt_device_id;
 		pr_err("Failed to initialize device '%s' from session %s, allocating idr failed, err: %d\n",
 		       pathname, sess->sessname, ret);
 		goto out_queues;
@@ -1434,10 +1436,9 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
 	dev->pathname = kstrdup(pathname, GFP_KERNEL);
 	if (!dev->pathname) {
 		ret = -ENOMEM;
-		goto out_queues;
+		goto out_ida;
 	}
 
-	dev->clt_device_id	= ret;
 	dev->sess		= sess;
 	dev->access_mode	= access_mode;
 	dev->nr_poll_queues	= nr_poll_queues;
@@ -1453,6 +1454,8 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
 
 	return dev;
 
+out_ida:
+	ida_free(&index_ida, dev->clt_device_id);
 out_queues:
 	kfree(dev->hw_queues);
 out_alloc:

From b98f06f9a5d3b32cf1b3998b4115fb3b5478752d Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@google.com>
Date: Wed, 10 Dec 2025 08:11:12 +0000
Subject: [PATCH 223/258] sctp: Fetch inet6_sk() after setting ->pinet6 in
 sctp_clone_sock().

syzbot reported the lockdep splat below. [0]

sctp_clone_sock() sets the child socket's ipv6_mc_list to NULL,
but somehow sock_release() in an error path finally acquires
lock_sock() in ipv6_sock_mc_close().

The root cause is that sctp_clone_sock() fetches inet6_sk(newsk)
before setting newinet->pinet6, meaning that the parent's
ipv6_mc_list was actually cleared.

Also, sctp_v6_copy_ip_options() uses inet6_sk() but is called
before newinet->pinet6 is set.

Let's use inet6_sk() only after setting newinet->pinet6.

[0]:
WARNING: possible recursive locking detected
syzkaller #0 Not tainted

syz.0.17/5996 is trying to acquire lock:
ffff888031af4c60 (sk_lock-AF_INET6){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1700 [inline]
ffff888031af4c60 (sk_lock-AF_INET6){+.+.}-{0:0}, at: ipv6_sock_mc_close+0xd3/0x140 net/ipv6/mcast.c:348

but task is already holding lock:
ffff888031af4320 (sk_lock-AF_INET6){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1700 [inline]
ffff888031af4320 (sk_lock-AF_INET6){+.+.}-{0:0}, at: sctp_getsockopt+0x135/0xb60 net/sctp/socket.c:8131

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(sk_lock-AF_INET6);
  lock(sk_lock-AF_INET6);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

1 lock held by syz.0.17/5996:
 #0: ffff888031af4320 (sk_lock-AF_INET6){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1700 [inline]
 #0: ffff888031af4320 (sk_lock-AF_INET6){+.+.}-{0:0}, at: sctp_getsockopt+0x135/0xb60 net/sctp/socket.c:8131

stack backtrace:
CPU: 0 UID: 0 PID: 5996 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full)
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/25/2025
Call Trace:
 <TASK>
 dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120
 print_deadlock_bug+0x279/0x290 kernel/locking/lockdep.c:3041
 check_deadlock kernel/locking/lockdep.c:3093 [inline]
 validate_chain kernel/locking/lockdep.c:3895 [inline]
 __lock_acquire+0x2540/0x2cf0 kernel/locking/lockdep.c:5237
 lock_acquire+0x117/0x340 kernel/locking/lockdep.c:5868
 lock_sock_nested+0x48/0x100 net/core/sock.c:3780
 lock_sock include/net/sock.h:1700 [inline]
 ipv6_sock_mc_close+0xd3/0x140 net/ipv6/mcast.c:348
 inet6_release+0x47/0x70 net/ipv6/af_inet6.c:482
 __sock_release net/socket.c:653 [inline]
 sock_release+0x85/0x150 net/socket.c:681
 sctp_getsockopt_peeloff_common+0x56b/0x770 net/sctp/socket.c:5732
 sctp_getsockopt_peeloff_flags+0x13b/0x230 net/sctp/socket.c:5801
 sctp_getsockopt+0x3ab/0xb60 net/sctp/socket.c:8151
 do_sock_getsockopt+0x2b4/0x3d0 net/socket.c:2399
 __sys_getsockopt net/socket.c:2428 [inline]
 __do_sys_getsockopt net/socket.c:2435 [inline]
 __se_sys_getsockopt net/socket.c:2432 [inline]
 __x64_sys_getsockopt+0x1a5/0x250 net/socket.c:2432
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xf80 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f8f8c38f749
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffcfdade018 EFLAGS: 00000246 ORIG_RAX: 0000000000000037
RAX: ffffffffffffffda RBX: 00007f8f8c5e5fa0 RCX: 00007f8f8c38f749
RDX: 000000000000007a RSI: 0000000000000084 RDI: 0000000000000003
RBP: 00007f8f8c413f91 R08: 0000200000000040 R09: 0000000000000000
R10: 0000200000000340 R11: 0000000000000246 R12: 0000000000000000
R13: 00007f8f8c5e5fa0 R14: 00007f8f8c5e5fa0 R15: 0000000000000005
 </TASK>

Fixes: 16942cf4d3e31 ("sctp: Use sk_clone() in sctp_accept().")
Reported-by: syzbot+c59e6bb54e7620495725@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/6936d112.a70a0220.38f243.00a7.GAE@google.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251210081206.1141086-2-kuniyu@google.com
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/sctp/socket.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d808096f5ab17a..2493a5b1fa3ca7 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4863,8 +4863,6 @@ static struct sock *sctp_clone_sock(struct sock *sk,
 
 	newsp->pf->to_sk_daddr(&asoc->peer.primary_addr, newsk);
 	newinet->inet_dport = htons(asoc->peer.port);
-
-	newsp->pf->copy_ip_options(sk, newsk);
 	atomic_set(&newinet->inet_id, get_random_u16());
 
 	inet_set_bit(MC_LOOP, newsk);
@@ -4874,17 +4872,20 @@ static struct sock *sctp_clone_sock(struct sock *sk,
 
 #if IS_ENABLED(CONFIG_IPV6)
 	if (sk->sk_family == AF_INET6) {
-		struct ipv6_pinfo *newnp = inet6_sk(newsk);
+		struct ipv6_pinfo *newnp;
 
 		newinet->pinet6 = &((struct sctp6_sock *)newsk)->inet6;
 		newinet->ipv6_fl_list = NULL;
 
+		newnp = inet6_sk(newsk);
 		memcpy(newnp, inet6_sk(sk), sizeof(struct ipv6_pinfo));
 		newnp->ipv6_mc_list = NULL;
 		newnp->ipv6_ac_list = NULL;
 	}
 #endif
 
+	newsp->pf->copy_ip_options(sk, newsk);
+
 	newsp->do_auto_asconf = 0;
 	skb_queue_head_init(&newsp->pd_lobby);
 

From d7ff61e6f3ef856da82bb90cbd0391839a8917a4 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@google.com>
Date: Wed, 10 Dec 2025 08:11:13 +0000
Subject: [PATCH 224/258] sctp: Clear inet_opt in sctp_v6_copy_ip_options().

syzbot reported the splat below. [0]

Since the cited commit, the child socket inherits all fields
of its parent socket unless explicitly cleared.

syzbot set IP_OPTIONS to AF_INET6 socket and created a child
socket inheriting inet_sk(sk)->inet_opt.

sctp_v6_copy_ip_options() only clones np->opt, and leaving
inet_opt results in double-free.

Let's clear inet_opt in sctp_v6_copy_ip_options().

[0]:
BUG: KASAN: double-free in inet_sock_destruct+0x538/0x740 net/ipv4/af_inet.c:159
Free of addr ffff8880304b6d40 by task ksoftirqd/0/15

CPU: 0 UID: 0 PID: 15 Comm: ksoftirqd/0 Not tainted syzkaller #0 PREEMPT(full)
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/02/2025
Call Trace:
 <TASK>
 dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120
 print_address_description mm/kasan/report.c:378 [inline]
 print_report+0xca/0x240 mm/kasan/report.c:482
 kasan_report_invalid_free+0xea/0x110 mm/kasan/report.c:557
 check_slab_allocation+0xe1/0x130 include/linux/page-flags.h:-1
 kasan_slab_pre_free include/linux/kasan.h:198 [inline]
 slab_free_hook mm/slub.c:2484 [inline]
 slab_free mm/slub.c:6630 [inline]
 kfree+0x148/0x6d0 mm/slub.c:6837
 inet_sock_destruct+0x538/0x740 net/ipv4/af_inet.c:159
 __sk_destruct+0x89/0x660 net/core/sock.c:2350
 sock_put include/net/sock.h:1991 [inline]
 sctp_endpoint_destroy_rcu+0xa1/0xf0 net/sctp/endpointola.c:197
 rcu_do_batch kernel/rcu/tree.c:2605 [inline]
 rcu_core+0xcab/0x1770 kernel/rcu/tree.c:2861
 handle_softirqs+0x286/0x870 kernel/softirq.c:622
 run_ksoftirqd+0x9b/0x100 kernel/softirq.c:1063
 smpboot_thread_fn+0x542/0xa60 kernel/smpboot.c:160
 kthread+0x711/0x8a0 kernel/kthread.c:463
 ret_from_fork+0x4bc/0x870 arch/x86/kernel/process.c:158
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
 </TASK>

Allocated by task 6003:
 kasan_save_stack mm/kasan/common.c:56 [inline]
 kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
 poison_kmalloc_redzone mm/kasan/common.c:400 [inline]
 __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:417
 kasan_kmalloc include/linux/kasan.h:262 [inline]
 __do_kmalloc_node mm/slub.c:5642 [inline]
 __kmalloc_noprof+0x411/0x7f0 mm/slub.c:5654
 kmalloc_noprof include/linux/slab.h:961 [inline]
 kzalloc_noprof include/linux/slab.h:1094 [inline]
 ip_options_get+0x51/0x4c0 net/ipv4/ip_options.c:517
 do_ip_setsockopt+0x1d9b/0x2d00 net/ipv4/ip_sockglue.c:1087
 ip_setsockopt+0x66/0x110 net/ipv4/ip_sockglue.c:1417
 do_sock_setsockopt+0x17c/0x1b0 net/socket.c:2360
 __sys_setsockopt net/socket.c:2385 [inline]
 __do_sys_setsockopt net/socket.c:2391 [inline]
 __se_sys_setsockopt net/socket.c:2388 [inline]
 __x64_sys_setsockopt+0x13f/0x1b0 net/socket.c:2388
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

Freed by task 15:
 kasan_save_stack mm/kasan/common.c:56 [inline]
 kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
 __kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:587
 kasan_save_free_info mm/kasan/kasan.h:406 [inline]
 poison_slab_object mm/kasan/common.c:252 [inline]
 __kasan_slab_free+0x5c/0x80 mm/kasan/common.c:284
 kasan_slab_free include/linux/kasan.h:234 [inline]
 slab_free_hook mm/slub.c:2539 [inline]
 slab_free mm/slub.c:6630 [inline]
 kfree+0x19a/0x6d0 mm/slub.c:6837
 inet_sock_destruct+0x538/0x740 net/ipv4/af_inet.c:159
 __sk_destruct+0x89/0x660 net/core/sock.c:2350
 sock_put include/net/sock.h:1991 [inline]
 sctp_endpoint_destroy_rcu+0xa1/0xf0 net/sctp/endpointola.c:197
 rcu_do_batch kernel/rcu/tree.c:2605 [inline]
 rcu_core+0xcab/0x1770 kernel/rcu/tree.c:2861
 handle_softirqs+0x286/0x870 kernel/softirq.c:622
 run_ksoftirqd+0x9b/0x100 kernel/softirq.c:1063
 smpboot_thread_fn+0x542/0xa60 kernel/smpboot.c:160
 kthread+0x711/0x8a0 kernel/kthread.c:463
 ret_from_fork+0x4bc/0x870 arch/x86/kernel/process.c:158
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245

Fixes: 16942cf4d3e31 ("sctp: Use sk_clone() in sctp_accept().")
Reported-by: syzbot+ec33a1a006ed5abe7309@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/6936d112.a70a0220.38f243.00a8.GAE@google.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251210081206.1141086-3-kuniyu@google.com
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/sctp/ipv6.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 069b7e45d8bda0..531cb0690007ad 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -492,6 +492,8 @@ static void sctp_v6_copy_ip_options(struct sock *sk, struct sock *newsk)
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct ipv6_txoptions *opt;
 
+	inet_sk(newsk)->inet_opt = NULL;
+
 	newnp = inet6_sk(newsk);
 
 	rcu_read_lock();

From 1d856251a009d64007d71d01c988bead6d3a098c Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Wed, 10 Dec 2025 11:22:54 -0500
Subject: [PATCH 225/258] net/sched: act_mirred: fix loop detection

Fix a loop scenario of ethx:egress->ethx:egress

Example setup to reproduce:
tc qdisc add dev ethx root handle 1: drr
tc filter add dev ethx parent 1: protocol ip prio 1 matchall \
         action mirred egress redirect dev ethx

Now ping out of ethx and you get a deadlock:

[  116.892898][  T307] ============================================
[  116.893182][  T307] WARNING: possible recursive locking detected
[  116.893418][  T307] 6.18.0-rc6-01205-ge05021a829b8-dirty #204 Not tainted
[  116.893682][  T307] --------------------------------------------
[  116.893926][  T307] ping/307 is trying to acquire lock:
[  116.894133][  T307] ffff88800c122908 (&sch->root_lock_key){+...}-{3:3}, at: __dev_queue_xmit+0x2210/0x3b50
[  116.894517][  T307]
[  116.894517][  T307] but task is already holding lock:
[  116.894836][  T307] ffff88800c122908 (&sch->root_lock_key){+...}-{3:3}, at: __dev_queue_xmit+0x2210/0x3b50
[  116.895252][  T307]
[  116.895252][  T307] other info that might help us debug this:
[  116.895608][  T307]  Possible unsafe locking scenario:
[  116.895608][  T307]
[  116.895901][  T307]        CPU0
[  116.896057][  T307]        ----
[  116.896200][  T307]   lock(&sch->root_lock_key);
[  116.896392][  T307]   lock(&sch->root_lock_key);
[  116.896605][  T307]
[  116.896605][  T307]  *** DEADLOCK ***
[  116.896605][  T307]
[  116.896864][  T307]  May be due to missing lock nesting notation
[  116.896864][  T307]
[  116.897123][  T307] 6 locks held by ping/307:
[  116.897302][  T307]  #0: ffff88800b4b0250 (sk_lock-AF_INET){+.+.}-{0:0}, at: raw_sendmsg+0xb20/0x2cf0
[  116.897808][  T307]  #1: ffffffff88c839c0 (rcu_read_lock){....}-{1:3}, at: ip_output+0xa9/0x600
[  116.898138][  T307]  #2: ffffffff88c839c0 (rcu_read_lock){....}-{1:3}, at: ip_finish_output2+0x2c6/0x1ee0
[  116.898459][  T307]  #3: ffffffff88c83960 (rcu_read_lock_bh){....}-{1:3}, at: __dev_queue_xmit+0x200/0x3b50
[  116.898782][  T307]  #4: ffff88800c122908 (&sch->root_lock_key){+...}-{3:3}, at: __dev_queue_xmit+0x2210/0x3b50
[  116.899132][  T307]  #5: ffffffff88c83960 (rcu_read_lock_bh){....}-{1:3}, at: __dev_queue_xmit+0x200/0x3b50
[  116.899442][  T307]
[  116.899442][  T307] stack backtrace:
[  116.899667][  T307] CPU: 2 UID: 0 PID: 307 Comm: ping Not tainted 6.18.0-rc6-01205-ge05021a829b8-dirty #204 PREEMPT(voluntary)
[  116.899672][  T307] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[  116.899675][  T307] Call Trace:
[  116.899678][  T307]  <TASK>
[  116.899680][  T307]  dump_stack_lvl+0x6f/0xb0
[  116.899688][  T307]  print_deadlock_bug.cold+0xc0/0xdc
[  116.899695][  T307]  __lock_acquire+0x11f7/0x1be0
[  116.899704][  T307]  lock_acquire+0x162/0x300
[  116.899707][  T307]  ? __dev_queue_xmit+0x2210/0x3b50
[  116.899713][  T307]  ? srso_alias_return_thunk+0x5/0xfbef5
[  116.899717][  T307]  ? stack_trace_save+0x93/0xd0
[  116.899723][  T307]  _raw_spin_lock+0x30/0x40
[  116.899728][  T307]  ? __dev_queue_xmit+0x2210/0x3b50
[  116.899731][  T307]  __dev_queue_xmit+0x2210/0x3b50

Fixes: 178ca30889a1 ("Revert "net/sched: Fix mirred deadlock on device recursion"")
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20251210162255.1057663-1-jhs@mojatatu.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/sched/act_mirred.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index f27b583def78e4..91c96cc625bd63 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -281,6 +281,15 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
 
 	want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
 
+	if (dev == skb->dev && want_ingress == at_ingress) {
+		pr_notice_once("tc mirred: Loop (%s:%s --> %s:%s)\n",
+			       netdev_name(skb->dev),
+			       at_ingress ? "ingress" : "egress",
+			       netdev_name(dev),
+			       want_ingress ? "ingress" : "egress");
+		goto err_cant_do;
+	}
+
 	/* All mirred/redirected skbs should clear previous ct info */
 	nf_reset_ct(skb_to_send);
 	if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */

From 5cba412d6a005719d52dc72b6d7e5a59af979eaa Mon Sep 17 00:00:00 2001
From: Victor Nogueira <victor@mojatatu.com>
Date: Wed, 10 Dec 2025 11:22:55 -0500
Subject: [PATCH 226/258] selftests/tc-testing: Test case exercising potential
 mirred redirect deadlock

Add a test case that reproduces deadlock scenario where the user has
a drr qdisc attached to root and has a mirred action that redirects to
self on egress

Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20251210162255.1057663-2-jhs@mojatatu.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../tc-testing/tc-tests/actions/mirred.json   | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
index b73bd255ea36f5..da156feabcbff8 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -1052,5 +1052,51 @@
             "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
             "$TC actions flush action mirred"
         ]
+    },
+    {
+        "id": "7eba",
+        "name": "Redirect multiport: dummy egress ->  dummy egress (Loop)",
+        "category": [
+            "filter",
+            "mirred"
+        ],
+        "plugins": {
+            "requires": [
+                "nsPlugin"
+            ]
+        },
+        "setup": [
+            "$IP link set dev $DUMMY up || true",
+            "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+            "$TC qdisc add dev $DUMMY handle 1: root drr",
+            "$TC filter add dev $DUMMY parent 1: protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1"
+        ],
+        "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC -j -s actions get action mirred index 1",
+        "matchJSON": [
+            {
+                "total acts": 0
+            },
+            {
+                "actions": [
+                    {
+                        "order": 1,
+                        "kind": "mirred",
+                        "mirred_action": "redirect",
+                        "direction": "egress",
+                        "index": 1,
+                        "stats": {
+                            "packets": 1,
+                            "overlimits": 1
+                        },
+                        "not_in_hw": true
+                    }
+                ]
+            }
+        ],
+        "teardown": [
+            "$TC qdisc del dev $DUMMY root"
+        ]
     }
 ]

From 2939203ffee818f1e5ebd60bbb85a174d63aab9c Mon Sep 17 00:00:00 2001
From: Wei Fang <wei.fang@nxp.com>
Date: Thu, 11 Dec 2025 10:09:19 +0800
Subject: [PATCH 227/258] net: enetc: do not transmit redirected XDP frames
 when the link is down

In the current implementation, the enetc_xdp_xmit() always transmits
redirected XDP frames even if the link is down, but the frames cannot
be transmitted from TX BD rings when the link is down, so the frames
are still kept in the TX BD rings. If the XDP program is uninstalled,
users will see the following warning logs.

fsl_enetc 0000:00:00.0 eno0: timeout for tx ring #6 clear

More worse, the TX BD ring cannot work properly anymore, because the
HW PIR and CIR are not equal after the re-initialization of the TX
BD ring. At this point, the BDs between CIR and PIR are invalid,
which will cause a hardware malfunction.

Another reason is that there is internal context in the ring prefetch
logic that will retain the state from the first incarnation of the ring
and continue prefetching from the stale location when we re-initialize
the ring. The internal context is only reset by an FLR. That is to say,
for LS1028A ENETC, software cannot set the HW CIR and PIR when
initializing the TX BD ring.

It does not make sense to transmit redirected XDP frames when the link is
down. Add a link status check to prevent transmission in this condition.
This fixes part of the issue, but more complex cases remain. For example,
the TX BD ring may still contain unsent frames when the link goes down.
Those situations require additional patches, which will build on this
one.

Fixes: 9d2b68cc108d ("net: enetc: add support for XDP_REDIRECT")
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Reviewed-by: Hariprasad Kelam <hkelam@marvell.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20251211020919.121113-1-wei.fang@nxp.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index d5e5800b84eff7..53b26cece16a80 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1787,7 +1787,8 @@ int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
 	int xdp_tx_bd_cnt, i, k;
 	int xdp_tx_frm_cnt = 0;
 
-	if (unlikely(test_bit(ENETC_TX_DOWN, &priv->flags)))
+	if (unlikely(test_bit(ENETC_TX_DOWN, &priv->flags) ||
+		     !netif_carrier_ok(ndev)))
 		return -ENETDOWN;
 
 	enetc_lock_mdio();

From c2a16269742e176fccdd0ef9c016a233491a49ad Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Thu, 11 Dec 2025 10:37:35 +0800
Subject: [PATCH 228/258] net: hns3: using the num_tqps in the vf driver to
 apply for resources

Currently, hdev->htqp is allocated using hdev->num_tqps, and kinfo->tqp
is allocated using kinfo->num_tqps. However, kinfo->num_tqps is set to
min(new_tqps, hdev->num_tqps);  Therefore, kinfo->num_tqps may be smaller
than hdev->num_tqps, which causes some hdev->htqp[i] to remain
uninitialized in hclgevf_knic_setup().

Thus, this patch allocates hdev->htqp and kinfo->tqp using hdev->num_tqps,
ensuring that the lengths of hdev->htqp and kinfo->tqp are consistent
and that all elements are properly initialized.

Fixes: e2cb1dec9779 ("net: hns3: Add HNS3 VF HCL(Hardware Compatibility Layer) Support")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20251211023737.2327018-2-shaojijie@huawei.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 8fcf220a120d21..70327a73dee326 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -368,12 +368,12 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev)
 	new_tqps = kinfo->rss_size * num_tc;
 	kinfo->num_tqps = min(new_tqps, hdev->num_tqps);
 
-	kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps,
+	kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, hdev->num_tqps,
 				  sizeof(struct hnae3_queue *), GFP_KERNEL);
 	if (!kinfo->tqp)
 		return -ENOMEM;
 
-	for (i = 0; i < kinfo->num_tqps; i++) {
+	for (i = 0; i < hdev->num_tqps; i++) {
 		hdev->htqp[i].q.handle = &hdev->nic;
 		hdev->htqp[i].q.tqp_index = i;
 		kinfo->tqp[i] = &hdev->htqp[i].q;

From d180c11aa8a6fa735f9ac2c72c61364a9afc2ba7 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Thu, 11 Dec 2025 10:37:36 +0800
Subject: [PATCH 229/258] net: hns3: using the num_tqps to check whether
 tqp_index is out of range when vf get ring info from mbx

Currently, rss_size = num_tqps / tc_num. If tc_num is 1, then num_tqps
equals rss_size. However, if the tc_num is greater than 1, then rss_size
will be less than num_tqps, causing the tqp_index check for subsequent TCs
using rss_size to always fail.

This patch uses the num_tqps to check whether tqp_index is out of range,
instead of rss_size.

Fixes: 326334aad024 ("net: hns3: add a check for tqp_index in hclge_get_ring_chain_from_mbx()")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20251211023737.2327018-3-shaojijie@huawei.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index c7ff12a6c0764d..b7d4e06a55d40e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -193,10 +193,10 @@ static int hclge_get_ring_chain_from_mbx(
 		return -EINVAL;
 
 	for (i = 0; i < ring_num; i++) {
-		if (req->msg.param[i].tqp_index >= vport->nic.kinfo.rss_size) {
+		if (req->msg.param[i].tqp_index >= vport->nic.kinfo.num_tqps) {
 			dev_err(&hdev->pdev->dev, "tqp index(%u) is out of range(0-%u)\n",
 				req->msg.param[i].tqp_index,
-				vport->nic.kinfo.rss_size - 1U);
+				vport->nic.kinfo.num_tqps - 1U);
 			return -EINVAL;
 		}
 	}

From 6ef935e65902bfed53980ad2754b06a284ea8ac1 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Thu, 11 Dec 2025 10:37:37 +0800
Subject: [PATCH 230/258] net: hns3: add VLAN id validation before using

Currently, the VLAN id may be used without validation when
receive a VLAN configuration mailbox from VF. The length of
vlan_del_fail_bmap is BITS_TO_LONGS(VLAN_N_VID). It may cause
out-of-bounds memory access once the VLAN id is bigger than
or equal to VLAN_N_VID.

Therefore, VLAN id needs to be checked to ensure it is within
the range of VLAN_N_VID.

Fixes: fe4144d47eef ("net: hns3: sync VLAN filter entries when kill VLAN ID failed")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20251211023737.2327018-4-shaojijie@huawei.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index cf8abbe0184027..c589baea7c7754 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -10555,6 +10555,9 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
 	bool writen_to_tbl = false;
 	int ret = 0;
 
+	if (vlan_id >= VLAN_N_VID)
+		return -EINVAL;
+
 	/* When device is resetting or reset failed, firmware is unable to
 	 * handle mailbox. Just record the vlan id, and remove it after
 	 * reset finished.

From ccb8a3c08adf8121e2afb8e704f007ce99324d79 Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Tue, 16 Dec 2025 22:34:35 -0700
Subject: [PATCH 231/258] block: validate pi_offset integrity limit

The PI tuple must be contained within the metadata value, so validate
that pi_offset + pi_tuple_size <= metadata_size. This guards against
block drivers that report invalid pi_offset values.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-settings.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 51401f08ce05be..d138abc973bba0 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -161,10 +161,9 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
 		return -EINVAL;
 	}
 
-	if (bi->pi_tuple_size > bi->metadata_size) {
-		pr_warn("pi_tuple_size (%u) exceeds metadata_size (%u)\n",
-			 bi->pi_tuple_size,
-			 bi->metadata_size);
+	if (bi->pi_offset + bi->pi_tuple_size > bi->metadata_size) {
+		pr_warn("pi_offset (%u) + pi_tuple_size (%u) exceeds metadata_size (%u)\n",
+			bi->pi_offset, bi->pi_tuple_size, bi->metadata_size);
 		return -EINVAL;
 	}
 

From af65faf34f6e9919bdd2912770d25d2a73cbcc7c Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Tue, 16 Dec 2025 22:34:36 -0700
Subject: [PATCH 232/258] block: validate interval_exp integrity limit

Various code assumes that the integrity interval is at least 1 sector
and evenly divides the logical block size. Add these checks to
blk_validate_integrity_limits(). This guards against block drivers that
report invalid interval_exp values.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-settings.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index d138abc973bba0..a9e65dc090dae8 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -193,8 +193,13 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
 		break;
 	}
 
-	if (!bi->interval_exp)
+	if (!bi->interval_exp) {
 		bi->interval_exp = ilog2(lim->logical_block_size);
+	} else if (bi->interval_exp < SECTOR_SHIFT ||
+		   bi->interval_exp > ilog2(lim->logical_block_size)) {
+		pr_warn("invalid interval_exp %u\n", bi->interval_exp);
+		return -EINVAL;
+	}
 
 	/*
 	 * The PI generation / validation helpers do not expect intervals to

From 8e461304009135270e9ccf2d7e2dfe29daec9b60 Mon Sep 17 00:00:00 2001
From: Shuicheng Lin <shuicheng.lin@intel.com>
Date: Fri, 5 Dec 2025 23:47:17 +0000
Subject: [PATCH 233/258] drm/xe: Limit num_syncs to prevent oversized
 allocations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The exec and vm_bind ioctl allow userspace to specify an arbitrary
num_syncs value. Without bounds checking, a very large num_syncs
can force an excessively large allocation, leading to kernel warnings
from the page allocator as below.

Introduce DRM_XE_MAX_SYNCS (set to 1024) and reject any request
exceeding this limit.

"
------------[ cut here ]------------
WARNING: CPU: 0 PID: 1217 at mm/page_alloc.c:5124 __alloc_frozen_pages_noprof+0x2f8/0x2180 mm/page_alloc.c:5124
...
Call Trace:
 <TASK>
 alloc_pages_mpol+0xe4/0x330 mm/mempolicy.c:2416
 ___kmalloc_large_node+0xd8/0x110 mm/slub.c:4317
 __kmalloc_large_node_noprof+0x18/0xe0 mm/slub.c:4348
 __do_kmalloc_node mm/slub.c:4364 [inline]
 __kmalloc_noprof+0x3d4/0x4b0 mm/slub.c:4388
 kmalloc_noprof include/linux/slab.h:909 [inline]
 kmalloc_array_noprof include/linux/slab.h:948 [inline]
 xe_exec_ioctl+0xa47/0x1e70 drivers/gpu/drm/xe/xe_exec.c:158
 drm_ioctl_kernel+0x1f1/0x3e0 drivers/gpu/drm/drm_ioctl.c:797
 drm_ioctl+0x5e7/0xc50 drivers/gpu/drm/drm_ioctl.c:894
 xe_drm_ioctl+0x10b/0x170 drivers/gpu/drm/xe/xe_device.c:224
 vfs_ioctl fs/ioctl.c:51 [inline]
 __do_sys_ioctl fs/ioctl.c:598 [inline]
 __se_sys_ioctl fs/ioctl.c:584 [inline]
 __x64_sys_ioctl+0x18b/0x210 fs/ioctl.c:584
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xbb/0x380 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
...
"

v2: Add "Reported-by" and Cc stable kernels.
v3: Change XE_MAX_SYNCS from 64 to 1024. (Matt & Ashutosh)
v4: s/XE_MAX_SYNCS/DRM_XE_MAX_SYNCS/ (Matt)
v5: Do the check at the top of the exec func. (Matt)

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Reported-by: Koen Koning <koen.koning@intel.com>
Reported-by: Peter Senna Tschudin <peter.senna@linux.intel.com>
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6450
Cc: <stable@vger.kernel.org> # v6.12+
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Michal Mrozek <michal.mrozek@intel.com>
Cc: Carl Zhang <carl.zhang@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Ivan Briano <ivan.briano@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Shuicheng Lin <shuicheng.lin@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251205234715.2476561-5-shuicheng.lin@intel.com
(cherry picked from commit b07bac9bd708ec468cd1b8a5fe70ae2ac9b0a11c)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_exec.c | 3 ++-
 drivers/gpu/drm/xe/xe_vm.c   | 3 +++
 include/uapi/drm/xe_drm.h    | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 4d81210e41f524..fd948003175069 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -132,7 +132,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 
 	if (XE_IOCTL_DBG(xe, args->extensions) ||
 	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
-	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]) ||
+	    XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS))
 		return -EINVAL;
 
 	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 7cac646bdf1c03..c93155c6c6272d 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3324,6 +3324,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
 	if (XE_IOCTL_DBG(xe, args->extensions))
 		return -EINVAL;
 
+	if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS))
+		return -EINVAL;
+
 	if (args->num_binds > 1) {
 		u64 __user *bind_user =
 			u64_to_user_ptr(args->vector_of_binds);
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 47853659a705e3..f64dc0eff0e67b 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1463,6 +1463,7 @@ struct drm_xe_exec {
 	/** @exec_queue_id: Exec queue ID for the batch buffer */
 	__u32 exec_queue_id;
 
+#define DRM_XE_MAX_SYNCS 1024
 	/** @num_syncs: Amount of struct drm_xe_sync in array. */
 	__u32 num_syncs;
 

From f8dd66bfb4e184c71bd26418a00546ebe7f5c17a Mon Sep 17 00:00:00 2001
From: Shuicheng Lin <shuicheng.lin@intel.com>
Date: Fri, 5 Dec 2025 23:47:18 +0000
Subject: [PATCH 234/258] drm/xe/oa: Limit num_syncs to prevent oversized
 allocations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The OA open parameters did not validate num_syncs, allowing
userspace to pass arbitrarily large values, potentially
leading to excessive allocations.

Add check to ensure that num_syncs does not exceed DRM_XE_MAX_SYNCS,
returning -EINVAL when the limit is violated.

v2: use XE_IOCTL_DBG() and drop duplicated check. (Ashutosh)

Fixes: c8507a25cebd ("drm/xe/oa/uapi: Define and parse OA sync properties")
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Shuicheng Lin <shuicheng.lin@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251205234715.2476561-6-shuicheng.lin@intel.com
(cherry picked from commit e057b2d2b8d815df3858a87dffafa2af37e5945b)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_oa.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 890c363282ae6a..1dd8ebeb41d0c7 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -1254,6 +1254,9 @@ static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value,
 static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value,
 				    struct xe_oa_open_param *param)
 {
+	if (XE_IOCTL_DBG(oa->xe, value > DRM_XE_MAX_SYNCS))
+		return -EINVAL;
+
 	param->num_syncs = value;
 	return 0;
 }

From 6f0f404bd289d79a260b634c5b3f4d330b13472c Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 12 Dec 2025 10:28:41 -0800
Subject: [PATCH 235/258] drm/xe: Adjust long-running workload timeslices to
 reasonable values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A 10ms timeslice for long-running workloads is far too long and causes
significant jitter in benchmarks when the system is shared. Adjust the
value to 5ms for preempt-fencing VMs, as the resume step there is quite
costly as memory is moved around, and set it to zero for pagefault VMs,
since switching back to pagefault mode after dma-fence mode is
relatively fast.

Also change min_run_period_ms to 'unsiged int' type rather than 's64' as
only positive values make sense.

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Cc: stable@vger.kernel.org
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/20251212182847.1683222-2-matthew.brost@intel.com
(cherry picked from commit 33a5abd9a68394aa67f9618b20eee65ee8702ff4)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c       | 5 ++++-
 drivers/gpu/drm/xe/xe_vm_types.h | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index c93155c6c6272d..79ab6c512d3e0d 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1508,7 +1508,10 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
 	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
 
 	INIT_LIST_HEAD(&vm->preempt.exec_queues);
-	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */
+	if (flags & XE_VM_FLAG_FAULT_MODE)
+		vm->preempt.min_run_period_ms = 0;
+	else
+		vm->preempt.min_run_period_ms = 5;
 
 	for_each_tile(tile, xe, id)
 		xe_range_fence_tree_init(&vm->rftree[id]);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index ccd6cc090309f1..2168ef052499ec 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -263,7 +263,7 @@ struct xe_vm {
 		 * @min_run_period_ms: The minimum run period before preempting
 		 * an engine again
 		 */
-		s64 min_run_period_ms;
+		unsigned int min_run_period_ms;
 		/** @exec_queues: list of exec queues attached to this VM */
 		struct list_head exec_queues;
 		/** @num_exec_queues: number exec queues attached to this VM */

From 256edb267a9d0b5aef70e408e9fba4f930f9926e Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Fri, 5 Dec 2025 13:26:13 -0800
Subject: [PATCH 236/258] drm/xe/oa: Always set OAG_OAGLBCTXCTRL_COUNTER_RESUME
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reports can be written out to the OA buffer using ways other than periodic
sampling. These include mmio trigger and context switches. To support these
use cases, when periodic sampling is not enabled,
OAG_OAGLBCTXCTRL_COUNTER_RESUME must be set.

Fixes: 1db9a9dc90ae ("drm/xe/oa: OA stream initialization (OAG)")
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Link: https://patch.msgid.link/20251205212613.826224-4-ashutosh.dixit@intel.com
(cherry picked from commit 88d98e74adf3e20f678bb89581a5c3149fdbdeaa)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_oa.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 1dd8ebeb41d0c7..8f3da6885e6c97 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -1105,11 +1105,12 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
 			oag_buf_size_select(stream) |
 			oag_configure_mmio_trigger(stream, true));
 
-	xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
-			(OAG_OAGLBCTXCTRL_COUNTER_RESUME |
+	xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl,
+			OAG_OAGLBCTXCTRL_COUNTER_RESUME |
+			(stream->periodic ?
 			 OAG_OAGLBCTXCTRL_TIMER_ENABLE |
 			 REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK,
-					stream->period_exponent)) : 0);
+					 stream->period_exponent) : 0));
 
 	/*
 	 * Initialize Super Queue Internal Cnt Register

From eb192bedf5908e63347c4923c5a1d58f9baef158 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Fri, 5 Dec 2025 14:39:19 +0300
Subject: [PATCH 237/258] drm/xe/xe_sriov_vfio: Fix return value in
 xe_sriov_vfio_migration_supported()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The xe_sriov_vfio_migration_supported() function is type bool so
returning -EPERM means returning true.  Return false instead.

Fixes: bd45d46ffc8f ("drm/xe/pf: Export helpers for VFIO")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://patch.msgid.link/aTLEZ4g-FD-iMQ2V@stanley.mountain
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
(cherry picked from commit 0a2404c8f6a3a120f79c57ef8a3302c8e8bc34d9)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_sriov_vfio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_sriov_vfio.c b/drivers/gpu/drm/xe/xe_sriov_vfio.c
index e9a7615bb5c51c..3da81af97b8bb2 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vfio.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vfio.c
@@ -21,7 +21,7 @@ EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_get_pf, "xe-vfio-pci");
 bool xe_sriov_vfio_migration_supported(struct xe_device *xe)
 {
 	if (!IS_SRIOV_PF(xe))
-		return -EPERM;
+		return false;
 
 	return xe_sriov_pf_migration_supported(xe);
 }

From 3595114bc31d1eb5e1996164c901485c1ffac6f7 Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Thu, 11 Dec 2025 22:18:49 -0800
Subject: [PATCH 238/258] drm/xe/oa: Disallow 0 OA property values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An OA property value of 0 is invalid and will cause a NPD.

Reported-by: Peter Senna Tschudin <peter.senna@linux.intel.com>
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6452
Fixes: cc4e6994d5a2 ("drm/xe/oa: Move functions up so they can be reused for config ioctl")
Cc: stable@vger.kernel.org
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Harish Chegondi <harish.chegondi@intel.com>
Link: https://patch.msgid.link/20251212061850.1565459-3-ashutosh.dixit@intel.com
(cherry picked from commit 7a100e6ddcc47c1f6ba7a19402de86ce24790621)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_oa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 8f3da6885e6c97..f8bb28ab81248b 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -1347,7 +1347,7 @@ static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_fr
 		     ARRAY_SIZE(xe_oa_set_property_funcs_config));
 
 	if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs_open)) ||
-	    XE_IOCTL_DBG(oa->xe, ext.pad))
+	    XE_IOCTL_DBG(oa->xe, !ext.property) || XE_IOCTL_DBG(oa->xe, ext.pad))
 		return -EINVAL;
 
 	idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs_open));

From 3767ca4166ad42fa9e34269efeaf9f15995cd92d Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Thu, 11 Dec 2025 22:18:50 -0800
Subject: [PATCH 239/258] drm/xe/eustall: Disallow 0 EU stall property values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An EU stall property value of 0 is invalid and will cause a NPD.

Reported-by: Peter Senna Tschudin <peter.senna@linux.intel.com>
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6453
Fixes: 1537ec85ebd7 ("drm/xe/uapi: Introduce API for EU stall sampling")
Cc: stable@vger.kernel.org
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Harish Chegondi <harish.chegondi@intel.com>
Link: https://patch.msgid.link/20251212061850.1565459-4-ashutosh.dixit@intel.com
(cherry picked from commit 5bf763e908bf795da4ad538d21c1ec41f8021f76)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_eu_stall.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
index 97dfb7945b7ac8..a5c36a317a7074 100644
--- a/drivers/gpu/drm/xe/xe_eu_stall.c
+++ b/drivers/gpu/drm/xe/xe_eu_stall.c
@@ -315,7 +315,7 @@ static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension
 		return -EFAULT;
 
 	if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(xe_set_eu_stall_property_funcs)) ||
-	    XE_IOCTL_DBG(xe, ext.pad))
+	    XE_IOCTL_DBG(xe, !ext.property) || XE_IOCTL_DBG(xe, ext.pad))
 		return -EINVAL;
 
 	idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_set_eu_stall_property_funcs));

From fe3ccd24138fd391ae8e32289d492c85f67770fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Wed, 17 Dec 2025 10:34:41 +0100
Subject: [PATCH 240/258] drm/xe: Drop preempt-fences when destroying imported
 dma-bufs.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When imported dma-bufs are destroyed, TTM is not fully
individualizing the dma-resv, but it *is* copying the fences that
need to be waited for before declaring idle. So in the case where
the bo->resv != bo->_resv we can still drop the preempt-fences, but
make sure we do that on bo->_resv which contains the fence-pointer
copy.

In the case where the copying fails, bo->_resv will typically not
contain any fences pointers at all, so there will be nothing to
drop. In that case, TTM would have ensured all fences that would
have been copied are signaled, including any remaining preempt
fences.

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Fixes: fa0af721bd1f ("drm/ttm: test private resv obj on release/destroy")
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: <stable@vger.kernel.org> # v6.16+
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Tested-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251217093441.5073-1-thomas.hellstrom@linux.intel.com
(cherry picked from commit 425fe550fb513b567bd6d01f397d274092a9c274)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index b0bd31d14bb97e..bf4ee976b6805f 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1527,7 +1527,7 @@ static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
 	 * always succeed here, as long as we hold the lru lock.
 	 */
 	spin_lock(&ttm_bo->bdev->lru_lock);
-	locked = dma_resv_trylock(ttm_bo->base.resv);
+	locked = dma_resv_trylock(&ttm_bo->base._resv);
 	spin_unlock(&ttm_bo->bdev->lru_lock);
 	xe_assert(xe, locked);
 
@@ -1547,13 +1547,6 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
 	bo = ttm_to_xe_bo(ttm_bo);
 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
 
-	/*
-	 * Corner case where TTM fails to allocate memory and this BOs resv
-	 * still points the VMs resv
-	 */
-	if (ttm_bo->base.resv != &ttm_bo->base._resv)
-		return;
-
 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
 		return;
 
@@ -1563,14 +1556,14 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
 	 * TODO: Don't do this for external bos once we scrub them after
 	 * unbind.
 	 */
-	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
+	dma_resv_for_each_fence(&cursor, &ttm_bo->base._resv,
 				DMA_RESV_USAGE_BOOKKEEP, fence) {
 		if (xe_fence_is_xe_preempt(fence) &&
 		    !dma_fence_is_signaled(fence)) {
 			if (!replacement)
 				replacement = dma_fence_get_stub();
 
-			dma_resv_replace_fences(ttm_bo->base.resv,
+			dma_resv_replace_fences(&ttm_bo->base._resv,
 						fence->context,
 						replacement,
 						DMA_RESV_USAGE_BOOKKEEP);
@@ -1578,7 +1571,7 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
 	}
 	dma_fence_put(replacement);
 
-	dma_resv_unlock(ttm_bo->base.resv);
+	dma_resv_unlock(&ttm_bo->base._resv);
 }
 
 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)

From 80f9c601d9c4d26f00356c0a9c461650e7089273 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 12 Dec 2025 10:28:42 -0800
Subject: [PATCH 241/258] drm/xe: Use usleep_range for accurate long-running
 workload timeslicing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

msleep is not very accurate in terms of how long it actually sleeps,
whereas usleep_range is precise. Replace the timeslice sleep for
long-running workloads with the more accurate usleep_range to avoid
jitter if the sleep period is less than 20ms.

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Cc: stable@vger.kernel.org
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/20251212182847.1683222-3-matthew.brost@intel.com
(cherry picked from commit ca415c4d4c17ad676a2c8981e1fcc432221dce79)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_guc_submit.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 311cd047911a44..f6ba2b0f074d23 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -717,6 +717,24 @@ static bool vf_recovery(struct xe_guc *guc)
 	return xe_gt_recovery_pending(guc_to_gt(guc));
 }
 
+static inline void relaxed_ms_sleep(unsigned int delay_ms)
+{
+	unsigned long min_us, max_us;
+
+	if (!delay_ms)
+		return;
+
+	if (delay_ms > 20) {
+		msleep(delay_ms);
+		return;
+	}
+
+	min_us = mul_u32_u32(delay_ms, 1000);
+	max_us = min_us + 500;
+
+	usleep_range(min_us, max_us);
+}
+
 static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
 {
 	struct xe_guc *guc = exec_queue_to_guc(q);
@@ -1587,7 +1605,7 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
 				since_resume_ms;
 
 			if (wait_ms > 0 && q->guc->resume_time)
-				msleep(wait_ms);
+				relaxed_ms_sleep(wait_ms);
 
 			set_exec_queue_suspended(q);
 			disable_scheduling(q, false);

From 6cde588e64934858bb1553119c6b915b2fec9011 Mon Sep 17 00:00:00 2001
From: Okan Akyuz <okan.akyuz.linux@gmail.com>
Date: Mon, 15 Dec 2025 20:44:22 +0000
Subject: [PATCH 242/258] hwmon: (DS620) Update broken Datasheet URL in driver
 documentation

The URL for the DS620 datasheet has changed. Update it to reflect the
current location.

Signed-off-by: Okan Akyuz <okan.akyuz.linux@gmail.com>
Link: https://lore.kernel.org/r/20251215204423.80242-1-okan.akyuz.linux@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/ds620.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/hwmon/ds620.rst b/Documentation/hwmon/ds620.rst
index 2d686b17b547a1..e2d915a988a249 100644
--- a/Documentation/hwmon/ds620.rst
+++ b/Documentation/hwmon/ds620.rst
@@ -7,9 +7,9 @@ Supported chips:
 
     Prefix: 'ds620'
 
-    Datasheet: Publicly available at the Dallas Semiconductor website
+    Datasheet: Publicly available at the Analog Devices website
 
-	       http://www.dalsemi.com/
+	https://www.analog.com/media/en/technical-documentation/data-sheets/DS620.pdf
 
 Authors:
 	Roland Stigge <stigge@antcom.de>

From d579478cee228bdc0029a0c12a1f6a63ea9d1c77 Mon Sep 17 00:00:00 2001
From: Sasha Finkelstein <fnkl.kernel@gmail.com>
Date: Thu, 18 Dec 2025 10:15:23 -0800
Subject: [PATCH 243/258] Input: apple_z2 - fix reading incorrect reports after
 exiting sleep

Under certain conditions (more prevalent after a suspend/resume cycle),
the touchscreen controller can send the "boot complete" interrupt before
it actually finished booting. In those cases, attempting to read touch
data resuls in a stream of "not ready" messages being read and
interpreted as a touch report. Check that the response is in fact a
touch report and discard it otherwise.

Reported-by: pitust <piotr@stelmaszek.com>
Closes: https://oftc.catirclogs.org/asahi/2025-12-17#34878715;
Fixes: 471a92f8a21a ("Input: apple_z2 - add a driver for Apple Z2 touchscreens")
Signed-off-by: Sasha Finkelstein <fnkl.kernel@gmail.com>
Link: https://patch.msgid.link/20251218-z2-init-fix-v1-1-48e3aa239caf@gmail.com
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/apple_z2.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/input/touchscreen/apple_z2.c b/drivers/input/touchscreen/apple_z2.c
index 0de161eae59a05..271ababf0ad559 100644
--- a/drivers/input/touchscreen/apple_z2.c
+++ b/drivers/input/touchscreen/apple_z2.c
@@ -21,6 +21,7 @@
 #define APPLE_Z2_TOUCH_STARTED           3
 #define APPLE_Z2_TOUCH_MOVED             4
 #define APPLE_Z2_CMD_READ_INTERRUPT_DATA 0xEB
+#define APPLE_Z2_REPLY_INTERRUPT_DATA    0xE1
 #define APPLE_Z2_HBPP_CMD_BLOB           0x3001
 #define APPLE_Z2_FW_MAGIC                0x5746325A
 #define LOAD_COMMAND_INIT_PAYLOAD        0
@@ -142,6 +143,9 @@ static int apple_z2_read_packet(struct apple_z2 *z2)
 	if (error)
 		return error;
 
+	if (z2->rx_buf[0] != APPLE_Z2_REPLY_INTERRUPT_DATA)
+		return 0;
+
 	pkt_len = (get_unaligned_le16(z2->rx_buf + 1) + 8) & 0xfffffffc;
 
 	error = spi_read(z2->spidev, z2->rx_buf, pkt_len);

From 733a8924229ff8c0385121a30fcd00bf70644743 Mon Sep 17 00:00:00 2001
From: Gergo Koteles <soyer@irl.hu>
Date: Thu, 13 Nov 2025 17:02:58 +0100
Subject: [PATCH 244/258] Input: add ABS_SND_PROFILE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ABS_SND_PROFILE used to describe the state of a multi-value sound profile
switch. This will be used for the alert-slider on OnePlus phones or other
phones.

Profile values added as SND_PROFLE_(SILENT|VIBRATE|RING) identifiers
to input-event-codes.h so they can be used from DTS.

Signed-off-by: Gergo Koteles <soyer@irl.hu>
Reviewed-by: Bjorn Andersson <andersson@kernel.org>
Tested-by: Guido Günther <agx@sigxcpu.org> # oneplus,fajita & oneplus,enchilada
Reviewed-by: Guido Günther <agx@sigxcpu.org>
Signed-off-by: David Heidelberg <david@ixit.cz>
Reviewed-by: Pavel Machek <pavel@ucw.cz>
Link: https://patch.msgid.link/20251113-op6-tri-state-v8-1-54073f3874bc@ixit.cz
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 Documentation/input/event-codes.rst    | 6 ++++++
 drivers/hid/hid-debug.c                | 1 +
 include/uapi/linux/input-event-codes.h | 9 +++++++++
 3 files changed, 16 insertions(+)

diff --git a/Documentation/input/event-codes.rst b/Documentation/input/event-codes.rst
index 4424cbff251f87..77a6c9b3956d56 100644
--- a/Documentation/input/event-codes.rst
+++ b/Documentation/input/event-codes.rst
@@ -241,6 +241,12 @@ A few EV_ABS codes have special meanings:
     emitted only when the selected profile changes, indicating the newly
     selected profile value.
 
+* ABS_SND_PROFILE:
+
+  - Used to describe the state of a multi-value sound profile switch.
+    An event is emitted only when the selected profile changes,
+    indicating the newly selected profile value.
+
 * ABS_MT_<name>:
 
   - Used to describe multitouch input events. Please see
diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 337d2dc81b4ca9..c5865b0d2aaaf6 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -3513,6 +3513,7 @@ static const char *absolutes[ABS_CNT] = {
 	[ABS_DISTANCE] = "Distance",	[ABS_TILT_X] = "XTilt",
 	[ABS_TILT_Y] = "YTilt",		[ABS_TOOL_WIDTH] = "ToolWidth",
 	[ABS_VOLUME] = "Volume",	[ABS_PROFILE] = "Profile",
+	[ABS_SND_PROFILE] = "SoundProfile",
 	[ABS_MISC] = "Misc",
 	[ABS_MT_SLOT] = "MTSlot",
 	[ABS_MT_TOUCH_MAJOR] = "MTMajor",
diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index 30f3c9eaafaad9..4bdb6a1659873d 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -891,6 +891,7 @@
 
 #define ABS_VOLUME		0x20
 #define ABS_PROFILE		0x21
+#define ABS_SND_PROFILE		0x22
 
 #define ABS_MISC		0x28
 
@@ -1000,4 +1001,12 @@
 #define SND_MAX			0x07
 #define SND_CNT			(SND_MAX+1)
 
+/*
+ * ABS_SND_PROFILE values
+ */
+
+#define SND_PROFILE_SILENT	0x00
+#define SND_PROFILE_VIBRATE	0x01
+#define SND_PROFILE_RING	0x02
+
 #endif

From 806ec7b797adc1cc9b11535307638a55ddfb873c Mon Sep 17 00:00:00 2001
From: Sanjay Govind <sanjay.govind9@gmail.com>
Date: Sat, 29 Nov 2025 20:37:11 +1300
Subject: [PATCH 245/258] Input: xpad - add support for CRKD Guitars

Add support for various CRKD Guitar Controllers.

Signed-off-by: Sanjay Govind <sanjay.govind9@gmail.com>
Link: https://patch.msgid.link/20251129073720.2750-2-sanjay.govind9@gmail.com
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/joystick/xpad.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index d72e89c25e5031..363d509493866a 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -133,6 +133,8 @@ static const struct xpad_device {
 } xpad_device[] = {
 	/* Please keep this list sorted by vendor and product ID. */
 	{ 0x0079, 0x18d4, "GPD Win 2 X-Box Controller", 0, XTYPE_XBOX360 },
+	{ 0x0351, 0x1000, "CRKD LP Blueberry Burst Pro Edition (Xbox)", 0, XTYPE_XBOX360 },
+	{ 0x0351, 0x2000, "CRKD LP Black Tribal Edition (Xbox) ", 0, XTYPE_XBOX360 },
 	{ 0x03eb, 0xff01, "Wooting One (Legacy)", 0, XTYPE_XBOX360 },
 	{ 0x03eb, 0xff02, "Wooting Two (Legacy)", 0, XTYPE_XBOX360 },
 	{ 0x03f0, 0x038D, "HyperX Clutch", 0, XTYPE_XBOX360 },			/* wired */
@@ -420,6 +422,7 @@ static const struct xpad_device {
 	{ 0x3285, 0x0663, "Nacon Evol-X", 0, XTYPE_XBOXONE },
 	{ 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 },
 	{ 0x3537, 0x1010, "GameSir G7 SE", 0, XTYPE_XBOXONE },
+	{ 0x3651, 0x1000, "CRKD SG", 0, XTYPE_XBOX360 },
 	{ 0x366c, 0x0005, "ByoWave Proteus Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE, FLAG_DELAY_INIT },
 	{ 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX },
 	{ 0x37d7, 0x2501, "Flydigi Apex 5", 0, XTYPE_XBOX360 },
@@ -518,6 +521,7 @@ static const struct usb_device_id xpad_table[] = {
 	 */
 	{ USB_INTERFACE_INFO('X', 'B', 0) },	/* Xbox USB-IF not-approved class */
 	XPAD_XBOX360_VENDOR(0x0079),		/* GPD Win 2 controller */
+	XPAD_XBOX360_VENDOR(0x0351),		/* CRKD Controllers */
 	XPAD_XBOX360_VENDOR(0x03eb),		/* Wooting Keyboards (Legacy) */
 	XPAD_XBOX360_VENDOR(0x03f0),		/* HP HyperX Xbox 360 controllers */
 	XPAD_XBOXONE_VENDOR(0x03f0),		/* HP HyperX Xbox One controllers */
@@ -578,6 +582,7 @@ static const struct usb_device_id xpad_table[] = {
 	XPAD_XBOXONE_VENDOR(0x3285),		/* Nacon Evol-X */
 	XPAD_XBOX360_VENDOR(0x3537),		/* GameSir Controllers */
 	XPAD_XBOXONE_VENDOR(0x3537),		/* GameSir Controllers */
+	XPAD_XBOX360_VENDOR(0x3651),		/* CRKD Controllers */
 	XPAD_XBOXONE_VENDOR(0x366c),		/* ByoWave controllers */
 	XPAD_XBOX360_VENDOR(0x37d7),		/* Flydigi Controllers */
 	XPAD_XBOX360_VENDOR(0x413d),		/* Black Shark Green Ghost Controller */

From 248d3a73a0167dce15ba100477c3e778c4787178 Mon Sep 17 00:00:00 2001
From: Junjie Cao <junjie.cao@intel.com>
Date: Thu, 18 Dec 2025 21:56:59 -0800
Subject: [PATCH 246/258] Input: ti_am335x_tsc - fix off-by-one error in
 wire_order validation

The current validation 'wire_order[i] > ARRAY_SIZE(config_pins)' allows
wire_order[i] to equal ARRAY_SIZE(config_pins), which causes out-of-bounds
access when used as index in 'config_pins[wire_order[i]]'.

Since config_pins has 4 elements (indices 0-3), the valid range for
wire_order should be 0-3. Fix the off-by-one error by using >= instead
of > in the validation check.

Signed-off-by: Junjie Cao <junjie.cao@intel.com>
Link: https://patch.msgid.link/20251114062817.852698-1-junjie.cao@intel.com
Fixes: bb76dc09ddfc ("input: ti_am33x_tsc: Order of TSC wires, made configurable")
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/ti_am335x_tsc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c
index d6edfab1677040..0534b2ba650bbf 100644
--- a/drivers/input/touchscreen/ti_am335x_tsc.c
+++ b/drivers/input/touchscreen/ti_am335x_tsc.c
@@ -85,7 +85,7 @@ static int titsc_config_wires(struct titsc *ts_dev)
 		wire_order[i] = ts_dev->config_inp[i] & 0x0F;
 		if (WARN_ON(analog_line[i] > 7))
 			return -EINVAL;
-		if (WARN_ON(wire_order[i] > ARRAY_SIZE(config_pins)))
+		if (WARN_ON(wire_order[i] >= ARRAY_SIZE(config_pins)))
 			return -EINVAL;
 	}
 

From c2e8dc1222c2136e714d5d972dce7e64924e4ed8 Mon Sep 17 00:00:00 2001
From: Sairaj Kodilkar <sarunkod@amd.com>
Date: Fri, 21 Nov 2025 14:41:15 +0530
Subject: [PATCH 247/258] amd/iommu: Preserve domain ids inside the kdump
 kernel

Currently AMD IOMMU driver does not reserve domain ids programmed in the
DTE while reusing the device table inside kdump kernel. This can cause
reallocation of these domain ids for newer domains that are created by
the kdump kernel, which can lead to potential IO_PAGE_FAULTs

Hence reserve these ids inside pdom_ids.

Fixes: 38e5f33ee359 ("iommu/amd: Reuse device table for kdump")
Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
Reported-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd/init.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 4b29534189770b..106ee3cf30388a 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1136,9 +1136,13 @@ static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
 static bool __reuse_device_table(struct amd_iommu *iommu)
 {
 	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
-	u32 lo, hi, old_devtb_size;
+	struct dev_table_entry *old_dev_tbl_entry;
+	u32 lo, hi, old_devtb_size, devid;
 	phys_addr_t old_devtb_phys;
+	u16 dom_id;
+	bool dte_v;
 	u64 entry;
+	int ret;
 
 	/* Each IOMMU use separate device table with the same size */
 	lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
@@ -1173,6 +1177,23 @@ static bool __reuse_device_table(struct amd_iommu *iommu)
 		return false;
 	}
 
+	for (devid = 0; devid <= pci_seg->last_bdf; devid++) {
+		old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid];
+		dte_v = FIELD_GET(DTE_FLAG_V, old_dev_tbl_entry->data[0]);
+		dom_id = FIELD_GET(DEV_DOMID_MASK, old_dev_tbl_entry->data[1]);
+
+		if (!dte_v || !dom_id)
+			continue;
+		/*
+		 * ID reservation can fail with -ENOSPC when there
+		 * are multiple devices present in the same domain,
+		 * hence check only for -ENOMEM.
+		 */
+		ret = ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_KERNEL);
+		if (ret == -ENOMEM)
+			return false;
+	}
+
 	return true;
 }
 

From c7fe9384c85d31e35bb61574d7a742ba59fb27c3 Mon Sep 17 00:00:00 2001
From: Sairaj Kodilkar <sarunkod@amd.com>
Date: Fri, 21 Nov 2025 14:41:16 +0530
Subject: [PATCH 248/258] amd/iommu: Make protection domain ID functions
 non-static

So that both iommu.c and init.c can utilize them. Also define a new
function 'pdom_id_destroy()' to destroy 'pdom_ids' instead of directly
calling ida functions.

Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd/amd_iommu.h |  5 +++++
 drivers/iommu/amd/init.c      |  7 ++-----
 drivers/iommu/amd/iommu.c     | 27 ++++++++++++++++++---------
 3 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 25044d28f28a8d..b742ef1adb352b 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -173,6 +173,11 @@ static inline struct protection_domain *to_pdomain(struct iommu_domain *dom)
 bool translation_pre_enabled(struct amd_iommu *iommu);
 int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line);
 
+int amd_iommu_pdom_id_alloc(void);
+int amd_iommu_pdom_id_reserve(u16 id, gfp_t gfp);
+void amd_iommu_pdom_id_free(int id);
+void amd_iommu_pdom_id_destroy(void);
+
 #ifdef CONFIG_DMI
 void amd_iommu_apply_ivrs_quirks(void);
 #else
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 106ee3cf30388a..384c90b4f90a0a 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1142,7 +1142,6 @@ static bool __reuse_device_table(struct amd_iommu *iommu)
 	u16 dom_id;
 	bool dte_v;
 	u64 entry;
-	int ret;
 
 	/* Each IOMMU use separate device table with the same size */
 	lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
@@ -1189,8 +1188,7 @@ static bool __reuse_device_table(struct amd_iommu *iommu)
 		 * are multiple devices present in the same domain,
 		 * hence check only for -ENOMEM.
 		 */
-		ret = ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_KERNEL);
-		if (ret == -ENOMEM)
+		if (amd_iommu_pdom_id_reserve(dom_id, GFP_KERNEL) == -ENOMEM)
 			return false;
 	}
 
@@ -3148,8 +3146,7 @@ static bool __init check_ioapic_information(void)
 
 static void __init free_dma_resources(void)
 {
-	ida_destroy(&pdom_ids);
-
+	amd_iommu_pdom_id_destroy();
 	free_unity_maps();
 }
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 9f1d56a5e145fe..5d45795c367a6b 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1811,17 +1811,26 @@ int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag)
  * contain.
  *
  ****************************************************************************/
-
-static int pdom_id_alloc(void)
+int amd_iommu_pdom_id_alloc(void)
 {
 	return ida_alloc_range(&pdom_ids, 1, MAX_DOMAIN_ID - 1, GFP_ATOMIC);
 }
 
-static void pdom_id_free(int id)
+int amd_iommu_pdom_id_reserve(u16 id, gfp_t gfp)
+{
+	return ida_alloc_range(&pdom_ids, id, id, gfp);
+}
+
+void amd_iommu_pdom_id_free(int id)
 {
 	ida_free(&pdom_ids, id);
 }
 
+void amd_iommu_pdom_id_destroy(void)
+{
+	ida_destroy(&pdom_ids);
+}
+
 static void free_gcr3_tbl_level1(u64 *tbl)
 {
 	u64 *ptr;
@@ -1864,7 +1873,7 @@ static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info)
 	gcr3_info->glx = 0;
 
 	/* Free per device domain ID */
-	pdom_id_free(gcr3_info->domid);
+	amd_iommu_pdom_id_free(gcr3_info->domid);
 
 	iommu_free_pages(gcr3_info->gcr3_tbl);
 	gcr3_info->gcr3_tbl = NULL;
@@ -1900,14 +1909,14 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info,
 		return -EBUSY;
 
 	/* Allocate per device domain ID */
-	domid = pdom_id_alloc();
+	domid = amd_iommu_pdom_id_alloc();
 	if (domid <= 0)
 		return -ENOSPC;
 	gcr3_info->domid = domid;
 
 	gcr3_info->gcr3_tbl = iommu_alloc_pages_node_sz(nid, GFP_ATOMIC, SZ_4K);
 	if (gcr3_info->gcr3_tbl == NULL) {
-		pdom_id_free(domid);
+		amd_iommu_pdom_id_free(domid);
 		return -ENOMEM;
 	}
 
@@ -2503,7 +2512,7 @@ struct protection_domain *protection_domain_alloc(void)
 	if (!domain)
 		return NULL;
 
-	domid = pdom_id_alloc();
+	domid = amd_iommu_pdom_id_alloc();
 	if (domid <= 0) {
 		kfree(domain);
 		return NULL;
@@ -2802,7 +2811,7 @@ void amd_iommu_domain_free(struct iommu_domain *dom)
 
 	WARN_ON(!list_empty(&domain->dev_list));
 	pt_iommu_deinit(&domain->iommu);
-	pdom_id_free(domain->id);
+	amd_iommu_pdom_id_free(domain->id);
 	kfree(domain);
 }
 
@@ -2853,7 +2862,7 @@ void amd_iommu_init_identity_domain(void)
 	domain->ops = &identity_domain_ops;
 	domain->owner = &amd_iommu_ops;
 
-	identity_domain.id = pdom_id_alloc();
+	identity_domain.id = amd_iommu_pdom_id_alloc();
 
 	protection_domain_init(&identity_domain);
 }

From f4ea8e05f2a857d5447c25f7daf00807d38b307d Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 19 Dec 2025 15:09:09 +0000
Subject: [PATCH 249/258] lkdtm/bugs: Do not confuse the clang/objtool with
 busy wait loop

Since commit eb972eab0794 ("lkdtm/bugs: Add cases for BUG and PANIC
occurring in hardirq context"), building with clang for x86_64 results
in the following warnings:

vmlinux.o: warning: objtool: lkdtm_PANIC_IN_HARDIRQ(): unexpected end of section .text.lkdtm_PANIC_IN_HARDIRQ
vmlinux.o: warning: objtool: lkdtm_BUG_IN_HARDIRQ(): unexpected end of section .text.lkdtm_BUG_IN_HARDIRQ

caused by busy "while (wait_for_...);" loops. Add READ_ONCE() and
cpu_relax() to better indicate the intention and avoid any unwanted
compiler optimisations.

Fixes: eb972eab0794 ("lkdtm/bugs: Add cases for BUG and PANIC occurring in hardirq context")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202512190111.jxFSqxUH-lkp@intel.com/
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 drivers/misc/lkdtm/bugs.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index fa05d77acb558d..502059078b4560 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -120,8 +120,8 @@ static void lkdtm_PANIC_IN_HARDIRQ(void)
 			       CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	hrtimer_start(&timer, us_to_ktime(100), HRTIMER_MODE_REL_HARD);
 
-	while (wait_for_panic)
-		;
+	while (READ_ONCE(wait_for_panic))
+		cpu_relax();
 
 	hrtimer_cancel(&timer);
 }
@@ -150,8 +150,8 @@ static void lkdtm_BUG_IN_HARDIRQ(void)
 			       CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	hrtimer_start(&timer, us_to_ktime(100), HRTIMER_MODE_REL_HARD);
 
-	while (wait_for_bug)
-		;
+	while (READ_ONCE(wait_for_bug))
+		cpu_relax();
 
 	hrtimer_cancel(&timer);
 }

From 987697749def9c5e10d9a2d992f012db61ae1967 Mon Sep 17 00:00:00 2001
From: Frank Wunderlich <frank-w@public-files.de>
Date: Wed, 19 Nov 2025 18:51:22 +0100
Subject: [PATCH 250/258] arm64: dts: mediatek: mt7986: add dtbs with applied
 overlays for bpi-r3

Build devicetree binaries for testing overlays and providing users
full dtb without using overlays.

Suggested-by: Rob Herring <robh+dt@kernel.org>
Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Fixes: a58c36806741 ("arm64: dts: mediatek: mt7988a-bpi-r4pro: Add mmc  overlays")
Fixes: dec929e61a42 ("arm64: dts: mediatek: mt7988a-bpi-r4-pro: Add PCIe  overlays")
Fixes: 714a80ced07a ("arm64: dts: mediatek: mt7988a-bpi-r4: Add dt  overlays for sd + emmc")
Fixes: 312189ebb802 ("arm64: dts: mt7986: add overlay for SATA power  socket on BPI-R3")
Fixes: 8e01fb15b815 ("arm64: dts: mt7986: add Bananapi R3")
Acked-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Acked-by: Rob Herring (Arm) <robh@kernel.org>
Link: https://patch.msgid.link/20251119175124.48947-2-linux@fw-web.de
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 arch/arm64/boot/dts/mediatek/Makefile | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/arch/arm64/boot/dts/mediatek/Makefile b/arch/arm64/boot/dts/mediatek/Makefile
index c5fd6191a925ad..77d76730d61b97 100644
--- a/arch/arm64/boot/dts/mediatek/Makefile
+++ b/arch/arm64/boot/dts/mediatek/Makefile
@@ -19,6 +19,27 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-nand.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-nor.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sata.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sd.dtbo
+mt7986a-bananapi-bpi-r3-emmc-nand-dtbs := \
+	mt7986a-bananapi-bpi-r3.dtb \
+	mt7986a-bananapi-bpi-r3-emmc.dtbo \
+	mt7986a-bananapi-bpi-r3-nand.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-emmc-nand.dtb
+mt7986a-bananapi-bpi-r3-emmc-nor-dtbs := \
+	mt7986a-bananapi-bpi-r3.dtb \
+	mt7986a-bananapi-bpi-r3-emmc.dtbo \
+	mt7986a-bananapi-bpi-r3-nor.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-emmc-nor.dtb
+mt7986a-bananapi-bpi-r3-sd-nand-dtbs := \
+	mt7986a-bananapi-bpi-r3.dtb \
+	mt7986a-bananapi-bpi-r3-sd.dtbo \
+	mt7986a-bananapi-bpi-r3-nand.dtbo \
+	mt7986a-bananapi-bpi-r3-sata.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sd-nand.dtb
+mt7986a-bananapi-bpi-r3-sd-nor-dtbs := \
+	mt7986a-bananapi-bpi-r3.dtb \
+	mt7986a-bananapi-bpi-r3-sd.dtbo \
+	mt7986a-bananapi-bpi-r3-nor.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sd-nor.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-rfb.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986b-rfb.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4.dtb

From 0773bc6ab7ec0b707632c991fe29edf28f03a641 Mon Sep 17 00:00:00 2001
From: Frank Wunderlich <frank-w@public-files.de>
Date: Wed, 19 Nov 2025 18:51:23 +0100
Subject: [PATCH 251/258] arm64: dts: mediatek: mt7988: add dtbs with applied
 overlays for bpi-r4 (pro)

Build devicetree binaries for testing overlays and providing users
full dtb without using overlays for Bananapi R4 (pro) variants.

Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Link: https://patch.msgid.link/20251119175124.48947-3-linux@fw-web.de
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 arch/arm64/boot/dts/mediatek/Makefile | 32 +++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/arch/arm64/boot/dts/mediatek/Makefile b/arch/arm64/boot/dts/mediatek/Makefile
index 77d76730d61b97..cac8f4c6d76f13 100644
--- a/arch/arm64/boot/dts/mediatek/Makefile
+++ b/arch/arm64/boot/dts/mediatek/Makefile
@@ -52,6 +52,38 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-cn18.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-emmc.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-sd.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-sd.dtbo
+mt7988a-bananapi-bpi-r4-emmc-dtbs := \
+	mt7988a-bananapi-bpi-r4.dtb \
+	mt7988a-bananapi-bpi-r4-emmc.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-emmc.dtb
+mt7988a-bananapi-bpi-r4-sd-dtbs := \
+	mt7988a-bananapi-bpi-r4.dtb \
+	mt7988a-bananapi-bpi-r4-sd.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-sd.dtb
+mt7988a-bananapi-bpi-r4-2g5-emmc-dtbs := \
+	mt7988a-bananapi-bpi-r4-2g5.dtb \
+	mt7988a-bananapi-bpi-r4-emmc.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-2g5-emmc.dtb
+mt7988a-bananapi-bpi-r4-2g5-sd-dtbs := \
+	mt7988a-bananapi-bpi-r4-2g5.dtb \
+	mt7988a-bananapi-bpi-r4-sd.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-2g5-sd.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-emmc-dtbs := \
+	mt7988a-bananapi-bpi-r4-pro-8x.dtb \
+	mt7988a-bananapi-bpi-r4-pro-emmc.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-emmc.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-sd-dtbs := \
+	mt7988a-bananapi-bpi-r4-pro-8x.dtb \
+	mt7988a-bananapi-bpi-r4-pro-sd.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-sd.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-sd-cn15-dtbs := \
+	mt7988a-bananapi-bpi-r4-pro-8x-sd.dtb \
+	mt7988a-bananapi-bpi-r4-pro-cn15.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-sd-cn15.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-sd-cn18-dtbs := \
+	mt7988a-bananapi-bpi-r4-pro-8x-sd.dtb \
+	mt7988a-bananapi-bpi-r4-pro-cn18.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-sd-cn18.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8167-pumpkin.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8173-elm.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8173-elm-hana.dtb

From ce7b1d58609abc2941a1f38094147f439fb74233 Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Fri, 5 Dec 2025 22:59:38 +0100
Subject: [PATCH 252/258] arm64: dts: mediatek: Apply mt8395-radxa DT overlay
 at build time

It's a requirement that DT overlays be applied at build time in order to
validate them as overlays are not validated on their own.

Add missing target for mt8395-radxa hd panel overlay.

Fixes: 4c8ff61199a7 ("arm64: dts: mediatek: mt8395-radxa-nio-12l: Add Radxa 8 HD panel")
Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Acked-by: AngeloGioacchino Del Regno <angelogiaocchino.delregno@collabora.com>
Link: https://patch.msgid.link/20251205215940.19287-1-linux@fw-web.de
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 arch/arm64/boot/dts/mediatek/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/mediatek/Makefile b/arch/arm64/boot/dts/mediatek/Makefile
index cac8f4c6d76f13..3f76d9ce987975 100644
--- a/arch/arm64/boot/dts/mediatek/Makefile
+++ b/arch/arm64/boot/dts/mediatek/Makefile
@@ -166,6 +166,8 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt8390-grinn-genio-700-sbc.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-kontron-3-5-sbc-i1200.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-radxa-nio-12l.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-radxa-nio-12l-8-hd-panel.dtbo
+mt8395-radxa-nio-12l-8-hd-panel-dtbs := mt8395-radxa-nio-12l.dtb mt8395-radxa-nio-12l-8-hd-panel.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-radxa-nio-12l-8-hd-panel.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8516-pumpkin.dtb
 
 # Device tree overlays support

From b3db91c3bfea69a6c6258fea508f25a59c0feb1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Fri, 19 Dec 2025 16:11:05 +0000
Subject: [PATCH 253/258] hwmon: (ltc4282): Fix reset_history file permissions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The reset_history attributes are write only. Hence don't report them as
readable just to return -EOPNOTSUPP later on.

Fixes: cbc29538dbf7 ("hwmon: Add driver for LTC4282")
Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20251219-ltc4282-fix-reset-history-v1-1-8eab974c124b@analog.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ltc4282.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/hwmon/ltc4282.c b/drivers/hwmon/ltc4282.c
index b9cad89f2cd9a8..db6534e6799113 100644
--- a/drivers/hwmon/ltc4282.c
+++ b/drivers/hwmon/ltc4282.c
@@ -1000,8 +1000,9 @@ static umode_t ltc4282_in_is_visible(const struct ltc4282_state *st, u32 attr)
 	case hwmon_in_max:
 	case hwmon_in_min:
 	case hwmon_in_enable:
-	case hwmon_in_reset_history:
 		return 0644;
+	case hwmon_in_reset_history:
+		return 0200;
 	default:
 		return 0;
 	}
@@ -1020,8 +1021,9 @@ static umode_t ltc4282_curr_is_visible(u32 attr)
 		return 0444;
 	case hwmon_curr_max:
 	case hwmon_curr_min:
-	case hwmon_curr_reset_history:
 		return 0644;
+	case hwmon_curr_reset_history:
+		return 0200;
 	default:
 		return 0;
 	}
@@ -1039,8 +1041,9 @@ static umode_t ltc4282_power_is_visible(u32 attr)
 		return 0444;
 	case hwmon_power_max:
 	case hwmon_power_min:
-	case hwmon_power_reset_history:
 		return 0644;
+	case hwmon_power_reset_history:
+		return 0200;
 	default:
 		return 0;
 	}

From 4cc5373f2e749a6c96e8b9fa971931a4dd852860 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 19 Dec 2025 11:20:06 +0000
Subject: [PATCH 254/258] clang: work around asm output constraint problems

Work around clang problems with "=rm" asm constraint.

clang seems to always chose the memory output, while it is almost
always the worst choice.

Add ASM_OUTPUT_RM so that we can replace "=rm" constraint
where it matters for clang, while not penalizing gcc.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Suggested-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-clang.h | 1 +
 include/linux/compiler_types.h | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 107ce05bd16eb4..7edf1a07b53505 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -145,6 +145,7 @@
  */
 #define ASM_INPUT_G "ir"
 #define ASM_INPUT_RM "r"
+#define ASM_OUTPUT_RM "=r"
 
 /*
  * Declare compiler support for __typeof_unqual__() operator.
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 1280693766b9dd..d3318a3c257775 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -548,11 +548,12 @@ struct ftrace_likely_data {
 
 /*
  * Clang has trouble with constraints with multiple
- * alternative behaviors (mainly "g" and "rm").
+ * alternative behaviors ("g" , "rm" and "=rm").
  */
 #ifndef ASM_INPUT_G
   #define ASM_INPUT_G "g"
   #define ASM_INPUT_RM "rm"
+  #define ASM_OUTPUT_RM "=rm"
 #endif
 
 #ifdef CONFIG_CC_HAS_ASM_INLINE

From 91ff28ae6d050e0ca01ac13eb8ba31d744cf672f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 19 Dec 2025 11:20:07 +0000
Subject: [PATCH 255/258] x86/irqflags: Use ASM_OUTPUT_RM in native_save_fl()

clang is generating very inefficient code for native_save_fl() which is
used for local_irq_save() in critical spots.

Allowing the "pop %0" to use memory:

 1) forces the compiler to add annoying stack canaries when
    CONFIG_STACKPROTECTOR_STRONG=y in many places.

 2) Almost always is followed by an immediate "move memory,register"

One good example is _raw_spin_lock_irqsave, with 8 extra instructions

  ffffffff82067a30 <_raw_spin_lock_irqsave>:
  ffffffff82067a30:		...
  ffffffff82067a39:		53						push   %rbx

  // Three instructions to ajust the stack, read the per-cpu canary
  // and copy it to 8(%rsp)
  ffffffff82067a3a:		48 83 ec 10 			sub    $0x10,%rsp
  ffffffff82067a3e:		65 48 8b 05 da 15 45 02 mov    %gs:0x24515da(%rip),%rax 	   # <__stack_chk_guard>
  ffffffff82067a46:		48 89 44 24 08			mov    %rax,0x8(%rsp)

  ffffffff82067a4b:		9c						pushf

  // instead of pop %rbx, compiler uses 2 instructions.
  ffffffff82067a4c:		8f 04 24				pop    (%rsp)
  ffffffff82067a4f:		48 8b 1c 24 			mov    (%rsp),%rbx

  ffffffff82067a53:		fa						cli
  ffffffff82067a54:		b9 01 00 00 00			mov    $0x1,%ecx
  ffffffff82067a59:		31 c0					xor    %eax,%eax
  ffffffff82067a5b:		f0 0f b1 0f 			lock cmpxchg %ecx,(%rdi)
  ffffffff82067a5f:		75 1d					jne    ffffffff82067a7e <_raw_spin_lock_irqsave+0x4e>

  // three instructions to check the stack canary
  ffffffff82067a61:		65 48 8b 05 b7 15 45 02 mov    %gs:0x24515b7(%rip),%rax 	   # <__stack_chk_guard>
  ffffffff82067a69:		48 3b 44 24 08			cmp    0x8(%rsp),%rax
  ffffffff82067a6e:		75 17					jne    ffffffff82067a87

  ...

  // One extra instruction to adjust the stack.
  ffffffff82067a73:		48 83 c4 10 			add    $0x10,%rsp
  ...

  // One more instruction in case the stack was mangled.
  ffffffff82067a87:		e8 a4 35 ff ff			call   ffffffff8205b030 <__stack_chk_fail>

This patch changes nothing for gcc, but for clang saves ~20000 bytes of text
even though more functions are inlined.

  $ size vmlinux.gcc.before vmlinux.gcc.after vmlinux.clang.before vmlinux.clang.after
     text	   data		bss		dec		hex	filename
  45565821	25005462	4704800	75276083	47c9f33	vmlinux.gcc.before
  45565821	25005462	4704800	75276083	47c9f33	vmlinux.gcc.after
  45121072	24638617	5533040	75292729	47ce039	vmlinux.clang.before
  45093887	24638633	5536808	75269328	47c84d0	vmlinux.clang.after

  $ scripts/bloat-o-meter -t vmlinux.clang.before vmlinux.clang.after
  add/remove: 1/2 grow/shrink: 21/533 up/down: 2250/-22112 (-19862)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/irqflags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index b30e5474c18e1b..a1193e9d65f200 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -25,7 +25,7 @@ extern __always_inline unsigned long native_save_fl(void)
 	 */
 	asm volatile("# __raw_save_flags\n\t"
 		     "pushf ; pop %0"
-		     : "=rm" (flags)
+		     : ASM_OUTPUT_RM (flags)
 		     : /* no input */
 		     : "memory");
 

From 3766511de1ce62472898d0ffafeb2551c880b161 Mon Sep 17 00:00:00 2001
From: Songwei Chai <quic_songchai@quicinc.com>
Date: Fri, 6 Jun 2025 14:09:36 +0800
Subject: [PATCH 256/258] scripts: coccicheck: filter *.cocci files by MODE

Enhance the coccicheck script to filter *.cocci files based on the
specified MODE (e.g., report, patch). This ensures that only compatible
semantic patch files are executed, preventing errors such as:

    "virtual rule report not supported"

This error occurs when a .cocci file does not define a 'virtual <MODE>'
rule, yet is executed in that mode.

For example:

    make coccicheck M=drivers/hwtracing/coresight/ MODE=report

In this case, running "secs_to_jiffies.cocci" would trigger the error
because it lacks support for 'report' mode. With this change, such files
are skipped automatically, improving robustness and developer
experience.

Signed-off-by: Songwei Chai <quic_songchai@quicinc.com>
Reviewed-by: Julia Lawall <Julia.Lawall@inria.fr>
---
 scripts/coccicheck | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/coccicheck b/scripts/coccicheck
index 0e6bc5a10320c9..89d591af5f3e7b 100755
--- a/scripts/coccicheck
+++ b/scripts/coccicheck
@@ -270,7 +270,11 @@ fi
 
 if [ "$COCCI" = "" ] ; then
     for f in `find $srctree/scripts/coccinelle/ -name '*.cocci' -type f | sort`; do
-	coccinelle $f
+        if grep -q "virtual[[:space:]]\+$MODE" "$f"; then
+                coccinelle $f
+        else
+                echo "warning: Skipping $f as it does not match mode '$MODE'"
+        fi
     done
 else
     coccinelle $COCCI

From 52ad85fd33a72c47877384fcf605e0bdb2ad1848 Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@linux.dev>
Date: Sat, 22 Nov 2025 12:48:04 +0100
Subject: [PATCH 257/258] Coccinelle: pm_runtime: Fix typo in report message

s/Unecessary/Unnecessary/

Reviewed-by: Julia Lawall <julia.lawall@inria.fr>
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
---
 scripts/coccinelle/api/pm_runtime.cocci | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/coccinelle/api/pm_runtime.cocci b/scripts/coccinelle/api/pm_runtime.cocci
index bf128ccae92101..b720489418fa4c 100644
--- a/scripts/coccinelle/api/pm_runtime.cocci
+++ b/scripts/coccinelle/api/pm_runtime.cocci
@@ -109,5 +109,5 @@ p2 << r.p2;
 pm_runtime_api << r.pm_runtime_api;
 @@
 
-msg = "%s returns < 0 as error. Unecessary IS_ERR_VALUE at line %s" % (pm_runtime_api, p2[0].line)
+msg = "%s returns < 0 as error. Unnecessary IS_ERR_VALUE at line %s" % (pm_runtime_api, p2[0].line)
 coccilib.report.print_report(p1[0],msg)

From 9448598b22c50c8a5bb77a9103e2d49f134c9578 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 21 Dec 2025 15:52:04 -0800
Subject: [PATCH 258/258] Linux 6.19-rc2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index e404e4767944ed..3cd00b62cde99c 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 19
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Baby Opossum Posse
 
 # *DOCUMENTATION*