LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [kvm-unit-tests RFC PATCH 2/6] configure: Make arch_libdir a first-class entity
From: Chinmay Rath @ 2026-06-02  6:48 UTC (permalink / raw)
  To: thuth
  Cc: npiggin, harshpb, lvivier, linuxppc-dev, kvm, andrew.jones, sbhat,
	Chinmay Rath
In-Reply-To: <20260602064806.3101025-1-rathc@linux.ibm.com>

From: Nicholas Piggin <npiggin@gmail.com>

arch_libdir was brought in to improve the heuristic determination of
the lib/ directory based on arch and testdir names, but it did not
entirely clean that mess up.

Remove the arch_libdir->arch->testdir heuristic and just require that
everybody sets arch_libdir correctly. Fail if the lib/arch or
lib/arch/asm directories cannot be found.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
 Makefile  |  2 +-
 configure | 20 ++++++++++++--------
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/Makefile b/Makefile
index 42ef5826..8e002043 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@ vpath %.s $(SRCDIR)
 vpath %.S $(SRCDIR)
 
 libdirs-get = $(shell [ -d "lib/$(1)" ] && echo "lib/$(1) lib/$(1)/asm")
-ARCH_LIBDIRS := $(call libdirs-get,$(ARCH_LIBDIR)) $(call libdirs-get,$(TEST_DIR))
+ARCH_LIBDIRS := $(call libdirs-get,$(ARCH_LIBDIR))
 OBJDIRS := $(ARCH_LIBDIRS)
 
 DESTDIR := $(PREFIX)/share/kvm-unit-tests/
diff --git a/configure b/configure
index 6d549d1e..aeb5570c 100755
--- a/configure
+++ b/configure
@@ -274,7 +274,6 @@ fi
 arch_name=$arch
 [ "$arch" = "aarch64" ] && arch="arm64"
 [ "$arch_name" = "arm64" ] && arch_name="aarch64"
-arch_libdir=$arch
 
 if [ "$arch" = "riscv" ]; then
     echo "riscv32 or riscv64 must be specified"
@@ -373,8 +372,10 @@ fi
 
 if [ "$arch" = "i386" ] || [ "$arch" = "x86_64" ]; then
     testdir=x86
+    arch_libdir=x86
 elif [ "$arch" = "arm" ] || [ "$arch" = "arm64" ]; then
     testdir=arm
+    arch_libdir=$arch
     if [ "$target" = "qemu" ]; then
         : "${uart_early_addr:=0x9000000}"
     elif [ "$target" = "kvmtool" ]; then
@@ -385,6 +386,7 @@ elif [ "$arch" = "arm" ] || [ "$arch" = "arm64" ]; then
     fi
 elif [ "$arch" = "ppc64" ]; then
     testdir=powerpc
+    arch_libdir=ppc64
     firmware="$testdir/boot_rom.bin"
     if [ "$endian" != "little" ] && [ "$endian" != "big" ]; then
         echo "You must provide endianness (big or little)!"
@@ -400,6 +402,7 @@ elif [ "$arch" = "riscv32" ] || [ "$arch" = "riscv64" ]; then
     fi
 elif [ "$arch" = "s390x" ]; then
     testdir=s390x
+    arch_libdir=s390x
 else
     echo "arch $arch is not supported!"
     arch=
@@ -409,6 +412,10 @@ if [ ! -d "$srcdir/$testdir" ]; then
     echo "$srcdir/$testdir does not exist!"
     exit 1
 fi
+if [ ! -d "$srcdir/lib/$arch_libdir" ]; then
+    echo "$srcdir/lib/$arch_libdir does not exist!"
+    exit 1
+fi
 
 if [ "$efi" = "y" ] && [ -f "$srcdir/$testdir/efi/run" ]; then
     ln -fs "$srcdir/$testdir/efi/run" $testdir-run
@@ -471,15 +478,12 @@ fi
 # link lib/asm for the architecture
 rm -f lib/asm
 asm="asm-generic"
-if [ -d "$srcdir/lib/$arch/asm" ]; then
-	asm="$srcdir/lib/$arch/asm"
-	mkdir -p "lib/$arch"
-elif [ -d "$srcdir/lib/$arch_libdir/asm" ]; then
+if [ -d "$srcdir/lib/$arch_libdir/asm" ]; then
 	asm="$srcdir/lib/$arch_libdir/asm"
 	mkdir -p "lib/$arch_libdir"
-elif [ -d "$srcdir/lib/$testdir/asm" ]; then
-	asm="$srcdir/lib/$testdir/asm"
-	mkdir -p "lib/$testdir"
+else
+	echo "$srcdir/lib/$arch_libdir/asm does not exist"
+	exit 1
 fi
 ln -sf "$asm" lib/asm
 mkdir -p lib/generated lib/libfdt
-- 
2.53.0



^ permalink raw reply related

* Re: [linux-next20260529] kernel BUG at kernel/sched/core.c:7512!
From: Peter Zijlstra @ 2026-06-02 10:03 UTC (permalink / raw)
  To: Shrikanth Hegde
  Cc: Venkat Rao Bagalkote, Madhavan Srinivasan,
	Mukesh Kumar Chaurasiya, Ritesh Harjani, linuxppc-dev, LKML,
	Srikar Dronamraju
In-Reply-To: <b0211b41-a41b-4367-ae85-86cc2d6dd4b6@linux.ibm.com>

On Tue, Jun 02, 2026 at 03:26:11PM +0530, Shrikanth Hegde wrote:

> > I would suggest trying something a little more focussed like so:
> > 
> > diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> > index 806c74e0d5ab..b002c179415c 100644
> > --- a/arch/powerpc/mm/fault.c
> > +++ b/arch/powerpc/mm/fault.c
> > @@ -589,6 +589,7 @@ static __always_inline void __do_page_fault(struct pt_regs *regs)
> >   	err = ___do_page_fault(regs, regs->dar, regs->dsisr);
> >   	if (unlikely(err))
> >   		bad_page_fault(regs, err);
> > +	local_irq_disable();
> >   }
> >   DEFINE_INTERRUPT_HANDLER(do_page_fault)
> > 
> > Since only ___do_page_fault() will enable interrupts, you only need to
> > disable them again on its return path.
> > 
> 
> Seems there are more...
> 
> do_program_check (called by program_check_exception, emulation_assist_interrupt)
> alignment_exception
> SPEFloatingPointException
> facility_unavailable_exception
> 
> 
> Many look like they can recover only if hit in userspace.
> Hence I thought it would make sense to put it under arch_interrupt_exit_prepare
> which is called just before irqentry_exit.

Ah, fair enough.


^ permalink raw reply

* Re: [linux-next20260529] kernel BUG at kernel/sched/core.c:7512!
From: Shrikanth Hegde @ 2026-06-02  9:56 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Venkat Rao Bagalkote, Madhavan Srinivasan,
	Mukesh Kumar Chaurasiya, Ritesh Harjani, linuxppc-dev, LKML,
	Srikar Dronamraju
In-Reply-To: <20260602081845.GX3126523@noisy.programming.kicks-ass.net>

Hi Peter.

On 6/2/26 1:48 PM, Peter Zijlstra wrote:
> On Tue, Jun 02, 2026 at 01:26:48PM +0530, Shrikanth Hegde wrote:
>>
>>
>> On 6/1/26 3:26 PM, Peter Zijlstra wrote:
>>> On Mon, Jun 01, 2026 at 02:46:24PM +0530, Shrikanth Hegde wrote:
>>>
>>>> Ritesh, Mukesh, Is below possible scenario?
>>>>
>>>> do_page_fault seems to enable irq's in the interrupt handler?
>>>> is that expected? if so, one might see
>>>>
>>>> -- do_page_fault (enter kernel mode)
>>>>      -- enables interrupts
>>>>      -- gets interrupt - Sets need_resched.
>>>>         -- irqentry_exit - Sees it is kernel mode. Just checks preempt count
>>>> 			 and calls preempt_schedule_irq, which catches both
>>>> 			 preempt_count and !irqs_disabled. Hence the panic?
>>>>
>>>> Should do_page_fault do preempt_disable when it enables the interrupts?
>>>
>>> No, it is expected for page-fault to be able to schedule. Specifically,
>>> it must be able to sleep to support loading pages from disk.
>>
>> Oh yes. Ok. Thanks for taking a look.
>>
>>>
>>> Please check the value of preempt_count() (does it perchance have
>>> HARDIRQ_OFFSET?). Also, if the fault handler does enable IRQs, it must
>>> also disable them again once done.
>>
>> Will check it.
>>
>>>
>>> Notably, I see ___do_page_fault() do interrupt_cond_local_irq_enable(),
>>> but I'm not seeing a local_irq_disable() to match!
>>
>> Yes, that's likely the culprit. It is possible that ___do_page_fault runs for longer
>> and it may set need_resched. If it was in kernel mode, then it may not disable the
>> interrupt and then subsequent irqentry_exit panics.
>>
>> BTW I was able to consistently repro this on P9 with hackbench as below.
>>
>> for i in {0..10}; do ./hackbench 10 process 10000 loops; done;
>> for i in {0..10}; do ./hackbench 20 process 10000 loops; done;
>> for i in {0..10}; do ./hackbench 30 process 10000 loops; done;
>> for i in {0..10}; do ./hackbench 40 process 10000 loops; done;    << usually panics here.
>> for i in {0..10}; do ./hackbench 10 thread 10000 loops; done;
>> for i in {0..10}; do ./hackbench 20 thread 10000 loops; done;
>> for i in {0..10}; do ./hackbench -pipe 10 process 10000 loops; done;
>> for i in {0..10}; do ./hackbench -pipe 20 process 10000 loops; done;
>> for i in {0..10}; do ./hackbench -pipe 30 process 10000 loops; done;
>> for i in {0..10}; do ./hackbench -pipe 40 process 10000 loops; done;
>> for i in {0..10}; do ./hackbench -pipe 10 thread 10000 loops; done;
>> for i in {0..10}; do ./hackbench -pipe 20 thread 10000 loops; done;
>>
>> Note, if i run ./hackbench 40 process 10000 loops alone, it doesn't panic.
>> Likely some continuous stressing is needed to get into this case.
>>
>> Below diff helps to fix it. With it see test passes. Hackbench numbers aren't super happy
>> about it. It is regressing a bit compared to baseline. But no panic at least.
>> AND i have changed the BUG_ON to WARN_ON as irq_disabled right after. We could still fix the
>> call sites if the warning is seen.
>>
>> diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h
>> index de5601282755..7da373a56813 100644
>> --- a/arch/powerpc/include/asm/entry-common.h
>> +++ b/arch/powerpc/include/asm/entry-common.h
>> @@ -253,16 +253,17 @@ static inline void arch_interrupt_enter_prepare(struct pt_regs *regs)
>>   static inline void arch_interrupt_exit_prepare(struct pt_regs *regs)
>>   {
>>          if (user_mode(regs)) {
>> -               BUG_ON(regs_is_unrecoverable(regs));
>> -               BUG_ON(regs_irqs_disabled(regs));
>> +               WARN_ON(regs_is_unrecoverable(regs));
>> +               WARN_ON(regs_irqs_disabled(regs));
>>                  /*
>>                   * We don't need to restore AMR on the way back to userspace for KUAP.
>>                   * AMR can only have been unlocked if we interrupted the kernel.
>>                   */
>>                  kuap_assert_locked();
>> -
>> -               local_irq_disable();
>>          }
>> +
>> +       /* irqentry_exit expects to be called with interrupts disabled */
>> +       local_irq_disable();
>>   }
>>   static inline void arch_interrupt_async_enter_prepare(struct pt_regs *regs)
>>
> 
> I would suggest trying something a little more focussed like so:
> 
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index 806c74e0d5ab..b002c179415c 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -589,6 +589,7 @@ static __always_inline void __do_page_fault(struct pt_regs *regs)
>   	err = ___do_page_fault(regs, regs->dar, regs->dsisr);
>   	if (unlikely(err))
>   		bad_page_fault(regs, err);
> +	local_irq_disable();
>   }
>   
>   DEFINE_INTERRUPT_HANDLER(do_page_fault)
> 
> Since only ___do_page_fault() will enable interrupts, you only need to
> disable them again on its return path.
> 

Seems there are more...

do_program_check (called by program_check_exception, emulation_assist_interrupt)
alignment_exception
SPEFloatingPointException
facility_unavailable_exception


Many look like they can recover only if hit in userspace.
Hence I thought it would make sense to put it under arch_interrupt_exit_prepare
which is called just before irqentry_exit.


^ permalink raw reply

* Re: [PATCH 5/7] gpio: ppc44x: Convert GPIO to generic MMIO
From: Rosen Penev @ 2026-06-02  9:26 UTC (permalink / raw)
  To: Bartosz Golaszewski, Rosen Penev
  Cc: Madhavan Srinivasan, chleroy, Michael Ellerman, Nicholas Piggin,
	Linus Walleij, open list:LINUX FOR POWERPC (32-BIT AND 64-BIT),
	open list, linux-gpio
In-Reply-To: <CAMRc=Me0rqs8nxrp95X-2Bjw059ahRxwKrg-NtmEt025w2m9bw@mail.gmail.com>

On Tue Jun 2, 2026 at 12:51 AM PDT, Bartosz Golaszewski wrote:
> On Tue, 2 Jun 2026 07:01:29 +0200, Rosen Penev <rosenp@gmail.com> said:
>> Use gpio_generic_chip_init() to set up the PPC44x GPIO chip
>> instead of open-coding the basic get, set, locking and state handling.
>>
>> Keep the PPC44x-specific direction callbacks because they still need to
>> program ODR and the OSR/TSR registers around the generic data and
>> direction registers.
>>
>> Assisted-by: Codex:GPT-5.5
>> Signed-off-by: Rosen Penev <rosenp@gmail.com>
>> ---
>
> ...
>
>> @@ -124,10 +102,11 @@ static int
>>  ppc4xx_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
>>  {
>>  	struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
>> +	struct gpio_generic_chip *gen_gc = &chip->chip;
>>  	struct ppc4xx_gpio __iomem *regs = chip->regs;
>>  	unsigned long flags;
>>
>> -	spin_lock_irqsave(&chip->lock, flags);
>> +	gpio_generic_chip_lock_irqsave(gen_gc, flags);
>
> If you're already doing it, can you use lock guards too?
Sure. btw, I avoided placing

https://lore.kernel.org/all/20260517063754.21819-1-rosenp@gmail.com/

in the beginning of the series. My thinking is that's for older kernels.
I believe either the generic API or devm_gpiochip_add_data fixes this.
>
> Bart



^ permalink raw reply

* [PATCH v2 8/8] powerpc/signal: Use unsafe_copy_siginfo_to_user()
From: Christophe Leroy (CS GROUP) @ 2026-06-02  8:46 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1780389863.git.chleroy@kernel.org>

Use unsafe_copy_siginfo_to_user() in order to do the copy
within the user access block.

On an mpc 8321 (book3s/32) the improvement is about 5% on a process
sending a signal to itself.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/powerpc/kernel/signal_32.c | 18 +++++++++---------
 arch/powerpc/kernel/signal_64.c |  5 +----
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 6b1fbd95b07d..99a3efa874eb 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -716,12 +716,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *s
 }
 #endif
 
-#ifdef CONFIG_PPC64
-
-#define copy_siginfo_to_user	copy_siginfo_to_user32
-
-#endif /* CONFIG_PPC64 */
-
 /*
  * Set up a signal frame for a "real-time" signal handler
  * (one which gets siginfo).
@@ -735,6 +729,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 	struct pt_regs *regs = tsk->thread.regs;
 	/* Save the thread's msr before get_tm_stackpointer() changes it */
 	unsigned long msr = regs->msr;
+	compat_siginfo_t uinfo;
 
 	/* Set up Signal Frame */
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
@@ -744,6 +739,9 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 	else
 		prepare_save_user_regs(1);
 
+	if (IS_ENABLED(CONFIG_COMPAT))
+		copy_siginfo_to_external32(&uinfo, &ksig->info);
+
 	scoped_user_rw_access_size(newsp, __SIGNAL_FRAMESIZE + 16 + sizeof(*frame), badframe) {
 		struct mcontext __user *mctx;
 		struct mcontext __user *tm_mctx = NULL;
@@ -785,14 +783,16 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 			asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
 		}
 		unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, badframe);
+		if (IS_ENABLED(CONFIG_COMPAT))
+			unsafe_copy_to_user(&frame->info, &uinfo, sizeof(frame->info), badframe);
+		else
+			unsafe_copy_siginfo_to_user((void __user *)&frame->info, &ksig->info,
+						    badframe);
 
 		/* create a stack frame for the caller of the handler */
 		unsafe_put_user(regs->gpr[1], newsp, badframe);
 	}
 
-	if (copy_siginfo_to_user(&frame->info, &ksig->info))
-		goto badframe;
-
 	regs->link = tramp;
 
 #ifdef CONFIG_PPC_FPU_REGS
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 19f320218ed1..e85c430305a1 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -897,14 +897,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		}
 
 		unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe);
+		unsafe_copy_siginfo_to_user(&frame->info, &ksig->info, badframe);
 		/* Allocate a dummy caller frame for the signal handler. */
 		unsafe_put_user(regs->gpr[1], newsp, badframe);
 	}
 
-	/* Save the siginfo outside of the unsafe block. */
-	if (copy_siginfo_to_user(&frame->info, &ksig->info))
-		goto badframe;
-
 	/* Make sure signal handler doesn't get spurious FP exceptions */
 	tsk->thread.fp_state.fpscr = 0;
 
-- 
2.54.0



^ permalink raw reply related

* [PATCH v2 7/8] powerpc/uaccess: Add unsafe_clear_user()
From: Christophe Leroy (CS GROUP) @ 2026-06-02  8:46 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1780389863.git.chleroy@kernel.org>

Implement unsafe_clear_user() for powerpc.
It's a copy/paste of unsafe_copy_to_user() with value 0 as source.

It may be improved in a later patch by using 'dcbz' instruction
to zeroize full cache lines at once.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/powerpc/include/asm/uaccess.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index e98c628e3899..ef6711d1278b 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -588,6 +588,26 @@ do {									\
 		unsafe_put_user(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e); \
 } while (0)
 
+#define unsafe_clear_user(d, l, e)					\
+do {									\
+	u8 __user *_dst = (u8 __user *)(d);				\
+	size_t _len = (l);						\
+	int _i;								\
+									\
+	for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64)) \
+		unsafe_put_user(0, (u64 __user *)(_dst + _i), e);	\
+	if (_len & 4) {							\
+		unsafe_put_user(0, (u32 __user *)(_dst + _i), e);	\
+		_i += 4;						\
+	}								\
+	if (_len & 2) {							\
+		unsafe_put_user(0, (u16 __user *)(_dst + _i), e);	\
+		_i += 2;						\
+	}								\
+	if (_len & 1)							\
+		unsafe_put_user(0, (u8 __user *)(_dst + _i), e);	\
+} while (0)
+
 #define arch_get_kernel_nofault(dst, src, type, err_label)		\
 	__get_user_size_goto(*((type *)(dst)),				\
 		(__force type __user *)(src), sizeof(type), err_label)
-- 
2.54.0



^ permalink raw reply related

* [PATCH v2 6/8] signal: Add unsafe_copy_siginfo_to_user()
From: Christophe Leroy (CS GROUP) @ 2026-06-02  8:46 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1780389863.git.chleroy@kernel.org>

In the same spirit as commit fb05121fd6a2 ("signal: Add
unsafe_get_compat_sigset()"), implement an 'unsafe' version of
copy_siginfo_to_user() in order to use it within user access blocks.

For that, also add an 'unsafe' version of clear_user().

This commit adds the generic fallback for unsafe_clear_user().
Architectures wanting to use unsafe_copy_siginfo_to_user() within a
user_access_begin() section have to make sure they have their
own unsafe_clear_user().

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 include/linux/signal.h  | 15 +++++++++++++++
 include/linux/uaccess.h |  1 +
 kernel/signal.c         |  5 -----
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/include/linux/signal.h b/include/linux/signal.h
index f19816832f05..3ee6c9463f8b 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -36,6 +36,21 @@ static inline void copy_siginfo_to_external(siginfo_t *to,
 int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from);
 int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from);
 
+static __always_inline char __user *si_expansion(const siginfo_t __user *info)
+{
+	return ((char __user *)info) + sizeof(struct kernel_siginfo);
+}
+
+#define unsafe_copy_siginfo_to_user(to, from, label) do {		\
+	siginfo_t __user *__ucs_to = to;				\
+	const kernel_siginfo_t *__ucs_from = from;			\
+	char __user *__ucs_expansion = si_expansion(__ucs_to);		\
+									\
+	unsafe_copy_to_user(__ucs_to, __ucs_from,			\
+			    sizeof(struct kernel_siginfo), label);	\
+	unsafe_clear_user(__ucs_expansion, SI_EXPANSION_SIZE, label);	\
+} while (0)
+
 enum siginfo_layout {
 	SIL_KILL,
 	SIL_TIMER,
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 56328601218c..43e573b172a2 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -636,6 +636,7 @@ do {							\
 #define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e)
 #define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e)
 #define unsafe_copy_from_user(d,s,l,e) unsafe_op_wrap(__copy_from_user(d,s,l),e)
+#define unsafe_clear_user(d, l, e) unsafe_op_wrap(__clear_user(d, l), e)
 static inline unsigned long user_access_save(void) { return 0UL; }
 static inline void user_access_restore(unsigned long flags) { }
 #endif /* !user_access_begin */
diff --git a/kernel/signal.c b/kernel/signal.c
index 2d102e025883..2c5eb741fe8c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3493,11 +3493,6 @@ enum siginfo_layout siginfo_layout(unsigned sig, int si_code)
 	return layout;
 }
 
-static inline char __user *si_expansion(const siginfo_t __user *info)
-{
-	return ((char __user *)info) + sizeof(struct kernel_siginfo);
-}
-
 int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from)
 {
 	char __user *expansion = si_expansion(to);
-- 
2.54.0



^ permalink raw reply related

* [PATCH v2 5/8] powerpc/signal: Include the new stack frame inside the user access block
From: Christophe Leroy (CS GROUP) @ 2026-06-02  8:46 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1780389863.git.chleroy@kernel.org>

Include the new stack frame inside the user access block and set it up
using unsafe_put_user().

On an mpc 8321 (book3s/32) the improvement is about 4% on a process
sending a signal to itself.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/powerpc/kernel/signal_32.c | 28 ++++++++++++----------------
 arch/powerpc/kernel/signal_64.c | 13 ++++++-------
 2 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index f5d5139a1426..6b1fbd95b07d 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -730,7 +730,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 		       struct task_struct *tsk)
 {
 	struct rt_sigframe __user *frame;
-	unsigned long newsp = 0;
+	unsigned long __user *newsp;
 	unsigned long tramp;
 	struct pt_regs *regs = tsk->thread.regs;
 	/* Save the thread's msr before get_tm_stackpointer() changes it */
@@ -738,12 +738,13 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 
 	/* Set up Signal Frame */
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
+	newsp = (unsigned long __user *)((unsigned long)frame - (__SIGNAL_FRAMESIZE + 16));
 	if (MSR_TM_ACTIVE(msr))
 		prepare_save_tm_user_regs();
 	else
 		prepare_save_user_regs(1);
 
-	scoped_user_rw_access(frame, badframe) {
+	scoped_user_rw_access_size(newsp, __SIGNAL_FRAMESIZE + 16 + sizeof(*frame), badframe) {
 		struct mcontext __user *mctx;
 		struct mcontext __user *tm_mctx = NULL;
 
@@ -784,6 +785,9 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 			asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
 		}
 		unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, badframe);
+
+		/* create a stack frame for the caller of the handler */
+		unsafe_put_user(regs->gpr[1], newsp, badframe);
 	}
 
 	if (copy_siginfo_to_user(&frame->info, &ksig->info))
@@ -795,13 +799,8 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 	tsk->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */
 #endif
 
-	/* create a stack frame for the caller of the handler */
-	newsp = ((unsigned long)frame) - (__SIGNAL_FRAMESIZE + 16);
-	if (put_user(regs->gpr[1], (u32 __user *)newsp))
-		goto badframe;
-
 	/* Fill registers for signal handler */
-	regs->gpr[1] = newsp;
+	regs->gpr[1] = (unsigned long)newsp;
 	regs->gpr[3] = ksig->sig;
 	regs->gpr[4] = (unsigned long)&frame->info;
 	regs->gpr[5] = (unsigned long)&frame->uc;
@@ -826,7 +825,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 {
 	struct sigcontext __user *sc;
 	struct sigframe __user *frame;
-	unsigned long newsp = 0;
+	unsigned long __user *newsp;
 	unsigned long tramp;
 	struct pt_regs *regs = tsk->thread.regs;
 	/* Save the thread's msr before get_tm_stackpointer() changes it */
@@ -834,12 +833,13 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 
 	/* Set up Signal Frame */
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
+	newsp = (unsigned long __user *)((unsigned long)frame - __SIGNAL_FRAMESIZE);
 	if (MSR_TM_ACTIVE(msr))
 		prepare_save_tm_user_regs();
 	else
 		prepare_save_user_regs(1);
 
-	scoped_user_rw_access(frame, badframe) {
+	scoped_user_rw_access_size(newsp, __SIGNAL_FRAMESIZE + sizeof(*frame), badframe) {
 		struct mcontext __user *mctx;
 		struct mcontext __user *tm_mctx = NULL;
 
@@ -876,6 +876,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 			unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], badframe);
 			asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
 		}
+		unsafe_put_user(regs->gpr[1], newsp, badframe);
 	}
 
 	regs->link = tramp;
@@ -884,12 +885,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 	tsk->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */
 #endif
 
-	/* create a stack frame for the caller of the handler */
-	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
-	if (put_user(regs->gpr[1], (u32 __user *)newsp))
-		goto badframe;
-
-	regs->gpr[1] = newsp;
+	regs->gpr[1] = (unsigned long)newsp;
 	regs->gpr[3] = ksig->sig;
 	regs->gpr[4] = (unsigned long) sc;
 	regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler);
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index d23a980b32a8..19f320218ed1 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -856,13 +856,14 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		struct task_struct *tsk)
 {
 	struct rt_sigframe __user *frame;
-	unsigned long newsp = 0;
+	unsigned long __user *newsp;
 	long err = 0;
 	struct pt_regs *regs = tsk->thread.regs;
 	/* Save the thread's msr before get_tm_stackpointer() changes it */
 	unsigned long msr = regs->msr;
 
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
+	newsp = (unsigned long __user *)((unsigned long)frame - __SIGNAL_FRAMESIZE);
 
 	/*
 	 * This only applies when calling unsafe_setup_sigcontext() and must be
@@ -874,7 +875,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		err |= setup_tm_sigcontexts(frame, tsk, ksig->sig, NULL,
 					    (unsigned long)ksig->ka.sa.sa_handler, msr);
 
-	scoped_user_write_access(frame, badframe) {
+	scoped_user_write_access_size(newsp, __SIGNAL_FRAMESIZE + sizeof(*frame), badframe) {
 		unsafe_put_user(&frame->info, &frame->pinfo, badframe);
 		unsafe_put_user(&frame->uc, &frame->puc, badframe);
 
@@ -896,6 +897,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		}
 
 		unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe);
+		/* Allocate a dummy caller frame for the signal handler. */
+		unsafe_put_user(regs->gpr[1], newsp, badframe);
 	}
 
 	/* Save the siginfo outside of the unsafe block. */
@@ -915,10 +918,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		regs_set_return_ip(regs, (unsigned long) &frame->tramp[0]);
 	}
 
-	/* Allocate a dummy caller frame for the signal handler. */
-	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
-	err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
-
 	/* Set up "regs" so we "return" to the signal handler. */
 	if (is_elf2_task()) {
 		regs->ctr = (unsigned long) ksig->ka.sa.sa_handler;
@@ -940,7 +939,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 
 	/* enter the signal handler in native-endian mode */
 	regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
-	regs->gpr[1] = newsp;
+	regs->gpr[1] = (unsigned long)newsp;
 	regs->gpr[3] = ksig->sig;
 	regs->result = 0;
 	if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
-- 
2.54.0



^ permalink raw reply related

* [PATCH v2 4/8] powerpc/signal64: Access function descriptor with scoped user access
From: Christophe Leroy (CS GROUP) @ 2026-06-02  8:46 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1780389863.git.chleroy@kernel.org>

Access the function descriptor of the handler within a scoped
user access block.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/powerpc/kernel/signal_64.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 4ff8ad5d60d0..d23a980b32a8 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -932,8 +932,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		struct func_desc __user *ptr =
 			(struct func_desc __user *)ksig->ka.sa.sa_handler;
 
-		err |= get_user(regs->ctr, &ptr->addr);
-		err |= get_user(regs->gpr[2], &ptr->toc);
+		scoped_user_read_access(ptr, badfunc) {
+			unsafe_get_user(regs->ctr, &ptr->addr, badfunc);
+			unsafe_get_user(regs->gpr[2], &ptr->toc, badfunc);
+		}
 	}
 
 	/* enter the signal handler in native-endian mode */
@@ -956,5 +958,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 badframe:
 	signal_fault(current, regs, "handle_rt_signal64", frame);
 
+	return 1;
+
+badfunc:
+	signal_fault(current, regs, __func__, (void __user *)ksig->ka.sa.sa_handler);
+
 	return 1;
 }
-- 
2.54.0



^ permalink raw reply related

* [PATCH v2 3/8] powerpc/signal64: Convert to scoped user access
From: Christophe Leroy (CS GROUP) @ 2026-06-02  8:46 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1780389863.git.chleroy@kernel.org>

Commit 861574d51bbd ("powerpc/uaccess: Implement masked user access")
provides optimised user access by avoiding the cost of access_ok().

Convert signal64 functions to scoped user access.

Scoped user access also makes the code simpler.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/powerpc/kernel/signal_64.c | 81 +++++++++++++--------------------
 1 file changed, 32 insertions(+), 49 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 9b4cd0bbf4a7..4ff8ad5d60d0 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -693,15 +693,12 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 
 	if (old_ctx != NULL) {
 		prepare_setup_sigcontext(current);
-		if (!user_write_access_begin(old_ctx, ctx_size))
-			return -EFAULT;
-
-		unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL,
-					0, ctx_has_vsx_region, efault_out);
-		unsafe_copy_to_user(&old_ctx->uc_sigmask, &current->blocked,
-				    sizeof(sigset_t), efault_out);
-
-		user_write_access_end();
+		scoped_user_write_access_size(old_ctx, ctx_size, efault_out) {
+			unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL,
+						0, ctx_has_vsx_region, efault_out);
+			unsafe_copy_to_user(&old_ctx->uc_sigmask, &current->blocked,
+					    sizeof(sigset_t), efault_out);
+		}
 	}
 	if (new_ctx == NULL)
 		return 0;
@@ -727,14 +724,12 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	}
 	set_current_blocked(&set);
 
-	if (!user_read_access_begin(new_ctx, ctx_size))
-		return -EFAULT;
-	if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
-		user_read_access_end();
-		force_exit_sig(SIGSEGV);
-		return -EFAULT;
+	scoped_user_read_access_size(new_ctx, ctx_size, efault_out) {
+		if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
+			force_exit_sig(SIGSEGV);
+			return -EFAULT;
+		}
 	}
-	user_read_access_end();
 
 	/* This returns like rt_sigreturn */
 	set_thread_flag(TIF_RESTOREALL);
@@ -742,7 +737,6 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	return 0;
 
 efault_out:
-	user_write_access_end();
 	return -EFAULT;
 }
 
@@ -825,6 +819,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 					   &uc_transact->uc_mcontext))
 			goto badframe;
 	} else {
+		struct sigcontext __user *uc_mcontext = &uc->uc_mcontext;
 		/*
 		 * Fall through, for non-TM restore
 		 *
@@ -839,13 +834,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
 		 */
 		regs_set_return_msr(current->thread.regs,
 				current->thread.regs->msr & ~MSR_TS_MASK);
-		if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext)))
-			goto badframe;
-
-		unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext,
-					  badframe_block);
-
-		user_read_access_end();
+		scoped_user_read_access(uc_mcontext, badframe)
+			unsafe_restore_sigcontext(current, NULL, 1, uc_mcontext, badframe);
 	}
 
 	if (restore_altstack(&uc->uc_stack))
@@ -855,8 +845,6 @@ SYSCALL_DEFINE0(rt_sigreturn)
 
 	return 0;
 
-badframe_block:
-	user_read_access_end();
 badframe:
 	signal_fault(current, regs, "rt_sigreturn", uc);
 
@@ -886,32 +874,29 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		err |= setup_tm_sigcontexts(frame, tsk, ksig->sig, NULL,
 					    (unsigned long)ksig->ka.sa.sa_handler, msr);
 
-	if (!user_write_access_begin(frame, sizeof(*frame)))
-		goto badframe;
+	scoped_user_write_access(frame, badframe) {
+		unsafe_put_user(&frame->info, &frame->pinfo, badframe);
+		unsafe_put_user(&frame->uc, &frame->puc, badframe);
 
-	unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
-	unsafe_put_user(&frame->uc, &frame->puc, badframe_block);
+		/* Create the ucontext.  */
+		unsafe_put_user(0, &frame->uc.uc_flags, badframe);
+		unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe);
 
-	/* Create the ucontext.  */
-	unsafe_put_user(0, &frame->uc.uc_flags, badframe_block);
-	unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block);
-
-	if (MSR_TM_ACTIVE(msr)) {
+		if (MSR_TM_ACTIVE(msr)) {
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-		/* The ucontext_t passed to userland points to the second
-		 * ucontext_t (for transactional state) with its uc_link ptr.
-		 */
-		unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block);
+			/* The ucontext_t passed to userland points to the second
+			 * ucontext_t (for transactional state) with its uc_link ptr.
+			 */
+			unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe);
 #endif
-	} else {
-		unsafe_put_user(0, &frame->uc.uc_link, badframe_block);
-		unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
-					NULL, (unsigned long)ksig->ka.sa.sa_handler,
-					1, badframe_block);
-	}
+		} else {
+			unsafe_put_user(0, &frame->uc.uc_link, badframe);
+			unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig, NULL,
+						(unsigned long)ksig->ka.sa.sa_handler, 1, badframe);
+		}
 
-	unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
-	user_write_access_end();
+		unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe);
+	}
 
 	/* Save the siginfo outside of the unsafe block. */
 	if (copy_siginfo_to_user(&frame->info, &ksig->info))
@@ -968,8 +953,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 
 	return 0;
 
-badframe_block:
-	user_write_access_end();
 badframe:
 	signal_fault(current, regs, "handle_rt_signal64", frame);
 
-- 
2.54.0



^ permalink raw reply related

* [PATCH v2 2/8] powerpc/signal64: Untangle setup_tm_sigcontexts() and user_access_begin()
From: Christophe Leroy (CS GROUP) @ 2026-06-02  8:46 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1780389863.git.chleroy@kernel.org>

Call setup_tm_sigcontexts() before opening user access to avoid
having to close and reopen it.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
v2: Add a stub setup_tm_sigcontexts() for when CONFIG_PPC_TRANSACTIONAL_MEM is not set
---
 arch/powerpc/kernel/signal_64.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 86bb5bb4c143..9b4cd0bbf4a7 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -203,8 +203,7 @@ static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc,
  * examine the transactional registers in the 2nd sigcontext to determine the
  * real origin of the signal.
  */
-static long setup_tm_sigcontexts(struct sigcontext __user *sc,
-				 struct sigcontext __user *tm_sc,
+static long setup_tm_sigcontexts(struct rt_sigframe __user *frame,
 				 struct task_struct *tsk,
 				 int signr, sigset_t *set, unsigned long handler,
 				 unsigned long msr)
@@ -217,6 +216,8 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
 	 * Userland shall check AT_HWCAP to know wether it can rely on the
 	 * v_regs pointer or not.
 	 */
+	struct sigcontext __user *sc = &frame->uc.uc_mcontext;
+	struct sigcontext __user *tm_sc = &frame->uc_transact.uc_mcontext;
 #ifdef CONFIG_ALTIVEC
 	elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
 	elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc);
@@ -325,6 +326,15 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
 
 	return err;
 }
+#else
+static long setup_tm_sigcontexts(struct rt_sigframe __user *frame,
+				 struct task_struct *tsk,
+				 int signr, sigset_t *set, unsigned long handler,
+				 unsigned long msr)
+{
+	BUILD_BUG();
+	return -EINVAL;
+}
 #endif
 
 /*
@@ -872,6 +882,9 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 	 */
 	if (!MSR_TM_ACTIVE(msr))
 		prepare_setup_sigcontext(tsk);
+	else
+		err |= setup_tm_sigcontexts(frame, tsk, ksig->sig, NULL,
+					    (unsigned long)ksig->ka.sa.sa_handler, msr);
 
 	if (!user_write_access_begin(frame, sizeof(*frame)))
 		goto badframe;
@@ -889,19 +902,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		 * ucontext_t (for transactional state) with its uc_link ptr.
 		 */
 		unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block);
-
-		user_write_access_end();
-
-		err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
-					    &frame->uc_transact.uc_mcontext,
-					    tsk, ksig->sig, NULL,
-					    (unsigned long)ksig->ka.sa.sa_handler,
-					    msr);
-
-		if (!user_write_access_begin(&frame->uc.uc_sigmask,
-					     sizeof(frame->uc.uc_sigmask)))
-			goto badframe;
-
 #endif
 	} else {
 		unsafe_put_user(0, &frame->uc.uc_link, badframe_block);
-- 
2.54.0



^ permalink raw reply related

* [PATCH v2 1/8] powerpc/signal32: Convert to scoped user access
From: Christophe Leroy (CS GROUP) @ 2026-06-02  8:46 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1780389863.git.chleroy@kernel.org>

Commit 861574d51bbd ("powerpc/uaccess: Implement masked user access")
provides optimised user access by avoiding the cost of access_ok().

Convert signal32 functions to scoped user access.

Scoped user access also makes the code simpler.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/powerpc/kernel/signal_32.c | 456 +++++++++++++++-----------------
 1 file changed, 217 insertions(+), 239 deletions(-)

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 7a718ed32b27..f5d5139a1426 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -468,98 +468,98 @@ static long restore_user_regs(struct pt_regs *regs,
 {
 	unsigned int save_r2 = 0;
 	unsigned long msr;
-#ifdef CONFIG_VSX
-	int i;
-#endif
 
-	if (!user_read_access_begin(sr, sizeof(*sr)))
-		return 1;
-	/*
-	 * restore general registers but not including MSR or SOFTE. Also
-	 * take care of keeping r2 (TLS) intact if not a signal
-	 */
-	if (!sig)
-		save_r2 = (unsigned int)regs->gpr[2];
-	unsafe_restore_general_regs(regs, sr, failed);
-	set_trap_norestart(regs);
-	unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
-	if (!sig)
-		regs->gpr[2] = (unsigned long) save_r2;
-
-	/* if doing signal return, restore the previous little-endian mode */
-	if (sig)
-		regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
+	scoped_user_read_access(sr, failed) {
+		/*
+		 * restore general registers but not including MSR or SOFTE. Also
+		 * take care of keeping r2 (TLS) intact if not a signal
+		 */
+		if (!sig)
+			save_r2 = (unsigned int)regs->gpr[2];
+		unsafe_restore_general_regs(regs, sr, failed);
+		set_trap_norestart(regs);
+		unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
+		if (!sig)
+			regs->gpr[2] = (unsigned long)save_r2;
+
+		/* if doing signal return, restore the previous little-endian mode */
+		if (sig)
+			regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
 
 #ifdef CONFIG_ALTIVEC
-	/*
-	 * Force the process to reload the altivec registers from
-	 * current->thread when it next does altivec instructions
-	 */
-	regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
-	if (msr & MSR_VEC) {
-		/* restore altivec registers from the stack */
-		unsafe_copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
-				      sizeof(sr->mc_vregs), failed);
-		current->thread.used_vr = true;
-	} else if (current->thread.used_vr)
-		memset(&current->thread.vr_state, 0,
-		       ELF_NVRREG * sizeof(vector128));
-
-	/* Always get VRSAVE back */
-	unsafe_get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32], failed);
-	if (cpu_has_feature(CPU_FTR_ALTIVEC))
-		mtspr(SPRN_VRSAVE, current->thread.vrsave);
+		/*
+		 * Force the process to reload the altivec registers from
+		 * current->thread when it next does altivec instructions
+		 */
+		regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
+		if (msr & MSR_VEC) {
+			/* restore altivec registers from the stack */
+			unsafe_copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
+					      sizeof(sr->mc_vregs), failed);
+			current->thread.used_vr = true;
+		} else if (current->thread.used_vr) {
+			memset(&current->thread.vr_state, 0,
+			       ELF_NVRREG * sizeof(vector128));
+		}
+
+		/* Always get VRSAVE back */
+		unsafe_get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32], failed);
+		if (cpu_has_feature(CPU_FTR_ALTIVEC))
+			mtspr(SPRN_VRSAVE, current->thread.vrsave);
 #endif /* CONFIG_ALTIVEC */
-	unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
+		unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
 
 #ifdef CONFIG_VSX
-	/*
-	 * Force the process to reload the VSX registers from
-	 * current->thread when it next does VSX instruction.
-	 */
-	regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
-	if (msr & MSR_VSX) {
 		/*
-		 * Restore altivec registers from the stack to a local
-		 * buffer, then write this out to the thread_struct
+		 * Force the process to reload the VSX registers from
+		 * current->thread when it next does VSX instruction.
 		 */
-		unsafe_copy_vsx_from_user(current, &sr->mc_vsregs, failed);
-		current->thread.used_vsr = true;
-	} else if (current->thread.used_vsr)
-		for (i = 0; i < 32 ; i++)
-			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+		regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
+		if (msr & MSR_VSX) {
+			/*
+			 * Restore altivec registers from the stack to a local
+			 * buffer, then write this out to the thread_struct
+			 */
+			unsafe_copy_vsx_from_user(current, &sr->mc_vsregs, failed);
+			current->thread.used_vsr = true;
+		} else if (current->thread.used_vsr) {
+			int i;
+
+			for (i = 0; i < 32 ; i++)
+				current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+		}
 #endif /* CONFIG_VSX */
-	/*
-	 * force the process to reload the FP registers from
-	 * current->thread when it next does FP instructions
-	 */
-	regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
+		/*
+		 * force the process to reload the FP registers from
+		 * current->thread when it next does FP instructions
+		 */
+		regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
 
 #ifdef CONFIG_SPE
-	/*
-	 * Force the process to reload the spe registers from
-	 * current->thread when it next does spe instructions.
-	 * Since this is user ABI, we must enforce the sizing.
-	 */
-	BUILD_BUG_ON(sizeof(current->thread.spe) != ELF_NEVRREG * sizeof(u32));
-	regs_set_return_msr(regs, regs->msr & ~MSR_SPE);
-	if (msr & MSR_SPE) {
-		/* restore spe registers from the stack */
-		unsafe_copy_from_user(&current->thread.spe, &sr->mc_vregs,
-				      sizeof(current->thread.spe), failed);
-		current->thread.used_spe = true;
-	} else if (current->thread.used_spe)
-		memset(&current->thread.spe, 0, sizeof(current->thread.spe));
-
-	/* Always get SPEFSCR back */
-	unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
-#endif /* CONFIG_SPE */
+		/*
+		 * Force the process to reload the spe registers from
+		 * current->thread when it next does spe instructions.
+		 * Since this is user ABI, we must enforce the sizing.
+		 */
+		BUILD_BUG_ON(sizeof(current->thread.spe) != ELF_NEVRREG * sizeof(u32));
+		regs_set_return_msr(regs, regs->msr & ~MSR_SPE);
+		if (msr & MSR_SPE) {
+			/* restore spe registers from the stack */
+			unsafe_copy_from_user(&current->thread.spe, &sr->mc_vregs,
+					      sizeof(current->thread.spe), failed);
+			current->thread.used_spe = true;
+		} else if (current->thread.used_spe) {
+			memset(&current->thread.spe, 0, sizeof(current->thread.spe));
+		}
 
-	user_read_access_end();
+		/* Always get SPEFSCR back */
+		unsafe_get_user(current->thread.spefscr,
+				(u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
+#endif /* CONFIG_SPE */
+	}
 	return 0;
 
 failed:
-	user_read_access_end();
 	return 1;
 }
 
@@ -574,7 +574,6 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 				 struct mcontext __user *tm_sr)
 {
 	unsigned long msr, msr_hi;
-	int i;
 
 	if (tm_suspend_disabled)
 		return 1;
@@ -585,86 +584,81 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 	 * TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR
 	 * were set by the signal delivery.
 	 */
-	if (!user_read_access_begin(sr, sizeof(*sr)))
-		return 1;
-
-	unsafe_restore_general_regs(&current->thread.ckpt_regs, sr, failed);
-	unsafe_get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP], failed);
-	unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
+	scoped_user_read_access(sr, failed) {
+		unsafe_restore_general_regs(&current->thread.ckpt_regs, sr, failed);
+		unsafe_get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP], failed);
+		unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
 
-	/* Restore the previous little-endian mode */
-	regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
+		/* Restore the previous little-endian mode */
+		regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
 
-	regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
-	if (msr & MSR_VEC) {
-		/* restore altivec registers from the stack */
-		unsafe_copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
-				      sizeof(sr->mc_vregs), failed);
-		current->thread.used_vr = true;
-	} else if (current->thread.used_vr) {
-		memset(&current->thread.vr_state, 0,
-		       ELF_NVRREG * sizeof(vector128));
-		memset(&current->thread.ckvr_state, 0,
-		       ELF_NVRREG * sizeof(vector128));
-	}
+		regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
+		if (msr & MSR_VEC) {
+			/* restore altivec registers from the stack */
+			unsafe_copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
+					      sizeof(sr->mc_vregs), failed);
+			current->thread.used_vr = true;
+		} else if (current->thread.used_vr) {
+			memset(&current->thread.vr_state, 0, ELF_NVRREG * sizeof(vector128));
+			memset(&current->thread.ckvr_state, 0, ELF_NVRREG * sizeof(vector128));
+		}
 
-	/* Always get VRSAVE back */
-	unsafe_get_user(current->thread.ckvrsave,
-			(u32 __user *)&sr->mc_vregs[32], failed);
-	if (cpu_has_feature(CPU_FTR_ALTIVEC))
-		mtspr(SPRN_VRSAVE, current->thread.ckvrsave);
+		/* Always get VRSAVE back */
+		unsafe_get_user(current->thread.ckvrsave,
+				(u32 __user *)&sr->mc_vregs[32], failed);
+		if (cpu_has_feature(CPU_FTR_ALTIVEC))
+			mtspr(SPRN_VRSAVE, current->thread.ckvrsave);
 
-	regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
+		regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
 
-	unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
+		unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
 
-	regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
-	if (msr & MSR_VSX) {
-		/*
-		 * Restore altivec registers from the stack to a local
-		 * buffer, then write this out to the thread_struct
-		 */
-		unsafe_copy_ckvsx_from_user(current, &sr->mc_vsregs, failed);
-		current->thread.used_vsr = true;
-	} else if (current->thread.used_vsr)
-		for (i = 0; i < 32 ; i++) {
-			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
-			current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+		regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
+		if (msr & MSR_VSX) {
+			/*
+			 * Restore altivec registers from the stack to a local
+			 * buffer, then write this out to the thread_struct
+			 */
+			unsafe_copy_ckvsx_from_user(current, &sr->mc_vsregs, failed);
+			current->thread.used_vsr = true;
+		} else if (current->thread.used_vsr) {
+			int i;
+
+			for (i = 0; i < 32 ; i++) {
+				current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+				current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+			}
 		}
+	}
 
-	user_read_access_end();
-
-	if (!user_read_access_begin(tm_sr, sizeof(*tm_sr)))
-		return 1;
+	scoped_user_read_access(tm_sr, failed) {
+		unsafe_restore_general_regs(regs, tm_sr, failed);
 
-	unsafe_restore_general_regs(regs, tm_sr, failed);
+		/* restore altivec registers from the stack */
+		if (msr & MSR_VEC)
+			unsafe_copy_from_user(&current->thread.vr_state, &tm_sr->mc_vregs,
+					      sizeof(sr->mc_vregs), failed);
 
-	/* restore altivec registers from the stack */
-	if (msr & MSR_VEC)
-		unsafe_copy_from_user(&current->thread.vr_state, &tm_sr->mc_vregs,
-				      sizeof(sr->mc_vregs), failed);
+		/* Always get VRSAVE back */
+		unsafe_get_user(current->thread.vrsave,
+				(u32 __user *)&tm_sr->mc_vregs[32], failed);
 
-	/* Always get VRSAVE back */
-	unsafe_get_user(current->thread.vrsave,
-			(u32 __user *)&tm_sr->mc_vregs[32], failed);
+		unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed);
 
-	unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed);
+		if (msr & MSR_VSX) {
+			/*
+			 * Restore altivec registers from the stack to a local
+			 * buffer, then write this out to the thread_struct
+			 */
+			unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed);
+			current->thread.used_vsr = true;
+		}
 
-	if (msr & MSR_VSX) {
-		/*
-		 * Restore altivec registers from the stack to a local
-		 * buffer, then write this out to the thread_struct
-		 */
-		unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed);
-		current->thread.used_vsr = true;
+		/* Get the top half of the MSR from the user context */
+		unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed);
+		msr_hi <<= 32;
 	}
 
-	/* Get the top half of the MSR from the user context */
-	unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed);
-	msr_hi <<= 32;
-
-	user_read_access_end();
-
 	/* If TM bits are set to the reserved value, it's an invalid context */
 	if (MSR_TM_RESV(msr_hi))
 		return 1;
@@ -712,7 +706,6 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 	return 0;
 
 failed:
-	user_read_access_end();
 	return 1;
 }
 #else
@@ -737,8 +730,6 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 		       struct task_struct *tsk)
 {
 	struct rt_sigframe __user *frame;
-	struct mcontext __user *mctx;
-	struct mcontext __user *tm_mctx = NULL;
 	unsigned long newsp = 0;
 	unsigned long tramp;
 	struct pt_regs *regs = tsk->thread.regs;
@@ -747,52 +738,53 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 
 	/* Set up Signal Frame */
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
-	mctx = &frame->uc.uc_mcontext;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	tm_mctx = &frame->uc_transact.uc_mcontext;
-#endif
 	if (MSR_TM_ACTIVE(msr))
 		prepare_save_tm_user_regs();
 	else
 		prepare_save_user_regs(1);
 
-	if (!user_access_begin(frame, sizeof(*frame)))
-		goto badframe;
+	scoped_user_rw_access(frame, badframe) {
+		struct mcontext __user *mctx;
+		struct mcontext __user *tm_mctx = NULL;
 
-	/* Put the siginfo & fill in most of the ucontext */
-	unsafe_put_user(0, &frame->uc.uc_flags, failed);
+		mctx = &frame->uc.uc_mcontext;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+		tm_mctx = &frame->uc_transact.uc_mcontext;
+#endif
+		/* Put the siginfo & fill in most of the ucontext */
+		unsafe_put_user(0, &frame->uc.uc_flags, badframe);
 #ifdef CONFIG_PPC64
-	unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed);
+		unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe);
 #else
-	unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed);
+		unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe);
 #endif
-	unsafe_put_user(to_user_ptr(&frame->uc.uc_mcontext), &frame->uc.uc_regs, failed);
+		unsafe_put_user(to_user_ptr(&frame->uc.uc_mcontext), &frame->uc.uc_regs, badframe);
 
-	if (MSR_TM_ACTIVE(msr)) {
+		if (MSR_TM_ACTIVE(msr)) {
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-		unsafe_put_user((unsigned long)&frame->uc_transact,
-				&frame->uc.uc_link, failed);
-		unsafe_put_user((unsigned long)tm_mctx,
-				&frame->uc_transact.uc_regs, failed);
+			unsafe_put_user((unsigned long)&frame->uc_transact,
+					&frame->uc.uc_link, badframe);
+			unsafe_put_user((unsigned long)tm_mctx,
+					&frame->uc_transact.uc_regs, badframe);
 #endif
-		unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed);
-	} else {
-		unsafe_put_user(0, &frame->uc.uc_link, failed);
-		unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed);
-	}
+			unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, badframe);
+		} else {
+			unsafe_put_user(0, &frame->uc.uc_link, badframe);
+			unsafe_save_user_regs(regs, mctx, tm_mctx, 1, badframe);
+		}
 
-	/* Save user registers on the stack */
-	if (tsk->mm->context.vdso) {
-		tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32);
-	} else {
-		tramp = (unsigned long)mctx->mc_pad;
-		unsafe_put_user(PPC_RAW_LI(_R0, __NR_rt_sigreturn), &mctx->mc_pad[0], failed);
-		unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed);
-		asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
+		/* Save user registers on the stack */
+		if (tsk->mm->context.vdso) {
+			tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32);
+		} else {
+			tramp = (unsigned long)mctx->mc_pad;
+			unsafe_put_user(PPC_RAW_LI(_R0, __NR_rt_sigreturn), &mctx->mc_pad[0],
+					badframe);
+			unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], badframe);
+			asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
+		}
+		unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, badframe);
 	}
-	unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed);
-
-	user_access_end();
 
 	if (copy_siginfo_to_user(&frame->info, &ksig->info))
 		goto badframe;
@@ -820,9 +812,6 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 
 	return 0;
 
-failed:
-	user_access_end();
-
 badframe:
 	signal_fault(tsk, regs, "handle_rt_signal32", frame);
 
@@ -837,8 +826,6 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 {
 	struct sigcontext __user *sc;
 	struct sigframe __user *frame;
-	struct mcontext __user *mctx;
-	struct mcontext __user *tm_mctx = NULL;
 	unsigned long newsp = 0;
 	unsigned long tramp;
 	struct pt_regs *regs = tsk->thread.regs;
@@ -847,46 +834,49 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 
 	/* Set up Signal Frame */
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
-	mctx = &frame->mctx;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	tm_mctx = &frame->mctx_transact;
-#endif
 	if (MSR_TM_ACTIVE(msr))
 		prepare_save_tm_user_regs();
 	else
 		prepare_save_user_regs(1);
 
-	if (!user_access_begin(frame, sizeof(*frame)))
-		goto badframe;
-	sc = (struct sigcontext __user *) &frame->sctx;
+	scoped_user_rw_access(frame, badframe) {
+		struct mcontext __user *mctx;
+		struct mcontext __user *tm_mctx = NULL;
+
+		mctx = &frame->mctx;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+		tm_mctx = &frame->mctx_transact;
+#endif
+		sc = (struct sigcontext __user *)&frame->sctx;
 
 #if _NSIG != 64
 #error "Please adjust handle_signal()"
 #endif
-	unsafe_put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler, failed);
-	unsafe_put_user(oldset->sig[0], &sc->oldmask, failed);
+		unsafe_put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler, badframe);
+		unsafe_put_user(oldset->sig[0], &sc->oldmask, badframe);
 #ifdef CONFIG_PPC64
-	unsafe_put_user((oldset->sig[0] >> 32), &sc->_unused[3], failed);
+		unsafe_put_user((oldset->sig[0] >> 32), &sc->_unused[3], badframe);
 #else
-	unsafe_put_user(oldset->sig[1], &sc->_unused[3], failed);
+		unsafe_put_user(oldset->sig[1], &sc->_unused[3], badframe);
 #endif
-	unsafe_put_user(to_user_ptr(mctx), &sc->regs, failed);
-	unsafe_put_user(ksig->sig, &sc->signal, failed);
+		unsafe_put_user(to_user_ptr(mctx), &sc->regs, badframe);
+		unsafe_put_user(ksig->sig, &sc->signal, badframe);
 
-	if (MSR_TM_ACTIVE(msr))
-		unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed);
-	else
-		unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed);
-
-	if (tsk->mm->context.vdso) {
-		tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32);
-	} else {
-		tramp = (unsigned long)mctx->mc_pad;
-		unsafe_put_user(PPC_RAW_LI(_R0, __NR_sigreturn), &mctx->mc_pad[0], failed);
-		unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed);
-		asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
+		if (MSR_TM_ACTIVE(msr))
+			unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, badframe);
+		else
+			unsafe_save_user_regs(regs, mctx, tm_mctx, 1, badframe);
+
+		if (tsk->mm->context.vdso) {
+			tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32);
+		} else {
+			tramp = (unsigned long)mctx->mc_pad;
+			unsafe_put_user(PPC_RAW_LI(_R0, __NR_sigreturn), &mctx->mc_pad[0],
+					badframe);
+			unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], badframe);
+			asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
+		}
 	}
-	user_access_end();
 
 	regs->link = tramp;
 
@@ -908,9 +898,6 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 
 	return 0;
 
-failed:
-	user_access_end();
-
 badframe:
 	signal_fault(tsk, regs, "handle_signal32", frame);
 
@@ -922,21 +909,19 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int
 	sigset_t set;
 	struct mcontext __user *mcp;
 
-	if (!user_read_access_begin(ucp, sizeof(*ucp)))
-		return -EFAULT;
-
-	unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
+	scoped_user_read_access(ucp, failed) {
+		unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
 #ifdef CONFIG_PPC64
-	{
-		u32 cmcp;
+		{
+			u32 cmcp;
 
-		unsafe_get_user(cmcp, &ucp->uc_regs, failed);
-		mcp = (struct mcontext __user *)(u64)cmcp;
-	}
+			unsafe_get_user(cmcp, &ucp->uc_regs, failed);
+			mcp = (struct mcontext __user *)(u64)cmcp;
+		}
 #else
-	unsafe_get_user(mcp, &ucp->uc_regs, failed);
+		unsafe_get_user(mcp, &ucp->uc_regs, failed);
 #endif
-	user_read_access_end();
+	}
 
 	set_current_blocked(&set);
 	if (restore_user_regs(regs, mcp, sig))
@@ -945,7 +930,6 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int
 	return 0;
 
 failed:
-	user_read_access_end();
 	return -EFAULT;
 }
 
@@ -960,13 +944,10 @@ static int do_setcontext_tm(struct ucontext __user *ucp,
 	u32 cmcp;
 	u32 tm_cmcp;
 
-	if (!user_read_access_begin(ucp, sizeof(*ucp)))
-		return -EFAULT;
-
-	unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
-	unsafe_get_user(cmcp, &ucp->uc_regs, failed);
-
-	user_read_access_end();
+	scoped_user_read_access(ucp, failed) {
+		unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
+		unsafe_get_user(cmcp, &ucp->uc_regs, failed);
+	}
 
 	if (__get_user(tm_cmcp, &tm_ucp->uc_regs))
 		return -EFAULT;
@@ -981,7 +962,6 @@ static int do_setcontext_tm(struct ucontext __user *ucp,
 	return 0;
 
 failed:
-	user_read_access_end();
 	return -EFAULT;
 }
 #endif
@@ -1051,12 +1031,11 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 		mctx = (struct mcontext __user *)
 			((unsigned long) &old_ctx->uc_mcontext & ~0xfUL);
 		prepare_save_user_regs(ctx_has_vsx_region);
-		if (!user_write_access_begin(old_ctx, ctx_size))
-			return -EFAULT;
-		unsafe_save_user_regs(regs, mctx, NULL, ctx_has_vsx_region, failed);
-		unsafe_put_sigset_t(&old_ctx->uc_sigmask, &current->blocked, failed);
-		unsafe_put_user(to_user_ptr(mctx), &old_ctx->uc_regs, failed);
-		user_write_access_end();
+		scoped_user_write_access_size(old_ctx, ctx_size, failed) {
+			unsafe_save_user_regs(regs, mctx, NULL, ctx_has_vsx_region, failed);
+			unsafe_put_sigset_t(&old_ctx->uc_sigmask, &current->blocked, failed);
+			unsafe_put_user(to_user_ptr(mctx), &old_ctx->uc_regs, failed);
+		}
 	}
 	if (new_ctx == NULL)
 		return 0;
@@ -1084,7 +1063,6 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	return 0;
 
 failed:
-	user_write_access_end();
 	return -EFAULT;
 }
 
-- 
2.54.0



^ permalink raw reply related

* [PATCH v2 0/8] powerpc/signal: Convert to scoped user access
From: Christophe Leroy (CS GROUP) @ 2026-06-02  8:46 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev

This series converts powerpc architecture signal handling to scoped
user access and enlarges some of the block accesses to minimise the
number of times user access has to be opened and closed.

As mentioned in individual patches, some bring real performance
improvement.

This series is built on the previous series [1], which predates the
implementation of scoped user access.

[1] https://lore.kernel.org/all/1718f38859d5366f82d5bef531f255cedf537b5d.1631861883.git.christophe.leroy@csgroup.eu/T/#t

Changes in v2:
- Add a stub setup_tm_sigcontexts() for when CONFIG_PPC_TRANSACTIONAL_MEM is not set in patch 2

Christophe Leroy (CS GROUP) (8):
  powerpc/signal32: Convert to scoped user access
  powerpc/signal64: Untangle setup_tm_sigcontexts() and
    user_access_begin()
  powerpc/signal64: Convert to scoped user access
  powerpc/signal64: Access function descriptor with scoped user access
  powerpc/signal: Include the new stack frame inside the user access
    block
  signal: Add unsafe_copy_siginfo_to_user()
  powerpc/uaccess: Add unsafe_clear_user()
  powerpc/signal: Use unsafe_copy_siginfo_to_user()

 arch/powerpc/include/asm/uaccess.h |  20 ++
 arch/powerpc/kernel/signal_32.c    | 498 ++++++++++++++---------------
 arch/powerpc/kernel/signal_64.c    | 138 ++++----
 include/linux/signal.h             |  15 +
 include/linux/uaccess.h            |   1 +
 kernel/signal.c                    |   5 -
 6 files changed, 334 insertions(+), 343 deletions(-)

-- 
2.54.0



^ permalink raw reply

* Re: [PATCH 22/23] reset: rzg2l: use platform_device_set_of_node_from_dev()
From: Philipp Zabel @ 2026-06-02  8:24 UTC (permalink / raw)
  To: Bartosz Golaszewski, Lee Jones, Mark Brown, Thierry Reding,
	Sebastian Hesselbarth, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Srinivas Kandagatla,
	Greg Kroah-Hartman, Vinod Koul, Rafael J. Wysocki,
	Danilo Krummrich, Rob Herring, Saravana Kannan,
	Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
	Christophe Leroy (CS GROUP), Andi Shyti, Andy Shevchenko,
	Joerg Roedel, Will Deacon, Robin Murphy, Doug Berger,
	Florian Fainelli, Broadcom internal kernel review list,
	Ulf Hansson, Frank Li, Sascha Hauer, Pengutronix Kernel Team,
	Fabio Estevam, Matthew Brost, Thomas Hellström, Rodrigo Vivi,
	David Airlie, Simona Vetter, Peter Chen, Paul Cercueil, Bin Liu,
	Maximilian Luz, Hans de Goede, Ilpo Järvinen,
	Krzysztof Kozlowski, Benjamin Herrenschmidt
  Cc: brgl, linux-kernel, netdev, linux-arm-msm, linux-sound,
	driver-core, devicetree, linuxppc-dev, linux-i2c, iommu, linux-pm,
	imx, linux-arm-kernel, intel-xe, dri-devel, linux-usb, linux-mips,
	platform-driver-x86
In-Reply-To: <20260521-pdev-fwnode-ref-v1-22-88c324a1b8d2@oss.qualcomm.com>

On Do, 2026-05-21 at 10:36 +0200, Bartosz Golaszewski wrote:
> Ahead of reworking the reference counting logic for platform devices,
> encapsulate the assignment of the OF node from another device for
> dynamically allocated platform devices with the provided helper.
> 
> Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
> ---
>  drivers/reset/reset-rzg2l-usbphy-ctrl.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/reset/reset-rzg2l-usbphy-ctrl.c b/drivers/reset/reset-rzg2l-usbphy-ctrl.c
> index fd75d9601a3bfde7b7e3f6db287ec8c5c45a20ab..f003b360629c90bb37ed0ade7a675b5b0f28fa7e 100644
> --- a/drivers/reset/reset-rzg2l-usbphy-ctrl.c
> +++ b/drivers/reset/reset-rzg2l-usbphy-ctrl.c
> @@ -249,7 +249,7 @@ static int rzg2l_usbphy_ctrl_probe(struct platform_device *pdev)
>  	vdev->dev.parent = dev;
>  	priv->vdev = vdev;
>  
> -	device_set_of_node_from_dev(&vdev->dev, dev);
> +	platform_device_set_of_node_from_dev(vdev, dev);
>  	error = platform_device_add(vdev);
>  	if (error)
>  		goto err_device_put;

Acked-by: Philipp Zabel <p.zabel@pengutronix.de>

regards
Philipp


^ permalink raw reply

* Re: [linux-next20260529] kernel BUG at kernel/sched/core.c:7512!
From: Peter Zijlstra @ 2026-06-02  8:18 UTC (permalink / raw)
  To: Shrikanth Hegde
  Cc: Venkat Rao Bagalkote, Madhavan Srinivasan,
	Mukesh Kumar Chaurasiya, Ritesh Harjani, linuxppc-dev, LKML,
	Srikar Dronamraju
In-Reply-To: <37e69c39-564b-4ca9-bb27-1b99faab540c@linux.ibm.com>

On Tue, Jun 02, 2026 at 01:26:48PM +0530, Shrikanth Hegde wrote:
> 
> 
> On 6/1/26 3:26 PM, Peter Zijlstra wrote:
> > On Mon, Jun 01, 2026 at 02:46:24PM +0530, Shrikanth Hegde wrote:
> > 
> > > Ritesh, Mukesh, Is below possible scenario?
> > > 
> > > do_page_fault seems to enable irq's in the interrupt handler?
> > > is that expected? if so, one might see
> > > 
> > > -- do_page_fault (enter kernel mode)
> > >     -- enables interrupts
> > >     -- gets interrupt - Sets need_resched.
> > >        -- irqentry_exit - Sees it is kernel mode. Just checks preempt count
> > > 			 and calls preempt_schedule_irq, which catches both
> > > 			 preempt_count and !irqs_disabled. Hence the panic?
> > > 
> > > Should do_page_fault do preempt_disable when it enables the interrupts?
> > 
> > No, it is expected for page-fault to be able to schedule. Specifically,
> > it must be able to sleep to support loading pages from disk.
> 
> Oh yes. Ok. Thanks for taking a look.
> 
> > 
> > Please check the value of preempt_count() (does it perchance have
> > HARDIRQ_OFFSET?). Also, if the fault handler does enable IRQs, it must
> > also disable them again once done.
> 
> Will check it.
> 
> > 
> > Notably, I see ___do_page_fault() do interrupt_cond_local_irq_enable(),
> > but I'm not seeing a local_irq_disable() to match!
> 
> Yes, that's likely the culprit. It is possible that ___do_page_fault runs for longer
> and it may set need_resched. If it was in kernel mode, then it may not disable the
> interrupt and then subsequent irqentry_exit panics.
> 
> BTW I was able to consistently repro this on P9 with hackbench as below.
> 
> for i in {0..10}; do ./hackbench 10 process 10000 loops; done;
> for i in {0..10}; do ./hackbench 20 process 10000 loops; done;
> for i in {0..10}; do ./hackbench 30 process 10000 loops; done;
> for i in {0..10}; do ./hackbench 40 process 10000 loops; done;    << usually panics here.
> for i in {0..10}; do ./hackbench 10 thread 10000 loops; done;
> for i in {0..10}; do ./hackbench 20 thread 10000 loops; done;
> for i in {0..10}; do ./hackbench -pipe 10 process 10000 loops; done;
> for i in {0..10}; do ./hackbench -pipe 20 process 10000 loops; done;
> for i in {0..10}; do ./hackbench -pipe 30 process 10000 loops; done;
> for i in {0..10}; do ./hackbench -pipe 40 process 10000 loops; done;
> for i in {0..10}; do ./hackbench -pipe 10 thread 10000 loops; done;
> for i in {0..10}; do ./hackbench -pipe 20 thread 10000 loops; done;
> 
> Note, if i run ./hackbench 40 process 10000 loops alone, it doesn't panic.
> Likely some continous stressing needed to get into this case.
> 
> Below diff helps to fix it. With it see test passes. Hackbench numbers aren't super happy
> about it. It is regressing a bit compared to baseline. But no panic atleast.
> AND i have changed the BUG_ON to WARN_ON as irq_disabled right after. We could still fix the
> call sites if the warning is seen.
> 
> diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h
> index de5601282755..7da373a56813 100644
> --- a/arch/powerpc/include/asm/entry-common.h
> +++ b/arch/powerpc/include/asm/entry-common.h
> @@ -253,16 +253,17 @@ static inline void arch_interrupt_enter_prepare(struct pt_regs *regs)
>  static inline void arch_interrupt_exit_prepare(struct pt_regs *regs)
>  {
>         if (user_mode(regs)) {
> -               BUG_ON(regs_is_unrecoverable(regs));
> -               BUG_ON(regs_irqs_disabled(regs));
> +               WARN_ON(regs_is_unrecoverable(regs));
> +               WARN_ON(regs_irqs_disabled(regs));
>                 /*
>                  * We don't need to restore AMR on the way back to userspace for KUAP.
>                  * AMR can only have been unlocked if we interrupted the kernel.
>                  */
>                 kuap_assert_locked();
> -
> -               local_irq_disable();
>         }
> +
> +       /* irqentry_exit expects to be called with interrupts disabled */
> +       local_irq_disable();
>  }
>  static inline void arch_interrupt_async_enter_prepare(struct pt_regs *regs)
> 

I would suggest trying something a little more focussed like so:

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 806c74e0d5ab..b002c179415c 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -589,6 +589,7 @@ static __always_inline void __do_page_fault(struct pt_regs *regs)
 	err = ___do_page_fault(regs, regs->dar, regs->dsisr);
 	if (unlikely(err))
 		bad_page_fault(regs, err);
+	local_irq_disable();
 }
 
 DEFINE_INTERRUPT_HANDLER(do_page_fault)

Since only ___do_page_fault() will enable interrupts, you only need to
disable them again on its return path. 



^ permalink raw reply related

* Re: [linux-next20260529] kernel BUG at kernel/sched/core.c:7512!
From: Shrikanth Hegde @ 2026-06-02  7:56 UTC (permalink / raw)
  To: Peter Zijlstra, Venkat Rao Bagalkote
  Cc: Madhavan Srinivasan, Mukesh Kumar Chaurasiya, Ritesh Harjani,
	linuxppc-dev, LKML, Srikar Dronamraju
In-Reply-To: <20260601095601.GN3102624@noisy.programming.kicks-ass.net>

On 6/1/26 3:26 PM, Peter Zijlstra wrote:
> On Mon, Jun 01, 2026 at 02:46:24PM +0530, Shrikanth Hegde wrote:
> 
>> Ritesh, Mukesh, Is below possible scenario?
>>
>> do_page_fault seems to enable irq's in the interrupt handler?
>> is that expected? if so, one might see
>>
>> -- do_page_fault (enter kernel mode)
>>     -- enables interrupts
>>     -- gets interrupt - Sets need_resched.
>>        -- irqentry_exit - Sees it is kernel mode. Just checks preempt count
>> 			 and calls preempt_schedule_irq, which catches both
>> 			 preempt_count and !irqs_disabled. Hence the panic?
>>
>> Should do_page_fault do preempt_disable when it enables the interrupts?
> 
> No, it is expected for page-fault to be able to schedule. Specifically,
> it must be able to sleep to support loading pages from disk.

Oh yes. Ok. Thanks for taking a look.

> 
> Please check the value of preempt_count() (does it perchance have
> HARDIRQ_OFFSET?). Also, if the fault handler does enable IRQs, it must
> also disable them again once done.

Will check it.

> 
> Notably, I see ___do_page_fault() do interrupt_cond_local_irq_enable(),
> but I'm not seeing a local_irq_disable() to match!

Yes, that's likely the culprit. It is possible that ___do_page_fault runs for longer
and it may set need_resched. If it was in kernel mode, then it may not disable the
interrupt and then subsequent irqentry_exit panics.

BTW I was able to consistently repro this on P9 with hackbench as below.

for i in {0..10}; do ./hackbench 10 process 10000 loops; done;
for i in {0..10}; do ./hackbench 20 process 10000 loops; done;
for i in {0..10}; do ./hackbench 30 process 10000 loops; done;
for i in {0..10}; do ./hackbench 40 process 10000 loops; done;    << usually panics here.
for i in {0..10}; do ./hackbench 10 thread 10000 loops; done;
for i in {0..10}; do ./hackbench 20 thread 10000 loops; done;
for i in {0..10}; do ./hackbench -pipe 10 process 10000 loops; done;
for i in {0..10}; do ./hackbench -pipe 20 process 10000 loops; done;
for i in {0..10}; do ./hackbench -pipe 30 process 10000 loops; done;
for i in {0..10}; do ./hackbench -pipe 40 process 10000 loops; done;
for i in {0..10}; do ./hackbench -pipe 10 thread 10000 loops; done;
for i in {0..10}; do ./hackbench -pipe 20 thread 10000 loops; done;

Note, if I run ./hackbench 40 process 10000 loops alone, it doesn't panic.
Likely some continuous stressing is needed to get into this case.

The diff below helps to fix it. With it, the test passes. Hackbench numbers aren't super happy
about it; it is regressing a bit compared to baseline. But no panic at least.
Also, I have changed the BUG_ON to WARN_ON, since local_irq_disable() is called right after.
We could still fix the call sites if the warning is seen.

diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h
index de5601282755..7da373a56813 100644
--- a/arch/powerpc/include/asm/entry-common.h
+++ b/arch/powerpc/include/asm/entry-common.h
@@ -253,16 +253,17 @@ static inline void arch_interrupt_enter_prepare(struct pt_regs *regs)
  static inline void arch_interrupt_exit_prepare(struct pt_regs *regs)
  {
         if (user_mode(regs)) {
-               BUG_ON(regs_is_unrecoverable(regs));
-               BUG_ON(regs_irqs_disabled(regs));
+               WARN_ON(regs_is_unrecoverable(regs));
+               WARN_ON(regs_irqs_disabled(regs));
                 /*
                  * We don't need to restore AMR on the way back to userspace for KUAP.
                  * AMR can only have been unlocked if we interrupted the kernel.
                  */
                 kuap_assert_locked();
-
-               local_irq_disable();
         }
+
+       /* irqentry_exit expects to be called with interrupts disabled */
+       local_irq_disable();
  }

  static inline void arch_interrupt_async_enter_prepare(struct pt_regs *regs)

^ permalink raw reply related

* Re: [PATCH 5/7] gpio: ppc44x: Convert GPIO to generic MMIO
From: Bartosz Golaszewski @ 2026-06-02  7:51 UTC (permalink / raw)
  To: Rosen Penev
  Cc: Madhavan Srinivasan, chleroy, Michael Ellerman, Nicholas Piggin,
	Linus Walleij, Bartosz Golaszewski,
	open list:LINUX FOR POWERPC (32-BIT AND 64-BIT), open list,
	linux-gpio
In-Reply-To: <20260602050131.856789-6-rosenp@gmail.com>

On Tue, 2 Jun 2026 07:01:29 +0200, Rosen Penev <rosenp@gmail.com> said:
> Use gpio_generic_chip_init() to set up the PPC44x GPIO chip
> instead of open-coding the basic get, set, locking and state handling.
>
> Keep the PPC44x-specific direction callbacks because they still need to
> program ODR and the OSR/TSR registers around the generic data and
> direction registers.
>
> Assisted-by: Codex:GPT-5.5
> Signed-off-by: Rosen Penev <rosenp@gmail.com>
> ---

...

> @@ -124,10 +102,11 @@ static int
>  ppc4xx_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
>  {
>  	struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
> +	struct gpio_generic_chip *gen_gc = &chip->chip;
>  	struct ppc4xx_gpio __iomem *regs = chip->regs;
>  	unsigned long flags;
>
> -	spin_lock_irqsave(&chip->lock, flags);
> +	gpio_generic_chip_lock_irqsave(gen_gc, flags);

If you're already doing it, can you use lock guards too?

Bart


^ permalink raw reply

* [PATCH v2 net-next 9/9] net: dsa: netc: implement dynamic FDB entry ageing
From: wei.fang @ 2026-06-02  7:23 UTC (permalink / raw)
  To: claudiu.manoil, vladimir.oltean, xiaoning.wang, andrew+netdev,
	davem, edumazet, kuba, pabeni, chleroy, andrew, olteanv, linux
  Cc: wei.fang, imx, netdev, linux-kernel, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20260602072313.3162120-1-wei.fang@oss.nxp.com>

From: Wei Fang <wei.fang@nxp.com>

The NETC switch does not age out dynamic FDB entries automatically.
Without software management, stale entries persist after topology
changes and cause incorrect forwarding.

Add a delayed work that periodically removes entries that have not been
refreshed within the specified number of cycles. The effective ageing time is:

  ageing_time = fdbt_ageing_delay * 100

Default values are 3s interval and 100 cycles (300s total), matching
the IEEE 802.1Q default ageing time. The work starts when the first
port joins a bridge (tracked via br_cnt) and is cancelled when the
last port leaves. All FDB operations are serialized under fdbt_lock.

When a port leaves the bridge, its dynamic FDB entries are flushed
right away in port_bridge_leave(), without waiting for the ageing
cycle. Additionally, when a link down event is detected on a port,
netc_mac_link_down() will also clear the port's dynamic FDB entries
via netc_port_remove_dynamic_entries(). Non-bridge ports have no
dynamic FDB entries, so this call is always safe.

Implement set_ageing_time and port_fast_age DSA operations to allow
the bridge layer to reconfigure aging parameters and trigger per-port
flushes on demand.

Signed-off-by: Wei Fang <wei.fang@nxp.com>
---
 drivers/net/dsa/netc/netc_main.c   | 90 ++++++++++++++++++++++++++++++
 drivers/net/dsa/netc/netc_switch.h |  7 +++
 2 files changed, 97 insertions(+)

diff --git a/drivers/net/dsa/netc/netc_main.c b/drivers/net/dsa/netc/netc_main.c
index 5b58ce06beb8..3737a852cafc 100644
--- a/drivers/net/dsa/netc/netc_main.c
+++ b/drivers/net/dsa/netc/netc_main.c
@@ -447,6 +447,25 @@ static void netc_free_ntmp_user(struct netc_switch *priv)
 	netc_free_ntmp_bitmaps(priv);
 }
 
+static void netc_clean_fdbt_ageing_entries(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct netc_switch *priv;
+
+	priv = container_of(dwork, struct netc_switch, fdbt_ageing_work);
+
+	/* Update the activity element in FDB table */
+	mutex_lock(&priv->fdbt_lock);
+	ntmp_fdbt_update_activity_element(&priv->ntmp);
+	/* Delete the ageing entries after the activity element is updated */
+	ntmp_fdbt_delete_ageing_entries(&priv->ntmp, NETC_FDBT_AGEING_THRESH);
+	mutex_unlock(&priv->fdbt_lock);
+
+	if (atomic_read(&priv->br_cnt))
+		schedule_delayed_work(&priv->fdbt_ageing_work,
+				      READ_ONCE(priv->fdbt_ageing_delay));
+}
+
 static void netc_switch_dos_default_config(struct netc_switch *priv)
 {
 	struct netc_switch_regs *regs = &priv->regs;
@@ -861,6 +880,10 @@ static int netc_setup(struct dsa_switch *ds)
 
 	INIT_HLIST_HEAD(&priv->fdb_list);
 	mutex_init(&priv->fdbt_lock);
+	priv->fdbt_ageing_delay = NETC_FDBT_AGEING_DELAY;
+	atomic_set(&priv->br_cnt, 0);
+	INIT_DELAYED_WORK(&priv->fdbt_ageing_work,
+			  netc_clean_fdbt_ageing_entries);
 	INIT_HLIST_HEAD(&priv->vlan_list);
 	mutex_init(&priv->vft_lock);
 
@@ -925,6 +948,7 @@ static void netc_teardown(struct dsa_switch *ds)
 {
 	struct netc_switch *priv = ds->priv;
 
+	disable_delayed_work_sync(&priv->fdbt_ageing_work);
 	netc_destroy_all_lists(priv);
 	netc_free_host_flood_rules(priv);
 	netc_free_ntmp_user(priv);
@@ -1948,6 +1972,7 @@ static int netc_port_bridge_join(struct dsa_switch *ds, int port,
 				 struct netlink_ext_ack *extack)
 {
 	struct netc_port *np = NETC_PORT(ds, port);
+	struct netc_switch *priv = ds->priv;
 	u16 vlan_unaware_pvid;
 	int err;
 
@@ -1974,20 +1999,42 @@ static int netc_port_bridge_join(struct dsa_switch *ds, int port,
 	np->host_flood = NULL;
 	netc_port_wr(np, NETC_PIPFCR, 0);
 
+	if (atomic_inc_return(&priv->br_cnt) == 1)
+		schedule_delayed_work(&priv->fdbt_ageing_work,
+				      READ_ONCE(priv->fdbt_ageing_delay));
+
 	return 0;
 }
 
+static void netc_port_remove_dynamic_entries(struct netc_port *np)
+{
+	struct netc_switch *priv = np->switch_priv;
+
+	/* Return if the port is not available */
+	if (!np->dp)
+		return;
+
+	mutex_lock(&priv->fdbt_lock);
+	ntmp_fdbt_delete_port_dynamic_entries(&priv->ntmp, np->dp->index);
+	mutex_unlock(&priv->fdbt_lock);
+}
+
 static void netc_port_bridge_leave(struct dsa_switch *ds, int port,
 				   struct dsa_bridge bridge)
 {
 	struct netc_port *np = NETC_PORT(ds, port);
 	struct net_device *ndev = np->dp->user;
+	struct netc_switch *priv = ds->priv;
 	u16 vlan_unaware_pvid;
 	bool mc, uc;
 
 	netc_port_set_mlo(np, MLO_DISABLE);
 	netc_port_set_pvid(np, NETC_STANDALONE_PVID);
 
+	if (atomic_dec_and_test(&priv->br_cnt))
+		cancel_delayed_work_sync(&priv->fdbt_ageing_work);
+
+	netc_port_remove_dynamic_entries(np);
 	uc = ndev->flags & IFF_PROMISC;
 	mc = ndev->flags & (IFF_PROMISC | IFF_ALLMULTI);
 
@@ -2007,6 +2054,44 @@ static void netc_port_bridge_leave(struct dsa_switch *ds, int port,
 	netc_port_del_vlan_entry(np, vlan_unaware_pvid);
 }
 
+static int netc_set_ageing_time(struct dsa_switch *ds, unsigned int msecs)
+{
+	struct netc_switch *priv = ds->priv;
+	unsigned long delay_jiffies;
+
+	/* The dynamic FDB entry is deleted when its activity counter reaches
+	 * NETC_FDBT_AGEING_THRESH (100). Each delayed_work tick increments
+	 * the counter by 1 if the entry is inactive.
+	 *
+	 * Therefore:
+	 *   msecs (ms)    = NETC_FDBT_AGEING_THRESH * delay_ms (ms)
+	 *   delay_ms      = msecs / NETC_FDBT_AGEING_THRESH
+	 *   delay_jiffies = (delay_ms / 1000) * HZ
+	 *                 = (msecs * HZ) / (1000 * NETC_FDBT_AGEING_THRESH)
+	 *
+	 * Use DIV_ROUND_CLOSEST_ULL to perform a single nearest-jiffy
+	 * rounding, avoiding the two-step rounding error of the intermediate
+	 * delay_ms approach.
+	 *   Maximum error = +/-0.5 jiffy * 100 = 50000/HZ ms.
+	 */
+	delay_jiffies = DIV_ROUND_CLOSEST_ULL((u64)msecs * HZ,
+					      1000 * NETC_FDBT_AGEING_THRESH);
+	WRITE_ONCE(priv->fdbt_ageing_delay, delay_jiffies);
+
+	if (atomic_read(&priv->br_cnt))
+		mod_delayed_work(system_percpu_wq, &priv->fdbt_ageing_work,
+				 READ_ONCE(priv->fdbt_ageing_delay));
+
+	return 0;
+}
+
+static void netc_port_fast_age(struct dsa_switch *ds, int port)
+{
+	struct netc_port *np = NETC_PORT(ds, port);
+
+	netc_port_remove_dynamic_entries(np);
+}
+
 static void netc_phylink_get_caps(struct dsa_switch *ds, int port,
 				  struct phylink_config *config)
 {
@@ -2261,6 +2346,7 @@ static void netc_mac_link_down(struct phylink_config *config,
 	np = NETC_PORT(dp->ds, dp->index);
 	netc_port_mac_rx_graceful_stop(np);
 	netc_port_mac_tx_graceful_stop(np);
+	netc_port_remove_dynamic_entries(np);
 }
 
 static const struct phylink_mac_ops netc_phylink_mac_ops = {
@@ -2290,6 +2376,8 @@ static const struct dsa_switch_ops netc_switch_ops = {
 	.port_vlan_del			= netc_port_vlan_del,
 	.port_bridge_join		= netc_port_bridge_join,
 	.port_bridge_leave		= netc_port_bridge_leave,
+	.set_ageing_time		= netc_set_ageing_time,
+	.port_fast_age			= netc_port_fast_age,
 	.get_pause_stats		= netc_port_get_pause_stats,
 	.get_rmon_stats			= netc_port_get_rmon_stats,
 	.get_eth_ctrl_stats		= netc_port_get_eth_ctrl_stats,
@@ -2338,6 +2426,8 @@ static int netc_switch_probe(struct pci_dev *pdev,
 	ds->phylink_mac_ops = &netc_phylink_mac_ops;
 	ds->fdb_isolation = true;
 	ds->max_num_bridges = priv->info->num_ports - 1;
+	ds->ageing_time_min = 1000;
+	ds->ageing_time_max = U32_MAX;
 	ds->priv = priv;
 	priv->ds = ds;
 
diff --git a/drivers/net/dsa/netc/netc_switch.h b/drivers/net/dsa/netc/netc_switch.h
index 982c8d3a3fbf..4abef8b383bd 100644
--- a/drivers/net/dsa/netc/netc_switch.h
+++ b/drivers/net/dsa/netc/netc_switch.h
@@ -50,6 +50,9 @@
 /* PAUSE refresh threshold: send refresh when timer reaches this value */
 #define NETC_PAUSE_THRESH		0x7FFF
 
+#define NETC_FDBT_AGEING_DELAY		(3 * HZ)
+#define NETC_FDBT_AGEING_THRESH		100
+
 struct netc_switch;
 
 struct netc_switch_info {
@@ -124,6 +127,10 @@ struct netc_switch {
 	struct ntmp_user ntmp;
 	struct hlist_head fdb_list;
 	struct mutex fdbt_lock; /* FDB table lock */
+	struct delayed_work fdbt_ageing_work;
+	/* (fdbt_ageing_delay * ageing_act_cnt) is ageing time */
+	unsigned long fdbt_ageing_delay;
+	atomic_t br_cnt;
 	struct hlist_head vlan_list;
 	struct mutex vft_lock; /* VLAN filter table lock */
 
-- 
2.34.1



^ permalink raw reply related

* [PATCH v2 net-next 8/9] net: dsa: netc: add bridge mode support
From: wei.fang @ 2026-06-02  7:23 UTC (permalink / raw)
  To: claudiu.manoil, vladimir.oltean, xiaoning.wang, andrew+netdev,
	davem, edumazet, kuba, pabeni, chleroy, andrew, olteanv, linux
  Cc: wei.fang, imx, netdev, linux-kernel, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20260602072313.3162120-1-wei.fang@oss.nxp.com>

From: Wei Fang <wei.fang@nxp.com>

Wire up the port_bridge_join, port_bridge_leave and port_vlan_filtering
DSA callbacks to support both VLAN-unaware and VLAN-aware bridge modes.

For VLAN-unaware bridges, each bridge instance is assigned a dedicated
internal PVID via NETC_VLAN_UNAWARE_PVID(bridge.num), counting down
from VID 4095. A VFT entry is created for this PVID with hardware MAC
learning and flood-on-miss forwarding enabled. The CPU port is included
as a VFT member so frames can reach the host. The reserved VID range is
blocked in port_vlan_add to prevent user-space conflicts.

Only one VLAN-aware bridge is supported at a time; this constraint is
enforced in port_bridge_join and port_vlan_filtering. The per-port PVID
is tracked in software and written to the BPDVR register whenever VLAN
filtering is active.

FDB operations are extended to the bridge database: when vid is zero the
VLAN-unaware PVID for the bridge is substituted. Dynamic entries learned
autonomously by the hardware are handled by falling back to a
key-element-data delete via ntmp_fdbt_delete_entry_by_keye() when an
entry is absent from the software shadow list. Internal PVIDs are
translated back to VID 0 in port_fdb_dump before reporting to
user-space.

Host flood rules are removed from the ingress port filter table when a
port joins a bridge to avoid bypassing FDB lookup and MAC learning.

Signed-off-by: Wei Fang <wei.fang@nxp.com>
---
 drivers/net/dsa/netc/netc_main.c   | 310 +++++++++++++++++++++++++++--
 drivers/net/dsa/netc/netc_switch.h |   2 +
 2 files changed, 297 insertions(+), 15 deletions(-)

diff --git a/drivers/net/dsa/netc/netc_main.c b/drivers/net/dsa/netc/netc_main.c
index a97121dda237..5b58ce06beb8 100644
--- a/drivers/net/dsa/netc/netc_main.c
+++ b/drivers/net/dsa/netc/netc_main.c
@@ -695,10 +695,16 @@ static int netc_port_del_fdb_entry(struct netc_port *np,
 
 	entry = netc_lookup_fdb_entry(priv, addr, vid);
 	if (unlikely(!entry))
-		/* Currently only single port mode is supported, MAC learning
-		 * is disabled, so there is no dynamically learned FDB entry.
-		 * We need to support deleting dynamically FDB entry when the
-		 * bridge mode is supported.
+		/* The hardware-learned dynamic FDB entries cannot be deleted
+		 * through .port_fdb_del() interface.
+		 * For NTF_MASTER path: hardware-learned dynamic FDB entries
+		 * are never synchronized back to the bridge software
+		 * database, so br_fdb_delete() -> br_fdb_find() cannot find
+		 * the FDB entry and .port_fdb_del() will not be called.
+		 * For NTF_SELF path: dsa_user_netdev_ops does not implement
+		 * ndo_fdb_del(), so rtnl_fdb_del() falls back to
+		 * ndo_dflt_fdb_del(), which only supports NUD_PERMANENT static
+		 * entries and rejects all others with -EINVAL.
 		 */
 		goto unlock_fdbt;
 
@@ -1274,6 +1280,16 @@ static int netc_port_add_vlan_entry(struct netc_port *np, u16 vid,
 	entry->ect_gid = NTMP_NULL_ENTRY_ID;
 
 	bitmap_stg = BIT(index) | VFT_STG_ID(0);
+	/* If the VID is a VLAN-unaware PVID, the CPU port needs to be
+	 * a member of this VLAN.
+	 */
+	if (dsa_port_is_user(np->dp) &&
+	    vid >= NETC_VLAN_UNAWARE_PVID(priv->ds->max_num_bridges)) {
+		struct dsa_port *cpu_dp = np->dp->cpu_dp;
+
+		bitmap_stg |= BIT(cpu_dp->index);
+	}
+
 	cfg = FIELD_PREP(VFT_MLO, MLO_HW) |
 	      FIELD_PREP(VFT_MFO, MFO_NO_MATCH_FLOOD);
 
@@ -1311,11 +1327,16 @@ static int netc_port_add_vlan_entry(struct netc_port *np, u16 vid,
 	return err;
 }
 
-static bool netc_port_vlan_egress_rule_changed(struct netc_vlan_entry *entry,
+static bool netc_port_vlan_egress_rule_changed(struct netc_switch *priv,
+					       struct netc_vlan_entry *entry,
 					       int port, bool untagged)
 {
 	bool old_untagged = !!(entry->untagged_port_bitmap & BIT(port));
 
+	/* VLAN-unaware VIDs have no egress rules, so return 'false' */
+	if (entry->vid >= NETC_VLAN_UNAWARE_PVID(priv->ds->max_num_bridges))
+		return false;
+
 	return old_untagged != untagged;
 }
 
@@ -1338,7 +1359,8 @@ static int netc_port_set_vlan_entry(struct netc_port *np, u16 vid,
 	}
 
 	/* Check whether the egress VLAN rule is changed */
-	changed = netc_port_vlan_egress_rule_changed(entry, port, untagged);
+	changed = netc_port_vlan_egress_rule_changed(priv, entry, port,
+						     untagged);
 	if (changed) {
 		entry->untagged_port_bitmap ^= BIT(port);
 		err = netc_port_update_vlan_egress_rule(np, entry);
@@ -1402,6 +1424,17 @@ static int netc_port_del_vlan_entry(struct netc_port *np, u16 vid)
 	cfge = &entry->cfge;
 	vlan_port_bitmap = FIELD_GET(VFT_PORT_MEMBERSHIP,
 				     le32_to_cpu(cfge->bitmap_stg));
+	/* If the VID is a VLAN-unaware PVID, we need to clear the CPU
+	 * port bit of vlan_port_bitmap, so that the VLAN entry can be
+	 * deleted if no user ports use this VLAN.
+	 */
+	if (dsa_port_is_user(np->dp) &&
+	    vid >= NETC_VLAN_UNAWARE_PVID(priv->ds->max_num_bridges)) {
+		struct dsa_port *cpu_dp = np->dp->cpu_dp;
+
+		vlan_port_bitmap &= ~BIT(cpu_dp->index);
+	}
+
 	/* If the VLAN only belongs to the current port */
 	if (vlan_port_bitmap == BIT(port)) {
 		err = ntmp_vft_delete_entry(&priv->ntmp, vid);
@@ -1507,17 +1540,57 @@ static int netc_port_max_mtu(struct dsa_switch *ds, int port)
 	return NETC_MAX_FRAME_LEN - VLAN_ETH_HLEN - ETH_FCS_LEN;
 }
 
+static struct net_device *netc_classify_db(struct dsa_db db)
+{
+	switch (db.type) {
+	case DSA_DB_PORT:
+		return NULL;
+	case DSA_DB_BRIDGE:
+		return db.bridge.dev;
+	default:
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+}
+
+static u16 netc_vlan_unaware_pvid(struct dsa_switch *ds,
+				  const struct net_device *br_ndev)
+{
+	struct dsa_port *dp;
+	int br_num = -1;
+
+	if (!br_ndev)
+		return NETC_STANDALONE_PVID;
+
+	dsa_switch_for_each_available_port(dp, ds) {
+		if (dsa_port_bridge_dev_get(dp) == br_ndev) {
+			br_num = dp->bridge->num;
+			break;
+		}
+	}
+
+	/* The br_num is supposed to be 1 ~ ds->max_num_bridges, see
+	 * dsa_bridge_num_get(). Since max_num_bridges is non-zero,
+	 * dsa_port_bridge_create() will return an error if
+	 * dsa_bridge_num_get() returns 0.
+	 */
+	if (WARN_ON(br_num <= 0))
+		return NETC_STANDALONE_PVID;
+
+	return NETC_VLAN_UNAWARE_PVID(br_num);
+}
+
 static int netc_port_fdb_add(struct dsa_switch *ds, int port,
 			     const unsigned char *addr, u16 vid,
 			     struct dsa_db db)
 {
+	struct net_device *br_ndev = netc_classify_db(db);
 	struct netc_port *np = NETC_PORT(ds, port);
 
-	/* Currently, only support standalone port mode, so only
-	 * NETC_STANDALONE_PVID (= 0) is supported here.
-	 */
-	if (vid != NETC_STANDALONE_PVID)
-		return -EOPNOTSUPP;
+	if (IS_ERR(br_ndev))
+		return PTR_ERR(br_ndev);
+
+	if (!vid)
+		vid = netc_vlan_unaware_pvid(ds, br_ndev);
 
 	return netc_port_set_fdb_entry(np, addr, vid);
 }
@@ -1526,10 +1599,14 @@ static int netc_port_fdb_del(struct dsa_switch *ds, int port,
 			     const unsigned char *addr, u16 vid,
 			     struct dsa_db db)
 {
+	struct net_device *br_ndev = netc_classify_db(db);
 	struct netc_port *np = NETC_PORT(ds, port);
 
-	if (vid != NETC_STANDALONE_PVID)
-		return -EOPNOTSUPP;
+	if (IS_ERR(br_ndev))
+		return PTR_ERR(br_ndev);
+
+	if (!vid)
+		vid = netc_vlan_unaware_pvid(ds, br_ndev);
 
 	return netc_port_del_fdb_entry(np, addr, vid);
 }
@@ -1565,6 +1642,8 @@ static int netc_port_fdb_dump(struct dsa_switch *ds, int port,
 		cfg = le32_to_cpu(cfge->cfg);
 		is_static = (cfg & FDBT_DYNAMIC) ? false : true;
 		vid = le16_to_cpu(keye->fid);
+		if (vid >= NETC_VLAN_UNAWARE_PVID(ds->max_num_bridges))
+			vid = 0;
 
 		err = cb(keye->mac_addr, vid, is_static, data);
 		if (err)
@@ -1681,6 +1760,19 @@ static void netc_port_set_host_flood(struct dsa_switch *ds, int port,
 	struct netc_port *np = NETC_PORT(ds, port);
 	struct ipft_entry_data *old_host_flood;
 
+	/* Do not add host flood rule to ingress port filter table when
+	 * the port has joined a bridge. Otherwise, the ingress frames
+	 * will bypass FDB table lookup and MAC learning, so the frames
+	 * will be redirected directly to the CPU port.
+	 */
+	if (dsa_port_bridge_dev_get(np->dp)) {
+		netc_port_remove_host_flood(np, np->host_flood);
+		np->host_flood = NULL;
+		netc_port_wr(np, NETC_PIPFCR, 0);
+
+		return;
+	}
+
 	if (np->uc == uc && np->mc == mc)
 		return;
 
@@ -1702,12 +1794,87 @@ static void netc_port_set_host_flood(struct dsa_switch *ds, int port,
 	netc_port_remove_host_flood(np, old_host_flood);
 }
 
+static int netc_single_vlan_aware_bridge(struct dsa_switch *ds,
+					 struct netlink_ext_ack *extack)
+{
+	struct net_device *br_ndev = NULL;
+	struct dsa_port *dp;
+
+	dsa_switch_for_each_available_port(dp, ds) {
+		struct net_device *port_br = dsa_port_bridge_dev_get(dp);
+
+		if (!port_br || !br_vlan_enabled(port_br))
+			continue;
+
+		if (!br_ndev) {
+			br_ndev = port_br;
+			continue;
+		}
+
+		if (br_ndev == port_br)
+			continue;
+
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Only one VLAN-aware bridge is supported");
+
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int netc_port_vlan_filtering(struct dsa_switch *ds,
+				    int port, bool vlan_aware,
+				    struct netlink_ext_ack *extack)
+{
+	struct netc_port *np = NETC_PORT(ds, port);
+	struct net_device *br_ndev;
+	u32 pvid, val;
+	int err;
+
+	err = netc_single_vlan_aware_bridge(ds, extack);
+	if (err)
+		return err;
+
+	br_ndev = dsa_port_bridge_dev_get(np->dp);
+	pvid = netc_vlan_unaware_pvid(ds, br_ndev);
+	if (pvid == NETC_STANDALONE_PVID) {
+		vlan_aware = false;
+		goto bpdvr_config;
+	}
+
+	if (vlan_aware) {
+		err = netc_port_del_vlan_entry(np, pvid);
+		if (err)
+			return err;
+
+		pvid = np->pvid;
+	} else {
+		err = netc_port_set_vlan_entry(np, pvid, false);
+		if (err)
+			return err;
+	}
+
+bpdvr_config:
+	val = (vlan_aware ? 0 : BPDVR_RXVAM) | (pvid & BPDVR_VID);
+	netc_port_rmw(np, NETC_BPDVR, BPDVR_RXVAM | BPDVR_VID, val);
+
+	return 0;
+}
+
+static void netc_port_set_pvid(struct netc_port *np, u16 pvid)
+{
+	netc_port_rmw(np, NETC_BPDVR, BPDVR_VID, pvid);
+}
+
 static int netc_port_vlan_add(struct dsa_switch *ds, int port,
 			      const struct switchdev_obj_port_vlan *vlan,
 			      struct netlink_ext_ack *extack)
 {
 	struct netc_port *np = NETC_PORT(ds, port);
+	struct dsa_port *dp = np->dp;
 	bool untagged;
+	int err;
 
 	/* The 8021q layer may attempt to change NETC_STANDALONE_PVID
 	 * (VID 0), so we need to ignore it.
@@ -1715,20 +1882,129 @@ static int netc_port_vlan_add(struct dsa_switch *ds, int port,
 	if (vlan->vid == NETC_STANDALONE_PVID)
 		return 0;
 
+	if (vlan->vid >= NETC_VLAN_UNAWARE_PVID(ds->max_num_bridges)) {
+		NL_SET_ERR_MSG_FMT_MOD(extack,
+				       "VID %d~4095 reserved for VLAN-unaware bridge",
+				       NETC_VLAN_UNAWARE_PVID(ds->max_num_bridges));
+		return -EINVAL;
+	}
+
 	untagged = !!(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED);
+	err = netc_port_set_vlan_entry(np, vlan->vid, untagged);
+	if (err)
+		return err;
 
-	return netc_port_set_vlan_entry(np, vlan->vid, untagged);
+	if (vlan->flags & BRIDGE_VLAN_INFO_PVID) {
+		np->pvid = vlan->vid;
+		if (dsa_port_is_vlan_filtering(dp))
+			netc_port_set_pvid(np, vlan->vid);
+
+		return 0;
+	}
+
+	if (np->pvid != vlan->vid)
+		return 0;
+
+	/* Delete PVID */
+	np->pvid = NETC_STANDALONE_PVID;
+	if (dsa_port_is_vlan_filtering(dp))
+		netc_port_set_pvid(np, NETC_STANDALONE_PVID);
+
+	return 0;
 }
 
 static int netc_port_vlan_del(struct dsa_switch *ds, int port,
 			      const struct switchdev_obj_port_vlan *vlan)
 {
 	struct netc_port *np = NETC_PORT(ds, port);
+	int err;
 
 	if (vlan->vid == NETC_STANDALONE_PVID)
 		return 0;
 
-	return netc_port_del_vlan_entry(np, vlan->vid);
+	err = netc_port_del_vlan_entry(np, vlan->vid);
+	if (err)
+		return err;
+
+	if (np->pvid == vlan->vid) {
+		np->pvid = NETC_STANDALONE_PVID;
+
+		/* Set the port PVID to NETC_STANDALONE_PVID if the VLAN-aware
+		 * bridge port has no PVID. The untagged frames will not be
+		 * forwarded to other user ports, as NETC_STANDALONE_PVID VLAN
+		 * entry has disabled MAC learning and flooding, and other user
+		 * ports do not have FDB entries with NETC_STANDALONE_PVID.
+		 */
+		if (dsa_port_is_vlan_filtering(np->dp))
+			netc_port_set_pvid(np, NETC_STANDALONE_PVID);
+	}
+
+	return 0;
+}
+
+static int netc_port_bridge_join(struct dsa_switch *ds, int port,
+				 struct dsa_bridge bridge,
+				 bool *tx_fwd_offload,
+				 struct netlink_ext_ack *extack)
+{
+	struct netc_port *np = NETC_PORT(ds, port);
+	u16 vlan_unaware_pvid;
+	int err;
+
+	err = netc_single_vlan_aware_bridge(ds, extack);
+	if (err)
+		return err;
+
+	netc_port_set_mlo(np, MLO_NOT_OVERRIDE);
+
+	if (br_vlan_enabled(bridge.dev))
+		goto out;
+
+	vlan_unaware_pvid = NETC_VLAN_UNAWARE_PVID(bridge.num);
+	err = netc_port_set_vlan_entry(np, vlan_unaware_pvid, false);
+	if (err) {
+		netc_port_set_mlo(np, MLO_DISABLE);
+		return err;
+	}
+
+	netc_port_set_pvid(np, vlan_unaware_pvid);
+
+out:
+	netc_port_remove_host_flood(np, np->host_flood);
+	np->host_flood = NULL;
+	netc_port_wr(np, NETC_PIPFCR, 0);
+
+	return 0;
+}
+
+static void netc_port_bridge_leave(struct dsa_switch *ds, int port,
+				   struct dsa_bridge bridge)
+{
+	struct netc_port *np = NETC_PORT(ds, port);
+	struct net_device *ndev = np->dp->user;
+	u16 vlan_unaware_pvid;
+	bool mc, uc;
+
+	netc_port_set_mlo(np, MLO_DISABLE);
+	netc_port_set_pvid(np, NETC_STANDALONE_PVID);
+
+	uc = ndev->flags & IFF_PROMISC;
+	mc = ndev->flags & (IFF_PROMISC | IFF_ALLMULTI);
+
+	if (netc_port_add_host_flood_rule(np, uc, mc))
+		dev_warn(ds->dev,
+			 "Failed to restore host flood rule on port %d\n",
+			 port);
+
+	if (br_vlan_enabled(bridge.dev))
+		return;
+
+	vlan_unaware_pvid = NETC_VLAN_UNAWARE_PVID(bridge.num);
+	/* There is no need to check the return value: even if the
+	 * deletion fails, the PVID has already been set to
+	 * NETC_STANDALONE_PVID, so frames will not match this VLAN entry.
+	 */
+	netc_port_del_vlan_entry(np, vlan_unaware_pvid);
 }
 
 static void netc_phylink_get_caps(struct dsa_switch *ds, int port,
@@ -2009,8 +2285,11 @@ static const struct dsa_switch_ops netc_switch_ops = {
 	.port_mdb_add			= netc_port_mdb_add,
 	.port_mdb_del			= netc_port_mdb_del,
 	.port_set_host_flood		= netc_port_set_host_flood,
+	.port_vlan_filtering		= netc_port_vlan_filtering,
 	.port_vlan_add			= netc_port_vlan_add,
 	.port_vlan_del			= netc_port_vlan_del,
+	.port_bridge_join		= netc_port_bridge_join,
+	.port_bridge_leave		= netc_port_bridge_leave,
 	.get_pause_stats		= netc_port_get_pause_stats,
 	.get_rmon_stats			= netc_port_get_rmon_stats,
 	.get_eth_ctrl_stats		= netc_port_get_eth_ctrl_stats,
@@ -2058,6 +2337,7 @@ static int netc_switch_probe(struct pci_dev *pdev,
 	ds->ops = &netc_switch_ops;
 	ds->phylink_mac_ops = &netc_phylink_mac_ops;
 	ds->fdb_isolation = true;
+	ds->max_num_bridges = priv->info->num_ports - 1;
 	ds->priv = priv;
 	priv->ds = ds;
 
diff --git a/drivers/net/dsa/netc/netc_switch.h b/drivers/net/dsa/netc/netc_switch.h
index 9ff334301fbc..982c8d3a3fbf 100644
--- a/drivers/net/dsa/netc/netc_switch.h
+++ b/drivers/net/dsa/netc/netc_switch.h
@@ -33,6 +33,7 @@
 #define NETC_MAX_FRAME_LEN		9600
 
 #define NETC_STANDALONE_PVID		0
+#define NETC_VLAN_UNAWARE_PVID(br_id)	(4096 - (br_id))
 
 /* Threshold format: MANT (bits 11:4) * 2^EXP (bits 3:0)
  * Unit: Memory words (average of 20 bytes each)
@@ -79,6 +80,7 @@ struct netc_port {
 	u16 enable:1;
 	u16 uc:1;
 	u16 mc:1;
+	u16 pvid;
 	struct ipft_entry_data *host_flood;
 };
 
-- 
2.34.1



^ permalink raw reply related

* [PATCH v2 net-next 7/9] net: dsa: netc: add VLAN filter table and egress treatment management
From: wei.fang @ 2026-06-02  7:23 UTC (permalink / raw)
  To: claudiu.manoil, vladimir.oltean, xiaoning.wang, andrew+netdev,
	davem, edumazet, kuba, pabeni, chleroy, andrew, olteanv, linux
  Cc: wei.fang, imx, netdev, linux-kernel, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20260602072313.3162120-1-wei.fang@oss.nxp.com>

From: Wei Fang <wei.fang@nxp.com>

Implement the DSA .port_vlan_add and .port_vlan_del operations to enable
VLAN-aware bridge offloading on the NETC switch.

VLAN membership is maintained in the VLAN Filter Table (VFT). Adding the
first port to a VLAN creates a new VFT entry with hardware MAC learning
and flood-on-miss forwarding; subsequent ports update the existing
entry's membership bitmap. Removing the last port deletes the entry.

Egress tagging is handled through the Egress Treatment Table (ETT). Each
VLAN is allocated a group of ETT entries, one per available port. Ports
are assigned a sequential ett_offset during initialisation, used to
address each port's entry within the group. Untagged ports configure the
ETT to strip the outer VLAN tag; tagged ports pass frames through
unmodified. Each ETT group is optionally paired with an Egress Count
Table (ECT) group for per-port frame counting, allocated on a best-effort
basis. When the egress rule of an ETT entry changes, the counter of the
corresponding ECT entry is reset so that it counts only the frames that
match the new egress rule.

A software shadow list serialised by vft_lock tracks active VLAN state
across both port membership and egress tagging. VID 0 is reserved for
standalone port mode and is ignored by both callbacks.

Signed-off-by: Wei Fang <wei.fang@nxp.com>
---
 drivers/net/dsa/netc/netc_main.c   | 436 +++++++++++++++++++++++++++++
 drivers/net/dsa/netc/netc_switch.h |  24 ++
 include/linux/fsl/ntmp.h           |  15 +
 3 files changed, 475 insertions(+)

diff --git a/drivers/net/dsa/netc/netc_main.c b/drivers/net/dsa/netc/netc_main.c
index d4475ad7ed6c..a97121dda237 100644
--- a/drivers/net/dsa/netc/netc_main.c
+++ b/drivers/net/dsa/netc/netc_main.c
@@ -37,6 +37,27 @@ static void netc_destroy_fdb_list(struct netc_switch *priv)
 		netc_del_fdb_entry(entry);
 }
 
+static struct netc_vlan_entry *
+netc_lookup_vlan_entry(struct netc_switch *priv, u16 vid)
+{
+	struct netc_vlan_entry *entry;
+
+	hlist_for_each_entry(entry, &priv->vlan_list, node)
+		if (entry->vid == vid)
+			return entry;
+
+	return NULL;
+}
+
+static void netc_destroy_vlan_list(struct netc_switch *priv)
+{
+	struct netc_vlan_entry *entry;
+	struct hlist_node *tmp;
+
+	hlist_for_each_entry_safe(entry, tmp, &priv->vlan_list, node)
+		netc_del_vlan_entry(entry);
+}
+
 static enum dsa_tag_protocol
 netc_get_tag_protocol(struct dsa_switch *ds, int port,
 		      enum dsa_tag_protocol mprot)
@@ -222,6 +243,7 @@ static int netc_init_all_ports(struct netc_switch *priv)
 	struct device *dev = priv->dev;
 	struct netc_port *np;
 	struct dsa_port *dp;
+	int ett_offset = 0;
 	int err;
 
 	priv->ports = devm_kcalloc(dev, priv->info->num_ports,
@@ -251,6 +273,8 @@ static int netc_init_all_ports(struct netc_switch *priv)
 	dsa_switch_for_each_available_port(dp, priv->ds) {
 		np = priv->ports[dp->index];
 		np->dp = dp;
+		np->ett_offset = ett_offset++;
+		priv->port_bitmap |= BIT(dp->index);
 
 		err = netc_port_get_info_from_dt(np, dp->dn, dev);
 		if (err)
@@ -831,6 +855,8 @@ static int netc_setup(struct dsa_switch *ds)
 
 	INIT_HLIST_HEAD(&priv->fdb_list);
 	mutex_init(&priv->fdbt_lock);
+	INIT_HLIST_HEAD(&priv->vlan_list);
+	mutex_init(&priv->vft_lock);
 
 	netc_switch_fixed_config(priv);
 
@@ -858,6 +884,7 @@ static int netc_setup(struct dsa_switch *ds)
 	 * hardware state.
 	 */
 	mutex_destroy(&priv->fdbt_lock);
+	mutex_destroy(&priv->vft_lock);
 	netc_free_ntmp_user(priv);
 
 	return err;
@@ -867,6 +894,8 @@ static void netc_destroy_all_lists(struct netc_switch *priv)
 {
 	netc_destroy_fdb_list(priv);
 	mutex_destroy(&priv->fdbt_lock);
+	netc_destroy_vlan_list(priv);
+	mutex_destroy(&priv->vft_lock);
 }
 
 static void netc_free_host_flood_rules(struct netc_switch *priv)
@@ -1025,6 +1054,382 @@ static void netc_switch_get_ip_revision(struct netc_switch *priv)
 	priv->revision = FIELD_GET(IPBRR0_IP_REV, val);
 }
 
+static void netc_init_ett_cfge(struct ett_cfge_data *cfge, bool untagged,
+			       u32 ett_eid, u32 ect_eid)
+{
+	u32 vuda_sqta = FMTEID_VUDA_SQTA;
+	u16 efm_cfg = 0;
+
+	if (ect_eid != NTMP_NULL_ENTRY_ID) {
+		/* Increase egress frame counter */
+		efm_cfg |= FIELD_PREP(ETT_ECA, ETT_ECA_INC);
+		cfge->ec_eid = cpu_to_le32(ect_eid);
+	}
+
+	/* If egress rule is VLAN untagged */
+	if (untagged) {
+		/* delete outer VLAN tag */
+		vuda_sqta |= FIELD_PREP(FMTEID_VUDA, FMTEID_VUDA_DEL_OTAG);
+		/* length change: twos-complement notation */
+		efm_cfg |= FIELD_PREP(ETT_EFM_LEN_CHANGE,
+				      ETT_FRM_LEN_DEL_VLAN);
+	}
+
+	cfge->efm_eid = cpu_to_le32(vuda_sqta);
+	cfge->efm_cfg = cpu_to_le16(efm_cfg);
+}
+
+static int netc_add_ett_entry(struct netc_switch *priv, bool untagged,
+			      u32 ett_eid, u32 ect_eid)
+{
+	struct ntmp_user *ntmp = &priv->ntmp;
+	struct ett_cfge_data cfge = {};
+
+	netc_init_ett_cfge(&cfge, untagged, ett_eid, ect_eid);
+
+	return ntmp_ett_add_entry(ntmp, ett_eid, &cfge);
+}
+
+static int netc_update_ett_entry(struct netc_switch *priv, bool untagged,
+				 u32 ett_eid, u32 ect_eid)
+{
+	struct ntmp_user *ntmp = &priv->ntmp;
+	struct ett_cfge_data cfge = {};
+
+	netc_init_ett_cfge(&cfge, untagged, ett_eid, ect_eid);
+
+	return ntmp_ett_update_entry(ntmp, ett_eid, &cfge);
+}
+
+static int netc_add_ett_group_entries(struct netc_switch *priv,
+				      u32 untagged_port_bitmap,
+				      u32 ett_base_eid,
+				      u32 ect_base_eid)
+{
+	struct netc_port **ports = priv->ports;
+	u32 ett_eid, ect_eid;
+	bool untagged;
+	int i, err;
+
+	for (i = 0; i < priv->info->num_ports; i++) {
+		if (!ports[i]->dp)
+			continue;
+
+		untagged = !!(untagged_port_bitmap & BIT(i));
+		ett_eid = ett_base_eid + ports[i]->ett_offset;
+		ect_eid = NTMP_NULL_ENTRY_ID;
+		if (ect_base_eid != NTMP_NULL_ENTRY_ID)
+			ect_eid = ect_base_eid + ports[i]->ett_offset;
+
+		err = netc_add_ett_entry(priv, untagged, ett_eid, ect_eid);
+		if (err)
+			goto clear_ett_entries;
+	}
+
+	return 0;
+
+clear_ett_entries:
+	while (--i >= 0) {
+		if (!ports[i]->dp)
+			continue;
+
+		ett_eid = ett_base_eid + ports[i]->ett_offset;
+		ntmp_ett_delete_entry(&priv->ntmp, ett_eid);
+	}
+
+	return err;
+}
+
+static int netc_add_vlan_egress_rule(struct netc_switch *priv,
+				     struct netc_vlan_entry *entry)
+{
+	u32 num_ports = netc_num_available_ports(priv);
+	struct ntmp_user *ntmp = &priv->ntmp;
+	u32 ect_eid = NTMP_NULL_ENTRY_ID;
+	u32 ett_eid, ett_gid, ect_gid;
+	int err;
+
+	/* Step 1: Find available egress counter table entries and update
+	 * these entries.
+	 */
+	ect_gid = ntmp_lookup_free_eid(ntmp->ect_gid_bitmap,
+				       ntmp->ect_bitmap_size);
+	if (ect_gid == NTMP_NULL_ENTRY_ID) {
+		dev_info(priv->dev,
+			 "No egress counter table entries available\n");
+	} else {
+		ect_eid = ect_gid * num_ports;
+		for (int i = 0; i < num_ports; i++)
+			/* Reset the counters of the entry. There is no need
+			 * to check the return value, the only issue is that
+			 * the entry's counter might be inaccurate, but it
+			 * will not affect the functionality.
+			 */
+			ntmp_ect_update_entry(ntmp, ect_eid + i);
+	}
+
+	/* Step 2: Find available egress treatment table entries and add
+	 * these entries.
+	 */
+	ett_gid = ntmp_lookup_free_eid(ntmp->ett_gid_bitmap,
+				       ntmp->ett_bitmap_size);
+	if (ett_gid == NTMP_NULL_ENTRY_ID) {
+		dev_err(priv->dev,
+			"No egress treatment table entries available\n");
+		err = -ENOSPC;
+		goto clear_ect_gid;
+	}
+
+	ett_eid = ett_gid * num_ports;
+	err = netc_add_ett_group_entries(priv, entry->untagged_port_bitmap,
+					 ett_eid, ect_eid);
+	if (err)
+		goto clear_ett_gid;
+
+	entry->cfge.et_eid = cpu_to_le32(ett_eid);
+	entry->ect_gid = ect_gid;
+
+	return 0;
+
+clear_ett_gid:
+	ntmp_clear_eid_bitmap(ntmp->ett_gid_bitmap, ett_gid);
+
+clear_ect_gid:
+	if (ect_gid != NTMP_NULL_ENTRY_ID)
+		ntmp_clear_eid_bitmap(ntmp->ect_gid_bitmap, ect_gid);
+
+	return err;
+}
+
+static void netc_delete_vlan_egress_rule(struct netc_switch *priv,
+					 struct netc_vlan_entry *entry)
+{
+	u32 num_ports = netc_num_available_ports(priv);
+	struct ntmp_user *ntmp = &priv->ntmp;
+	u32 ett_eid, ett_gid;
+
+	ett_eid = le32_to_cpu(entry->cfge.et_eid);
+	if (ett_eid == NTMP_NULL_ENTRY_ID)
+		return;
+
+	ett_gid = ett_eid / num_ports;
+	ntmp_clear_eid_bitmap(ntmp->ett_gid_bitmap, ett_gid);
+	for (int i = 0; i < num_ports; i++)
+		ntmp_ett_delete_entry(ntmp, ett_eid + i);
+
+	if (entry->ect_gid == NTMP_NULL_ENTRY_ID)
+		return;
+
+	ntmp_clear_eid_bitmap(ntmp->ect_gid_bitmap, entry->ect_gid);
+}
+
+static int netc_port_update_vlan_egress_rule(struct netc_port *np,
+					     struct netc_vlan_entry *entry)
+{
+	bool untagged = !!(entry->untagged_port_bitmap & BIT(np->dp->index));
+	u32 num_ports = netc_num_available_ports(np->switch_priv);
+	u32 ett_eid = le32_to_cpu(entry->cfge.et_eid);
+	struct netc_switch *priv = np->switch_priv;
+	u32 ect_eid = NTMP_NULL_ENTRY_ID;
+	int err;
+
+	if (ett_eid == NTMP_NULL_ENTRY_ID)
+		return 0;
+
+	if (entry->ect_gid != NTMP_NULL_ENTRY_ID) {
+		/* Each ETT entry maps to an ECT entry if ect_gid is not NULL
+		 * entry ID. The offset of the ECT entry corresponding to the
+		 * port in the group is equal to ett_offset.
+		 */
+		ect_eid = entry->ect_gid * num_ports + np->ett_offset;
+		ntmp_ect_update_entry(&priv->ntmp, ect_eid);
+	}
+
+	ett_eid += np->ett_offset;
+	err = netc_update_ett_entry(priv, untagged, ett_eid, ect_eid);
+	if (err)
+		dev_err(priv->dev,
+			"Failed to update VLAN %u egress rule on port %d\n",
+			entry->vid, np->dp->index);
+
+	return err;
+}
+
+static int netc_port_add_vlan_entry(struct netc_port *np, u16 vid,
+				    bool untagged)
+{
+	struct netc_switch *priv = np->switch_priv;
+	struct netc_vlan_entry *entry;
+	struct vft_cfge_data *cfge;
+	u32 index = np->dp->index;
+	u32 bitmap_stg;
+	int err;
+	u16 cfg;
+
+	entry = kzalloc_obj(*entry);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->vid = vid;
+	entry->ect_gid = NTMP_NULL_ENTRY_ID;
+
+	bitmap_stg = BIT(index) | VFT_STG_ID(0);
+	cfg = FIELD_PREP(VFT_MLO, MLO_HW) |
+	      FIELD_PREP(VFT_MFO, MFO_NO_MATCH_FLOOD);
+
+	cfge = &entry->cfge;
+	cfge->et_eid = cpu_to_le32(NTMP_NULL_ENTRY_ID);
+	cfge->bitmap_stg = cpu_to_le32(bitmap_stg);
+	cfge->fid = cpu_to_le16(vid);
+	cfge->cfg = cpu_to_le16(cfg);
+	cfge->eta_port_bitmap = cpu_to_le32(priv->port_bitmap);
+
+	if (untagged)
+		entry->untagged_port_bitmap = BIT(index);
+
+	err = netc_add_vlan_egress_rule(priv, entry);
+	if (err)
+		goto free_vlan_entry;
+
+	err = ntmp_vft_add_entry(&priv->ntmp, vid, cfge);
+	if (err) {
+		dev_err(priv->dev,
+			"Failed to add VLAN %u entry on port %d\n",
+			vid, index);
+		goto delete_vlan_egress_rule;
+	}
+
+	netc_add_vlan_entry(priv, entry);
+
+	return 0;
+
+delete_vlan_egress_rule:
+	netc_delete_vlan_egress_rule(priv, entry);
+free_vlan_entry:
+	kfree(entry);
+
+	return err;
+}
+
+static bool netc_port_vlan_egress_rule_changed(struct netc_vlan_entry *entry,
+					       int port, bool untagged)
+{
+	bool old_untagged = !!(entry->untagged_port_bitmap & BIT(port));
+
+	return old_untagged != untagged;
+}
+
+static int netc_port_set_vlan_entry(struct netc_port *np, u16 vid,
+				    bool untagged)
+{
+	struct netc_switch *priv = np->switch_priv;
+	struct netc_vlan_entry *entry;
+	struct vft_cfge_data *cfge;
+	int port = np->dp->index;
+	bool changed;
+	int err = 0;
+
+	mutex_lock(&priv->vft_lock);
+
+	entry = netc_lookup_vlan_entry(priv, vid);
+	if (!entry) {
+		err = netc_port_add_vlan_entry(np, vid, untagged);
+		goto unlock_vft;
+	}
+
+	/* Check whether the egress VLAN rule is changed */
+	changed = netc_port_vlan_egress_rule_changed(entry, port, untagged);
+	if (changed) {
+		entry->untagged_port_bitmap ^= BIT(port);
+		err = netc_port_update_vlan_egress_rule(np, entry);
+		if (err) {
+			entry->untagged_port_bitmap ^= BIT(port);
+			goto unlock_vft;
+		}
+	}
+
+	cfge = &entry->cfge;
+	if (cfge->bitmap_stg & cpu_to_le32(BIT(port)))
+		goto unlock_vft;
+
+	cfge->bitmap_stg |= cpu_to_le32(BIT(port));
+	err = ntmp_vft_update_entry(&priv->ntmp, vid, cfge);
+	if (err) {
+		dev_err(priv->dev,
+			"Failed to update VLAN %u entry on port %d\n",
+			vid, port);
+
+		goto restore_bitmap_stg;
+	}
+
+	mutex_unlock(&priv->vft_lock);
+
+	return 0;
+
+restore_bitmap_stg:
+	cfge->bitmap_stg &= cpu_to_le32(~BIT(port));
+	if (changed) {
+		entry->untagged_port_bitmap ^= BIT(port);
+		/* Recover the corresponding ETT entry. It doesn't matter
+		 * if it fails because the bit corresponding to the port
+		 * in the port bitmap of the VFT entry is not set, so the
+		 * frame will not match that ETT entry.
+		 */
+		if (netc_port_update_vlan_egress_rule(np, entry))
+			entry->untagged_port_bitmap ^= BIT(port);
+	}
+unlock_vft:
+	mutex_unlock(&priv->vft_lock);
+
+	return err;
+}
+
+static int netc_port_del_vlan_entry(struct netc_port *np, u16 vid)
+{
+	struct netc_switch *priv = np->switch_priv;
+	struct netc_vlan_entry *entry;
+	struct vft_cfge_data *cfge;
+	int port = np->dp->index;
+	u32 vlan_port_bitmap;
+	int err = 0;
+
+	mutex_lock(&priv->vft_lock);
+
+	entry = netc_lookup_vlan_entry(priv, vid);
+	if (!entry)
+		goto unlock_vft;
+
+	cfge = &entry->cfge;
+	vlan_port_bitmap = FIELD_GET(VFT_PORT_MEMBERSHIP,
+				     le32_to_cpu(cfge->bitmap_stg));
+	/* If the VLAN only belongs to the current port */
+	if (vlan_port_bitmap == BIT(port)) {
+		err = ntmp_vft_delete_entry(&priv->ntmp, vid);
+		if (err)
+			goto unlock_vft;
+
+		netc_delete_vlan_egress_rule(priv, entry);
+		netc_del_vlan_entry(entry);
+
+		goto unlock_vft;
+	}
+
+	if (!(vlan_port_bitmap & BIT(port)))
+		goto unlock_vft;
+
+	cfge->bitmap_stg &= cpu_to_le32(~BIT(port));
+	err = ntmp_vft_update_entry(&priv->ntmp, vid, cfge);
+	if (err) {
+		cfge->bitmap_stg |= cpu_to_le32(BIT(port));
+		goto unlock_vft;
+	}
+
+unlock_vft:
+	mutex_unlock(&priv->vft_lock);
+
+	return err;
+}
+
 static int netc_port_enable(struct dsa_switch *ds, int port,
 			    struct phy_device *phy)
 {
@@ -1297,6 +1702,35 @@ static void netc_port_set_host_flood(struct dsa_switch *ds, int port,
 	netc_port_remove_host_flood(np, old_host_flood);
 }
 
+static int netc_port_vlan_add(struct dsa_switch *ds, int port,
+			      const struct switchdev_obj_port_vlan *vlan,
+			      struct netlink_ext_ack *extack)
+{
+	struct netc_port *np = NETC_PORT(ds, port);
+	bool untagged;
+
+	/* The 8021q layer may attempt to change NETC_STANDALONE_PVID
+	 * (VID 0), so we need to ignore it.
+	 */
+	if (vlan->vid == NETC_STANDALONE_PVID)
+		return 0;
+
+	untagged = !!(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED);
+
+	return netc_port_set_vlan_entry(np, vlan->vid, untagged);
+}
+
+static int netc_port_vlan_del(struct dsa_switch *ds, int port,
+			      const struct switchdev_obj_port_vlan *vlan)
+{
+	struct netc_port *np = NETC_PORT(ds, port);
+
+	if (vlan->vid == NETC_STANDALONE_PVID)
+		return 0;
+
+	return netc_port_del_vlan_entry(np, vlan->vid);
+}
+
 static void netc_phylink_get_caps(struct dsa_switch *ds, int port,
 				  struct phylink_config *config)
 {
@@ -1575,6 +2009,8 @@ static const struct dsa_switch_ops netc_switch_ops = {
 	.port_mdb_add			= netc_port_mdb_add,
 	.port_mdb_del			= netc_port_mdb_del,
 	.port_set_host_flood		= netc_port_set_host_flood,
+	.port_vlan_add			= netc_port_vlan_add,
+	.port_vlan_del			= netc_port_vlan_del,
 	.get_pause_stats		= netc_port_get_pause_stats,
 	.get_rmon_stats			= netc_port_get_rmon_stats,
 	.get_eth_ctrl_stats		= netc_port_get_eth_ctrl_stats,
diff --git a/drivers/net/dsa/netc/netc_switch.h b/drivers/net/dsa/netc/netc_switch.h
index 4fbd12825b67..9ff334301fbc 100644
--- a/drivers/net/dsa/netc/netc_switch.h
+++ b/drivers/net/dsa/netc/netc_switch.h
@@ -74,6 +74,7 @@ struct netc_port {
 	struct dsa_port *dp;
 	struct clk *ref_clk; /* RGMII/RMII reference clock */
 	struct mii_bus *emdio;
+	int ett_offset;
 
 	u16 enable:1;
 	u16 uc:1;
@@ -94,6 +95,14 @@ struct netc_fdb_entry {
 	struct hlist_node node;
 };
 
+struct netc_vlan_entry {
+	u16 vid;
+	u32 ect_gid;
+	u32 untagged_port_bitmap;
+	struct vft_cfge_data cfge;
+	struct hlist_node node;
+};
+
 struct netc_port_stat {
 	int reg;
 	char name[ETH_GSTRING_LEN] __nonstring;
@@ -108,10 +117,13 @@ struct netc_switch {
 	const struct netc_switch_info *info;
 	struct netc_switch_regs regs;
 	struct netc_port **ports;
+	u32 port_bitmap; /* bitmap of available ports */
 
 	struct ntmp_user ntmp;
 	struct hlist_head fdb_list;
 	struct mutex fdbt_lock; /* FDB table lock */
+	struct hlist_head vlan_list;
+	struct mutex vft_lock; /* VLAN filter table lock */
 
 	/* Switch hardware capabilities */
 	u32 htmcapr_num_words;
@@ -153,6 +165,18 @@ static inline void netc_del_fdb_entry(struct netc_fdb_entry *entry)
 	kfree(entry);
 }
 
+static inline void netc_add_vlan_entry(struct netc_switch *priv,
+				       struct netc_vlan_entry *entry)
+{
+	hlist_add_head(&entry->node, &priv->vlan_list);
+}
+
+static inline void netc_del_vlan_entry(struct netc_vlan_entry *entry)
+{
+	hlist_del(&entry->node);
+	kfree(entry);
+}
+
 int netc_switch_platform_probe(struct netc_switch *priv);
 
 /* ethtool APIs */
diff --git a/include/linux/fsl/ntmp.h b/include/linux/fsl/ntmp.h
index 531c3b9982ee..1e6fd7c94af0 100644
--- a/include/linux/fsl/ntmp.h
+++ b/include/linux/fsl/ntmp.h
@@ -267,6 +267,21 @@ struct bpt_cfge_data {
 	__le32 fc_ports;
 };
 
+union ntmp_fmt_eid {
+	__le32 index;
+#define	FMTEID_INDEX		GENMASK(12, 0)
+	__le32 vuda_sqta;
+#define FMTEID_VUDA		GENMASK(1, 0)
+#define FMTEID_VUDA_DEL_OTAG	2
+#define FMTEID_SQTA		GENMASK(4, 2)
+#define FMTEID_SQTA_DEL		2
+#define FMTEID_VUDA_SQTA	BIT(13)
+	__le32 vara_vid;
+#define FMTEID_VID		GENMASK(11, 0)
+#define FMTEID_VARA		GENMASK(13, 12)
+#define FMTEID_VARA_VID		BIT(14)
+};
+
 #if IS_ENABLED(CONFIG_NXP_NETC_LIB)
 int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev,
 		   const struct netc_cbdr_regs *regs);
-- 
2.34.1



^ permalink raw reply related

* [PATCH v2 net-next 6/9] net: enetc: add helpers to set/clear table bitmap
From: wei.fang @ 2026-06-02  7:23 UTC (permalink / raw)
  To: claudiu.manoil, vladimir.oltean, xiaoning.wang, andrew+netdev,
	davem, edumazet, kuba, pabeni, chleroy, andrew, olteanv, linux
  Cc: wei.fang, imx, netdev, linux-kernel, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20260602072313.3162120-1-wei.fang@oss.nxp.com>

From: Wei Fang <wei.fang@nxp.com>

NTMP index tables require software to allocate and manage entry IDs.
Add two bitmap helper functions to facilitate this management:

ntmp_lookup_free_eid(): finds the first zero bit in the given bitmap,
sets it to mark the entry as in-use, and returns the corresponding entry
ID. Returns NTMP_NULL_ENTRY_ID if no free entry is available.

ntmp_clear_eid_bitmap(): clears the bit associated with the given entry
ID in the bitmap to mark the entry as free. It is a no-op if the entry
ID is NTMP_NULL_ENTRY_ID.

Both functions are exported for use by other modules, such as the NETC
switch driver which needs to manage group index bitmaps for the Egress
Treatment Table (ETT) and Egress Count Table (ECT).

Signed-off-by: Wei Fang <wei.fang@nxp.com>
---
 drivers/net/ethernet/freescale/enetc/ntmp.c | 24 +++++++++++++++++++++
 include/linux/fsl/ntmp.h                    |  2 ++
 2 files changed, 26 insertions(+)

diff --git a/drivers/net/ethernet/freescale/enetc/ntmp.c b/drivers/net/ethernet/freescale/enetc/ntmp.c
index 03a4ac6af56e..b2db91683e7b 100644
--- a/drivers/net/ethernet/freescale/enetc/ntmp.c
+++ b/drivers/net/ethernet/freescale/enetc/ntmp.c
@@ -47,6 +47,30 @@
 #define RSST_STSE_DATA_SIZE(n)		((n) * 8)
 #define RSST_CFGE_DATA_SIZE(n)		(n)
 
+u32 ntmp_lookup_free_eid(unsigned long *bitmap, u32 size)
+{
+	u32 entry_id;
+
+	entry_id = find_first_zero_bit(bitmap, size);
+	if (entry_id == size)
+		return NTMP_NULL_ENTRY_ID;
+
+	/* Set the bit once we found it */
+	set_bit(entry_id, bitmap);
+
+	return entry_id;
+}
+EXPORT_SYMBOL_GPL(ntmp_lookup_free_eid);
+
+void ntmp_clear_eid_bitmap(unsigned long *bitmap, u32 entry_id)
+{
+	if (entry_id == NTMP_NULL_ENTRY_ID)
+		return;
+
+	clear_bit(entry_id, bitmap);
+}
+EXPORT_SYMBOL_GPL(ntmp_clear_eid_bitmap);
+
 int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev,
 		   const struct netc_cbdr_regs *regs)
 {
diff --git a/include/linux/fsl/ntmp.h b/include/linux/fsl/ntmp.h
index 1e087316d4f4..531c3b9982ee 100644
--- a/include/linux/fsl/ntmp.h
+++ b/include/linux/fsl/ntmp.h
@@ -271,6 +271,8 @@ struct bpt_cfge_data {
 int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev,
 		   const struct netc_cbdr_regs *regs);
 void ntmp_free_cbdr(struct netc_cbdr *cbdr);
+u32 ntmp_lookup_free_eid(unsigned long *bitmap, u32 size);
+void ntmp_clear_eid_bitmap(unsigned long *bitmap, u32 entry_id);
 
 /* NTMP APIs */
 int ntmp_maft_add_entry(struct ntmp_user *user, u32 entry_id,
-- 
2.34.1



^ permalink raw reply related

* [PATCH v2 net-next 5/9] net: dsa: netc: initialize the group bitmap of ETT and ECT
From: wei.fang @ 2026-06-02  7:23 UTC (permalink / raw)
  To: claudiu.manoil, vladimir.oltean, xiaoning.wang, andrew+netdev,
	davem, edumazet, kuba, pabeni, chleroy, andrew, olteanv, linux
  Cc: wei.fang, imx, netdev, linux-kernel, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20260602072313.3162120-1-wei.fang@oss.nxp.com>

From: Wei Fang <wei.fang@nxp.com>

The Egress Treatment Table (ETT) and Egress Count Table (ECT) are both
index tables whose entry IDs are allocated by software. Every num_ports
entries form a group, where each entry in the group corresponds to one
port. To facilitate group allocation and management, initialize the group
index bitmaps for both tables based on hardware capabilities reported by
ETTCAPR and ECTCAPR registers.

The bitmap size per table is calculated as the total number of hardware
entries divided by the number of available ports, which gives the number
of groups available for software allocation. A set bit in the bitmap
represents a group index that has been allocated.

These bitmaps will be used by subsequent patches that add VLAN support.

Signed-off-by: Wei Fang <wei.fang@nxp.com>
---
 drivers/net/dsa/netc/netc_main.c      | 90 ++++++++++++++++++++++++++-
 drivers/net/dsa/netc/netc_switch_hw.h |  6 ++
 include/linux/fsl/ntmp.h              |  7 +++
 3 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/netc/netc_main.c b/drivers/net/dsa/netc/netc_main.c
index fa7dd307ce13..d4475ad7ed6c 100644
--- a/drivers/net/dsa/netc/netc_main.c
+++ b/drivers/net/dsa/netc/netc_main.c
@@ -323,16 +323,104 @@ static void netc_remove_all_cbdrs(struct netc_switch *priv)
 		ntmp_free_cbdr(&ntmp->ring[i]);
 }
 
+static u32 netc_num_available_ports(struct netc_switch *priv)
+{
+	struct dsa_port *dp;
+	u32 num_ports = 0;
+
+	dsa_switch_for_each_available_port(dp, priv->ds)
+		num_ports++;
+
+	return num_ports;
+}
+
+static int netc_init_ntmp_bitmap_sizes(struct netc_switch *priv)
+{
+	u32 num_ports = netc_num_available_ports(priv);
+	struct netc_switch_regs *regs = &priv->regs;
+	struct ntmp_user *ntmp = &priv->ntmp;
+	u32 val;
+
+	if (!num_ports)
+		return -EINVAL;
+
+	val = netc_base_rd(regs, NETC_ETTCAPR);
+	ntmp->ett_bitmap_size = NETC_GET_NUM_ENTRIES(val) / num_ports;
+	if (!ntmp->ett_bitmap_size)
+		return -EINVAL;
+
+	val = netc_base_rd(regs, NETC_ECTCAPR);
+	ntmp->ect_bitmap_size = NETC_GET_NUM_ENTRIES(val) / num_ports;
+	if (!ntmp->ect_bitmap_size)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int netc_init_ntmp_bitmaps(struct netc_switch *priv)
+{
+	struct ntmp_user *ntmp = &priv->ntmp;
+
+	ntmp->ett_gid_bitmap = bitmap_zalloc(ntmp->ett_bitmap_size,
+					     GFP_KERNEL);
+	if (!ntmp->ett_gid_bitmap)
+		return -ENOMEM;
+
+	ntmp->ect_gid_bitmap = bitmap_zalloc(ntmp->ect_bitmap_size,
+					     GFP_KERNEL);
+	if (!ntmp->ect_gid_bitmap)
+		goto free_ett_gid_bitmap;
+
+	return 0;
+
+free_ett_gid_bitmap:
+	bitmap_free(ntmp->ett_gid_bitmap);
+	ntmp->ett_gid_bitmap = NULL;
+
+	return -ENOMEM;
+}
+
+static void netc_free_ntmp_bitmaps(struct netc_switch *priv)
+{
+	struct ntmp_user *ntmp = &priv->ntmp;
+
+	bitmap_free(ntmp->ect_gid_bitmap);
+	ntmp->ect_gid_bitmap = NULL;
+
+	bitmap_free(ntmp->ett_gid_bitmap);
+	ntmp->ett_gid_bitmap = NULL;
+}
+
 static int netc_init_ntmp_user(struct netc_switch *priv)
 {
+	int err;
+
 	netc_init_ntmp_tbl_versions(priv);
 
-	return netc_init_all_cbdrs(priv);
+	err = netc_init_ntmp_bitmap_sizes(priv);
+	if (err)
+		return err;
+
+	err = netc_init_ntmp_bitmaps(priv);
+	if (err)
+		return err;
+
+	err = netc_init_all_cbdrs(priv);
+	if (err)
+		goto free_ntmp_bitmaps;
+
+	return 0;
+
+free_ntmp_bitmaps:
+	netc_free_ntmp_bitmaps(priv);
+
+	return err;
 }
 
 static void netc_free_ntmp_user(struct netc_switch *priv)
 {
 	netc_remove_all_cbdrs(priv);
+	netc_free_ntmp_bitmaps(priv);
 }
 
 static void netc_switch_dos_default_config(struct netc_switch *priv)
diff --git a/drivers/net/dsa/netc/netc_switch_hw.h b/drivers/net/dsa/netc/netc_switch_hw.h
index 1d976882a6cc..1404ae41c7bc 100644
--- a/drivers/net/dsa/netc/netc_switch_hw.h
+++ b/drivers/net/dsa/netc/netc_switch_hw.h
@@ -36,6 +36,12 @@
 #define  DOSL3CR_SAMEADDR		BIT(0)
 #define  DOSL3CR_IPSAMCC		BIT(1)
 
+#define NETC_ETTCAPR			0x18c4
+#define NETC_ECTCAPR			0x18ec
+/* Index table NUM_ENTRIES mask */
+#define NETC_NUM_ENTRIES		GENMASK(15, 0)
+#define NETC_GET_NUM_ENTRIES(v)		FIELD_GET(NETC_NUM_ENTRIES, (v))
+
 /* Hash table memory capability register, the memory is shared by
  * the following tables:
  *
diff --git a/include/linux/fsl/ntmp.h b/include/linux/fsl/ntmp.h
index 1123d060713c..1e087316d4f4 100644
--- a/include/linux/fsl/ntmp.h
+++ b/include/linux/fsl/ntmp.h
@@ -3,6 +3,7 @@
 #ifndef __NETC_NTMP_H
 #define __NETC_NTMP_H
 
+#include <linux/bitmap.h>
 #include <linux/bitops.h>
 #include <linux/if_ether.h>
 
@@ -70,6 +71,12 @@ struct ntmp_user {
 	struct device *dev;
 	struct netc_cbdr *ring;
 	struct netc_tbl_vers tbl;
+
+	/* NTMP table bitmaps for resource management */
+	u32 ett_bitmap_size;
+	u32 ect_bitmap_size;
+	unsigned long *ett_gid_bitmap; /* only valid for switch */
+	unsigned long *ect_gid_bitmap; /* only valid for switch */
 };
 
 struct maft_entry_data {
-- 
2.34.1



^ permalink raw reply related

* [PATCH v2 net-next 4/9] net: enetc: add "Update" operation to the egress count table
From: wei.fang @ 2026-06-02  7:23 UTC (permalink / raw)
  To: claudiu.manoil, vladimir.oltean, xiaoning.wang, andrew+netdev,
	davem, edumazet, kuba, pabeni, chleroy, andrew, olteanv, linux
  Cc: wei.fang, imx, netdev, linux-kernel, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20260602072313.3162120-1-wei.fang@oss.nxp.com>

From: Wei Fang <wei.fang@nxp.com>

The egress count table is a static bounded index table that maintains
egress-related statistics. The table is implemented as a
linear array of entries accessed using an index (0, 1, 2, ..., n) that
uniquely identifies an entry within the array. Egress Counter Entry ID
(EC_EID) is used as an index to an entry in this table. The EC_EID is
specified in the egress treatment table.

Egress count table entries are always present and enabled. The table
only supports access via entry ID, which is assigned by the software.
It supports the Update, Query, and Query followed by Update operations.
Currently, only the Update operation is implemented by the driver.

Signed-off-by: Wei Fang <wei.fang@nxp.com>
---
 drivers/net/ethernet/freescale/enetc/ntmp.c | 45 +++++++++++++++++++++
 include/linux/fsl/ntmp.h                    |  7 ++++
 2 files changed, 52 insertions(+)

diff --git a/drivers/net/ethernet/freescale/enetc/ntmp.c b/drivers/net/ethernet/freescale/enetc/ntmp.c
index e2f439cb1942..03a4ac6af56e 100644
--- a/drivers/net/ethernet/freescale/enetc/ntmp.c
+++ b/drivers/net/ethernet/freescale/enetc/ntmp.c
@@ -25,6 +25,7 @@
 #define NTMP_FDBT_ID			15
 #define NTMP_VFT_ID			18
 #define NTMP_ETT_ID			33
+#define NTMP_ECT_ID			39
 #define NTMP_BPT_ID			41
 
 /* Generic Update Actions for most tables */
@@ -33,6 +34,7 @@
 
 /* Specific Update Actions for some tables */
 #define FDBT_UA_ACTEU			BIT(1)
+#define ECT_UA_STSEU			BIT(0)
 #define BPT_UA_BPSEU			BIT(1)
 
 /* Query Action: 0: Full query. 1: Query entry ID, the fields after entry
@@ -287,6 +289,8 @@ static const char *ntmp_table_name(int tbl_id)
 		return "VLAN Filter Table";
 	case NTMP_ETT_ID:
 		return "Egress Treatment Table";
+	case NTMP_ECT_ID:
+		return "Egress Count Table";
 	case NTMP_BPT_ID:
 		return "Buffer Pool Table";
 	default:
@@ -1197,6 +1201,47 @@ int ntmp_ett_delete_entry(struct ntmp_user *user, u32 entry_id)
 }
 EXPORT_SYMBOL_GPL(ntmp_ett_delete_entry);
 
+/**
+ * ntmp_ect_update_entry - reset the statistics element data of the
+ * specified egress counter table entry
+ * @user: target ntmp_user struct
+ * @entry_id: entry ID
+ *
+ * Return: 0 on success, otherwise a negative error code
+ */
+int ntmp_ect_update_entry(struct ntmp_user *user, u32 entry_id)
+{
+	struct ntmp_req_by_eid *req;
+	struct netc_swcbd swcbd;
+	struct netc_cbdr *cbdr;
+	union netc_cbd cbd;
+	int err;
+
+	swcbd.size = sizeof(*req);
+	err = ntmp_alloc_data_mem(user->dev, &swcbd, (void **)&req);
+	if (err)
+		return err;
+
+	/* Request data */
+	ntmp_fill_crd_eid(req, user->tbl.ect_ver, 0, ECT_UA_STSEU, entry_id);
+
+	/* Request header */
+	ntmp_fill_request_hdr(&cbd, swcbd.dma, NTMP_LEN(swcbd.size, 0),
+			      NTMP_ECT_ID, NTMP_CMD_UPDATE, NTMP_AM_ENTRY_ID);
+
+	ntmp_select_and_lock_cbdr(user, &cbdr);
+	err = netc_xmit_ntmp_cmd(cbdr, &cbd, &swcbd);
+	if (err)
+		dev_err(user->dev,
+			"Failed to update %s entry 0x%x, err: %pe\n",
+			ntmp_table_name(NTMP_ECT_ID), entry_id, ERR_PTR(err));
+
+	ntmp_unlock_cbdr(cbdr);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(ntmp_ect_update_entry);
+
 int ntmp_bpt_update_entry(struct ntmp_user *user, u32 entry_id,
 			  const struct bpt_cfge_data *cfge)
 {
diff --git a/include/linux/fsl/ntmp.h b/include/linux/fsl/ntmp.h
index 0c951e1c763d..1123d060713c 100644
--- a/include/linux/fsl/ntmp.h
+++ b/include/linux/fsl/ntmp.h
@@ -37,6 +37,7 @@ struct netc_tbl_vers {
 	u8 bpt_ver;
 	u8 ipft_ver;
 	u8 ett_ver;
+	u8 ect_ver;
 };
 
 struct netc_swcbd {
@@ -232,6 +233,11 @@ struct ett_cfge_data {
 	__le32 esqa_tgt_eid;
 };
 
+struct ect_stse_data {
+	__le64 enq_frm_cnt;
+	__le64 rej_frm_cnt;
+};
+
 struct bpt_bpse_data {
 	__le32 amount_used;
 	__le32 amount_used_hwm;
@@ -294,6 +300,7 @@ int ntmp_ett_add_entry(struct ntmp_user *user, u32 entry_id,
 int ntmp_ett_update_entry(struct ntmp_user *user, u32 entry_id,
 			  const struct ett_cfge_data *cfge);
 int ntmp_ett_delete_entry(struct ntmp_user *user, u32 entry_id);
+int ntmp_ect_update_entry(struct ntmp_user *user, u32 entry_id);
 int ntmp_bpt_update_entry(struct ntmp_user *user, u32 entry_id,
 			  const struct bpt_cfge_data *cfge);
 #else
-- 
2.34.1



^ permalink raw reply related

* [PATCH v2 net-next 3/9] net: enetc: add interfaces to manage egress treatment table
From: wei.fang @ 2026-06-02  7:23 UTC (permalink / raw)
  To: claudiu.manoil, vladimir.oltean, xiaoning.wang, andrew+netdev,
	davem, edumazet, kuba, pabeni, chleroy, andrew, olteanv, linux
  Cc: wei.fang, imx, netdev, linux-kernel, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20260602072313.3162120-1-wei.fang@oss.nxp.com>

From: Wei Fang <wei.fang@nxp.com>

Each entry in the egress treatment table contains the egress packet
processing actions to be applied to a grouping or scope of packets
exiting on a particular egress port of the switch. A scope of packets,
for example, could be the packets exiting a particular VLAN, matching
a particular 802.1Q bridge forwarding entry or belonging to a stream
identified at ingress. The egress treatment table is implemented as a
linear array of entries accessed using an index (0, 1, 2, ..., n) that
uniquely identifies an entry within the array.

The egress treatment table only supports access via entry ID, which is
assigned by the software. It supports Add, Update, Delete and Query
operations. Note that the Query operation is not implemented yet.

Signed-off-by: Wei Fang <wei.fang@nxp.com>
---
 drivers/net/ethernet/freescale/enetc/ntmp.c   | 106 ++++++++++++++++++
 .../ethernet/freescale/enetc/ntmp_private.h   |   8 ++
 include/linux/fsl/ntmp.h                      |  23 ++++
 3 files changed, 137 insertions(+)

diff --git a/drivers/net/ethernet/freescale/enetc/ntmp.c b/drivers/net/ethernet/freescale/enetc/ntmp.c
index bcbbb012aec6..e2f439cb1942 100644
--- a/drivers/net/ethernet/freescale/enetc/ntmp.c
+++ b/drivers/net/ethernet/freescale/enetc/ntmp.c
@@ -24,6 +24,7 @@
 #define NTMP_IPFT_ID			13
 #define NTMP_FDBT_ID			15
 #define NTMP_VFT_ID			18
+#define NTMP_ETT_ID			33
 #define NTMP_BPT_ID			41
 
 /* Generic Update Actions for most tables */
@@ -284,6 +285,8 @@ static const char *ntmp_table_name(int tbl_id)
 		return "FDB Table";
 	case NTMP_VFT_ID:
 		return "VLAN Filter Table";
+	case NTMP_ETT_ID:
+		return "Egress Treatment Table";
 	case NTMP_BPT_ID:
 		return "Buffer Pool Table";
 	default:
@@ -1091,6 +1094,109 @@ int ntmp_vft_delete_entry(struct ntmp_user *user, u16 vid)
 }
 EXPORT_SYMBOL_GPL(ntmp_vft_delete_entry);
 
+/**
+ * ntmp_ett_set_entry - add a new entry to the egress treatment table or
+ * update the configuration element data of the specified entry
+ * @user: target ntmp_user struct
+ * @entry_id: entry ID
+ * @cmd: command type, NTMP_CMD_ADD or NTMP_CMD_UPDATE
+ * @cfge: configuration element data
+ *
+ * Return: 0 on success, otherwise a negative error code
+ */
+static int ntmp_ett_set_entry(struct ntmp_user *user, u32 entry_id,
+			      int cmd, const struct ett_cfge_data *cfge)
+{
+	struct netc_swcbd swcbd;
+	struct ett_req_ua *req;
+	struct netc_cbdr *cbdr;
+	union netc_cbd cbd;
+	int err;
+
+	if (cmd != NTMP_CMD_ADD && cmd != NTMP_CMD_UPDATE)
+		return -EINVAL;
+
+	swcbd.size = sizeof(*req);
+	err = ntmp_alloc_data_mem(user->dev, &swcbd, (void **)&req);
+	if (err)
+		return err;
+
+	/* Request data */
+	ntmp_fill_crd_eid(&req->rbe, user->tbl.ett_ver, 0,
+			  NTMP_GEN_UA_CFGEU, entry_id);
+	req->cfge = *cfge;
+
+	/* Request header */
+	ntmp_fill_request_hdr(&cbd, swcbd.dma, NTMP_LEN(swcbd.size, 0),
+			      NTMP_ETT_ID, cmd, NTMP_AM_ENTRY_ID);
+
+	ntmp_select_and_lock_cbdr(user, &cbdr);
+	err = netc_xmit_ntmp_cmd(cbdr, &cbd, &swcbd);
+	ntmp_unlock_cbdr(cbdr);
+
+	return err;
+}
+
+/**
+ * ntmp_ett_add_entry - add a new entry to the egress treatment table
+ * @user: target ntmp_user struct
+ * @entry_id: entry ID
+ * @cfge: configuration element data
+ *
+ * Return: 0 on success, otherwise a negative error code
+ */
+int ntmp_ett_add_entry(struct ntmp_user *user, u32 entry_id,
+		       const struct ett_cfge_data *cfge)
+{
+	int err;
+
+	err = ntmp_ett_set_entry(user, entry_id, NTMP_CMD_ADD, cfge);
+	if (err)
+		dev_err(user->dev, "Failed to add %s entry 0x%x, err: %pe\n",
+			ntmp_table_name(NTMP_ETT_ID), entry_id, ERR_PTR(err));
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(ntmp_ett_add_entry);
+
+/**
+ * ntmp_ett_update_entry - update the configuration element data of the
+ * specified entry
+ * @user: target ntmp_user struct
+ * @entry_id: entry ID
+ * @cfge: configuration element data
+ *
+ * Return: 0 on success, otherwise a negative error code
+ */
+int ntmp_ett_update_entry(struct ntmp_user *user, u32 entry_id,
+			  const struct ett_cfge_data *cfge)
+{
+	int err;
+
+	err = ntmp_ett_set_entry(user, entry_id, NTMP_CMD_UPDATE, cfge);
+	if (err)
+		dev_err(user->dev,
+			"Failed to update %s entry 0x%x, err: %pe\n",
+			ntmp_table_name(NTMP_ETT_ID), entry_id, ERR_PTR(err));
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(ntmp_ett_update_entry);
+
+/**
+ * ntmp_ett_delete_entry - delete the specified egress treatment table entry
+ * @user: target ntmp_user struct
+ * @entry_id: entry ID
+ *
+ * Return: 0 on success, otherwise a negative error code
+ */
+int ntmp_ett_delete_entry(struct ntmp_user *user, u32 entry_id)
+{
+	return ntmp_delete_entry_by_id(user, NTMP_ETT_ID, user->tbl.ett_ver,
+				       entry_id, NTMP_EID_REQ_LEN, 0);
+}
+EXPORT_SYMBOL_GPL(ntmp_ett_delete_entry);
+
 int ntmp_bpt_update_entry(struct ntmp_user *user, u32 entry_id,
 			  const struct bpt_cfge_data *cfge)
 {
diff --git a/drivers/net/ethernet/freescale/enetc/ntmp_private.h b/drivers/net/ethernet/freescale/enetc/ntmp_private.h
index 9d30f128849a..531ea7ddd145 100644
--- a/drivers/net/ethernet/freescale/enetc/ntmp_private.h
+++ b/drivers/net/ethernet/freescale/enetc/ntmp_private.h
@@ -217,6 +217,14 @@ struct vft_req_qd {
 	union vft_access_key ak;
 };
 
+/* Egress Treatment Table Request Data Buffer Format of Update and Add
+ * actions
+ */
+struct ett_req_ua {
+	struct ntmp_req_by_eid rbe;
+	struct ett_cfge_data cfge;
+};
+
 /* Buffer Pool Table Request Data Buffer Format of Update action */
 struct bpt_req_update {
 	struct ntmp_req_by_eid rbe;
diff --git a/include/linux/fsl/ntmp.h b/include/linux/fsl/ntmp.h
index 36a9089526ad..0c951e1c763d 100644
--- a/include/linux/fsl/ntmp.h
+++ b/include/linux/fsl/ntmp.h
@@ -36,6 +36,7 @@ struct netc_tbl_vers {
 	u8 vft_ver;
 	u8 bpt_ver;
 	u8 ipft_ver;
+	u8 ett_ver;
 };
 
 struct netc_swcbd {
@@ -214,6 +215,23 @@ struct vft_cfge_data {
 	__le32 et_eid;
 };
 
+struct ett_cfge_data {
+	__le16 efm_cfg;
+#define ETT_EFM_MODE		GENMASK(1, 0)
+#define ETT_ESQA		GENMASK(5, 4)
+#define ETT_ECA			GENMASK(8, 6)
+#define ETT_ECA_INC		1
+#define ETT_EFM_LEN_CHANGE	GENMASK(15, 9)
+#define ETT_FRM_LEN_DEL_VLAN	0x7c
+#define ETT_FRM_LEN_DEL_RTAG	0x7a
+#define ETT_FRM_LEN_DEL_VLAN_RTAG	0x76
+	__le16 efm_data_len;
+#define ETT_EFM_DATA_LEN	GENMASK(10, 0)
+	__le32 efm_eid;
+	__le32 ec_eid;
+	__le32 esqa_tgt_eid;
+};
+
 struct bpt_bpse_data {
 	__le32 amount_used;
 	__le32 amount_used_hwm;
@@ -271,6 +289,11 @@ int ntmp_vft_add_entry(struct ntmp_user *user, u16 vid,
 int ntmp_vft_update_entry(struct ntmp_user *user, u16 vid,
 			  const struct vft_cfge_data *cfge);
 int ntmp_vft_delete_entry(struct ntmp_user *user, u16 vid);
+int ntmp_ett_add_entry(struct ntmp_user *user, u32 entry_id,
+		       const struct ett_cfge_data *cfge);
+int ntmp_ett_update_entry(struct ntmp_user *user, u32 entry_id,
+			  const struct ett_cfge_data *cfge);
+int ntmp_ett_delete_entry(struct ntmp_user *user, u32 entry_id);
 int ntmp_bpt_update_entry(struct ntmp_user *user, u32 entry_id,
 			  const struct bpt_cfge_data *cfge);
 #else
-- 
2.34.1



^ permalink raw reply related

* [PATCH v2 net-next 2/9] net: enetc: add "Update" and "Delete" operations to VLAN filter table
From: wei.fang @ 2026-06-02  7:23 UTC (permalink / raw)
  To: claudiu.manoil, vladimir.oltean, xiaoning.wang, andrew+netdev,
	davem, edumazet, kuba, pabeni, chleroy, andrew, olteanv, linux
  Cc: wei.fang, imx, netdev, linux-kernel, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20260602072313.3162120-1-wei.fang@oss.nxp.com>

From: Wei Fang <wei.fang@nxp.com>

Add two interfaces to manage entries in the VLAN filter table:

ntmp_vft_update_entry(): Update the configuration element data of the
specified VLAN filter entry based on the given VLAN ID. It uses the
exact key access method to locate the entry.

ntmp_vft_delete_entry(): Delete the VLAN filter entry corresponding to
the specified VLAN ID. It also uses the exact key access method to
identify the target entry.

In addition, introduce struct vft_req_qd to describe the request data
buffer format for Query and Delete actions of the VLAN filter table,
which contains a common request data header and a VLAN access key.

Signed-off-by: Wei Fang <wei.fang@nxp.com>
---
 drivers/net/ethernet/freescale/enetc/ntmp.c   | 103 ++++++++++++++++--
 .../ethernet/freescale/enetc/ntmp_private.h   |   6 +
 include/linux/fsl/ntmp.h                      |   3 +
 3 files changed, 105 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/ntmp.c b/drivers/net/ethernet/freescale/enetc/ntmp.c
index cb494ad02e3e..bcbbb012aec6 100644
--- a/drivers/net/ethernet/freescale/enetc/ntmp.c
+++ b/drivers/net/ethernet/freescale/enetc/ntmp.c
@@ -956,15 +956,17 @@ int ntmp_fdbt_delete_port_dynamic_entries(struct ntmp_user *user, int port)
 EXPORT_SYMBOL_GPL(ntmp_fdbt_delete_port_dynamic_entries);
 
 /**
- * ntmp_vft_add_entry - add an entry into the VLAN filter table
+ * ntmp_vft_set_entry - add an entry into the VLAN filter table or update
+ * the configuration element data of the specified VLAN filter entry
  * @user: target ntmp_user struct
  * @vid: VLAN ID
+ * @cmd: command type, NTMP_CMD_ADD or NTMP_CMD_UPDATE
  * @cfge: configuration element data
  *
  * Return: 0 on success, otherwise a negative error code
  */
-int ntmp_vft_add_entry(struct ntmp_user *user, u16 vid,
-		       const struct vft_cfge_data *cfge)
+static int ntmp_vft_set_entry(struct ntmp_user *user, u16 vid, int cmd,
+			      const struct vft_cfge_data *cfge)
 {
 	struct netc_swcbd swcbd;
 	struct vft_req_ua *req;
@@ -973,34 +975,121 @@ int ntmp_vft_add_entry(struct ntmp_user *user, u16 vid,
 	u32 len;
 	int err;
 
+	if (cmd != NTMP_CMD_ADD && cmd != NTMP_CMD_UPDATE)
+		return -EINVAL;
+
 	swcbd.size = sizeof(*req);
 	err = ntmp_alloc_data_mem(user->dev, &swcbd, (void **)&req);
 	if (err)
 		return err;
 
 	/* Request data */
-	ntmp_fill_crd(&req->crd, user->tbl.vft_ver, 0,
-		      NTMP_GEN_UA_CFGEU);
+	ntmp_fill_crd(&req->crd, user->tbl.vft_ver, 0, NTMP_GEN_UA_CFGEU);
 	req->ak.exact.vid = cpu_to_le16(vid);
 	req->cfge = *cfge;
 
 	/* Request header */
 	len = NTMP_LEN(swcbd.size, NTMP_STATUS_RESP_LEN);
 	ntmp_fill_request_hdr(&cbd, swcbd.dma, len, NTMP_VFT_ID,
-			      NTMP_CMD_ADD, NTMP_AM_EXACT_KEY);
+			      cmd, NTMP_AM_EXACT_KEY);
 
 	ntmp_select_and_lock_cbdr(user, &cbdr);
 	err = netc_xmit_ntmp_cmd(cbdr, &cbd, &swcbd);
+	ntmp_unlock_cbdr(cbdr);
+
+	return err;
+}
+
+/**
+ * ntmp_vft_add_entry - add an entry into the VLAN filter table
+ * @user: target ntmp_user struct
+ * @vid: VLAN ID
+ * @cfge: configuration element data
+ *
+ * Return: 0 on success, otherwise a negative error code
+ */
+int ntmp_vft_add_entry(struct ntmp_user *user, u16 vid,
+		       const struct vft_cfge_data *cfge)
+{
+	int err;
+
+	err = ntmp_vft_set_entry(user, vid, NTMP_CMD_ADD, cfge);
 	if (err)
 		dev_err(user->dev,
 			"Failed to add %s entry, vid: %u, err: %pe\n",
 			ntmp_table_name(NTMP_VFT_ID), vid, ERR_PTR(err));
 
+	return err;
+}
+EXPORT_SYMBOL_GPL(ntmp_vft_add_entry);
+
+/**
+ * ntmp_vft_update_entry - update the configuration element data of the
+ * specified VLAN filter entry
+ * @user: target ntmp_user struct
+ * @vid: VLAN ID
+ * @cfge: configuration element data
+ *
+ * Return: 0 on success, otherwise a negative error code
+ */
+int ntmp_vft_update_entry(struct ntmp_user *user, u16 vid,
+			  const struct vft_cfge_data *cfge)
+{
+	int err;
+
+	err = ntmp_vft_set_entry(user, vid, NTMP_CMD_UPDATE, cfge);
+	if (err)
+		dev_err(user->dev,
+			"Failed to update %s entry, vid: %u, err: %pe\n",
+			ntmp_table_name(NTMP_VFT_ID), vid, ERR_PTR(err));
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(ntmp_vft_update_entry);
+
+/**
+ * ntmp_vft_delete_entry - delete the VLAN filter entry based on the
+ * specified VLAN ID
+ * @user: target ntmp_user struct
+ * @vid: VLAN ID
+ *
+ * Return: 0 on success, otherwise a negative error code
+ */
+int ntmp_vft_delete_entry(struct ntmp_user *user, u16 vid)
+{
+	struct netc_swcbd swcbd;
+	struct vft_req_qd *req;
+	struct netc_cbdr *cbdr;
+	union netc_cbd cbd;
+	u32 len;
+	int err;
+
+	swcbd.size = sizeof(*req);
+	err = ntmp_alloc_data_mem(user->dev, &swcbd, (void **)&req);
+	if (err)
+		return err;
+
+	/* Request data */
+	ntmp_fill_crd(&req->crd, user->tbl.vft_ver, 0, 0);
+	req->ak.exact.vid = cpu_to_le16(vid);
+
+	/* Request header */
+	len = NTMP_LEN(swcbd.size, NTMP_STATUS_RESP_LEN);
+	ntmp_fill_request_hdr(&cbd, swcbd.dma, len, NTMP_VFT_ID,
+			      NTMP_CMD_DELETE, NTMP_AM_EXACT_KEY);
+
+	ntmp_select_and_lock_cbdr(user, &cbdr);
+	err = netc_xmit_ntmp_cmd(cbdr, &cbd, &swcbd);
+	if (err)
+		dev_err(user->dev,
+			"Failed to delete %s entry, vid: %u, err: %pe\n",
+			ntmp_table_name(NTMP_VFT_ID), vid, ERR_PTR(err));
+
 	ntmp_unlock_cbdr(cbdr);
 
 	return err;
 }
-EXPORT_SYMBOL_GPL(ntmp_vft_add_entry);
+EXPORT_SYMBOL_GPL(ntmp_vft_delete_entry);
 
 int ntmp_bpt_update_entry(struct ntmp_user *user, u32 entry_id,
 			  const struct bpt_cfge_data *cfge)
diff --git a/drivers/net/ethernet/freescale/enetc/ntmp_private.h b/drivers/net/ethernet/freescale/enetc/ntmp_private.h
index ad532b059ba8..9d30f128849a 100644
--- a/drivers/net/ethernet/freescale/enetc/ntmp_private.h
+++ b/drivers/net/ethernet/freescale/enetc/ntmp_private.h
@@ -211,6 +211,12 @@ struct vft_req_ua {
 	struct vft_cfge_data cfge;
 };
 
+/* VLAN Filter Table Request Data Buffer Format of Query and Delete actions */
+struct vft_req_qd {
+	struct ntmp_cmn_req_data crd;
+	union vft_access_key ak;
+};
+
 /* Buffer Pool Table Request Data Buffer Format of Update action */
 struct bpt_req_update {
 	struct ntmp_req_by_eid rbe;
diff --git a/include/linux/fsl/ntmp.h b/include/linux/fsl/ntmp.h
index 5db078e1caa0..36a9089526ad 100644
--- a/include/linux/fsl/ntmp.h
+++ b/include/linux/fsl/ntmp.h
@@ -268,6 +268,9 @@ int ntmp_fdbt_delete_ageing_entries(struct ntmp_user *user, u8 act_cnt);
 int ntmp_fdbt_delete_port_dynamic_entries(struct ntmp_user *user, int port);
 int ntmp_vft_add_entry(struct ntmp_user *user, u16 vid,
 		       const struct vft_cfge_data *cfge);
+int ntmp_vft_update_entry(struct ntmp_user *user, u16 vid,
+			  const struct vft_cfge_data *cfge);
+int ntmp_vft_delete_entry(struct ntmp_user *user, u16 vid);
 int ntmp_bpt_update_entry(struct ntmp_user *user, u32 entry_id,
 			  const struct bpt_cfge_data *cfge);
 #else
-- 
2.34.1



^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox