All of lore.kernel.org
 help / color / mirror / Atom feed
* [paulmckrcu:dev.2024.12.24a] [srcu]  9216c28c6a: BUG:unable_to_handle_page_fault_for_address
@ 2024-12-31  5:02 kernel test robot
  2024-12-31  7:16 ` Z qiang
  0 siblings, 1 reply; 13+ messages in thread
From: kernel test robot @ 2024-12-31  5:02 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: oe-lkp, lkp, rcu, oliver.sang



Hello,

kernel test robot noticed "BUG:unable_to_handle_page_fault_for_address" on:

commit: 9216c28c6a927fd20f116feed55bba025f18f401 ("srcu: Make SRCU readers use ->srcu_ctrs for counter selection")
https://github.com/paulmckrcu/linux dev.2024.12.24a

in testcase: rcutorture
version: 
with the following parameters:

	runtime: 300s
	test: default
	torture_type: srcu



config: i386-randconfig-005-20241230
compiler: gcc-12
test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

(please refer to attached dmesg/kmsg for entire log/backtrace)


+------------------------------------------------+------------+------------+
|                                                | 2add2e88ea | 9216c28c6a |
+------------------------------------------------+------------+------------+
| BUG:unable_to_handle_page_fault_for_address    | 0          | 6          |
| Oops                                           | 0          | 6          |
| EIP:__srcu_read_lock                           | 0          | 6          |
| Kernel_panic-not_syncing:Fatal_exception       | 0          | 6          |
+------------------------------------------------+------------+------------+


If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags:
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202412311203.ca7bddba-lkp@intel.com


[  168.973150][  T628] BUG: unable to handle page fault for address: 2367a000
[  168.973700][  T628] #PF: supervisor write access in kernel mode
[  168.974809][  T628] #PF: error_code(0x0002) - not-present page
[  168.975761][  T628] *pde = 00000000
[  168.976236][  T628] Oops: Oops: 0002 [#1] PREEMPT SMP
[  168.977052][  T628] CPU: 0 UID: 0 PID: 628 Comm: rcu_torture_wri Tainted: G                T  6.13.0-rc2-00067-g9216c28c6a92 #1
[  168.978867][  T628] Tainted: [T]=RANDSTRUCT
[  168.979429][  T628] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[ 168.980862][ T628] EIP: __srcu_read_lock (kernel/rcu/srcutree.c:749)
[ 168.981213][ T628] Code: 85 ff 74 0c e8 45 59 00 00 83 3b 00 74 02 0f 0b 5b 5e 5f 5d c3 8b 00 f0 83 44 24 fc 00 83 c0 07 83 e0 fc c3 55 89 e5 8b 50 04 <64> ff 02 f0 83 44 24 fc 00 2b 50 08 5d 89 d0 c1 f8 03 c3 55 89 e5
All code
========
   0:	85 ff                	test   %edi,%edi
   2:	74 0c                	je     0x10
   4:	e8 45 59 00 00       	call   0x594e
   9:	83 3b 00             	cmpl   $0x0,(%rbx)
   c:	74 02                	je     0x10
   e:	0f 0b                	ud2
  10:	5b                   	pop    %rbx
  11:	5e                   	pop    %rsi
  12:	5f                   	pop    %rdi
  13:	5d                   	pop    %rbp
  14:	c3                   	ret
  15:	8b 00                	mov    (%rax),%eax
  17:	f0 83 44 24 fc 00    	lock addl $0x0,-0x4(%rsp)
  1d:	83 c0 07             	add    $0x7,%eax
  20:	83 e0 fc             	and    $0xfffffffc,%eax
  23:	c3                   	ret
  24:	55                   	push   %rbp
  25:	89 e5                	mov    %esp,%ebp
  27:	8b 50 04             	mov    0x4(%rax),%edx
  2a:*	64 ff 02             	incl   %fs:(%rdx)		<-- trapping instruction
  2d:	f0 83 44 24 fc 00    	lock addl $0x0,-0x4(%rsp)
  33:	2b 50 08             	sub    0x8(%rax),%edx
  36:	5d                   	pop    %rbp
  37:	89 d0                	mov    %edx,%eax
  39:	c1 f8 03             	sar    $0x3,%eax
  3c:	c3                   	ret
  3d:	55                   	push   %rbp
  3e:	89 e5                	mov    %esp,%ebp

Code starting with the faulting instruction
===========================================
   0:	64 ff 02             	incl   %fs:(%rdx)
   3:	f0 83 44 24 fc 00    	lock addl $0x0,-0x4(%rsp)
   9:	2b 50 08             	sub    0x8(%rax),%edx
   c:	5d                   	pop    %rbp
   d:	89 d0                	mov    %edx,%eax
   f:	c1 f8 03             	sar    $0x3,%eax
  12:	c3                   	ret
  13:	55                   	push   %rbp
  14:	89 e5                	mov    %esp,%ebp
[  168.982540][  T628] EAX: ef0c8420 EBX: ef0c8420 ECX: e5e1e840 EDX: 00000000
[  168.983022][  T628] ESI: ef0c919c EDI: 00000000 EBP: c75e9ee8 ESP: c75e9ee8
[  168.983503][  T628] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00010246
[  168.984024][  T628] CR0: 80050033 CR2: 2367a000 CR3: 075f5000 CR4: 00040690
[  168.984518][  T628] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
[  168.985008][  T628] DR6: fffe0ff0 DR7: 00000400
[  168.985329][  T628] Call Trace:
[ 168.985571][ T628] ? show_regs (arch/x86/kernel/dumpstack.c:479 arch/x86/kernel/dumpstack.c:465)
[ 168.985877][ T628] ? __die_body (arch/x86/kernel/dumpstack.c:421)
[ 168.986185][ T628] ? __die (arch/x86/kernel/dumpstack.c:435)
[ 168.986466][ T628] ? page_fault_oops (arch/x86/mm/fault.c:715)
[ 168.986811][ T628] ? kernelmode_fixup_or_oops+0x50/0x58
[ 168.987273][ T628] ? __bad_area_nosemaphore+0x37/0x1d5
[ 168.987726][ T628] ? validate_chain (kernel/locking/lockdep.c:3819 kernel/locking/lockdep.c:3872)
[ 168.988058][ T628] ? bad_area_nosemaphore (arch/x86/mm/fault.c:835)
[ 168.988406][ T628] ? do_user_addr_fault (arch/x86/mm/fault.c:1280 (discriminator 1))
[ 168.988763][ T628] ? exc_page_fault (arch/x86/include/asm/irqflags.h:26 arch/x86/include/asm/irqflags.h:87 arch/x86/include/asm/irqflags.h:147 arch/x86/mm/fault.c:1489 arch/x86/mm/fault.c:1539)
[ 168.989110][ T628] ? pvclock_clocksource_read_nowd (arch/x86/mm/fault.c:1494)
[ 168.989472][ T628] ? handle_exception (arch/x86/entry/entry_32.S:1048)
[ 168.989800][ T628] ? siphash_4u64 (lib/siphash.c:203)
[ 168.990123][ T628] ? pvclock_clocksource_read_nowd (arch/x86/mm/fault.c:1494)
[ 168.990539][ T628] ? __srcu_read_lock (kernel/rcu/srcutree.c:749)
[ 168.990858][ T628] ? rcu_torture_barrier_init (kernel/rcu/rcutorture.c:3381) rcutorture
[ 168.991319][ T628] ? siphash_4u64 (lib/siphash.c:203)
[ 168.991618][ T628] ? pvclock_clocksource_read_nowd (arch/x86/mm/fault.c:1494)
[ 168.992021][ T628] ? __srcu_read_lock (kernel/rcu/srcutree.c:749)
[ 168.992340][ T628] srcu_read_lock (include/linux/srcu.h:165 include/linux/srcu.h:257) rcutorture
[ 168.992735][ T628] srcu_torture_read_lock (kernel/rcu/rcutorture.c:693) rcutorture
[ 168.993184][ T628] rcu_torture_writer (kernel/rcu/rcutorture.c:1528) rcutorture
[ 168.993615][ T628] ? _raw_spin_unlock_irqrestore (arch/x86/include/asm/irqflags.h:26 arch/x86/include/asm/irqflags.h:87 arch/x86/include/asm/irqflags.h:147 include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194)
[ 168.994020][ T628] ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:80 (discriminator 13))
[ 168.994369][ T628] kthread (kernel/kthread.c:391)
[ 168.994647][ T628] ? rcu_torture_pipe_update (kernel/rcu/rcutorture.c:1447) rcutorture
[ 168.995108][ T628] ? list_del_init (include/linux/lockdep.h:248)
[ 168.995428][ T628] ret_from_fork (arch/x86/kernel/process.c:153)
[ 168.995735][ T628] ? list_del_init (include/linux/lockdep.h:248)
[ 168.996053][ T628] ret_from_fork_asm (arch/x86/entry/entry_32.S:737)
[ 168.996380][ T628] entry_INT80_32 (arch/x86/entry/entry_32.S:942)
[  168.996692][  T628] Modules linked in: rcutorture(+) torture intel_rapl_msr intel_rapl_common iosf_mbi crc32c_intel aesni_intel input_leds led_class fuse
[  168.997654][  T628] CR2: 000000002367a000
[  168.997945][  T628] ---[ end trace 0000000000000000 ]---


The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20241231/202412311203.ca7bddba-lkp@intel.com



-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [paulmckrcu:dev.2024.12.24a] [srcu] 9216c28c6a: BUG:unable_to_handle_page_fault_for_address
  2024-12-31  5:02 [paulmckrcu:dev.2024.12.24a] [srcu] 9216c28c6a: BUG:unable_to_handle_page_fault_for_address kernel test robot
@ 2024-12-31  7:16 ` Z qiang
  2024-12-31 16:19   ` Paul E. McKenney
  0 siblings, 1 reply; 13+ messages in thread
From: Z qiang @ 2024-12-31  7:16 UTC (permalink / raw)
  To: kernel test robot; +Cc: Paul E. McKenney, oe-lkp, lkp, rcu

>
>
>
> Hello,
>
> kernel test robot noticed "BUG:unable_to_handle_page_fault_for_address" on:
>
> commit: 9216c28c6a927fd20f116feed55bba025f18f401 ("srcu: Make SRCU readers use ->srcu_ctrs for counter selection")
> https://github.com/paulmckrcu/linux dev.2024.12.24a
>
> in testcase: rcutorture
> version:
> with following parameters:
>
>         runtime: 300s
>         test: default
>         torture_type: srcu
>
>
>
> config: i386-randconfig-005-20241230
> compiler: gcc-12
> test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G
>
> (please refer to attached dmesg/kmsg for entire log/backtrace)
>
>
> +------------------------------------------------+------------+------------+
> |                                                | 2add2e88ea | 9216c28c6a |
> +------------------------------------------------+------------+------------+
> | BUG:unable_to_handle_page_fault_for_address    | 0          | 6          |
> | Oops                                           | 0          | 6          |
> | EIP:__srcu_read_lock                           | 0          | 6          |
> | Kernel_panic-not_syncing:Fatal_exception       | 0          | 6          |
> +------------------------------------------------+------------+------------+
>
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <oliver.sang@intel.com>
> | Closes: https://lore.kernel.org/oe-lkp/202412311203.ca7bddba-lkp@intel.com
>

Please try the following modifications:

diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index e85db7d5b364..7c7304dee645 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -1999,6 +1999,7 @@ static int srcu_module_coming(struct module *mod)
        for (i = 0; i < mod->num_srcu_structs; i++) {
                ssp = *(sspp++);
                ssp->sda = alloc_percpu(struct srcu_data);
+               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
                if (WARN_ON_ONCE(!ssp->sda))
                        return -ENOMEM;
        }



Thanks
Zqiang

>
> [  168.973150][  T628] BUG: unable to handle page fault for address: 2367a000
> [  168.973700][  T628] #PF: supervisor write access in kernel mode
> [  168.974809][  T628] #PF: error_code(0x0002) - not-present page
> [  168.975761][  T628] *pde = 00000000
> [  168.976236][  T628] Oops: Oops: 0002 [#1] PREEMPT SMP
> [  168.977052][  T628] CPU: 0 UID: 0 PID: 628 Comm: rcu_torture_wri Tainted: G                T  6.13.0-rc2-00067-g9216c28c6a92 #1
> [  168.978867][  T628] Tainted: [T]=RANDSTRUCT
> [  168.979429][  T628] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
> [ 168.980862][ T628] EIP: __srcu_read_lock (kernel/rcu/srcutree.c:749)
> [ 168.981213][ T628] Code: 85 ff 74 0c e8 45 59 00 00 83 3b 00 74 02 0f 0b 5b 5e 5f 5d c3 8b 00 f0 83 44 24 fc 00 83 c0 07 83 e0 fc c3 55 89 e5 8b 50 04 <64> ff 02 f0 83 44 24 fc 00 2b 50 08 5d 89 d0 c1 f8 03 c3 55 89 e5
> All code
> ========
>    0:   85 ff                   test   %edi,%edi
>    2:   74 0c                   je     0x10
>    4:   e8 45 59 00 00          call   0x594e
>    9:   83 3b 00                cmpl   $0x0,(%rbx)
>    c:   74 02                   je     0x10
>    e:   0f 0b                   ud2
>   10:   5b                      pop    %rbx
>   11:   5e                      pop    %rsi
>   12:   5f                      pop    %rdi
>   13:   5d                      pop    %rbp
>   14:   c3                      ret
>   15:   8b 00                   mov    (%rax),%eax
>   17:   f0 83 44 24 fc 00       lock addl $0x0,-0x4(%rsp)
>   1d:   83 c0 07                add    $0x7,%eax
>   20:   83 e0 fc                and    $0xfffffffc,%eax
>   23:   c3                      ret
>   24:   55                      push   %rbp
>   25:   89 e5                   mov    %esp,%ebp
>   27:   8b 50 04                mov    0x4(%rax),%edx
>   2a:*  64 ff 02                incl   %fs:(%rdx)               <-- trapping instruction
>   2d:   f0 83 44 24 fc 00       lock addl $0x0,-0x4(%rsp)
>   33:   2b 50 08                sub    0x8(%rax),%edx
>   36:   5d                      pop    %rbp
>   37:   89 d0                   mov    %edx,%eax
>   39:   c1 f8 03                sar    $0x3,%eax
>   3c:   c3                      ret
>   3d:   55                      push   %rbp
>   3e:   89 e5                   mov    %esp,%ebp
>
> Code starting with the faulting instruction
> ===========================================
>    0:   64 ff 02                incl   %fs:(%rdx)
>    3:   f0 83 44 24 fc 00       lock addl $0x0,-0x4(%rsp)
>    9:   2b 50 08                sub    0x8(%rax),%edx
>    c:   5d                      pop    %rbp
>    d:   89 d0                   mov    %edx,%eax
>    f:   c1 f8 03                sar    $0x3,%eax
>   12:   c3                      ret
>   13:   55                      push   %rbp
>   14:   89 e5                   mov    %esp,%ebp
> [  168.982540][  T628] EAX: ef0c8420 EBX: ef0c8420 ECX: e5e1e840 EDX: 00000000
> [  168.983022][  T628] ESI: ef0c919c EDI: 00000000 EBP: c75e9ee8 ESP: c75e9ee8
> [  168.983503][  T628] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00010246
> [  168.984024][  T628] CR0: 80050033 CR2: 2367a000 CR3: 075f5000 CR4: 00040690
> [  168.984518][  T628] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
> [  168.985008][  T628] DR6: fffe0ff0 DR7: 00000400
> [  168.985329][  T628] Call Trace:
> [ 168.985571][ T628] ? show_regs (arch/x86/kernel/dumpstack.c:479 arch/x86/kernel/dumpstack.c:465)
> [ 168.985877][ T628] ? __die_body (arch/x86/kernel/dumpstack.c:421)
> [ 168.986185][ T628] ? __die (arch/x86/kernel/dumpstack.c:435)
> [ 168.986466][ T628] ? page_fault_oops (arch/x86/mm/fault.c:715)
> [ 168.986811][ T628] ? kernelmode_fixup_or_oops+0x50/0x58
> [ 168.987273][ T628] ? __bad_area_nosemaphore+0x37/0x1d5
> [ 168.987726][ T628] ? validate_chain (kernel/locking/lockdep.c:3819 kernel/locking/lockdep.c:3872)
> [ 168.988058][ T628] ? bad_area_nosemaphore (arch/x86/mm/fault.c:835)
> [ 168.988406][ T628] ? do_user_addr_fault (arch/x86/mm/fault.c:1280 (discriminator 1))
> [ 168.988763][ T628] ? exc_page_fault (arch/x86/include/asm/irqflags.h:26 arch/x86/include/asm/irqflags.h:87 arch/x86/include/asm/irqflags.h:147 arch/x86/mm/fault.c:1489 arch/x86/mm/fault.c:1539)
> [ 168.989110][ T628] ? pvclock_clocksource_read_nowd (arch/x86/mm/fault.c:1494)
> [ 168.989472][ T628] ? handle_exception (arch/x86/entry/entry_32.S:1048)
> [ 168.989800][ T628] ? siphash_4u64 (lib/siphash.c:203)
> [ 168.990123][ T628] ? pvclock_clocksource_read_nowd (arch/x86/mm/fault.c:1494)
> [ 168.990539][ T628] ? __srcu_read_lock (kernel/rcu/srcutree.c:749)
> [ 168.990858][ T628] ? rcu_torture_barrier_init (kernel/rcu/rcutorture.c:3381) rcutorture
> [ 168.991319][ T628] ? siphash_4u64 (lib/siphash.c:203)
> [ 168.991618][ T628] ? pvclock_clocksource_read_nowd (arch/x86/mm/fault.c:1494)
> [ 168.992021][ T628] ? __srcu_read_lock (kernel/rcu/srcutree.c:749)
> [ 168.992340][ T628] srcu_read_lock (include/linux/srcu.h:165 include/linux/srcu.h:257) rcutorture
> [ 168.992735][ T628] srcu_torture_read_lock (kernel/rcu/rcutorture.c:693) rcutorture
> [ 168.993184][ T628] rcu_torture_writer (kernel/rcu/rcutorture.c:1528) rcutorture
> [ 168.993615][ T628] ? _raw_spin_unlock_irqrestore (arch/x86/include/asm/irqflags.h:26 arch/x86/include/asm/irqflags.h:87 arch/x86/include/asm/irqflags.h:147 include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194)
> [ 168.994020][ T628] ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:80 (discriminator 13))
> [ 168.994369][ T628] kthread (kernel/kthread.c:391)
> [ 168.994647][ T628] ? rcu_torture_pipe_update (kernel/rcu/rcutorture.c:1447) rcutorture
> [ 168.995108][ T628] ? list_del_init (include/linux/lockdep.h:248)
> [ 168.995428][ T628] ret_from_fork (arch/x86/kernel/process.c:153)
> [ 168.995735][ T628] ? list_del_init (include/linux/lockdep.h:248)
> [ 168.996053][ T628] ret_from_fork_asm (arch/x86/entry/entry_32.S:737)
> [ 168.996380][ T628] entry_INT80_32 (arch/x86/entry/entry_32.S:942)
> [  168.996692][  T628] Modules linked in: rcutorture(+) torture intel_rapl_msr intel_rapl_common iosf_mbi crc32c_intel aesni_intel input_leds led_class fuse
> [  168.997654][  T628] CR2: 000000002367a000
> [  168.997945][  T628] ---[ end trace 0000000000000000 ]---
>
>
> The kernel config and materials to reproduce are available at:
> https://download.01.org/0day-ci/archive/20241231/202412311203.ca7bddba-lkp@intel.com
>
>
>
> --
> 0-DAY CI Kernel Test Service
> https://github.com/intel/lkp-tests/wiki
>
>

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [paulmckrcu:dev.2024.12.24a] [srcu] 9216c28c6a: BUG:unable_to_handle_page_fault_for_address
  2024-12-31  7:16 ` Z qiang
@ 2024-12-31 16:19   ` Paul E. McKenney
  2025-01-02  5:41     ` Oliver Sang
  0 siblings, 1 reply; 13+ messages in thread
From: Paul E. McKenney @ 2024-12-31 16:19 UTC (permalink / raw)
  To: Z qiang; +Cc: kernel test robot, oe-lkp, lkp, rcu

On Tue, Dec 31, 2024 at 03:16:25PM +0800, Z qiang wrote:
> >
> >
> >
> > Hello,
> >
> > kernel test robot noticed "BUG:unable_to_handle_page_fault_for_address" on:
> >
> > commit: 9216c28c6a927fd20f116feed55bba025f18f401 ("srcu: Make SRCU readers use ->srcu_ctrs for counter selection")
> > https://github.com/paulmckrcu/linux dev.2024.12.24a
> >
> > in testcase: rcutorture
> > version:
> > with following parameters:
> >
> >         runtime: 300s
> >         test: default
> >         torture_type: srcu
> >
> >
> >
> > config: i386-randconfig-005-20241230
> > compiler: gcc-12
> > test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G
> >
> > (please refer to attached dmesg/kmsg for entire log/backtrace)
> >
> >
> > +------------------------------------------------+------------+------------+
> > |                                                | 2add2e88ea | 9216c28c6a |
> > +------------------------------------------------+------------+------------+
> > | BUG:unable_to_handle_page_fault_for_address    | 0          | 6          |
> > | Oops                                           | 0          | 6          |
> > | EIP:__srcu_read_lock                           | 0          | 6          |
> > | Kernel_panic-not_syncing:Fatal_exception       | 0          | 6          |
> > +------------------------------------------------+------------+------------+
> >
> >
> > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > the same patch/commit), kindly add following tags
> > | Reported-by: kernel test robot <oliver.sang@intel.com>
> > | Closes: https://lore.kernel.org/oe-lkp/202412311203.ca7bddba-lkp@intel.com
> >
> 
> Please try the following modifications:
> 
> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index e85db7d5b364..7c7304dee645 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -1999,6 +1999,7 @@ static int srcu_module_coming(struct module *mod)
>         for (i = 0; i < mod->num_srcu_structs; i++) {
>                 ssp = *(sspp++);
>                 ssp->sda = alloc_percpu(struct srcu_data);
> +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];

This does look quite promising, so thank you for digging into this!!!

Looking forward to seeing if it fixes the problem.  ;-)

							Thanx, Paul

>                 if (WARN_ON_ONCE(!ssp->sda))
>                         return -ENOMEM;
>         }
> 
> 
> 
> Thanks
> Zqiang
> 
> >
> > [  168.973150][  T628] BUG: unable to handle page fault for address: 2367a000
> > [  168.973700][  T628] #PF: supervisor write access in kernel mode
> > [  168.974809][  T628] #PF: error_code(0x0002) - not-present page
> > [  168.975761][  T628] *pde = 00000000
> > [  168.976236][  T628] Oops: Oops: 0002 [#1] PREEMPT SMP
> > [  168.977052][  T628] CPU: 0 UID: 0 PID: 628 Comm: rcu_torture_wri Tainted: G                T  6.13.0-rc2-00067-g9216c28c6a92 #1
> > [  168.978867][  T628] Tainted: [T]=RANDSTRUCT
> > [  168.979429][  T628] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
> > [ 168.980862][ T628] EIP: __srcu_read_lock (kernel/rcu/srcutree.c:749)
> > [ 168.981213][ T628] Code: 85 ff 74 0c e8 45 59 00 00 83 3b 00 74 02 0f 0b 5b 5e 5f 5d c3 8b 00 f0 83 44 24 fc 00 83 c0 07 83 e0 fc c3 55 89 e5 8b 50 04 <64> ff 02 f0 83 44 24 fc 00 2b 50 08 5d 89 d0 c1 f8 03 c3 55 89 e5
> > All code
> > ========
> >    0:   85 ff                   test   %edi,%edi
> >    2:   74 0c                   je     0x10
> >    4:   e8 45 59 00 00          call   0x594e
> >    9:   83 3b 00                cmpl   $0x0,(%rbx)
> >    c:   74 02                   je     0x10
> >    e:   0f 0b                   ud2
> >   10:   5b                      pop    %rbx
> >   11:   5e                      pop    %rsi
> >   12:   5f                      pop    %rdi
> >   13:   5d                      pop    %rbp
> >   14:   c3                      ret
> >   15:   8b 00                   mov    (%rax),%eax
> >   17:   f0 83 44 24 fc 00       lock addl $0x0,-0x4(%rsp)
> >   1d:   83 c0 07                add    $0x7,%eax
> >   20:   83 e0 fc                and    $0xfffffffc,%eax
> >   23:   c3                      ret
> >   24:   55                      push   %rbp
> >   25:   89 e5                   mov    %esp,%ebp
> >   27:   8b 50 04                mov    0x4(%rax),%edx
> >   2a:*  64 ff 02                incl   %fs:(%rdx)               <-- trapping instruction
> >   2d:   f0 83 44 24 fc 00       lock addl $0x0,-0x4(%rsp)
> >   33:   2b 50 08                sub    0x8(%rax),%edx
> >   36:   5d                      pop    %rbp
> >   37:   89 d0                   mov    %edx,%eax
> >   39:   c1 f8 03                sar    $0x3,%eax
> >   3c:   c3                      ret
> >   3d:   55                      push   %rbp
> >   3e:   89 e5                   mov    %esp,%ebp
> >
> > Code starting with the faulting instruction
> > ===========================================
> >    0:   64 ff 02                incl   %fs:(%rdx)
> >    3:   f0 83 44 24 fc 00       lock addl $0x0,-0x4(%rsp)
> >    9:   2b 50 08                sub    0x8(%rax),%edx
> >    c:   5d                      pop    %rbp
> >    d:   89 d0                   mov    %edx,%eax
> >    f:   c1 f8 03                sar    $0x3,%eax
> >   12:   c3                      ret
> >   13:   55                      push   %rbp
> >   14:   89 e5                   mov    %esp,%ebp
> > [  168.982540][  T628] EAX: ef0c8420 EBX: ef0c8420 ECX: e5e1e840 EDX: 00000000
> > [  168.983022][  T628] ESI: ef0c919c EDI: 00000000 EBP: c75e9ee8 ESP: c75e9ee8
> > [  168.983503][  T628] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00010246
> > [  168.984024][  T628] CR0: 80050033 CR2: 2367a000 CR3: 075f5000 CR4: 00040690
> > [  168.984518][  T628] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
> > [  168.985008][  T628] DR6: fffe0ff0 DR7: 00000400
> > [  168.985329][  T628] Call Trace:
> > [ 168.985571][ T628] ? show_regs (arch/x86/kernel/dumpstack.c:479 arch/x86/kernel/dumpstack.c:465)
> > [ 168.985877][ T628] ? __die_body (arch/x86/kernel/dumpstack.c:421)
> > [ 168.986185][ T628] ? __die (arch/x86/kernel/dumpstack.c:435)
> > [ 168.986466][ T628] ? page_fault_oops (arch/x86/mm/fault.c:715)
> > [ 168.986811][ T628] ? kernelmode_fixup_or_oops+0x50/0x58
> > [ 168.987273][ T628] ? __bad_area_nosemaphore+0x37/0x1d5
> > [ 168.987726][ T628] ? validate_chain (kernel/locking/lockdep.c:3819 kernel/locking/lockdep.c:3872)
> > [ 168.988058][ T628] ? bad_area_nosemaphore (arch/x86/mm/fault.c:835)
> > [ 168.988406][ T628] ? do_user_addr_fault (arch/x86/mm/fault.c:1280 (discriminator 1))
> > [ 168.988763][ T628] ? exc_page_fault (arch/x86/include/asm/irqflags.h:26 arch/x86/include/asm/irqflags.h:87 arch/x86/include/asm/irqflags.h:147 arch/x86/mm/fault.c:1489 arch/x86/mm/fault.c:1539)
> > [ 168.989110][ T628] ? pvclock_clocksource_read_nowd (arch/x86/mm/fault.c:1494)
> > [ 168.989472][ T628] ? handle_exception (arch/x86/entry/entry_32.S:1048)
> > [ 168.989800][ T628] ? siphash_4u64 (lib/siphash.c:203)
> > [ 168.990123][ T628] ? pvclock_clocksource_read_nowd (arch/x86/mm/fault.c:1494)
> > [ 168.990539][ T628] ? __srcu_read_lock (kernel/rcu/srcutree.c:749)
> > [ 168.990858][ T628] ? rcu_torture_barrier_init (kernel/rcu/rcutorture.c:3381) rcutorture
> > [ 168.991319][ T628] ? siphash_4u64 (lib/siphash.c:203)
> > [ 168.991618][ T628] ? pvclock_clocksource_read_nowd (arch/x86/mm/fault.c:1494)
> > [ 168.992021][ T628] ? __srcu_read_lock (kernel/rcu/srcutree.c:749)
> > [ 168.992340][ T628] srcu_read_lock (include/linux/srcu.h:165 include/linux/srcu.h:257) rcutorture
> > [ 168.992735][ T628] srcu_torture_read_lock (kernel/rcu/rcutorture.c:693) rcutorture
> > [ 168.993184][ T628] rcu_torture_writer (kernel/rcu/rcutorture.c:1528) rcutorture
> > [ 168.993615][ T628] ? _raw_spin_unlock_irqrestore (arch/x86/include/asm/irqflags.h:26 arch/x86/include/asm/irqflags.h:87 arch/x86/include/asm/irqflags.h:147 include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194)
> > [ 168.994020][ T628] ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:80 (discriminator 13))
> > [ 168.994369][ T628] kthread (kernel/kthread.c:391)
> > [ 168.994647][ T628] ? rcu_torture_pipe_update (kernel/rcu/rcutorture.c:1447) rcutorture
> > [ 168.995108][ T628] ? list_del_init (include/linux/lockdep.h:248)
> > [ 168.995428][ T628] ret_from_fork (arch/x86/kernel/process.c:153)
> > [ 168.995735][ T628] ? list_del_init (include/linux/lockdep.h:248)
> > [ 168.996053][ T628] ret_from_fork_asm (arch/x86/entry/entry_32.S:737)
> > [ 168.996380][ T628] entry_INT80_32 (arch/x86/entry/entry_32.S:942)
> > [  168.996692][  T628] Modules linked in: rcutorture(+) torture intel_rapl_msr intel_rapl_common iosf_mbi crc32c_intel aesni_intel input_leds led_class fuse
> > [  168.997654][  T628] CR2: 000000002367a000
> > [  168.997945][  T628] ---[ end trace 0000000000000000 ]---
> >
> >
> > The kernel config and materials to reproduce are available at:
> > https://download.01.org/0day-ci/archive/20241231/202412311203.ca7bddba-lkp@intel.com
> >
> >
> >
> > --
> > 0-DAY CI Kernel Test Service
> > https://github.com/intel/lkp-tests/wiki
> >
> >

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [paulmckrcu:dev.2024.12.24a] [srcu] 9216c28c6a: BUG:unable_to_handle_page_fault_for_address
  2024-12-31 16:19   ` Paul E. McKenney
@ 2025-01-02  5:41     ` Oliver Sang
  2025-01-02 18:55       ` Paul E. McKenney
  0 siblings, 1 reply; 13+ messages in thread
From: Oliver Sang @ 2025-01-02  5:41 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: Z qiang, oe-lkp, lkp, rcu, oliver.sang

hi, Zqiang, hi, Paul,

On Tue, Dec 31, 2024 at 08:19:44AM -0800, Paul E. McKenney wrote:
> On Tue, Dec 31, 2024 at 03:16:25PM +0800, Z qiang wrote:

[...]

> > >
> > > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > > the same patch/commit), kindly add following tags
> > > | Reported-by: kernel test robot <oliver.sang@intel.com>
> > > | Closes: https://lore.kernel.org/oe-lkp/202412311203.ca7bddba-lkp@intel.com
> > >
> > 
> > Please try the following modifications:

Yes, the following modifications fix the issues we found for 9216c28c6a. Thanks!

Tested-by: kernel test robot <oliver.sang@intel.com>

> > 
> > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > index e85db7d5b364..7c7304dee645 100644
> > --- a/kernel/rcu/srcutree.c
> > +++ b/kernel/rcu/srcutree.c
> > @@ -1999,6 +1999,7 @@ static int srcu_module_coming(struct module *mod)
> >         for (i = 0; i < mod->num_srcu_structs; i++) {
> >                 ssp = *(sspp++);
> >                 ssp->sda = alloc_percpu(struct srcu_data);
> > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> 
> This does look quite promising, so thank you for digging into this!!!
> 
> Looking forward to seeing if it fixes the problem.  ;-)
> 
> 							Thanx, Paul
> 
> >                 if (WARN_ON_ONCE(!ssp->sda))
> >                         return -ENOMEM;
> >         }
> > 
> > 
> > 
> > Thanks
> > Zqiang

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [paulmckrcu:dev.2024.12.24a] [srcu] 9216c28c6a: BUG:unable_to_handle_page_fault_for_address
  2025-01-02  5:41     ` Oliver Sang
@ 2025-01-02 18:55       ` Paul E. McKenney
  2025-01-03  5:50         ` Z qiang
  0 siblings, 1 reply; 13+ messages in thread
From: Paul E. McKenney @ 2025-01-02 18:55 UTC (permalink / raw)
  To: Oliver Sang; +Cc: Z qiang, oe-lkp, lkp, rcu

On Thu, Jan 02, 2025 at 01:41:16PM +0800, Oliver Sang wrote:
> hi, Zqiang, hi, Paul,
> 
> On Tue, Dec 31, 2024 at 08:19:44AM -0800, Paul E. McKenney wrote:
> > On Tue, Dec 31, 2024 at 03:16:25PM +0800, Z qiang wrote:
> 
> [...]
> 
> > > >
> > > > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > > > the same patch/commit), kindly add following tags
> > > > | Reported-by: kernel test robot <oliver.sang@intel.com>
> > > > | Closes: https://lore.kernel.org/oe-lkp/202412311203.ca7bddba-lkp@intel.com
> > > >
> > > 
> > > Please try the following modifications:
> 
> yes, the following modifications fix the issues we found for 9216c28c6a. thanks!
> 
> Tested-by: kernel test robot <oliver.sang@intel.com>

Thank you, and I will apply this during my next rebase.

Zqiang, unless you tell me otherwise, I will add your Co-developed-by
and Signed-off-by.

Either way, Happy Square New Year!

							Thanx, Paul

> > > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > > index e85db7d5b364..7c7304dee645 100644
> > > --- a/kernel/rcu/srcutree.c
> > > +++ b/kernel/rcu/srcutree.c
> > > @@ -1999,6 +1999,7 @@ static int srcu_module_coming(struct module *mod)
> > >         for (i = 0; i < mod->num_srcu_structs; i++) {
> > >                 ssp = *(sspp++);
> > >                 ssp->sda = alloc_percpu(struct srcu_data);
> > > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> > 
> > This does look quite promising, so thank you for digging into this!!!
> > 
> > Looking forward to seeing if it fixes the problem.  ;-)
> > 
> > 							Thanx, Paul
> > 
> > >                 if (WARN_ON_ONCE(!ssp->sda))
> > >                         return -ENOMEM;
> > >         }
> > > 
> > > 
> > > 
> > > Thanks
> > > Zqiang

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [paulmckrcu:dev.2024.12.24a] [srcu] 9216c28c6a: BUG:unable_to_handle_page_fault_for_address
  2025-01-02 18:55       ` Paul E. McKenney
@ 2025-01-03  5:50         ` Z qiang
  2025-01-03 17:10           ` Paul E. McKenney
  0 siblings, 1 reply; 13+ messages in thread
From: Z qiang @ 2025-01-03  5:50 UTC (permalink / raw)
  To: paulmck; +Cc: Oliver Sang, oe-lkp, lkp, rcu

>
> On Thu, Jan 02, 2025 at 01:41:16PM +0800, Oliver Sang wrote:
> > hi, Zqiang, hi, Paul,
> >
> > On Tue, Dec 31, 2024 at 08:19:44AM -0800, Paul E. McKenney wrote:
> > > On Tue, Dec 31, 2024 at 03:16:25PM +0800, Z qiang wrote:
> >
> > [...]
> >
> > > > >
> > > > > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > > > > the same patch/commit), kindly add following tags
> > > > > | Reported-by: kernel test robot <oliver.sang@intel.com>
> > > > > | Closes: https://lore.kernel.org/oe-lkp/202412311203.ca7bddba-lkp@intel.com
> > > > >
> > > >
> > > > Please try the following modifications:
> >
> > yes, the following modifications fix the issues we found for 9216c28c6a. thanks!
> >
> > Tested-by: kernel test robot <oliver.sang@intel.com>
>
> Thank you, and I will apply this during my next rebase.
>
> Zqiang, unless you tell me otherwise, I will add your Co-developed-by
> and Signed-off-by.
>
> Either way, Happy Square New Year!

Thank you,  Happy New Year!  :)

Thanks
Zqiang

>
>                                                         Thanx, Paul
>
> > > > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > > > index e85db7d5b364..7c7304dee645 100644
> > > > --- a/kernel/rcu/srcutree.c
> > > > +++ b/kernel/rcu/srcutree.c
> > > > @@ -1999,6 +1999,7 @@ static int srcu_module_coming(struct module *mod)
> > > >         for (i = 0; i < mod->num_srcu_structs; i++) {
> > > >                 ssp = *(sspp++);
> > > >                 ssp->sda = alloc_percpu(struct srcu_data);
> > > > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> > >
> > > This does look quite promising, so thank you for digging into this!!!
> > >
> > > Looking forward to seeing if it fixes the problem.  ;-)
> > >
> > >                                                     Thanx, Paul
> > >
> > > >                 if (WARN_ON_ONCE(!ssp->sda))
> > > >                         return -ENOMEM;
> > > >         }
> > > >
> > > >
> > > >
> > > > Thanks
> > > > Zqiang

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [paulmckrcu:dev.2024.12.24a] [srcu] 9216c28c6a: BUG:unable_to_handle_page_fault_for_address
  2025-01-03  5:50         ` Z qiang
@ 2025-01-03 17:10           ` Paul E. McKenney
  2025-01-06  3:24             ` Z qiang
  0 siblings, 1 reply; 13+ messages in thread
From: Paul E. McKenney @ 2025-01-03 17:10 UTC (permalink / raw)
  To: Z qiang; +Cc: Oliver Sang, oe-lkp, lkp, rcu

On Fri, Jan 03, 2025 at 01:50:50PM +0800, Z qiang wrote:
> >
> > On Thu, Jan 02, 2025 at 01:41:16PM +0800, Oliver Sang wrote:
> > > hi, Zqiang, hi, Paul,
> > >
> > > On Tue, Dec 31, 2024 at 08:19:44AM -0800, Paul E. McKenney wrote:
> > > > On Tue, Dec 31, 2024 at 03:16:25PM +0800, Z qiang wrote:
> > >
> > > [...]
> > >
> > > > > >
> > > > > > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > > > > > the same patch/commit), kindly add following tags
> > > > > > | Reported-by: kernel test robot <oliver.sang@intel.com>
> > > > > > | Closes: https://lore.kernel.org/oe-lkp/202412311203.ca7bddba-lkp@intel.com
> > > > > >
> > > > >
> > > > > Please try the following modifications:
> > >
> > > yes, the following modifications fix the issues we found for 9216c28c6a. thanks!
> > >
> > > Tested-by: kernel test robot <oliver.sang@intel.com>
> >
> > Thank you, and I will apply this during my next rebase.
> >
> > Zqiang, unless you tell me otherwise, I will add your Co-developed-by
> > and Signed-off-by.
> >
> > Either way, Happy Square New Year!
> 
> Thank you,  Happy New Year!  :)

And here you go!  Please let me know of anything I missed.

And thank you for saving me the time it would have taken to track
this one down.  ;-)

							Thanx, Paul

------------------------------------------------------------------------

commit 71c893e6d1857d1e4ea37aec557d734a560fdb39
Author: Paul E. McKenney <paulmck@kernel.org>
Date:   Thu Dec 19 16:08:54 2024 -0800

    srcu: Make SRCU readers use ->srcu_ctrs for counter selection
    
    This commit causes SRCU readers to use ->srcu_ctrs for counter
    selection instead of ->srcu_idx.  This takes another step towards
    array-indexing-free SRCU readers.
    
    [ paulmck: Apply kernel test robot feedback. ]
    
    Co-developed-by: Z qiang <qiang.zhang1211@gmail.com>
    Signed-off-by: Z qiang <qiang.zhang1211@gmail.com>
    Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
    Tested-by: kernel test robot <oliver.sang@intel.com>

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index c794d599db5c1..1b01ced61a45b 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -101,6 +101,7 @@ struct srcu_usage {
  */
 struct srcu_struct {
 	unsigned int srcu_idx;			/* Current rdr array element. */
+	struct srcu_ctr __percpu *srcu_ctrp;
 	struct srcu_data __percpu *sda;		/* Per-CPU srcu_data array. */
 	struct lockdep_map dep_map;
 	struct srcu_usage *srcu_sup;		/* Update-side data. */
@@ -167,6 +168,7 @@ struct srcu_struct {
 #define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name)						\
 {												\
 	.sda = &pcpu_name,									\
+	.srcu_ctrp = &pcpu_name.srcu_ctrs[0],							\
 	__SRCU_STRUCT_INIT_COMMON(name, usage_name)						\
 }
 
@@ -222,13 +224,12 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
  */
 static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
 {
-	int idx;
+	struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
 
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
-	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
-	this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); /* Y */
+	this_cpu_inc(scp->srcu_locks.counter); /* Y */
 	barrier(); /* Avoid leaking the critical section. */
-	return idx;
+	return scp - &ssp->sda->srcu_ctrs[0];
 }
 
 /*
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index d7ee2f345e192..3bf7f41ad72b8 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -253,8 +253,10 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
 	atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
 	INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
 	ssp->srcu_sup->sda_is_static = is_static;
-	if (!is_static)
+	if (!is_static) {
 		ssp->sda = alloc_percpu(struct srcu_data);
+		ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
+	}
 	if (!ssp->sda)
 		goto err_free_sup;
 	init_srcu_struct_data(ssp);
@@ -742,12 +744,11 @@ EXPORT_SYMBOL_GPL(__srcu_check_read_flavor);
  */
 int __srcu_read_lock(struct srcu_struct *ssp)
 {
-	int idx;
+	struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
 
-	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
-	this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter);
+	this_cpu_inc(scp->srcu_locks.counter);
 	smp_mb(); /* B */  /* Avoid leaking the critical section. */
-	return idx;
+	return scp - &ssp->sda->srcu_ctrs[0];
 }
 EXPORT_SYMBOL_GPL(__srcu_read_lock);
 
@@ -772,13 +773,11 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  */
 int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
 {
-	int idx;
-	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+	struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
 
-	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
-	atomic_long_inc(&sdp->srcu_ctrs[idx].srcu_locks);
+	atomic_long_inc(&scp->srcu_locks);
 	smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */
-	return idx;
+	return scp - &ssp->sda->srcu_ctrs[0];
 }
 EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
 
@@ -1152,6 +1151,8 @@ static void srcu_flip(struct srcu_struct *ssp)
 	smp_mb(); /* E */  /* Pairs with B and C. */
 
 	WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter.
+	WRITE_ONCE(ssp->srcu_ctrp,
+		   &ssp->sda->srcu_ctrs[!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0])]);
 
 	/*
 	 * Ensure that if the updater misses an __srcu_read_unlock()
@@ -1998,6 +1999,7 @@ static int srcu_module_coming(struct module *mod)
 	for (i = 0; i < mod->num_srcu_structs; i++) {
 		ssp = *(sspp++);
 		ssp->sda = alloc_percpu(struct srcu_data);
+		ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
 		if (WARN_ON_ONCE(!ssp->sda))
 			return -ENOMEM;
 	}

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [paulmckrcu:dev.2024.12.24a] [srcu] 9216c28c6a: BUG:unable_to_handle_page_fault_for_address
  2025-01-03 17:10           ` Paul E. McKenney
@ 2025-01-06  3:24             ` Z qiang
  2025-01-07  5:50               ` Paul E. McKenney
  0 siblings, 1 reply; 13+ messages in thread
From: Z qiang @ 2025-01-06  3:24 UTC (permalink / raw)
  To: paulmck; +Cc: Oliver Sang, oe-lkp, lkp, rcu

>
> On Fri, Jan 03, 2025 at 01:50:50PM +0800, Z qiang wrote:
> > >
> > > On Thu, Jan 02, 2025 at 01:41:16PM +0800, Oliver Sang wrote:
> > > > hi, Zqiang, hi, Paul,
> > > >
> > > > On Tue, Dec 31, 2024 at 08:19:44AM -0800, Paul E. McKenney wrote:
> > > > > On Tue, Dec 31, 2024 at 03:16:25PM +0800, Z qiang wrote:
> > > >
> > > > [...]
> > > >
> > > > > > >
> > > > > > > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > > > > > > the same patch/commit), kindly add following tags
> > > > > > > | Reported-by: kernel test robot <oliver.sang@intel.com>
> > > > > > > | Closes: https://lore.kernel.org/oe-lkp/202412311203.ca7bddba-lkp@intel.com
> > > > > > >
> > > > > >
> > > > > > Please try the following modifications:
> > > >
> > > > yes, the following modifications fix the issues we found for 9216c28c6a. thanks!
> > > >
> > > > Tested-by: kernel test robot <oliver.sang@intel.com>
> > >
> > > Thank you, and I will apply this during my next rebase.
> > >
> > > Zqiang, unless you tell me otherwise, I will add your Co-developed-by
> > > and Signed-off-by.
> > >
> > > Either way, Happy Square New Year!
> >
> > Thank you,  Happy New Year!  :)
>
> And here you go!  Please let me know of anything I missed.
>
> And thank you for saving me the time it would have taken to track
> this one down.  ;-)
>
>                                                         Thanx, Paul
>
> ------------------------------------------------------------------------
>
> commit 71c893e6d1857d1e4ea37aec557d734a560fdb39
> Author: Paul E. McKenney <paulmck@kernel.org>
> Date:   Thu Dec 19 16:08:54 2024 -0800
>
>     srcu: Make SRCU readers use ->srcu_ctrs for counter selection
>
>     This commit causes SRCU readers to use ->srcu_ctrs for counter
>     selection instead of ->srcu_idx.  This takes another step towards
>     array-indexing-free SRCU readers.
>
>     [ paulmck: Apply kernel test robot feedback. ]
>
>     Co-developed-by: Z qiang <qiang.zhang1211@gmail.com>
>     Signed-off-by: Z qiang <qiang.zhang1211@gmail.com>
>     Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
>     Tested-by: kernel test robot <oliver.sang@intel.com>
>
> diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
> index c794d599db5c1..1b01ced61a45b 100644
> --- a/include/linux/srcutree.h
> +++ b/include/linux/srcutree.h
> @@ -101,6 +101,7 @@ struct srcu_usage {
>   */
>  struct srcu_struct {
>         unsigned int srcu_idx;                  /* Current rdr array element. */
> +       struct srcu_ctr __percpu *srcu_ctrp;
>         struct srcu_data __percpu *sda;         /* Per-CPU srcu_data array. */
>         struct lockdep_map dep_map;
>         struct srcu_usage *srcu_sup;            /* Update-side data. */
> @@ -167,6 +168,7 @@ struct srcu_struct {
>  #define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name)                                                \
>  {                                                                                              \
>         .sda = &pcpu_name,                                                                      \
> +       .srcu_ctrp = &pcpu_name.srcu_ctrs[0],                                                   \
>         __SRCU_STRUCT_INIT_COMMON(name, usage_name)                                             \
>  }
>
> @@ -222,13 +224,12 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
>   */
>  static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
>  {
> -       int idx;
> +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
>
>         RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
> -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> -       this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); /* Y */
> +       this_cpu_inc(scp->srcu_locks.counter); /* Y */
>         barrier(); /* Avoid leaking the critical section. */
> -       return idx;
> +       return scp - &ssp->sda->srcu_ctrs[0];
>  }
>
>  /*
> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index d7ee2f345e192..3bf7f41ad72b8 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -253,8 +253,10 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
>         atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
>         INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
>         ssp->srcu_sup->sda_is_static = is_static;
> -       if (!is_static)
> +       if (!is_static) {
>                 ssp->sda = alloc_percpu(struct srcu_data);
> +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> +       }
>         if (!ssp->sda)
>                 goto err_free_sup;

ssp->srcu_ctrp should be assigned a value only after ssp->sda has been
allocated successfully.

-       if (!is_static) {
+       if (!is_static)
                ssp->sda = alloc_percpu(struct srcu_data);
-               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
-       }
        if (!ssp->sda)
                goto err_free_sup;
+       if (!is_static)
+               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];



>         init_srcu_struct_data(ssp);
> @@ -742,12 +744,11 @@ EXPORT_SYMBOL_GPL(__srcu_check_read_flavor);
>   */
>  int __srcu_read_lock(struct srcu_struct *ssp)
>  {
> -       int idx;
> +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
>
> -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> -       this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter);
> +       this_cpu_inc(scp->srcu_locks.counter);
>         smp_mb(); /* B */  /* Avoid leaking the critical section. */
> -       return idx;
> +       return scp - &ssp->sda->srcu_ctrs[0];
>  }
>  EXPORT_SYMBOL_GPL(__srcu_read_lock);
>
> @@ -772,13 +773,11 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
>   */
>  int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
>  {
> -       int idx;
> -       struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
> +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);

Should this be the case?

struct srcu_ctr  *scp = raw_cpu_ptr(ssp->srcu_ctrp);

>
> -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> -       atomic_long_inc(&sdp->srcu_ctrs[idx].srcu_locks);
> +       atomic_long_inc(&scp->srcu_locks);
>         smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */
> -       return idx;
> +       return scp - &ssp->sda->srcu_ctrs[0];
>  }
>  EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
>
> @@ -1152,6 +1151,8 @@ static void srcu_flip(struct srcu_struct *ssp)
>         smp_mb(); /* E */  /* Pairs with B and C. */
>
>         WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter.
> +       WRITE_ONCE(ssp->srcu_ctrp,
> +                  &ssp->sda->srcu_ctrs[!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0])]);
>
>         /*
>          * Ensure that if the updater misses an __srcu_read_unlock()
> @@ -1998,6 +1999,7 @@ static int srcu_module_coming(struct module *mod)
>         for (i = 0; i < mod->num_srcu_structs; i++) {
>                 ssp = *(sspp++);
>                 ssp->sda = alloc_percpu(struct srcu_data);
> +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
>                 if (WARN_ON_ONCE(!ssp->sda))
>                         return -ENOMEM;


-               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
                if (WARN_ON_ONCE(!ssp->sda))
                        return -ENOMEM;
+               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];


Thanks
Zqiang

>         }

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [paulmckrcu:dev.2024.12.24a] [srcu] 9216c28c6a: BUG:unable_to_handle_page_fault_for_address
  2025-01-06  3:24             ` Z qiang
@ 2025-01-07  5:50               ` Paul E. McKenney
  2025-01-07  9:49                 ` Z qiang
  0 siblings, 1 reply; 13+ messages in thread
From: Paul E. McKenney @ 2025-01-07  5:50 UTC (permalink / raw)
  To: Z qiang; +Cc: Oliver Sang, oe-lkp, lkp, rcu

On Mon, Jan 06, 2025 at 11:24:54AM +0800, Z qiang wrote:
> >
> > On Fri, Jan 03, 2025 at 01:50:50PM +0800, Z qiang wrote:
> > > >
> > > > On Thu, Jan 02, 2025 at 01:41:16PM +0800, Oliver Sang wrote:
> > > > > hi, Zqiang, hi, Paul,
> > > > >
> > > > > On Tue, Dec 31, 2024 at 08:19:44AM -0800, Paul E. McKenney wrote:
> > > > > > On Tue, Dec 31, 2024 at 03:16:25PM +0800, Z qiang wrote:
> > > > >
> > > > > [...]
> > > > >
> > > > > > > >
> > > > > > > > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > > > > > > > the same patch/commit), kindly add following tags
> > > > > > > > | Reported-by: kernel test robot <oliver.sang@intel.com>
> > > > > > > > | Closes: https://lore.kernel.org/oe-lkp/202412311203.ca7bddba-lkp@intel.com
> > > > > > > >
> > > > > > >
> > > > > > > Please try the following modifications:
> > > > >
> > > > > yes, the following modifications fix the issues we found for 9216c28c6a. thanks!
> > > > >
> > > > > Tested-by: kernel test robot <oliver.sang@intel.com>
> > > >
> > > > Thank you, and I will apply this during my next rebase.
> > > >
> > > > Zqiang, unless you tell me otherwise, I will add your Co-developed-by
> > > > and Signed-off-by.
> > > >
> > > > Either way, Happy Square New Year!
> > >
> > > Thank you,  Happy New Year!  :)
> >
> > And here you go!  Please let me know of anything I missed.
> >
> > And thank you for saving me the time it would have taken to track
> > this one down.  ;-)
> >
> >                                                         Thanx, Paul
> >
> > ------------------------------------------------------------------------
> >
> > commit 71c893e6d1857d1e4ea37aec557d734a560fdb39
> > Author: Paul E. McKenney <paulmck@kernel.org>
> > Date:   Thu Dec 19 16:08:54 2024 -0800
> >
> >     srcu: Make SRCU readers use ->srcu_ctrs for counter selection
> >
> >     This commit causes SRCU readers to use ->srcu_ctrs for counter
> >     selection instead of ->srcu_idx.  This takes another step towards
> >     array-indexing-free SRCU readers.
> >
> >     [ paulmck: Apply kernel test robot feedback. ]
> >
> >     Co-developed-by: Z qiang <qiang.zhang1211@gmail.com>
> >     Signed-off-by: Z qiang <qiang.zhang1211@gmail.com>
> >     Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> >     Tested-by: kernel test robot <oliver.sang@intel.com>
> >
> > diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
> > index c794d599db5c1..1b01ced61a45b 100644
> > --- a/include/linux/srcutree.h
> > +++ b/include/linux/srcutree.h
> > @@ -101,6 +101,7 @@ struct srcu_usage {
> >   */
> >  struct srcu_struct {
> >         unsigned int srcu_idx;                  /* Current rdr array element. */
> > +       struct srcu_ctr __percpu *srcu_ctrp;
> >         struct srcu_data __percpu *sda;         /* Per-CPU srcu_data array. */
> >         struct lockdep_map dep_map;
> >         struct srcu_usage *srcu_sup;            /* Update-side data. */
> > @@ -167,6 +168,7 @@ struct srcu_struct {
> >  #define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name)                                                \
> >  {                                                                                              \
> >         .sda = &pcpu_name,                                                                      \
> > +       .srcu_ctrp = &pcpu_name.srcu_ctrs[0],                                                   \
> >         __SRCU_STRUCT_INIT_COMMON(name, usage_name)                                             \
> >  }
> >
> > @@ -222,13 +224,12 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
> >   */
> >  static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
> >  {
> > -       int idx;
> > +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
> >
> >         RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
> > -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> > -       this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); /* Y */
> > +       this_cpu_inc(scp->srcu_locks.counter); /* Y */
> >         barrier(); /* Avoid leaking the critical section. */
> > -       return idx;
> > +       return scp - &ssp->sda->srcu_ctrs[0];
> >  }
> >
> >  /*
> > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > index d7ee2f345e192..3bf7f41ad72b8 100644
> > --- a/kernel/rcu/srcutree.c
> > +++ b/kernel/rcu/srcutree.c
> > @@ -253,8 +253,10 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
> >         atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
> >         INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
> >         ssp->srcu_sup->sda_is_static = is_static;
> > -       if (!is_static)
> > +       if (!is_static) {
> >                 ssp->sda = alloc_percpu(struct srcu_data);
> > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> > +       }
> >         if (!ssp->sda)
> >                 goto err_free_sup;
> 
> ssp->srcu_ctrp should be assigned a value only after ssp->sda has been
> allocated successfully.
> 
> -       if (!is_static) {
> +       if (!is_static)
>                 ssp->sda = alloc_percpu(struct srcu_data);
> -               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> -       }
>         if (!ssp->sda)
>                 goto err_free_sup;
> +       if (!is_static)
> +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];

It doesn't hurt to assign NULL to ssp->srcu_ctrp on allocation failure.

Or am I missing something here?

> >         init_srcu_struct_data(ssp);
> > @@ -742,12 +744,11 @@ EXPORT_SYMBOL_GPL(__srcu_check_read_flavor);
> >   */
> >  int __srcu_read_lock(struct srcu_struct *ssp)
> >  {
> > -       int idx;
> > +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
> >
> > -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> > -       this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter);
> > +       this_cpu_inc(scp->srcu_locks.counter);
> >         smp_mb(); /* B */  /* Avoid leaking the critical section. */
> > -       return idx;
> > +       return scp - &ssp->sda->srcu_ctrs[0];
> >  }
> >  EXPORT_SYMBOL_GPL(__srcu_read_lock);
> >
> > @@ -772,13 +773,11 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
> >   */
> >  int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
> >  {
> > -       int idx;
> > -       struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
> > +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
> 
> Should this be the case?
> 
> struct srcu_ctr  *scp = raw_cpu_ptr(ssp->srcu_ctrp);

Good catch!  This is messed up.  I am now thinking in terms of something
like this:

	struct srcu_ctr *scp = this_cpu_ptr(READ_ONCE(ssp->srcu_ctrp));

Except that I broke the CONFIG_NEED_SRCU_NMI_SAFE=y case at some point,
and need to fix that first.  This commit works:

c10edfa489eb ("srcu: Pull ->srcu_{un,}lock_count into a new srcu_ctr structure")

I am now trying:

71c893e6d185 ("srcu: Make SRCU readers use ->srcu_ctrs for counter selection")

The trick is to apply this patch:

------------------------------------------------------------------------
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 26ae869865a12..74809a083e2b5 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -66,7 +66,7 @@ config TREE_SRCU
 	  This option selects the full-fledged version of SRCU.
 
 config NEED_SRCU_NMI_SAFE
-	def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU
+	def_bool HAVE_NMI && !TINY_SRCU
 
 config TASKS_RCU_GENERIC
 	def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU
------------------------------------------------------------------------

Or run some system with CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS=n.

> > -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> > -       atomic_long_inc(&sdp->srcu_ctrs[idx].srcu_locks);
> > +       atomic_long_inc(&scp->srcu_locks);
> >         smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */
> > -       return idx;
> > +       return scp - &ssp->sda->srcu_ctrs[0];
> >  }
> >  EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
> >
> > @@ -1152,6 +1151,8 @@ static void srcu_flip(struct srcu_struct *ssp)
> >         smp_mb(); /* E */  /* Pairs with B and C. */
> >
> >         WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter.
> > +       WRITE_ONCE(ssp->srcu_ctrp,
> > +                  &ssp->sda->srcu_ctrs[!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0])]);
> >
> >         /*
> >          * Ensure that if the updater misses an __srcu_read_unlock()
> > @@ -1998,6 +1999,7 @@ static int srcu_module_coming(struct module *mod)
> >         for (i = 0; i < mod->num_srcu_structs; i++) {
> >                 ssp = *(sspp++);
> >                 ssp->sda = alloc_percpu(struct srcu_data);
> > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> >                 if (WARN_ON_ONCE(!ssp->sda))
> >                         return -ENOMEM;
> 
> 
> -               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
>                 if (WARN_ON_ONCE(!ssp->sda))
>                         return -ENOMEM;
> +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];

Again, the assignment does not hurt anything, but in this case I agree
that your way is more clear, so I will use that.

							Thanx, Paul

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [paulmckrcu:dev.2024.12.24a] [srcu] 9216c28c6a: BUG:unable_to_handle_page_fault_for_address
  2025-01-07  5:50               ` Paul E. McKenney
@ 2025-01-07  9:49                 ` Z qiang
  2025-01-08  4:26                   ` Paul E. McKenney
  0 siblings, 1 reply; 13+ messages in thread
From: Z qiang @ 2025-01-07  9:49 UTC (permalink / raw)
  To: paulmck; +Cc: Oliver Sang, oe-lkp, lkp, rcu

>
> On Mon, Jan 06, 2025 at 11:24:54AM +0800, Z qiang wrote:
> > >
> > > On Fri, Jan 03, 2025 at 01:50:50PM +0800, Z qiang wrote:
> > > > >
> > > > > On Thu, Jan 02, 2025 at 01:41:16PM +0800, Oliver Sang wrote:
> > > > > > hi, Zqiang, hi, Paul,
> > > > > >
> > > > > > On Tue, Dec 31, 2024 at 08:19:44AM -0800, Paul E. McKenney wrote:
> > > > > > > On Tue, Dec 31, 2024 at 03:16:25PM +0800, Z qiang wrote:
> > > > > >
> > > > > > [...]
> > > > > >
> > > > > > > > >
> > > > > > > > > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > > > > > > > > the same patch/commit), kindly add following tags
> > > > > > > > > | Reported-by: kernel test robot <oliver.sang@intel.com>
> > > > > > > > > | Closes: https://lore.kernel.org/oe-lkp/202412311203.ca7bddba-lkp@intel.com
> > > > > > > > >
> > > > > > > >
> > > > > > > > Please try the following modifications:
> > > > > >
> > > > > > yes, the following modifications fix the issues we found for 9216c28c6a. thanks!
> > > > > >
> > > > > > Tested-by: kernel test robot <oliver.sang@intel.com>
> > > > >
> > > > > Thank you, and I will apply this during my next rebase.
> > > > >
> > > > > Zqiang, unless you tell me otherwise, I will add your Co-developed-by
> > > > > and Signed-off-by.
> > > > >
> > > > > Either way, Happy Square New Year!
> > > >
> > > > Thank you,  Happy New Year!  :)
> > >
> > > And here you go!  Please let me know of anything I missed.
> > >
> > > And thank you for saving me the time it would have taken to track
> > > this one down.  ;-)
> > >
> > >                                                         Thanx, Paul
> > >
> > > ------------------------------------------------------------------------
> > >
> > > commit 71c893e6d1857d1e4ea37aec557d734a560fdb39
> > > Author: Paul E. McKenney <paulmck@kernel.org>
> > > Date:   Thu Dec 19 16:08:54 2024 -0800
> > >
> > >     srcu: Make SRCU readers use ->srcu_ctrs for counter selection
> > >
> > >     This commit causes SRCU readers to use ->srcu_ctrs for counter
> > >     selection instead of ->srcu_idx.  This takes another step towards
> > >     array-indexing-free SRCU readers.
> > >
> > >     [ paulmck: Apply kernel test robot feedback. ]
> > >
> > >     Co-developed-by: Z qiang <qiang.zhang1211@gmail.com>
> > >     Signed-off-by: Z qiang <qiang.zhang1211@gmail.com>
> > >     Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > >     Tested-by: kernel test robot <oliver.sang@intel.com>
> > >
> > > diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
> > > index c794d599db5c1..1b01ced61a45b 100644
> > > --- a/include/linux/srcutree.h
> > > +++ b/include/linux/srcutree.h
> > > @@ -101,6 +101,7 @@ struct srcu_usage {
> > >   */
> > >  struct srcu_struct {
> > >         unsigned int srcu_idx;                  /* Current rdr array element. */
> > > +       struct srcu_ctr __percpu *srcu_ctrp;
> > >         struct srcu_data __percpu *sda;         /* Per-CPU srcu_data array. */
> > >         struct lockdep_map dep_map;
> > >         struct srcu_usage *srcu_sup;            /* Update-side data. */
> > > @@ -167,6 +168,7 @@ struct srcu_struct {
> > >  #define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name)                                                \
> > >  {                                                                                              \
> > >         .sda = &pcpu_name,                                                                      \
> > > +       .srcu_ctrp = &pcpu_name.srcu_ctrs[0],                                                   \
> > >         __SRCU_STRUCT_INIT_COMMON(name, usage_name)                                             \
> > >  }
> > >
> > > @@ -222,13 +224,12 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
> > >   */
> > >  static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
> > >  {
> > > -       int idx;
> > > +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
> > >
> > >         RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
> > > -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> > > -       this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); /* Y */
> > > +       this_cpu_inc(scp->srcu_locks.counter); /* Y */
> > >         barrier(); /* Avoid leaking the critical section. */
> > > -       return idx;
> > > +       return scp - &ssp->sda->srcu_ctrs[0];
> > >  }
> > >
> > >  /*
> > > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > > index d7ee2f345e192..3bf7f41ad72b8 100644
> > > --- a/kernel/rcu/srcutree.c
> > > +++ b/kernel/rcu/srcutree.c
> > > @@ -253,8 +253,10 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
> > >         atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
> > >         INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
> > >         ssp->srcu_sup->sda_is_static = is_static;
> > > -       if (!is_static)
> > > +       if (!is_static) {
> > >                 ssp->sda = alloc_percpu(struct srcu_data);
> > > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> > > +       }
> > >         if (!ssp->sda)
> > >                 goto err_free_sup;
> >
> > ssp->srcu_ctrp should be assigned a value only after ssp->sda has been
> > allocated successfully.
> >
> > -       if (!is_static) {
> > +       if (!is_static)
> >                 ssp->sda = alloc_percpu(struct srcu_data);
> > -               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> > -       }
> >         if (!ssp->sda)
> >                 goto err_free_sup;
> > +       if (!is_static)
> > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
>
> It doesn't hurt to assign NULL to ssp->srcu_ctrp on allocation failure.
>
> Or am I missing something here?

No, I just think in case of failed allocation, there is no need to set
srcu_ctrp. :)

Thanks
Zqiang

>
> > >         init_srcu_struct_data(ssp);
> > > @@ -742,12 +744,11 @@ EXPORT_SYMBOL_GPL(__srcu_check_read_flavor);
> > >   */
> > >  int __srcu_read_lock(struct srcu_struct *ssp)
> > >  {
> > > -       int idx;
> > > +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
> > >
> > > -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> > > -       this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter);
> > > +       this_cpu_inc(scp->srcu_locks.counter);
> > >         smp_mb(); /* B */  /* Avoid leaking the critical section. */
> > > -       return idx;
> > > +       return scp - &ssp->sda->srcu_ctrs[0];
> > >  }
> > >  EXPORT_SYMBOL_GPL(__srcu_read_lock);
> > >
> > > @@ -772,13 +773,11 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
> > >   */
> > >  int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
> > >  {
> > > -       int idx;
> > > -       struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
> > > +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
> >
> > Should this be the case?
> >
> > struct srcu_ctr  *scp = raw_cpu_ptr(ssp->srcu_ctrp);
>
> Good catch!  This is messed up.  I am now thinking in terms of something
> like this:
>
>         struct srcu_ctr *scp = this_cpu_ptr(READ_ONCE(ssp->srcu_ctrp));
>
> Except that I broke the CONFIG_NEED_SRCU_NMI_SAFE=y case at some point,
> and need to fix that first.  This commit works:
>
> c10edfa489eb ("srcu: Pull ->srcu_{un,}lock_count into a new srcu_ctr structure")
>
> I am now trying:
>
> 71c893e6d185 ("srcu: Make SRCU readers use ->srcu_ctrs for counter selection")
>
> The trick is to apply this patch:
>
> ------------------------------------------------------------------------
> diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
> index 26ae869865a12..74809a083e2b5 100644
> --- a/kernel/rcu/Kconfig
> +++ b/kernel/rcu/Kconfig
> @@ -66,7 +66,7 @@ config TREE_SRCU
>           This option selects the full-fledged version of SRCU.
>
>  config NEED_SRCU_NMI_SAFE
> -       def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU
> +       def_bool HAVE_NMI && !TINY_SRCU
>
>  config TASKS_RCU_GENERIC
>         def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU
> ------------------------------------------------------------------------
>
> Or run some system with CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS=n.
>
> > > -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> > > -       atomic_long_inc(&sdp->srcu_ctrs[idx].srcu_locks);
> > > +       atomic_long_inc(&scp->srcu_locks);
> > >         smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */
> > > -       return idx;
> > > +       return scp - &ssp->sda->srcu_ctrs[0];
> > >  }
> > >  EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
> > >
> > > @@ -1152,6 +1151,8 @@ static void srcu_flip(struct srcu_struct *ssp)
> > >         smp_mb(); /* E */  /* Pairs with B and C. */
> > >
> > >         WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter.
> > > +       WRITE_ONCE(ssp->srcu_ctrp,
> > > +                  &ssp->sda->srcu_ctrs[!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0])]);
> > >
> > >         /*
> > >          * Ensure that if the updater misses an __srcu_read_unlock()
> > > @@ -1998,6 +1999,7 @@ static int srcu_module_coming(struct module *mod)
> > >         for (i = 0; i < mod->num_srcu_structs; i++) {
> > >                 ssp = *(sspp++);
> > >                 ssp->sda = alloc_percpu(struct srcu_data);
> > > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> > >                 if (WARN_ON_ONCE(!ssp->sda))
> > >                         return -ENOMEM;
> >
> >
> > -               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> >                 if (WARN_ON_ONCE(!ssp->sda))
> >                         return -ENOMEM;
> > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
>
> Again, the assignment does not hurt anything, but in this case I agree
> that your way is more clear, so I will use that.
>
>                                                         Thanx, Paul

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [paulmckrcu:dev.2024.12.24a] [srcu] 9216c28c6a: BUG:unable_to_handle_page_fault_for_address
  2025-01-07  9:49                 ` Z qiang
@ 2025-01-08  4:26                   ` Paul E. McKenney
  2025-01-08  5:55                     ` Z qiang
  0 siblings, 1 reply; 13+ messages in thread
From: Paul E. McKenney @ 2025-01-08  4:26 UTC (permalink / raw)
  To: Z qiang; +Cc: Oliver Sang, oe-lkp, lkp, rcu

On Tue, Jan 07, 2025 at 05:49:42PM +0800, Z qiang wrote:
> >
> > On Mon, Jan 06, 2025 at 11:24:54AM +0800, Z qiang wrote:
> > > >
> > > > On Fri, Jan 03, 2025 at 01:50:50PM +0800, Z qiang wrote:
> > > > > >
> > > > > > On Thu, Jan 02, 2025 at 01:41:16PM +0800, Oliver Sang wrote:
> > > > > > > hi, Zqiang, hi, Paul,
> > > > > > >
> > > > > > > On Tue, Dec 31, 2024 at 08:19:44AM -0800, Paul E. McKenney wrote:
> > > > > > > > On Tue, Dec 31, 2024 at 03:16:25PM +0800, Z qiang wrote:
> > > > > > >
> > > > > > > [...]
> > > > > > >
> > > > > > > > > >
> > > > > > > > > > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > > > > > > > > > the same patch/commit), kindly add following tags
> > > > > > > > > > | Reported-by: kernel test robot <oliver.sang@intel.com>
> > > > > > > > > > | Closes: https://lore.kernel.org/oe-lkp/202412311203.ca7bddba-lkp@intel.com
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > Please try the following modifications:
> > > > > > >
> > > > > > > yes, the following modifications fix the issues we found for 9216c28c6a. thanks!
> > > > > > >
> > > > > > > Tested-by: kernel test robot <oliver.sang@intel.com>
> > > > > >
> > > > > > Thank you, and I will apply this during my next rebase.
> > > > > >
> > > > > > Zqiang, unless you tell me otherwise, I will add your Co-developed-by
> > > > > > and Signed-off-by.
> > > > > >
> > > > > > Either way, Happy Square New Year!
> > > > >
> > > > > Thank you,  Happy New Year!  :)
> > > >
> > > > And here you go!  Please let me know of anything I missed.
> > > >
> > > > And thank you for saving me the time it would have taken to track
> > > > this one down.  ;-)
> > > >
> > > >                                                         Thanx, Paul
> > > >
> > > > ------------------------------------------------------------------------
> > > >
> > > > commit 71c893e6d1857d1e4ea37aec557d734a560fdb39
> > > > Author: Paul E. McKenney <paulmck@kernel.org>
> > > > Date:   Thu Dec 19 16:08:54 2024 -0800
> > > >
> > > >     srcu: Make SRCU readers use ->srcu_ctrs for counter selection
> > > >
> > > >     This commit causes SRCU readers to use ->srcu_ctrs for counter
> > > >     selection instead of ->srcu_idx.  This takes another step towards
> > > >     array-indexing-free SRCU readers.
> > > >
> > > >     [ paulmck: Apply kernel test robot feedback. ]
> > > >
> > > >     Co-developed-by: Z qiang <qiang.zhang1211@gmail.com>
> > > >     Signed-off-by: Z qiang <qiang.zhang1211@gmail.com>
> > > >     Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > > >     Tested-by: kernel test robot <oliver.sang@intel.com>
> > > >
> > > > diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
> > > > index c794d599db5c1..1b01ced61a45b 100644
> > > > --- a/include/linux/srcutree.h
> > > > +++ b/include/linux/srcutree.h
> > > > @@ -101,6 +101,7 @@ struct srcu_usage {
> > > >   */
> > > >  struct srcu_struct {
> > > >         unsigned int srcu_idx;                  /* Current rdr array element. */
> > > > +       struct srcu_ctr __percpu *srcu_ctrp;
> > > >         struct srcu_data __percpu *sda;         /* Per-CPU srcu_data array. */
> > > >         struct lockdep_map dep_map;
> > > >         struct srcu_usage *srcu_sup;            /* Update-side data. */
> > > > @@ -167,6 +168,7 @@ struct srcu_struct {
> > > >  #define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name)                                                \
> > > >  {                                                                                              \
> > > >         .sda = &pcpu_name,                                                                      \
> > > > +       .srcu_ctrp = &pcpu_name.srcu_ctrs[0],                                                   \
> > > >         __SRCU_STRUCT_INIT_COMMON(name, usage_name)                                             \
> > > >  }
> > > >
> > > > @@ -222,13 +224,12 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
> > > >   */
> > > >  static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
> > > >  {
> > > > -       int idx;
> > > > +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
> > > >
> > > >         RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
> > > > -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> > > > -       this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); /* Y */
> > > > +       this_cpu_inc(scp->srcu_locks.counter); /* Y */
> > > >         barrier(); /* Avoid leaking the critical section. */
> > > > -       return idx;
> > > > +       return scp - &ssp->sda->srcu_ctrs[0];
> > > >  }
> > > >
> > > >  /*
> > > > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > > > index d7ee2f345e192..3bf7f41ad72b8 100644
> > > > --- a/kernel/rcu/srcutree.c
> > > > +++ b/kernel/rcu/srcutree.c
> > > > @@ -253,8 +253,10 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
> > > >         atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
> > > >         INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
> > > >         ssp->srcu_sup->sda_is_static = is_static;
> > > > -       if (!is_static)
> > > > +       if (!is_static) {
> > > >                 ssp->sda = alloc_percpu(struct srcu_data);
> > > > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> > > > +       }
> > > >         if (!ssp->sda)
> > > >                 goto err_free_sup;
> > >
> > > > ssp->srcu_ctrp should be assigned only if ssp->sda was allocated
> > > > successfully.
> > >
> > > -       if (!is_static) {
> > > +       if (!is_static)
> > >                 ssp->sda = alloc_percpu(struct srcu_data);
> > > -               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> > > -       }
> > >         if (!ssp->sda)
> > >                 goto err_free_sup;
> > > +       if (!is_static)
> > > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> >
> > It doesn't hurt to assign NULL to ssp->srcu_ctrp on allocation failure.
> >
> > Or am I missing something here?
> 
> No, I just think in case of failed allocation, there is no need to set
> srcu_ctrp. :)

True enough, but you are adding an extra "if", which is not free
from either an execution-time viewpoint (not an issue here) or from a
complexity viewpoint.  I am not ruling your change out, but neither am
I convinced.

Please see below for the current state, which passes light testing even
given proper testing of _nmisafe().  Which I was failing to do before.

Thoughts?

							Thanx, Paul

------------------------------------------------------------------------

commit e8964cf2179673adc54aae38b62361fd0fdf89ff
Author: Paul E. McKenney <paulmck@kernel.org>
Date:   Thu Dec 19 16:08:54 2024 -0800

    srcu: Make SRCU readers use ->srcu_ctrs for counter selection
    
    This commit causes SRCU readers to use ->srcu_ctrs for counter
    selection instead of ->srcu_idx.  This takes another step towards
    array-indexing-free SRCU readers.
    
    [ paulmck: Apply kernel test robot feedback. ]
    
    Co-developed-by: Z qiang <qiang.zhang1211@gmail.com>
    Signed-off-by: Z qiang <qiang.zhang1211@gmail.com>
    Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
    Tested-by: kernel test robot <oliver.sang@intel.com>

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index c794d599db5c1..1b01ced61a45b 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -101,6 +101,7 @@ struct srcu_usage {
  */
 struct srcu_struct {
 	unsigned int srcu_idx;			/* Current rdr array element. */
+	struct srcu_ctr __percpu *srcu_ctrp;
 	struct srcu_data __percpu *sda;		/* Per-CPU srcu_data array. */
 	struct lockdep_map dep_map;
 	struct srcu_usage *srcu_sup;		/* Update-side data. */
@@ -167,6 +168,7 @@ struct srcu_struct {
 #define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name)						\
 {												\
 	.sda = &pcpu_name,									\
+	.srcu_ctrp = &pcpu_name.srcu_ctrs[0],							\
 	__SRCU_STRUCT_INIT_COMMON(name, usage_name)						\
 }
 
@@ -222,13 +224,12 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
  */
 static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
 {
-	int idx;
+	struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
 
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
-	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
-	this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); /* Y */
+	this_cpu_inc(scp->srcu_locks.counter); /* Y */
 	barrier(); /* Avoid leaking the critical section. */
-	return idx;
+	return scp - &ssp->sda->srcu_ctrs[0];
 }
 
 /*
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index d7ee2f345e192..308cc7b5098c5 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -253,8 +253,10 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
 	atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
 	INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
 	ssp->srcu_sup->sda_is_static = is_static;
-	if (!is_static)
+	if (!is_static) {
 		ssp->sda = alloc_percpu(struct srcu_data);
+		ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
+	}
 	if (!ssp->sda)
 		goto err_free_sup;
 	init_srcu_struct_data(ssp);
@@ -742,12 +744,11 @@ EXPORT_SYMBOL_GPL(__srcu_check_read_flavor);
  */
 int __srcu_read_lock(struct srcu_struct *ssp)
 {
-	int idx;
+	struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
 
-	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
-	this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter);
+	this_cpu_inc(scp->srcu_locks.counter);
 	smp_mb(); /* B */  /* Avoid leaking the critical section. */
-	return idx;
+	return scp - &ssp->sda->srcu_ctrs[0];
 }
 EXPORT_SYMBOL_GPL(__srcu_read_lock);
 
@@ -772,13 +773,12 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  */
 int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
 {
-	int idx;
-	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+	struct srcu_ctr __percpu *scpp = READ_ONCE(ssp->srcu_ctrp);
+	struct srcu_ctr *scp = this_cpu_ptr(scpp);
 
-	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
-	atomic_long_inc(&sdp->srcu_ctrs[idx].srcu_locks);
+	atomic_long_inc(&scp->srcu_locks);
 	smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */
-	return idx;
+	return scpp - &ssp->sda->srcu_ctrs[0];
 }
 EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
 
@@ -1152,6 +1152,8 @@ static void srcu_flip(struct srcu_struct *ssp)
 	smp_mb(); /* E */  /* Pairs with B and C. */
 
 	WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter.
+	WRITE_ONCE(ssp->srcu_ctrp,
+		   &ssp->sda->srcu_ctrs[!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0])]);
 
 	/*
 	 * Ensure that if the updater misses an __srcu_read_unlock()
@@ -2000,6 +2002,7 @@ static int srcu_module_coming(struct module *mod)
 		ssp->sda = alloc_percpu(struct srcu_data);
 		if (WARN_ON_ONCE(!ssp->sda))
 			return -ENOMEM;
+		ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
 	}
 	return 0;
 }

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [paulmckrcu:dev.2024.12.24a] [srcu] 9216c28c6a: BUG:unable_to_handle_page_fault_for_address
  2025-01-08  4:26                   ` Paul E. McKenney
@ 2025-01-08  5:55                     ` Z qiang
  2025-01-08 15:05                       ` Paul E. McKenney
  0 siblings, 1 reply; 13+ messages in thread
From: Z qiang @ 2025-01-08  5:55 UTC (permalink / raw)
  To: paulmck; +Cc: Oliver Sang, oe-lkp, lkp, rcu

>
> On Tue, Jan 07, 2025 at 05:49:42PM +0800, Z qiang wrote:
> > >
> > > On Mon, Jan 06, 2025 at 11:24:54AM +0800, Z qiang wrote:
> > > > >
> > > > > On Fri, Jan 03, 2025 at 01:50:50PM +0800, Z qiang wrote:
> > > > > > >
> > > > > > > On Thu, Jan 02, 2025 at 01:41:16PM +0800, Oliver Sang wrote:
> > > > > > > > hi, Zqiang, hi, Paul,
> > > > > > > >
> > > > > > > > On Tue, Dec 31, 2024 at 08:19:44AM -0800, Paul E. McKenney wrote:
> > > > > > > > > On Tue, Dec 31, 2024 at 03:16:25PM +0800, Z qiang wrote:
> > > > > > > >
> > > > > > > > [...]
> > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > > > > > > > > > > the same patch/commit), kindly add following tags
> > > > > > > > > > > | Reported-by: kernel test robot <oliver.sang@intel.com>
> > > > > > > > > > > | Closes: https://lore.kernel.org/oe-lkp/202412311203.ca7bddba-lkp@intel.com
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Please try the following modifications:
> > > > > > > >
> > > > > > > > yes, the following modifications fix the issues we found for 9216c28c6a. thanks!
> > > > > > > >
> > > > > > > > Tested-by: kernel test robot <oliver.sang@intel.com>
> > > > > > >
> > > > > > > Thank you, and I will apply this during my next rebase.
> > > > > > >
> > > > > > > Zqiang, unless you tell me otherwise, I will add your Co-developed-by
> > > > > > > and Signed-off-by.
> > > > > > >
> > > > > > > Either way, Happy Square New Year!
> > > > > >
> > > > > > Thank you,  Happy New Year!  :)
> > > > >
> > > > > And here you go!  Please let me know of anything I missed.
> > > > >
> > > > > And thank you for saving me the time it would have taken to track
> > > > > this one down.  ;-)
> > > > >
> > > > >                                                         Thanx, Paul
> > > > >
> > > > > ------------------------------------------------------------------------
> > > > >
> > > > > commit 71c893e6d1857d1e4ea37aec557d734a560fdb39
> > > > > Author: Paul E. McKenney <paulmck@kernel.org>
> > > > > Date:   Thu Dec 19 16:08:54 2024 -0800
> > > > >
> > > > >     srcu: Make SRCU readers use ->srcu_ctrs for counter selection
> > > > >
> > > > >     This commit causes SRCU readers to use ->srcu_ctrs for counter
> > > > >     selection instead of ->srcu_idx.  This takes another step towards
> > > > >     array-indexing-free SRCU readers.
> > > > >
> > > > >     [ paulmck: Apply kernel test robot feedback. ]
> > > > >
> > > > >     Co-developed-by: Z qiang <qiang.zhang1211@gmail.com>
> > > > >     Signed-off-by: Z qiang <qiang.zhang1211@gmail.com>
> > > > >     Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > > > >     Tested-by: kernel test robot <oliver.sang@intel.com>
> > > > >
> > > > > diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
> > > > > index c794d599db5c1..1b01ced61a45b 100644
> > > > > --- a/include/linux/srcutree.h
> > > > > +++ b/include/linux/srcutree.h
> > > > > @@ -101,6 +101,7 @@ struct srcu_usage {
> > > > >   */
> > > > >  struct srcu_struct {
> > > > >         unsigned int srcu_idx;                  /* Current rdr array element. */
> > > > > +       struct srcu_ctr __percpu *srcu_ctrp;
> > > > >         struct srcu_data __percpu *sda;         /* Per-CPU srcu_data array. */
> > > > >         struct lockdep_map dep_map;
> > > > >         struct srcu_usage *srcu_sup;            /* Update-side data. */
> > > > > @@ -167,6 +168,7 @@ struct srcu_struct {
> > > > >  #define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name)                                                \
> > > > >  {                                                                                              \
> > > > >         .sda = &pcpu_name,                                                                      \
> > > > > +       .srcu_ctrp = &pcpu_name.srcu_ctrs[0],                                                   \
> > > > >         __SRCU_STRUCT_INIT_COMMON(name, usage_name)                                             \
> > > > >  }
> > > > >
> > > > > @@ -222,13 +224,12 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
> > > > >   */
> > > > >  static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
> > > > >  {
> > > > > -       int idx;
> > > > > +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
> > > > >
> > > > >         RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
> > > > > -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> > > > > -       this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); /* Y */
> > > > > +       this_cpu_inc(scp->srcu_locks.counter); /* Y */
> > > > >         barrier(); /* Avoid leaking the critical section. */
> > > > > -       return idx;
> > > > > +       return scp - &ssp->sda->srcu_ctrs[0];
> > > > >  }
> > > > >
> > > > >  /*
> > > > > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > > > > index d7ee2f345e192..3bf7f41ad72b8 100644
> > > > > --- a/kernel/rcu/srcutree.c
> > > > > +++ b/kernel/rcu/srcutree.c
> > > > > @@ -253,8 +253,10 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
> > > > >         atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
> > > > >         INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
> > > > >         ssp->srcu_sup->sda_is_static = is_static;
> > > > > -       if (!is_static)
> > > > > +       if (!is_static) {
> > > > >                 ssp->sda = alloc_percpu(struct srcu_data);
> > > > > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> > > > > +       }
> > > > >         if (!ssp->sda)
> > > > >                 goto err_free_sup;
> > > >
> > > > > ssp->srcu_ctrp should be assigned only if ssp->sda was allocated
> > > > > successfully.
> > > >
> > > > -       if (!is_static) {
> > > > +       if (!is_static)
> > > >                 ssp->sda = alloc_percpu(struct srcu_data);
> > > > -               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> > > > -       }
> > > >         if (!ssp->sda)
> > > >                 goto err_free_sup;
> > > > +       if (!is_static)
> > > > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> > >
> > > It doesn't hurt to assign NULL to ssp->srcu_ctrp on allocation failure.
> > >
> > > Or am I missing something here?
> >
> > No, I just think in case of failed allocation, there is no need to set
> > srcu_ctrp. :)
>
> True enough, but you are adding an extra "if", which is not free
> from either an execution-time viewpoint (not an issue here) or from a
> complexity viewpoint.  I am not ruling your change out, but neither am
> I convinced.
>
> Please see below for the current state, which passes light testing even
> given proper testing of _nmisafe().  Which I was failing to do before.
>
> Thoughts?
>
>                                                         Thanx, Paul
>
> ------------------------------------------------------------------------
>
> commit e8964cf2179673adc54aae38b62361fd0fdf89ff
> Author: Paul E. McKenney <paulmck@kernel.org>
> Date:   Thu Dec 19 16:08:54 2024 -0800
>
>     srcu: Make SRCU readers use ->srcu_ctrs for counter selection
>
>     This commit causes SRCU readers to use ->srcu_ctrs for counter
>     selection instead of ->srcu_idx.  This takes another step towards
>     array-indexing-free SRCU readers.
>
>     [ paulmck: Apply kernel test robot feedback. ]
>
>     Co-developed-by: Z qiang <qiang.zhang1211@gmail.com>
>     Signed-off-by: Z qiang <qiang.zhang1211@gmail.com>
>     Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
>     Tested-by: kernel test robot <oliver.sang@intel.com>
>
> diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
> index c794d599db5c1..1b01ced61a45b 100644
> --- a/include/linux/srcutree.h
> +++ b/include/linux/srcutree.h
> @@ -101,6 +101,7 @@ struct srcu_usage {
>   */
>  struct srcu_struct {
>         unsigned int srcu_idx;                  /* Current rdr array element. */
> +       struct srcu_ctr __percpu *srcu_ctrp;
>         struct srcu_data __percpu *sda;         /* Per-CPU srcu_data array. */
>         struct lockdep_map dep_map;
>         struct srcu_usage *srcu_sup;            /* Update-side data. */
> @@ -167,6 +168,7 @@ struct srcu_struct {
>  #define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name)                                                \
>  {                                                                                              \
>         .sda = &pcpu_name,                                                                      \
> +       .srcu_ctrp = &pcpu_name.srcu_ctrs[0],                                                   \
>         __SRCU_STRUCT_INIT_COMMON(name, usage_name)                                             \
>  }
>
> @@ -222,13 +224,12 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
>   */
>  static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
>  {
> -       int idx;
> +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
>
>         RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
> -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> -       this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); /* Y */
> +       this_cpu_inc(scp->srcu_locks.counter); /* Y */
>         barrier(); /* Avoid leaking the critical section. */
> -       return idx;
> +       return scp - &ssp->sda->srcu_ctrs[0];
>  }
>
>  /*
> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index d7ee2f345e192..308cc7b5098c5 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -253,8 +253,10 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
>         atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
>         INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
>         ssp->srcu_sup->sda_is_static = is_static;
> -       if (!is_static)
> +       if (!is_static) {
>                 ssp->sda = alloc_percpu(struct srcu_data);
> +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> +       }
>         if (!ssp->sda)
>                 goto err_free_sup;
>         init_srcu_struct_data(ssp);
> @@ -742,12 +744,11 @@ EXPORT_SYMBOL_GPL(__srcu_check_read_flavor);
>   */
>  int __srcu_read_lock(struct srcu_struct *ssp)
>  {
> -       int idx;
> +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
>
> -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> -       this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter);
> +       this_cpu_inc(scp->srcu_locks.counter);
>         smp_mb(); /* B */  /* Avoid leaking the critical section. */
> -       return idx;
> +       return scp - &ssp->sda->srcu_ctrs[0];
>  }
>  EXPORT_SYMBOL_GPL(__srcu_read_lock);
>
> @@ -772,13 +773,12 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
>   */
>  int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
>  {
> -       int idx;
> -       struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
> +       struct srcu_ctr __percpu *scpp = READ_ONCE(ssp->srcu_ctrp);
> +       struct srcu_ctr *scp = this_cpu_ptr(scpp);

Shouldn't raw_cpu_ptr() be used here instead?  Otherwise the following splat appears:

BUG: using smp_processor_id() in preemptible [00000000] code: init/1
[    7.324304][    T1] caller is debug_smp_processor_id+0x1b/0x30
[    7.324308][    T1] CPU: 0 UID: 0 PID: 1 Comm: init Not tainted
6.13.0-rc2-yoctodev-standard+ #99
10b660623b999b402f7cd2198fe7d4ebd36ef1d0
[    7.324311][    T1] Hardware name: QEMU Standard PC (Q35 + ICH9,
2009), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014
[    7.324314][    T1] Call Trace:
[    7.324315][    T1]  <TASK>
[    7.324319][    T1]  dump_stack_lvl+0xbb/0xd0
[    7.324326][    T1]  dump_stack+0x14/0x20
[    7.324330][    T1]  check_preemption_disabled+0xc7/0xf0
[    7.324340][    T1]  debug_smp_processor_id+0x1b/0x30
[    7.324343][    T1]  __srcu_read_lock_nmisafe+0x39/0xd0
[    7.324353][    T1]  console_flush_all+0x18c/0xbf0
........

Thanks
Zqiang

>
> -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> -       atomic_long_inc(&sdp->srcu_ctrs[idx].srcu_locks);
> +       atomic_long_inc(&scp->srcu_locks);
>         smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */
> -       return idx;
> +       return scpp - &ssp->sda->srcu_ctrs[0];
>  }
>  EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
>
> @@ -1152,6 +1152,8 @@ static void srcu_flip(struct srcu_struct *ssp)
>         smp_mb(); /* E */  /* Pairs with B and C. */
>
>         WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter.
> +       WRITE_ONCE(ssp->srcu_ctrp,
> +                  &ssp->sda->srcu_ctrs[!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0])]);
>
>         /*
>          * Ensure that if the updater misses an __srcu_read_unlock()
> @@ -2000,6 +2002,7 @@ static int srcu_module_coming(struct module *mod)
>                 ssp->sda = alloc_percpu(struct srcu_data);
>                 if (WARN_ON_ONCE(!ssp->sda))
>                         return -ENOMEM;
> +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
>         }
>         return 0;
>  }

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [paulmckrcu:dev.2024.12.24a] [srcu] 9216c28c6a: BUG:unable_to_handle_page_fault_for_address
  2025-01-08  5:55                     ` Z qiang
@ 2025-01-08 15:05                       ` Paul E. McKenney
  0 siblings, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2025-01-08 15:05 UTC (permalink / raw)
  To: Z qiang; +Cc: Oliver Sang, oe-lkp, lkp, rcu

On Wed, Jan 08, 2025 at 01:55:12PM +0800, Z qiang wrote:
> >
> > On Tue, Jan 07, 2025 at 05:49:42PM +0800, Z qiang wrote:
> > > >
> > > > On Mon, Jan 06, 2025 at 11:24:54AM +0800, Z qiang wrote:
> > > > > >
> > > > > > On Fri, Jan 03, 2025 at 01:50:50PM +0800, Z qiang wrote:
> > > > > > > >
> > > > > > > > On Thu, Jan 02, 2025 at 01:41:16PM +0800, Oliver Sang wrote:
> > > > > > > > > hi, Zqiang, hi, Paul,
> > > > > > > > >
> > > > > > > > > On Tue, Dec 31, 2024 at 08:19:44AM -0800, Paul E. McKenney wrote:
> > > > > > > > > > On Tue, Dec 31, 2024 at 03:16:25PM +0800, Z qiang wrote:
> > > > > > > > >
> > > > > > > > > [...]
> > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > > > > > > > > > > > the same patch/commit), kindly add following tags
> > > > > > > > > > > > | Reported-by: kernel test robot <oliver.sang@intel.com>
> > > > > > > > > > > > | Closes: https://lore.kernel.org/oe-lkp/202412311203.ca7bddba-lkp@intel.com
> > > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > Please try the following modifications:
> > > > > > > > >
> > > > > > > > > yes, the following modifications fix the issues we found for 9216c28c6a. thanks!
> > > > > > > > >
> > > > > > > > > Tested-by: kernel test robot <oliver.sang@intel.com>
> > > > > > > >
> > > > > > > > Thank you, and I will apply this during my next rebase.
> > > > > > > >
> > > > > > > > Zqiang, unless you tell me otherwise, I will add your Co-developed-by
> > > > > > > > and Signed-off-by.
> > > > > > > >
> > > > > > > > Either way, Happy Square New Year!
> > > > > > >
> > > > > > > Thank you,  Happy New Year!  :)
> > > > > >
> > > > > > And here you go!  Please let me know of anything I missed.
> > > > > >
> > > > > > And thank you for saving me the time it would have taken to track
> > > > > > this one down.  ;-)
> > > > > >
> > > > > >                                                         Thanx, Paul
> > > > > >
> > > > > > ------------------------------------------------------------------------
> > > > > >
> > > > > > commit 71c893e6d1857d1e4ea37aec557d734a560fdb39
> > > > > > Author: Paul E. McKenney <paulmck@kernel.org>
> > > > > > Date:   Thu Dec 19 16:08:54 2024 -0800
> > > > > >
> > > > > >     srcu: Make SRCU readers use ->srcu_ctrs for counter selection
> > > > > >
> > > > > >     This commit causes SRCU readers to use ->srcu_ctrs for counter
> > > > > >     selection instead of ->srcu_idx.  This takes another step towards
> > > > > >     array-indexing-free SRCU readers.
> > > > > >
> > > > > >     [ paulmck: Apply kernel test robot feedback. ]
> > > > > >
> > > > > >     Co-developed-by: Z qiang <qiang.zhang1211@gmail.com>
> > > > > >     Signed-off-by: Z qiang <qiang.zhang1211@gmail.com>
> > > > > >     Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > > > > >     Tested-by: kernel test robot <oliver.sang@intel.com>
> > > > > >
> > > > > > diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
> > > > > > index c794d599db5c1..1b01ced61a45b 100644
> > > > > > --- a/include/linux/srcutree.h
> > > > > > +++ b/include/linux/srcutree.h
> > > > > > @@ -101,6 +101,7 @@ struct srcu_usage {
> > > > > >   */
> > > > > >  struct srcu_struct {
> > > > > >         unsigned int srcu_idx;                  /* Current rdr array element. */
> > > > > > +       struct srcu_ctr __percpu *srcu_ctrp;
> > > > > >         struct srcu_data __percpu *sda;         /* Per-CPU srcu_data array. */
> > > > > >         struct lockdep_map dep_map;
> > > > > >         struct srcu_usage *srcu_sup;            /* Update-side data. */
> > > > > > @@ -167,6 +168,7 @@ struct srcu_struct {
> > > > > >  #define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name)                                                \
> > > > > >  {                                                                                              \
> > > > > >         .sda = &pcpu_name,                                                                      \
> > > > > > +       .srcu_ctrp = &pcpu_name.srcu_ctrs[0],                                                   \
> > > > > >         __SRCU_STRUCT_INIT_COMMON(name, usage_name)                                             \
> > > > > >  }
> > > > > >
> > > > > > @@ -222,13 +224,12 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
> > > > > >   */
> > > > > >  static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
> > > > > >  {
> > > > > > -       int idx;
> > > > > > +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
> > > > > >
> > > > > >         RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
> > > > > > -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> > > > > > -       this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); /* Y */
> > > > > > +       this_cpu_inc(scp->srcu_locks.counter); /* Y */
> > > > > >         barrier(); /* Avoid leaking the critical section. */
> > > > > > -       return idx;
> > > > > > +       return scp - &ssp->sda->srcu_ctrs[0];
> > > > > >  }
> > > > > >
> > > > > >  /*
> > > > > > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > > > > > index d7ee2f345e192..3bf7f41ad72b8 100644
> > > > > > --- a/kernel/rcu/srcutree.c
> > > > > > +++ b/kernel/rcu/srcutree.c
> > > > > > @@ -253,8 +253,10 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
> > > > > >         atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
> > > > > >         INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
> > > > > >         ssp->srcu_sup->sda_is_static = is_static;
> > > > > > -       if (!is_static)
> > > > > > +       if (!is_static) {
> > > > > >                 ssp->sda = alloc_percpu(struct srcu_data);
> > > > > > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> > > > > > +       }
> > > > > >         if (!ssp->sda)
> > > > > >                 goto err_free_sup;
> > > > >
> > > > > > ssp->srcu_ctrp should be assigned only after ssp->sda has been
> > > > > > allocated successfully.
> > > > >
> > > > > -       if (!is_static) {
> > > > > +       if (!is_static)
> > > > >                 ssp->sda = alloc_percpu(struct srcu_data);
> > > > > -               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> > > > > -       }
> > > > >         if (!ssp->sda)
> > > > >                 goto err_free_sup;
> > > > > +       if (!is_static)
> > > > > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> > > >
> > > > It doesn't hurt to assign NULL to ssp->srcu_ctrp on allocation failure.
> > > >
> > > > Or am I missing something here?
> > >
> > > No, I just think in case of failed allocation, there is no need to set
> > > srcu_ctrp. :)
> >
> > True enough, but you are adding an extra "if", which is not free
> > from either an execution-time viewpoint (not an issue here) or from a
> > complexity viewpoint.  I am not ruling your change out, but neither am
> > I convinced.
> >
> > Please see below for the current state, which passes light testing even
> > given proper testing of _nmisafe().  Which I was failing to do before.
> >
> > Thoughts?
> >
> >                                                         Thanx, Paul
> >
> > ------------------------------------------------------------------------
> >
> > commit e8964cf2179673adc54aae38b62361fd0fdf89ff
> > Author: Paul E. McKenney <paulmck@kernel.org>
> > Date:   Thu Dec 19 16:08:54 2024 -0800
> >
> >     srcu: Make SRCU readers use ->srcu_ctrs for counter selection
> >
> >     This commit causes SRCU readers to use ->srcu_ctrs for counter
> >     selection instead of ->srcu_idx.  This takes another step towards
> >     array-indexing-free SRCU readers.
> >
> >     [ paulmck: Apply kernel test robot feedback. ]
> >
> >     Co-developed-by: Z qiang <qiang.zhang1211@gmail.com>
> >     Signed-off-by: Z qiang <qiang.zhang1211@gmail.com>
> >     Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> >     Tested-by: kernel test robot <oliver.sang@intel.com>
> >
> > diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
> > index c794d599db5c1..1b01ced61a45b 100644
> > --- a/include/linux/srcutree.h
> > +++ b/include/linux/srcutree.h
> > @@ -101,6 +101,7 @@ struct srcu_usage {
> >   */
> >  struct srcu_struct {
> >         unsigned int srcu_idx;                  /* Current rdr array element. */
> > +       struct srcu_ctr __percpu *srcu_ctrp;
> >         struct srcu_data __percpu *sda;         /* Per-CPU srcu_data array. */
> >         struct lockdep_map dep_map;
> >         struct srcu_usage *srcu_sup;            /* Update-side data. */
> > @@ -167,6 +168,7 @@ struct srcu_struct {
> >  #define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name)                                                \
> >  {                                                                                              \
> >         .sda = &pcpu_name,                                                                      \
> > +       .srcu_ctrp = &pcpu_name.srcu_ctrs[0],                                                   \
> >         __SRCU_STRUCT_INIT_COMMON(name, usage_name)                                             \
> >  }
> >
> > @@ -222,13 +224,12 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
> >   */
> >  static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
> >  {
> > -       int idx;
> > +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
> >
> >         RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
> > -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> > -       this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); /* Y */
> > +       this_cpu_inc(scp->srcu_locks.counter); /* Y */
> >         barrier(); /* Avoid leaking the critical section. */
> > -       return idx;
> > +       return scp - &ssp->sda->srcu_ctrs[0];
> >  }
> >
> >  /*
> > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > index d7ee2f345e192..308cc7b5098c5 100644
> > --- a/kernel/rcu/srcutree.c
> > +++ b/kernel/rcu/srcutree.c
> > @@ -253,8 +253,10 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
> >         atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
> >         INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
> >         ssp->srcu_sup->sda_is_static = is_static;
> > -       if (!is_static)
> > +       if (!is_static) {
> >                 ssp->sda = alloc_percpu(struct srcu_data);
> > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> > +       }
> >         if (!ssp->sda)
> >                 goto err_free_sup;
> >         init_srcu_struct_data(ssp);
> > @@ -742,12 +744,11 @@ EXPORT_SYMBOL_GPL(__srcu_check_read_flavor);
> >   */
> >  int __srcu_read_lock(struct srcu_struct *ssp)
> >  {
> > -       int idx;
> > +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
> >
> > -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> > -       this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter);
> > +       this_cpu_inc(scp->srcu_locks.counter);
> >         smp_mb(); /* B */  /* Avoid leaking the critical section. */
> > -       return idx;
> > +       return scp - &ssp->sda->srcu_ctrs[0];
> >  }
> >  EXPORT_SYMBOL_GPL(__srcu_read_lock);
> >
> > @@ -772,13 +773,12 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
> >   */
> >  int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
> >  {
> > -       int idx;
> > -       struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
> > +       struct srcu_ctr __percpu *scpp = READ_ONCE(ssp->srcu_ctrp);
> > +       struct srcu_ctr *scp = this_cpu_ptr(scpp);
> 
> Should raw_cpu_ptr() be used here instead?  Otherwise the following splat appears:
> 
> BUG: using smp_processor_id() in preemptible [00000000] code: init/1
> [    7.324304][    T1] caller is debug_smp_processor_id+0x1b/0x30
> [    7.324308][    T1] CPU: 0 UID: 0 PID: 1 Comm: init Not tainted
> 6.13.0-rc2-yoctodev-standard+ #99
> 10b660623b999b402f7cd2198fe7d4ebd36ef1d0
> [    7.324311][    T1] Hardware name: QEMU Standard PC (Q35 + ICH9,
> 2009), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014
> [    7.324314][    T1] Call Trace:
> [    7.324315][    T1]  <TASK>
> [    7.324319][    T1]  dump_stack_lvl+0xbb/0xd0
> [    7.324326][    T1]  dump_stack+0x14/0x20
> [    7.324330][    T1]  check_preemption_disabled+0xc7/0xf0
> [    7.324340][    T1]  debug_smp_processor_id+0x1b/0x30
> [    7.324343][    T1]  __srcu_read_lock_nmisafe+0x39/0xd0
> [    7.324353][    T1]  console_flush_all+0x18c/0xbf0
> ........

You are quite right!  Fixing, thank you!

Ah, and the reason that the SRCU-P scenario did not catch this is that
I didn't force CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS=y.

							Thanx, Paul

> Thanks
> Zqiang
> 
> >
> > -       idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> > -       atomic_long_inc(&sdp->srcu_ctrs[idx].srcu_locks);
> > +       atomic_long_inc(&scp->srcu_locks);
> >         smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */
> > -       return idx;
> > +       return scpp - &ssp->sda->srcu_ctrs[0];
> >  }
> >  EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
> >
> > @@ -1152,6 +1152,8 @@ static void srcu_flip(struct srcu_struct *ssp)
> >         smp_mb(); /* E */  /* Pairs with B and C. */
> >
> >         WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter.
> > +       WRITE_ONCE(ssp->srcu_ctrp,
> > +                  &ssp->sda->srcu_ctrs[!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0])]);
> >
> >         /*
> >          * Ensure that if the updater misses an __srcu_read_unlock()
> > @@ -2000,6 +2002,7 @@ static int srcu_module_coming(struct module *mod)
> >                 ssp->sda = alloc_percpu(struct srcu_data);
> >                 if (WARN_ON_ONCE(!ssp->sda))
> >                         return -ENOMEM;
> > +               ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
> >         }
> >         return 0;
> >  }

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2025-01-08 15:05 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-12-31  5:02 [paulmckrcu:dev.2024.12.24a] [srcu] 9216c28c6a: BUG:unable_to_handle_page_fault_for_address kernel test robot
2024-12-31  7:16 ` Z qiang
2024-12-31 16:19   ` Paul E. McKenney
2025-01-02  5:41     ` Oliver Sang
2025-01-02 18:55       ` Paul E. McKenney
2025-01-03  5:50         ` Z qiang
2025-01-03 17:10           ` Paul E. McKenney
2025-01-06  3:24             ` Z qiang
2025-01-07  5:50               ` Paul E. McKenney
2025-01-07  9:49                 ` Z qiang
2025-01-08  4:26                   ` Paul E. McKenney
2025-01-08  5:55                     ` Z qiang
2025-01-08 15:05                       ` Paul E. McKenney

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.