* Re: [PATCH 1/2] powerpc: remove cpu_online_cores_map function
From: Sachin Sant @ 2021-11-05 13:09 UTC (permalink / raw)
To: Nicholas Piggin; +Cc: linuxppc-dev
In-Reply-To: <20211105035042.1398309-1-npiggin@gmail.com>
> On 05-Nov-2021, at 9:20 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
>
> This function builds the cores online map with on-stack cpumasks which
> can cause high stack usage with large NR_CPUS.
>
> It is not used in any performance sensitive paths, so instead just check
> for first thread sibling.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
Tested-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
Thanks
- Sachin
> arch/powerpc/include/asm/cputhreads.h | 33 -----------------------
> arch/powerpc/platforms/powernv/idle.c | 10 +++----
> arch/powerpc/platforms/powernv/opal-imc.c | 6 ++---
> 3 files changed, 8 insertions(+), 41 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
> index b167186aaee4..f26c430f3982 100644
> --- a/arch/powerpc/include/asm/cputhreads.h
> +++ b/arch/powerpc/include/asm/cputhreads.h
> @@ -32,44 +32,11 @@ extern cpumask_t threads_core_mask;
> #define threads_core_mask (*get_cpu_mask(0))
> #endif
>
> -/* cpu_thread_mask_to_cores - Return a cpumask of one per cores
> - * hit by the argument
> - *
> - * @threads: a cpumask of online threads
> - *
> - * This function returns a cpumask which will have one online cpu's
> - * bit set for each core that has at least one thread set in the argument.
> - *
> - * This can typically be used for things like IPI for tlb invalidations
> - * since those need to be done only once per core/TLB
> - */
> -static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads)
> -{
> - cpumask_t tmp, res;
> - int i, cpu;
> -
> - cpumask_clear(&res);
> - for (i = 0; i < NR_CPUS; i += threads_per_core) {
> - cpumask_shift_left(&tmp, &threads_core_mask, i);
> - if (cpumask_intersects(threads, &tmp)) {
> - cpu = cpumask_next_and(-1, &tmp, cpu_online_mask);
> - if (cpu < nr_cpu_ids)
> - cpumask_set_cpu(cpu, &res);
> - }
> - }
> - return res;
> -}
> -
> static inline int cpu_nr_cores(void)
> {
> return nr_cpu_ids >> threads_shift;
> }
>
> -static inline cpumask_t cpu_online_cores_map(void)
> -{
> - return cpu_thread_mask_to_cores(cpu_online_mask);
> -}
> -
> #ifdef CONFIG_SMP
> int cpu_core_index_of_thread(int cpu);
> int cpu_first_thread_of_core(int core);
> diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
> index e3ffdc8e8567..70da314fd2d7 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -146,9 +146,13 @@ EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
> static void pnv_fastsleep_workaround_apply(void *info)
>
> {
> + int cpu = smp_processor_id();
> int rc;
> int *err = info;
>
> + if (cpu_first_thread_sibling(cpu) != cpu)
> + return;
> +
> rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
> OPAL_CONFIG_IDLE_APPLY);
> if (rc)
> @@ -175,7 +179,6 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
> struct device_attribute *attr, const char *buf,
> size_t count)
> {
> - cpumask_t primary_thread_mask;
> int err;
> u8 val;
>
> @@ -200,10 +203,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
> power7_fastsleep_workaround_exit = false;
>
> cpus_read_lock();
> - primary_thread_mask = cpu_online_cores_map();
> - on_each_cpu_mask(&primary_thread_mask,
> - pnv_fastsleep_workaround_apply,
> - &err, 1);
> + on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1);
> cpus_read_unlock();
> if (err) {
> pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
> diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
> index 05d3832019b9..3fea5da6d1b3 100644
> --- a/arch/powerpc/platforms/powernv/opal-imc.c
> +++ b/arch/powerpc/platforms/powernv/opal-imc.c
> @@ -200,13 +200,13 @@ static void disable_nest_pmu_counters(void)
>
> static void disable_core_pmu_counters(void)
> {
> - cpumask_t cores_map;
> int cpu, rc;
>
> cpus_read_lock();
> /* Disable the IMC Core functions */
> - cores_map = cpu_online_cores_map();
> - for_each_cpu(cpu, &cores_map) {
> + for_each_online_cpu(cpu) {
> + if (cpu_first_thread_sibling(cpu) != cpu)
> + continue;
> rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
> get_hard_smp_processor_id(cpu));
> if (rc)
> --
> 2.23.0
>
^ permalink raw reply
* [GIT PULL] Please pull powerpc/linux.git powerpc-5.16-1 tag
From: Michael Ellerman @ 2021-11-05 13:02 UTC (permalink / raw)
To: Linus Torvalds
Cc: nathanl, songkai01, aik, kda, gustavoars, wanjiabing, cuibixuan,
peterz, joel, u.kleine-koenig, agust, atrajeev, lvivier, schnelle,
npiggin, clg, nixiaoming, hbathini, dja, atrajeev, ndesaulniers,
linux-kernel, hegdevasant, pbonzini, linuxppc-dev
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256
Hi Linus,
Please pull a small batch of powerpc updates for 5.16:
The following changes since commit e4e737bb5c170df6135a127739a9e6148ee3da82:
Linux 5.15-rc2 (2021-09-19 17:28:22 -0700)
are available in the git repository at:
https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.16-1
for you to fetch changes up to c12ab8dbc492b992e1ea717db933cee568780c47:
powerpc/8xx: Fix Oops with STRICT_KERNEL_RWX without DEBUG_RODATA_TEST (2021-11-01 21:39:03 +1100)
- ------------------------------------------------------------------
powerpc updates for 5.16
- Enable STRICT_KERNEL_RWX for Freescale 85xx platforms.
- Activate CONFIG_STRICT_KERNEL_RWX by default, while still allowing it to be disabled.
- Add support for out-of-line static calls on 32-bit.
- Fix oopses doing bpf-to-bpf calls when STRICT_KERNEL_RWX is enabled.
- Fix boot hangs on e5500 due to stale value in ESR passed to do_page_fault().
- Fix several bugs on pseries in handling of device tree cache information for hotplugged
CPUs, and/or during partition migration.
- Various other small features and fixes.
Thanks to: Alexey Kardashevskiy, Alistair Popple, Anatolij Gustschin, Andrew Donnellan,
Athira Rajeev, Bixuan Cui, Bjorn Helgaas, Cédric Le Goater, Christophe Leroy, Daniel
Axtens, Daniel Henrique Barboza, Denis Kirjanov, Fabiano Rosas, Frederic Barrat, Gustavo
A. R. Silva, Hari Bathini, Jacques de Laval, Joel Stanley, Kai Song, Kajol Jain, Laurent
Vivier, Leonardo Bras, Madhavan Srinivasan, Nathan Chancellor, Nathan Lynch, Naveen N.
Rao, Nicholas Piggin, Nick Desaulniers, Niklas Schnelle, Oliver O'Halloran, Rob Herring,
Russell Currey, Srikar Dronamraju, Stan Johnson, Tyrel Datwyler, Uwe Kleine-König, Vasant
Hegde, Wan Jiabing, Xiaoming Ni.
- ------------------------------------------------------------------
Alexey Kardashevskiy (1):
powerps/pseries/dma: Add support for 2M IOMMU page size
Anatolij Gustschin (3):
powerpc/5200: dts: add missing pci ranges
powerpc/5200: dts: fix pci ranges warnings
powerpc/5200: dts: fix memory node unit name
Athira Rajeev (3):
powerpc/perf: Refactor the code definition of perf reg extended mask
powerpc/perf: Expose instruction and data address registers as part of extended regs
powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10
Bixuan Cui (1):
powerpc/44x/fsp2: add missing of_node_put
Christophe Leroy (31):
powerpc/476: Fix sparse report
powerpc/powermac: Remove stale declaration of pmac_md
powerpc/mem: Fix arch/powerpc/mm/mem.c:53:12: error: no previous prototype for 'create_section_mapping'
video: fbdev: chipsfb: use memset_io() instead of memset()
powerpc: Set max_mapnr correctly
powerpc: Mark .opd section read-only
powerpc/booke: Disable STRICT_KERNEL_RWX, DEBUG_PAGEALLOC and KFENCE
powerpc/fsl_booke: Rename fsl_booke.c to fsl_book3e.c
powerpc/fsl_booke: Take exec flag into account when setting TLBCAMs
powerpc/fsl_booke: Enable reloading of TLBCAM without switching to AS1
powerpc/fsl_booke: Tell map_mem_in_cams() if init is done
powerpc/fsl_booke: Allocate separate TLBCAMs for readonly memory
powerpc/fsl_booke: Update of TLBCAMs after init
powerpc/fsl_booke: Enable STRICT_KERNEL_RWX
powerpc/32: Don't use lmw/stmw for saving/restoring non volatile regs
powerpc/audit: Convert powerpc to AUDIT_ARCH_COMPAT_GENERIC
powerpc/time: Remove generic_suspend_{dis/en}able_irqs()
powerpc/machdep: Remove stale functions from ppc_md structure
powerpc/32: Add support for out-of-line static calls
powerpc: warn on emulation of dcbz instruction in kernel mode
powerpc/lib/sstep: Don't use __{get/put}_user() on kernel addresses
powerpc/8xx: Simplify TLB handling
powerpc: Activate CONFIG_STRICT_KERNEL_RWX by default
powerpc/breakpoint: Cleanup
powerpc/32: Don't use a struct based type for pte_t
powerpc/boot: Set LC_ALL=C in wrapper script
powerpc/nohash: Fix __ptep_set_access_flags() and ptep_set_wrprotect()
powerpc/book3e: Fix set_memory_x() and set_memory_nx()
powerpc/fsl_booke: Fix setting of exec flag when setting TLBCAMs
powerpc: Don't provide __kernel_map_pages() without ARCH_SUPPORTS_DEBUG_PAGEALLOC
powerpc/8xx: Fix Oops with STRICT_KERNEL_RWX without DEBUG_RODATA_TEST
Cédric Le Goater (1):
powerpc/boot: Use CONFIG_PPC_POWERNV to compile OPAL support
Daniel Axtens (1):
powerpc: Remove unused prototype for of_show_percpuinfo
Denis Kirjanov (1):
powerpc/xmon: fix task state output
Gustavo A. R. Silva (1):
powerpc/vas: Fix potential NULL pointer dereference
Hari Bathini (1):
powerpc/bpf: Fix write protecting JIT code
Joel Stanley (2):
powerpc/s64: Clarify that radix lacks DEBUG_PAGEALLOC
powerpc/64s: Default to 64K pages for 64 bit book3s
Kai Song (1):
powerpc/eeh: Fix docstrings in eeh.c
Laurent Vivier (1):
KVM: PPC: Tick accounting should defer vtime accounting 'til after IRQ handling
Michael Ellerman (4):
Revert "powerpc/audit: Convert powerpc to AUDIT_ARCH_COMPAT_GENERIC"
powerpc/dcr: Use cmplwi instead of 3-argument cmpli
MAINTAINERS: Update powerpc KVM entry
Merge branch 'topic/ppc-kvm' into next
Nathan Lynch (8):
powerpc: fix unbalanced node refcount in check_kvm_guest()
powerpc/paravirt: vcpu_is_preempted() commentary
powerpc/paravirt: correct preempt debug splat in vcpu_is_preempted()
powerpc/pseries/cpuhp: cache node corrections
powerpc/cpuhp: BUG -> WARN conversion in offline path
powerpc/pseries/cpuhp: delete add/remove_by_count code
powerpc/pseries/cpuhp: remove obsolete comment from pseries_cpu_die
powerpc/pseries/mobility: ignore ibm,platform-facilities updates
Nicholas Piggin (3):
KVM: PPC: Book3S HV: H_ENTER filter out reserved HPTE[B] value
powerpc/64s/interrupt: Fix check_return_regs_valid() false positive
powerpc/32e: Ignore ESR in instruction storage interrupt handler
Nick Desaulniers (1):
powerpc/asm: Remove UPD_CONSTR after GCC 4.9 removal
Niklas Schnelle (1):
powerpc: Drop superfluous pci_dev_is_added() calls
Russell Currey (2):
selftests/powerpc: Use date instead of EPOCHSECONDS in mitigation-patching.sh
powerpc/security: Use a mutex for interrupt exit code patching
Uwe Kleine-König (1):
powerpc/83xx/mpc8349emitx: Make mcu_gpiochip_remove() return void
Vasant Hegde (2):
powerpc/powernv/dump: Fix typo in comment
powerpc/powernv/prd: Unregister OPAL_MSG_PRD2 notifier during module unload
Wan Jiabing (2):
powerpc/pseries/iommu: Add of_node_put() before break
powerpc/kexec_file: Add of_node_put() before goto
Xiaoming Ni (2):
powerpc/85xx: Fix oops when mpc85xx_smp_guts_ids node cannot be found
powerpc/85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n
MAINTAINERS | 7 +-
arch/powerpc/Kconfig | 20 +-
arch/powerpc/boot/Makefile | 2 +-
arch/powerpc/boot/dts/a4m072.dts | 6 +-
arch/powerpc/boot/dts/charon.dts | 8 +-
arch/powerpc/boot/dts/digsy_mtc.dts | 8 +-
arch/powerpc/boot/dts/lite5200.dts | 8 +-
arch/powerpc/boot/dts/lite5200b.dts | 8 +-
arch/powerpc/boot/dts/media5200.dts | 8 +-
arch/powerpc/boot/dts/mpc5200b.dtsi | 6 +-
arch/powerpc/boot/dts/mucmc52.dts | 6 +-
arch/powerpc/boot/dts/o2d.dts | 2 +-
arch/powerpc/boot/dts/o2d.dtsi | 2 +-
arch/powerpc/boot/dts/o2dnt2.dts | 2 +-
arch/powerpc/boot/dts/o3dnt.dts | 2 +-
arch/powerpc/boot/dts/pcm030.dts | 6 +-
arch/powerpc/boot/dts/pcm032.dts | 8 +-
arch/powerpc/boot/dts/tqm5200.dts | 8 +-
arch/powerpc/boot/serial.c | 2 +-
arch/powerpc/boot/wrapper | 2 +
arch/powerpc/configs/cell_defconfig | 1 -
arch/powerpc/configs/g5_defconfig | 1 +
arch/powerpc/configs/maple_defconfig | 1 +
arch/powerpc/configs/microwatt_defconfig | 1 +
arch/powerpc/configs/pasemi_defconfig | 1 -
arch/powerpc/configs/powernv_defconfig | 1 -
arch/powerpc/configs/ppc64_defconfig | 1 -
arch/powerpc/configs/ps3_defconfig | 1 +
arch/powerpc/configs/pseries_defconfig | 1 -
arch/powerpc/configs/skiroot_defconfig | 1 -
arch/powerpc/include/asm/asm-const.h | 2 -
arch/powerpc/include/asm/atomic.h | 8 +-
arch/powerpc/include/asm/book3s/64/hash.h | 2 +
arch/powerpc/include/asm/book3s/64/pgtable.h | 10 +
arch/powerpc/include/asm/book3s/64/radix.h | 3 +
arch/powerpc/include/asm/io.h | 4 +-
arch/powerpc/include/asm/iommu.h | 6 -
arch/powerpc/include/asm/kexec.h | 1 -
arch/powerpc/include/asm/kvm_book3s_64.h | 4 +
arch/powerpc/include/asm/machdep.h | 13 -
arch/powerpc/include/asm/nohash/32/pgtable.h | 21 +-
arch/powerpc/include/asm/nohash/32/pte-8xx.h | 22 ++
arch/powerpc/include/asm/nohash/64/pgtable.h | 5 -
arch/powerpc/include/asm/nohash/pte-book3e.h | 18 +-
arch/powerpc/include/asm/nohash/tlbflush.h | 15 +
arch/powerpc/include/asm/paravirt.h | 40 ++-
arch/powerpc/include/asm/pgtable-types.h | 18 +-
arch/powerpc/include/asm/ppc_asm.h | 4 +-
arch/powerpc/include/asm/static_call.h | 28 ++
arch/powerpc/include/asm/uaccess.h | 6 +-
arch/powerpc/include/uapi/asm/perf_regs.h | 28 +-
arch/powerpc/kernel/Makefile | 2 +-
arch/powerpc/kernel/align.c | 1 +
arch/powerpc/kernel/eeh.c | 12 +-
arch/powerpc/kernel/firmware.c | 7 +-
arch/powerpc/kernel/head_8xx.S | 2 +-
arch/powerpc/kernel/head_booke.h | 15 +-
arch/powerpc/kernel/hw_breakpoint_constraints.c | 15 +-
arch/powerpc/kernel/interrupt.c | 2 +-
arch/powerpc/kernel/setup-common.c | 3 -
arch/powerpc/kernel/static_call.c | 37 +++
arch/powerpc/kernel/swsusp_64.c | 5 -
arch/powerpc/kernel/swsusp_asm64.S | 1 -
arch/powerpc/kernel/sysfs.c | 3 +-
arch/powerpc/kernel/time.c | 22 +-
arch/powerpc/kernel/vmlinux.lds.S | 12 +-
arch/powerpc/kexec/core.c | 13 -
arch/powerpc/kexec/core_32.c | 2 +-
arch/powerpc/kexec/core_64.c | 2 +-
arch/powerpc/kexec/file_load_64.c | 1 +
arch/powerpc/kvm/book3s_hv.c | 30 +-
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 9 +
arch/powerpc/kvm/booke.c | 16 +-
arch/powerpc/kvm/powerpc.c | 4 +-
arch/powerpc/lib/feature-fixups.c | 11 +
arch/powerpc/lib/sstep.c | 197 +++++++++----
arch/powerpc/mm/book3s64/hash_utils.c | 2 +-
arch/powerpc/mm/book3s64/radix_pgtable.c | 7 +
arch/powerpc/mm/mem.c | 4 +-
arch/powerpc/mm/mmu_decl.h | 4 +-
arch/powerpc/mm/nohash/Makefile | 4 +-
arch/powerpc/mm/nohash/{fsl_booke.c => fsl_book3e.c} | 76 ++++-
arch/powerpc/mm/nohash/kaslr_booke.c | 2 +-
arch/powerpc/mm/nohash/tlb.c | 6 +-
arch/powerpc/mm/nohash/tlb_low.S | 8 +-
arch/powerpc/mm/nohash/tlb_low_64e.S | 8 +-
arch/powerpc/mm/pgtable.c | 2 +-
arch/powerpc/mm/pgtable_32.c | 2 +-
arch/powerpc/net/bpf_jit_comp.c | 2 +-
arch/powerpc/perf/perf_regs.c | 4 +
arch/powerpc/perf/power10-events-list.h | 8 +-
arch/powerpc/perf/power10-pmu.c | 44 ++-
arch/powerpc/platforms/44x/fsp2.c | 2 +
arch/powerpc/platforms/44x/ppc476.c | 4 +-
arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | 7 +-
arch/powerpc/platforms/85xx/Makefile | 4 +-
arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 7 +-
arch/powerpc/platforms/85xx/smp.c | 12 +-
arch/powerpc/platforms/book3s/vas-api.c | 4 +-
arch/powerpc/platforms/powermac/pmac.h | 1 -
arch/powerpc/platforms/powermac/setup.c | 2 -
arch/powerpc/platforms/powernv/opal-dump.c | 2 +-
arch/powerpc/platforms/powernv/opal-prd.c | 12 +-
arch/powerpc/platforms/powernv/pci-sriov.c | 6 -
arch/powerpc/platforms/pseries/hotplug-cpu.c | 298 +++++---------------
arch/powerpc/platforms/pseries/iommu.c | 14 +-
arch/powerpc/platforms/pseries/mobility.c | 34 +++
arch/powerpc/platforms/pseries/setup.c | 3 +-
arch/powerpc/sysdev/dcr-low.S | 2 +-
arch/powerpc/xmon/xmon.c | 3 +-
drivers/video/fbdev/chipsfb.c | 2 +-
tools/testing/selftests/powerpc/security/mitigation-patching.sh | 4 +-
112 files changed, 837 insertions(+), 582 deletions(-)
create mode 100644 arch/powerpc/include/asm/static_call.h
create mode 100644 arch/powerpc/kernel/static_call.c
rename arch/powerpc/mm/nohash/{fsl_booke.c => fsl_book3e.c} (84%)
-----BEGIN PGP SIGNATURE-----
iQIzBAEBCAAdFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAmGFJ7sACgkQUevqPMjh
pYAxrQ/+MdmbOkjDzXJMkw55QAZH+iRmUZwkFzLtl2lrs/7d83x8VA+u2QVDsMgG
OGwm0jszQF553pNPwZ07GZfTi++8WpIIa/SR1y2wTG3JUadBF/owUjZPEb9iKBAH
mubh3TMiqWiyLCKtnalQdRUCUS/CpC9zJ3UQpvN5Ehh+A8+yJkreAj8CXT9nd27V
TmYrVmrOoFuDybCNeaUYkizp2cKcModb+5+QUkDsCVI7eKP8a4ddDS8h9q/e/Wru
SoyrMgLK2zrRXjvaKBD6y6h9fJbohL+gE0zjFgboxAN/EBCSNsQXI0lBpkiWh91d
c63dO7DlIBx5MZ7990g+QiV88+kz0FAB5wCt+d9z7bV9I9cKOwaoQlfexV3+V0Kb
n6kKb2Feqt8fo+79qxXLRZ3ZfsNRo1zBPQaVTNSyKfLpqc6dCe8eTNxTm1V6zv1R
gWBts0N7YqBAwB0gLCN5BEr11p33n84jMKsahJ38mWjTIRVK9shbjMRwAV11T6Qy
8aw4axp8YGPvalawvo7u+SaeZ6QOgy6O7pfVwCHyUGMf0AagVE/I/sz67ScG6qhL
JmyTuPyEQ7z+1BxlYNXWOS7FlzoMPSs5LIyOJoP+C3ly+GTnwlFkyJpJjss69YDU
P2Z8kfLHcsuFxe0q4wGmWFGzsVqFja0TJ2W+uVZjIOlJOSA0q6c=
=H9IO
-----END PGP SIGNATURE-----
^ permalink raw reply
* Re: [PATCH v2 1/5] powerpc/watchdog: Fix missed watchdog reset due to memory ordering race
From: Laurent Dufour @ 2021-11-05 12:15 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev
In-Reply-To: <1636112623.v4srs2ivuv.astroid@bobo.none>
Le 05/11/2021 à 12:46, Nicholas Piggin a écrit :
> Excerpts from Laurent Dufour's message of November 5, 2021 7:20 pm:
>> Le 04/11/2021 à 17:10, Nicholas Piggin a écrit :
>>> It is possible for all CPUs to miss the pending cpumask becoming clear,
>>> and then nobody resetting it, which will cause the lockup detector to
>>> stop working. It will eventually expire, but watchdog_smp_panic will
>>> avoid doing anything if the pending mask is clear and it will never be
>>> reset.
>>>
>>> Order the cpumask clear vs the subsequent test to close this race.
>>>
>>> Add an extra check for an empty pending mask when the watchdog fires and
>>> finds its bit still clear, to try to catch any other possible races or
>>> bugs here and keep the watchdog working. The extra test in
>>> arch_touch_nmi_watchdog is required to prevent the new warning from
>>> firing off.
>>>
>>> Debugged-by: Laurent Dufour <ldufour@linux.ibm.com>
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>> arch/powerpc/kernel/watchdog.c | 36 +++++++++++++++++++++++++++++++++-
>>> 1 file changed, 35 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
>>> index f9ea0e5357f9..be80071336a4 100644
>>> --- a/arch/powerpc/kernel/watchdog.c
>>> +++ b/arch/powerpc/kernel/watchdog.c
>>> @@ -135,6 +135,10 @@ static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
>>> {
>>> cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
>>> cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
>>> + /*
>>> + * See wd_smp_clear_cpu_pending()
>>> + */
>>> + smp_mb();
>>> if (cpumask_empty(&wd_smp_cpus_pending)) {
>>> wd_smp_last_reset_tb = tb;
>>> cpumask_andnot(&wd_smp_cpus_pending,
>>> @@ -215,13 +219,39 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
>>>
>>> cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
>>> wd_smp_unlock(&flags);
>>> + } else {
>>> + /*
>>> + * The last CPU to clear pending should have reset the
>>> + * watchdog, yet we find it empty here. This should not
>>> + * happen but we can try to recover and avoid a false
>>> + * positive if it does.
>>> + */
>>> + if (WARN_ON_ONCE(cpumask_empty(&wd_smp_cpus_pending)))
>>> + goto none_pending;
>>
>> I ran a stress test on my victim node on top of this patch and hit that warning:
>>
>> [ C475] ------------[ cut here ]------------
>> [ C475] WARNING: CPU: 475 PID: 0 at
>> /home/laurent/src/linux-ppc/arch/powerpc/kernel/watchdog.c:260
>> wd_smp_clear_cpu_pending+0x320/0x4b0
>> [ C475] Modules linked in: rpadlpar_io rpaphp xt_tcpudp iptable_filter
>> ip_tables x_tables xfs pseries_rng rng_core vmx_crypto gf128mul be2net fuse
>> btrfs blake2b_generic libcrc32c xor zstd_compress lzo_compress raid6_pq
>> dm_service_time crc32c_vpmsum lpfc crc_t10dif crct10dif_generic crct10dif_common
>> dm_mirror dm_region_hash dm_log dm_multipath scsi_dh_rdac scsi_dh_alua autofs4
>> [ C475] CPU: 475 PID: 0 Comm: swapper/475 Kdump: loaded Not tainted
>> 5.15.0-rc2-ppc-bz192129+ #72
>> [ C475] NIP: c00000000003d710 LR: c00000000003d478 CTR: c00000000003e2e0
>> [ C475] REGS: c00006b16026f420 TRAP: 0700 Not tainted (5.15.0-rc2-ppc-bz192129+)
>> [ C475] MSR: 8000000000029033 <SF,EE,ME,IR,DR,RI,LE> CR: 22000222 XER: 20000000
>> [ C475] CFAR: c00000000003d480 IRQMASK: 3
>> [ C475] GPR00: c00000000003e3bc c00006b16026f6c0 c000000001b28700 0000000000000800
>> [ C475] GPR04: 0000000000000000 0000000000000800 0000000000000800 0000000000000000
>> [ C475] GPR08: 0000000000000000 0000000000000000 00000000000000f8 00013f06986272e7
>> [ C475] GPR12: c00000000003e2e0 c000000007d3df00 0000000000000000 000000001f043b60
>> [ C475] GPR16: c00006b1601b7b00 0000000000000000 c00000000003e2e0 0000000000000001
>> [ C475] GPR20: 0000347411d4cf28 c00007adbdb0a898 0000000000000001 0000000000000000
>> [ C475] GPR24: 0000000000000000 0000000000000003 c000000001b6d7d0 00013f0698627d84
>> [ C475] GPR28: c000000001bd05c8 c000000001bd05b8 c000000001bd06c8 00000000000001db
>> [ C475] NIP [c00000000003d710] wd_smp_clear_cpu_pending+0x320/0x4b0
>> [ C475] LR [c00000000003d478] wd_smp_clear_cpu_pending+0x88/0x4b0
>> [ C475] Call Trace:
>> [ C475] [c00006b16026f6c0] [0000000000000001] 0x1 (unreliable)
>> [ C475] [c00006b16026f770] [c00000000003e3bc] watchdog_timer_fn+0xdc/0x5a0
>> [ C475] [c00006b16026f840] [c000000000245a4c] __hrtimer_run_queues+0x49c/0x700
>> [ C475] [c00006b16026f8f0] [c000000000246c20] hrtimer_interrupt+0x110/0x310
>> [ C475] [c00006b16026f9a0] [c0000000000292f8] timer_interrupt+0x1e8/0x5a0
>> [ C475] [c00006b16026fa00] [c000000000009a00] decrementer_common_virt+0x210/0x220
>> [ C475] --- interrupt: 900 at plpar_hcall_norets_notrace+0x18/0x2c
>> [ C475] NIP: c0000000000e5dd0 LR: c000000000c18f04 CTR: 0000000000000000
>> [ C475] REGS: c00006b16026fa70 TRAP: 0900 Not tainted (5.15.0-rc2-ppc-bz192129+)
>> [ C475] MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 22000824 XER: 20000000
>> [ C475] CFAR: 0000000000000c00 IRQMASK: 0
>> [ C475] GPR00: 0000000000000000 c00006b16026fd10 c000000001b28700 0000000000000000
>> [ C475] GPR04: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> [ C475] GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000001
>> [ C475] GPR12: 000000000000ffff c000000007d3df00 0000000000000000 000000001f043b60
>> [ C475] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> [ C475] GPR20: 0000000000000000 0000000000000000 0000000000000000 c000000001a46cd0
>> [ C475] GPR24: c000000001b68e38 000034738d859946 0000000000000000 0000000000000001
>> [ C475] GPR28: 0000000000000000 0000000000000001 c000000001472360 c000000001472368
>> [ C475] NIP [c0000000000e5dd0] plpar_hcall_norets_notrace+0x18/0x2c
>> [ C475] LR [c000000000c18f04] check_and_cede_processor.part.2+0x24/0x70
>> [ C475] --- interrupt: 900
>> [ C475] [c00006b16026fd10] [c00007adbdb0a880] 0xc00007adbdb0a880 (unreliable)
>> [ C475] [c00006b16026fd70] [c000000000c194f4] dedicated_cede_loop+0x174/0x200
>> [ C475] [c00006b16026fdb0] [c000000000c15b2c] cpuidle_enter_state+0x3ac/0x6d0
>> [ C475] [c00006b16026fe20] [c000000000c15ef0] cpuidle_enter+0x50/0x70
>> [ C475] [c00006b16026fe60] [c0000000001a7f9c] call_cpuidle+0x4c/0x90
>> [ C475] [c00006b16026fe80] [c0000000001a84f0] do_idle+0x310/0x3c0
>> [ C475] [c00006b16026ff00] [c0000000001a8948] cpu_startup_entry+0x38/0x50
>> [ C475] [c00006b16026ff30] [c00000000005fb5c] start_secondary+0x2bc/0x2f0
>> [ C475] [c00006b16026ff90] [c00000000000d254] start_secondary_prolog+0x10/0x14
>> [ C475] Instruction dump:
>> [ C475] 48eb7049 60000000 e8610068 4bfffee4 392d0918 7c20492a 482c54f1 60000000
>> [ C475] 4bfffe4c 60000000 60000000 60000000 <0fe00000> fb210078 fb410080 fb610088
>> [ C475] irq event stamp: 0
>> [ C475] hardirqs last enabled at (0): [<0000000000000000>] 0x0
>> [ C475] hardirqs last disabled at (0): [<c00000000014342c>]
>> copy_process+0x76c/0x1e00
>> [ C475] softirqs last enabled at (0): [<c00000000014342c>]
>> copy_process+0x76c/0x1e00
>> [ C475] softirqs last disabled at (0): [<0000000000000000>] 0x0
>> [ C475] ---[ end trace 6e8311d1692d057b ]---
>>
>> I guess there is a possible race here between watchdog_timer_interrupt() and
>> another CPU watchdog_smp_panic().
>
> Hmm, yeah of course there would be. May have to just remove that warn.
Yes I do agree, it should be removed.
Cheers,
Laurent.
^ permalink raw reply
* [Bug 214913] [xfstests generic/051] BUG: Kernel NULL pointer dereference on read at 0x00000108 NIP [c0000000000372e4] tm_cgpr_active+0x14/0x40
From: bugzilla-daemon @ 2021-11-05 11:53 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-214913-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=214913
--- Comment #4 from Zorro Lang (zlang@redhat.com) ---
(In reply to Michal Suchanek from comment #3)
> What CPU is this?
>
> Does it go away if you boot with ppc_tm=off
(In reply to Michael Ellerman from comment #2)
> Thanks for the report, I agree this looks like a powerpc bug not an XFS bug.
>
> I won't have time to look at this until next week probably, unless someone
> beats me to it.
Thanks for your reply. (Un)fortunately, because Linux keeps being updated, I can't
reproduce this panic on the latest mainline linux master branch now. The HEAD
commit is 7ddb58cb0eca. From 8bb7eca972ad (v5.15) to 7ddb58cb0eca (v5.15+),
there are many changes, and I can't be sure which commit fixes this bug or hides
it. Do you know whether a known issue like this has been fixed?
Thanks,
Zorro
--
You may reply to this email to add a comment.
You are receiving this mail because:
You are watching the assignee of the bug.
^ permalink raw reply
* Re: [PATCH v2 1/5] powerpc/watchdog: Fix missed watchdog reset due to memory ordering race
From: Nicholas Piggin @ 2021-11-05 11:46 UTC (permalink / raw)
To: Laurent Dufour, linuxppc-dev
In-Reply-To: <e060e154-a807-d087-a80f-a802a858c13e@linux.ibm.com>
Excerpts from Laurent Dufour's message of November 5, 2021 7:20 pm:
> Le 04/11/2021 à 17:10, Nicholas Piggin a écrit :
>> It is possible for all CPUs to miss the pending cpumask becoming clear,
>> and then nobody resetting it, which will cause the lockup detector to
>> stop working. It will eventually expire, but watchdog_smp_panic will
>> avoid doing anything if the pending mask is clear and it will never be
>> reset.
>>
>> Order the cpumask clear vs the subsequent test to close this race.
>>
>> Add an extra check for an empty pending mask when the watchdog fires and
>> finds its bit still clear, to try to catch any other possible races or
>> bugs here and keep the watchdog working. The extra test in
>> arch_touch_nmi_watchdog is required to prevent the new warning from
>> firing off.
>>
>> Debugged-by: Laurent Dufour <ldufour@linux.ibm.com>
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> arch/powerpc/kernel/watchdog.c | 36 +++++++++++++++++++++++++++++++++-
>> 1 file changed, 35 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
>> index f9ea0e5357f9..be80071336a4 100644
>> --- a/arch/powerpc/kernel/watchdog.c
>> +++ b/arch/powerpc/kernel/watchdog.c
>> @@ -135,6 +135,10 @@ static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
>> {
>> cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
>> cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
>> + /*
>> + * See wd_smp_clear_cpu_pending()
>> + */
>> + smp_mb();
>> if (cpumask_empty(&wd_smp_cpus_pending)) {
>> wd_smp_last_reset_tb = tb;
>> cpumask_andnot(&wd_smp_cpus_pending,
>> @@ -215,13 +219,39 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
>>
>> cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
>> wd_smp_unlock(&flags);
>> + } else {
>> + /*
>> + * The last CPU to clear pending should have reset the
>> + * watchdog, yet we find it empty here. This should not
>> + * happen but we can try to recover and avoid a false
>> + * positive if it does.
>> + */
>> + if (WARN_ON_ONCE(cpumask_empty(&wd_smp_cpus_pending)))
>> + goto none_pending;
>
> I ran a stress test on my victim node on top of this patch and hit that warning:
>
> [ C475] ------------[ cut here ]------------
> [ C475] WARNING: CPU: 475 PID: 0 at
> /home/laurent/src/linux-ppc/arch/powerpc/kernel/watchdog.c:260
> wd_smp_clear_cpu_pending+0x320/0x4b0
> [ C475] Modules linked in: rpadlpar_io rpaphp xt_tcpudp iptable_filter
> ip_tables x_tables xfs pseries_rng rng_core vmx_crypto gf128mul be2net fuse
> btrfs blake2b_generic libcrc32c xor zstd_compress lzo_compress raid6_pq
> dm_service_time crc32c_vpmsum lpfc crc_t10dif crct10dif_generic crct10dif_common
> dm_mirror dm_region_hash dm_log dm_multipath scsi_dh_rdac scsi_dh_alua autofs4
> [ C475] CPU: 475 PID: 0 Comm: swapper/475 Kdump: loaded Not tainted
> 5.15.0-rc2-ppc-bz192129+ #72
> [ C475] NIP: c00000000003d710 LR: c00000000003d478 CTR: c00000000003e2e0
> [ C475] REGS: c00006b16026f420 TRAP: 0700 Not tainted (5.15.0-rc2-ppc-bz192129+)
> [ C475] MSR: 8000000000029033 <SF,EE,ME,IR,DR,RI,LE> CR: 22000222 XER: 20000000
> [ C475] CFAR: c00000000003d480 IRQMASK: 3
> [ C475] GPR00: c00000000003e3bc c00006b16026f6c0 c000000001b28700 0000000000000800
> [ C475] GPR04: 0000000000000000 0000000000000800 0000000000000800 0000000000000000
> [ C475] GPR08: 0000000000000000 0000000000000000 00000000000000f8 00013f06986272e7
> [ C475] GPR12: c00000000003e2e0 c000000007d3df00 0000000000000000 000000001f043b60
> [ C475] GPR16: c00006b1601b7b00 0000000000000000 c00000000003e2e0 0000000000000001
> [ C475] GPR20: 0000347411d4cf28 c00007adbdb0a898 0000000000000001 0000000000000000
> [ C475] GPR24: 0000000000000000 0000000000000003 c000000001b6d7d0 00013f0698627d84
> [ C475] GPR28: c000000001bd05c8 c000000001bd05b8 c000000001bd06c8 00000000000001db
> [ C475] NIP [c00000000003d710] wd_smp_clear_cpu_pending+0x320/0x4b0
> [ C475] LR [c00000000003d478] wd_smp_clear_cpu_pending+0x88/0x4b0
> [ C475] Call Trace:
> [ C475] [c00006b16026f6c0] [0000000000000001] 0x1 (unreliable)
> [ C475] [c00006b16026f770] [c00000000003e3bc] watchdog_timer_fn+0xdc/0x5a0
> [ C475] [c00006b16026f840] [c000000000245a4c] __hrtimer_run_queues+0x49c/0x700
> [ C475] [c00006b16026f8f0] [c000000000246c20] hrtimer_interrupt+0x110/0x310
> [ C475] [c00006b16026f9a0] [c0000000000292f8] timer_interrupt+0x1e8/0x5a0
> [ C475] [c00006b16026fa00] [c000000000009a00] decrementer_common_virt+0x210/0x220
> [ C475] --- interrupt: 900 at plpar_hcall_norets_notrace+0x18/0x2c
> [ C475] NIP: c0000000000e5dd0 LR: c000000000c18f04 CTR: 0000000000000000
> [ C475] REGS: c00006b16026fa70 TRAP: 0900 Not tainted (5.15.0-rc2-ppc-bz192129+)
> [ C475] MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 22000824 XER: 20000000
> [ C475] CFAR: 0000000000000c00 IRQMASK: 0
> [ C475] GPR00: 0000000000000000 c00006b16026fd10 c000000001b28700 0000000000000000
> [ C475] GPR04: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
> [ C475] GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000001
> [ C475] GPR12: 000000000000ffff c000000007d3df00 0000000000000000 000000001f043b60
> [ C475] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
> [ C475] GPR20: 0000000000000000 0000000000000000 0000000000000000 c000000001a46cd0
> [ C475] GPR24: c000000001b68e38 000034738d859946 0000000000000000 0000000000000001
> [ C475] GPR28: 0000000000000000 0000000000000001 c000000001472360 c000000001472368
> [ C475] NIP [c0000000000e5dd0] plpar_hcall_norets_notrace+0x18/0x2c
> [ C475] LR [c000000000c18f04] check_and_cede_processor.part.2+0x24/0x70
> [ C475] --- interrupt: 900
> [ C475] [c00006b16026fd10] [c00007adbdb0a880] 0xc00007adbdb0a880 (unreliable)
> [ C475] [c00006b16026fd70] [c000000000c194f4] dedicated_cede_loop+0x174/0x200
> [ C475] [c00006b16026fdb0] [c000000000c15b2c] cpuidle_enter_state+0x3ac/0x6d0
> [ C475] [c00006b16026fe20] [c000000000c15ef0] cpuidle_enter+0x50/0x70
> [ C475] [c00006b16026fe60] [c0000000001a7f9c] call_cpuidle+0x4c/0x90
> [ C475] [c00006b16026fe80] [c0000000001a84f0] do_idle+0x310/0x3c0
> [ C475] [c00006b16026ff00] [c0000000001a8948] cpu_startup_entry+0x38/0x50
> [ C475] [c00006b16026ff30] [c00000000005fb5c] start_secondary+0x2bc/0x2f0
> [ C475] [c00006b16026ff90] [c00000000000d254] start_secondary_prolog+0x10/0x14
> [ C475] Instruction dump:
> [ C475] 48eb7049 60000000 e8610068 4bfffee4 392d0918 7c20492a 482c54f1 60000000
> [ C475] 4bfffe4c 60000000 60000000 60000000 <0fe00000> fb210078 fb410080 fb610088
> [ C475] irq event stamp: 0
> [ C475] hardirqs last enabled at (0): [<0000000000000000>] 0x0
> [ C475] hardirqs last disabled at (0): [<c00000000014342c>]
> copy_process+0x76c/0x1e00
> [ C475] softirqs last enabled at (0): [<c00000000014342c>]
> copy_process+0x76c/0x1e00
> [ C475] softirqs last disabled at (0): [<0000000000000000>] 0x0
> [ C475] ---[ end trace 6e8311d1692d057b ]---
>
> I guess there is a possible race here between watchdog_timer_interrupt() and
> another CPU watchdog_smp_panic().
Hmm, yeah of course there would be. May have to just remove that warn.
Thanks,
Nick
^ permalink raw reply
* Re: [PATCH 0/3] KEXEC_SIG with appended signature
From: Daniel Axtens @ 2021-11-05 10:55 UTC (permalink / raw)
To: Michal Suchanek, keyrings
Cc: Thiago Jung Bauermann, Rob Herring, Vasily Gorbik, linux-s390,
Heiko Carstens, linux-kernel, David Howells,
Lakshmi Ramasubramanian, Luis Chamberlain, Paul Mackerras,
Frank van der Linden, Jessica Yu, Alexander Gordeev,
Michal Suchanek, linuxppc-dev, Christian Borntraeger,
Hari Bathini
In-Reply-To: <cover.1635948742.git.msuchanek@suse.de>
Michal Suchanek <msuchanek@suse.de> writes:
> S390 uses appended signature for kernel but implements the check
> separately from module loader.
>
> Support for secure boot on powerpc with appended signature is planned -
> grub patches submitted upstream but not yet merged.
Power Non-Virtualised / OpenPower already supports secure boot via kexec
with signature verification via IMA. I think you have now sent a
follow-up series that merges some of the IMA implementation, I just
wanted to make sure it was clear that we actually already have support
for this in the kernel, it's just grub that is getting new support.
> This is an attempt at unified appended signature verification.
I am always in favour of fewer reimplementations of the same feature in
the kernel :)
Regards,
Daniel
>
> Thanks
>
> Michal
>
> Michal Suchanek (3):
> s390/kexec_file: Don't opencode appended signature verification.
> module: strip the signature marker in the verification function.
> powerpc/kexec_file: Add KEXEC_SIG support.
>
> arch/powerpc/Kconfig | 11 +++++++
> arch/powerpc/kexec/elf_64.c | 14 +++++++++
> arch/s390/kernel/machine_kexec_file.c | 42 +++------------------------
> include/linux/verification.h | 3 ++
> kernel/module-internal.h | 2 --
> kernel/module.c | 11 +++----
> kernel/module_signing.c | 32 ++++++++++++++------
> 7 files changed, 59 insertions(+), 56 deletions(-)
>
> --
> 2.31.1
^ permalink raw reply
* [PATCH 07/11] powerpc/xive: Add a debugfs file to dump EQs
From: Cédric Le Goater @ 2021-11-05 10:26 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20211105102636.1016378-1-clg@kaod.org>
The XIVE driver under Linux uses a single interrupt priority and only
one event queue is configured per CPU. Expose the contents under
a 'xive/eqs/cpuX' debugfs file.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
arch/powerpc/sysdev/xive/common.c | 37 +++++++++++++++++++++++++++++++
1 file changed, 37 insertions(+)
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index c5167f284da5..75c683ffae7e 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1775,9 +1775,40 @@ static int xive_ipi_debug_show(struct seq_file *m, void *private)
}
DEFINE_SHOW_ATTRIBUTE(xive_ipi_debug);
+static void xive_eq_debug_show_one(struct seq_file *m, struct xive_q *q, u8 prio)
+{
+ int i;
+
+ seq_printf(m, "EQ%d idx=%d T=%d\n", prio, q->idx, q->toggle);
+ if (q->qpage) {
+ for (i = 0; i < q->msk + 1; i++) {
+ if (!(i % 8))
+ seq_printf(m, "%05d ", i);
+ seq_printf(m, "%08x%s", be32_to_cpup(q->qpage + i),
+ (i + 1) % 8 ? " " : "\n");
+ }
+ }
+ seq_puts(m, "\n");
+}
+
+static int xive_eq_debug_show(struct seq_file *m, void *private)
+{
+ int cpu = (long)m->private;
+ struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
+
+ if (xc)
+ xive_eq_debug_show_one(m, &xc->queue[xive_irq_priority],
+ xive_irq_priority);
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(xive_eq_debug);
+
static void xive_core_debugfs_create(void)
{
struct dentry *xive_dir;
+ struct dentry *xive_eq_dir;
+ long cpu;
+ char name[16];
xive_dir = debugfs_create_dir("xive", arch_debugfs_dir);
if (IS_ERR(xive_dir))
@@ -1787,6 +1818,12 @@ static void xive_core_debugfs_create(void)
NULL, &xive_ipi_debug_fops);
debugfs_create_file("interrupts", 0400, xive_dir,
NULL, &xive_irq_debug_fops);
+ xive_eq_dir = debugfs_create_dir("eqs", xive_dir);
+ for_each_possible_cpu(cpu) {
+ snprintf(name, sizeof(name), "cpu%ld", cpu);
+ debugfs_create_file(name, 0400, xive_eq_dir, (void *)cpu,
+ &xive_eq_debug_fops);
+ }
}
#endif /* CONFIG_DEBUG_FS */
--
2.31.1
^ permalink raw reply related
* [PATCH 05/11] powerpc/xive: Change the debugfs file 'xive' into a directory
From: Cédric Le Goater @ 2021-11-05 10:26 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20211105102636.1016378-1-clg@kaod.org>
Use a 'cpus' file to dump CPU states and 'interrupts' to dump IRQ states.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
arch/powerpc/sysdev/xive/common.c | 36 +++++++++++++++++++++----------
1 file changed, 25 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index b71cc1020296..0b34ad5748ee 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1760,17 +1760,10 @@ static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d)
seq_puts(m, "\n");
}
-static int xive_core_debug_show(struct seq_file *m, void *private)
+static int xive_irq_debug_show(struct seq_file *m, void *private)
{
unsigned int i;
struct irq_desc *desc;
- int cpu;
-
- if (xive_ops->debug_show)
- xive_ops->debug_show(m, private);
-
- for_each_possible_cpu(cpu)
- xive_debug_show_cpu(m, cpu);
for_each_irq_desc(i, desc) {
struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i);
@@ -1780,12 +1773,33 @@ static int xive_core_debug_show(struct seq_file *m, void *private)
}
return 0;
}
-DEFINE_SHOW_ATTRIBUTE(xive_core_debug);
+DEFINE_SHOW_ATTRIBUTE(xive_irq_debug);
+
+static int xive_cpu_debug_show(struct seq_file *m, void *private)
+{
+ int cpu;
+
+ if (xive_ops->debug_show)
+ xive_ops->debug_show(m, private);
+
+ for_each_possible_cpu(cpu)
+ xive_debug_show_cpu(m, cpu);
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(xive_cpu_debug);
static void xive_core_debugfs_create(void)
{
- debugfs_create_file("xive", 0400, arch_debugfs_dir,
- NULL, &xive_core_debug_fops);
+ struct dentry *xive_dir;
+
+ xive_dir = debugfs_create_dir("xive", arch_debugfs_dir);
+ if (IS_ERR(xive_dir))
+ return;
+
+ debugfs_create_file("cpus", 0400, xive_dir,
+ NULL, &xive_cpu_debug_fops);
+ debugfs_create_file("interrupts", 0400, xive_dir,
+ NULL, &xive_irq_debug_fops);
}
#endif /* CONFIG_DEBUG_FS */
--
2.31.1
^ permalink raw reply related
* [PATCH 11/11] powerpc/smp: Add a doorbell=off kernel parameter
From: Cédric Le Goater @ 2021-11-05 10:26 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20211105102636.1016378-1-clg@kaod.org>
On processors with a XIVE interrupt controller (POWER9 and above), the
kernel can use either doorbells or XIVE to generate CPU IPIs. Sending
doorbell is generally preferred to using the XIVE IC because it is
faster. There are cases where we want to avoid doorbells and use XIVE
only, for debug or performance. Only useful on POWER9 and above.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
arch/powerpc/include/asm/dbell.h | 1 +
arch/powerpc/kernel/dbell.c | 17 +++++++++++++++++
arch/powerpc/platforms/powernv/smp.c | 7 +++++--
arch/powerpc/platforms/pseries/smp.c | 3 +++
Documentation/admin-guide/kernel-parameters.txt | 10 ++++++++++
5 files changed, 36 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 3e9da22a2779..07775aa3e81b 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -90,6 +90,7 @@ static inline void ppc_msgsync(void)
#endif /* CONFIG_PPC_BOOK3S */
extern void doorbell_exception(struct pt_regs *regs);
+extern bool doorbell_disabled;
static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag)
{
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 5545c9cd17c1..681ee4775629 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -38,6 +38,23 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
set_irq_regs(old_regs);
}
+
+bool doorbell_disabled;
+
+static int __init doorbell_cmdline(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ if (strncmp(arg, "off", 3) == 0) {
+ pr_info("Doorbell disabled on kernel command line\n");
+ doorbell_disabled = true;
+ }
+
+ return 0;
+}
+__setup("doorbell=", doorbell_cmdline);
+
#else /* CONFIG_SMP */
DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
{
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index cbb67813cd5d..1311bda9446a 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -338,10 +338,13 @@ static void __init pnv_smp_probe(void)
ic_cause_ipi = smp_ops->cause_ipi;
WARN_ON(!ic_cause_ipi);
- if (cpu_has_feature(CPU_FTR_ARCH_300))
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (doorbell_disabled)
+ return;
smp_ops->cause_ipi = doorbell_global_ipi;
- else
+ } else {
smp_ops->cause_ipi = pnv_cause_ipi;
+ }
}
}
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index f47429323eee..3bc9e6aaf645 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -229,6 +229,9 @@ static __init void pSeries_smp_probe(void)
return;
}
+ if (doorbell_disabled)
+ return;
+
/*
* Under PowerVM, FSCR[MSGP] is enabled as guest vCPU siblings are
* gang scheduled on the same physical core, so doorbells are always
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 10fa093251e8..2e1284febe39 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1041,6 +1041,16 @@
The filter can be disabled or changed to another
driver later using sysfs.
+ doorbell=off [PPC]
+ On processors with a XIVE interrupt controller
+ (POWER9 and above), the kernel can use either
+ doorbells or XIVE to generate CPU IPIs. Sending
+ doorbell is generally preferred to using the XIVE
+ IC because it is faster. There are cases where
+ we want to avoid doorbells and use XIVE only,
+ for debug or performance. Only useful on
+ POWER9 and above.
+
driver_async_probe= [KNL]
List of driver names to be probed asynchronously.
Format: <driver_name1>,<driver_name2>...
--
2.31.1
^ permalink raw reply related
* [PATCH 06/11] powerpc/xive: Rename the 'cpus' debugfs file to 'ipis'
From: Cédric Le Goater @ 2021-11-05 10:26 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20211105102636.1016378-1-clg@kaod.org>
and remove the EQ entries output which is not very useful since only
the next two events of the queue are taken into account. We will
improve the dump of the EQ in the next patches.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
arch/powerpc/sysdev/xive/common.c | 27 +++++++--------------------
1 file changed, 7 insertions(+), 20 deletions(-)
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 0b34ad5748ee..c5167f284da5 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1704,11 +1704,11 @@ static int __init xive_off(char *arg)
__setup("xive=off", xive_off);
#ifdef CONFIG_DEBUG_FS
-static void xive_debug_show_cpu(struct seq_file *m, int cpu)
+static void xive_debug_show_ipi(struct seq_file *m, int cpu)
{
struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
- seq_printf(m, "CPU %d:", cpu);
+ seq_printf(m, "CPU %d: ", cpu);
if (xc) {
seq_printf(m, "pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);
@@ -1720,19 +1720,6 @@ static void xive_debug_show_cpu(struct seq_file *m, int cpu)
seq_printf(m, "IPI=0x%08x %s", xc->hw_ipi, buffer);
}
#endif
- {
- struct xive_q *q = &xc->queue[xive_irq_priority];
- u32 i0, i1, idx;
-
- if (q->qpage) {
- idx = q->idx;
- i0 = be32_to_cpup(q->qpage + idx);
- idx = (idx + 1) & q->msk;
- i1 = be32_to_cpup(q->qpage + idx);
- seq_printf(m, "EQ idx=%d T=%d %08x %08x ...",
- q->idx, q->toggle, i0, i1);
- }
- }
}
seq_puts(m, "\n");
}
@@ -1775,7 +1762,7 @@ static int xive_irq_debug_show(struct seq_file *m, void *private)
}
DEFINE_SHOW_ATTRIBUTE(xive_irq_debug);
-static int xive_cpu_debug_show(struct seq_file *m, void *private)
+static int xive_ipi_debug_show(struct seq_file *m, void *private)
{
int cpu;
@@ -1783,10 +1770,10 @@ static int xive_cpu_debug_show(struct seq_file *m, void *private)
xive_ops->debug_show(m, private);
for_each_possible_cpu(cpu)
- xive_debug_show_cpu(m, cpu);
+ xive_debug_show_ipi(m, cpu);
return 0;
}
-DEFINE_SHOW_ATTRIBUTE(xive_cpu_debug);
+DEFINE_SHOW_ATTRIBUTE(xive_ipi_debug);
static void xive_core_debugfs_create(void)
{
@@ -1796,8 +1783,8 @@ static void xive_core_debugfs_create(void)
if (IS_ERR(xive_dir))
return;
- debugfs_create_file("cpus", 0400, xive_dir,
- NULL, &xive_cpu_debug_fops);
+ debugfs_create_file("ipis", 0400, xive_dir,
+ NULL, &xive_ipi_debug_fops);
debugfs_create_file("interrupts", 0400, xive_dir,
NULL, &xive_irq_debug_fops);
}
--
2.31.1
^ permalink raw reply related
* [PATCH 08/11] powerpc/xive: Add a debugfs toggle for StoreEOI
From: Cédric Le Goater @ 2021-11-05 10:26 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20211105102636.1016378-1-clg@kaod.org>
It can be used to temporarily deactivate StoreEOI for tests or
performance measurements on platforms supporting the feature (POWER10).
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
arch/powerpc/sysdev/xive/common.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 75c683ffae7e..11e2aaa13965 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -84,6 +84,16 @@ static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu);
/* An invalid CPU target */
#define XIVE_INVALID_TARGET (-1)
+/*
+ * Global toggle to switch on/off StoreEOI
+ */
+static bool xive_store_eoi = true;
+
+static bool xive_is_store_eoi(struct xive_irq_data *xd)
+{
+ return xd->flags & XIVE_IRQ_FLAG_STORE_EOI && xive_store_eoi;
+}
+
/*
* Read the next entry in a queue, return its content if it's valid
* or 0 if there is no new entry.
@@ -208,7 +218,7 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset)
{
u64 val;
- if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+ if (offset == XIVE_ESB_SET_PQ_10 && xive_is_store_eoi(xd))
offset |= XIVE_ESB_LD_ST_MO;
if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
@@ -233,7 +243,7 @@ static void xive_irq_data_dump(struct xive_irq_data *xd, char *buffer, size_t si
u64 val = xive_esb_read(xd, XIVE_ESB_GET);
snprintf(buffer, size, "flags=%c%c%c PQ=%c%c 0x%016llx 0x%016llx",
- xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ',
+ xive_is_store_eoi(xd) ? 'S' : ' ',
xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
val & XIVE_ESB_VAL_P ? 'P' : '-',
@@ -395,7 +405,7 @@ static void xive_do_source_eoi(struct xive_irq_data *xd)
xd->stale_p = false;
/* If the XIVE supports the new "store EOI facility, use it */
- if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) {
+ if (xive_is_store_eoi(xd)) {
xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
return;
}
@@ -1824,6 +1834,7 @@ static void xive_core_debugfs_create(void)
debugfs_create_file(name, 0400, xive_eq_dir, (void *)cpu,
&xive_eq_debug_fops);
}
+ debugfs_create_bool("store-eoi", 0600, xive_dir, &xive_store_eoi);
}
#endif /* CONFIG_DEBUG_FS */
--
2.31.1
^ permalink raw reply related
* [PATCH 00/11] powerpc/xive: Improve diagnostic and activate StoreEOI on P10 PowerNV
From: Cédric Le Goater @ 2021-11-05 10:26 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Cédric Le Goater
Hello,
This series tries to improve diagnostic support in the XIVE driver. It
adds pr_debug() primitives that can be activated at run-time and changes
the debugfs xive entry to expose more information :
/sys/kernel/debug/powerpc/xive/
├── eqs/
│ ├── cpu0
│ ├── cpu1
│ ├── cpu2
. .
│ └── cpu99
├── interrupts
├── ipis
├── save-restore
└── store-eoi
It also introduces 2 new kernel parameters :
xive.store-eoi=off to deactivate StoreEOI at boot; it can still be
reactivated through debugfs.
doorbell=off to deactivate doorbells for CPU IPIs
and use XIVE instead
The first is a workaround in case of a FW advertising the wrong
feature. It can be used to check performance also. The second is for
PowerVM development and tests when the LP-per-thread mode is
activated. Doorbells cannot be used in that case.
Finally, it activates StoreEOI support for the PowerNV platform. OPAL
for P10 has been released and we are free to use this extension.
Thanks,
C.
Cédric Le Goater (11):
powerpc/xive: Replace pr_devel() by pr_debug() to ease debug
powerpc/xive: Introduce an helper to print out interrupt
characteristics
powerpc/xive: Activate StoreEOI on P10
powerpc/xive: Introduce xive_core_debugfs_create()
powerpc/xive: Change the debugfs file 'xive' into a directory
powerpc/xive: Rename the 'cpus' debugfs file to 'ipis'
powerpc/xive: Add a debugfs file to dump EQs
powerpc/xive: Add a debugfs toggle for StoreEOI
powerpc/xive: Add a kernel parameter for StoreEOI
powerpc/xive: Add a debugfs toggle for save-restore
powerpc/smp: Add a doorbell=off kernel parameter
arch/powerpc/include/asm/dbell.h | 1 +
arch/powerpc/include/asm/opal-api.h | 1 +
arch/powerpc/sysdev/xive/xive-internal.h | 1 +
arch/powerpc/kernel/dbell.c | 17 ++
arch/powerpc/platforms/powernv/smp.c | 7 +-
arch/powerpc/platforms/pseries/smp.c | 3 +
arch/powerpc/sysdev/xive/common.c | 211 ++++++++++++------
arch/powerpc/sysdev/xive/native.c | 4 +-
arch/powerpc/sysdev/xive/spapr.c | 38 ++--
.../admin-guide/kernel-parameters.txt | 16 ++
10 files changed, 209 insertions(+), 90 deletions(-)
--
2.31.1
^ permalink raw reply
* [PATCH 01/11] powerpc/xive: Replace pr_devel() by pr_debug() to ease debug
From: Cédric Le Goater @ 2021-11-05 10:26 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20211105102636.1016378-1-clg@kaod.org>
These routines are not on hot code paths and pr_debug() is easier to
activate. Also add a '0x' prefix to hex printed values (HW IRQ number).
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
arch/powerpc/sysdev/xive/common.c | 29 +++++++++++------------
arch/powerpc/sysdev/xive/spapr.c | 38 +++++++++++++++----------------
2 files changed, 34 insertions(+), 33 deletions(-)
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index c5d75c02ad8b..7280ff3fef2d 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -451,6 +451,8 @@ static void xive_do_source_set_mask(struct xive_irq_data *xd,
{
u64 val;
+ pr_debug("%s: HW 0x%x %smask\n", __func__, xd->hw_irq, mask ? "" : "un");
+
/*
* If the interrupt had P set, it may be in a queue.
*
@@ -612,8 +614,8 @@ static unsigned int xive_irq_startup(struct irq_data *d)
xd->saved_p = false;
xd->stale_p = false;
- pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
- d->irq, hw_irq, d);
+
+ pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d);
/* Pick a target */
target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d));
@@ -654,8 +656,7 @@ static void xive_irq_shutdown(struct irq_data *d)
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
- pr_devel("xive_irq_shutdown: irq %d [0x%x] data @%p\n",
- d->irq, hw_irq, d);
+ pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d);
if (WARN_ON(xd->target == XIVE_INVALID_TARGET))
return;
@@ -679,7 +680,7 @@ static void xive_irq_unmask(struct irq_data *d)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
- pr_devel("xive_irq_unmask: irq %d data @%p\n", d->irq, xd);
+ pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd);
xive_do_source_set_mask(xd, false);
}
@@ -688,7 +689,7 @@ static void xive_irq_mask(struct irq_data *d)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
- pr_devel("xive_irq_mask: irq %d data @%p\n", d->irq, xd);
+ pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd);
xive_do_source_set_mask(xd, true);
}
@@ -702,7 +703,7 @@ static int xive_irq_set_affinity(struct irq_data *d,
u32 target, old_target;
int rc = 0;
- pr_debug("%s: irq %d/%x\n", __func__, d->irq, hw_irq);
+ pr_debug("%s: irq %d/0x%x\n", __func__, d->irq, hw_irq);
/* Is this valid ? */
if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids)
@@ -975,7 +976,7 @@ EXPORT_SYMBOL_GPL(is_xive_irq);
void xive_cleanup_irq_data(struct xive_irq_data *xd)
{
- pr_debug("%s for HW %x\n", __func__, xd->hw_irq);
+ pr_debug("%s for HW 0x%x\n", __func__, xd->hw_irq);
if (xd->eoi_mmio) {
iounmap(xd->eoi_mmio);
@@ -1211,8 +1212,8 @@ static int xive_setup_cpu_ipi(unsigned int cpu)
pr_err("Failed to map IPI CPU %d\n", cpu);
return -EIO;
}
- pr_devel("CPU %d HW IPI %x, virq %d, trig_mmio=%p\n", cpu,
- xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio);
+ pr_debug("CPU %d HW IPI 0x%x, virq %d, trig_mmio=%p\n", cpu,
+ xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio);
/* Unmask it */
xive_do_source_set_mask(&xc->ipi_data, false);
@@ -1390,7 +1391,7 @@ static int xive_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
if (rc)
return rc;
- pr_debug("%s %d/%lx #%d\n", __func__, virq, hwirq, nr_irqs);
+ pr_debug("%s %d/0x%lx #%d\n", __func__, virq, hwirq, nr_irqs);
for (i = 0; i < nr_irqs; i++) {
/* TODO: call xive_irq_domain_map() */
@@ -1505,7 +1506,7 @@ static void xive_setup_cpu(void)
#ifdef CONFIG_SMP
void xive_smp_setup_cpu(void)
{
- pr_devel("SMP setup CPU %d\n", smp_processor_id());
+ pr_debug("SMP setup CPU %d\n", smp_processor_id());
/* This will have already been done on the boot CPU */
if (smp_processor_id() != boot_cpuid)
@@ -1651,10 +1652,10 @@ bool __init xive_core_init(struct device_node *np, const struct xive_ops *ops,
ppc_md.get_irq = xive_get_irq;
__xive_enabled = true;
- pr_devel("Initializing host..\n");
+ pr_debug("Initializing host..\n");
xive_init_host(np);
- pr_devel("Initializing boot CPU..\n");
+ pr_debug("Initializing boot CPU..\n");
/* Allocate per-CPU data and queues */
xive_prepare_cpu(smp_processor_id());
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index f143b6f111ac..77943dc70860 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -173,7 +173,7 @@ static long plpar_int_get_source_info(unsigned long flags,
} while (plpar_busy_delay(rc));
if (rc) {
- pr_err("H_INT_GET_SOURCE_INFO lisn=%ld failed %ld\n", lisn, rc);
+ pr_err("H_INT_GET_SOURCE_INFO lisn=0x%lx failed %ld\n", lisn, rc);
return rc;
}
@@ -182,8 +182,8 @@ static long plpar_int_get_source_info(unsigned long flags,
*trig_page = retbuf[2];
*esb_shift = retbuf[3];
- pr_devel("H_INT_GET_SOURCE_INFO flags=%lx eoi=%lx trig=%lx shift=%lx\n",
- retbuf[0], retbuf[1], retbuf[2], retbuf[3]);
+ pr_debug("H_INT_GET_SOURCE_INFO lisn=0x%lx flags=0x%lx eoi=0x%lx trig=0x%lx shift=0x%lx\n",
+ lisn, retbuf[0], retbuf[1], retbuf[2], retbuf[3]);
return 0;
}
@@ -200,8 +200,8 @@ static long plpar_int_set_source_config(unsigned long flags,
long rc;
- pr_devel("H_INT_SET_SOURCE_CONFIG flags=%lx lisn=%lx target=%lx prio=%lx sw_irq=%lx\n",
- flags, lisn, target, prio, sw_irq);
+ pr_debug("H_INT_SET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx target=%ld prio=%ld sw_irq=%ld\n",
+ flags, lisn, target, prio, sw_irq);
do {
@@ -210,7 +210,7 @@ static long plpar_int_set_source_config(unsigned long flags,
} while (plpar_busy_delay(rc));
if (rc) {
- pr_err("H_INT_SET_SOURCE_CONFIG lisn=%ld target=%lx prio=%lx failed %ld\n",
+ pr_err("H_INT_SET_SOURCE_CONFIG lisn=0x%lx target=%ld prio=%ld failed %ld\n",
lisn, target, prio, rc);
return rc;
}
@@ -227,7 +227,7 @@ static long plpar_int_get_source_config(unsigned long flags,
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
long rc;
- pr_devel("H_INT_GET_SOURCE_CONFIG flags=%lx lisn=%lx\n", flags, lisn);
+ pr_debug("H_INT_GET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx\n", flags, lisn);
do {
rc = plpar_hcall(H_INT_GET_SOURCE_CONFIG, retbuf, flags, lisn,
@@ -235,7 +235,7 @@ static long plpar_int_get_source_config(unsigned long flags,
} while (plpar_busy_delay(rc));
if (rc) {
- pr_err("H_INT_GET_SOURCE_CONFIG lisn=%ld failed %ld\n",
+ pr_err("H_INT_GET_SOURCE_CONFIG lisn=0x%lx failed %ld\n",
lisn, rc);
return rc;
}
@@ -244,8 +244,8 @@ static long plpar_int_get_source_config(unsigned long flags,
*prio = retbuf[1];
*sw_irq = retbuf[2];
- pr_devel("H_INT_GET_SOURCE_CONFIG target=%lx prio=%lx sw_irq=%lx\n",
- retbuf[0], retbuf[1], retbuf[2]);
+ pr_debug("H_INT_GET_SOURCE_CONFIG target=%ld prio=%ld sw_irq=%ld\n",
+ retbuf[0], retbuf[1], retbuf[2]);
return 0;
}
@@ -273,8 +273,8 @@ static long plpar_int_get_queue_info(unsigned long flags,
*esn_page = retbuf[0];
*esn_size = retbuf[1];
- pr_devel("H_INT_GET_QUEUE_INFO page=%lx size=%lx\n",
- retbuf[0], retbuf[1]);
+ pr_debug("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld page=0x%lx size=0x%lx\n",
+ target, priority, retbuf[0], retbuf[1]);
return 0;
}
@@ -289,8 +289,8 @@ static long plpar_int_set_queue_config(unsigned long flags,
{
long rc;
- pr_devel("H_INT_SET_QUEUE_CONFIG flags=%lx target=%lx priority=%lx qpage=%lx qsize=%lx\n",
- flags, target, priority, qpage, qsize);
+ pr_debug("H_INT_SET_QUEUE_CONFIG flags=0x%lx target=%ld priority=0x%lx qpage=0x%lx qsize=0x%lx\n",
+ flags, target, priority, qpage, qsize);
do {
rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target,
@@ -298,7 +298,7 @@ static long plpar_int_set_queue_config(unsigned long flags,
} while (plpar_busy_delay(rc));
if (rc) {
- pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=%lx returned %ld\n",
+ pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=0x%lx returned %ld\n",
target, priority, qpage, rc);
return rc;
}
@@ -315,7 +315,7 @@ static long plpar_int_sync(unsigned long flags, unsigned long lisn)
} while (plpar_busy_delay(rc));
if (rc) {
- pr_err("H_INT_SYNC lisn=%ld returned %ld\n", lisn, rc);
+ pr_err("H_INT_SYNC lisn=0x%lx returned %ld\n", lisn, rc);
return rc;
}
@@ -333,8 +333,8 @@ static long plpar_int_esb(unsigned long flags,
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
long rc;
- pr_devel("H_INT_ESB flags=%lx lisn=%lx offset=%lx in=%lx\n",
- flags, lisn, offset, in_data);
+ pr_debug("H_INT_ESB flags=0x%lx lisn=0x%lx offset=0x%lx in=0x%lx\n",
+ flags, lisn, offset, in_data);
do {
rc = plpar_hcall(H_INT_ESB, retbuf, flags, lisn, offset,
@@ -342,7 +342,7 @@ static long plpar_int_esb(unsigned long flags,
} while (plpar_busy_delay(rc));
if (rc) {
- pr_err("H_INT_ESB lisn=%ld offset=%ld returned %ld\n",
+ pr_err("H_INT_ESB lisn=0x%lx offset=0x%lx returned %ld\n",
lisn, offset, rc);
return rc;
}
--
2.31.1
^ permalink raw reply related
* [PATCH 02/11] powerpc/xive: Introduce an helper to print out interrupt characteristics
From: Cédric Le Goater @ 2021-11-05 10:26 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20211105102636.1016378-1-clg@kaod.org>
and extend output of debugfs and xmon with addresses of the ESB
management and trigger pages.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
arch/powerpc/sysdev/xive/common.c | 54 +++++++++++++++----------------
1 file changed, 27 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 7280ff3fef2d..3d558cad1f19 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -227,6 +227,19 @@ static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data)
out_be64(xd->eoi_mmio + offset, data);
}
+static void xive_irq_data_dump(struct xive_irq_data *xd, char *buffer, size_t size)
+{
+ u64 val = xive_esb_read(xd, XIVE_ESB_GET);
+
+ snprintf(buffer, size, "flags=%c%c%c PQ=%c%c 0x%016llx 0x%016llx",
+ xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ',
+ xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
+ xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
+ val & XIVE_ESB_VAL_P ? 'P' : '-',
+ val & XIVE_ESB_VAL_Q ? 'Q' : '-',
+ xd->trig_page, xd->eoi_page);
+}
+
#ifdef CONFIG_XMON
static notrace void xive_dump_eq(const char *name, struct xive_q *q)
{
@@ -252,11 +265,10 @@ notrace void xmon_xive_do_dump(int cpu)
#ifdef CONFIG_SMP
{
- u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
+ char buffer[128];
- xmon_printf("IPI=0x%08x PQ=%c%c ", xc->hw_ipi,
- val & XIVE_ESB_VAL_P ? 'P' : '-',
- val & XIVE_ESB_VAL_Q ? 'Q' : '-');
+ xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer));
+ xmon_printf("IPI=0x%08x %s", xc->hw_ipi, buffer);
}
#endif
xive_dump_eq("EQ", &xc->queue[xive_irq_priority]);
@@ -291,15 +303,11 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
d = xive_get_irq_data(hw_irq);
if (d) {
- struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
- u64 val = xive_esb_read(xd, XIVE_ESB_GET);
-
- xmon_printf("flags=%c%c%c PQ=%c%c",
- xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ',
- xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
- xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
- val & XIVE_ESB_VAL_P ? 'P' : '-',
- val & XIVE_ESB_VAL_Q ? 'Q' : '-');
+ char buffer[128];
+
+ xive_irq_data_dump(irq_data_get_irq_handler_data(d),
+ buffer, sizeof(buffer));
+ xmon_printf("%s", buffer);
}
xmon_printf("\n");
@@ -1703,11 +1711,10 @@ static void xive_debug_show_cpu(struct seq_file *m, int cpu)
#ifdef CONFIG_SMP
{
- u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
+ char buffer[128];
- seq_printf(m, "IPI=0x%08x PQ=%c%c ", xc->hw_ipi,
- val & XIVE_ESB_VAL_P ? 'P' : '-',
- val & XIVE_ESB_VAL_Q ? 'Q' : '-');
+ xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer));
+ seq_printf(m, "IPI=0x%08x %s", xc->hw_ipi, buffer);
}
#endif
{
@@ -1734,8 +1741,7 @@ static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d)
u32 target;
u8 prio;
u32 lirq;
- struct xive_irq_data *xd;
- u64 val;
+ char buffer[128];
rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
if (rc) {
@@ -1746,14 +1752,8 @@ static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d)
seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
hw_irq, target, prio, lirq);
- xd = irq_data_get_irq_handler_data(d);
- val = xive_esb_read(xd, XIVE_ESB_GET);
- seq_printf(m, "flags=%c%c%c PQ=%c%c",
- xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ',
- xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
- xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
- val & XIVE_ESB_VAL_P ? 'P' : '-',
- val & XIVE_ESB_VAL_Q ? 'Q' : '-');
+ xive_irq_data_dump(irq_data_get_irq_handler_data(d), buffer, sizeof(buffer));
+ seq_puts(m, buffer);
seq_puts(m, "\n");
}
--
2.31.1
^ permalink raw reply related
* [PATCH 09/11] powerpc/xive: Add a kernel parameter for StoreEOI
From: Cédric Le Goater @ 2021-11-05 10:26 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20211105102636.1016378-1-clg@kaod.org>
StoreEOI is activated by default on platforms supporting the feature
(POWER10) and will be used as soon as firmware advertises its
availability. The kernel parameter provides a way to deactivate its
use. It can still be reactivated through debugfs.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
arch/powerpc/sysdev/xive/common.c | 13 +++++++++++++
Documentation/admin-guide/kernel-parameters.txt | 6 ++++++
2 files changed, 19 insertions(+)
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 11e2aaa13965..67fd3a306369 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1713,6 +1713,19 @@ static int __init xive_off(char *arg)
}
__setup("xive=off", xive_off);
+static int __init xive_store_eoi_cmdline(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ if (strncmp(arg, "off", 3) == 0) {
+ pr_info("StoreEOI disabled on kernel command line\n");
+ xive_store_eoi = false;
+ }
+ return 0;
+}
+__setup("xive.store-eoi=", xive_store_eoi_cmdline);
+
#ifdef CONFIG_DEBUG_FS
static void xive_debug_show_ipi(struct seq_file *m, int cpu)
{
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 43dc35fe5bc0..10fa093251e8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6387,6 +6387,12 @@
controller on both pseries and powernv
platforms. Only useful on POWER9 and above.
+ xive.store-eoi=off [PPC]
+ By default on POWER10 and above, the kernel will use
+ stores for EOI handling when the XIVE interrupt mode
+ is active. This option allows the XIVE driver to use
+ loads instead, as on POWER9.
+
xhci-hcd.quirks [USB,KNL]
A hex value specifying bitmask with supplemental xhci
host controller quirks. Meaning of each bit can be
--
2.31.1
^ permalink raw reply related
* [PATCH 10/11] powerpc/xive: Add a debugfs toggle for save-restore
From: Cédric Le Goater @ 2021-11-05 10:26 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20211105102636.1016378-1-clg@kaod.org>
On POWER10, the automatic "save & restore" of interrupt context is
always available. Provide a way to deactivate it for tests or
performance.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
arch/powerpc/sysdev/xive/xive-internal.h | 1 +
arch/powerpc/sysdev/xive/common.c | 1 +
arch/powerpc/sysdev/xive/native.c | 2 +-
3 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h
index 504e7edce358..e0941bc64430 100644
--- a/arch/powerpc/sysdev/xive/xive-internal.h
+++ b/arch/powerpc/sysdev/xive/xive-internal.h
@@ -72,5 +72,6 @@ static inline u32 xive_alloc_order(u32 queue_shift)
}
extern bool xive_cmdline_disabled;
+extern bool xive_has_save_restore;
#endif /* __XIVE_INTERNAL_H */
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 67fd3a306369..39142df828a0 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1848,6 +1848,7 @@ static void xive_core_debugfs_create(void)
&xive_eq_debug_fops);
}
debugfs_create_bool("store-eoi", 0600, xive_dir, &xive_store_eoi);
+ debugfs_create_bool("save-restore", 0600, xive_dir, &xive_has_save_restore);
}
#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 7ec8911dad57..d6a091dc1bce 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -41,7 +41,7 @@ static u32 xive_queue_shift;
static u32 xive_pool_vps = XIVE_INVALID_VP;
static struct kmem_cache *xive_provision_cache;
static bool xive_has_single_esc;
-static bool xive_has_save_restore;
+bool xive_has_save_restore;
int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
{
--
2.31.1
^ permalink raw reply related
* [PATCH 03/11] powerpc/xive: Activate StoreEOI on P10
From: Cédric Le Goater @ 2021-11-05 10:26 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20211105102636.1016378-1-clg@kaod.org>
StoreEOI (the capability to EOI with a store) requires load-after-store
ordering in some cases to be reliable. P10 introduced a new offset for
load operations to enforce correct ordering and the XIVE driver has
the required support since kernel 5.8, commit b1f9be9392f0
("powerpc/xive: Enforce load-after-store ordering when StoreEOI is active")
Since skiboot v7, StoreEOI support is advertised on P10 with a new flag
on the PowerNV platform. See skiboot commit 4bd7d84afe46 ("xive/p10:
Introduce a new OPAL_XIVE_IRQ_STORE_EOI2 flag"). When detected,
activate the feature.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
arch/powerpc/include/asm/opal-api.h | 1 +
arch/powerpc/sysdev/xive/native.c | 2 ++
2 files changed, 3 insertions(+)
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 0b63ba7d5917..a2bc4b95e703 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1094,6 +1094,7 @@ enum {
OPAL_XIVE_IRQ_SHIFT_BUG = 0x00000008, /* P9 DD1.0 workaround */
OPAL_XIVE_IRQ_MASK_VIA_FW = 0x00000010, /* P9 DD1.0 workaround */
OPAL_XIVE_IRQ_EOI_VIA_FW = 0x00000020, /* P9 DD1.0 workaround */
+ OPAL_XIVE_IRQ_STORE_EOI2 = 0x00000040,
};
/* Flags for OPAL_XIVE_GET/SET_QUEUE_INFO */
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 1aec282cd650..7ec8911dad57 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -63,6 +63,8 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
opal_flags = be64_to_cpu(flags);
if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI)
data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
+ if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI2)
+ data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
if (opal_flags & OPAL_XIVE_IRQ_LSI)
data->flags |= XIVE_IRQ_FLAG_LSI;
data->eoi_page = be64_to_cpu(eoi_page);
--
2.31.1
^ permalink raw reply related
* [PATCH 04/11] powerpc/xive: Introduce xive_core_debugfs_create()
From: Cédric Le Goater @ 2021-11-05 10:26 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20211105102636.1016378-1-clg@kaod.org>
and fix some compile issues when !CONFIG_DEBUG_FS.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
arch/powerpc/sysdev/xive/common.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 3d558cad1f19..b71cc1020296 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -227,6 +227,7 @@ static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data)
out_be64(xd->eoi_mmio + offset, data);
}
+#if defined(CONFIG_XMON) || defined(CONFIG_DEBUG_FS)
static void xive_irq_data_dump(struct xive_irq_data *xd, char *buffer, size_t size)
{
u64 val = xive_esb_read(xd, XIVE_ESB_GET);
@@ -239,6 +240,7 @@ static void xive_irq_data_dump(struct xive_irq_data *xd, char *buffer, size_t si
val & XIVE_ESB_VAL_Q ? 'Q' : '-',
xd->trig_page, xd->eoi_page);
}
+#endif
#ifdef CONFIG_XMON
static notrace void xive_dump_eq(const char *name, struct xive_q *q)
@@ -1701,6 +1703,7 @@ static int __init xive_off(char *arg)
}
__setup("xive=off", xive_off);
+#ifdef CONFIG_DEBUG_FS
static void xive_debug_show_cpu(struct seq_file *m, int cpu)
{
struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
@@ -1779,10 +1782,18 @@ static int xive_core_debug_show(struct seq_file *m, void *private)
}
DEFINE_SHOW_ATTRIBUTE(xive_core_debug);
+static void xive_core_debugfs_create(void)
+{
+ debugfs_create_file("xive", 0400, arch_debugfs_dir,
+ NULL, &xive_core_debug_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
int xive_core_debug_init(void)
{
- if (xive_enabled())
- debugfs_create_file("xive", 0400, arch_debugfs_dir,
- NULL, &xive_core_debug_fops);
+ if (xive_enabled() && IS_ENABLED(CONFIG_DEBUG_FS))
+ xive_core_debugfs_create();
+
return 0;
}
--
2.31.1
^ permalink raw reply related
* [PATCH 2/2] module: Move duplicate mod_check_sig users code to mod_parse_sig
From: Michal Suchanek @ 2021-11-05 9:55 UTC (permalink / raw)
To: keyrings
Cc: Mimi Zohar, David Howells, Paul Mackerras, Alexander Gordeev,
Rob Herring, Herbert Xu, Lakshmi Ramasubramanian, James Morris,
Christian Borntraeger, Michal Suchanek, Serge E. Hallyn,
Vasily Gorbik, linux-s390, Heiko Carstens, Hari Bathini,
Dmitry Kasatkin, Frank van der Linden, linux-security-module,
linux-kernel, Luis Chamberlain, linux-crypto, Jessica Yu,
linux-integrity, linuxppc-dev, David S. Miller,
Thiago Jung Bauermann
In-Reply-To: <cover.1636105912.git.msuchanek@suse.de>
Multiple users of mod_check_sig check for the marker, then call
mod_check_sig, extract signature length, and remove the signature.
Put this code in one place together with mod_check_sig.
Signed-off-by: Michal Suchanek <msuchanek@suse.de>
---
include/linux/module_signature.h | 1 +
kernel/module_signature.c | 56 ++++++++++++++++++++++++++++-
kernel/module_signing.c | 26 +++-----------
security/integrity/ima/ima_modsig.c | 22 ++----------
4 files changed, 63 insertions(+), 42 deletions(-)
diff --git a/include/linux/module_signature.h b/include/linux/module_signature.h
index 7eb4b00381ac..1343879b72b3 100644
--- a/include/linux/module_signature.h
+++ b/include/linux/module_signature.h
@@ -42,5 +42,6 @@ struct module_signature {
int mod_check_sig(const struct module_signature *ms, size_t file_len,
const char *name);
+int mod_parse_sig(const void *data, size_t *len, size_t *sig_len, const char *name);
#endif /* _LINUX_MODULE_SIGNATURE_H */
diff --git a/kernel/module_signature.c b/kernel/module_signature.c
index 00132d12487c..784b40575ee4 100644
--- a/kernel/module_signature.c
+++ b/kernel/module_signature.c
@@ -8,14 +8,36 @@
#include <linux/errno.h>
#include <linux/printk.h>
+#include <linux/string.h>
#include <linux/module_signature.h>
#include <asm/byteorder.h>
+/**
+ * mod_check_sig_marker - check that the given data has signature marker at the end
+ *
+ * @data: Data with appended signature
+ * @len: Length of data. Signature marker length is subtracted on success.
+ */
+static inline int mod_check_sig_marker(const void *data, size_t *len)
+{
+ const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
+
+ if (markerlen > *len)
+ return -ENODATA;
+
+ if (memcmp(data + *len - markerlen, MODULE_SIG_STRING,
+ markerlen))
+ return -ENODATA;
+
+ *len -= markerlen;
+ return 0;
+}
+
/**
* mod_check_sig - check that the given signature is sane
*
* @ms: Signature to check.
- * @file_len: Size of the file to which @ms is appended.
+ * @file_len: Size of the file to which @ms is appended (without the marker).
* @name: What is being checked. Used for error messages.
*/
int mod_check_sig(const struct module_signature *ms, size_t file_len,
@@ -44,3 +66,35 @@ int mod_check_sig(const struct module_signature *ms, size_t file_len,
return 0;
}
+
+/**
+ * mod_parse_sig - check that the given signature is sane and determine signature length
+ *
+ * @data: Data with appended signature.
+ * @len: Length of data. Signature and marker length is subtracted on success.
+ * @sig_len: Length of signature. Filled on success.
+ * @name: What is being checked. Used for error messages.
+ */
+int mod_parse_sig(const void *data, size_t *len, size_t *sig_len, const char *name)
+{
+ const struct module_signature *sig;
+ int rc;
+
+ rc = mod_check_sig_marker(data, len);
+ if (rc)
+ return rc;
+
+ if (*len < sizeof(*sig))
+ return -ENODATA;
+
+ sig = (const struct module_signature *)(data + (*len - sizeof(*sig)));
+
+ rc = mod_check_sig(sig, *len, name);
+ if (rc)
+ return rc;
+
+ *sig_len = be32_to_cpu(sig->sig_len);
+ *len -= *sig_len + sizeof(*sig);
+
+ return 0;
+}
diff --git a/kernel/module_signing.c b/kernel/module_signing.c
index cef72a6f6b5d..02bbca90f467 100644
--- a/kernel/module_signing.c
+++ b/kernel/module_signing.c
@@ -25,35 +25,17 @@ int verify_appended_signature(const void *data, size_t *len,
struct key *trusted_keys,
enum key_being_used_for purpose)
{
- const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
struct module_signature ms;
- size_t sig_len, modlen = *len;
+ size_t sig_len;
int ret;
- pr_devel("==>%s %s(,%zu)\n", __func__, key_being_used_for[purpose], modlen);
+ pr_devel("==>%s %s(,%zu)\n", __func__, key_being_used_for[purpose], *len);
- if (markerlen > modlen)
- return -ENODATA;
-
- if (memcmp(data + modlen - markerlen, MODULE_SIG_STRING,
- markerlen))
- return -ENODATA;
- modlen -= markerlen;
-
- if (modlen <= sizeof(ms))
- return -EBADMSG;
-
- memcpy(&ms, data + (modlen - sizeof(ms)), sizeof(ms));
-
- ret = mod_check_sig(&ms, modlen, key_being_used_for[purpose]);
+ ret = mod_parse_sig(data, len, &sig_len, key_being_used_for[purpose]);
if (ret)
return ret;
- sig_len = be32_to_cpu(ms.sig_len);
- modlen -= sig_len + sizeof(ms);
- *len = modlen;
-
- return verify_pkcs7_signature(data, modlen, data + modlen, sig_len,
+ return verify_pkcs7_signature(data, *len, data + *len, sig_len,
trusted_keys,
purpose,
NULL, NULL);
diff --git a/security/integrity/ima/ima_modsig.c b/security/integrity/ima/ima_modsig.c
index fb25723c65bc..46917eb37fd8 100644
--- a/security/integrity/ima/ima_modsig.c
+++ b/security/integrity/ima/ima_modsig.c
@@ -37,33 +37,17 @@ struct modsig {
*
* Return: 0 on success, error code otherwise.
*/
-int ima_read_modsig(enum ima_hooks func, const void *buf, loff_t buf_len,
+int ima_read_modsig(enum ima_hooks func, const void *buf, loff_t len,
struct modsig **modsig)
{
- const size_t marker_len = strlen(MODULE_SIG_STRING);
- const struct module_signature *sig;
struct modsig *hdr;
- size_t sig_len;
- const void *p;
+ size_t sig_len, buf_len = len;
int rc;
- if (buf_len <= marker_len + sizeof(*sig))
- return -ENOENT;
-
- p = buf + buf_len - marker_len;
- if (memcmp(p, MODULE_SIG_STRING, marker_len))
- return -ENOENT;
-
- buf_len -= marker_len;
- sig = (const struct module_signature *)(p - sizeof(*sig));
-
- rc = mod_check_sig(sig, buf_len, func_tokens[func]);
+ rc = mod_parse_sig(buf, &buf_len, &sig_len, func_tokens[func]);
if (rc)
return rc;
- sig_len = be32_to_cpu(sig->sig_len);
- buf_len -= sig_len + sizeof(*sig);
-
/* Allocate sig_len additional bytes to hold the raw PKCS#7 data. */
hdr = kzalloc(sizeof(*hdr) + sig_len, GFP_KERNEL);
if (!hdr)
--
2.31.1
^ permalink raw reply related
* [PATCH 0/2] Additional appended signature cleanup
From: Michal Suchanek @ 2021-11-05 9:55 UTC (permalink / raw)
To: keyrings
Cc: Mimi Zohar, David Howells, Paul Mackerras, Alexander Gordeev,
Rob Herring, Herbert Xu, Lakshmi Ramasubramanian, James Morris,
Christian Borntraeger, Michal Suchanek, Serge E. Hallyn,
Vasily Gorbik, linux-s390, Heiko Carstens, Hari Bathini,
Dmitry Kasatkin, Frank van der Linden, linux-security-module,
linux-kernel, Luis Chamberlain, linux-crypto, Jessica Yu,
linux-integrity, linuxppc-dev, David S. Miller,
Thiago Jung Bauermann
In-Reply-To: <cover.1635948742.git.msuchanek@suse.de>
There is one more copy of the code checking appended signatures.
Merge the common code to module_signature.c
Thanks
Michal
Michal Suchanek (2):
module: Use key_being_used_for for log messages in
verify_appended_signature
module: Move duplicate mod_check_sig users code to mod_parse_sig
arch/powerpc/kexec/elf_64.c | 2 +-
arch/s390/kernel/machine_kexec_file.c | 2 +-
crypto/asymmetric_keys/asymmetric_type.c | 1 +
include/linux/module_signature.h | 1 +
include/linux/verification.h | 3 +-
kernel/module.c | 3 +-
kernel/module_signature.c | 56 +++++++++++++++++++++++-
kernel/module_signing.c | 33 ++++----------
security/integrity/ima/ima_modsig.c | 22 ++--------
9 files changed, 74 insertions(+), 49 deletions(-)
--
2.31.1
^ permalink raw reply
* [PATCH 1/2] module: Use key_being_used_for for log messages in verify_appended_signature
From: Michal Suchanek @ 2021-11-05 9:55 UTC (permalink / raw)
To: keyrings
Cc: Mimi Zohar, David Howells, Paul Mackerras, Alexander Gordeev,
Rob Herring, Herbert Xu, Lakshmi Ramasubramanian, James Morris,
Christian Borntraeger, Michal Suchanek, Serge E. Hallyn,
Vasily Gorbik, linux-s390, Heiko Carstens, Hari Bathini,
Dmitry Kasatkin, Frank van der Linden, linux-security-module,
linux-kernel, Luis Chamberlain, linux-crypto, Jessica Yu,
linux-integrity, linuxppc-dev, David S. Miller,
Thiago Jung Bauermann
In-Reply-To: <cover.1636105912.git.msuchanek@suse.de>
Add value for kexec appended signature and pass in key_being_used_for
enum rather than a string to verify_appended_signature to produce log
messages about the signature.
Signed-off-by: Michal Suchanek <msuchanek@suse.de>
---
arch/powerpc/kexec/elf_64.c | 2 +-
arch/s390/kernel/machine_kexec_file.c | 2 +-
crypto/asymmetric_keys/asymmetric_type.c | 1 +
include/linux/verification.h | 3 ++-
kernel/module.c | 3 ++-
kernel/module_signing.c | 11 ++++++-----
6 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index e8dff6b23ac5..3aa5269f6e0f 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -158,7 +158,7 @@ int elf64_verify_sig(const char *kernel, unsigned long length)
size_t kernel_len = length;
return verify_appended_signature(kernel, &kernel_len, VERIFY_USE_PLATFORM_KEYRING,
- "kexec_file");
+ VERIFYING_KEXEC_APPENDED_SIGNATURE);
}
#endif /* CONFIG_KEXEC_SIG */
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index 82260bb61060..37fcbb149368 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -32,7 +32,7 @@ int s390_verify_sig(const char *kernel, unsigned long length)
return 0;
return verify_appended_signature(kernel, &kernel_len, VERIFY_USE_PLATFORM_KEYRING,
- "kexec_file");
+ VERIFYING_KEXEC_APPENDED_SIGNATURE);
}
#endif /* CONFIG_KEXEC_SIG */
diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c
index ad8af3d70ac0..6fd20eec3882 100644
--- a/crypto/asymmetric_keys/asymmetric_type.c
+++ b/crypto/asymmetric_keys/asymmetric_type.c
@@ -25,6 +25,7 @@ const char *const key_being_used_for[NR__KEY_BEING_USED_FOR] = {
[VERIFYING_KEXEC_PE_SIGNATURE] = "kexec PE sig",
[VERIFYING_KEY_SIGNATURE] = "key sig",
[VERIFYING_KEY_SELF_SIGNATURE] = "key self sig",
+ [VERIFYING_KEXEC_APPENDED_SIGNATURE] = "kexec appended sig",
[VERIFYING_UNSPECIFIED_SIGNATURE] = "unspec sig",
};
EXPORT_SYMBOL_GPL(key_being_used_for);
diff --git a/include/linux/verification.h b/include/linux/verification.h
index c1cf0582012a..23748feb9e03 100644
--- a/include/linux/verification.h
+++ b/include/linux/verification.h
@@ -26,6 +26,7 @@ enum key_being_used_for {
VERIFYING_KEXEC_PE_SIGNATURE,
VERIFYING_KEY_SIGNATURE,
VERIFYING_KEY_SELF_SIGNATURE,
+ VERIFYING_KEXEC_APPENDED_SIGNATURE,
VERIFYING_UNSPECIFIED_SIGNATURE,
NR__KEY_BEING_USED_FOR
};
@@ -61,7 +62,7 @@ extern int verify_pefile_signature(const void *pebuf, unsigned pelen,
#endif
int verify_appended_signature(const void *data, size_t *len, struct key *trusted_keys,
- const char *what);
+ enum key_being_used_for purpose);
#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
#endif /* _LINUX_VERIFY_PEFILE_H */
diff --git a/kernel/module.c b/kernel/module.c
index 1c421b0442e3..5f1cf989e1cf 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2891,7 +2891,8 @@ static int module_sig_check(struct load_info *info, int flags)
*/
if (flags == 0) {
err = verify_appended_signature(mod, &info->len,
- VERIFY_USE_SECONDARY_KEYRING, "module");
+ VERIFY_USE_SECONDARY_KEYRING,
+ VERIFYING_MODULE_SIGNATURE);
if (!err) {
info->sig_ok = true;
return 0;
diff --git a/kernel/module_signing.c b/kernel/module_signing.c
index 4c28cb55275f..cef72a6f6b5d 100644
--- a/kernel/module_signing.c
+++ b/kernel/module_signing.c
@@ -19,17 +19,18 @@
* @data: The data to be verified
* @len: Size of @data.
* @trusted_keys: Keyring to use for verification
- * @what: Informational string for log messages
+ * @purpose: The use to which the key is being put
*/
int verify_appended_signature(const void *data, size_t *len,
- struct key *trusted_keys, const char *what)
+ struct key *trusted_keys,
+ enum key_being_used_for purpose)
{
const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
struct module_signature ms;
size_t sig_len, modlen = *len;
int ret;
- pr_devel("==>%s(,%zu)\n", __func__, modlen);
+ pr_devel("==>%s %s(,%zu)\n", __func__, key_being_used_for[purpose], modlen);
if (markerlen > modlen)
return -ENODATA;
@@ -44,7 +45,7 @@ int verify_appended_signature(const void *data, size_t *len,
memcpy(&ms, data + (modlen - sizeof(ms)), sizeof(ms));
- ret = mod_check_sig(&ms, modlen, what);
+ ret = mod_check_sig(&ms, modlen, key_being_used_for[purpose]);
if (ret)
return ret;
@@ -54,6 +55,6 @@ int verify_appended_signature(const void *data, size_t *len,
return verify_pkcs7_signature(data, modlen, data + modlen, sig_len,
trusted_keys,
- VERIFYING_MODULE_SIGNATURE,
+ purpose,
NULL, NULL);
}
--
2.31.1
^ permalink raw reply related
* Re: [PATCH v2 1/5] powerpc/watchdog: Fix missed watchdog reset due to memory ordering race
From: Laurent Dufour @ 2021-11-05 9:20 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev
In-Reply-To: <20211104161057.1255659-2-npiggin@gmail.com>
Le 04/11/2021 à 17:10, Nicholas Piggin a écrit :
> It is possible for all CPUs to miss the pending cpumask becoming clear,
> and then nobody resetting it, which will cause the lockup detector to
> stop working. It will eventually expire, but watchdog_smp_panic will
> avoid doing anything if the pending mask is clear and it will never be
> reset.
>
> Order the cpumask clear vs the subsequent test to close this race.
>
> Add an extra check for an empty pending mask when the watchdog fires and
> finds its bit still clear, to try to catch any other possible races or
> bugs here and keep the watchdog working. The extra test in
> arch_touch_nmi_watchdog is required to prevent the new warning from
> firing off.
>
> Debugged-by: Laurent Dufour <ldufour@linux.ibm.com>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> arch/powerpc/kernel/watchdog.c | 36 +++++++++++++++++++++++++++++++++-
> 1 file changed, 35 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
> index f9ea0e5357f9..be80071336a4 100644
> --- a/arch/powerpc/kernel/watchdog.c
> +++ b/arch/powerpc/kernel/watchdog.c
> @@ -135,6 +135,10 @@ static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
> {
> cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
> cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
> + /*
> + * See wd_smp_clear_cpu_pending()
> + */
> + smp_mb();
> if (cpumask_empty(&wd_smp_cpus_pending)) {
> wd_smp_last_reset_tb = tb;
> cpumask_andnot(&wd_smp_cpus_pending,
> @@ -215,13 +219,39 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
>
> cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
> wd_smp_unlock(&flags);
> + } else {
> + /*
> + * The last CPU to clear pending should have reset the
> + * watchdog, yet we find it empty here. This should not
> + * happen but we can try to recover and avoid a false
> + * positive if it does.
> + */
> + if (WARN_ON_ONCE(cpumask_empty(&wd_smp_cpus_pending)))
> + goto none_pending;
I ran a stress test on my victim node on top of this patch and hit that warning:
[ C475] ------------[ cut here ]------------
[ C475] WARNING: CPU: 475 PID: 0 at
/home/laurent/src/linux-ppc/arch/powerpc/kernel/watchdog.c:260
wd_smp_clear_cpu_pending+0x320/0x4b0
[ C475] Modules linked in: rpadlpar_io rpaphp xt_tcpudp iptable_filter
ip_tables x_tables xfs pseries_rng rng_core vmx_crypto gf128mul be2net fuse
btrfs blake2b_generic libcrc32c xor zstd_compress lzo_compress raid6_pq
dm_service_time crc32c_vpmsum lpfc crc_t10dif crct10dif_generic crct10dif_common
dm_mirror dm_region_hash dm_log dm_multipath scsi_dh_rdac scsi_dh_alua autofs4
[ C475] CPU: 475 PID: 0 Comm: swapper/475 Kdump: loaded Not tainted
5.15.0-rc2-ppc-bz192129+ #72
[ C475] NIP: c00000000003d710 LR: c00000000003d478 CTR: c00000000003e2e0
[ C475] REGS: c00006b16026f420 TRAP: 0700 Not tainted (5.15.0-rc2-ppc-bz192129+)
[ C475] MSR: 8000000000029033 <SF,EE,ME,IR,DR,RI,LE> CR: 22000222 XER: 20000000
[ C475] CFAR: c00000000003d480 IRQMASK: 3
[ C475] GPR00: c00000000003e3bc c00006b16026f6c0 c000000001b28700 0000000000000800
[ C475] GPR04: 0000000000000000 0000000000000800 0000000000000800 0000000000000000
[ C475] GPR08: 0000000000000000 0000000000000000 00000000000000f8 00013f06986272e7
[ C475] GPR12: c00000000003e2e0 c000000007d3df00 0000000000000000 000000001f043b60
[ C475] GPR16: c00006b1601b7b00 0000000000000000 c00000000003e2e0 0000000000000001
[ C475] GPR20: 0000347411d4cf28 c00007adbdb0a898 0000000000000001 0000000000000000
[ C475] GPR24: 0000000000000000 0000000000000003 c000000001b6d7d0 00013f0698627d84
[ C475] GPR28: c000000001bd05c8 c000000001bd05b8 c000000001bd06c8 00000000000001db
[ C475] NIP [c00000000003d710] wd_smp_clear_cpu_pending+0x320/0x4b0
[ C475] LR [c00000000003d478] wd_smp_clear_cpu_pending+0x88/0x4b0
[ C475] Call Trace:
[ C475] [c00006b16026f6c0] [0000000000000001] 0x1 (unreliable)
[ C475] [c00006b16026f770] [c00000000003e3bc] watchdog_timer_fn+0xdc/0x5a0
[ C475] [c00006b16026f840] [c000000000245a4c] __hrtimer_run_queues+0x49c/0x700
[ C475] [c00006b16026f8f0] [c000000000246c20] hrtimer_interrupt+0x110/0x310
[ C475] [c00006b16026f9a0] [c0000000000292f8] timer_interrupt+0x1e8/0x5a0
[ C475] [c00006b16026fa00] [c000000000009a00] decrementer_common_virt+0x210/0x220
[ C475] --- interrupt: 900 at plpar_hcall_norets_notrace+0x18/0x2c
[ C475] NIP: c0000000000e5dd0 LR: c000000000c18f04 CTR: 0000000000000000
[ C475] REGS: c00006b16026fa70 TRAP: 0900 Not tainted (5.15.0-rc2-ppc-bz192129+)
[ C475] MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 22000824 XER: 20000000
[ C475] CFAR: 0000000000000c00 IRQMASK: 0
[ C475] GPR00: 0000000000000000 c00006b16026fd10 c000000001b28700 0000000000000000
[ C475] GPR04: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ C475] GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000001
[ C475] GPR12: 000000000000ffff c000000007d3df00 0000000000000000 000000001f043b60
[ C475] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ C475] GPR20: 0000000000000000 0000000000000000 0000000000000000 c000000001a46cd0
[ C475] GPR24: c000000001b68e38 000034738d859946 0000000000000000 0000000000000001
[ C475] GPR28: 0000000000000000 0000000000000001 c000000001472360 c000000001472368
[ C475] NIP [c0000000000e5dd0] plpar_hcall_norets_notrace+0x18/0x2c
[ C475] LR [c000000000c18f04] check_and_cede_processor.part.2+0x24/0x70
[ C475] --- interrupt: 900
[ C475] [c00006b16026fd10] [c00007adbdb0a880] 0xc00007adbdb0a880 (unreliable)
[ C475] [c00006b16026fd70] [c000000000c194f4] dedicated_cede_loop+0x174/0x200
[ C475] [c00006b16026fdb0] [c000000000c15b2c] cpuidle_enter_state+0x3ac/0x6d0
[ C475] [c00006b16026fe20] [c000000000c15ef0] cpuidle_enter+0x50/0x70
[ C475] [c00006b16026fe60] [c0000000001a7f9c] call_cpuidle+0x4c/0x90
[ C475] [c00006b16026fe80] [c0000000001a84f0] do_idle+0x310/0x3c0
[ C475] [c00006b16026ff00] [c0000000001a8948] cpu_startup_entry+0x38/0x50
[ C475] [c00006b16026ff30] [c00000000005fb5c] start_secondary+0x2bc/0x2f0
[ C475] [c00006b16026ff90] [c00000000000d254] start_secondary_prolog+0x10/0x14
[ C475] Instruction dump:
[ C475] 48eb7049 60000000 e8610068 4bfffee4 392d0918 7c20492a 482c54f1 60000000
[ C475] 4bfffe4c 60000000 60000000 60000000 <0fe00000> fb210078 fb410080 fb610088
[ C475] irq event stamp: 0
[ C475] hardirqs last enabled at (0): [<0000000000000000>] 0x0
[ C475] hardirqs last disabled at (0): [<c00000000014342c>]
copy_process+0x76c/0x1e00
[ C475] softirqs last enabled at (0): [<c00000000014342c>]
copy_process+0x76c/0x1e00
[ C475] softirqs last disabled at (0): [<0000000000000000>] 0x0
[ C475] ---[ end trace 6e8311d1692d057b ]---
I guess there is a possible race here between watchdog_timer_interrupt() and
another CPU's watchdog_smp_panic().
> }
> return;
> }
> +
> cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
> +
> + /*
> + * Order the store to clear pending with the load(s) to check all
> + * words in the pending mask to check they are all empty. This orders
> + * with the same barrier on another CPU. This prevents two CPUs
> + * clearing the last 2 pending bits, but neither seeing the other's
> + * store when checking if the mask is empty, and missing an empty
> + * mask, which ends with a false positive.
> + */
> + smp_mb();
> if (cpumask_empty(&wd_smp_cpus_pending)) {
> unsigned long flags;
>
> +none_pending:
> + /*
> + * Double check under lock because more than one CPU could see
> + * a clear mask with the lockless check after clearing their
> + * pending bits.
> + */
> wd_smp_lock(&flags);
> if (cpumask_empty(&wd_smp_cpus_pending)) {
> wd_smp_last_reset_tb = tb;
> @@ -312,8 +342,12 @@ void arch_touch_nmi_watchdog(void)
> {
> unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
> int cpu = smp_processor_id();
> - u64 tb = get_tb();
> + u64 tb;
>
> + if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
> + return;
> +
> + tb = get_tb();
> if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
> per_cpu(wd_timer_tb, cpu) = tb;
> wd_smp_clear_cpu_pending(cpu, tb);
>
^ permalink raw reply
* Re: [PATCH] powerpc: xmon: remove the duplicated operand of the bitwise operator
From: Michael Ellerman @ 2021-11-05 6:13 UTC (permalink / raw)
To: cgel.zte; +Cc: Zeal Robot, linux-kernel, paulus, ye.guojin, linuxppc-dev
In-Reply-To: <20211105034011.76008-1-ye.guojin@zte.com.cn>
cgel.zte@gmail.com writes:
> From: Ye Guojin <ye.guojin@zte.com.cn>
>
> The operands of the bitwise OR operator are duplicated, remove one of
> them.
>
> Reported-by: Zeal Robot <zealci@zte.com.cn>
> Signed-off-by: Ye Guojin <ye.guojin@zte.com.cn>
> ---
> arch/powerpc/xmon/ppc-opc.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
Thanks, but this code is copied from binutils, we don't take cleanup
patches to it.
cheers
> diff --git a/arch/powerpc/xmon/ppc-opc.c b/arch/powerpc/xmon/ppc-opc.c
> index dfb80810b16c..2cab0ec0d162 100644
> --- a/arch/powerpc/xmon/ppc-opc.c
> +++ b/arch/powerpc/xmon/ppc-opc.c
> @@ -6731,9 +6731,9 @@ const struct powerpc_opcode powerpc_opcodes[] = {
> {"fre.", A(63,24,1), AFRALFRC_MASK, POWER5, POWER7|PPCVLE, {FRT, FRB, A_L}},
>
> {"fmul", A(63,25,0), AFRB_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRC}},
> -{"fm", A(63,25,0), AFRB_MASK, PWRCOM, PPCVLE|PPCVLE, {FRT, FRA, FRC}},
> +{"fm", A(63, 25, 0), AFRB_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRC}},
> {"fmul.", A(63,25,1), AFRB_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRC}},
> -{"fm.", A(63,25,1), AFRB_MASK, PWRCOM, PPCVLE|PPCVLE, {FRT, FRA, FRC}},
> +{"fm.", A(63, 25, 1), AFRB_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRC}},
>
> {"frsqrte", A(63,26,0), AFRAFRC_MASK, POWER7, PPCVLE, {FRT, FRB}},
> {"frsqrte", A(63,26,0), AFRALFRC_MASK, PPC, POWER7|PPCVLE, {FRT, FRB, A_L}},
> --
> 2.25.1
^ permalink raw reply
* [PATCH] powerpc/64s: introduce CONFIG_MAXSMP to test very large SMP
From: Nicholas Piggin @ 2021-11-05 4:11 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
Similarly to x86, add MAXSMP that should help flush out problems with
very large SMP and other values associated with very big systems.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/Kconfig | 8 ++++++++
arch/powerpc/platforms/Kconfig.cputype | 5 +++--
2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b8f6185d3998..d585fcfa456f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -64,6 +64,13 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK
config NEED_PER_CPU_PAGE_FIRST_CHUNK
def_bool y if PPC64
+config MAXSMP
+ bool "Enable Maximum number of SMP Processors and NUMA Nodes"
+ depends on SMP && DEBUG_KERNEL && PPC_BOOK3S_64
+ help
+ Enable maximum number of CPUS and NUMA Nodes for this architecture.
+ If unsure, say N.
+
config NR_IRQS
int "Number of virtual interrupt numbers"
range 32 1048576
@@ -666,6 +673,7 @@ config NUMA
config NODES_SHIFT
int
+ default "10" if MAXSMP
default "8" if PPC64
default "4"
depends on NUMA
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index a208997ade88..3fd6c1941151 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -476,8 +476,9 @@ config SMP
If you don't know what to do here, say N.
config NR_CPUS
- int "Maximum number of CPUs (2-8192)" if SMP
- range 2 8192 if SMP
+ int "Maximum number of CPUs (2-8192)" if SMP && !MAXSMP
+ range 2 16384 if SMP
+ default 16384 if MAXSMP
default "1" if !SMP
default "32" if PPC64
default "4"
--
2.23.0
^ permalink raw reply related
* [PATCH 2/2] powerpc: select CPUMASK_OFFSTACK if NR_CPUS >= 8192
From: Nicholas Piggin @ 2021-11-05 3:50 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211105035042.1398309-1-npiggin@gmail.com>
Some core kernel code starts to go beyond the 2048 byte stack size
warning at NR_CPUS=8192, so select CPUMASK_OFFSTACK in that case.
x86 does similarly for very large NR_CPUS.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ba5b66189358..b8f6185d3998 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -163,6 +163,7 @@ config PPC
select BINFMT_ELF
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS
+ select CPUMASK_OFFSTACK if NR_CPUS >= 8192
select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
select DMA_OPS_BYPASS if PPC64
select DMA_OPS if PPC64
--
2.23.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox