LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Crashes in linux-next on powerpc with CONFIG_PPC_KUAP and CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG
From: Michael Ellerman @ 2019-05-07 14:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Stephen Rothwell, Petr Mladek

Hi folks,

Just an FYI in case anyone else is seeing crashes very early in boot in
linux-next with the above config options.

The problem is the combination of some new code called via printk(),
namely check_pointer(), which calls probe_kernel_read(). That then calls
allow_user_access() (PPC_KUAP) and that uses mmu_has_feature() too early
(before we've patched features). With the JUMP_LABEL debug enabled that
causes us to call printk() & dump_stack() and we end up recursing and
overflowing the stack.

Because it happens so early you don't get any output, just an apparently
dead system.

The stack trace (which you don't see) is something like:

  ...
  dump_stack+0xdc
  probe_kernel_read+0x1a4
  check_pointer+0x58
  string+0x3c
  vsnprintf+0x1bc
  vscnprintf+0x20
  printk_safe_log_store+0x7c
  printk+0x40
  dump_stack_print_info+0xbc
  dump_stack+0x8
  probe_kernel_read+0x1a4
  probe_kernel_read+0x19c
  check_pointer+0x58
  string+0x3c
  vsnprintf+0x1bc
  vscnprintf+0x20
  vprintk_store+0x6c
  vprintk_emit+0xec
  vprintk_func+0xd4
  printk+0x40
  cpufeatures_process_feature+0xc8
  scan_cpufeatures_subnodes+0x380
  of_scan_flat_dt_subnodes+0xb4
  dt_cpu_ftrs_scan_callback+0x158
  of_scan_flat_dt+0xf0
  dt_cpu_ftrs_scan+0x3c
  early_init_devtree+0x360
  early_setup+0x9c


The simple fix is to use early_mmu_has_feature() in allow_user_access(),
but we'd rather not do that because it penalises all
copy_to/from_users() for the life of the system with the cost of the
runtime check vs the jump label. The irony is probe_kernel_read()
shouldn't be allowing user access at all, because we're reading the
kernel not userspace.

For now if you're hitting it just turn off 
CONFIG_PPC_KUAP and/or CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG.

cheers

^ permalink raw reply

* Re: [PATCH v2 09/16] powernv/fadump: process architected register state data provided by firmware
From: Mahesh J Salgaonkar @ 2019-05-07 14:13 UTC (permalink / raw)
  To: Hari Bathini
  Cc: Ananth N Mavinakayanahalli, Mahesh J Salgaonkar, Vasant Hegde,
	linuxppc-dev, Nicholas Piggin, Stewart Smith, Daniel Axtens
In-Reply-To: <155541089317.812.14447001298006010972.stgit@hbathini.in.ibm.com>

On 2019-04-16 16:05:06 Tue, Hari Bathini wrote:
> From: Hari Bathini <hbathini@linux.vnet.ibm.com>
> 
> Firmware provides architected register state data at the time of crash.
> Process this data and build CPU notes to append to ELF core.
> 
> Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
> Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
> ---
> 
> Changes in v2:
> * Updated reg type values according to recent OPAL changes
> 
> 
>  arch/powerpc/include/asm/opal-api.h          |   23 +++
>  arch/powerpc/kernel/fadump-common.h          |    3 
>  arch/powerpc/platforms/powernv/opal-fadump.c |  187 ++++++++++++++++++++++++--
>  arch/powerpc/platforms/powernv/opal-fadump.h |    4 +
>  4 files changed, 206 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
> index 75471c2..91f2735 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -976,6 +976,29 @@ struct opal_sg_list {
>   * Firmware-Assisted Dump (FADump)
>   */
>  
> +/* FADump thread header for register entries */
> +struct opal_fadump_thread_hdr {
> +	__be32  pir;
> +	/* 0x00 - 0x0F - The corresponding stop state of the core */
> +	u8      core_state;
> +	u8      reserved[3];
> +
> +	__be32	offset;	/* Offset to Register Entries array */
> +	__be32	ecnt;	/* Number of entries */
> +	__be32	esize;	/* Alloc size of each array entry in bytes */
> +	__be32	eactsz;	/* Actual size of each array entry in bytes */
> +} __packed;
> +
> +#define OPAL_REG_TYPE_GPR		0x01
> +#define OPAL_REG_TYPE_SPR		0x02
> +
> +/* FADump register entry. */
> +struct opal_fadump_reg_entry {
> +	__be32		reg_type;
> +	__be32		reg_num;
> +	__be64		reg_val;
> +};
> +
>  /* The maximum number of dump sections supported by OPAL */
>  #define OPAL_FADUMP_NR_SECTIONS			64
>  
> diff --git a/arch/powerpc/kernel/fadump-common.h b/arch/powerpc/kernel/fadump-common.h
> index ff764d4..8d47382 100644
> --- a/arch/powerpc/kernel/fadump-common.h
> +++ b/arch/powerpc/kernel/fadump-common.h
> @@ -117,6 +117,9 @@ struct fadump_memory_range {
>  
>  /* Firmware-assisted dump configuration details. */
>  struct fw_dump {
> +	unsigned long	cpu_state_destination_addr;
> +	unsigned long	cpu_state_data_version;
> +	unsigned long	cpu_state_entry_size;
>  	unsigned long	cpu_state_data_size;
>  	unsigned long	hpte_region_size;
>  	unsigned long	boot_memory_size;
> diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c
> index da8480d..853f663 100644
> --- a/arch/powerpc/platforms/powernv/opal-fadump.c
> +++ b/arch/powerpc/platforms/powernv/opal-fadump.c
> @@ -94,6 +94,12 @@ static void update_fadump_config(struct fw_dump *fadump_conf,
>  
>  				last_end = base + size;
>  				j++;
> +			} else if (fdm->section[i].src_type ==
> +				   OPAL_FADUMP_CPU_STATE_DATA) {
> +				fadump_conf->cpu_state_destination_addr =
> +					be64_to_cpu(fdm->section[i].dest_addr);
> +				fadump_conf->cpu_state_data_size =
> +					be64_to_cpu(fdm->section[i].dest_size);
>  			}
>  		}
>  		fadump_conf->rmr_regions_cnt = j;
> @@ -199,6 +205,75 @@ static int opal_invalidate_fadump(struct fw_dump *fadump_conf)
>  	return 0;
>  }
>  
> +static inline void fadump_set_regval_regnum(struct pt_regs *regs, u32 reg_type,
> +					    u32 reg_num, u64 reg_val)
> +{
> +	if (reg_type == OPAL_REG_TYPE_GPR) {
> +		if (reg_num < 32)
> +			regs->gpr[reg_num] = reg_val;
> +		return;
> +	}
> +
> +	switch (reg_num) {
> +	case 2000:
> +		regs->nip = reg_val;
> +		break;
> +	case 2001:
> +		regs->msr = reg_val;
> +		break;
> +	case 9:
> +		regs->ctr = reg_val;
> +		break;
> +	case 8:
> +		regs->link = reg_val;
> +		break;
> +	case 1:
> +		regs->xer = reg_val;
> +		break;
> +	case 2002:
> +		regs->ccr = reg_val;
> +		break;
> +	case 19:
> +		regs->dar = reg_val;
> +		break;
> +	case 18:
> +		regs->dsisr = reg_val;
> +		break;

Can we use the SPRN_* #defines which are already present in asm/reg.h instead
of hard-coding numbers for the switch cases? You may want to add new #defines
for NIP, MSR and CCR.

Thanks,
-Mahesh.


^ permalink raw reply

* Re: [PATCH v1] timer:clock:ptp: add support the dynamic posix clock alarm set for ptp
From: Richard Cochran @ 2019-05-07 13:49 UTC (permalink / raw)
  To: Po Liu
  Cc: Roy Zang, netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	Leo Li, Claudiu Manoil, Mingkai Hu, Y.b. Lu,
	linuxppc-dev@lists.ozlabs.org, davem@davemloft.net,
	linux-arm-kernel@lists.infradead.org, deepa.kernel@gmail.com
In-Reply-To: <1557032106-28041-1-git-send-email-Po.Liu@nxp.com>

On Sun, May 05, 2019 at 05:02:05AM +0000, Po Liu wrote:
> Current kernel code do not support the dynamic posix clock alarm set.
> This code would support it by the posix timer structure.
> 
> 319  const struct k_clock clock_posix_dynamic = {
> 
> 320         .clock_getres   = pc_clock_getres,
> 321         .clock_set      = pc_clock_settime,
> 322         .clock_get      = pc_clock_gettime,
> 323         .clock_adj      = pc_clock_adjtime,
> 324 +       .timer_create   = pc_timer_create,
> 325 +       .timer_del      = pc_timer_delete,
> 326 +       .timer_set      = pc_timer_set,
> 327 +       .timer_arm      = pc_timer_arm,
> }
> 

Sorry, NAK, since we decided some time ago not to support timer_*
operations on dynamic clocks.  You get much better application level
timer performance by synchronizing CLOCK_REALTIME to your PHC and
using clock_nanosleep() with CLOCK_REALTIME or CLOCK_MONOTONIC.

> This won't change the user space system call code. Normally the user
> space set alarm by timer_create() and timer_settime(). Reference code
> are tools/testing/selftests/ptp/testptp.c.

That program still has misleading examples.  Sorry about that.  I'll
submit a patch to remove them.

> +static int pc_timer_create(struct k_itimer *new_timer)
> +{
> +	return 0;
> +}
> +

This of course would never work.  Consider what happens when two or
more timers are created and armed.

Thanks,
Richard

^ permalink raw reply

* [Bug 203517] WARNING: inconsistent lock state. inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
From: bugzilla-daemon @ 2019-05-07 13:38 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-203517-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=203517

--- Comment #5 from Erhard F. (erhard_f@mailbox.org) ---
Some more info about the system:
Talos II running Gentoo Linux ppc64, Big Endian.
btrfs root filesystem (zstd compressed, standard compression level).
swap: /sbin/zram-init -d0 -s32 -azstd -Lzram_swap 4096
build partition: /sbin/zram-init -d1 -s32 -alzo -text2 -orelatime -m1777
-Lvar_tmp_dir 34816 /var/tmp

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* [PATCH] powerpc: slightly improve cache helpers
From: Christophe Leroy @ 2019-05-07 13:31 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Segher Boessenkool
  Cc: linuxppc-dev, linux-kernel

Cache instructions (dcbz, dcbi, dcbf and dcbst) take two registers
that are summed to obtain the target address. Using '%y0' argument
gives GCC the opportunity to use both registers instead of only one
with the second being forced to 0.

Suggested-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/cache.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 40ea5b3781c6..5a22a869a20b 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -85,22 +85,22 @@ extern void _set_L3CR(unsigned long);
 
 static inline void dcbz(void *addr)
 {
-	__asm__ __volatile__ ("dcbz 0, %0" : : "r"(addr) : "memory");
+	__asm__ __volatile__ ("dcbz %y0" : : "m"(*(u8 *)addr) : "memory");
 }
 
 static inline void dcbi(void *addr)
 {
-	__asm__ __volatile__ ("dcbi 0, %0" : : "r"(addr) : "memory");
+	__asm__ __volatile__ ("dcbi %y0" : : "m"(*(u8 *)addr) : "memory");
 }
 
 static inline void dcbf(void *addr)
 {
-	__asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory");
+	__asm__ __volatile__ ("dcbf %y0" : : "m"(*(u8 *)addr) : "memory");
 }
 
 static inline void dcbst(void *addr)
 {
-	__asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory");
+	__asm__ __volatile__ ("dcbst %y0" : : "m"(*(u8 *)addr) : "memory");
 }
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
-- 
2.13.3


^ permalink raw reply related

* [PATCH] powerpc/ftrace: Enable C Version of recordmcount
From: Christophe Leroy @ 2019-05-07 13:31 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Selects HAVE_C_RECORDMCOUNT to use the C version of the recordmcount
instead of the old Perl Version of recordmcount.

This should improve build time. It also seems like the old Perl Version
misses some calls to _mcount that the C version finds.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2711aac24621..d87de4f9da61 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -180,6 +180,7 @@ config PPC
 	select HAVE_ARCH_NVRAM_OPS
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
+	select HAVE_C_RECORDMCOUNT
 	select HAVE_CBPF_JIT			if !PPC64
 	select HAVE_STACKPROTECTOR		if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
 	select HAVE_STACKPROTECTOR		if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
-- 
2.13.3


^ permalink raw reply related

* [Bug 203517] WARNING: inconsistent lock state. inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
From: bugzilla-daemon @ 2019-05-07 13:29 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-203517-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=203517

--- Comment #4 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 282669
  --> https://bugzilla.kernel.org/attachment.cgi?id=282669&action=edit
bisect.log

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* [Bug 203517] WARNING: inconsistent lock state. inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
From: bugzilla-daemon @ 2019-05-07 13:28 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-203517-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=203517

Erhard F. (erhard_f@mailbox.org) changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |fs_btrfs@kernel-bugs.osdl.o
                   |                            |rg
          Component|PPC-64                      |btrfs
           Hardware|All                         |PPC-64
            Product|Platform Specific/Hardware  |File System

--- Comment #3 from Erhard F. (erhard_f@mailbox.org) ---
There are only 'skip'ped commits left to test.
The first bad commit could be any of:
d3c6ab752c4145cba9af85021f02bc4655534f93
3f93aef535c8ea03e40cd8acf0753b3e6ed33e96

commit 3f93aef535c8ea03e40cd8acf0753b3e6ed33e96
Author: Dennis Zhou <dennis@kernel.org>
Date:   Mon Feb 4 15:20:08 2019 -0500
btrfs: add zstd compression level support

commit d3c6ab752c4145cba9af85021f02bc4655534f93
Author: Dennis Zhou <dennis@kernel.org>
Date:   Mon Feb 4 15:20:07 2019 -0500
btrfs: make zstd memory requirements monotonic

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* [PATCH v3] powerpc/64s: support nospectre_v2 cmdline option
From: Christopher M. Riedl @ 2019-05-07 12:43 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Christopher M. Riedl, ajd

Add support for disabling the kernel implemented spectre v2 mitigation
(count cache flush on context switch) via the nospectre_v2 cmdline
option.

Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Christopher M. Riedl <cmr@informatik.wtf>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
---
v2->v3:
	Address mpe's nitpick

 arch/powerpc/kernel/security.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index b33bafb8fcea..7005c50a991b 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -28,7 +28,7 @@ static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NO
 bool barrier_nospec_enabled;
 static bool no_nospec;
 static bool btb_flush_enabled;
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
 static bool no_spectrev2;
 #endif
 
@@ -106,7 +106,7 @@ static __init int barrier_nospec_debugfs_init(void)
 device_initcall(barrier_nospec_debugfs_init);
 #endif /* CONFIG_DEBUG_FS */
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
 static int __init handle_nospectre_v2(char *p)
 {
 	no_spectrev2 = true;
@@ -114,6 +114,9 @@ static int __init handle_nospectre_v2(char *p)
 	return 0;
 }
 early_param("nospectre_v2", handle_nospectre_v2);
+#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
 void setup_spectre_v2(void)
 {
 	if (no_spectrev2)
@@ -391,7 +394,17 @@ static void toggle_count_cache_flush(bool enable)
 
 void setup_count_cache_flush(void)
 {
-	toggle_count_cache_flush(true);
+	bool enable = true;
+
+	if (no_spectrev2) {
+		if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED)
+		    || security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED))
+			pr_warn("Spectre v2 mitigations not under software control, can't disable\n");
+
+		enable = false;
+	}
+
+	toggle_count_cache_flush(enable);
 }
 
 #ifdef CONFIG_DEBUG_FS
-- 
2.21.0


^ permalink raw reply related

* Re: [PATCH v2] powerpc/64s: support nospectre_v2 cmdline option
From: Christopher M Riedl @ 2019-05-07 11:54 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: ajd
In-Reply-To: <87mujywpw0.fsf@concordia.ellerman.id.au>


> On May 7, 2019 at 5:54 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> 
> 
> "Christopher M. Riedl" <cmr@informatik.wtf> writes:
> > diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
> > index b33bafb8fcea..d775da9b9227 100644
> > --- a/arch/powerpc/kernel/security.c
> > +++ b/arch/powerpc/kernel/security.c
> > @@ -391,6 +394,15 @@ static void toggle_count_cache_flush(bool enable)
> >  
> >  void setup_count_cache_flush(void)
> >  {
> > +	if (no_spectrev2) {
> > +		if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED)
> > +		    || security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED))
> > +			pr_warn("Spectre v2 mitigations not under software control, can't disable\n");
> > +
> > +		toggle_count_cache_flush(false);
> > +		return;
> > +	}
> > +
> >  	toggle_count_cache_flush(true);
> >  }
> 
> I'm nit-picking, but would it be better as:
>
Agreed, v3 is on the way :)
> 
> void setup_count_cache_flush(void)
> {
> 	bool enable = true;
> 
> 	if (no_spectrev2) {
> 		if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED)
> 		    || security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED))
> 			pr_warn("Spectre v2 mitigations not under software control, can't disable\n");
> 
> 		enable = false;
> 	}
> 
> 	toggle_count_cache_flush(enable);
> }
> 
> ???
> 
> cheers

^ permalink raw reply

* Re: [PATCH v2] drivers/dax: Allow to include DEV_DAX_PMEM as builtin
From: Aneesh Kumar K.V @ 2019-05-07 11:49 UTC (permalink / raw)
  To: dan.j.williams; +Cc: linux-mm, linuxppc-dev, linux-nvdimm
In-Reply-To: <20190401051421.17878-1-aneesh.kumar@linux.ibm.com>


Hi Dan,

"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:

> This move the dependency to DEV_DAX_PMEM_COMPAT such that only
> if DEV_DAX_PMEM is built as module we can allow the compat support.
>
> This allows to test the new code easily in a emulation setup where we
> often build things without module support.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

Any update on this. Can we merge this?

> ---
> Changes from V1:
> * Make sure we only build compat code as module
>
>  drivers/dax/Kconfig | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
> index 5ef624fe3934..a59f338f520f 100644
> --- a/drivers/dax/Kconfig
> +++ b/drivers/dax/Kconfig
> @@ -23,7 +23,6 @@ config DEV_DAX
>  config DEV_DAX_PMEM
>  	tristate "PMEM DAX: direct access to persistent memory"
>  	depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX
> -	depends on m # until we can kill DEV_DAX_PMEM_COMPAT
>  	default DEV_DAX
>  	help
>  	  Support raw access to persistent memory.  Note that this
> @@ -50,7 +49,7 @@ config DEV_DAX_KMEM
>  
>  config DEV_DAX_PMEM_COMPAT
>  	tristate "PMEM DAX: support the deprecated /sys/class/dax interface"
> -	depends on DEV_DAX_PMEM
> +	depends on m && DEV_DAX_PMEM=m
>  	default DEV_DAX_PMEM
>  	help
>  	  Older versions of the libdaxctl library expect to find all
> -- 
> 2.20.1


^ permalink raw reply

* Re: [PATCH v2] powerpc/64s: support nospectre_v2 cmdline option
From: Michael Ellerman @ 2019-05-07  9:54 UTC (permalink / raw)
  To: Christopher M. Riedl, linuxppc-dev; +Cc: Christopher M. Riedl, ajd
In-Reply-To: <20190507024321.22281-1-cmr@informatik.wtf>

"Christopher M. Riedl" <cmr@informatik.wtf> writes:
> diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
> index b33bafb8fcea..d775da9b9227 100644
> --- a/arch/powerpc/kernel/security.c
> +++ b/arch/powerpc/kernel/security.c
> @@ -391,6 +394,15 @@ static void toggle_count_cache_flush(bool enable)
>  
>  void setup_count_cache_flush(void)
>  {
> +	if (no_spectrev2) {
> +		if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED)
> +		    || security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED))
> +			pr_warn("Spectre v2 mitigations not under software control, can't disable\n");
> +
> +		toggle_count_cache_flush(false);
> +		return;
> +	}
> +
>  	toggle_count_cache_flush(true);
>  }

I'm nit-picking, but would it be better as:

void setup_count_cache_flush(void)
{
	bool enable = true;

	if (no_spectrev2) {
		if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED)
		    || security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED))
			pr_warn("Spectre v2 mitigations not under software control, can't disable\n");

		enable = false;
	}

	toggle_count_cache_flush(enable);
}

???

cheers

^ permalink raw reply

* Re: [PATCH AUTOSEL 4.14 65/95] powerpc: remove old GCC version checks
From: Christophe Leroy @ 2019-05-07  7:52 UTC (permalink / raw)
  To: Sasha Levin, linux-kernel, stable
  Cc: Sasha Levin, linuxppc-dev, Joel Stanley, Nicholas Piggin
In-Reply-To: <20190507053826.31622-65-sashal@kernel.org>

Hi Sasha,

I don't think GCC 4.6 is the minimum supported version for 4.14.

As far as I can see, commit cafa0010cd51f ("Raise the minimum required 
gcc version to 4.6") has not been applied to 4.14 and I can't see any 
reason such a commit should apply on a stable branch.

Christophe

Le 07/05/2019 à 07:37, Sasha Levin a écrit :
> From: Nicholas Piggin <npiggin@gmail.com>
> 
> [ Upstream commit f2910f0e6835339e6ce82cef22fa15718b7e3bfa ]
> 
> GCC 4.6 is the minimum supported now.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> Reviewed-by: Joel Stanley <joel@jms.id.au>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
> ---
>   arch/powerpc/Makefile | 31 ++-----------------------------
>   1 file changed, 2 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
> index 7452e50f4d1f..0f04c878113e 100644
> --- a/arch/powerpc/Makefile
> +++ b/arch/powerpc/Makefile
> @@ -396,36 +396,9 @@ archprepare: checkbin
>   # to stdout and these checks are run even on install targets.
>   TOUT	:= .tmp_gas_check
>   
> -# Check gcc and binutils versions:
> -# - gcc-3.4 and binutils-2.14 are a fatal combination
> -# - Require gcc 4.0 or above on 64-bit
> -# - gcc-4.2.0 has issues compiling modules on 64-bit
> +# Check toolchain versions:
> +# - gcc-4.6 is the minimum kernel-wide version so nothing required.
>   checkbin:
> -	@if test "$(cc-name)" != "clang" \
> -	    && test "$(cc-version)" = "0304" ; then \
> -		if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \
> -			echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build '; \
> -			echo 'correctly with gcc-3.4 and your version of binutils.'; \
> -			echo '*** Please upgrade your binutils or downgrade your gcc'; \
> -			false; \
> -		fi ; \
> -	fi
> -	@if test "$(cc-name)" != "clang" \
> -	    && test "$(cc-version)" -lt "0400" \
> -	    && test "x${CONFIG_PPC64}" = "xy" ; then \
> -                echo -n "Sorry, GCC v4.0 or above is required to build " ; \
> -                echo "the 64-bit powerpc kernel." ; \
> -                false ; \
> -        fi
> -	@if test "$(cc-name)" != "clang" \
> -	    && test "$(cc-fullversion)" = "040200" \
> -	    && test "x${CONFIG_MODULES}${CONFIG_PPC64}" = "xyy" ; then \
> -		echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \
> -		echo 'kernel with modules enabled.' ; \
> -		echo -n '*** Please use a different GCC version or ' ; \
> -		echo 'disable kernel modules' ; \
> -		false ; \
> -	fi
>   	@if test "x${CONFIG_CPU_LITTLE_ENDIAN}" = "xy" \
>   	    && $(LD) --version | head -1 | grep ' 2\.24$$' >/dev/null ; then \
>   		echo -n '*** binutils 2.24 miscompiles weak symbols ' ; \
> 

^ permalink raw reply

* Re: [PATCH] vfio-pci/nvlink2: Fix potential VMA leak
From: Greg Kurz @ 2019-05-07  7:01 UTC (permalink / raw)
  To: Sam Bobroff
  Cc: Alexey Kardashevskiy, Alex Williamson, linuxppc-dev, linux-kernel
In-Reply-To: <20190507014915.GA10274@tungsten.ozlabs.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 1323 bytes --]

On Tue, 7 May 2019 11:52:44 +1000
Sam Bobroff <sbobroff@linux.ibm.com> wrote:

> On Mon, May 06, 2019 at 03:58:45PM -0600, Alex Williamson wrote:
> > On Fri, 19 Apr 2019 17:37:17 +0200
> > Greg Kurz <groug@kaod.org> wrote:
> >   
> > > If vfio_pci_register_dev_region() fails then we should rollback
> > > previous changes, ie. unmap the ATSD registers.
> > > 
> > > Signed-off-by: Greg Kurz <groug@kaod.org>
> > > ---  
> > 
> > Applied to vfio next branch for v5.2 with Alexey's R-b.  Thanks!
> > 
> > Alex  
> 
> Should this have a fixes tag? e.g.:
> Fixes: 7f92891778df ("vfio_pci: Add NVIDIA GV100GL [Tesla V100 SXM2] subdriver")
> 

Oops... you're right.

Alex, can you add the above tag ?

> > >  drivers/vfio/pci/vfio_pci_nvlink2.c |    2 ++
> > >  1 file changed, 2 insertions(+)
> > > 
> > > diff --git a/drivers/vfio/pci/vfio_pci_nvlink2.c b/drivers/vfio/pci/vfio_pci_nvlink2.c
> > > index 32f695ffe128..50fe3c4f7feb 100644
> > > --- a/drivers/vfio/pci/vfio_pci_nvlink2.c
> > > +++ b/drivers/vfio/pci/vfio_pci_nvlink2.c
> > > @@ -472,6 +472,8 @@ int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
> > >  	return 0;
> > >  
> > >  free_exit:
> > > +	if (data->base)
> > > +		memunmap(data->base);
> > >  	kfree(data);
> > >  
> > >  	return ret;
> > >   
> >   


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH 09/15] nds32: switch to generic version of pte allocation
From: Greentime Hu @ 2019-05-07  6:49 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: Michal Hocko, Catalin Marinas, Palmer Dabbelt, linux-mips,
	Guo Ren, linux-hexagon, linux-riscv, linux-arch,
	Richard Weinberger, Helge Deller, x86, Russell King,
	Matthew Wilcox, Geert Uytterhoeven, Matt Turner, Sam Creasey,
	Arnd Bergmann, linux-um, linux-m68k, nios2-dev, Guan Xuetao,
	linux-arm-kernel, linux-parisc, Linux Kernel Mailing List,
	Richard Kuo, Paul Burton, linux-alpha, Ley Foon Tan,
	Andrew Morton, linuxppc-dev
In-Reply-To: <1556810922-20248-10-git-send-email-rppt@linux.ibm.com>

Hi Mike,

Mike Rapoport <rppt@linux.ibm.com> 於 2019年5月2日 週四 下午11:30寫道:
>
> The nds32 implementation of pte_alloc_one_kernel() differs from the generic
> in the use of __GFP_RETRY_MAYFAIL flag, which is removed after the
> conversion.
>
> The nds32 version of pte_alloc_one() missed the call to pgtable_page_ctor()
> and also used __GFP_RETRY_MAYFAIL. Switching it to use generic
> __pte_alloc_one() for the PTE page allocation ensures that page table
> constructor is run and the user page tables are allocated with
> __GFP_ACCOUNT.
>
> The conversion to the generic version of pte_free_kernel() removes the NULL
> check for pte.
>
> The pte_free() version on nds32 is identical to the generic one and can be
> simply dropped.
>
> Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
> ---
>  arch/nds32/include/asm/pgalloc.h | 31 ++++---------------------------
>  1 file changed, 4 insertions(+), 27 deletions(-)

Thanks for your patch.
I'm assuming this is going in along with the rest of the patches, so I'm not
going to add it to my tree.

Acked-by: Greentime Hu <greentime@andestech.com>

^ permalink raw reply

* [PATCH kernel 2/2] powerpc/pseries/dma: Enable swiotlb
From: Alexey Kardashevskiy @ 2019-05-07  6:25 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Alexey Kardashevskiy, Alistair Popple, Thiago Jung Bauermann,
	David Gibson
In-Reply-To: <20190507062559.20295-1-aik@ozlabs.ru>

So far the pseries platform has always been using IOMMU making SWIOTLB
unnecessary. Now we want secure guests which means devices can only
access certain areas of guest physical memory; we are going to use
SWIOTLB for this purpose.

This allows SWIOTLB for pseries. By default there is no change in behavior.

This enables SWIOTLB when the "swiotlb" kernel parameter is set to "force".

With the SWIOTLB enabled, the kernel creates a directly mapped DMA window
(using the usual DDW mechanism) and implements SWIOTLB on top of that.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 arch/powerpc/platforms/pseries/setup.c | 5 +++++
 arch/powerpc/platforms/pseries/Kconfig | 1 +
 2 files changed, 6 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index e4f0dfd4ae33..30d72b587ac5 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -42,6 +42,7 @@
 #include <linux/of.h>
 #include <linux/of_pci.h>
 #include <linux/memblock.h>
+#include <linux/swiotlb.h>
 
 #include <asm/mmu.h>
 #include <asm/processor.h>
@@ -71,6 +72,7 @@
 #include <asm/isa-bridge.h>
 #include <asm/security_features.h>
 #include <asm/asm-const.h>
+#include <asm/swiotlb.h>
 
 #include "pseries.h"
 #include "../../../../drivers/pci/pci.h"
@@ -797,6 +799,9 @@ static void __init pSeries_setup_arch(void)
 	}
 
 	ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
+
+	if (swiotlb_force == SWIOTLB_FORCE)
+		ppc_swiotlb_enable = 1;
 }
 
 static void pseries_panic(char *str)
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 9c6b3d860518..b9e8b608de01 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -23,6 +23,7 @@ config PPC_PSERIES
 	select ARCH_RANDOM
 	select PPC_DOORBELL
 	select FORCE_SMP
+	select SWIOTLB
 	default y
 
 config PPC_SPLPAR
-- 
2.17.1


^ permalink raw reply related

* [PATCH kernel 1/2] powerpc/pseries/dma: Allow swiotlb
From: Alexey Kardashevskiy @ 2019-05-07  6:25 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Alexey Kardashevskiy, Alistair Popple, Thiago Jung Bauermann,
	David Gibson
In-Reply-To: <20190507062559.20295-1-aik@ozlabs.ru>

The commit 8617a5c5bc00 ("powerpc/dma: handle iommu bypass in
dma_iommu_ops") merged direct DMA ops into the IOMMU DMA ops allowing
SWIOTLB as well but only for mapping; the unmapping and bouncing parts
were left unmodified.

This adds missing direct unmapping calls to .unmap_page() and .unmap_sg().

This adds missing sync callbacks and directs them to the direct DMA hooks.

Fixes: 8617a5c5bc00 ("powerpc/dma: handle iommu bypass in dma_iommu_ops")
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 arch/powerpc/kernel/dma-iommu.c | 36 +++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 09231ef06d01..92b318df1aa1 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -82,6 +82,8 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
 	if (!dma_iommu_map_bypass(dev, attrs))
 		iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size,
 				direction,  attrs);
+	else
+		dma_direct_unmap_page(dev, dma_handle, size, direction, attrs);
 }
 
 
@@ -102,6 +104,8 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
 	if (!dma_iommu_map_bypass(dev, attrs))
 		ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
 			   direction, attrs);
+	else
+		dma_direct_unmap_sg(dev, sglist, nelems, direction, attrs);
 }
 
 static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
@@ -163,6 +167,34 @@ u64 dma_iommu_get_required_mask(struct device *dev)
 	return mask;
 }
 
+static void dma_iommu_sync_for_cpu(struct device *dev, dma_addr_t addr,
+		size_t size, enum dma_data_direction dir)
+{
+	if (dma_iommu_alloc_bypass(dev))
+		dma_direct_sync_single_for_cpu(dev, addr, size, dir);
+}
+
+static void dma_iommu_sync_for_device(struct device *dev, dma_addr_t addr,
+		size_t sz, enum dma_data_direction dir)
+{
+	if (dma_iommu_alloc_bypass(dev))
+		dma_direct_sync_single_for_device(dev, addr, sz, dir);
+}
+
+extern void dma_iommu_sync_sg_for_cpu(struct device *dev,
+		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+	if (dma_iommu_alloc_bypass(dev))
+		dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir);
+}
+
+extern void dma_iommu_sync_sg_for_device(struct device *dev,
+		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+	if (dma_iommu_alloc_bypass(dev))
+		dma_direct_sync_sg_for_device(dev, sgl, nents, dir);
+}
+
 const struct dma_map_ops dma_iommu_ops = {
 	.alloc			= dma_iommu_alloc_coherent,
 	.free			= dma_iommu_free_coherent,
@@ -172,4 +204,8 @@ const struct dma_map_ops dma_iommu_ops = {
 	.map_page		= dma_iommu_map_page,
 	.unmap_page		= dma_iommu_unmap_page,
 	.get_required_mask	= dma_iommu_get_required_mask,
+	.sync_single_for_cpu	= dma_iommu_sync_for_cpu,
+	.sync_single_for_device	= dma_iommu_sync_for_device,
+	.sync_sg_for_cpu	= dma_iommu_sync_sg_for_cpu,
+	.sync_sg_for_device	= dma_iommu_sync_sg_for_device,
 };
-- 
2.17.1


^ permalink raw reply related

* [PATCH kernel 0/2] pseries: Enable SWIOTLB
From: Alexey Kardashevskiy @ 2019-05-07  6:25 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Alexey Kardashevskiy, Alistair Popple, Thiago Jung Bauermann,
	David Gibson


This is an attempt to allow PCI pass through to a secure guest when
hardware can only access insecure memory. This allows SWIOTLB use
for passed through devices.

Later on secure VMs will unsecure SWIOTLB bounce buffers for DMA
and the rest of the guest RAM will be unavailable to the hardware
by default.


This is based on sha1
e93c9c99a629 Linus Torvalds "Linux 5.1".

Please comment. Thanks.



Alexey Kardashevskiy (2):
  powerpc/pseries/dma: Allow swiotlb
  powerpc/pseries/dma: Enable swiotlb

 arch/powerpc/kernel/dma-iommu.c        | 36 ++++++++++++++++++++++++++
 arch/powerpc/platforms/pseries/setup.c |  5 ++++
 arch/powerpc/platforms/pseries/Kconfig |  1 +
 3 files changed, 42 insertions(+)

-- 
2.17.1



^ permalink raw reply

* [PATCH AUTOSEL 4.14 65/95] powerpc: remove old GCC version checks
From: Sasha Levin @ 2019-05-07  5:37 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sasha Levin, linuxppc-dev, Joel Stanley, Nicholas Piggin
In-Reply-To: <20190507053826.31622-1-sashal@kernel.org>

From: Nicholas Piggin <npiggin@gmail.com>

[ Upstream commit f2910f0e6835339e6ce82cef22fa15718b7e3bfa ]

GCC 4.6 is the minimum supported now.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
---
 arch/powerpc/Makefile | 31 ++-----------------------------
 1 file changed, 2 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 7452e50f4d1f..0f04c878113e 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -396,36 +396,9 @@ archprepare: checkbin
 # to stdout and these checks are run even on install targets.
 TOUT	:= .tmp_gas_check
 
-# Check gcc and binutils versions:
-# - gcc-3.4 and binutils-2.14 are a fatal combination
-# - Require gcc 4.0 or above on 64-bit
-# - gcc-4.2.0 has issues compiling modules on 64-bit
+# Check toolchain versions:
+# - gcc-4.6 is the minimum kernel-wide version so nothing required.
 checkbin:
-	@if test "$(cc-name)" != "clang" \
-	    && test "$(cc-version)" = "0304" ; then \
-		if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \
-			echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build '; \
-			echo 'correctly with gcc-3.4 and your version of binutils.'; \
-			echo '*** Please upgrade your binutils or downgrade your gcc'; \
-			false; \
-		fi ; \
-	fi
-	@if test "$(cc-name)" != "clang" \
-	    && test "$(cc-version)" -lt "0400" \
-	    && test "x${CONFIG_PPC64}" = "xy" ; then \
-                echo -n "Sorry, GCC v4.0 or above is required to build " ; \
-                echo "the 64-bit powerpc kernel." ; \
-                false ; \
-        fi
-	@if test "$(cc-name)" != "clang" \
-	    && test "$(cc-fullversion)" = "040200" \
-	    && test "x${CONFIG_MODULES}${CONFIG_PPC64}" = "xyy" ; then \
-		echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \
-		echo 'kernel with modules enabled.' ; \
-		echo -n '*** Please use a different GCC version or ' ; \
-		echo 'disable kernel modules' ; \
-		false ; \
-	fi
 	@if test "x${CONFIG_CPU_LITTLE_ENDIAN}" = "xy" \
 	    && $(LD) --version | head -1 | grep ' 2\.24$$' >/dev/null ; then \
 		echo -n '*** binutils 2.24 miscompiles weak symbols ' ; \
-- 
2.20.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.19 65/81] powerpc/smp: Fix NMI IPI xmon timeout
From: Sasha Levin @ 2019-05-07  5:35 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: Sasha Levin, linuxppc-dev, Nicholas Piggin
In-Reply-To: <20190507053554.30848-1-sashal@kernel.org>

From: Nicholas Piggin <npiggin@gmail.com>

[ Upstream commit 88b9a3d1425a436e95c41f09986fdae2daee437a ]

The xmon debugger IPI handler waits in the callback function while
xmon is still active. This means they don't complete the IPI, and the
initiator always times out waiting for them.

Things manage to work after the timeout because there is some fallback
logic to keep NMI IPI state sane in case of the timeout, but this is a
bit ugly.

This patch changes NMI IPI back to half-asynchronous (i.e., wait for
everyone to call in, do not wait for IPI function to complete), but
the complexity is avoided by going one step further and allowing new
IPIs to be issued before the IPI functions have all completed.

If synchronization against that is required, it is left up to the
caller, but current callers don't require that. In fact with the
timeout handling, callers must be able to cope with this already.

Fixes: 5b73151fff63 ("powerpc: NMI IPI make NMI IPIs fully sychronous")
Cc: stable@vger.kernel.org # v4.19+
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
---
 arch/powerpc/kernel/smp.c | 93 ++++++++++++---------------------------
 1 file changed, 29 insertions(+), 64 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 22abba5f4cf0..6dc43205382b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -338,13 +338,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
  * NMI IPIs may not be recoverable, so should not be used as ongoing part of
  * a running system. They can be used for crash, debug, halt/reboot, etc.
  *
- * NMI IPIs are globally single threaded. No more than one in progress at
- * any time.
- *
  * The IPI call waits with interrupts disabled until all targets enter the
- * NMI handler, then the call returns.
+ * NMI handler, then returns. Subsequent IPIs can be issued before targets
+ * have returned from their handlers, so there is no guarantee about
+ * concurrency or re-entrancy.
  *
- * No new NMI can be initiated until targets exit the handler.
+ * A new NMI can be issued before all targets exit the handler.
  *
  * The IPI call may time out without all targets entering the NMI handler.
  * In that case, there is some logic to recover (and ignore subsequent
@@ -355,7 +354,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 
 static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
 static struct cpumask nmi_ipi_pending_mask;
-static int nmi_ipi_busy_count = 0;
+static bool nmi_ipi_busy = false;
 static void (*nmi_ipi_function)(struct pt_regs *) = NULL;
 
 static void nmi_ipi_lock_start(unsigned long *flags)
@@ -394,7 +393,7 @@ static void nmi_ipi_unlock_end(unsigned long *flags)
  */
 int smp_handle_nmi_ipi(struct pt_regs *regs)
 {
-	void (*fn)(struct pt_regs *);
+	void (*fn)(struct pt_regs *) = NULL;
 	unsigned long flags;
 	int me = raw_smp_processor_id();
 	int ret = 0;
@@ -405,29 +404,17 @@ int smp_handle_nmi_ipi(struct pt_regs *regs)
 	 * because the caller may have timed out.
 	 */
 	nmi_ipi_lock_start(&flags);
-	if (!nmi_ipi_busy_count)
-		goto out;
-	if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask))
-		goto out;
-
-	fn = nmi_ipi_function;
-	if (!fn)
-		goto out;
-
-	cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
-	nmi_ipi_busy_count++;
-	nmi_ipi_unlock();
-
-	ret = 1;
-
-	fn(regs);
-
-	nmi_ipi_lock();
-	if (nmi_ipi_busy_count > 1) /* Can race with caller time-out */
-		nmi_ipi_busy_count--;
-out:
+	if (cpumask_test_cpu(me, &nmi_ipi_pending_mask)) {
+		cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
+		fn = READ_ONCE(nmi_ipi_function);
+		WARN_ON_ONCE(!fn);
+		ret = 1;
+	}
 	nmi_ipi_unlock_end(&flags);
 
+	if (fn)
+		fn(regs);
+
 	return ret;
 }
 
@@ -453,7 +440,7 @@ static void do_smp_send_nmi_ipi(int cpu, bool safe)
  * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
  * - fn is the target callback function.
  * - delay_us > 0 is the delay before giving up waiting for targets to
- *   complete executing the handler, == 0 specifies indefinite delay.
+ *   begin executing the handler, == 0 specifies indefinite delay.
  */
 int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool safe)
 {
@@ -467,31 +454,33 @@ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool
 	if (unlikely(!smp_ops))
 		return 0;
 
-	/* Take the nmi_ipi_busy count/lock with interrupts hard disabled */
 	nmi_ipi_lock_start(&flags);
-	while (nmi_ipi_busy_count) {
+	while (nmi_ipi_busy) {
 		nmi_ipi_unlock_end(&flags);
-		spin_until_cond(nmi_ipi_busy_count == 0);
+		spin_until_cond(!nmi_ipi_busy);
 		nmi_ipi_lock_start(&flags);
 	}
-
+	nmi_ipi_busy = true;
 	nmi_ipi_function = fn;
 
+	WARN_ON_ONCE(!cpumask_empty(&nmi_ipi_pending_mask));
+
 	if (cpu < 0) {
 		/* ALL_OTHERS */
 		cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
 		cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
 	} else {
-		/* cpumask starts clear */
 		cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
 	}
-	nmi_ipi_busy_count++;
+
 	nmi_ipi_unlock();
 
+	/* Interrupts remain hard disabled */
+
 	do_smp_send_nmi_ipi(cpu, safe);
 
 	nmi_ipi_lock();
-	/* nmi_ipi_busy_count is held here, so unlock/lock is okay */
+	/* nmi_ipi_busy is set here, so unlock/lock is okay */
 	while (!cpumask_empty(&nmi_ipi_pending_mask)) {
 		nmi_ipi_unlock();
 		udelay(1);
@@ -499,34 +488,19 @@ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool
 		if (delay_us) {
 			delay_us--;
 			if (!delay_us)
-				goto timeout;
+				break;
 		}
 	}
 
-	while (nmi_ipi_busy_count > 1) {
-		nmi_ipi_unlock();
-		udelay(1);
-		nmi_ipi_lock();
-		if (delay_us) {
-			delay_us--;
-			if (!delay_us)
-				goto timeout;
-		}
-	}
-
-timeout:
 	if (!cpumask_empty(&nmi_ipi_pending_mask)) {
 		/* Timeout waiting for CPUs to call smp_handle_nmi_ipi */
 		ret = 0;
 		cpumask_clear(&nmi_ipi_pending_mask);
 	}
-	if (nmi_ipi_busy_count > 1) {
-		/* Timeout waiting for CPUs to execute fn */
-		ret = 0;
-		nmi_ipi_busy_count = 1;
-	}
 
-	nmi_ipi_busy_count--;
+	nmi_ipi_function = NULL;
+	nmi_ipi_busy = false;
+
 	nmi_ipi_unlock_end(&flags);
 
 	return ret;
@@ -594,17 +568,8 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
 static void nmi_stop_this_cpu(struct pt_regs *regs)
 {
 	/*
-	 * This is a special case because it never returns, so the NMI IPI
-	 * handling would never mark it as done, which makes any later
-	 * smp_send_nmi_ipi() call spin forever. Mark it done now.
-	 *
 	 * IRQs are already hard disabled by the smp_handle_nmi_ipi.
 	 */
-	nmi_ipi_lock();
-	if (nmi_ipi_busy_count > 1)
-		nmi_ipi_busy_count--;
-	nmi_ipi_unlock();
-
 	spin_begin();
 	while (1)
 		spin_cpu_relax();
-- 
2.20.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.19 64/81] powerpc/smp: Fix NMI IPI timeout
From: Sasha Levin @ 2019-05-07  5:35 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: Sasha Levin, linuxppc-dev, Nicholas Piggin
In-Reply-To: <20190507053554.30848-1-sashal@kernel.org>

From: Nicholas Piggin <npiggin@gmail.com>

[ Upstream commit 1b5fc84aba170bdfe3533396ca9662ceea1609b7 ]

The NMI IPI timeout logic is broken: if __smp_send_nmi_ipi() times out
on the first condition, delay_us will be zero, which will send it into
the second spin loop with no timeout, so it will spin forever.

Fixes: 5b73151fff63 ("powerpc: NMI IPI make NMI IPIs fully sychronous")
Cc: stable@vger.kernel.org # v4.19+
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
---
 arch/powerpc/kernel/smp.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 61c1fadbc644..22abba5f4cf0 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -499,7 +499,7 @@ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool
 		if (delay_us) {
 			delay_us--;
 			if (!delay_us)
-				break;
+				goto timeout;
 		}
 	}
 
@@ -510,10 +510,11 @@ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool
 		if (delay_us) {
 			delay_us--;
 			if (!delay_us)
-				break;
+				goto timeout;
 		}
 	}
 
+timeout:
 	if (!cpumask_empty(&nmi_ipi_pending_mask)) {
 		/* Timeout waiting for CPUs to call smp_handle_nmi_ipi */
 		ret = 0;
-- 
2.20.1


^ permalink raw reply related

* Re: [PATCH v2] powerpc/64s: support nospectre_v2 cmdline option
From: Andrew Donnellan @ 2019-05-07  5:10 UTC (permalink / raw)
  To: Christopher M. Riedl, linuxppc-dev
In-Reply-To: <20190507024321.22281-1-cmr@informatik.wtf>

On 7/5/19 12:43 pm, Christopher M. Riedl wrote:
> Add support for disabling the kernel implemented spectre v2 mitigation
> (count cache flush on context switch) via the nospectre_v2 cmdline
> option.
> 
> Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
> Signed-off-by: Christopher M. Riedl <cmr@informatik.wtf>

Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>

> ---
> v1->v2:
> 	add call to toggle_count_cache_flush(false)
> 
>   arch/powerpc/kernel/security.c | 16 ++++++++++++++--
>   1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
> index b33bafb8fcea..d775da9b9227 100644
> --- a/arch/powerpc/kernel/security.c
> +++ b/arch/powerpc/kernel/security.c
> @@ -28,7 +28,7 @@ static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NO
>   bool barrier_nospec_enabled;
>   static bool no_nospec;
>   static bool btb_flush_enabled;
> -#ifdef CONFIG_PPC_FSL_BOOK3E
> +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
>   static bool no_spectrev2;
>   #endif
>   
> @@ -106,7 +106,7 @@ static __init int barrier_nospec_debugfs_init(void)
>   device_initcall(barrier_nospec_debugfs_init);
>   #endif /* CONFIG_DEBUG_FS */
>   
> -#ifdef CONFIG_PPC_FSL_BOOK3E
> +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
>   static int __init handle_nospectre_v2(char *p)
>   {
>   	no_spectrev2 = true;
> @@ -114,6 +114,9 @@ static int __init handle_nospectre_v2(char *p)
>   	return 0;
>   }
>   early_param("nospectre_v2", handle_nospectre_v2);
> +#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */
> +
> +#ifdef CONFIG_PPC_FSL_BOOK3E
>   void setup_spectre_v2(void)
>   {
>   	if (no_spectrev2)
> @@ -391,6 +394,15 @@ static void toggle_count_cache_flush(bool enable)
>   
>   void setup_count_cache_flush(void)
>   {
> +	if (no_spectrev2) {
> +		if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED)
> +		    || security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED))
> +			pr_warn("Spectre v2 mitigations not under software control, can't disable\n");
> +
> +		toggle_count_cache_flush(false);
> +		return;
> +	}
> +
>   	toggle_count_cache_flush(true);
>   }
>   
> 

-- 
Andrew Donnellan              OzLabs, ADL Canberra
ajd@linux.ibm.com             IBM Australia Limited


^ permalink raw reply

* [PATCH v2 5/6] powerpc/eeh: EEH for pSeries hot plug
From: Sam Bobroff @ 2019-05-07  4:30 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aik, oohall, tyreld
In-Reply-To: <cover.1557203383.git.sbobroff@linux.ibm.com>

On PowerNV and pSeries, devices currently acquire EEH support from
several different places: Boot-time devices from eeh_probe_devices()
and eeh_addr_cache_build(), Virtual Function devices from the pcibios
bus add device hooks, and hot plugged devices from pci_hp_add_devices()
(with other platforms using other methods as well).  Unfortunately,
pSeries machines currently discover hot plugged devices using
pci_rescan_bus(), not pci_hp_add_devices(), and so those devices do
not receive EEH support.

Rather than adding another case for pci_rescan_bus(), this change
widens the scope of the pcibios bus add device hooks so that they can
handle all devices. As a side effect this also supports devices
discovered after manually rescanning via /sys/bus/pci/rescan.

Note that on PowerNV, this change allows the EEH subsystem to become
enabled after boot as long as it has not been forced off, which was
not previously possible (it was already possible on pSeries).

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
v2 - Dropped changes to the PowerNV PHB EEH flag, instead refactor just enough to
     use the existing flag from multiple places.
   - Merge the little remaining work from the above change into the patch where
     it's used.

 arch/powerpc/kernel/eeh.c                    |  2 +-
 arch/powerpc/kernel/of_platform.c            |  3 +-
 arch/powerpc/platforms/powernv/eeh-powernv.c | 39 +++++++++-----
 arch/powerpc/platforms/pseries/eeh_pseries.c | 54 ++++++++++----------
 4 files changed, 56 insertions(+), 42 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 4160514d997c..1ed80adb40a1 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1285,7 +1285,7 @@ void eeh_add_device_late(struct pci_dev *dev)
 	struct pci_dn *pdn;
 	struct eeh_dev *edev;
 
-	if (!dev || !eeh_enabled())
+	if (!dev)
 		return;
 
 	pr_debug("EEH: Adding device %s\n", pci_name(dev));
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index becaec990140..d5818e9c4069 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -86,7 +86,8 @@ static int of_pci_phb_probe(struct platform_device *dev)
 	pcibios_claim_one_bus(phb->bus);
 
 	/* Finish EEH setup */
-	eeh_add_device_tree_late(phb->bus);
+	if (!eeh_has_flag(EEH_FORCE_DISABLED))
+		eeh_add_device_tree_late(phb->bus);
 
 	/* Add probed PCI devices to the device model */
 	pci_bus_add_devices(phb->bus);
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 0e374cdba961..90729d908a54 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -47,7 +47,7 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
 {
 	struct pci_dn *pdn = pci_get_pdn(pdev);
 
-	if (!pdev->is_virtfn)
+	if (eeh_has_flag(EEH_FORCE_DISABLED))
 		return;
 
 	pr_debug("%s: EEH: Setting up device %s.\n", __func__, pci_name(pdev));
@@ -226,6 +226,25 @@ static const struct file_operations eeh_tree_state_debugfs_ops = {
 
 #endif /* CONFIG_DEBUG_FS */
 
+void pnv_eeh_enable_phbs(void)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+
+	list_for_each_entry(hose, &hose_list, list_node) {
+		phb = hose->private_data;
+		/*
+		 * If EEH is enabled, we're going to rely on that.
+		 * Otherwise, we restore to conventional mechanism
+		 * to clear frozen PE during PCI config access.
+		 */
+		if (eeh_enabled())
+			phb->flags |= PNV_PHB_FLAG_EEH;
+		else
+			phb->flags &= ~PNV_PHB_FLAG_EEH;
+	}
+}
+
 /**
  * pnv_eeh_post_init - EEH platform dependent post initialization
  *
@@ -264,19 +283,11 @@ int pnv_eeh_post_init(void)
 	if (!eeh_enabled())
 		disable_irq(eeh_event_irq);
 
+	pnv_eeh_enable_phbs();
+
 	list_for_each_entry(hose, &hose_list, list_node) {
 		phb = hose->private_data;
 
-		/*
-		 * If EEH is enabled, we're going to rely on that.
-		 * Otherwise, we restore to conventional mechanism
-		 * to clear frozen PE during PCI config access.
-		 */
-		if (eeh_enabled())
-			phb->flags |= PNV_PHB_FLAG_EEH;
-		else
-			phb->flags &= ~PNV_PHB_FLAG_EEH;
-
 		/* Create debugfs entries */
 #ifdef CONFIG_DEBUG_FS
 		if (phb->has_dbgfs || !phb->dbgfs)
@@ -487,7 +498,11 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
 	 * Enable EEH explicitly so that we will do EEH check
 	 * while accessing I/O stuff
 	 */
-	eeh_add_flag(EEH_ENABLED);
+	if (!eeh_has_flag(EEH_ENABLED)) {
+		enable_irq(eeh_event_irq);
+		pnv_eeh_enable_phbs();
+		eeh_add_flag(EEH_ENABLED);
+	}
 
 	/* Save memory bars */
 	eeh_save_bars(edev);
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index ae06878fbdea..e68c79164974 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -55,44 +55,44 @@ static int ibm_get_config_addr_info;
 static int ibm_get_config_addr_info2;
 static int ibm_configure_pe;
 
-#ifdef CONFIG_PCI_IOV
 void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
 {
 	struct pci_dn *pdn = pci_get_pdn(pdev);
-	struct pci_dn *physfn_pdn;
-	struct eeh_dev *edev;
 
-	if (!pdev->is_virtfn)
+	if (eeh_has_flag(EEH_FORCE_DISABLED))
 		return;
 
 	pr_debug("%s: EEH: Setting up device %s.\n", __func__, pci_name(pdev));
+#ifdef CONFIG_PCI_IOV
+	if (pdev->is_virtfn) {
+		struct pci_dn *physfn_pdn;
 
-	pdn->device_id  =  pdev->device;
-	pdn->vendor_id  =  pdev->vendor;
-	pdn->class_code =  pdev->class;
-	/*
-	 * Last allow unfreeze return code used for retrieval
-	 * by user space in eeh-sysfs to show the last command
-	 * completion from platform.
-	 */
-	pdn->last_allow_rc =  0;
-	physfn_pdn      =  pci_get_pdn(pdev->physfn);
-	pdn->pe_number  =  physfn_pdn->pe_num_map[pdn->vf_index];
-	edev = pdn_to_eeh_dev(pdn);
-
-	/*
-	 * The following operations will fail if VF's sysfs files
-	 * aren't created or its resources aren't finalized.
-	 */
+		pdn->device_id  =  pdev->device;
+		pdn->vendor_id  =  pdev->vendor;
+		pdn->class_code =  pdev->class;
+		/*
+		 * Last allow unfreeze return code used for retrieval
+		 * by user space in eeh-sysfs to show the last command
+		 * completion from platform.
+		 */
+		pdn->last_allow_rc =  0;
+		physfn_pdn      =  pci_get_pdn(pdev->physfn);
+		pdn->pe_number  =  physfn_pdn->pe_num_map[pdn->vf_index];
+	}
+#endif
 	eeh_add_device_early(pdn);
 	eeh_add_device_late(pdev);
-	edev->pe_config_addr =  (pdn->busno << 16) | (pdn->devfn << 8);
-	eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */
-	eeh_add_to_parent_pe(edev);   /* Add as VF PE type */
-	eeh_sysfs_add_device(pdev);
+#ifdef CONFIG_PCI_IOV
+	if (pdev->is_virtfn) {
+		struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
 
-}
+		edev->pe_config_addr =  (pdn->busno << 16) | (pdn->devfn << 8);
+		eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */
+		eeh_add_to_parent_pe(edev);   /* Add as VF PE type */
+	}
 #endif
+	eeh_sysfs_add_device(pdev);
+}
 
 /*
  * Buffer for reporting slot-error-detail rtas calls. Its here
@@ -159,10 +159,8 @@ static int pseries_eeh_init(void)
 	/* Set EEH probe mode */
 	eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
 
-#ifdef CONFIG_PCI_IOV
 	/* Set EEH machine dependent code */
 	ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
-#endif
 
 	return 0;
 }
-- 
2.19.0.2.gcad72f5712


^ permalink raw reply related

* [PATCH v2 6/6] powerpc/eeh: Refactor around eeh_probe_devices()
From: Sam Bobroff @ 2019-05-07  4:30 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aik, oohall, tyreld
In-Reply-To: <cover.1557203383.git.sbobroff@linux.ibm.com>

Now that EEH support for all devices (on PowerNV and pSeries) is
provided by the pcibios bus add device hooks, eeh_probe_devices() and
eeh_addr_cache_build() are redundant and can be removed.

Move the EEH enabled message into its own function so that it can be
called from multiple places.

Note that previously on pSeries, useless EEH sysfs files were created
for some devices that did not have EEH support and this change
prevents them from being created.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
v2 - As it's so small, merged the enablement message patch into this one (where it's used).
   - Reworked enablement messages.

 arch/powerpc/include/asm/eeh.h               |  7 ++---
 arch/powerpc/kernel/eeh.c                    | 27 ++++++-----------
 arch/powerpc/kernel/eeh_cache.c              | 32 --------------------
 arch/powerpc/platforms/powernv/eeh-powernv.c |  4 +--
 arch/powerpc/platforms/pseries/pci.c         |  3 +-
 5 files changed, 14 insertions(+), 59 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 12baf1df134c..3994d45ae0d4 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -283,13 +283,12 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
 
 struct eeh_dev *eeh_dev_init(struct pci_dn *pdn);
 void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
-void eeh_probe_devices(void);
+void eeh_show_enabled(void);
 int __init eeh_ops_register(struct eeh_ops *ops);
 int __exit eeh_ops_unregister(const char *name);
 int eeh_check_failure(const volatile void __iomem *token);
 int eeh_dev_check_failure(struct eeh_dev *edev);
 void eeh_addr_cache_init(void);
-void eeh_addr_cache_build(void);
 void eeh_add_device_early(struct pci_dn *);
 void eeh_add_device_tree_early(struct pci_dn *);
 void eeh_add_device_late(struct pci_dev *);
@@ -333,7 +332,7 @@ static inline bool eeh_enabled(void)
         return false;
 }
 
-static inline void eeh_probe_devices(void) { }
+static inline void eeh_show_enabled(void) { }
 
 static inline void *eeh_dev_init(struct pci_dn *pdn, void *data)
 {
@@ -351,8 +350,6 @@ static inline int eeh_check_failure(const volatile void __iomem *token)
 
 static inline void eeh_addr_cache_init(void) { }
 
-static inline void eeh_addr_cache_build(void) { }
-
 static inline void eeh_add_device_early(struct pci_dn *pdn) { }
 
 static inline void eeh_add_device_tree_early(struct pci_dn *pdn) { }
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 1ed80adb40a1..f905235f0307 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -163,6 +163,16 @@ static int __init eeh_setup(char *str)
 }
 __setup("eeh=", eeh_setup);
 
+void eeh_show_enabled(void)
+{
+	if (eeh_has_flag(EEH_FORCE_DISABLED))
+		pr_info("EEH: Recovery disabled by kernel parameter.\n");
+	else if (eeh_has_flag(EEH_ENABLED))
+		pr_info("EEH: Capable adapter found: recovery enabled.\n");
+	else
+		pr_info("EEH: No capable adapters found: recovery disabled.\n");
+}
+
 /*
  * This routine captures assorted PCI configuration space data
  * for the indicated PCI device, and puts them into a buffer
@@ -1156,23 +1166,6 @@ static struct notifier_block eeh_reboot_nb = {
 	.notifier_call = eeh_reboot_notifier,
 };
 
-void eeh_probe_devices(void)
-{
-	struct pci_controller *hose, *tmp;
-	struct pci_dn *pdn;
-
-	/* Enable EEH for all adapters */
-	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
-		pdn = hose->pci_data;
-		traverse_pci_dn(pdn, eeh_ops->probe, NULL);
-	}
-	if (eeh_enabled())
-		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
-	else
-		pr_info("EEH: No capable adapters found\n");
-
-}
-
 /**
  * eeh_init - EEH initialization
  *
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index f93dd5cf6a39..c40078d036af 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -278,38 +278,6 @@ void eeh_addr_cache_init(void)
 	spin_lock_init(&pci_io_addr_cache_root.piar_lock);
 }
 
-/**
- * eeh_addr_cache_build - Build a cache of I/O addresses
- *
- * Build a cache of pci i/o addresses.  This cache will be used to
- * find the pci device that corresponds to a given address.
- * This routine scans all pci busses to build the cache.
- * Must be run late in boot process, after the pci controllers
- * have been scanned for devices (after all device resources are known).
- */
-void eeh_addr_cache_build(void)
-{
-	struct pci_dn *pdn;
-	struct eeh_dev *edev;
-	struct pci_dev *dev = NULL;
-
-	for_each_pci_dev(dev) {
-		pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
-		if (!pdn)
-			continue;
-
-		edev = pdn_to_eeh_dev(pdn);
-		if (!edev)
-			continue;
-
-		dev->dev.archdata.edev = edev;
-		edev->pdev = dev;
-
-		eeh_addr_cache_insert_dev(dev);
-		eeh_sysfs_add_device(dev);
-	}
-}
-
 static int eeh_addr_cache_show(struct seq_file *s, void *v)
 {
 	struct pci_io_addr_range *piar;
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 90729d908a54..22a94f4b8586 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -259,9 +259,7 @@ int pnv_eeh_post_init(void)
 	struct pnv_phb *phb;
 	int ret = 0;
 
-	/* Probe devices & build address cache */
-	eeh_probe_devices();
-	eeh_addr_cache_build();
+	eeh_show_enabled();
 
 	/* Register OPAL event notifier */
 	eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 37a77e57893e..d6a5f4f27507 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -242,8 +242,7 @@ void __init pSeries_final_fixup(void)
 
 	pSeries_request_regions();
 
-	eeh_probe_devices();
-	eeh_addr_cache_build();
+	eeh_show_enabled();
 
 #ifdef CONFIG_PCI_IOV
 	ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
-- 
2.19.0.2.gcad72f5712


^ permalink raw reply related

* [PATCH v2 3/6] powerpc/eeh: Improve debug messages around device addition
From: Sam Bobroff @ 2019-05-07  4:30 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aik, oohall, tyreld
In-Reply-To: <cover.1557203383.git.sbobroff@linux.ibm.com>

Also remove useless comment.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 arch/powerpc/kernel/eeh.c                    |  2 +-
 arch/powerpc/platforms/powernv/eeh-powernv.c | 14 ++++++++----
 arch/powerpc/platforms/pseries/eeh_pseries.c | 23 +++++++++++++++-----
 3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 8d3c36a1f194..b14d89547895 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1291,7 +1291,7 @@ void eeh_add_device_late(struct pci_dev *dev)
 	pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
 	edev = pdn_to_eeh_dev(pdn);
 	if (edev->pdev == dev) {
-		pr_debug("EEH: Already referenced !\n");
+		pr_debug("EEH: Device %s already referenced!\n", pci_name(dev));
 		return;
 	}
 
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 6fc1a463b796..0e374cdba961 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -50,10 +50,7 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
 	if (!pdev->is_virtfn)
 		return;
 
-	/*
-	 * The following operations will fail if VF's sysfs files
-	 * aren't created or its resources aren't finalized.
-	 */
+	pr_debug("%s: EEH: Setting up device %s.\n", __func__, pci_name(pdev));
 	eeh_add_device_early(pdn);
 	eeh_add_device_late(pdev);
 	eeh_sysfs_add_device(pdev);
@@ -397,6 +394,10 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
 	int ret;
 	int config_addr = (pdn->busno << 8) | (pdn->devfn);
 
+	pr_debug("%s: probing %04x:%02x:%02x.%01x\n",
+		__func__, hose->global_number, pdn->busno,
+		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+
 	/*
 	 * When probing the root bridge, which doesn't have any
 	 * subordinate PCI devices. We don't have OF node for
@@ -491,6 +492,11 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
 	/* Save memory bars */
 	eeh_save_bars(edev);
 
+	pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%x-PE#%x\n",
+		__func__, pdn->busno, PCI_SLOT(pdn->devfn),
+		PCI_FUNC(pdn->devfn), edev->pe->phb->global_number,
+		edev->pe->addr);
+
 	return NULL;
 }
 
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 7aa50258dd42..ae06878fbdea 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -65,6 +65,8 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
 	if (!pdev->is_virtfn)
 		return;
 
+	pr_debug("%s: EEH: Setting up device %s.\n", __func__, pci_name(pdev));
+
 	pdn->device_id  =  pdev->device;
 	pdn->vendor_id  =  pdev->vendor;
 	pdn->class_code =  pdev->class;
@@ -251,6 +253,10 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
 	int enable = 0;
 	int ret;
 
+	pr_debug("%s: probing %04x:%02x:%02x.%01x\n",
+		__func__, pdn->phb->global_number, pdn->busno,
+		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+
 	/* Retrieve OF node and eeh device */
 	edev = pdn_to_eeh_dev(pdn);
 	if (!edev || edev->pe)
@@ -294,7 +300,12 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
 
 	/* Enable EEH on the device */
 	ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
-	if (!ret) {
+	if (ret) {
+		pr_debug("%s: EEH failed to enable on %02x:%02x.%01x PHB#%x-PE#%x (code %d)\n",
+			__func__, pdn->busno, PCI_SLOT(pdn->devfn),
+			PCI_FUNC(pdn->devfn), pe.phb->global_number,
+			pe.addr, ret);
+	} else {
 		/* Retrieve PE address */
 		edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
 		pe.addr = edev->pe_config_addr;
@@ -310,11 +321,6 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
 		if (enable) {
 			eeh_add_flag(EEH_ENABLED);
 			eeh_add_to_parent_pe(edev);
-
-			pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%x-PE#%x\n",
-				__func__, pdn->busno, PCI_SLOT(pdn->devfn),
-				PCI_FUNC(pdn->devfn), pe.phb->global_number,
-				pe.addr);
 		} else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) &&
 			   (pdn_to_eeh_dev(pdn->parent))->pe) {
 			/* This device doesn't support EEH, but it may have an
@@ -323,6 +329,11 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
 			edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr;
 			eeh_add_to_parent_pe(edev);
 		}
+		pr_debug("%s: EEH %s on %02x:%02x.%01x PHB#%x-PE#%x (code %d)\n",
+			__func__, (enable ? "enabled" : "unsupported"),
+			pdn->busno, PCI_SLOT(pdn->devfn),
+			PCI_FUNC(pdn->devfn), pe.phb->global_number,
+			pe.addr, ret);
 	}
 
 	/* Save memory bars */
-- 
2.19.0.2.gcad72f5712


^ permalink raw reply related


This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.