LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables
From: Peter Zijlstra @ 2020-06-23 15:24 UTC (permalink / raw)
  To: Ahmed S. Darwish
  Cc: linux-s390, linuxppc-dev, bigeasy, x86, heiko.carstens,
	linux-kernel, rostedt, davem, sparclinux, linux, tglx, will,
	mingo
In-Reply-To: <20200623150031.GA2986783@debian-buster-darwi.lab.linutronix.de>

On Tue, Jun 23, 2020 at 05:00:31PM +0200, Ahmed S. Darwish wrote:
> On Tue, Jun 23, 2020 at 10:36:52AM +0200, Peter Zijlstra wrote:
> ...
> > -#define lockdep_assert_irqs_disabled()	do {				\
> > -		WARN_ONCE(debug_locks && !current->lockdep_recursion &&	\
> > -			  current->hardirqs_enabled,			\
> > -			  "IRQs not disabled as expected\n");		\
> > -	} while (0)
> > +#define lockdep_assert_irqs_enabled()					\
> > +do {									\
> > +	WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled));	\
> > +} while (0)
> >
> 
> Can we add a small comment on top of lockdep_off(), stating that lockdep
> IRQ tracking will still be kept after a lockdep_off call?

That would only legitimize lockdep_off(). The only comment I want to put
on that is: "if you use this, you're doing it wrong".

^ permalink raw reply

* Re: [PATCH] KVM: PPC: Book3S HV: increase KVMPPC_NR_LPIDS on POWER8 and POWER9
From: Cédric Le Goater @ 2020-06-23 15:08 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: linuxppc-dev, Paul Mackerras, kvm-ppc, Nicholas Piggin, kvm
In-Reply-To: <20200608115714.1139735-1-clg@kaod.org>

On 6/8/20 1:57 PM, Cédric Le Goater wrote:
> POWER8 and POWER9 have 12-bit LPIDs. Change LPID_RSVD to support up to
> (4096 - 2) guests on these processors. POWER7 is kept the same with a
> limitation of (1024 - 2), but it might be time to drop KVM support for
> POWER7.
> 
> Tested with 2048 guests * 4 vCPUs on a witherspoon system with 512G
> RAM and a bit of swap.

For the record, it is possible to run 4094 guests * 4 vCPUs on a POWER9 
system with 1TB. It takes ~5m to boot them all.

CONFIG_NR_IRQS needs to be increased to support 4094 * 4 escalation 
interrupts.

Cheers,

C.


> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  arch/powerpc/include/asm/reg.h      | 3 ++-
>  arch/powerpc/kvm/book3s_64_mmu_hv.c | 8 ++++++--
>  2 files changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index 88e6c78100d9..b70bbfb0ea3c 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -473,7 +473,8 @@
>  #ifndef SPRN_LPID
>  #define SPRN_LPID	0x13F	/* Logical Partition Identifier */
>  #endif
> -#define   LPID_RSVD	0x3ff		/* Reserved LPID for partn switching */
> +#define   LPID_RSVD_POWER7	0x3ff	/* Reserved LPID for partn switching */
> +#define   LPID_RSVD		0xfff	/* Reserved LPID for partn switching */
>  #define	SPRN_HMER	0x150	/* Hypervisor maintenance exception reg */
>  #define   HMER_DEBUG_TRIG	(1ul << (63 - 17)) /* Debug trigger */
>  #define	SPRN_HMEER	0x151	/* Hyp maintenance exception enable reg */
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 18aed9775a3c..23035ab2ec50 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -260,11 +260,15 @@ int kvmppc_mmu_hv_init(void)
>  	if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
>  		return -EINVAL;
>  
> -	/* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
>  	host_lpid = 0;
>  	if (cpu_has_feature(CPU_FTR_HVMODE))
>  		host_lpid = mfspr(SPRN_LPID);
> -	rsvd_lpid = LPID_RSVD;
> +
> +	/* POWER8 and above have 12-bit LPIDs (10-bit in POWER7) */
> +	if (cpu_has_feature(CPU_FTR_ARCH_207S))
> +		rsvd_lpid = LPID_RSVD;
> +	else
> +		rsvd_lpid = LPID_RSVD_POWER7;
>  
>  	kvmppc_init_lpid(rsvd_lpid + 1);
>  
> 


^ permalink raw reply

* Re: [PATCH v6 1/5] KVM: s390: clean up redundant 'kvm_run' parameters
From: Christian Borntraeger @ 2020-06-23 15:31 UTC (permalink / raw)
  To: Tianjia Zhang, pbonzini, tsbogend, paulus, mpe, benh, frankja,
	david, cohuck, heiko.carstens, gor, sean.j.christopherson,
	vkuznets, wanpengli, jmattson, joro, tglx, mingo, bp, x86, hpa,
	maz, james.morse, julien.thierry.kdev, suzuki.poulose,
	christoffer.dall, peterx, thuth, chenhuacai
  Cc: linux-s390, kvm, linux-mips, kvm-ppc, linux-kernel, linuxppc-dev,
	kvmarm, linux-arm-kernel
In-Reply-To: <20200623131418.31473-2-tianjia.zhang@linux.alibaba.com>



On 23.06.20 15:14, Tianjia Zhang wrote:
> In the current kvm version, 'kvm_run' has been included in the 'kvm_vcpu'
> structure. For historical reasons, many kvm-related function parameters
> retain the 'kvm_run' and 'kvm_vcpu' parameters at the same time. This
> patch does a unified cleanup of these remaining redundant parameters.
> 
> Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
> Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
>  arch/s390/kvm/kvm-s390.c | 23 +++++++++++++++--------
>  1 file changed, 15 insertions(+), 8 deletions(-)

Tianjia,

I have trouble seeing value in this particular patch. We add LOCs
without providing any noticeable benefit. All other patches in this series at
least reduce the amount of code. So I would defer this to Paolo if he prefers
to have it this way across all architectures.
> 
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index d47c19718615..f5f96dc33712 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -4175,8 +4175,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
>  	return rc;
>  }
>  
> -static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> +static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
>  {
> +	struct kvm_run *kvm_run = vcpu->run;
>  	struct runtime_instr_cb *riccb;
>  	struct gs_cb *gscb;
>  
> @@ -4242,8 +4243,10 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  	/* SIE will load etoken directly from SDNX and therefore kvm_run */
>  }
>  
> -static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> +static void sync_regs(struct kvm_vcpu *vcpu)
>  {
> +	struct kvm_run *kvm_run = vcpu->run;
> +
>  	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
>  		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
>  	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
> @@ -4272,7 +4275,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  
>  	/* Sync fmt2 only data */
>  	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
> -		sync_regs_fmt2(vcpu, kvm_run);
> +		sync_regs_fmt2(vcpu);
>  	} else {
>  		/*
>  		 * In several places we have to modify our internal view to
> @@ -4291,8 +4294,10 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  	kvm_run->kvm_dirty_regs = 0;
>  }
>  
> -static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> +static void store_regs_fmt2(struct kvm_vcpu *vcpu)
>  {
> +	struct kvm_run *kvm_run = vcpu->run;
> +
>  	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
>  	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
>  	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
> @@ -4312,8 +4317,10 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  	/* SIE will save etoken directly into SDNX and therefore kvm_run */
>  }
>  
> -static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> +static void store_regs(struct kvm_vcpu *vcpu)
>  {
> +	struct kvm_run *kvm_run = vcpu->run;
> +
>  	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
>  	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
>  	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
> @@ -4332,7 +4339,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
>  	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
>  	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
> -		store_regs_fmt2(vcpu, kvm_run);
> +		store_regs_fmt2(vcpu);
>  }
>  
>  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
> @@ -4370,7 +4377,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>  		goto out;
>  	}
>  
> -	sync_regs(vcpu, kvm_run);
> +	sync_regs(vcpu);
>  	enable_cpu_timer_accounting(vcpu);
>  
>  	might_fault();
> @@ -4392,7 +4399,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>  	}
>  
>  	disable_cpu_timer_accounting(vcpu);
> -	store_regs(vcpu, kvm_run);
> +	store_regs(vcpu);
>  
>  	kvm_sigset_deactivate(vcpu);
>  
> 

^ permalink raw reply

* Re: [PATCH 03/18] powerpc/numa: remove ability to enable topology updates
From: Srikar Dronamraju @ 2020-06-23 16:23 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: tyreld, ego, npiggin, svaidy, linuxppc-dev
In-Reply-To: <20200612051238.1007764-4-nathanl@linux.ibm.com>

* Nathan Lynch <nathanl@linux.ibm.com> [2020-06-12 00:12:23]:

> Remove the /proc/powerpc/topology_updates interface and the
> topology_updates=on/off command line argument. The internal
> topology_updates_enabled flag remains for now, but always false.
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>

> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> ---
>  arch/powerpc/mm/numa.c | 71 +-----------------------------------------
>  1 file changed, 1 insertion(+), 70 deletions(-)
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 9fcf2d195830..34d95de77bdd 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -984,27 +984,7 @@ static int __init early_numa(char *p)
>  }
>  early_param("numa", early_numa);
> 
> -/*
> - * The platform can inform us through one of several mechanisms
> - * (post-migration device tree updates, PRRN or VPHN) that the NUMA
> - * assignment of a resource has changed. This controls whether we act
> - * on that. Disabled by default.
> - */
> -static bool topology_updates_enabled;
> -
> -static int __init early_topology_updates(char *p)
> -{
> -	if (!p)
> -		return 0;
> -
> -	if (!strcmp(p, "on")) {
> -		pr_warn("Caution: enabling topology updates\n");
> -		topology_updates_enabled = true;
> -	}
> -
> -	return 0;
> -}
> -early_param("topology_updates", early_topology_updates);
> +static const bool topology_updates_enabled;
> 
>  #ifdef CONFIG_MEMORY_HOTPLUG
>  /*
> @@ -1632,52 +1612,6 @@ int prrn_is_enabled(void)
>  	return prrn_enabled;
>  }
> 
> -static int topology_read(struct seq_file *file, void *v)
> -{
> -	if (vphn_enabled || prrn_enabled)
> -		seq_puts(file, "on\n");
> -	else
> -		seq_puts(file, "off\n");
> -
> -	return 0;
> -}
> -
> -static int topology_open(struct inode *inode, struct file *file)
> -{
> -	return single_open(file, topology_read, NULL);
> -}
> -
> -static ssize_t topology_write(struct file *file, const char __user *buf,
> -			      size_t count, loff_t *off)
> -{
> -	char kbuf[4]; /* "on" or "off" plus null. */
> -	int read_len;
> -
> -	read_len = count < 3 ? count : 3;
> -	if (copy_from_user(kbuf, buf, read_len))
> -		return -EINVAL;
> -
> -	kbuf[read_len] = '\0';
> -
> -	if (!strncmp(kbuf, "on", 2)) {
> -		topology_updates_enabled = true;
> -		start_topology_update();
> -	} else if (!strncmp(kbuf, "off", 3)) {
> -		stop_topology_update();
> -		topology_updates_enabled = false;
> -	} else
> -		return -EINVAL;
> -
> -	return count;
> -}
> -
> -static const struct proc_ops topology_proc_ops = {
> -	.proc_read	= seq_read,
> -	.proc_write	= topology_write,
> -	.proc_open	= topology_open,
> -	.proc_release	= single_release,
> -};
> -
>  static int topology_update_init(void)
>  {
>  	start_topology_update();
> @@ -1685,9 +1619,6 @@ static int topology_update_init(void)
>  	if (vphn_enabled)
>  		topology_schedule_update();
> 
> -	if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_proc_ops))
> -		return -ENOMEM;
> -
>  	topology_inited = 1;
>  	return 0;
>  }
> -- 
> 2.25.4
> 

-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH 04/18] powerpc/numa: remove unreachable topology update code
From: Srikar Dronamraju @ 2020-06-23 16:24 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: tyreld, ego, npiggin, svaidy, linuxppc-dev
In-Reply-To: <20200612051238.1007764-5-nathanl@linux.ibm.com>

* Nathan Lynch <nathanl@linux.ibm.com> [2020-06-12 00:12:24]:

> Since the topology_updates_enabled flag is now always false, remove it
> and the code which has become unreachable. This is the minimum change
> that prevents 'defined but unused' warnings emitted by the compiler
> after stubbing out the start/stop_topology_updates() functions.
> 
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>

Looks good to me.

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>

-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH 05/18] powerpc/numa: make vphn_enabled, prrn_enabled flags const
From: Srikar Dronamraju @ 2020-06-23 16:24 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: tyreld, ego, npiggin, svaidy, linuxppc-dev
In-Reply-To: <20200612051238.1007764-6-nathanl@linux.ibm.com>

* Nathan Lynch <nathanl@linux.ibm.com> [2020-06-12 00:12:25]:

> Previous changes have made it so these flags are never changed;
> enforce this by making them const.
> 
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> ---

Looks good to me.

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>

>  arch/powerpc/mm/numa.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 9e20f12e6caf..1b89bacb8975 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -1132,8 +1132,8 @@ struct topology_update_data {
>  #define TOPOLOGY_DEF_TIMER_SECS	60
> 
>  static cpumask_t cpu_associativity_changes_mask;
> -static int vphn_enabled;
> -static int prrn_enabled;
> +static const int vphn_enabled;
> +static const int prrn_enabled;
>  static void reset_topology_timer(void);
>  static int topology_timer_secs = 1;
>  static int topology_inited;
> -- 
> 2.25.4
> 

-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH 06/18] powerpc/numa: remove unreachable topology timer code
From: Srikar Dronamraju @ 2020-06-23 16:25 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: tyreld, ego, npiggin, svaidy, linuxppc-dev
In-Reply-To: <20200612051238.1007764-7-nathanl@linux.ibm.com>

* Nathan Lynch <nathanl@linux.ibm.com> [2020-06-12 00:12:26]:

> Since vphn_enabled is always 0, we can stub out
> timed_topology_update() and remove the code which becomes unreachable.
> 
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> ---
>  arch/powerpc/mm/numa.c | 21 ---------------------
>  1 file changed, 21 deletions(-)
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH 07/18] powerpc/numa: remove unreachable topology workqueue code
From: Srikar Dronamraju @ 2020-06-23 16:26 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: tyreld, ego, npiggin, svaidy, linuxppc-dev
In-Reply-To: <20200612051238.1007764-8-nathanl@linux.ibm.com>

* Nathan Lynch <nathanl@linux.ibm.com> [2020-06-12 00:12:27]:

> Since vphn_enabled is always 0, we can remove the call to
> topology_schedule_update() and remove the code which becomes
> unreachable as a result.
> 
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> ---
>  arch/powerpc/mm/numa.c | 14 --------------
>  1 file changed, 14 deletions(-)
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH 08/18] powerpc/numa: remove vphn_enabled and prrn_enabled internal flags
From: Srikar Dronamraju @ 2020-06-23 16:27 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: tyreld, ego, npiggin, svaidy, linuxppc-dev
In-Reply-To: <20200612051238.1007764-9-nathanl@linux.ibm.com>

* Nathan Lynch <nathanl@linux.ibm.com> [2020-06-12 00:12:28]:

> These flags are always zero now; remove them and suitably adjust the
> remaining references to them.
> 
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> ---
>  arch/powerpc/mm/numa.c | 6 ++----
>  1 file changed, 2 insertions(+), 4 deletions(-)
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH 09/18] powerpc/numa: stub out numa_update_cpu_topology()
From: Srikar Dronamraju @ 2020-06-23 16:27 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: tyreld, ego, npiggin, svaidy, linuxppc-dev
In-Reply-To: <20200612051238.1007764-10-nathanl@linux.ibm.com>

* Nathan Lynch <nathanl@linux.ibm.com> [2020-06-12 00:12:29]:

> Previous changes have removed the code which sets bits in
> cpu_associativity_changes_mask and thus it is never modified at
> runtime. From this we can reason that numa_update_cpu_topology()
> always returns 0 without doing anything. Remove the body of
> numa_update_cpu_topology() and remove all code which becomes
> unreachable as a result.
> 
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> ---
>  arch/powerpc/mm/numa.c | 193 +----------------------------------------
>  1 file changed, 1 insertion(+), 192 deletions(-)
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH 10/18] powerpc/numa: remove timed_topology_update()
From: Srikar Dronamraju @ 2020-06-23 16:28 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: tyreld, ego, npiggin, svaidy, linuxppc-dev
In-Reply-To: <20200612051238.1007764-11-nathanl@linux.ibm.com>

* Nathan Lynch <nathanl@linux.ibm.com> [2020-06-12 00:12:30]:

> timed_topology_update is a no-op now, so remove it and all call sites.
> 
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/topology.h          | 5 -----
>  arch/powerpc/mm/numa.c                       | 9 ---------
>  arch/powerpc/platforms/pseries/hotplug-cpu.c | 2 --
>  3 files changed, 16 deletions(-)
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH 11/18] powerpc/numa: remove start/stop_topology_update()
From: Srikar Dronamraju @ 2020-06-23 16:28 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: tyreld, ego, npiggin, svaidy, linuxppc-dev
In-Reply-To: <20200612051238.1007764-12-nathanl@linux.ibm.com>

* Nathan Lynch <nathanl@linux.ibm.com> [2020-06-12 00:12:31]:

> These APIs have become no-ops, so remove them and all call sites.
> 
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> ---

Looks good to me.

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH 12/18] powerpc/rtasd: simplify handle_rtas_event(), emit message on events
From: Srikar Dronamraju @ 2020-06-23 16:30 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: tyreld, ego, npiggin, svaidy, linuxppc-dev
In-Reply-To: <20200612051238.1007764-13-nathanl@linux.ibm.com>

* Nathan Lynch <nathanl@linux.ibm.com> [2020-06-12 00:12:32]:

> prrn_is_enabled() always returns false/0, so handle_rtas_event() can
> be simplified and some dead code can be removed. Use machine_is()
> instead of #ifdef to run this code only on pseries, and add an
> informational ratelimited message that we are ignoring the
> events. PRRN events are relatively rare in normal operation and
> usually arise from operator-initiated actions such as a DPO (Dynamic
> Platform Optimizer) run.
> 
> Eventually we do want to consume these events and update the device
> tree, but that needs more care to be safe vs LPM and DLPAR.
> 
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> ---
>  arch/powerpc/kernel/rtasd.c | 28 +++-------------------------
>  1 file changed, 3 insertions(+), 25 deletions(-)
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH 13/18] powerpc/numa: remove prrn_is_enabled()
From: Srikar Dronamraju @ 2020-06-23 16:31 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: tyreld, ego, npiggin, svaidy, linuxppc-dev
In-Reply-To: <20200612051238.1007764-14-nathanl@linux.ibm.com>

* Nathan Lynch <nathanl@linux.ibm.com> [2020-06-12 00:12:33]:

> All users of this prrn_is_enabled() are gone; remove it.
> 
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/topology.h | 5 -----
>  arch/powerpc/mm/numa.c              | 5 -----
>  2 files changed, 10 deletions(-)
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH 14/18] powerpc/numa: remove arch_update_cpu_topology
From: Srikar Dronamraju @ 2020-06-23 16:32 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: tyreld, ego, npiggin, svaidy, linuxppc-dev
In-Reply-To: <20200612051238.1007764-15-nathanl@linux.ibm.com>

* Nathan Lynch <nathanl@linux.ibm.com> [2020-06-12 00:12:34]:

> Since arch_update_cpu_topology() doesn't do anything on powerpc now,
> remove it and associated dead code.
> 
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/topology.h |  6 ------
>  arch/powerpc/mm/numa.c              | 10 ----------
>  2 files changed, 16 deletions(-)
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables
From: Peter Zijlstra @ 2020-06-23 16:37 UTC (permalink / raw)
  To: Ahmed S. Darwish
  Cc: linux-s390, elver, linuxppc-dev, bigeasy, x86, heiko.carstens,
	linux-kernel, rostedt, davem, sparclinux, linux, tglx, will,
	mingo
In-Reply-To: <20200623161320.GA2996373@debian-buster-darwi.lab.linutronix.de>

On Tue, Jun 23, 2020 at 06:13:21PM +0200, Ahmed S. Darwish wrote:
> Well, freshly merged code is using it. For example, KCSAN:
> 
>     => f1bc96210c6a ("kcsan: Make KCSAN compatible with lockdep")
>     => kernel/kcsan/report.c:
> 
>     void kcsan_report(...)
>     {
> 	...
>         /*
>          * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
>          * we do not turn off lockdep here; this could happen due to recursion
>          * into lockdep via KCSAN if we detect a race in utilities used by
>          * lockdep.
>          */
>         lockdep_off();
> 	...
>     }

Marco, do you remember what exactly happened there? Because I'm about to
wreck that. That is, I'm going to make TRACE_IRQFLAGS ignore
lockdep_off().

^ permalink raw reply

* [PATCH] KVM: PPC: Book3S HV: Use feature flag CPU_FTR_P9_TIDR when accessing TIDR
From: Cédric Le Goater @ 2020-06-23 16:50 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: linuxppc-dev, Paul Mackerras, kvm, kvm-ppc, Cédric Le Goater

The TIDR register is only available on POWER9 systems and code
accessing this register is not always protected by the CPU_FTR_P9_TIDR
flag. Fix that to make sure POWER10 systems won't use it as TIDR has
been removed.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/kvm/book3s_hv.c            | 23 +++++++++++++++++------
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 16 ++++++++++++----
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d64a2dc1ccca..3e5410f27a2a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1755,7 +1755,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		*val = get_reg_val(id, vcpu->arch.wort);
 		break;
 	case KVM_REG_PPC_TIDR:
-		*val = get_reg_val(id, vcpu->arch.tid);
+		if (cpu_has_feature(CPU_FTR_P9_TIDR))
+			*val = get_reg_val(id, vcpu->arch.tid);
+		else
+			r = -ENXIO;
 		break;
 	case KVM_REG_PPC_PSSCR:
 		*val = get_reg_val(id, vcpu->arch.psscr);
@@ -1972,7 +1975,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		vcpu->arch.wort = set_reg_val(id, *val);
 		break;
 	case KVM_REG_PPC_TIDR:
-		vcpu->arch.tid = set_reg_val(id, *val);
+		if (cpu_has_feature(CPU_FTR_P9_TIDR))
+			vcpu->arch.tid = set_reg_val(id, *val);
+		else
+			r = -ENXIO;
 		break;
 	case KVM_REG_PPC_PSSCR:
 		vcpu->arch.psscr = set_reg_val(id, *val) & PSSCR_GUEST_VIS;
@@ -3526,13 +3532,15 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	unsigned long host_dscr = mfspr(SPRN_DSCR);
-	unsigned long host_tidr = mfspr(SPRN_TIDR);
+	unsigned long host_tidr;
 	unsigned long host_iamr = mfspr(SPRN_IAMR);
 	unsigned long host_amr = mfspr(SPRN_AMR);
 	s64 dec;
 	u64 tb;
 	int trap, save_pmu;
 
+	if (cpu_has_feature(CPU_FTR_P9_TIDR))
+		host_tidr = mfspr(SPRN_TIDR);
 	dec = mfspr(SPRN_DEC);
 	tb = mftb();
 	if (dec < 512)
@@ -3579,7 +3587,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
 	mtspr(SPRN_BESCR, vcpu->arch.bescr);
 	mtspr(SPRN_WORT, vcpu->arch.wort);
-	mtspr(SPRN_TIDR, vcpu->arch.tid);
+	if (cpu_has_feature(CPU_FTR_P9_TIDR))
+		mtspr(SPRN_TIDR, vcpu->arch.tid);
 	mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
 	mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
 	mtspr(SPRN_AMR, vcpu->arch.amr);
@@ -3653,7 +3662,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
 	vcpu->arch.bescr = mfspr(SPRN_BESCR);
 	vcpu->arch.wort = mfspr(SPRN_WORT);
-	vcpu->arch.tid = mfspr(SPRN_TIDR);
+	if (cpu_has_feature(CPU_FTR_P9_TIDR))
+		vcpu->arch.tid = mfspr(SPRN_TIDR);
 	vcpu->arch.amr = mfspr(SPRN_AMR);
 	vcpu->arch.uamor = mfspr(SPRN_UAMOR);
 	vcpu->arch.dscr = mfspr(SPRN_DSCR);
@@ -3662,7 +3672,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	mtspr(SPRN_WORT, 0);
 	mtspr(SPRN_UAMOR, 0);
 	mtspr(SPRN_DSCR, host_dscr);
-	mtspr(SPRN_TIDR, host_tidr);
+	if (cpu_has_feature(CPU_FTR_P9_TIDR))
+		mtspr(SPRN_TIDR, host_tidr);
 	mtspr(SPRN_IAMR, host_iamr);
 	mtspr(SPRN_PSPB, 0);
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 71943892c81c..64e454656749 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -697,9 +697,11 @@ kvmppc_got_guest:
 	/* Save host values of some registers */
 BEGIN_FTR_SECTION
 	mfspr	r5, SPRN_TIDR
+	std	r5, STACK_SLOT_TID(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TIDR)
+BEGIN_FTR_SECTION
 	mfspr	r6, SPRN_PSSCR
 	mfspr	r7, SPRN_PID
-	std	r5, STACK_SLOT_TID(r1)
 	std	r6, STACK_SLOT_PSSCR(r1)
 	std	r7, STACK_SLOT_PID(r1)
 	mfspr	r5, SPRN_HFSCR
@@ -835,13 +837,15 @@ BEGIN_FTR_SECTION
 	nop
 FTR_SECTION_ELSE
 	/* POWER9-only registers */
+BEGIN_FTR_SECTION_NESTED(96);
 	ld	r5, VCPU_TID(r4)
+	mtspr	SPRN_TIDR, r5
+END_FTR_SECTION_NESTED_IFSET(CPU_FTR_P9_TIDR, 96)
 	ld	r6, VCPU_PSSCR(r4)
 	lbz	r8, HSTATE_FAKE_SUSPEND(r13)
 	oris	r6, r6, PSSCR_EC@h	/* This makes stop trap to HV */
 	rldimi	r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG
 	ld	r7, VCPU_HFSCR(r4)
-	mtspr	SPRN_TIDR, r5
 	mtspr	SPRN_PSSCR, r6
 	mtspr	SPRN_HFSCR, r7
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
@@ -1637,9 +1641,11 @@ BEGIN_FTR_SECTION
 	std	r7, VCPU_CSIGR(r9)
 	std	r8, VCPU_TACR(r9)
 FTR_SECTION_ELSE
+BEGIN_FTR_SECTION_NESTED(96);
 	mfspr	r5, SPRN_TIDR
-	mfspr	r6, SPRN_PSSCR
 	std	r5, VCPU_TID(r9)
+END_FTR_SECTION_NESTED_IFSET(CPU_FTR_P9_TIDR, 96)
+	mfspr	r6, SPRN_PSSCR
 	rldicl	r6, r6, 4, 50		/* r6 &= PSSCR_GUEST_VIS */
 	rotldi	r6, r6, 60
 	std	r6, VCPU_PSSCR(r9)
@@ -1771,9 +1777,11 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 BEGIN_FTR_SECTION
 	ld	r5, STACK_SLOT_TID(r1)
+	mtspr	SPRN_TIDR, r5
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TIDR)
+BEGIN_FTR_SECTION
 	ld	r6, STACK_SLOT_PSSCR(r1)
 	ld	r7, STACK_SLOT_PID(r1)
-	mtspr	SPRN_TIDR, r5
 	mtspr	SPRN_PSSCR, r6
 	mtspr	SPRN_PID, r7
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-- 
2.25.4


^ permalink raw reply related

* [PATCH AUTOSEL 5.7 08/28] ibmvnic: Harden device login requests
From: Sasha Levin @ 2020-06-23 17:35 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sasha Levin, netdev, Thomas Falcon, linuxppc-dev,
	David S . Miller
In-Reply-To: <20200623173523.1355411-1-sashal@kernel.org>

From: Thomas Falcon <tlfalcon@linux.ibm.com>

[ Upstream commit dff515a3e71dc8ab3b9dcc2e23a9b5fca88b3c18 ]

The VNIC driver's "login" command sequence is the final step
in the driver's initialization process with device firmware,
confirming the available device queue resources to be utilized
by the driver. Under high system load, firmware may not respond
to the request in a timely manner or may abort the request. In
such cases, the driver should reattempt the login command
sequence. In case of a device error, the number of retries
is bounded.

Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 197dc5b2c0905..c265917487e84 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -842,12 +842,13 @@ static int ibmvnic_login(struct net_device *netdev)
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	unsigned long timeout = msecs_to_jiffies(30000);
 	int retry_count = 0;
+	int retries = 10;
 	bool retry;
 	int rc;
 
 	do {
 		retry = false;
-		if (retry_count > IBMVNIC_MAX_QUEUES) {
+		if (retry_count > retries) {
 			netdev_warn(netdev, "Login attempts exceeded\n");
 			return -1;
 		}
@@ -862,11 +863,23 @@ static int ibmvnic_login(struct net_device *netdev)
 
 		if (!wait_for_completion_timeout(&adapter->init_done,
 						 timeout)) {
-			netdev_warn(netdev, "Login timed out\n");
-			return -1;
+			netdev_warn(netdev, "Login timed out, retrying...\n");
+			retry = true;
+			adapter->init_done_rc = 0;
+			retry_count++;
+			continue;
 		}
 
-		if (adapter->init_done_rc == PARTIALSUCCESS) {
+		if (adapter->init_done_rc == ABORTED) {
+			netdev_warn(netdev, "Login aborted, retrying...\n");
+			retry = true;
+			adapter->init_done_rc = 0;
+			retry_count++;
+			/* FW or device may be busy, so
+			 * wait a bit before retrying login
+			 */
+			msleep(500);
+		} else if (adapter->init_done_rc == PARTIALSUCCESS) {
 			retry_count++;
 			release_sub_crqs(adapter, 1);
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH AUTOSEL 5.4 07/24] ibmvnic: Harden device login requests
From: Sasha Levin @ 2020-06-23 17:35 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sasha Levin, netdev, Thomas Falcon, linuxppc-dev,
	David S . Miller
In-Reply-To: <20200623173559.1355728-1-sashal@kernel.org>

From: Thomas Falcon <tlfalcon@linux.ibm.com>

[ Upstream commit dff515a3e71dc8ab3b9dcc2e23a9b5fca88b3c18 ]

The VNIC driver's "login" command sequence is the final step
in the driver's initialization process with device firmware,
confirming the available device queue resources to be utilized
by the driver. Under high system load, firmware may not respond
to the request in a timely manner or may abort the request. In
such cases, the driver should reattempt the login command
sequence. In case of a device error, the number of retries
is bounded.

Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5a42ddeecfe50..4f503b9a674c4 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -779,12 +779,13 @@ static int ibmvnic_login(struct net_device *netdev)
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	unsigned long timeout = msecs_to_jiffies(30000);
 	int retry_count = 0;
+	int retries = 10;
 	bool retry;
 	int rc;
 
 	do {
 		retry = false;
-		if (retry_count > IBMVNIC_MAX_QUEUES) {
+		if (retry_count > retries) {
 			netdev_warn(netdev, "Login attempts exceeded\n");
 			return -1;
 		}
@@ -799,11 +800,23 @@ static int ibmvnic_login(struct net_device *netdev)
 
 		if (!wait_for_completion_timeout(&adapter->init_done,
 						 timeout)) {
-			netdev_warn(netdev, "Login timed out\n");
-			return -1;
+			netdev_warn(netdev, "Login timed out, retrying...\n");
+			retry = true;
+			adapter->init_done_rc = 0;
+			retry_count++;
+			continue;
 		}
 
-		if (adapter->init_done_rc == PARTIALSUCCESS) {
+		if (adapter->init_done_rc == ABORTED) {
+			netdev_warn(netdev, "Login aborted, retrying...\n");
+			retry = true;
+			adapter->init_done_rc = 0;
+			retry_count++;
+			/* FW or device may be busy, so
+			 * wait a bit before retrying login
+			 */
+			msleep(500);
+		} else if (adapter->init_done_rc == PARTIALSUCCESS) {
 			retry_count++;
 			release_sub_crqs(adapter, 1);
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.19 07/15] ibmvnic: Harden device login requests
From: Sasha Levin @ 2020-06-23 17:36 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sasha Levin, netdev, Thomas Falcon, linuxppc-dev,
	David S . Miller
In-Reply-To: <20200623173630.1355971-1-sashal@kernel.org>

From: Thomas Falcon <tlfalcon@linux.ibm.com>

[ Upstream commit dff515a3e71dc8ab3b9dcc2e23a9b5fca88b3c18 ]

The VNIC driver's "login" command sequence is the final step
in the driver's initialization process with device firmware,
confirming the available device queue resources to be utilized
by the driver. Under high system load, firmware may not respond
to the request in a timely manner or may abort the request. In
such cases, the driver should reattempt the login command
sequence. In case of a device error, the number of retries
is bounded.

Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 645298628b6f7..5e9e45befc875 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -792,12 +792,13 @@ static int ibmvnic_login(struct net_device *netdev)
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	unsigned long timeout = msecs_to_jiffies(30000);
 	int retry_count = 0;
+	int retries = 10;
 	bool retry;
 	int rc;
 
 	do {
 		retry = false;
-		if (retry_count > IBMVNIC_MAX_QUEUES) {
+		if (retry_count > retries) {
 			netdev_warn(netdev, "Login attempts exceeded\n");
 			return -1;
 		}
@@ -812,11 +813,23 @@ static int ibmvnic_login(struct net_device *netdev)
 
 		if (!wait_for_completion_timeout(&adapter->init_done,
 						 timeout)) {
-			netdev_warn(netdev, "Login timed out\n");
-			return -1;
+			netdev_warn(netdev, "Login timed out, retrying...\n");
+			retry = true;
+			adapter->init_done_rc = 0;
+			retry_count++;
+			continue;
 		}
 
-		if (adapter->init_done_rc == PARTIALSUCCESS) {
+		if (adapter->init_done_rc == ABORTED) {
+			netdev_warn(netdev, "Login aborted, retrying...\n");
+			retry = true;
+			adapter->init_done_rc = 0;
+			retry_count++;
+			/* FW or device may be busy, so
+			 * wait a bit before retrying login
+			 */
+			msleep(500);
+		} else if (adapter->init_done_rc == PARTIALSUCCESS) {
 			retry_count++;
 			release_sub_crqs(adapter, 1);
 
-- 
2.25.1


^ permalink raw reply related

* Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables
From: Marco Elver @ 2020-06-23 17:59 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-s390, linuxppc-dev, bigeasy, x86, heiko.carstens,
	linux-kernel, rostedt, davem, Ahmed S. Darwish, sparclinux, linux,
	tglx, will, mingo
In-Reply-To: <20200623163730.GA4800@hirez.programming.kicks-ass.net>

On Tue, Jun 23, 2020 at 06:37PM +0200, Peter Zijlstra wrote:
> On Tue, Jun 23, 2020 at 06:13:21PM +0200, Ahmed S. Darwish wrote:
> > Well, freshly merged code is using it. For example, KCSAN:
> > 
> >     => f1bc96210c6a ("kcsan: Make KCSAN compatible with lockdep")
> >     => kernel/kcsan/report.c:
> > 
> >     void kcsan_report(...)
> >     {
> > 	...
> >         /*
> >          * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
> >          * we do not turn off lockdep here; this could happen due to recursion
> >          * into lockdep via KCSAN if we detect a race in utilities used by
> >          * lockdep.
> >          */
> >         lockdep_off();
> > 	...
> >     }
> 
> Marco, do you remember what exactly happened there? Because I'm about to
> wreck that. That is, I'm going to make TRACE_IRQFLAGS ignore
> lockdep_off().

Yeah, I was trying to squash any kind of recursion:

	lockdep -> other libs ->
		-> KCSAN
		-> print report
		-> dump stack, printk and friends
		-> lockdep -> other libs
			-> KCSAN ...

Some history:

* Initial patch to fix:
	https://lore.kernel.org/lkml/20200115162512.70807-1-elver@google.com/

* KCSAN+lockdep+ftrace:
	https://lore.kernel.org/lkml/20200214211035.209972-1-elver@google.com/

lockdep now has KCSAN_SANITIZE := n, but we still need to ensure that
there are no paths out of lockdep, or the IRQ flags tracing code, that
might lead through other libs, through KCSAN, libs used to generate a
report, and back to lockdep.

I never quite figured out the exact trace that led to corruption, but
avoiding any kind of potential for recursion was the only thing that
would avoid the check_flags() warnings.

Thanks,
-- Marco

^ permalink raw reply

* Re: [PATCH v2 0/2] cpufreq: Specify the default governor on command line
From: Quentin Perret @ 2020-06-23 18:04 UTC (permalink / raw)
  To: Doug Smythies
  Cc: juri.lelli, kernel-team, vincent.guittot, arnd, linux-pm, peterz,
	adharmap, rafael, rjw, linux-kernel, viresh.kumar, mingo, paulus,
	linuxppc-dev, tkjos
In-Reply-To: <002201d64987$5dc93b90$195bb2b0$@net>

Hi Doug,

On Tuesday 23 Jun 2020 at 10:54:33 (-0700), Doug Smythies wrote:
> Hi Quentin,
> 
> Because I am lazy and sometimes do not want to recompile
> the distro source, I have a need/desire for this.

Good to know I'm not the only one ;-)

> Tested these two grub command lines:
> 
> GRUB_CMDLINE_LINUX_DEFAULT="ipv6.disable=1 consoleblank=300 intel_pstate=disable cpufreq.default_governor=schedutil cpuidle_sysfs_switch cpuidle.governor=teo"
> 
> And
> 
> #GRUB_CMDLINE_LINUX_DEFAULT="ipv6.disable=1 consoleblank=450 intel_pstate=passive cpufreq.default_governor=schedutil cpuidle_sysfs_switch cpuidle.governor=teo"
> 
> And all worked as expected. I use Ubuntu as my distro, and also had to disable a startup script that switches to "ondemand", or similar, after 1 minute.

Good, thanks for giving it a try.

> As a side note (separate subject, but is one reason I tried it):
> My i5-9600K based computer seems to hit a power limit during boot approximately 3 seconds after kernel selection on grub.
> This had no effect on that issue (even when selecting powersave governor).

Interesting ... Could you confirm that compiling with powersave as
default doesn't fix the issue either?

Other question, when does the intel_pstate driver start on your device?
Before or after that 3 seconds boot time?

Thanks,
Quentin

^ permalink raw reply

* Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables
From: Peter Zijlstra @ 2020-06-23 18:12 UTC (permalink / raw)
  To: Marco Elver
  Cc: linux-s390, linuxppc-dev, bigeasy, x86, heiko.carstens,
	linux-kernel, rostedt, davem, Ahmed S. Darwish, sparclinux, linux,
	tglx, will, mingo
In-Reply-To: <20200623175957.GA106514@elver.google.com>

On Tue, Jun 23, 2020 at 07:59:57PM +0200, Marco Elver wrote:
> On Tue, Jun 23, 2020 at 06:37PM +0200, Peter Zijlstra wrote:
> > On Tue, Jun 23, 2020 at 06:13:21PM +0200, Ahmed S. Darwish wrote:
> > > Well, freshly merged code is using it. For example, KCSAN:
> > > 
> > >     => f1bc96210c6a ("kcsan: Make KCSAN compatible with lockdep")
> > >     => kernel/kcsan/report.c:
> > > 
> > >     void kcsan_report(...)
> > >     {
> > > 	...
> > >         /*
> > >          * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
> > >          * we do not turn off lockdep here; this could happen due to recursion
> > >          * into lockdep via KCSAN if we detect a race in utilities used by
> > >          * lockdep.
> > >          */
> > >         lockdep_off();
> > > 	...
> > >     }
> > 
> > Marco, do you remember what exactly happened there? Because I'm about to
> > wreck that. That is, I'm going to make TRACE_IRQFLAGS ignore
> > lockdep_off().
> 
> Yeah, I was trying to squash any kind of recursion:
> 
> 	lockdep -> other libs ->
> 		-> KCSAN
> 		-> print report
> 		-> dump stack, printk and friends
> 		-> lockdep -> other libs
> 			-> KCSAN ...
> 
> Some history:
> 
> * Initial patch to fix:
> 	https://lore.kernel.org/lkml/20200115162512.70807-1-elver@google.com/

That patch is weird; just :=n on lockdep.c should've cured that, the
rest is massive overkill.

> * KCSAN+lockdep+ftrace:
> 	https://lore.kernel.org/lkml/20200214211035.209972-1-elver@google.com/

That doesn't really have anything useful..

> lockdep now has KCSAN_SANITIZE := n, but we still need to ensure that
> there are no paths out of lockdep, or the IRQ flags tracing code, that
> might lead through other libs, through KCSAN, libs used to generate a
> report, and back to lockdep.
> 
> I never quite figured out the exact trace that led to corruption, but
> avoiding any kind of potential for recursion was the only thing that
> would avoid the check_flags() warnings.

Fair enough; I'll rip it all up and boot a KCSAN kernel, see what if
anything happens.

^ permalink raw reply

* Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables
From: Marco Elver @ 2020-06-23 18:39 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-s390, linuxppc-dev, bigeasy, the arch/x86 maintainers,
	heiko.carstens, LKML, Steven Rostedt, David S. Miller, Qian Cai,
	Ahmed S. Darwish, sparclinux, linux, Thomas Gleixner, Will Deacon,
	Ingo Molnar
In-Reply-To: <20200623181232.GB4800@hirez.programming.kicks-ass.net>

On Tue, 23 Jun 2020 at 20:13, Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Tue, Jun 23, 2020 at 07:59:57PM +0200, Marco Elver wrote:
> > On Tue, Jun 23, 2020 at 06:37PM +0200, Peter Zijlstra wrote:
> > > On Tue, Jun 23, 2020 at 06:13:21PM +0200, Ahmed S. Darwish wrote:
> > > > Well, freshly merged code is using it. For example, KCSAN:
> > > >
> > > >     => f1bc96210c6a ("kcsan: Make KCSAN compatible with lockdep")
> > > >     => kernel/kcsan/report.c:
> > > >
> > > >     void kcsan_report(...)
> > > >     {
> > > >   ...
> > > >         /*
> > > >          * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
> > > >          * we do not turn off lockdep here; this could happen due to recursion
> > > >          * into lockdep via KCSAN if we detect a race in utilities used by
> > > >          * lockdep.
> > > >          */
> > > >         lockdep_off();
> > > >   ...
> > > >     }
> > >
> > > Marco, do you remember what exactly happened there? Because I'm about to
> > > wreck that. That is, I'm going to make TRACE_IRQFLAGS ignore
> > > lockdep_off().
> >
> > Yeah, I was trying to squash any kind of recursion:
> >
> >       lockdep -> other libs ->
> >               -> KCSAN
> >               -> print report
> >               -> dump stack, printk and friends
> >               -> lockdep -> other libs
> >                       -> KCSAN ...
> >
> > Some history:
> >
> > * Initial patch to fix:
> >       https://lore.kernel.org/lkml/20200115162512.70807-1-elver@google.com/
>
> That patch is weird; just :=n on lockdep.c should've cured that, the
> rest is massive overkill.
>
> > * KCSAN+lockdep+ftrace:
> >       https://lore.kernel.org/lkml/20200214211035.209972-1-elver@google.com/
>
> That doesn't really have anything useful..
>
> > lockdep now has KCSAN_SANITIZE := n, but we still need to ensure that
> > there are no paths out of lockdep, or the IRQ flags tracing code, that
> > might lead through other libs, through KCSAN, libs used to generate a
> > report, and back to lockdep.
> >
> > I never quite figured out the exact trace that led to corruption, but
> > avoiding any kind of potential for recursion was the only thing that
> > would avoid the check_flags() warnings.
>
> Fair enough; I'll rip it all up and boot a KCSAN kernel, see what if
> anything happens.

Thanks!

This was happening with Qian Cai's (Cc'd) test cases. If the kernel or
this patch changed things around so this doesn't happen anymore
regardless, then I don't see a problem.

Thanks,
-- Marco

^ permalink raw reply

* Re: [PATCH 1/2] powerpc/papr_scm: Fetch nvdimm performance stats from PHYP
From: Ira Weiny @ 2020-06-23 19:02 UTC (permalink / raw)
  To: Vaibhav Jain; +Cc: Aneesh Kumar K . V, linuxppc-dev, linux-nvdimm
In-Reply-To: <20200622042451.22448-2-vaibhav@linux.ibm.com>

On Mon, Jun 22, 2020 at 09:54:50AM +0530, Vaibhav Jain wrote:
> Update papr_scm.c to query dimm performance statistics from PHYP via
> H_SCM_PERFORMANCE_STATS hcall and export them to user-space as PAPR
> specific NVDIMM attribute 'perf_stats' in sysfs. The patch also
> provides sysfs ABI documentation for the stats being reported and
> their meanings.
> 
> During NVDIMM probe time in papr_scm_nvdimm_init() a special variant
> of H_SCM_PERFORMANCE_STATS hcall is issued to check if collection of
> performance statistics is supported or not. If successful then PHYP
> returns a maximum possible buffer length needed to read all
> performance stats. This returned value is stored in a per-nvdimm
> attribute 'len_stat_buffer'.
> 
> The layout of request buffer for reading NVDIMM performance stats from
> PHYP is defined in 'struct papr_scm_perf_stats' and 'struct
> papr_scm_perf_stat'. These structs are used in newly introduced
> drc_pmem_query_stats() that issues the H_SCM_PERFORMANCE_STATS hcall.
> 
> The sysfs access function perf_stats_show() uses value
> 'len_stat_buffer' to allocate a buffer large enough to hold all
> possible NVDIMM performance stats and passes it to
> drc_pmem_query_stats() to populate. Finally statistics reported in the
> buffer are formatted into the sysfs access function output buffer.
> 
> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
> ---
>  Documentation/ABI/testing/sysfs-bus-papr-pmem |  27 ++++
>  arch/powerpc/platforms/pseries/papr_scm.c     | 139 ++++++++++++++++++
>  2 files changed, 166 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem
> index 5b10d036a8d4..c1a67275c43f 100644
> --- a/Documentation/ABI/testing/sysfs-bus-papr-pmem
> +++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem
> @@ -25,3 +25,30 @@ Description:
>  				  NVDIMM have been scrubbed.
>  		* "locked"	: Indicating that NVDIMM contents cant
>  				  be modified until next power cycle.
> +
> +What:		/sys/bus/nd/devices/nmemX/papr/perf_stats
> +Date:		May, 2020
> +KernelVersion:	v5.9
> +Contact:	linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, linux-nvdimm@lists.01.org,
> +Description:
> +		(RO) Report various performance stats related to papr-scm NVDIMM
> +		device.  Each stat is reported on a new line with each line
> +		composed of a stat-identifier followed by its value. Below are
> +		currently known dimm performance stats which are reported:
> +
> +		* "CtlResCt" : Controller Reset Count
> +		* "CtlResTm" : Controller Reset Elapsed Time
> +		* "PonSecs " : Power-on Seconds
> +		* "MemLife " : Life Remaining
> +		* "CritRscU" : Critical Resource Utilization
> +		* "HostLCnt" : Host Load Count
> +		* "HostSCnt" : Host Store Count
> +		* "HostSDur" : Host Store Duration
> +		* "HostLDur" : Host Load Duration
> +		* "MedRCnt " : Media Read Count
> +		* "MedWCnt " : Media Write Count
> +		* "MedRDur " : Media Read Duration
> +		* "MedWDur " : Media Write Duration
> +		* "CchRHCnt" : Cache Read Hit Count
> +		* "CchWHCnt" : Cache Write Hit Count
> +		* "FastWCnt" : Fast Write Count
> \ No newline at end of file
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> index 9c569078a09f..cb3f9acc325b 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -62,6 +62,24 @@
>  				    PAPR_PMEM_HEALTH_FATAL |	\
>  				    PAPR_PMEM_HEALTH_UNHEALTHY)
>  
> +#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS)
> +#define PAPR_SCM_PERF_STATS_VERSION 0x1
> +
> +/* Struct holding a single performance metric */
> +struct papr_scm_perf_stat {
> +	u8 statistic_id[8];
> +	u64 statistic_value;
> +};
> +
> +/* Struct exchanged between kernel and PHYP for fetching drc perf stats */
> +struct papr_scm_perf_stats {
> +	u8 eye_catcher[8];
> +	u32 stats_version;		/* Should be 0x01 */
                                                     ^^^^
				     PAPR_SCM_PERF_STATS_VERSION?

> +	u32 num_statistics;		/* Number of stats following */
> +	/* zero or more performance metrics */
> +	struct papr_scm_perf_stat scm_statistic[];
> +} __packed;
> +
>  /* private struct associated with each region */
>  struct papr_scm_priv {
>  	struct platform_device *pdev;
> @@ -89,6 +107,9 @@ struct papr_scm_priv {
>  
>  	/* Health information for the dimm */
>  	u64 health_bitmap;
> +
> +	/* length of the stat buffer as expected by phyp */
> +	size_t len_stat_buffer;
>  };
>  
>  static int drc_pmem_bind(struct papr_scm_priv *p)
> @@ -194,6 +215,75 @@ static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
>  	return drc_pmem_bind(p);
>  }
>  
> +/*
> + * Query the Dimm performance stats from PHYP and copy them (if returned) to
> + * provided struct papr_scm_perf_stats instance 'stats' of 'size' in bytes.
> + * The value of R4 is copied to 'out' if the pointer is provided.
> + */
> +static int drc_pmem_query_stats(struct papr_scm_priv *p,
> +				struct papr_scm_perf_stats *buff_stats,
> +				size_t size, unsigned int num_stats,
> +				uint64_t *out)
> +{
> +	unsigned long ret[PLPAR_HCALL_BUFSIZE];
> +	struct papr_scm_perf_stat *stats;
> +	s64 rc, i;
> +
> +	/* Setup the out buffer */
> +	if (buff_stats) {
> +		memcpy(buff_stats->eye_catcher,
> +		       PAPR_SCM_PERF_STATS_EYECATCHER, 8);
> +		buff_stats->stats_version =
> +			cpu_to_be32(PAPR_SCM_PERF_STATS_VERSION);
> +		buff_stats->num_statistics =
> +			cpu_to_be32(num_stats);
> +	} else {
> +		/* In case of no out buffer ignore the size */
> +		size = 0;
> +	}
> +
> +	/*
> +	 * Do the HCALL asking PHYP for info and if R4 was requested
> +	 * return its value in 'out' variable.
> +	 */
> +	rc = plpar_hcall(H_SCM_PERFORMANCE_STATS, ret, p->drc_index,
> +			 virt_to_phys(buff_stats), size);

You are calling virt_to_phys(NULL) here when called from
papr_scm_nvdimm_init()!  That can't be right.

> +	if (out)
> +		*out =  ret[0];
> +
> +	if (rc == H_PARTIAL) {
> +		dev_err(&p->pdev->dev,
> +			"Unknown performance stats, Err:0x%016lX\n", ret[0]);
> +		return -ENOENT;
> +	} else if (rc != H_SUCCESS) {
> +		dev_err(&p->pdev->dev,
> +			"Failed to query performance stats, Err:%lld\n", rc);
> +		return -ENXIO;
> +	}
> +
> +	/* Successfully fetched the requested stats from phyp */
> +	if (size != 0) {
> +		buff_stats->num_statistics =
> +			be32_to_cpu(buff_stats->num_statistics);
> +
> +		/* Transform the stats buffer values from BE to cpu native */
> +		for (i = 0, stats = buff_stats->scm_statistic;
> +		     i < buff_stats->num_statistics; ++i) {
> +			stats[i].statistic_value =
> +				be64_to_cpu(stats[i].statistic_value);
> +		}
> +		dev_dbg(&p->pdev->dev,
> +			"Performance stats returned %d stats\n",
> +			buff_stats->num_statistics);
> +	} else {
> +		/* Handle case where stat buffer size was requested */
> +		dev_dbg(&p->pdev->dev,
> +			"Performance stats size %ld\n", ret[0]);
> +	}
> +
> +	return 0;
> +}
> +
>  /*
>   * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the
>   * health information.
> @@ -631,6 +721,45 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
>  	return 0;
>  }
>  
> +static ssize_t perf_stats_show(struct device *dev,
> +			       struct device_attribute *attr, char *buf)
> +{
> +	int index, rc;
> +	struct seq_buf s;
> +	struct papr_scm_perf_stat *stat;
> +	struct papr_scm_perf_stats *stats;
> +	struct nvdimm *dimm = to_nvdimm(dev);
> +	struct papr_scm_priv *p = nvdimm_provider_data(dimm);
> +
> +	if (!p->len_stat_buffer)
> +		return -ENOENT;
> +
> +	/* Allocate the buffer for phyp where stats are written */
> +	stats = kzalloc(p->len_stat_buffer, GFP_KERNEL);

I'm concerned that this buffer does not seem to have anything to do with the
'num_stats' parameter passed to drc_pmem_query_stats().  Furthermore, why is
num_stats always 0 in those calls?

> +	if (!stats)
> +		return -ENOMEM;
> +
> +	/* Ask phyp to return all dimm perf stats */
> +	rc = drc_pmem_query_stats(p, stats, p->len_stat_buffer, 0, NULL);
> +	if (!rc) {
> +		/*
> +		 * Go through the returned output buffer and print stats and
> +		 * values. Since statistic_id is essentially a char string of
> +		 * 8 bytes, simply use the string format specifier to print it.
> +		 */
> +		seq_buf_init(&s, buf, PAGE_SIZE);
> +		for (index = 0, stat = stats->scm_statistic;
> +		     index < stats->num_statistics; ++index, ++stat) {
> +			seq_buf_printf(&s, "%.8s = 0x%016llX\n",
> +				       stat->statistic_id, stat->statistic_value);
> +		}
> +	}
> +
> +	kfree(stats);
> +	return rc ? rc : seq_buf_used(&s);
> +}
> +DEVICE_ATTR_RO(perf_stats);
> +
>  static ssize_t flags_show(struct device *dev,
>  			  struct device_attribute *attr, char *buf)
>  {
> @@ -676,6 +805,7 @@ DEVICE_ATTR_RO(flags);
>  /* papr_scm specific dimm attributes */
>  static struct attribute *papr_nd_attributes[] = {
>  	&dev_attr_flags.attr,
> +	&dev_attr_perf_stats.attr,
>  	NULL,
>  };
>  
> @@ -696,6 +826,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
>  	struct nd_region_desc ndr_desc;
>  	unsigned long dimm_flags;
>  	int target_nid, online_nid;
> +	u64 stat_size;
>  
>  	p->bus_desc.ndctl = papr_scm_ndctl;
>  	p->bus_desc.module = THIS_MODULE;
> @@ -759,6 +890,14 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
>  		dev_info(dev, "Region registered with target node %d and online node %d",
>  			 target_nid, online_nid);
>  
> +	/* Try retrieving the stat buffer and see if it's supported */
> +	if (!drc_pmem_query_stats(p, NULL, 0, 0, &stat_size)) {
> +		p->len_stat_buffer = (size_t)stat_size;
> +		dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
> +			p->len_stat_buffer);
> +	} else {
> +		dev_info(&p->pdev->dev, "Limited dimm stat info available\n");

Do we really need this print?

Ira

> +	}
>  	return 0;
>  
>  err:	nvdimm_bus_unregister(p->bus);
> -- 
> 2.26.2
> _______________________________________________
> Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
> To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox