LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v2 1/4] powerpc/drmem: Make lmb_size 64 bit
From: Aneesh Kumar K.V @ 2020-08-06 16:23 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: Nathan Lynch, Aneesh Kumar K.V, stable

Similar to commit 89c140bbaeee ("pseries: Fix 64 bit logical memory block panic")
make sure different variables tracking lmb_size are updated to be 64 bit.

This was found by code audit.

Cc: stable@vger.kernel.org
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/drmem.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 17ccc6474ab6..d719cbac34b2 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -21,7 +21,7 @@ struct drmem_lmb {
 struct drmem_lmb_info {
 	struct drmem_lmb        *lmbs;
 	int                     n_lmbs;
-	u32                     lmb_size;
+	u64                     lmb_size;
 };
 
 extern struct drmem_lmb_info *drmem_info;
@@ -67,7 +67,7 @@ struct of_drconf_cell_v2 {
 #define DRCONF_MEM_RESERVED	0x00000080
 #define DRCONF_MEM_HOTREMOVABLE	0x00000100
 
-static inline u32 drmem_lmb_size(void)
+static inline u64 drmem_lmb_size(void)
 {
 	return drmem_info->lmb_size;
 }
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH][V2] macintosh: windfarm: remove detatch debug containing spelling mistakes
From: Wolfram Sang @ 2020-08-06 11:24 UTC (permalink / raw)
  To: Colin King; +Cc: Wolfram Sang, kernel-janitors, linuxppc-dev, linux-kernel
In-Reply-To: <20200806102901.44988-1-colin.king@canonical.com>

[-- Attachment #1: Type: text/plain, Size: 387 bytes --]

On Thu, Aug 06, 2020 at 11:29:01AM +0100, Colin King wrote:
> From: Colin Ian King <colin.king@canonical.com>
> 
> There are spelling mistakes in two debug messages. As recommended
> by Wolfram Sang, these can be removed as there is plenty of debug
> in the driver core.
> 
> Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Wolfram Sang <wsa@kernel.org>


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH v2 1/2] powerpc/perf: consolidate GPCI hcall structs into asm/hvcall.h
From: Nathan Lynch @ 2020-08-06 15:19 UTC (permalink / raw)
  To: Scott Cheloha, linuxppc-dev; +Cc: Tyrel Datwylder
In-Reply-To: <20200727184605.2945095-1-cheloha@linux.ibm.com>

Scott Cheloha <cheloha@linux.ibm.com> writes:

> The H_GetPerformanceCounterInfo (GPCI) hypercall input/output structs are
> useful to modules outside of perf/, so move them into asm/hvcall.h to live
> alongside the other powerpc hypercall structs.
>
> Leave the perf-specific GPCI stuff in perf/hv-gpci.h.
>
> Signed-off-by: Scott Cheloha <cheloha@linux.ibm.com>

Acked-by: Nathan Lynch <nathanl@linux.ibm.com>

^ permalink raw reply

* Re: [PATCH v2 2/2] powerpc/pseries: new lparcfg key/value pair: partition_affinity_score
From: Nathan Lynch @ 2020-08-06 15:18 UTC (permalink / raw)
  To: Scott Cheloha, linuxppc-dev; +Cc: Tyrel Datwylder
In-Reply-To: <20200727184605.2945095-2-cheloha@linux.ibm.com>

Scott Cheloha <cheloha@linux.ibm.com> writes:
> The H_GetPerformanceCounterInfo (GPCI) PHYP hypercall has a subcall,
> Affinity_Domain_Info_By_Partition, which returns, among other things,
> a "partition affinity score" for a given LPAR.  This score, a value on
> [0-100], represents the processor-memory affinity for the LPAR in
> question.  A score of 0 indicates the worst possible affinity while a
> score of 100 indicates perfect affinity.  The score can be used to
> reason about performance.
>
> This patch adds the score for the local LPAR to the lparcfg procfile
> under a new 'partition_affinity_score' key.
>
> Signed-off-by: Scott Cheloha <cheloha@linux.ibm.com>
> ---
>  arch/powerpc/platforms/pseries/lparcfg.c | 35 ++++++++++++++++++++++++
>  1 file changed, 35 insertions(+)
>
> diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
> index b8d28ab88178..e278390ab28d 100644
> --- a/arch/powerpc/platforms/pseries/lparcfg.c
> +++ b/arch/powerpc/platforms/pseries/lparcfg.c
> @@ -136,6 +136,39 @@ static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
>  	return rc;
>  }
>  
> +static void show_gpci_data(struct seq_file *m)
> +{
> +	struct hv_gpci_request_buffer *buf;
> +	unsigned int affinity_score;
> +	long ret;
> +
> +	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
> +	if (buf == NULL)
> +		return;
> +
> +	/*
> +	 * Show the local LPAR's affinity score.
> +	 *
> +	 * 0xB1 selects the Affinity_Domain_Info_By_Partition subcall.
> +	 * The score is at byte 0xB in the output buffer.
> +	 */
> +	memset(&buf->params, 0, sizeof(buf->params));
> +	buf->params.counter_request = cpu_to_be32(0xB1);
> +	buf->params.starting_index = cpu_to_be32(-1);	/* local LPAR */
> +	buf->params.counter_info_version_in = 0x5;	/* v5+ for score */
> +	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, virt_to_phys(buf),
> +				 sizeof(*buf));
> +	if (ret != H_SUCCESS) {
> +		pr_debug("hcall failed: H_GET_PERF_COUNTER_INFO: %ld, %x\n",
> +			 ret, be32_to_cpu(buf->params.detail_rc));
> +		goto out;
> +	}
> +	affinity_score = buf->bytes[0xB];
> +	seq_printf(m, "partition_affinity_score=%u\n", affinity_score);
> +out:
> +	kfree(buf);
> +}
> +
>  static unsigned h_pic(unsigned long *pool_idle_time,
>  		      unsigned long *num_procs)
>  {
> @@ -487,6 +520,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
>  			   partition_active_processors * 100);
>  	}
>  
> +	show_gpci_data(m);
> +
>  	seq_printf(m, "partition_active_processors=%d\n",
>  		   partition_active_processors);

Acked-by: Nathan Lynch <nathanl@linux.ibm.com>

^ permalink raw reply

* Re: [PATCH v2 2/2] powerpc/pseries: new lparcfg key/value pair: partition_affinity_score
From: Nathan Lynch @ 2020-08-06 15:17 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Tyrel Datwyler, Scott Cheloha, linuxppc-dev
In-Reply-To: <871rkkymd5.fsf@mpe.ellerman.id.au>

Michael Ellerman <mpe@ellerman.id.au> writes:
> Tyrel Datwyler <tyreld@linux.ibm.com> writes:
>> On 7/27/20 11:46 AM, Scott Cheloha wrote:
>>> The H_GetPerformanceCounterInfo (GPCI) PHYP hypercall has a subcall,
>>> Affinity_Domain_Info_By_Partition, which returns, among other things,
>>> a "partition affinity score" for a given LPAR.  This score, a value on
>>> [0-100], represents the processor-memory affinity for the LPAR in
>>> question.  A score of 0 indicates the worst possible affinity while a
>>> score of 100 indicates perfect affinity.  The score can be used to
>>> reason about performance.
>>> 
>>> This patch adds the score for the local LPAR to the lparcfg procfile
>>> under a new 'partition_affinity_score' key.
>>> 
>>> Signed-off-by: Scott Cheloha <cheloha@linux.ibm.com>
>>
>> I was hoping Michael would chime in the first time around on this patch series
>> about adding another key/value pair to lparcfg.
>
> That guy is so unreliable.
>
> I don't love adding new stuff in lparcfg, but given the file already
> exists and there's no prospect of removing it, it's probably not worth
> the effort to put the new field anywhere else.
>
> My other query with this was how on earth anyone is meant to interpret
> the metric. ie. if my metric is 50, what does that mean? If it's 90
> should I worry?

Here's some more background.

This interface is just passing up what the platform provides, and it's
identical to the partition affinity score described in the documentation
for the management console's lsmemopt command:

https://www.ibm.com/support/knowledgecenter/POWER9/p9edm/lsmemopt.html

The score is 0-100, higher values are better. To illustrate: I believe a
partition's score will be 100 (or very close to it) if all of its CPUs
and memory reside within one node. It will be lower than that when a
partition has some memory without local CPUs, and lower still when there
is no CPU-memory affinity within the partition. Beyond that I don't have
more specific information and the algorithm and scale are set by the
platform.

The intent is for this to be a metric to gather during problem
determination e.g. via sosreport or similar, but as far as Linux is
concerned this should be treated as an opaque value.

^ permalink raw reply

* Re: [PATCH 1/2] sched/topology: Allow archs to override cpu_smt_mask
From: Srikar Dronamraju @ 2020-08-06 14:09 UTC (permalink / raw)
  To: peterz
  Cc: Gautham R Shenoy, Michael Neuling, Vincent Guittot, Rik van Riel,
	linuxppc-dev, LKML, Valentin Schneider, Thomas Gleixner,
	Mel Gorman, Ingo Molnar, Dietmar Eggemann
In-Reply-To: <20200806131547.GC2674@hirez.programming.kicks-ass.net>

* peterz@infradead.org <peterz@infradead.org> [2020-08-06 15:15:47]:

> > But my understanding is most LPARs don't get migrated back and forth,
> > they'll start life on a P8 and only get migrated to a P9 once when the
> > customer gets a P9. They might then run for a long time (months to
> > years) on the P9 in P8 compat mode, not because they ever want to
> > migrate back to a real P8, but because the software in the LPAR is still
> > expecting to be on a P8.
> > 
> > I'm not a real expert on all the Enterprisey stuff though, so someone
> > else might be able to give us a better picture.
> > 
> > But the point of mentioning the migration stuff was mainly just to
> > explain why we feel we need to present SMT8 to userspace even on P9.
> 
> OK, fair enough. The patch wasn't particularly onerous, I was just
> wondering why etc..
> 
> The case of starting on a P8 and being migrated to a P9 makes sense to
> me; in that case you'd like to rebuild your sched domains, but can't go
> about changing user visible topolofy information.
> 
> I suppose:
> 
> Acked-by; Peter Zijlstra (Intel) <peterz@infradead.org>
> 
> An updated Changelog that recaps some of this discussion might also be
> nice.

Okay, will surely do the needful.

-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH 1/2] sched/topology: Allow archs to override cpu_smt_mask
From: peterz @ 2020-08-06 13:15 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Gautham R Shenoy, Michael Neuling, Vincent Guittot,
	Srikar Dronamraju, Rik van Riel, linuxppc-dev, LKML,
	Valentin Schneider, Thomas Gleixner, Mel Gorman, Ingo Molnar,
	Dietmar Eggemann
In-Reply-To: <87d044yn9z.fsf@mpe.ellerman.id.au>

On Thu, Aug 06, 2020 at 10:25:12PM +1000, Michael Ellerman wrote:
> peterz@infradead.org writes:
> > On Thu, Aug 06, 2020 at 03:32:25PM +1000, Michael Ellerman wrote:
> >
> >> That brings with it a bunch of problems, such as existing software that
> >> has been developed/configured for Power8 and expects to see SMT8.
> >> 
> >> We also allow LPARs to be live migrated from Power8 to Power9 (and back), so
> >> maintaining the illusion of SMT8 is considered a requirement to make that work.
> >
> > So how does that work if the kernel booted on P9 and demuxed the SMT8
> > into 2xSMT4? If you migrate that state onto a P8 with actual SMT8 you're
> > toast again.
> 
> The SMT mask would be inaccurate on the P8, rather than the current case
> where it's inaccurate on the P9.
> 
> Which would be our preference, because the backward migration case is
> not common AIUI.
> 
> Or am I missing a reason we'd be even more toast than that?

Well, the scheduler might do a wee bit funny. We just had a patch that
increase load-balancing opportunities between SMT siblings because they
all share L1 anyway.

But yeah, nothing terminal.

> Under PowerVM the kernel does know it's being migrated, so we could
> actually update the mask, but I'm not sure if that's really feasible.

As long as you get a notification, rebuilding the sched domains isn't
terribly hard to do, there's more code that does that.

> >> Yeah I agree the naming is confusing.
> >> 
> >> Let's call them "SMT4 cores" and "SMT8 cores"?
> >
> > Works for me, thanks!
> >
> >> The problem is we are already lying to userspace, because firmware lies to us.
> >> 
> >> ie. the firmware on these systems shows us an SMT8 core, and so current kernels
> >> show SMT8 to userspace. I don't think we can realistically change that fact now,
> >> as these systems are already out in the field.
> >> 
> >> What this patch tries to do is undo some of the mess, and at least give the
> >> scheduler the right information.
> >
> > What a mess... I think it depends on what you do with that P9 to P8
> > migration case. Does it make sense to have a "p8_compat" boot arg for
> > the case where you want LPAR migration back onto P8 systems -- in which
> > case it simply takes the firmware's word as gospel and doesn't untangle
> > things, because it can actually land on a P8.
> 
> We already get told by firmware that we're running in "p8 compat" mode,
> because we have to pretend to userspace that it's running on a P8. So we
> could use that as a signal to leave things alone.
> 
> But my understanding is most LPARs don't get migrated back and forth,
> they'll start life on a P8 and only get migrated to a P9 once when the
> customer gets a P9. They might then run for a long time (months to
> years) on the P9 in P8 compat mode, not because they ever want to
> migrate back to a real P8, but because the software in the LPAR is still
> expecting to be on a P8.
> 
> I'm not a real expert on all the Enterprisey stuff though, so someone
> else might be able to give us a better picture.
> 
> But the point of mentioning the migration stuff was mainly just to
> explain why we feel we need to present SMT8 to userspace even on P9.

OK, fair enough. The patch wasn't particularly onerous, I was just
wondering why etc..

The case of starting on a P8 and being migrated to a P9 makes sense to
me; in that case you'd like to rebuild your sched domains, but can't go
about changing user visible topolofy information.

I suppose:

Acked-by; Peter Zijlstra (Intel) <peterz@infradead.org>

An updated Changelog that recaps some of this discussion might also be
nice.

^ permalink raw reply

* Re: [PATCH 1/2] sched/topology: Allow archs to override cpu_smt_mask
From: Srikar Dronamraju @ 2020-08-06 12:53 UTC (permalink / raw)
  To: peterz
  Cc: Gautham R Shenoy, Michael Neuling, Vincent Guittot, Rik van Riel,
	linuxppc-dev, LKML, Valentin Schneider, Thomas Gleixner,
	Mel Gorman, Ingo Molnar, Dietmar Eggemann
In-Reply-To: <20200806085429.GX2674@hirez.programming.kicks-ass.net>

* peterz@infradead.org <peterz@infradead.org> [2020-08-06 10:54:29]:

> On Thu, Aug 06, 2020 at 03:32:25PM +1000, Michael Ellerman wrote:
> 
> > That brings with it a bunch of problems, such as existing software that
> > has been developed/configured for Power8 and expects to see SMT8.
> > 
> > We also allow LPARs to be live migrated from Power8 to Power9 (and back), so
> > maintaining the illusion of SMT8 is considered a requirement to make that work.
> 
> So how does that work if the kernel booted on P9 and demuxed the SMT8
> into 2xSMT4? If you migrate that state onto a P8 with actual SMT8 you're
> toast again.
> 

To add to what Michael already said, the reason we don't expose the demux of
SMT8 into 2xSMT4 to userspace, is to make the userspace believe they are on
a SMT8. When the kernel is live migrated from P8 to P9, till the time of reboot
they would only have the older P8 topology. After reboot the kernel topology
would change, but the userspace is made to believe that they are running on
SMT8 core by way of keeping the sibling_cpumask at SMT8 core level.

-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* [RFC PATCH] powerpc/drmem: use global variable instead of fetching again
From: Aneesh Kumar K.V @ 2020-08-06 12:52 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: Nathan Lynch, Aneesh Kumar K.V, Hari Bathini
In-Reply-To: <20200806123604.248361-1-aneesh.kumar@linux.ibm.com>

use mem_addr_cells/mem_size_cells instead of fetching the values
again from device tree.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/mm/drmem.c | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index b2eeea39684c..f533a7b04ab9 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -14,8 +14,6 @@
 #include <asm/prom.h>
 #include <asm/drmem.h>
 
-static int n_root_addr_cells, n_root_size_cells;
-
 static struct drmem_lmb_info __drmem_info;
 struct drmem_lmb_info *drmem_info = &__drmem_info;
 
@@ -196,8 +194,8 @@ static void read_drconf_v1_cell(struct drmem_lmb *lmb,
 {
 	const __be32 *p = *prop;
 
-	lmb->base_addr = of_read_number(p, n_root_addr_cells);
-	p += n_root_addr_cells;
+	lmb->base_addr = of_read_number(p, mem_addr_cells);
+	p += mem_addr_cells;
 	lmb->drc_index = of_read_number(p++, 1);
 
 	p++; /* skip reserved field */
@@ -233,8 +231,8 @@ static void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
 	const __be32 *p = *prop;
 
 	dr_cell->seq_lmbs = of_read_number(p++, 1);
-	dr_cell->base_addr = of_read_number(p, n_root_addr_cells);
-	p += n_root_addr_cells;
+	dr_cell->base_addr = of_read_number(p, mem_addr_cells);
+	p += mem_addr_cells;
 	dr_cell->drc_index = of_read_number(p++, 1);
 	dr_cell->aa_index = of_read_number(p++, 1);
 	dr_cell->flags = of_read_number(p++, 1);
@@ -285,10 +283,6 @@ int __init walk_drmem_lmbs_early(unsigned long node, void *data,
 	if (!prop || len < dt_root_size_cells * sizeof(__be32))
 		return ret;
 
-	/* Get the address & size cells */
-	n_root_addr_cells = dt_root_addr_cells;
-	n_root_size_cells = dt_root_size_cells;
-
 	drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);
 
 	usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len);
@@ -318,12 +312,12 @@ static int init_drmem_lmb_size(struct device_node *dn)
 		return 0;
 
 	prop = of_get_property(dn, "ibm,lmb-size", &len);
-	if (!prop || len < n_root_size_cells * sizeof(__be32)) {
+	if (!prop || len < mem_size_cells * sizeof(__be32)) {
 		pr_info("Could not determine LMB size\n");
 		return -1;
 	}
 
-	drmem_info->lmb_size = of_read_number(prop, n_root_size_cells);
+	drmem_info->lmb_size = of_read_number(prop, mem_size_cells);
 	return 0;
 }
 
@@ -353,12 +347,6 @@ int walk_drmem_lmbs(struct device_node *dn, void *data,
 	if (!of_root)
 		return ret;
 
-	/* Get the address & size cells */
-	of_node_get(of_root);
-	n_root_addr_cells = of_n_addr_cells(of_root);
-	n_root_size_cells = of_n_size_cells(of_root);
-	of_node_put(of_root);
-
 	if (init_drmem_lmb_size(dn))
 		return ret;
 
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH] powerpc/pseries/hotplug-cpu: increase wait time for vCPU death
From: Michael Ellerman @ 2020-08-06 12:51 UTC (permalink / raw)
  To: Michael Roth, Greg Kurz
  Cc: Nathan Lynch, linuxppc-dev, Cedric Le Goater,
	Thiago Jung Bauermann
In-Reply-To: <159666656828.15440.9097316124875217814@sif>

Michael Roth <mdroth@linux.vnet.ibm.com> writes:
> Quoting Michael Roth (2020-08-04 23:37:32)
>> Quoting Michael Ellerman (2020-08-04 22:07:08)
>> > Greg Kurz <groug@kaod.org> writes:
>> > > On Tue, 04 Aug 2020 23:35:10 +1000
>> > > Michael Ellerman <mpe@ellerman.id.au> wrote:
>> > >> Spinning forever seems like a bad idea, but as has been demonstrated at
>> > >> least twice now, continuing when we don't know the state of the other
>> > >> CPU can lead to straight up crashes.
>> > >> 
>> > >> So I think I'm persuaded that it's preferable to have the kernel stuck
>> > >> spinning rather than oopsing.
>> > >> 
>> > >
>> > > +1
>> > >
>> > >> I'm 50/50 on whether we should have a cond_resched() in the loop. My
>> > >> first instinct is no, if we're stuck here for 20s a stack trace would be
>> > >> good. But then we will probably hit that on some big and/or heavily
>> > >> loaded machine.
>> > >> 
>> > >> So possibly we should call cond_resched() but have some custom logic in
>> > >> the loop to print a warning if we are stuck for more than some
>> > >> sufficiently long amount of time.
>> > >
>> > > How long should that be ?
>> > 
>> > Yeah good question.
>> > 
>> > I guess step one would be seeing how long it can take on the 384 vcpu
>> > machine. And we can probably test on some other big machines.
>> > 
>> > Hopefully Nathan can give us some idea of how long he's seen it take on
>> > large systems? I know he was concerned about the 20s timeout of the
>> > softlockup detector.
>> > 
>> > Maybe a minute or two?
>> 
>> Hmm, so I took a stab at this where I called cond_resched() after
>> every 5 seconds of polling and printed a warning at the same time (FWIW
>> that doesn't seem to trigger any warnings on a loaded 96-core mihawk
>> system using KVM running the 384vcpu unplug loop)
>> 
>> But it sounds like that's not quite what you had in mind. How frequently
>> do you think we should call cond_resched()? Maybe after 25 iterations
>> of polling smp_query_cpu_stopped() to keep original behavior somewhat
>> similar?

I think we can just call it on every iteration, it should be cheap
compared to an RTAS call.

The concern was just by doing that you effectively prevent the
softlockup detector from reporting you as stuck in that path. Hence the
desire to manually print a warning after ~60s or something.

cheers

^ permalink raw reply

* [PATCH] powerpc/perf: Account for interrupts during PMC overflow for an invalid SIAR check
From: Athira Rajeev @ 2020-08-06 12:46 UTC (permalink / raw)
  To: mpe; +Cc: aik, maddy, linuxppc-dev

Performance monitor interrupt handler checks if any counter has overflown
and calls `record_and_restart` in core-book3s which invokes
`perf_event_overflow` to record the sample information.
Apart from creating sample, perf_event_overflow also does the interrupt
and period checks via perf_event_account_interrupt.

Currently we record information only if the SIAR valid bit is set
( using `siar_valid` check ) and hence the interrupt check.
But it is possible that we do sampling for some events that are not
generating valid SIAR and hence there is no chance to disable the event
if interrupts is more than max_samples_per_tick. This leads to soft lockup.

Fix this by adding perf_event_account_interrupt in the invalid siar
code path for a sampling event. ie if siar is invalid, just do interrupt
check and don't record the sample information.

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 arch/powerpc/perf/core-book3s.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 01d7028..626e587 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2101,6 +2101,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,

 		if (perf_event_overflow(event, &data, regs))
 			power_pmu_stop(event, 0);
+	} else if (period) {
+		/* Account for interrupt incase of invalid siar */
+		if (perf_event_account_interrupt(event))
+			power_pmu_stop(event, 0);
 	}
 }

-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH] powerpc/book3s64/radix: Make radix_mem_block_size 64bit
From: Aneesh Kumar K.V @ 2020-08-06 12:44 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev
In-Reply-To: <874kpgymty.fsf@mpe.ellerman.id.au>

Michael Ellerman <mpe@ellerman.id.au> writes:

> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
>> Similar to commit: 89c140bbaeee ("pseries: Fix 64 bit logical memory block panic")
>> make sure we update different variables tracking lmb_size are updated
>> to be 64 bit.
>
> That commit went to all stable releases, should this one also?
>

radix_mem_block_size got added recently and it is not yet upstram. But
the drmem_lmb_info change can be a stable candidate. We also need this

I will split this as two patches?

modified   arch/powerpc/include/asm/drmem.h
@@ -67,7 +67,7 @@ struct of_drconf_cell_v2 {
 #define DRCONF_MEM_RESERVED	0x00000080
 #define DRCONF_MEM_HOTREMOVABLE	0x00000100
 
-static inline u32 drmem_lmb_size(void)
+static inline u64 drmem_lmb_size(void)
 {
 	return drmem_info->lmb_size;
 }

-aneesh

^ permalink raw reply

* Re: [PATCH v2 2/2] powerpc/pseries: new lparcfg key/value pair: partition_affinity_score
From: Michael Ellerman @ 2020-08-06 12:44 UTC (permalink / raw)
  To: Tyrel Datwyler, Scott Cheloha, linuxppc-dev; +Cc: Nathan Lynch
In-Reply-To: <bc9858c9-7d55-88c0-1f85-157af48e1d8c@linux.ibm.com>

Tyrel Datwyler <tyreld@linux.ibm.com> writes:
> On 7/27/20 11:46 AM, Scott Cheloha wrote:
>> The H_GetPerformanceCounterInfo (GPCI) PHYP hypercall has a subcall,
>> Affinity_Domain_Info_By_Partition, which returns, among other things,
>> a "partition affinity score" for a given LPAR.  This score, a value on
>> [0-100], represents the processor-memory affinity for the LPAR in
>> question.  A score of 0 indicates the worst possible affinity while a
>> score of 100 indicates perfect affinity.  The score can be used to
>> reason about performance.
>> 
>> This patch adds the score for the local LPAR to the lparcfg procfile
>> under a new 'partition_affinity_score' key.
>> 
>> Signed-off-by: Scott Cheloha <cheloha@linux.ibm.com>
>
> I was hoping Michael would chime in the first time around on this patch series
> about adding another key/value pair to lparcfg.

That guy is so unreliable.

I don't love adding new stuff in lparcfg, but given the file already
exists and there's no prospect of removing it, it's probably not worth
the effort to put the new field anywhere else.

My other query with this was how on earth anyone is meant to interpret
the metric. ie. if my metric is 50, what does that mean? If it's 90
should I worry?

Which makes me realise we have no documentation for lparcfg in the
kernel at all.

So it would be nice to have it mentioned somewhere in Documentation,
even if it just points to the manpage in powerpc-ibm-utils.

cheers


> So, barring a NACK from mpe:
>
> Reviewed-by: Tyrel Datwyler <tyreld@linux.ibm.com>
>
>> ---
>>  arch/powerpc/platforms/pseries/lparcfg.c | 35 ++++++++++++++++++++++++
>>  1 file changed, 35 insertions(+)
>> 
>> diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
>> index b8d28ab88178..e278390ab28d 100644
>> --- a/arch/powerpc/platforms/pseries/lparcfg.c
>> +++ b/arch/powerpc/platforms/pseries/lparcfg.c
>> @@ -136,6 +136,39 @@ static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
>>  	return rc;
>>  }
>>  
>> +static void show_gpci_data(struct seq_file *m)
>> +{
>> +	struct hv_gpci_request_buffer *buf;
>> +	unsigned int affinity_score;
>> +	long ret;
>> +
>> +	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
>> +	if (buf == NULL)
>> +		return;
>> +
>> +	/*
>> +	 * Show the local LPAR's affinity score.
>> +	 *
>> +	 * 0xB1 selects the Affinity_Domain_Info_By_Partition subcall.
>> +	 * The score is at byte 0xB in the output buffer.
>> +	 */
>> +	memset(&buf->params, 0, sizeof(buf->params));
>> +	buf->params.counter_request = cpu_to_be32(0xB1);
>> +	buf->params.starting_index = cpu_to_be32(-1);	/* local LPAR */
>> +	buf->params.counter_info_version_in = 0x5;	/* v5+ for score */
>> +	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, virt_to_phys(buf),
>> +				 sizeof(*buf));
>> +	if (ret != H_SUCCESS) {
>> +		pr_debug("hcall failed: H_GET_PERF_COUNTER_INFO: %ld, %x\n",
>> +			 ret, be32_to_cpu(buf->params.detail_rc));
>> +		goto out;
>> +	}
>> +	affinity_score = buf->bytes[0xB];
>> +	seq_printf(m, "partition_affinity_score=%u\n", affinity_score);
>> +out:
>> +	kfree(buf);
>> +}
>> +
>>  static unsigned h_pic(unsigned long *pool_idle_time,
>>  		      unsigned long *num_procs)
>>  {
>> @@ -487,6 +520,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
>>  			   partition_active_processors * 100);
>>  	}
>>  
>> +	show_gpci_data(m);
>> +
>>  	seq_printf(m, "partition_active_processors=%d\n",
>>  		   partition_active_processors);
>>  
>> 

^ permalink raw reply

* Re: [PATCH] ASoC: fsl-asoc-card: Get "extal" clock rate by clk_get_rate
From: Mark Brown @ 2020-08-06 12:37 UTC (permalink / raw)
  To: Shengjiu Wang
  Cc: alsa-devel, timur, Xiubo.Lee, linuxppc-dev, tiwai, lgirdwood,
	perex, nicoleotsuka, festevam, linux-kernel
In-Reply-To: <1596699585-27429-1-git-send-email-shengjiu.wang@nxp.com>

[-- Attachment #1: Type: text/plain, Size: 663 bytes --]

On Thu, Aug 06, 2020 at 03:39:45PM +0800, Shengjiu Wang wrote:

>  	} else if (of_node_name_eq(cpu_np, "esai")) {
> +		struct clk *esai_clk = clk_get(&cpu_pdev->dev, "extal");
> +
> +		if (!IS_ERR(esai_clk)) {
> +			priv->cpu_priv.sysclk_freq[TX] = clk_get_rate(esai_clk);
> +			priv->cpu_priv.sysclk_freq[RX] = clk_get_rate(esai_clk);
> +			clk_put(esai_clk);
> +		}

This should handle probe deferral.  Also if this clock is in use
shouldn't we be enabling it?  It looks like it's intended to be a
crystal so it's probably forced on all the time but sometimes there's
power control for crystals, or perhaps someone might do something
unusual with the hardware.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* [RFC PATCH 3/3] powerpc/lmb-size: Use addr #size-cells value when fetching lmb-size
From: Aneesh Kumar K.V @ 2020-08-06 12:36 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: Nathan Lynch, Aneesh Kumar K.V, Hari Bathini
In-Reply-To: <20200806123604.248361-1-aneesh.kumar@linux.ibm.com>

Make it consistent with other usages.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/mm/book3s64/radix_pgtable.c        |  7 ++++---
 arch/powerpc/platforms/pseries/hotplug-memory.c | 10 ++++++----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index ca76d9d6372a..a48e6618a27b 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -497,7 +497,7 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname,
 					  depth, void *data)
 {
 	unsigned long *mem_block_size = (unsigned long *)data;
-	const __be64 *prop;
+	const __be32 *prop;
 	int len;
 
 	if (depth != 1)
@@ -507,13 +507,14 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname,
 		return 0;
 
 	prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
-	if (!prop || len < sizeof(__be64))
+
+	if (!prop || len < dt_root_size_cells * sizeof(__be32))
 		/*
 		 * Nothing in the device tree
 		 */
 		*mem_block_size = MIN_MEMORY_BLOCK_SIZE;
 	else
-		*mem_block_size = be64_to_cpup(prop);
+		*mem_block_size = of_read_number(prop, dt_root_size_cells);
 	return 1;
 }
 
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 5d545b78111f..aba23ef8dfdd 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -30,12 +30,14 @@ unsigned long pseries_memory_block_size(void)
 
 	np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
 	if (np) {
-		const __be64 *size;
+		int len;
+		const __be32 *prop;
 
-		size = of_get_property(np, "ibm,lmb-size", NULL);
-		if (size)
-			memblock_size = be64_to_cpup(size);
+		prop = of_get_property(np, "ibm,lmb-size", &len);
+		if (prop && len >= mem_size_cells * sizeof(__be32))
+			memblock_size = of_read_number(prop, mem_size_cells);
 		of_node_put(np);
+
 	} else  if (machine_is(pseries)) {
 		/* This fallback really only applies to pseries */
 		unsigned int memzero_size = 0;
-- 
2.26.2


^ permalink raw reply related

* [RFC PATCH 2/3] powerpc/numa: Use global variable instead of fetching again
From: Aneesh Kumar K.V @ 2020-08-06 12:36 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: Nathan Lynch, Aneesh Kumar K.V, Hari Bathini
In-Reply-To: <20200806123604.248361-1-aneesh.kumar@linux.ibm.com>

use mem_addr_cells/mem_size_cells instead of fetching the values
again from device tree.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/mm/numa.c | 36 ++++++++++--------------------------
 1 file changed, 10 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 77d41d9775d2..c420872acd61 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -52,7 +52,6 @@ EXPORT_SYMBOL(node_to_cpumask_map);
 EXPORT_SYMBOL(node_data);
 
 static int min_common_depth;
-static int n_mem_addr_cells, n_mem_size_cells;
 static int form1_affinity;
 
 #define MAX_DISTANCE_REF_POINTS 4
@@ -355,19 +354,6 @@ static int __init find_min_common_depth(void)
 	return -1;
 }
 
-static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
-{
-	struct device_node *memory = NULL;
-
-	memory = of_find_node_by_type(memory, "memory");
-	if (!memory)
-		panic("numa.c: No memory nodes found!");
-
-	*n_addr_cells = of_n_addr_cells(memory);
-	*n_size_cells = of_n_size_cells(memory);
-	of_node_put(memory);
-}
-
 /*  dt_mem_next_cell is __init  */
 static unsigned long read_n_cells(int n, const __be32 **buf)
 {
@@ -639,12 +625,12 @@ static inline int __init read_usm_ranges(const __be32 **usm)
 	 * a counter followed by that many (base, size) duple.
 	 * read the counter from linux,drconf-usable-memory
 	 */
-	return read_n_cells(n_mem_size_cells, usm);
+	return read_n_cells(mem_size_cells, usm);
 }
 
 /*
  * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
- * node.  This assumes n_mem_{addr,size}_cells have been set.
+ * node.  This assumes mem_{addr,size}_cells have been set.
  */
 static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
 					const __be32 **usm,
@@ -677,8 +663,8 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
 
 	do {
 		if (is_kexec_kdump) {
-			base = read_n_cells(n_mem_addr_cells, usm);
-			size = read_n_cells(n_mem_size_cells, usm);
+			base = read_n_cells(mem_addr_cells, usm);
+			size = read_n_cells(mem_size_cells, usm);
 		}
 
 		nid = of_drconf_to_nid_single(lmb);
@@ -741,8 +727,6 @@ static int __init parse_numa_properties(void)
 		node_set_online(nid);
 	}
 
-	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
-
 	for_each_node_by_type(memory, "memory") {
 		unsigned long start;
 		unsigned long size;
@@ -759,11 +743,11 @@ static int __init parse_numa_properties(void)
 			continue;
 
 		/* ranges in cell */
-		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
+		ranges = (len >> 2) / (mem_addr_cells + mem_size_cells);
 new_range:
 		/* these are order-sensitive, and modify the buffer pointer */
-		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
-		size = read_n_cells(n_mem_size_cells, &memcell_buf);
+		start = read_n_cells(mem_addr_cells, &memcell_buf);
+		size = read_n_cells(mem_size_cells, &memcell_buf);
 
 		/*
 		 * Assumption: either all memory nodes or none will
@@ -1042,11 +1026,11 @@ static int hot_add_node_scn_to_nid(unsigned long scn_addr)
 			continue;
 
 		/* ranges in cell */
-		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
+		ranges = (len >> 2) / (mem_addr_cells + mem_size_cells);
 
 		while (ranges--) {
-			start = read_n_cells(n_mem_addr_cells, &memcell_buf);
-			size = read_n_cells(n_mem_size_cells, &memcell_buf);
+			start = read_n_cells(mem_addr_cells, &memcell_buf);
+			size = read_n_cells(mem_size_cells, &memcell_buf);
 
 			if ((scn_addr < start) || (scn_addr >= (start + size)))
 				continue;
-- 
2.26.2


^ permalink raw reply related

* [RFC PATCH 1/3] powerpc/mem: Store the dt_root_size/addr cell values for later usage
From: Aneesh Kumar K.V @ 2020-08-06 12:36 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: Nathan Lynch, Aneesh Kumar K.V, Hari Bathini

dt_root_addr_cells and dt_root_size_cells are __initdata variables.
So make a copy of the same which can be used post init.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/drmem.h | 2 ++
 arch/powerpc/kernel/prom.c       | 7 +++++++
 arch/powerpc/mm/numa.c           | 1 +
 3 files changed, 10 insertions(+)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 07c158c5f939..1f0eaf432755 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -123,4 +123,6 @@ static inline void lmb_clear_nid(struct drmem_lmb *lmb)
 }
 #endif
 
+extern int mem_addr_cells, mem_size_cells;
+
 #endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index d8a2fb87ba0c..9a1701e85747 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -73,6 +73,7 @@ u64 ppc64_rma_size;
 #endif
 static phys_addr_t first_memblock_size;
 static int __initdata boot_cpu_count;
+int mem_addr_cells, mem_size_cells;
 
 static int __init early_parse_mem(char *p)
 {
@@ -536,6 +537,12 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node,
 						const char *uname,
 						int depth, void *data)
 {
+	/*
+	 * Make a copy from __initdata variable
+	 */
+	mem_addr_cells = dt_root_addr_cells;
+	mem_size_cells = dt_root_size_cells;
+
 #ifdef CONFIG_PPC_PSERIES
 	if (depth == 1 &&
 	    strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 058fee9a0835..77d41d9775d2 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -368,6 +368,7 @@ static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
 	of_node_put(memory);
 }
 
+/*  dt_mem_next_cell is __init  */
 static unsigned long read_n_cells(int n, const __be32 **buf)
 {
 	unsigned long result = 0;
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH] powerpc/book3s64/radix: Make radix_mem_block_size 64bit
From: Michael Ellerman @ 2020-08-06 12:34 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linuxppc-dev; +Cc: Aneesh Kumar K.V
In-Reply-To: <20200806081415.208546-1-aneesh.kumar@linux.ibm.com>

"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> Similar to commit: 89c140bbaeee ("pseries: Fix 64 bit logical memory block panic")
> make sure we update different variables tracking lmb_size are updated
> to be 64 bit.

That commit went to all stable releases, should this one also?

cheers

> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
> index 55442d45c597..1a0c9d09950f 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
> @@ -85,7 +85,7 @@ extern unsigned int mmu_base_pid;
>  /*
>   * memory block size used with radix translation.
>   */
> -extern unsigned int __ro_after_init radix_mem_block_size;
> +extern unsigned long __ro_after_init radix_mem_block_size;
>  
>  #define PRTB_SIZE_SHIFT	(mmu_pid_bits + 4)
>  #define PRTB_ENTRIES	(1ul << mmu_pid_bits)
> diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
> index 17ccc6474ab6..07c158c5f939 100644
> --- a/arch/powerpc/include/asm/drmem.h
> +++ b/arch/powerpc/include/asm/drmem.h
> @@ -21,7 +21,7 @@ struct drmem_lmb {
>  struct drmem_lmb_info {
>  	struct drmem_lmb        *lmbs;
>  	int                     n_lmbs;
> -	u32                     lmb_size;
> +	u64                     lmb_size;
>  };
>  
>  extern struct drmem_lmb_info *drmem_info;
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index 28c784976bed..ca76d9d6372a 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -34,7 +34,7 @@
>  
>  unsigned int mmu_pid_bits;
>  unsigned int mmu_base_pid;
> -unsigned int radix_mem_block_size __ro_after_init;
> +unsigned long radix_mem_block_size __ro_after_init;
>  
>  static __ref void *early_alloc_pgtable(unsigned long size, int nid,
>  			unsigned long region_start, unsigned long region_end)
> -- 
> 2.26.2

^ permalink raw reply

* Re: [PATCH] powerpc/40x: Fix assembler warning about r0
From: Michael Ellerman @ 2020-08-06 12:33 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev
In-Reply-To: <7faebe28-07f9-b5df-6b6d-a25342e2bcad@csgroup.eu>

Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> Le 06/08/2020 à 04:18, Michael Ellerman a écrit :
>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>> Le 22/07/2020 à 04:24, Michael Ellerman a écrit :
>>>> The assembler says:
>>>>     arch/powerpc/kernel/head_40x.S:623: Warning: invalid register expression
>>>
>>> I get exactly the same with head_32.S, for the exact same reason.
>> 
>> Ah yep, I see it. I mostly build pmac32_defconfig which doesn't have
>> BDI_SWITCH enabled.
>> 
>> Send a patch? :)
>
> Done.
>
>> 
>> Do we still need the BDI_SWITCH code? Is it likely anyone still has one,
>> that works?
>
> I have three (One for 83xx and two for 8xx) and they work, allthough I'm 
> using them only for Uboot and for very very very early Linux boot 
> debugging (Last time I used it with Linux was when implementing KASAN)

OK, happy to keep the code around if it works and is being used, even
just a little bit.

cheers

^ permalink raw reply

* Re: [PATCH v2] powerpc/uaccess: simplify the get_fs() set_fs() logic
From: Michael Ellerman @ 2020-08-06 12:33 UTC (permalink / raw)
  To: Christophe Leroy, Christophe Leroy, Benjamin Herrenschmidt,
	Paul Mackerras
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <2d0b777d-f1aa-08a1-f287-47ac68efbd99@csgroup.eu>

Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> Le 25/07/2020 à 13:22, Michael Ellerman a écrit :
>> Hi Christophe,
>> 
>> Unfortunately this would collide messily with "uaccess: remove
>> segment_eq" in linux-next, so I'll ask you to do a respin based on that,
>> some comments below.
>
> Done, sent as v3, together with the 2 patchs from Linux next to get it 
> build and boot.

Thanks.

>> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>>> On powerpc, we only have USER_DS and KERNEL_DS
>>>
>>> Today, this is managed as an 'unsigned long' data space limit
>>> which is used to compare the passed address with, plus a bit
>>> in the thread_info flags that is set whenever modifying the limit
>>> to enable the verification in addr_limit_user_check()
>>>
>>> The limit is either the last address of user space when USER_DS is
>>> set, and the last address of address space when KERNEL_DS is set.
>>> In both cases, the limit is a compiletime constant.
>>>
>>> get_fs() returns the limit, which is part of thread_info struct
>>> set_fs() updates the limit then set the TI_FSCHECK flag.
>>> addr_limit_user_check() check the flag, and if it is set it checks
>>> the limit is the user limit, then unsets the TI_FSCHECK flag.
>>>
>>> In addition, when the flag is set the syscall exit work is involved.
>>> This exit work is heavy compared to normal syscall exit as it goes
>>> through normal exception exit instead of the fast syscall exit.
>>>
>>> Rename this TI_FSCHECK flag to TIF_KERNEL_DS flag which tells whether
>>> KERNEL_DS or USER_DS is set. Get mm_segment_t be redifined as a bool
>>> struct that is either false (for USER_DS) or true (for KERNEL_DS).
>>> When TIF_KERNEL_DS is set, the limit is ~0UL. Otherwise it is
>>> TASK_SIZE_USER (resp TASK_SIZE_USER64 on PPC64). When KERNEL_DS is
>>> set, there is no range to check. Define TI_FSCHECK as an alias to
>>> TIF_KERNEL_DS.
>> 
>> I'd rather avoid the "DS" name any more than we have to. Maybe it means
>> "data space" but that's not a very common term.
>
> I thought it was a reference to the ds/fs/gs ... segment registers in 
> the 8086 ?

Yes.

In your changelog you used "data space limit", so I thought you were
trying to retrospectively redefine the "DS" acronym to mean that.

cheers

^ permalink raw reply

* Re: [PATCH 1/2] sched/topology: Allow archs to override cpu_smt_mask
From: Michael Ellerman @ 2020-08-06 12:25 UTC (permalink / raw)
  To: peterz
  Cc: Gautham R Shenoy, Michael Neuling, Vincent Guittot,
	Srikar Dronamraju, Rik van Riel, linuxppc-dev, LKML,
	Valentin Schneider, Thomas Gleixner, Mel Gorman, Ingo Molnar,
	Dietmar Eggemann
In-Reply-To: <20200806085429.GX2674@hirez.programming.kicks-ass.net>

peterz@infradead.org writes:
> On Thu, Aug 06, 2020 at 03:32:25PM +1000, Michael Ellerman wrote:
>
>> That brings with it a bunch of problems, such as existing software that
>> has been developed/configured for Power8 and expects to see SMT8.
>> 
>> We also allow LPARs to be live migrated from Power8 to Power9 (and back), so
>> maintaining the illusion of SMT8 is considered a requirement to make that work.
>
> So how does that work if the kernel booted on P9 and demuxed the SMT8
> into 2xSMT4? If you migrate that state onto a P8 with actual SMT8 you're
> toast again.

The SMT mask would be inaccurate on the P8, rather than the current case
where it's inaccurate on the P9.

Which would be our preference, because the backward migration case is
not common AIUI.

Or am I missing a reason we'd be even more toast than that?

Under PowerVM the kernel does know it's being migrated, so we could
actually update the mask, but I'm not sure if that's really feasible.

>> Yeah I agree the naming is confusing.
>> 
>> Let's call them "SMT4 cores" and "SMT8 cores"?
>
> Works for me, thanks!
>
>> The problem is we are already lying to userspace, because firmware lies to us.
>> 
>> ie. the firmware on these systems shows us an SMT8 core, and so current kernels
>> show SMT8 to userspace. I don't think we can realistically change that fact now,
>> as these systems are already out in the field.
>> 
>> What this patch tries to do is undo some of the mess, and at least give the
>> scheduler the right information.
>
> What a mess... I think it depends on what you do with that P9 to P8
> migration case. Does it make sense to have a "p8_compat" boot arg for
> the case where you want LPAR migration back onto P8 systems -- in which
> case it simply takes the firmware's word as gospel and doesn't untangle
> things, because it can actually land on a P8.

We already get told by firmware that we're running in "p8 compat" mode,
because we have to pretend to userspace that it's running on a P8. So we
could use that as a signal to leave things alone.

But my understanding is most LPARs don't get migrated back and forth,
they'll start life on a P8 and only get migrated to a P9 once when the
customer gets a P9. They might then run for a long time (months to
years) on the P9 in P8 compat mode, not because they ever want to
migrate back to a real P8, but because the software in the LPAR is still
expecting to be on a P8.

I'm not a real expert on all the Enterprisey stuff though, so someone
else might be able to give us a better picture.

But the point of mentioning the migration stuff was mainly just to
explain why we feel we need to present SMT8 to userspace even on P9.

cheers

^ permalink raw reply

* Re: [PATCH V5 0/4] powerpc/perf: Add support for perf extended regs in powerpc
From: Arnaldo Carvalho de Melo @ 2020-08-06 12:20 UTC (permalink / raw)
  To: Athira Rajeev
  Cc: Ravi Bangoria, Michael Neuling, maddy, kjain, linuxppc-dev,
	Jiri Olsa, Jiri Olsa
In-Reply-To: <CA3D75F3-5F63-425B-A3C1-00C181E41108@linux.vnet.ibm.com>

Em Fri, Jul 31, 2020 at 11:04:14PM +0530, Athira Rajeev escreveu:
> 
> 
> > On 31-Jul-2020, at 1:20 AM, Jiri Olsa <jolsa@redhat.com> wrote:
> > 
> > On Thu, Jul 30, 2020 at 01:24:40PM +0530, Athira Rajeev wrote:
> >> 
> >> 
> >>> On 27-Jul-2020, at 10:46 PM, Athira Rajeev <atrajeev@linux.vnet.ibm.com> wrote:
> >>> 
> >>> Patch set to add support for perf extended register capability in
> >>> powerpc. The capability flag PERF_PMU_CAP_EXTENDED_REGS, is used to
> >>> indicate the PMU which support extended registers. The generic code
> >>> define the mask of extended registers as 0 for non supported architectures.
> >>> 
> >>> Patches 1 and 2 are the kernel side changes needed to include
> >>> base support for extended regs in powerpc and in power10.
> >>> Patches 3 and 4 are the perf tools side changes needed to support the
> >>> extended registers.
> >>> 
> >> 
> >> Hi Arnaldo, Jiri
> >> 
> >> please let me know if you have any comments/suggestions on this patch series to add support for perf extended regs.
> > 
> > hi,
> > can't really tell for powerpc, but in general
> > perf tool changes look ok
> > 
> 
> Hi Jiri,
> Thanks for checking the patchset.

So I'dd say you submit a v6, split into the kernel part, that probably
should go via the PPC arch tree, and I can pick the tooling part, ok?

- Arnaldo

^ permalink raw reply

* [PATCH 2/2] powerpc: drop hard_reset_now() and poweroff_now() declaration
From: Christophe Leroy @ 2020-08-06 12:20 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <d5641ada199b8dd2af16ad00a66084cf974f2704.1596716418.git.christophe.leroy@csgroup.eu>

Those function have never existed. Drop their declaration.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/processor.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 9ebcb2f095db..2b4dc10230da 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -422,8 +422,6 @@ extern void power9_idle_type(unsigned long stop_psscr_val,
 			      unsigned long stop_psscr_mask);
 
 extern void flush_instruction_cache(void);
-extern void hard_reset_now(void);
-extern void poweroff_now(void);
 extern int fix_alignment(struct pt_regs *);
 extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val);
 
-- 
2.25.0


^ permalink raw reply related

* [PATCH 1/2] powerpc/fpu: Drop cvt_fd() and cvt_df()
From: Christophe Leroy @ 2020-08-06 12:20 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Those two functions have been unused since commit identified below.
Drop them.

Fixes: 31bfdb036f12 ("powerpc: Use instruction emulation infrastructure to handle alignment faults")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/processor.h |  2 --
 arch/powerpc/kernel/fpu.S            | 15 ---------------
 2 files changed, 17 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 5c20b6d509ae..9ebcb2f095db 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -425,8 +425,6 @@ extern void flush_instruction_cache(void);
 extern void hard_reset_now(void);
 extern void poweroff_now(void);
 extern int fix_alignment(struct pt_regs *);
-extern void cvt_fd(float *from, double *to);
-extern void cvt_df(double *from, float *to);
 extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val);
 
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 4ae39db70044..825893d4cb59 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -134,18 +134,3 @@ _GLOBAL(save_fpu)
 	mffs	fr0
 	stfd	fr0,FPSTATE_FPSCR(r6)
 	blr
-
-/*
- * These are used in the alignment trap handler when emulating
- * single-precision loads and stores.
- */
-
-_GLOBAL(cvt_fd)
-	lfs	0,0(r3)
-	stfd	0,0(r4)
-	blr
-
-_GLOBAL(cvt_df)
-	lfd	0,0(r3)
-	stfs	0,0(r4)
-	blr
-- 
2.25.0


^ permalink raw reply related

* [PATCH] powerpc/irq: Drop forward declaration of struct irqaction
From: Christophe Leroy @ 2020-08-06 12:19 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Since the commit identified below, the forward declaration of
struct irqaction is useless. Drop it.

Fixes: b709c0832824 ("ppc64: move stack switching up in interrupt processing")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/irq.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 814dfab7e392..4f983ca4030a 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -35,7 +35,6 @@ static __inline__ int irq_canonicalize(int irq)
 
 extern int distribute_irqs;
 
-struct irqaction;
 struct pt_regs;
 
 #define __ARCH_HAS_DO_SOFTIRQ
-- 
2.25.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox