LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1 02/10] powerpc: Rename 'funcaddr' to 'addr' in 'struct ppc64_opd_entry'
From: Christophe Leroy @ 2021-10-11 15:25 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Andrew Morton, James E.J. Bottomley, Helge Deller, Arnd Bergmann,
	Kees Cook, Greg Kroah-Hartman
  Cc: linux-arch, linux-ia64, linux-parisc, linux-kernel, linux-mm,
	linuxppc-dev
In-Reply-To: <cover.1633964380.git.christophe.leroy@csgroup.eu>

There are three architectures with function descriptors, try to
have common names for the address they contain in order to
refactor some functions into generic functions later.

powerpc has 'funcaddr'
ia64 has 'ip'
parisc has 'addr'

Vote for 'addr' and update 'struct ppc64_opd_entry' accordingly.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/elf.h      | 2 +-
 arch/powerpc/include/asm/sections.h | 2 +-
 arch/powerpc/kernel/module_64.c     | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index 64b523848cd7..90c3259a81ab 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -179,7 +179,7 @@ void relocate(unsigned long final_address);
 /* There's actually a third entry here, but it's unused */
 struct ppc64_opd_entry
 {
-	unsigned long funcaddr;
+	unsigned long addr;
 	unsigned long r2;
 };
 
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 6e4af4492a14..32e7035863ac 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -77,7 +77,7 @@ static inline void *dereference_function_descriptor(void *ptr)
 	struct ppc64_opd_entry *desc = ptr;
 	void *p;
 
-	if (!get_kernel_nofault(p, (void *)&desc->funcaddr))
+	if (!get_kernel_nofault(p, (void *)&desc->addr))
 		ptr = p;
 	return ptr;
 }
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 6baa676e7cb6..82908c9be627 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -72,11 +72,11 @@ static func_desc_t func_desc(unsigned long addr)
 }
 static unsigned long func_addr(unsigned long addr)
 {
-	return func_desc(addr).funcaddr;
+	return func_desc(addr).addr;
 }
 static unsigned long stub_func_addr(func_desc_t func)
 {
-	return func.funcaddr;
+	return func.addr;
 }
 static unsigned int local_entry_offset(const Elf64_Sym *sym)
 {
@@ -187,7 +187,7 @@ static int relacmp(const void *_x, const void *_y)
 static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
 				    const Elf64_Shdr *sechdrs)
 {
-	/* One extra reloc so it's always 0-funcaddr terminated */
+	/* One extra reloc so it's always 0-addr terminated */
 	unsigned long relocs = 1;
 	unsigned i;
 
-- 
2.31.1


^ permalink raw reply related

* [PATCH v1 10/10] lkdtm: Fix execute_[user]_location()
From: Christophe Leroy @ 2021-10-11 15:25 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Andrew Morton, James E.J. Bottomley, Helge Deller, Arnd Bergmann,
	Kees Cook, Greg Kroah-Hartman
  Cc: linux-arch, linux-ia64, linux-parisc, linux-kernel, linux-mm,
	linuxppc-dev
In-Reply-To: <cover.1633964380.git.christophe.leroy@csgroup.eu>

execute_location() and execute_user_location() intent
to copy do_nothing() text and execute it at a new location.
However, at the time being it doesn't copy do_nothing() function
but do_nothing() function descriptor which still points to the
original text. So at the end it still executes do_nothing() at
its original location allthough using a copied function descriptor.

So, fix that by really copying do_nothing() text and build a new
function descriptor by copying do_nothing() function descriptor and
updating the target address with the new location.

Also fix the displayed addresses by dereferencing do_nothing()
function descriptor.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 drivers/misc/lkdtm/perms.c | 45 +++++++++++++++++++++++++++++++-------
 1 file changed, 37 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index da16564e1ecd..1d03cd44c21d 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -44,19 +44,42 @@ static noinline void do_overwritten(void)
 	return;
 }
 
+static void *setup_function_descriptor(funct_descr_t *fdesc, void *dst)
+{
+	int err;
+
+	if (!__is_defined(HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR))
+		return dst;
+
+	err = copy_from_kernel_nofault(fdesc, do_nothing, sizeof(*fdesc));
+	if (err < 0)
+		return ERR_PTR(err);
+
+	fdesc->addr = (unsigned long)dst;
+	barrier();
+
+	return fdesc;
+}
+
 static noinline void execute_location(void *dst, bool write)
 {
-	void (*func)(void) = dst;
+	void (*func)(void);
+	funct_descr_t fdesc;
+	void *do_nothing_text = dereference_symbol_descriptor(do_nothing);
 
-	pr_info("attempting ok execution at %px\n", do_nothing);
+	pr_info("attempting ok execution at %px\n", do_nothing_text);
 	do_nothing();
 
 	if (write == CODE_WRITE) {
-		memcpy(dst, do_nothing, EXEC_SIZE);
+		memcpy(dst, do_nothing_text, EXEC_SIZE);
 		flush_icache_range((unsigned long)dst,
 				   (unsigned long)dst + EXEC_SIZE);
 	}
-	pr_info("attempting bad execution at %px\n", func);
+	func = setup_function_descriptor(&fdesc, dst);
+	if (IS_ERR(func))
+		return;
+
+	pr_info("attempting bad execution at %px\n", dst);
 	func();
 	pr_err("FAIL: func returned\n");
 }
@@ -66,16 +89,22 @@ static void execute_user_location(void *dst)
 	int copied;
 
 	/* Intentionally crossing kernel/user memory boundary. */
-	void (*func)(void) = dst;
+	void (*func)(void);
+	funct_descr_t fdesc;
+	void *do_nothing_text = dereference_symbol_descriptor(do_nothing);
 
-	pr_info("attempting ok execution at %px\n", do_nothing);
+	pr_info("attempting ok execution at %px\n", do_nothing_text);
 	do_nothing();
 
-	copied = access_process_vm(current, (unsigned long)dst, do_nothing,
+	copied = access_process_vm(current, (unsigned long)dst, do_nothing_text,
 				   EXEC_SIZE, FOLL_WRITE);
 	if (copied < EXEC_SIZE)
 		return;
-	pr_info("attempting bad execution at %px\n", func);
+	func = setup_function_descriptor(&fdesc, dst);
+	if (IS_ERR(func))
+		return;
+
+	pr_info("attempting bad execution at %px\n", dst);
 	func();
 	pr_err("FAIL: func returned\n");
 }
-- 
2.31.1


^ permalink raw reply related

* [PATCH v1 09/10] lkdtm: Fix lkdtm_EXEC_RODATA()
From: Christophe Leroy @ 2021-10-11 15:25 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Andrew Morton, James E.J. Bottomley, Helge Deller, Arnd Bergmann,
	Kees Cook, Greg Kroah-Hartman
  Cc: linux-arch, linux-ia64, linux-parisc, linux-kernel, linux-mm,
	linuxppc-dev
In-Reply-To: <cover.1633964380.git.christophe.leroy@csgroup.eu>

Behind a location, lkdtm_EXEC_RODATA() executes a real function,
not a copy of do_nothing().

So do it directly instead of using execute_location().

And fix displayed addresses by dereferencing the function descriptors.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 drivers/misc/lkdtm/perms.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index 442d60ed25ef..da16564e1ecd 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -153,7 +153,14 @@ void lkdtm_EXEC_VMALLOC(void)
 
 void lkdtm_EXEC_RODATA(void)
 {
-	execute_location(lkdtm_rodata_do_nothing, CODE_AS_IS);
+	pr_info("attempting ok execution at %px\n",
+		dereference_symbol_descriptor(do_nothing));
+	do_nothing();
+
+	pr_info("attempting bad execution at %px\n",
+		dereference_symbol_descriptor(lkdtm_rodata_do_nothing));
+	lkdtm_rodata_do_nothing();
+	pr_err("FAIL: func returned\n");
 }
 
 void lkdtm_EXEC_USERSPACE(void)
-- 
2.31.1


^ permalink raw reply related

* Re: [PATCH] KVM: PPC: Book3S HV: H_ENTER filter out reserved HPTE[B] value
From: Fabiano Rosas @ 2021-10-11 14:47 UTC (permalink / raw)
  To: Nicholas Piggin, kvm-ppc, linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211004145749.1331331-1-npiggin@gmail.com>

Nicholas Piggin <npiggin@gmail.com> writes:

> The HPTE B field is a 2-bit field with values 0b10 and 0b11 reserved.
> This field is also taken from the HPTE and used when KVM executes
> TLBIEs to set the B field of those instructions.
>
> Disallow the guest setting B to a reserved value with H_ENTER by
> rejecting it. This is the same approach already taken for rejecting
> reserved (unsupported) LLP values. This prevents the guest from being
> able to induce the host to execute TLBIE with reserved values, which
> is not known to be a problem with current processors but in theory it
> could prevent the TLBIE from working correctly in a future processor.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

The ISA says:

B Segment Size Selector
0b00 - 256 MB (s=28)
0b01 - 1 TB (s=40)
0b10 - reserved
0b11 - reserved

So that looks good. I couldn't find any other guest initiated PTE
modifications, so I think we're covered.

Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>

> ---
>  arch/powerpc/include/asm/kvm_book3s_64.h | 4 ++++
>  arch/powerpc/kvm/book3s_hv_rm_mmu.c      | 9 +++++++++
>  2 files changed, 13 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
> index 19b6942c6969..fff391b9b97b 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
> @@ -378,6 +378,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
>  		rb |= 1;		/* L field */
>  		rb |= r & 0xff000 & ((1ul << a_pgshift) - 1); /* LP field */
>  	}
> +	/*
> +	 * This sets both bits of the B field in the PTE. 0b1x values are
> +	 * reserved, but those will have been filtered by kvmppc_do_h_enter.
> +	 */
>  	rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8;	/* B field */
>  	return rb;
>  }
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index 632b2545072b..2c1f3c6e72d1 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -207,6 +207,15 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
>
>  	if (kvm_is_radix(kvm))
>  		return H_FUNCTION;
> +	/*
> +	 * The HPTE gets used by compute_tlbie_rb() to set TLBIE bits, so
> +	 * these functions should work together -- must ensure a guest can not
> +	 * cause problems with the TLBIE that KVM executes.
> +	 */
> +	if ((pteh >> HPTE_V_SSIZE_SHIFT) & 0x2) {
> +		/* B=0b1x is a reserved value, disallow it. */
> +		return H_PARAMETER;
> +	}
>  	psize = kvmppc_actual_pgsz(pteh, ptel);
>  	if (!psize)
>  		return H_PARAMETER;

^ permalink raw reply

* Re: [V3 0/4] powerpc/perf: Add instruction and data address registers to extended regs
From: kajoljain @ 2021-10-11 13:09 UTC (permalink / raw)
  To: Athira Rajeev, mpe, acme, jolsa; +Cc: maddy, linuxppc-dev, rnsastry
In-Reply-To: <20211007065505.27809-1-atrajeev@linux.vnet.ibm.com>



On 10/7/21 12:25 PM, Athira Rajeev wrote:
> Patch set adds PMU registers namely Sampled Instruction Address Register
> (SIAR) and Sampled Data Address Register (SDAR) as part of extended regs
> in PowerPC. These registers provides the instruction/data address and
> adding these to extended regs helps in debug purposes.
> 
> Patch 1/4 and 2/4 refactors the existing macro definition of
> PERF_REG_PMU_MASK_300 and PERF_REG_PMU_MASK_31 to make it more
> readable.
> Patch 3/4 adds SIAR and SDAR as part of the extended regs mask.
> Patch 4/4 includes perf tools side changes to add the SPRs to
> sample_reg_mask to use with -I? option.
> 
> Changelog:
> Change from v2 -> v3:
> Addressed review comments from Michael Ellerman
> - Fixed the macro definition to use "unsigned long long"
>   which otherwise will cause build error with perf on
>   32-bit.
> - Added Reviewed-by from Daniel Axtens for patch3.
> 
> Change from v1 -> v2:
> Addressed review comments from Michael Ellerman
> - Refactored the perf reg extended mask value macros for
>   PERF_REG_PMU_MASK_300 and PERF_REG_PMU_MASK_31 to
>   make it more readable. Also moved PERF_REG_EXTENDED_MAX
>   along with enum definition similar to PERF_REG_POWERPC_MAX.
> 
> Athira Rajeev (4):
>   powerpc/perf: Refactor the code definition of perf reg extended mask
>   tools/perf: Refactor the code definition of perf reg extended mask in
>     tools side header file
>   powerpc/perf: Expose instruction and data address registers as part of
>     extended regs
>   tools/perf: Add perf tools support to expose instruction and data
>     address registers as part of extended regs

Patch-set looks good to me.

Reviewed-by: Kajol Jain<kjain@linux.ibm.com>

Thanks,
Kajol Jain
> 
>  arch/powerpc/include/uapi/asm/perf_regs.h     | 28 ++++++++++++-------
>  arch/powerpc/perf/perf_regs.c                 |  4 +++
>  .../arch/powerpc/include/uapi/asm/perf_regs.h | 28 ++++++++++++-------
>  tools/perf/arch/powerpc/include/perf_regs.h   |  2 ++
>  tools/perf/arch/powerpc/util/perf_regs.c      |  2 ++
>  5 files changed, 44 insertions(+), 20 deletions(-)
> 

^ permalink raw reply

* Re: [PATCH v2 4/4] powerpc/pseries/cpuhp: remove obsolete comment from pseries_cpu_die
From: Michael Ellerman @ 2021-10-11 12:34 UTC (permalink / raw)
  To: Nathan Lynch, Daniel Henrique Barboza, linuxppc-dev
  Cc: tyreld, ldufour, aneesh.kumar
In-Reply-To: <87k0izfux7.fsf@linux.ibm.com>

Nathan Lynch <nathanl@linux.ibm.com> writes:
> Michael Ellerman <mpe@ellerman.id.au> writes:
>> Daniel Henrique Barboza <danielhb413@gmail.com> writes:
>>> This is enough to say that we can't easily see the history behind this comment.
>>> I also believe that we're better of without it since it doesn't make sense
>>> with the current codebase.
>>
>> It was added by the original CPU hotplug commit for ppc64::
>>
>> https://github.com/mpe/linux-fullhistory/commit/0e9fd9441cd2113b67b14e739267c9e69761489b
>>
>>
>> The code was fairly similar:
>>
>> void __cpu_die(unsigned int cpu)
>> {
>> 	int tries;
>> 	int cpu_status;
>> 	unsigned int pcpu = get_hard_smp_processor_id(cpu);
>>
>> 	for (tries = 0; tries < 5; tries++) {
>> 		cpu_status = query_cpu_stopped(pcpu);
>>
>> 		if (cpu_status == 0)
>> 			break;
>> 		set_current_state(TASK_UNINTERRUPTIBLE);
>> 		schedule_timeout(HZ);
>> 	}
>> 	if (cpu_status != 0) {
>> 		printk("Querying DEAD? cpu %i (%i) shows %i\n",
>> 		       cpu, pcpu, cpu_status);
>> 	}
>>
>> 	/* Isolation and deallocation are definatly done by
>> 	 * drslot_chrp_cpu.  If they were not they would be
>> 	 * done here.  Change isolate state to Isolate and
>> 	 * change allocation-state to Unusable.
>> 	 */
>> 	paca[cpu].xProcStart = 0;
>>
>> 	/* So we can recognize if it fails to come up next time. */
>> 	cpu_callin_map[cpu] = 0;
>> }
>>
>>
>> drslot_chrp_cpu() still exists in drmgr:
>>
>>   https://github.com/ibm-power-utilities/powerpc-utils/blob/e798c4a09fbf0fa0f421e624cfa366a6c405c9fe/src/drmgr/drslot_chrp_cpu.c#L406
>>
>>
>> I agree the comment is no longer meaningful and can be removed.
>
> Thanks for providing this background.
>
>> It might be good to then add a comment explaining why we need to set
>> cpu_start = 0.
>
> Sure, I can take that as a follow-up. Or perhaps it should be moved to
> the online path.

Yeah possibly.

>> It's not immediately clear why we need to. When we bring a CPU back
>> online in smp_pSeries_kick_cpu() we ask RTAS to start it and then
>> immediately set cpu_start = 1, ie. there isn't a separate step that sets
>> cpu_start = 1 for hotplugged CPUs.
>
> Hmm I'm not following the distinction you seem to be drawing between
> bringing a CPU back online and a hotplugged CPU. kick_cpu is used in all
> cases AFAIK.

Yeah that wasn't very clear, reading it back I have half confused myself.

At boot we need the paca->cpu_start flag because some CPUs can be
spinning in generic_secondary_common_init, in head_64.S.

ie. they're not in RTAS, they're spinning in kernel code, and the only
thing that stops them coming "online" in the Linux sense is
paca->cpu_start.

You can see that in pseries/smp.c:

static inline int smp_startup_cpu(unsigned int lcpu)
{
	...
	if (cpumask_test_cpu(lcpu, of_spin_mask))
		/* Already started by OF and sitting in spin loop */
		return 1;


We also hit that case when kexec'ing, where all the secondaries come in
that way.


On the other hand when we offline a CPU, we set paca->cpu_start = 0, in
pseries_cpu_die(), and then we return the CPU to RTAS.

The only way it *should* come back online is via smp_pSeries_kick_cpu(),
which calls smp_startup_cpu() to bring the CPU out of RTAS, and then
smp_pSeries_kick_cpu() immediately sets cpu_start = 1.

So the sequence is:

	CPU goes offline from Linux POV
	paca->cpu_start = 0;
        CPU offline in RTAS
        ...
        CPU brought out of RTAS
	paca->cpu_start = 1;
	CPU comes back online from Linux POV


But I guess I kind of answered my own question above, where I said
*should*. Clearing paca->cpu_start when we offline the CPU gives us a
little bit of backup if the CPU comes out of RTAS unexpectedly. ie. it
would then spin on paca->cpu_start, rather than spontaneously coming
back into Linux when we weren't expecting it.

cheers

^ permalink raw reply

* Re: [PATCH 0/1] Arch use of pci_dev_is_added()
From: Michael Ellerman @ 2021-10-11 12:06 UTC (permalink / raw)
  To: Niklas Schnelle, Bjorn Helgaas
  Cc: linux-arch, linux-s390, linux-pci, linux-kernel,
	Oliver O'Halloran, linuxppc-dev
In-Reply-To: <20210910141940.2598035-1-schnelle@linux.ibm.com>

On Fri, 10 Sep 2021 16:19:39 +0200, Niklas Schnelle wrote:
> In my proposal to make pci_dev_is_added() more regularly usable by arch code
> you mentioned[0] that you believe the uses in arch/powerpc are not necessary
> anymore. From code reading I agree and so does Oliver O'Halloran[1].
> 
> So as promised here is a patch removing them. I only compile tested this as
> I don't have access to a powerpc system.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: Drop superfluous pci_dev_is_added() calls
      https://git.kernel.org/powerpc/c/452f145eca73945222923525a7eba59cf37909cc

cheers

^ permalink raw reply

* Re: [PATCH v2 0/2] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n
From: Michael Ellerman @ 2021-10-11 12:06 UTC (permalink / raw)
  To: paul.gortmaker, linuxppc-dev, mpe, Xiaoming Ni, stable, oss, benh,
	chenhui.zhao, linux-kernel, gregkh, paulus, Yuantian.Tang
  Cc: liuwenliang, wangle6, chenjianguo3
In-Reply-To: <20210929033646.39630-1-nixiaoming@huawei.com>

On Wed, 29 Sep 2021 11:36:44 +0800, Xiaoming Ni wrote:
> When CONFIG_SMP=y, timebase synchronization is required for mpc8572 when
>  the second kernel is started
> 	arch/powerpc/kernel/smp.c:
> 	int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> 	{
> 		...
> 		if (smp_ops->give_timebase)
> 			smp_ops->give_timebase();
> 		...
> 	}
> 
> [...]

Applied to powerpc/next.

[1/2] powerpc:85xx:Fix oops when mpc85xx_smp_guts_ids node cannot be found
      https://git.kernel.org/powerpc/c/3c2172c1c47b4079c29f0e6637d764a99355ebcd
[2/2] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n
      https://git.kernel.org/powerpc/c/c45361abb9185b1e172bd75eff51ad5f601ccae4

cheers

^ permalink raw reply

* Re: [PATCH] powerpc: clean up UPD_CONSTR
From: Michael Ellerman @ 2021-10-11 12:06 UTC (permalink / raw)
  To: Nick Desaulniers, Michael Ellerman
  Cc: Michal Hocko, David Hildenbrand, Peter Zijlstra, linuxppc-dev,
	Boqun Feng, linux-kernel, kvm-ppc, Christopher M. Riedl,
	Nathan Chancellor, Paul Mackerras, Dan Williams, Will Deacon,
	Daniel Axtens
In-Reply-To: <20210914161712.2463458-1-ndesaulniers@google.com>

On Tue, 14 Sep 2021 09:17:04 -0700, Nick Desaulniers wrote:
> UPD_CONSTR was previously a preprocessor define for an old GCC 4.9 inline
> asm bug with m<> constraints.
> 
> 
> 
> 

Applied to powerpc/next.

[1/1] powerpc: clean up UPD_CONSTR
      https://git.kernel.org/powerpc/c/2a24d80fc86bcd70c8e780078254e873ea217379

cheers

^ permalink raw reply

* Re: [PATCH v2 0/4] CPU DLPAR/hotplug for v5.16
From: Michael Ellerman @ 2021-10-11 12:06 UTC (permalink / raw)
  To: linuxppc-dev, Nathan Lynch; +Cc: aneesh.kumar, ldufour, danielhb413, tyreld
In-Reply-To: <20210927201933.76786-1-nathanl@linux.ibm.com>

On Mon, 27 Sep 2021 15:19:29 -0500, Nathan Lynch wrote:
> Fixes for some vintage bugs in handling cache node addition and removal, a
> miscellaneous BUG->WARN conversion, and removal of the fragile "by count"
> CPU DLPAR code that probably has no users.
> 
> Changes since v1:
> * Remove set but unused local variable (0day)
> * Additional comment cleanup patch
> 
> [...]

Applied to powerpc/next.

[1/4] powerpc/pseries/cpuhp: cache node corrections
      https://git.kernel.org/powerpc/c/7edd5c9a8820bedb22870b34a809d45f2a86a35a
[2/4] powerpc/cpuhp: BUG -> WARN conversion in offline path
      https://git.kernel.org/powerpc/c/983f9101740641434cea4f2e172175ff4b0276ad
[3/4] powerpc/pseries/cpuhp: delete add/remove_by_count code
      https://git.kernel.org/powerpc/c/fa2a5dfe2ddd0e7c77e5f608e1fa374192e5be97
[4/4] powerpc/pseries/cpuhp: remove obsolete comment from pseries_cpu_die
      https://git.kernel.org/powerpc/c/f9473a65719e59c45f1638cc04db7c80de8fcc1a

cheers

^ permalink raw reply

* Re: [PATCH v2 0/2] powerpc/paravirt: vcpu_is_preempted() tweaks
From: Michael Ellerman @ 2021-10-11 12:06 UTC (permalink / raw)
  To: linuxppc-dev, Nathan Lynch; +Cc: srikar, npiggin
In-Reply-To: <20210928214147.312412-1-nathanl@linux.ibm.com>

On Tue, 28 Sep 2021 16:41:45 -0500, Nathan Lynch wrote:
> Minor changes arising from discovering that this code throws warnings with
> DEBUG_PREEMPT kernels.
> 
> Changes since v1:
> * Additional commentary to (1) distinguish hypervisor dispatch and preempt
>   behavior from kernel scheduler preemption; and (2) more clearly justify
>   the use of raw_smp_processor_id().
> * Additional patch to update existing comments before making the functional
>   change.
> 
> [...]

Applied to powerpc/next.

[1/2] powerpc/paravirt: vcpu_is_preempted() commentary
      https://git.kernel.org/powerpc/c/799f9b51db688608b50e630a57bee5f699b268ca
[2/2] powerpc/paravirt: correct preempt debug splat in vcpu_is_preempted()
      https://git.kernel.org/powerpc/c/fda0eb220021a97c1d656434b9340ebf3fc4704a

cheers

^ permalink raw reply

* Re: [PATCH] powerpc: fix unbalanced node refcount in check_kvm_guest()
From: Michael Ellerman @ 2021-10-11 12:06 UTC (permalink / raw)
  To: linuxppc-dev, Nathan Lynch; +Cc: srikar, npiggin
In-Reply-To: <20210928124550.132020-1-nathanl@linux.ibm.com>

On Tue, 28 Sep 2021 07:45:50 -0500, Nathan Lynch wrote:
> When check_kvm_guest() succeeds in looking up a /hypervisor OF node, it
> returns without performing a matching put for the lookup, leaving the
> node's reference count elevated.
> 
> Add the necessary call to of_node_put(), rearranging the code slightly to
> avoid repetition or goto.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: fix unbalanced node refcount in check_kvm_guest()
      https://git.kernel.org/powerpc/c/56537faf8821e361d739fc5ff58c9c40f54a1d4c

cheers

^ permalink raw reply

* Re: [PATCH trivial v2] powerpc/powernv/dump: Fix typo in comment
From: Michael Ellerman @ 2021-10-11 12:06 UTC (permalink / raw)
  To: linuxppc-dev, Vasant Hegde; +Cc: joel
In-Reply-To: <20210914143802.54325-1-hegdevasant@linux.vnet.ibm.com>

On Tue, 14 Sep 2021 20:08:02 +0530, Vasant Hegde wrote:
> 


Applied to powerpc/next.

[1/1] powerpc/powernv/dump: Fix typo in comment
      https://git.kernel.org/powerpc/c/ee87843795ec5dc2f3bb315fade3ec098c88f639

cheers

^ permalink raw reply

* Re: [PATCH] powerpc: Remove unused prototype for of_show_percpuinfo
From: Michael Ellerman @ 2021-10-11 12:06 UTC (permalink / raw)
  To: linuxppc-dev, Daniel Axtens
In-Reply-To: <20210903063246.70691-1-dja@axtens.net>

On Fri, 3 Sep 2021 16:32:46 +1000, Daniel Axtens wrote:
> commit 6d7f58b04d82 ("[PATCH] powerpc: Some minor cleanups to setup_32.c")
> removed of_show_percpuinfo but didn't remove the prototype.
> 
> Remove it.
> 
> 
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: Remove unused prototype for of_show_percpuinfo
      https://git.kernel.org/powerpc/c/93fa8e9d88118bd2bdf2c61f9b63e7d4d9b648fe

cheers

^ permalink raw reply

* Re: [PATCH] video: fbdev: use memset_io() instead of memset()
From: Michael Ellerman @ 2021-10-11 12:06 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Andrew Morton
  Cc: linux-fbdev, Finn Thain, Stan Johnson, linux-kernel, dri-devel,
	linuxppc-dev
In-Reply-To: <884a54f1e5cb774c1d9b4db780209bee5d4f6718.1631712563.git.christophe.leroy@csgroup.eu>

On Wed, 15 Sep 2021 15:34:35 +0200, Christophe Leroy wrote:
> While investigating a lockup at startup on Powerbook 3400C, it was
> identified that the fbdev driver generates alignment exception at
> startup:
> 
> 	--- interrupt: 600 at memset+0x60/0xc0
> 	NIP:  c0021414 LR: c03fc49c CTR: 00007fff
> 	REGS: ca021c10 TRAP: 0600   Tainted: G        W          (5.14.2-pmac-00727-g12a41fa69492)
> 	MSR:  00009032 <EE,ME,IR,DR,RI>  CR: 44008442  XER: 20000100
> 	DAR: cab80020 DSISR: 00017c07
> 	GPR00: 00000007 ca021cd0 c14412e0 cab80000 00000000 00100000 cab8001c 00000004
> 	GPR08: 00100000 00007fff 00000000 00000000 84008442 00000000 c0006fb4 00000000
> 	GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00100000
> 	GPR24: 00000000 81800000 00000320 c15fa400 c14d1878 00000000 c14d1800 c094e19c
> 	NIP [c0021414] memset+0x60/0xc0
> 	LR [c03fc49c] chipsfb_pci_init+0x160/0x580
> 	--- interrupt: 600
> 	[ca021cd0] [c03fc46c] chipsfb_pci_init+0x130/0x580 (unreliable)
> 	[ca021d20] [c03a3a70] pci_device_probe+0xf8/0x1b8
> 	[ca021d50] [c043d584] really_probe.part.0+0xac/0x388
> 	[ca021d70] [c043d914] __driver_probe_device+0xb4/0x170
> 	[ca021d90] [c043da18] driver_probe_device+0x48/0x144
> 	[ca021dc0] [c043e318] __driver_attach+0x11c/0x1c4
> 	[ca021de0] [c043ad30] bus_for_each_dev+0x88/0xf0
> 	[ca021e10] [c043c724] bus_add_driver+0x190/0x22c
> 	[ca021e40] [c043ee94] driver_register+0x9c/0x170
> 	[ca021e60] [c0006c28] do_one_initcall+0x54/0x1ec
> 	[ca021ed0] [c08246e4] kernel_init_freeable+0x1c0/0x270
> 	[ca021f10] [c0006fdc] kernel_init+0x28/0x11c
> 	[ca021f30] [c0017148] ret_from_kernel_thread+0x14/0x1c
> 	Instruction dump:
> 	7d4601a4 39490777 7d4701a4 39490888 7d4801a4 39490999 7d4901a4 39290aaa
> 	7d2a01a4 4c00012c 4bfffe88 0fe00000 <4bfffe80> 9421fff0 38210010 48001970
> 
> [...]

Applied to powerpc/next.

[1/1] video: fbdev: use memset_io() instead of memset()
      https://git.kernel.org/powerpc/c/f2719b26ae27282c145202ffd656d5ff1fe737cc

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/powermac: Remove stale declaration of pmac_md
From: Michael Ellerman @ 2021-10-11 12:06 UTC (permalink / raw)
  To: Paul Mackerras, Christophe Leroy, Benjamin Herrenschmidt,
	Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <b2e52934e5a500f149e6d94db3cfa0569bc35081.1630657402.git.christophe.leroy@csgroup.eu>

On Fri, 3 Sep 2021 08:23:52 +0000 (UTC), Christophe Leroy wrote:
> pmac_md doesn't exist anymore, remove stall declaration.
> 

Applied to powerpc/next.

[1/1] powerpc/powermac: Remove stale declaration of pmac_md
      https://git.kernel.org/powerpc/c/9d7fb0643a156a5dddd887814e1263b648501cb0

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/mem: Fix arch/powerpc/mm/mem.c:53:12: error: no previous prototype for 'create_section_mapping'
From: Michael Ellerman @ 2021-10-11 12:06 UTC (permalink / raw)
  To: Paul Mackerras, Christophe Leroy, Benjamin Herrenschmidt,
	Michael Ellerman
  Cc: linuxppc-dev, kernel test robot, linux-kernel
In-Reply-To: <025754fde3d027904ae9d0191f395890bec93369.1631541649.git.christophe.leroy@csgroup.eu>

On Mon, 13 Sep 2021 17:17:26 +0200, Christophe Leroy wrote:
> Commit 8e11d62e2e87 ("powerpc/mem: Add back missing header to fix 'no
> previous prototype' error") was supposed to fix the problem, but in
> the meantime commit a927bd6ba952 ("mm: fix phys_to_target_node() and*
> memory_add_physaddr_to_nid() exports") moved create_section_mapping()
> prototype from asm/sparsemem.h to asm/mmzone.h
> 
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/mem: Fix arch/powerpc/mm/mem.c:53:12: error: no previous prototype for 'create_section_mapping'
      https://git.kernel.org/powerpc/c/7eff9bc00ddf1e2281dff575884b7f676c85b006

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/476: Fix sparse report
From: Michael Ellerman @ 2021-10-11 12:06 UTC (permalink / raw)
  To: Paul Mackerras, Christophe Leroy, Benjamin Herrenschmidt,
	Michael Ellerman
  Cc: Alistair Popple, linuxppc-dev, kernel test robot, linux-kernel
In-Reply-To: <aa6055769b92a5d8685b8d0adab99c48a0b0ef4b.1631956926.git.christophe.leroy@csgroup.eu>

On Sat, 18 Sep 2021 11:22:32 +0200, Christophe Leroy wrote:
> 	arch/powerpc/platforms/44x/ppc476.c:236:17: warning: cast removes address space '__iomem' of expression
> 	arch/powerpc/platforms/44x/ppc476.c:241:34: warning: incorrect type in argument 1 (different address spaces)
> 	arch/powerpc/platforms/44x/ppc476.c:241:34:    expected void const volatile [noderef] __iomem *addr
> 	arch/powerpc/platforms/44x/ppc476.c:241:34:    got unsigned char [usertype] *
> 	arch/powerpc/platforms/44x/ppc476.c:243:17: warning: incorrect type in argument 1 (different address spaces)
> 	arch/powerpc/platforms/44x/ppc476.c:243:17:    expected void volatile [noderef] __iomem *addr
> 	arch/powerpc/platforms/44x/ppc476.c:243:17:    got unsigned char [usertype] *[assigned] fpga
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/476: Fix sparse report
      https://git.kernel.org/powerpc/c/494f238a3861863d908af7b98a369f6d8a986c85

cheers

^ permalink raw reply

* Re: [PATCH kernel v2] powerps/pseries/dma: Add support for 2M IOMMU page size
From: Michael Ellerman @ 2021-10-11 12:06 UTC (permalink / raw)
  To: linuxppc-dev, Alexey Kardashevskiy; +Cc: Frederic Barrat, Leonardo Bras
In-Reply-To: <20211006044735.1114669-1-aik@ozlabs.ru>

On Wed, 6 Oct 2021 15:47:35 +1100, Alexey Kardashevskiy wrote:
> The upcoming PAPR spec adds a 2M page size, bit 23 right after 16G page
> size in the "ibm,query-pe-dma-window" call.
> 
> This adds support for the new page size. Since the new page size is out
> of sorted order, this changes the loop to not assume that shift[] is
> sorted.
> 
> [...]

Applied to powerpc/next.

[1/1] powerps/pseries/dma: Add support for 2M IOMMU page size
      https://git.kernel.org/powerpc/c/3872731187141d5d0a5c4fb30007b8b9ec36a44d

cheers

^ permalink raw reply

* Re: [PATCH v2 3/3] powerpc/numa: Fill distance_lookup_table for offline nodes
From: Michael Ellerman @ 2021-10-11 11:45 UTC (permalink / raw)
  To: Srikar Dronamraju
  Cc: Nathan Lynch, Gautham R Shenoy, Vincent Guittot,
	kernel test robot, Peter Zijlstra, Geetika Moolchandani,
	Valentin Schneider, Laurent Dufour, linuxppc-dev, Ingo Molnar
In-Reply-To: <20210923175748.GC2004@linux.vnet.ibm.com>

Srikar Dronamraju <srikar@linux.vnet.ibm.com> writes:
> * Michael Ellerman <mpe@ellerman.id.au> [2021-09-23 21:17:25]:
>
>> Srikar Dronamraju <srikar@linux.vnet.ibm.com> writes:
>> > * Michael Ellerman <mpe@ellerman.id.au> [2021-08-26 23:36:53]:
>> >
>> >> Srikar Dronamraju <srikar@linux.vnet.ibm.com> writes:
>> >> > Scheduler expects unique number of node distances to be available at
>> >> > boot.
>> ...
>> >
>> >> > Fake the offline node's distance_lookup_table entries so that all
>> >> > possible node distances are updated.
>> >>
>> >> Does this work if we have a single node offline at boot?
>> >>
>> >
>> > It should.
>> >
>> >> Say we start with:
>> >>
>> >> node distances:
>> >> node   0   1
>> >>   0:  10  20
>> >>   1:  20  10
>> >>
>> >> And node 2 is offline at boot. We can only initialise that nodes entries
>> >> in the distance_lookup_table:
>> >>
>> >> 		while (i--)
>> >> 			distance_lookup_table[node][i] = node;
>> >>
>> >> By filling them all with 2 that causes node_distance(2, X) to return the
>> >> maximum distance for all other nodes X, because we won't break out of
>> >> the loop in __node_distance():
>> >>
>> >> 	for (i = 0; i < distance_ref_points_depth; i++) {
>> >> 		if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
>> >> 			break;
>> >>
>> >> 		/* Double the distance for each NUMA level */
>> >> 		distance *= 2;
>> >> 	}
>> >>
>> >> If distance_ref_points_depth was 4 we'd return 160.
>> >
>> > As you already know, distance 10, 20, .. are defined by Powerpc, form1
>> > affinity. PAPR doesn't define actual distances, it only provides us the
>> > associativity. If there are distance_ref_points_depth is 4,
>> > (distance_ref_points_depth doesn't take local distance into consideration)
>> > 10, 20, 40, 80, 160.
>> >
>> >>
>> >> That'd leave us with 3 unique distances at boot, 10, 20, 160.
>> >>
>> >
>> > So if there are unique distances, then the distances as per the current
>> > code has to be 10, 20, 40, 80.. I dont see a way in which we have a break in
>> > the series. like having 160 without 80.
>>
>> I'm confused what you mean there.
>
> At the outset, if we have a better probable solution, do let me know, I am
> willing to try that too.

I don't have one in mind no, I'm just trying to satisfy myself that this
solution will work in all cases we're likely to encounter.

>> If we have a node that's offline at boot then we get 160 for that node,
>> that's just the result of having no info for it, so we never break out
>> of the for loop.
>>
>> So if we have two nodes, one hop apart, and then an offline node we get
>> 10, 20, 160.
>>
>> Or if you're using depth = 3 then it's 10, 20, 80.
>
> My understanding is as below:
>
> device-tree provides the max hops by way of
> ibm,associativity-reference-points. This is mapped to
> distance_ref_points_depth in Linux-powerpc.
>
> Now Linux-powerpc encodes hops as (dis-regarding local distance) 20, 40, 80,
> 160, 320 ...
> So if the distance_ref_points_depth is 3, then the hops are 20, 40, 80.
>
> Do you disagree?

I'm not sure. You didn't really address my point.

You said that we can't have 160 without 80 (for depth = 4).

I gave an example where we could see a gap in the used distance values,
ie. 10, 20, 80 for a depth of 3.

Which is not to say that distance 40 doesn't exist in that scenario,
rather that it's not used by any node.


>> >> But when node 2 comes online it might introduce more than 1 new distance
>> >> value, eg. it could be that the actual distances are:
>> >>
>> >> node distances:
>> >> node   0   1   2
>> >>   0:  10  20  40
>> >>   1:  20  10  80
>> >>   2:  40  80  10
>> >>
>> >> ie. we now have 4 distances, 10, 20, 40, 80.
>> >>
>> >> What am I missing?
>> >
>> > As I said above, I am not sure how we can have a break in the series.
>> > If distance_ref_points_depth is 3, the distances has to be 10,20,40,80 as
>> > atleast for form1 affinity.
>>
>> I agree for depth 3 we have to see 10, 20, 40, 80. But nothing
>> guarantees we see each value (other than 10).
>
> The hop distances are not from the device-tree, the device-tree only gives
> us the max hops possible. Linux-powerpc is actually hard-coding the
> distances which each hop distance being 2x the previous.

Yes. I guess I was sloppy to say "see each value", I didn't mean we see
those values directly in the device-tree.

> So we may not see any nodes at a particular hop, but we know maximum hops.
> And if distance_ref_points_depth is 3, then hops are 20, 40, 80 only.

OK, so we agree that "we may not see any nodes at a particular hop".

Which is what I was trying to say above.

>> We can have two nodes one hop apart, so we have 10 and 20, then a third
>> node is added 3 hops away, so we get 10, 20, 80.
>>
>
>> The real problem is that the third node could be 3 hops from node 0
>> and 2 hops from node 1, and so the addition of the third node causes
>> two new distance values (40 & 80) to be required.
>
> So here the max hops as given by device-tree is 3. So we know that we are
> looking for max-distance of 80 by way of distance_ref_points_depth.
>
> Even if the 3rd node was at 4 hops, we would already know the max distance
> of 160, by way of distance_ref_points_depth.

I agree we know that the max value is, and therefore the total number of
possible distance values.

But I think there are topologies where we can not represent all the
possible distances in the distance table.

> However in the most unlikely scenarios where the number of possible
> nodes are less than the distance_ref_points_depth(aka max hops) +
> there are CPUless/memoryless nodes we may not have initialized to the
> right distances.

OK, so I think you're saying you agree that there are situations where
we might not be able to represent all the distances.

But you say that's an "unlikely scenario", why is it unlikely?

If you can convince me it's 100% unlikely then maybe we can forget about
it :)

>> I think maybe what you're saying is that in practice we don't see setups
>> like that. But I don't know if I'm happy with a solution that doesn't
>> work in the general case, and relies on the particular properties of our
>> current set of systems.
>
> But our current set of systems are having a problem (Systems can likely
> crash on adding a CPU to a node.)

Yes I agree that's bad, but also we don't want to merge a solution (and
presumably backport it everywhere) if it doesn't work for some cases.

> The only other way I can think of is the previous approach were we ask
> scheduler hook which tells how many unique node distances are
> possible. But then it was stuck down because, we didnt want to add a
> hook just for one arch.

OK.

> However isn't this is much much better than the current situation we are in?
> i.e This is not going to cause any regression for the other setups.

Yes that's true. But equally if we can find a 100% solution that would
save us having to fix the same issue again in future.

>> Possibly we just need to detect that case and WARN about it. The only
>> problem is we won't know until the system is already up and running, ie.
>> we can't know at boot that the onlining of the third node will cause 2
>> new distance values to be added.
>
> Yes, We should be able to detect this very easily.
> At the end of the function (v2 or v3) if cur_depth != max_depth then we
> havent initialized all possible node distances.

The only issue is what do we do when we detect that case?

We can't BUG/panic, because we don't know for sure that the offline
nodes will cause new distance values to be needed. So the system might
be completely fine, all we know is it might not be. If we print a scary
warning we'll end up with bugs filed for that.

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/xive: Discard disabled interrupts in get_irqchip_state()
From: seeteena @ 2021-10-11  9:50 UTC (permalink / raw)
  To: clg; +Cc: linuxppc-dev, stable
In-Reply-To: <20211011070203.99726-1-clg@kaod.org>

[-- Attachment #1: Type: text/plain, Size: 158 bytes --]

Tested-by: seeteena<s1seetee@linux.vnet.ibm.com>

I have used a KVM guest with a passthrough ethernet adapter and the 
lspci output to identify the adapter.


[-- Attachment #2: Type: text/html, Size: 2399 bytes --]

^ permalink raw reply

* Re: [PATCH] powerpc/xive: Discard disabled interrupts in get_irqchip_state()
From: seeteena @ 2021-10-11  9:47 UTC (permalink / raw)
  To: clg; +Cc: linuxppc-dev, stable
In-Reply-To: <20211011070203.99726-1-clg@kaod.org>


[-- Attachment #1.1: Type: text/plain, Size: 158 bytes --]

Tested-by: seeteena<s1seetee@linux.vnet.ibm.com>

I have used a KVM guest with a passthrough ethernet adapter and the 
lspci output to identify the adapter.


[-- Attachment #1.2: Type: text/html, Size: 2245 bytes --]

[-- Attachment #2: PATCH-powerpc-xive-Discard-disabled-interrupts-in-get_irqchip_state.txt --]
[-- Type: text/plain, Size: 6088 bytes --]

From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <SRS0=o3RY=O7=lists.ozlabs.org=linuxppc-dev-bounces+linuxppc-dev=archiver.kernel.org@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
	by smtp.lore.kernel.org (Postfix) with ESMTP id 96058C433EF
	for <linuxppc-dev@archiver.kernel.org>; Mon, 11 Oct 2021 07:11:10 +0000 (UTC)
Received: from lists.ozlabs.org (lists.ozlabs.org [112.213.38.117])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by mail.kernel.org (Postfix) with ESMTPS id B21F760F24
	for <linuxppc-dev@archiver.kernel.org>; Mon, 11 Oct 2021 07:11:09 +0000 (UTC)
DMARC-Filter: OpenDMARC Filter v1.4.1 mail.kernel.org B21F760F24
Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=kaod.org
Authentication-Results: mail.kernel.org; spf=pass smtp.mailfrom=lists.ozlabs.org
Received: from boromir.ozlabs.org (localhost [IPv6:::1])
	by lists.ozlabs.org (Postfix) with ESMTP id 4HSVMH6wGdz3bjR
	for <linuxppc-dev@archiver.kernel.org>; Mon, 11 Oct 2021 18:11:07 +1100 (AEDT)
Authentication-Results: lists.ozlabs.org; spf=pass (sender SPF authorized)
 smtp.mailfrom=kaod.org (client-ip=79.137.123.220;
 helo=smtpout2.mo529.mail-out.ovh.net; envelope-from=clg@kaod.org;
 receiver=<UNKNOWN>)
X-Greylist: delayed 504 seconds by postgrey-1.36 at boromir;
 Mon, 11 Oct 2021 18:10:42 AEDT
Received: from smtpout2.mo529.mail-out.ovh.net
 (smtpout2.mo529.mail-out.ovh.net [79.137.123.220])
 (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
 (No client certificate requested)
 by lists.ozlabs.org (Postfix) with ESMTPS id 4HSVLp4Pwcz2yPv
 for <linuxppc-dev@lists.ozlabs.org>; Mon, 11 Oct 2021 18:10:42 +1100 (AEDT)
Received: from mxplan5.mail.ovh.net (unknown [10.109.156.216])
 by mo529.mail-out.ovh.net (Postfix) with ESMTPS id 0EC30C3BBB9B;
 Mon, 11 Oct 2021 09:02:09 +0200 (CEST)
Received: from kaod.org (37.59.142.95) by DAG4EX1.mxp5.local (172.16.2.31)
 with Microsoft SMTP Server (version=TLS1_2,
 cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.2308.14; Mon, 11 Oct
 2021 09:02:09 +0200
Authentication-Results: garm.ovh; auth=pass
 (GARM-95G0010762b5aa-8db3-4685-afb6-69febc946e19,
 044DEDDE8B0E05FD49EE52B84AFD98BA54CEE260) smtp.auth=clg@kaod.org
X-OVh-ClientIp: 82.64.250.170
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
To: <linuxppc-dev@lists.ozlabs.org>
Subject: [PATCH] powerpc/xive: Discard disabled interrupts in
 get_irqchip_state()
Date: Mon, 11 Oct 2021 09:02:03 +0200
Message-ID: <20211011070203.99726-1-clg@kaod.org>
X-Mailer: git-send-email 2.31.1
MIME-Version: 1.0
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: 8bit
X-Originating-IP: [37.59.142.95]
X-ClientProxiedBy: DAG3EX1.mxp5.local (172.16.2.21) To DAG4EX1.mxp5.local
 (172.16.2.31)
X-Ovh-Tracer-GUID: 1fcd0286-05cf-4e09-8d7f-6cb5e00e2edd
X-Ovh-Tracer-Id: 16244765333835647968
X-VR-SPAMSTATE: OK
X-VR-SPAMSCORE: 0
X-VR-SPAMCAUSE: gggruggvucftvghtrhhoucdtuddrgedvtddrvddthedgudduvdcutefuodetggdotefrodftvfcurfhrohhfihhlvgemucfqggfjpdevjffgvefmvefgnecuuegrihhlohhuthemucehtddtnecunecujfgurhephffvufffkffogggtgfhisehtkeertdertdejnecuhfhrohhmpeevrogurhhitgcunfgvucfiohgrthgvrhcuoegtlhhgsehkrghougdrohhrgheqnecuggftrfgrthhtvghrnhepfedvuedtvdeikeekuefhkedujeejgffggffhtefglefgveevfeeghfdvgedtleevnecukfhppedtrddtrddtrddtpdefjedrheelrddugedvrdelheenucevlhhushhtvghrufhiiigvpedtnecurfgrrhgrmhepmhhouggvpehsmhhtphdqohhuthdphhgvlhhopehmgihplhgrnhehrdhmrghilhdrohhvhhdrnhgvthdpihhnvghtpedtrddtrddtrddtpdhmrghilhhfrhhomheptghlgheskhgrohgurdhorhhgpdhrtghpthhtoheptghlgheskhgrohgurdhorhhg
X-BeenThere: linuxppc-dev@lists.ozlabs.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Linux on PowerPC Developers Mail List <linuxppc-dev.lists.ozlabs.org>
List-Unsubscribe: <https://lists.ozlabs.org/options/linuxppc-dev>,
 <mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>
List-Archive: <http://lists.ozlabs.org/pipermail/linuxppc-dev/>
List-Post: <mailto:linuxppc-dev@lists.ozlabs.org>
List-Help: <mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>
List-Subscribe: <https://lists.ozlabs.org/listinfo/linuxppc-dev>,
 <mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>
Cc: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>,
 stable@vger.kernel.org
Errors-To: linuxppc-dev-bounces+linuxppc-dev=archiver.kernel.org@lists.ozlabs.org
Sender: "Linuxppc-dev"
 <linuxppc-dev-bounces+linuxppc-dev=archiver.kernel.org@lists.ozlabs.org>

When an interrupt is passed through, the KVM XIVE device calls the
set_vcpu_affinity() handler which raises the P bit to mask the
interrupt and to catch any in-flight interrupts while routing the
interrupt to the guest.

On the guest side, drivers (like some Intels) can request at probe
time some MSIs and call synchronize_irq() to check that there are no
in flight interrupts. This will call the XIVE get_irqchip_state()
handler which will always return true as the interrupt P bit has been
set on the host side and lock the CPU in an infinite loop.

Fix that by discarding disabled interrupts in get_irqchip_state().

Fixes: da15c03b047d ("powerpc/xive: Implement get_irqchip_state method for XIVE to fix shutdown race")
Cc: stable@vger.kernel.org#v5.4+
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/sysdev/xive/common.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index c732ce5a3e1a..c5d75c02ad8b 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -945,7 +945,8 @@ static int xive_get_irqchip_state(struct irq_data *data,
 		 * interrupt to be inactive in that case.
 		 */
 		*state = (pq != XIVE_ESB_INVALID) && !xd->stale_p &&
-			(xd->saved_p || !!(pq & XIVE_ESB_VAL_P));
+			(xd->saved_p || (!!(pq & XIVE_ESB_VAL_P) &&
+			 !irqd_irq_disabled(data)));
 		return 0;
 	default:
 		return -EINVAL;
-- 
2.31.1



^ permalink raw reply related

* Re: [PATCH v2 00/11] Add Apple M1 support to PASemi i2c driver
From: Hector Martin @ 2021-10-11  9:23 UTC (permalink / raw)
  To: Wolfram Sang, Sven Peter, Christian Zigotzky, Michael Ellerman,
	Benjamin Herrenschmidt, Paul Mackerras, Olof Johansson,
	Arnd Bergmann, Mohamed Mediouni, Stan Skowronek, Mark Kettenis,
	Alyssa Rosenzweig, linux-arm-kernel, linuxppc-dev, linux-i2c,
	linux-kernel, R.T.Dickinson, Matthew Leaman, Darren Stevens
In-Reply-To: <YWP71c8cXlE3PcIo@kunai>

On 11/10/2021 17.54, Wolfram Sang wrote:
>> MAINTAINERS. It'll probably apply cleanly to 5.15-rc5 but if that happens again
> 
> It doesn't because Linus' git doesn't have:
> 
> Documentation/devicetree/bindings/pci/apple,pcie.yaml
> 
> Because MAINTAINER dependencies can be a bit nasty, I suggest I drop the
> MAINTAINER additions for now and we add them later. Then, you can add
> the pasemi-core as well. D'accord?
> 

We can just split the MAINTAINERS changes into a separate patch and I 
can push that one through the SoC tree, along with other MAINTAINERS 
updates. Does that work for everyone?

-- 
Hector Martin (marcan@marcan.st)
Public Key: https://mrcn.st/pub

^ permalink raw reply

* Re: [PATCH v2 00/11] Add Apple M1 support to PASemi i2c driver
From: Wolfram Sang @ 2021-10-11 10:04 UTC (permalink / raw)
  To: Sven Peter
  Cc: Arnd Bergmann, Hector Martin, linux-kernel, linux-i2c,
	Paul Mackerras, linux-arm-kernel, Christian Zigotzky,
	Olof Johansson, Mohamed Mediouni, Mark Kettenis, linuxppc-dev,
	Alyssa Rosenzweig, Stan Skowronek
In-Reply-To: <20211008163532.75569-1-sven@svenpeter.dev>

[-- Attachment #1: Type: text/plain, Size: 3100 bytes --]

On Fri, Oct 08, 2021 at 06:35:21PM +0200, Sven Peter wrote:
> Hi,
> 
> v1: https://lore.kernel.org/linux-i2c/20210926095847.38261-1-sven@svenpeter.dev/
> 
> Changes for v2:
>  - Added reviewed-by/acks
>  - Switched from ioport_map to pci_iomap as suggested by Arnd Bergmann
>  - Renamed i2c-pasemi-apple.c to i2c-pasemi-platform.c as suggested by
>    Wolfram Sang
>  - Replaced the ioport number in the adapter name with dev_name to be
>    able to identify separate busses in e.g. i2cdetect.
> 
> I still don't have access to any old PASemi hardware but the changes from
> v1 are pretty small and I expect them to still work. Would still be nice
> if someone with access to such hardware could give this a quick test.
> 
> 
> And for those who didn't see v1 the (almost) unchanged original cover letter:
> 
> This series adds support for the I2C controller found on Apple Silicon Macs
> which has quite a bit of history:
> 
> Apple bought P.A. Semi in 2008 and it looks like a part of its legacy continues
> to live on in the M1. This controller has actually been used since at least the
> iPhone 4S and hasn't changed much since then.
> Essentially, there are only a few differences that matter:
> 
> 	- The controller no longer is a PCI device
> 	- Starting at some iPhone an additional bit in one register
>           must be set in order to start transmissions.
> 	- The reference clock and hence the clock dividers are different
> 
> In order to add support for a platform device I first replaced PCI-specific
> bits and split out the PCI driver to its own file. Then I added support
> to make the clock divider configurable and converted the driver to use
> managed device resources to make it a bit simpler.
> 
> The Apple and PASemi driver will never be compiled in the same kernel
> since the Apple one will run on arm64 while the original PASemi driver
> will only be useful on powerpc.
> I've thus followed the octeon (mips)/thunderx(arm64) approach to do the
> split: I created a -core.c file which contains the shared logic and just
> compile that one for both the PASemi and the new Apple driver.
> 
> 
> Best,
> 
> Sven
> 
> Sven Peter (11):
>   dt-bindings: i2c: Add Apple I2C controller bindings
>   i2c: pasemi: Use io{read,write}32
>   i2c: pasemi: Use dev_name instead of port number
>   i2c: pasemi: Remove usage of pci_dev
>   i2c: pasemi: Split off common probing code
>   i2c: pasemi: Split pci driver to its own file
>   i2c: pasemi: Move common reset code to own function
>   i2c: pasemi: Allow to configure bus frequency
>   i2c: pasemi: Refactor _probe to use devm_*
>   i2c: pasemi: Add Apple platform driver
>   i2c: pasemi: Set enable bit for Apple variant
> 
>  .../devicetree/bindings/i2c/apple,i2c.yaml    |  61 +++++++++
>  MAINTAINERS                                   |   2 +
>  drivers/i2c/busses/Kconfig                    |  11 ++
>  drivers/i2c/busses/Makefile                   |   3 +

Applied to for-next with MAINTAINER bits dropped and added tags from
Olof and Christian, thanks!


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH 1/2] firmware: include drivers/firmware/Kconfig unconditionally
From: Arnd Bergmann @ 2021-10-11  9:45 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: linux-ia64@vger.kernel.org, Geert Uytterhoeven, Catalin Marinas,
	Linus Walleij, Bjorn Andersson, James E.J. Bottomley,
	H. Peter Anvin, linux-riscv, Will Deacon, Helge Deller,
	the arch/x86 maintainers, Russell King, Ingo Molnar,
	open list:BROADCOM NVRAM DRIVER, Paul Menzel, Albert Ou,
	Charles Keepax, Arnd Bergmann, Simon Trimmer, Mark Brown,
	Borislav Petkov, Paul Walmsley, Thomas Gleixner, Linux ARM,
	Thomas Bogendoerfer, Parisc List, Greg Kroah-Hartman,
	Liam Girdwood, Linux Kernel Mailing List, Palmer Dabbelt,
	Andrew Morton, linuxppc-dev
In-Reply-To: <CAMuHMdXspk27xd95YAsXa73ES8rfKxii3RUEtsBtxQTk9qLztA@mail.gmail.com>

On Mon, Oct 11, 2021 at 10:42 AM Geert Uytterhoeven
<geert@linux-m68k.org> wrote:
> On Sat, Oct 9, 2021 at 11:24 AM Paul Menzel <pmenzel@molgen.mpg.de> wrote:
> > Am 28.09.21 um 09:50 schrieb Arnd Bergmann:
> > > From: Arnd Bergmann <arnd@arndb.de>
> > +#
> > +# ARM System Control and Management Interface Protocol
> > +#
> > +# end of ARM System Control and Management Interface Protocol
> > +
> > +# CONFIG_FIRMWARE_MEMMAP is not set
> > +# CONFIG_GOOGLE_FIRMWARE is not set
> > +
> > +#
> > +# Tegra firmware driver
> > +#
> > +# end of Tegra firmware driver
> > +# end of Firmware Drivers
> > +
> >   # CONFIG_GNSS is not set
> >   CONFIG_MTD=m
> >   # CONFIG_MTD_TESTS is not set
> > ```
> >
> > No idea if the entries could be hidden for platforms not supporting them.
> >
> >          ARM System Control and Management Interface Protocol  ----
> >      [ ] Add firmware-provided memory map to sysfs
> >      [ ] Google Firmware Drivers  ----
> >          Tegra firmware driver  ----
>
> GOOGLE_FIRMWARE should probably depend on something.
> I highly doubt Google is running servers on e.g. h8300 and nds32.

GOOGLE_FIRMWARE is only the 'menuconfig' option that contains
the other options, but on architectures that have neither CONFIG_OF
nor CONFIG_ACPI, this is empty.  Most architectures of course
do support or require CONFIG_OF, so it's unclear whether we should
show the options for coreboot. Since it's a software-only driver, I
would tend to keep showing it, given that coreboot can be ported
to every architecture. The DT binding [1] seems to be neither
Google nor Arm specific.

CONFIG_FIRMWARE_MEMMAP in turn can be used for
anything that has memory hotplug, and in theory additional
drivers that register with this interface.

I'd lean towards "leave as is" for both, to avoid having to
change the dependencies again whenever something else
can use these.

        Arnd

[1] Documentation/devicetree/bindings/firmware/coreboot.txt

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox