LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH 02/13] nvme-multipath: add error handling support for add_disk()
From: Keith Busch @ 2021-10-16  0:01 UTC (permalink / raw)
  To: Luis Chamberlain
  Cc: nvdimm, vigneshr, linux-nvme, paulus, miquel.raynal, ira.weiny,
	hch, dave.jiang, sagi, minchan, vishal.l.verma, ngupta,
	linux-block, dan.j.williams, axboe, geoff, linux-kernel, jim,
	senozhatsky, richard, linux-mtd, linuxppc-dev
In-Reply-To: <20211015235219.2191207-3-mcgrof@kernel.org>

On Fri, Oct 15, 2021 at 04:52:08PM -0700, Luis Chamberlain wrote:
> We never checked for errors on add_disk() as this function
> returned void. Now that this is fixed, use the shiny new
> error handling.
> 
> Since we now can tell for sure when a disk was added, move
> setting the bit NVME_NSHEAD_DISK_LIVE only when we did
> add the disk successfully.
> 
> Nothing to do here as the cleanup is done elsewhere. We take
> care and use test_and_set_bit() because it is protects against
> two nvme paths simultaneously calling device_add_disk() on the
> same namespace head.

Looks good, thank you.

Reviewed-by: Keith Busch <kbusch@kernel.org>

^ permalink raw reply

* Re: [PATCH 06/13] nvdimm/blk: avoid calling del_gendisk() on early failures
From: Dan Williams @ 2021-10-16  0:13 UTC (permalink / raw)
  To: Luis Chamberlain
  Cc: Linux NVDIMM, vigneshr, linux-nvme, Paul Mackerras, miquel.raynal,
	Weiny, Ira, Christoph Hellwig, Dave Jiang, Sagi Grimberg,
	Minchan Kim, Vishal L Verma, Nitin Gupta, linux-block,
	Keith Busch, Jens Axboe, Geoff Levand, Linux Kernel Mailing List,
	Jim Paris, senozhatsky, Richard Weinberger, linux-mtd,
	linuxppc-dev
In-Reply-To: <20211015235219.2191207-7-mcgrof@kernel.org>

On Fri, Oct 15, 2021 at 4:53 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
>
> If nd_integrity_init() fails we'd get del_gendisk() called,
> but that's not correct as we should only call that if we're
> done with device_add_disk(). Fix this by providing unwinding
> prior to the devm call being registered and moving the devm
> registration to the very end.
>
> This should fix calling del_gendisk() if nd_integrity_init()
> fails. I only spotted this issue through code inspection. It
> does not fix any real world bug.
>

Just fyi, I'm preparing patches to delete this driver completely as it
is unused by any shipping platform. I hope to get that removal into
v5.16.

^ permalink raw reply

* Re: [PATCH 02/13] nvme-multipath: add error handling support for add_disk()
From: Christoph Hellwig @ 2021-10-16  4:39 UTC (permalink / raw)
  To: Luis Chamberlain
  Cc: nvdimm, vigneshr, linux-nvme, paulus, miquel.raynal, ira.weiny,
	hch, dave.jiang, sagi, minchan, vishal.l.verma, ngupta,
	linux-block, kbusch, dan.j.williams, axboe, geoff, linux-kernel,
	jim, senozhatsky, richard, linux-mtd, linuxppc-dev
In-Reply-To: <20211015235219.2191207-3-mcgrof@kernel.org>

Thanks,

applied to nvme-5.16.

^ permalink raw reply

* Re: [PATCH v2 13/13] lkdtm: Add a test for function descriptors protection
From: Christophe Leroy @ 2021-10-16  6:28 UTC (permalink / raw)
  To: Kees Cook
  Cc: linux-arch, linux-ia64, linux-parisc, Arnd Bergmann,
	Greg Kroah-Hartman, Helge Deller, linux-kernel,
	James E.J. Bottomley, linux-mm, Paul Mackerras, Andrew Morton,
	linuxppc-dev
In-Reply-To: <202110151433.6270D717@keescook>



Le 15/10/2021 à 23:35, Kees Cook a écrit :
> On Thu, Oct 14, 2021 at 07:50:02AM +0200, Christophe Leroy wrote:
>> Add WRITE_OPD to check that you can't modify function
>> descriptors.
>>
>> Gives the following result when function descriptors are
>> not protected:
>>
>> 	lkdtm: Performing direct entry WRITE_OPD
>> 	lkdtm: attempting bad 16 bytes write at c00000000269b358
>> 	lkdtm: FAIL: survived bad write
>> 	lkdtm: do_nothing was hijacked!
>>
>> Looks like a standard compiler barrier(); is not enough to force
>> GCC to use the modified function descriptor. Add to add a fake empty
>> inline assembly to force GCC to reload the function descriptor.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> ---
>>   drivers/misc/lkdtm/core.c  |  1 +
>>   drivers/misc/lkdtm/lkdtm.h |  1 +
>>   drivers/misc/lkdtm/perms.c | 22 ++++++++++++++++++++++
>>   3 files changed, 24 insertions(+)
>>
>> diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
>> index fe6fd34b8caf..de092aa03b5d 100644
>> --- a/drivers/misc/lkdtm/core.c
>> +++ b/drivers/misc/lkdtm/core.c
>> @@ -148,6 +148,7 @@ static const struct crashtype crashtypes[] = {
>>   	CRASHTYPE(WRITE_RO),
>>   	CRASHTYPE(WRITE_RO_AFTER_INIT),
>>   	CRASHTYPE(WRITE_KERN),
>> +	CRASHTYPE(WRITE_OPD),
>>   	CRASHTYPE(REFCOUNT_INC_OVERFLOW),
>>   	CRASHTYPE(REFCOUNT_ADD_OVERFLOW),
>>   	CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW),
>> diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
>> index c212a253edde..188bd0fd6575 100644
>> --- a/drivers/misc/lkdtm/lkdtm.h
>> +++ b/drivers/misc/lkdtm/lkdtm.h
>> @@ -105,6 +105,7 @@ void __init lkdtm_perms_init(void);
>>   void lkdtm_WRITE_RO(void);
>>   void lkdtm_WRITE_RO_AFTER_INIT(void);
>>   void lkdtm_WRITE_KERN(void);
>> +void lkdtm_WRITE_OPD(void);
>>   void lkdtm_EXEC_DATA(void);
>>   void lkdtm_EXEC_STACK(void);
>>   void lkdtm_EXEC_KMALLOC(void);
>> diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
>> index 96b3ebfcb8ed..3870bc82d40d 100644
>> --- a/drivers/misc/lkdtm/perms.c
>> +++ b/drivers/misc/lkdtm/perms.c
>> @@ -44,6 +44,11 @@ static noinline void do_overwritten(void)
>>   	return;
>>   }
>>   
>> +static noinline void do_almost_nothing(void)
>> +{
>> +	pr_info("do_nothing was hijacked!\n");
>> +}
>> +
>>   static void *setup_function_descriptor(func_desc_t *fdesc, void *dst)
>>   {
>>   	memcpy(fdesc, do_nothing, sizeof(*fdesc));
>> @@ -143,6 +148,23 @@ void lkdtm_WRITE_KERN(void)
>>   	do_overwritten();
>>   }
>>   
>> +void lkdtm_WRITE_OPD(void)
>> +{
>> +	size_t size = sizeof(func_desc_t);
>> +	void (*func)(void) = do_nothing;
>> +
>> +	if (!have_function_descriptors()) {
>> +		pr_info("Platform doesn't have function descriptors.\n");
> 
> This should be more explicit ('xfail'):
> 
> 	pr_info("XFAIL: platform doesn't use function descriptors.\n");

Ok


> 
>> +		return;
>> +	}
>> +	pr_info("attempting bad %zu bytes write at %px\n", size, do_nothing);
>> +	memcpy(do_nothing, do_almost_nothing, size);
>> +	pr_err("FAIL: survived bad write\n");
>> +
>> +	asm("" : "=m"(func));
> 
> Since this is a descriptor, I assume no icache flush is needed. Are
> function descriptors strictly dcache? (Is anything besides just a
> barrier needed?)

No flush is needed, the code just loads the function address from memory 
into CTR, loads R2 and branch to CTR:

	 19c:	e9 21 00 70 	ld      r9,112(r1)
	 1a0:	e9 49 00 00 	ld      r10,0(r9)
	 1a4:	7d 49 03 a6 	mtctr   r10
	 1a8:	e8 49 00 08 	ld      r2,8(r9)
	 1ac:	4e 80 04 21 	bctrl


> 
>> +	func();
>> +}
>> +
>>   void lkdtm_EXEC_DATA(void)
>>   {
>>   	execute_location(data_area, CODE_WRITE);
>> -- 
>> 2.31.1
>>
> 

^ permalink raw reply

* Re: [PATCH v2 11/13] lkdtm: Fix lkdtm_EXEC_RODATA()
From: Christophe Leroy @ 2021-10-16  6:41 UTC (permalink / raw)
  To: Kees Cook
  Cc: linux-arch, linux-ia64, linux-parisc, Arnd Bergmann,
	Greg Kroah-Hartman, Helge Deller, linux-kernel,
	James E.J. Bottomley, linux-mm, Paul Mackerras, Andrew Morton,
	linuxppc-dev
In-Reply-To: <202110151432.D8203C19@keescook>



Le 15/10/2021 à 23:32, Kees Cook a écrit :
> On Thu, Oct 14, 2021 at 07:50:00AM +0200, Christophe Leroy wrote:
>> Behind its location, lkdtm_EXEC_RODATA() executes
>> lkdtm_rodata_do_nothing() which is a real function,
>> not a copy of do_nothing().
>>
>> So executes it directly instead of using execute_location().
>>
>> This is necessary because following patch will fix execute_location()
>> to use a copy of the function descriptor of do_nothing() and
>> function descriptor of lkdtm_rodata_do_nothing() might be different.
>>
>> And fix displayed addresses by dereferencing the function descriptors.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> 
> I still don't understand this -- it doesn't look needed at all given the
> changes in patch 12. (i.e. everything is using
> dereference_function_descriptor() now)

dereference_function_descriptor() only deals with the function address, 
not the function TOC.

do_nothing() is a function. It has a function descriptor with a given 
address (address of .do_nothing) and a given TOC, say TOC1.

lkdtm_rodata_do_nothing() is another function. It has its own function 
descriptor with a given address (address of .lkdtm_rodata_do_nothing) 
and a given TOC, say TOC2.

If we use execute_location(), it will copy do_nothing() function 
descriptor and change the function address to the address of 
lkdtm_rodata_do_nothing(). So it will call lkdtm_rodata_do_nothing() 
with TOC1 instead of calling it with TOC2.

> 
> Can't this patch be dropped?

It is likely that the TOC will be the same for both functions, and 
anyway those functions are so simple that they don't use the TOC at all, 
so yes it would likely work without this patch but from my point of view 
it is incorrect to call one function with the TOC from the descriptor of 
another function.

If you thing we can take the risk, then I'm happy to drop the patch and 
replace it by

	execute_location(dereference_function_descriptor(lkdtm_rodata_do_nothing), CODE_AS_IS)

Christophe

^ permalink raw reply

* Re: [PATCH v2 12/13] lkdtm: Fix execute_[user]_location()
From: Christophe Leroy @ 2021-10-16  6:42 UTC (permalink / raw)
  To: Kees Cook
  Cc: linux-arch, linux-ia64, linux-parisc, Arnd Bergmann,
	Greg Kroah-Hartman, Helge Deller, linux-kernel,
	James E.J. Bottomley, linux-mm, Paul Mackerras, Andrew Morton,
	linuxppc-dev
In-Reply-To: <202110151428.187B1CF@keescook>



Le 15/10/2021 à 23:31, Kees Cook a écrit :
> On Thu, Oct 14, 2021 at 07:50:01AM +0200, Christophe Leroy wrote:
>> execute_location() and execute_user_location() intent
>> to copy do_nothing() text and execute it at a new location.
>> However, at the time being it doesn't copy do_nothing() function
>> but do_nothing() function descriptor which still points to the
>> original text. So at the end it still executes do_nothing() at
>> its original location allthough using a copied function descriptor.
>>
>> So, fix that by really copying do_nothing() text and build a new
>> function descriptor by copying do_nothing() function descriptor and
>> updating the target address with the new location.
>>
>> Also fix the displayed addresses by dereferencing do_nothing()
>> function descriptor.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> ---
>>   drivers/misc/lkdtm/perms.c     | 25 +++++++++++++++++++++----
>>   include/asm-generic/sections.h |  5 +++++
>>   2 files changed, 26 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
>> index 5266dc28df6e..96b3ebfcb8ed 100644
>> --- a/drivers/misc/lkdtm/perms.c
>> +++ b/drivers/misc/lkdtm/perms.c
>> @@ -44,19 +44,32 @@ static noinline void do_overwritten(void)
>>   	return;
>>   }
>>   
>> +static void *setup_function_descriptor(func_desc_t *fdesc, void *dst)
>> +{
>> +	memcpy(fdesc, do_nothing, sizeof(*fdesc));
>> +	fdesc->addr = (unsigned long)dst;
>> +	barrier();
>> +
>> +	return fdesc;
>> +}
> 
> How about collapsing the "have_function_descriptors()" check into
> setup_function_descriptor()?
> 
> static void *setup_function_descriptor(func_desc_t *fdesc, void *dst)
> {
> 	if (__is_defined(HAVE_FUNCTION_DESCRIPTORS)) {
> 		memcpy(fdesc, do_nothing, sizeof(*fdesc));
> 		fdesc->addr = (unsigned long)dst;
> 		barrier();
> 		return fdesc;
> 	} else {
> 		return dst;
> 	}
> }

Ok

> 
>> +
>>   static noinline void execute_location(void *dst, bool write)
>>   {
>>   	void (*func)(void) = dst;
>> +	func_desc_t fdesc;
>> +	void *do_nothing_text = dereference_function_descriptor(do_nothing);
>>   
>> -	pr_info("attempting ok execution at %px\n", do_nothing);
>> +	pr_info("attempting ok execution at %px\n", do_nothing_text);
>>   	do_nothing();
>>   
>>   	if (write == CODE_WRITE) {
>> -		memcpy(dst, do_nothing, EXEC_SIZE);
>> +		memcpy(dst, do_nothing_text, EXEC_SIZE);
>>   		flush_icache_range((unsigned long)dst,
>>   				   (unsigned long)dst + EXEC_SIZE);
>>   	}
>>   	pr_info("attempting bad execution at %px\n", func);
>> +	if (have_function_descriptors())
>> +		func = setup_function_descriptor(&fdesc, dst);
>>   	func();
>>   	pr_err("FAIL: func returned\n");
>>   }
>> @@ -67,15 +80,19 @@ static void execute_user_location(void *dst)
>>   
>>   	/* Intentionally crossing kernel/user memory boundary. */
>>   	void (*func)(void) = dst;
>> +	func_desc_t fdesc;
>> +	void *do_nothing_text = dereference_function_descriptor(do_nothing);
>>   
>> -	pr_info("attempting ok execution at %px\n", do_nothing);
>> +	pr_info("attempting ok execution at %px\n", do_nothing_text);
>>   	do_nothing();
>>   
>> -	copied = access_process_vm(current, (unsigned long)dst, do_nothing,
>> +	copied = access_process_vm(current, (unsigned long)dst, do_nothing_text,
>>   				   EXEC_SIZE, FOLL_WRITE);
>>   	if (copied < EXEC_SIZE)
>>   		return;
>>   	pr_info("attempting bad execution at %px\n", func);
>> +	if (have_function_descriptors())
>> +		func = setup_function_descriptor(&fdesc, dst);
>>   	func();
>>   	pr_err("FAIL: func returned\n");
>>   }
> 
> 
>> diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
>> index 76163883c6ff..d225318538bd 100644
>> --- a/include/asm-generic/sections.h
>> +++ b/include/asm-generic/sections.h
>> @@ -70,6 +70,11 @@ typedef struct {
>>   } func_desc_t;
>>   #endif
>>   
>> +static inline bool have_function_descriptors(void)
>> +{
>> +	return __is_defined(HAVE_FUNCTION_DESCRIPTORS);
>> +}
>> +
>>   /* random extra sections (if any).  Override
>>    * in asm/sections.h */
>>   #ifndef arch_is_kernel_text
> 
> This hunk seems like it should live in a separate patch.
> 

Ok I move it in a previous patch.

^ permalink raw reply

* Re: [PATCH v3 10/52] KVM: PPC: Book3S HV: Don't always save PMU for guest capable of nesting
From: Fabiano Rosas @ 2021-10-16 12:38 UTC (permalink / raw)
  To: Nicholas Piggin, kvm-ppc, linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211004160049.1338837-11-npiggin@gmail.com>

Nicholas Piggin <npiggin@gmail.com> writes:

> Provide a config option that controls the workaround added by commit
> 63279eeb7f93 ("KVM: PPC: Book3S HV: Always save guest pmu for guest
> capable of nesting"). The option defaults to y for now, but is expected
> to go away within a few releases.
>
> Nested capable guests running with the earlier commit ("KVM: PPC: Book3S
> HV Nested: Indicate guest PMU in-use in VPA") will now indicate the PMU

I think the commit reference is now: 178266389794 (KVM: PPC: Book3S HV
Nested: Reflect guest PMU in-use to L0 when guest SPRs are live)

> in-use status of their guests, which means the parent does not need to
> unconditionally save the PMU for nested capable guests.
>
> After this latest round of performance optimisations, this option costs
> about 540 cycles or 10% entry/exit performance on a POWER9 nested-capable
> guest.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/kvm/Kconfig     | 15 +++++++++++++++
>  arch/powerpc/kvm/book3s_hv.c | 10 ++++++++--
>  2 files changed, 23 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
> index ff581d70f20c..1e7aae522be8 100644
> --- a/arch/powerpc/kvm/Kconfig
> +++ b/arch/powerpc/kvm/Kconfig
> @@ -130,6 +130,21 @@ config KVM_BOOK3S_HV_EXIT_TIMING
>
>  	  If unsure, say N.
>
> +config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND
> +	bool "Nested L0 host workaround for L1 KVM host PMU handling bug" if EXPERT
> +	depends on KVM_BOOK3S_HV_POSSIBLE
> +	default !EXPERT
> +	help
> +	  Old nested HV capable Linux guests have a bug where the don't

s/the/they/

Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>

^ permalink raw reply

* Re: [PATCH v3 17/52] KVM: PPC: Book3S HV: CTRL SPR does not require read-modify-write
From: Fabiano Rosas @ 2021-10-16 12:54 UTC (permalink / raw)
  To: Nicholas Piggin, kvm-ppc, linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211004160049.1338837-18-npiggin@gmail.com>

Nicholas Piggin <npiggin@gmail.com> writes:

> Processors that support KVM HV do not require read-modify-write of
> the CTRL SPR to set/clear their thread's runlatch. Just write 1 or 0
> to it.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>

> ---
>  arch/powerpc/kvm/book3s_hv.c            |  2 +-
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 15 ++++++---------
>  2 files changed, 7 insertions(+), 10 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index f0ad3fb2eabd..1c5b81bd02c1 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -4058,7 +4058,7 @@ static void load_spr_state(struct kvm_vcpu *vcpu)
>  	 */
>
>  	if (!(vcpu->arch.ctrl & 1))
> -		mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
> +		mtspr(SPRN_CTRLT, 0);
>  }
>
>  static void store_spr_state(struct kvm_vcpu *vcpu)
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 7fa0df632f89..070e228b3c20 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -775,12 +775,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
>  	mtspr	SPRN_AMR,r5
>  	mtspr	SPRN_UAMOR,r6
>
> -	/* Restore state of CTRL run bit; assume 1 on entry */
> +	/* Restore state of CTRL run bit; the host currently has it set to 1 */
>  	lwz	r5,VCPU_CTRL(r4)
>  	andi.	r5,r5,1
>  	bne	4f
> -	mfspr	r6,SPRN_CTRLF
> -	clrrdi	r6,r6,1
> +	li	r6,0
>  	mtspr	SPRN_CTRLT,r6
>  4:
>  	/* Secondary threads wait for primary to have done partition switch */
> @@ -1203,12 +1202,12 @@ guest_bypass:
>  	stw	r0, VCPU_CPU(r9)
>  	stw	r0, VCPU_THREAD_CPU(r9)
>
> -	/* Save guest CTRL register, set runlatch to 1 */
> +	/* Save guest CTRL register, set runlatch to 1 if it was clear */
>  	mfspr	r6,SPRN_CTRLF
>  	stw	r6,VCPU_CTRL(r9)
>  	andi.	r0,r6,1
>  	bne	4f
> -	ori	r6,r6,1
> +	li	r6,1
>  	mtspr	SPRN_CTRLT,r6
>  4:
>  	/*
> @@ -2178,8 +2177,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
>  	 * Also clear the runlatch bit before napping.
>  	 */
>  kvm_do_nap:
> -	mfspr	r0, SPRN_CTRLF
> -	clrrdi	r0, r0, 1
> +	li	r0,0
>  	mtspr	SPRN_CTRLT, r0
>
>  	li	r0,1
> @@ -2198,8 +2196,7 @@ kvm_nap_sequence:		/* desired LPCR value in r5 */
>
>  	bl	isa206_idle_insn_mayloss
>
> -	mfspr	r0, SPRN_CTRLF
> -	ori	r0, r0, 1
> +	li	r0,1
>  	mtspr	SPRN_CTRLT, r0
>
>  	mtspr	SPRN_SRR1, r3

^ permalink raw reply

* Re: [PATCH v3 18/52] KVM: PPC: Book3S HV P9: Move SPRG restore to restore_p9_host_os_sprs
From: Fabiano Rosas @ 2021-10-16 12:59 UTC (permalink / raw)
  To: Nicholas Piggin, kvm-ppc, linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211004160049.1338837-19-npiggin@gmail.com>

Nicholas Piggin <npiggin@gmail.com> writes:

> Move the SPR update into its relevant helper function. This will
> help with SPR scheduling improvements in later changes.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>

> ---
>  arch/powerpc/kvm/book3s_hv.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 1c5b81bd02c1..fca89ed2244f 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -4093,6 +4093,8 @@ static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
>  static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
>  				    struct p9_host_os_sprs *host_os_sprs)
>  {
> +	mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
> +
>  	mtspr(SPRN_PSPB, 0);
>  	mtspr(SPRN_UAMOR, 0);
>
> @@ -4293,8 +4295,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
>
>  	timer_rearm_host_dec(tb);
>
> -	mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
> -
>  	kvmppc_subcore_exit_guest();
>
>  	return trap;

^ permalink raw reply

* Re: [PATCH v3 19/52] KVM: PPC: Book3S HV P9: Reduce mtmsrd instructions required to save host SPRs
From: Fabiano Rosas @ 2021-10-16 13:45 UTC (permalink / raw)
  To: Nicholas Piggin, kvm-ppc, linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211004160049.1338837-20-npiggin@gmail.com>

Nicholas Piggin <npiggin@gmail.com> writes:

> This reduces the number of mtmsrd required to enable facility bits when
> saving/restoring registers, by having the KVM code set all bits up front
> rather than using individual facility functions that set their particular
> MSR bits.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>

Aside: at msr_check_and_set what's with MSR_VSX always being implicitly
set whenever MSR_FP is set? I get that it depends on MSR_FP, but if FP
always implies VSX, then you could stop setting MSR_VSX in this patch.

> ---
>  arch/powerpc/include/asm/switch_to.h  |  2 +
>  arch/powerpc/kernel/process.c         | 28 +++++++++++++
>  arch/powerpc/kvm/book3s_hv.c          | 59 ++++++++++++++++++---------
>  arch/powerpc/kvm/book3s_hv_p9_entry.c |  1 +
>  4 files changed, 71 insertions(+), 19 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
> index 9d1fbd8be1c7..e8013cd6b646 100644
> --- a/arch/powerpc/include/asm/switch_to.h
> +++ b/arch/powerpc/include/asm/switch_to.h
> @@ -112,6 +112,8 @@ static inline void clear_task_ebb(struct task_struct *t)
>  #endif
>  }
>
> +void kvmppc_save_user_regs(void);
> +
>  extern int set_thread_tidr(struct task_struct *t);
>
>  #endif /* _ASM_POWERPC_SWITCH_TO_H */
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index 50436b52c213..3fca321b820d 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -1156,6 +1156,34 @@ static inline void save_sprs(struct thread_struct *t)
>  #endif
>  }
>
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +void kvmppc_save_user_regs(void)
> +{
> +	unsigned long usermsr;
> +
> +	if (!current->thread.regs)
> +		return;
> +
> +	usermsr = current->thread.regs->msr;
> +
> +	if (usermsr & MSR_FP)
> +		save_fpu(current);
> +
> +	if (usermsr & MSR_VEC)
> +		save_altivec(current);
> +
> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> +	if (usermsr & MSR_TM) {
> +		current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
> +		current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
> +		current->thread.tm_texasr = mfspr(SPRN_TEXASR);
> +		current->thread.regs->msr &= ~MSR_TM;
> +	}
> +#endif
> +}
> +EXPORT_SYMBOL_GPL(kvmppc_save_user_regs);
> +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
> +
>  static inline void restore_sprs(struct thread_struct *old_thread,
>  				struct thread_struct *new_thread)
>  {
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index fca89ed2244f..16365c0e9872 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -4140,6 +4140,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
>  	struct p9_host_os_sprs host_os_sprs;
>  	s64 dec;
>  	u64 tb, next_timer;
> +	unsigned long msr;
>  	int trap;
>
>  	WARN_ON_ONCE(vcpu->arch.ceded);
> @@ -4151,8 +4152,23 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
>  	if (next_timer < time_limit)
>  		time_limit = next_timer;
>
> +	vcpu->arch.ceded = 0;
> +
>  	save_p9_host_os_sprs(&host_os_sprs);
>
> +	/* MSR bits may have been cleared by context switch */
> +	msr = 0;
> +	if (IS_ENABLED(CONFIG_PPC_FPU))
> +		msr |= MSR_FP;
> +	if (cpu_has_feature(CPU_FTR_ALTIVEC))
> +		msr |= MSR_VEC;
> +	if (cpu_has_feature(CPU_FTR_VSX))
> +		msr |= MSR_VSX;
> +	if (cpu_has_feature(CPU_FTR_TM) ||
> +	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
> +		msr |= MSR_TM;
> +	msr = msr_check_and_set(msr);
> +
>  	kvmppc_subcore_enter_guest();
>
>  	vc->entry_exit_map = 1;
> @@ -4161,12 +4177,13 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
>  	vcpu_vpa_increment_dispatch(vcpu);
>
>  	if (cpu_has_feature(CPU_FTR_TM) ||
> -	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
> +	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
>  		kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
> +		msr = mfmsr(); /* TM restore can update msr */
> +	}
>
>  	switch_pmu_to_guest(vcpu, &host_os_sprs);
>
> -	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
>  	load_fp_state(&vcpu->arch.fp);
>  #ifdef CONFIG_ALTIVEC
>  	load_vr_state(&vcpu->arch.vr);
> @@ -4275,7 +4292,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
>
>  	restore_p9_host_os_sprs(vcpu, &host_os_sprs);
>
> -	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
>  	store_fp_state(&vcpu->arch.fp);
>  #ifdef CONFIG_ALTIVEC
>  	store_vr_state(&vcpu->arch.vr);
> @@ -4825,19 +4841,24 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
>  	unsigned long user_tar = 0;
>  	unsigned int user_vrsave;
>  	struct kvm *kvm;
> +	unsigned long msr;
>
>  	if (!vcpu->arch.sane) {
>  		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
>  		return -EINVAL;
>  	}
>
> +	/* No need to go into the guest when all we'll do is come back out */
> +	if (signal_pending(current)) {
> +		run->exit_reason = KVM_EXIT_INTR;
> +		return -EINTR;
> +	}
> +
> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
>  	/*
>  	 * Don't allow entry with a suspended transaction, because
>  	 * the guest entry/exit code will lose it.
> -	 * If the guest has TM enabled, save away their TM-related SPRs
> -	 * (they will get restored by the TM unavailable interrupt).
>  	 */
> -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
>  	if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
>  	    (current->thread.regs->msr & MSR_TM)) {
>  		if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
> @@ -4845,12 +4866,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
>  			run->fail_entry.hardware_entry_failure_reason = 0;
>  			return -EINVAL;
>  		}
> -		/* Enable TM so we can read the TM SPRs */
> -		mtmsr(mfmsr() | MSR_TM);
> -		current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
> -		current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
> -		current->thread.tm_texasr = mfspr(SPRN_TEXASR);
> -		current->thread.regs->msr &= ~MSR_TM;
>  	}
>  #endif
>
> @@ -4865,18 +4880,24 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
>
>  	kvmppc_core_prepare_to_enter(vcpu);
>
> -	/* No need to go into the guest when all we'll do is come back out */
> -	if (signal_pending(current)) {
> -		run->exit_reason = KVM_EXIT_INTR;
> -		return -EINTR;
> -	}
> -
>  	kvm = vcpu->kvm;
>  	atomic_inc(&kvm->arch.vcpus_running);
>  	/* Order vcpus_running vs. mmu_ready, see kvmppc_alloc_reset_hpt */
>  	smp_mb();
>
> -	flush_all_to_thread(current);
> +	msr = 0;
> +	if (IS_ENABLED(CONFIG_PPC_FPU))
> +		msr |= MSR_FP;
> +	if (cpu_has_feature(CPU_FTR_ALTIVEC))
> +		msr |= MSR_VEC;
> +	if (cpu_has_feature(CPU_FTR_VSX))
> +		msr |= MSR_VSX;
> +	if (cpu_has_feature(CPU_FTR_TM) ||
> +	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
> +		msr |= MSR_TM;
> +	msr = msr_check_and_set(msr);
> +
> +	kvmppc_save_user_regs();
>
>  	/* Save userspace EBB and other register values */
>  	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
> diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
> index a7f63082b4e3..fb9cb34445ea 100644
> --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
> +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
> @@ -224,6 +224,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
>  		vc->tb_offset_applied = vc->tb_offset;
>  	}
>
> +	/* Could avoid mfmsr by passing around, but probably no big deal */
>  	msr = mfmsr();
>
>  	host_hfscr = mfspr(SPRN_HFSCR);

^ permalink raw reply

* Re: [PATCH] ibmvscsi: use GFP_KERNEL with dma_alloc_coherent in initialize_event_pool
From: Martin K. Petersen @ 2021-10-17  2:20 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Tyrel Datwyler, linux-scsi, martin.petersen, james.bottomley,
	tyreld, brking, linuxppc-dev
In-Reply-To: <878ryuykd3.fsf@mpe.ellerman.id.au>


Michael,

> It's marked "Changes Requested" here:

Not sure why.

Applied to 5.16/scsi-staging, thanks!

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply

* Re: [PATCH v2 11/13] lkdtm: Fix lkdtm_EXEC_RODATA()
From: Christophe Leroy @ 2021-10-17  7:50 UTC (permalink / raw)
  To: Kees Cook
  Cc: linux-arch, linux-ia64, linux-parisc, Arnd Bergmann,
	Greg Kroah-Hartman, Helge Deller, linux-kernel,
	James E.J. Bottomley, linux-mm, Paul Mackerras, Andrew Morton,
	linuxppc-dev
In-Reply-To: <61a3d2c4-4997-c221-3eef-d74aef5ba584@csgroup.eu>



Le 16/10/2021 à 08:41, Christophe Leroy a écrit :
> 
> 
> Le 15/10/2021 à 23:32, Kees Cook a écrit :
>> On Thu, Oct 14, 2021 at 07:50:00AM +0200, Christophe Leroy wrote:
>>> Behind its location, lkdtm_EXEC_RODATA() executes
>>> lkdtm_rodata_do_nothing() which is a real function,
>>> not a copy of do_nothing().
>>>
>>> So executes it directly instead of using execute_location().
>>>
>>> This is necessary because following patch will fix execute_location()
>>> to use a copy of the function descriptor of do_nothing() and
>>> function descriptor of lkdtm_rodata_do_nothing() might be different.
>>>
>>> And fix displayed addresses by dereferencing the function descriptors.
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>
>> I still don't understand this -- it doesn't look needed at all given the
>> changes in patch 12. (i.e. everything is using
>> dereference_function_descriptor() now)
> 
> dereference_function_descriptor() only deals with the function address, 
> not the function TOC.
> 
> do_nothing() is a function. It has a function descriptor with a given 
> address (address of .do_nothing) and a given TOC, say TOC1.
> 
> lkdtm_rodata_do_nothing() is another function. It has its own function 
> descriptor with a given address (address of .lkdtm_rodata_do_nothing) 
> and a given TOC, say TOC2.
> 
> If we use execute_location(), it will copy do_nothing() function 
> descriptor and change the function address to the address of 
> lkdtm_rodata_do_nothing(). So it will call lkdtm_rodata_do_nothing() 
> with TOC1 instead of calling it with TOC2.
> 
>>
>> Can't this patch be dropped?
> 
> It is likely that the TOC will be the same for both functions, and 
> anyway those functions are so simple that they don't use the TOC at all, 
> so yes it would likely work without this patch but from my point of view 
> it is incorrect to call one function with the TOC from the descriptor of 
> another function.
> 
> If you thing we can take the risk, then I'm happy to drop the patch and 
> replace it by
> 
>      execute_location(dereference_function_descriptor(lkdtm_rodata_do_nothing), CODE_AS_IS)
> 

Once we have patch 12 EXEC_RODATA works well on powerpc without this 
patch so I will drop this patch for now and will propose something else 
as a follow-up to my series.

Christophe

^ permalink raw reply

* Re: [PATCH] powerpc/xive: Discard disabled interrupts in get_irqchip_state()
From: Michael Ellerman @ 2021-10-17 12:28 UTC (permalink / raw)
  To: Cédric Le Goater, linuxppc-dev; +Cc: stable
In-Reply-To: <20211011070203.99726-1-clg@kaod.org>

On Mon, 11 Oct 2021 09:02:03 +0200, Cédric Le Goater wrote:
> When an interrupt is passed through, the KVM XIVE device calls the
> set_vcpu_affinity() handler which raises the P bit to mask the
> interrupt and to catch any in-flight interrupts while routing the
> interrupt to the guest.
> 
> On the guest side, drivers (like some Intels) can request at probe
> time some MSIs and call synchronize_irq() to check that there are no
> in flight interrupts. This will call the XIVE get_irqchip_state()
> handler which will always return true as the interrupt P bit has been
> set on the host side and lock the CPU in an infinite loop.
> 
> [...]

Applied to powerpc/fixes.

[1/1] powerpc/xive: Discard disabled interrupts in get_irqchip_state()
      https://git.kernel.org/powerpc/c/6f779e1d359b8d5801f677c1d49dcfa10bf95674

cheers

^ permalink raw reply

* Re: [PATCH 1/2] KVM: PPC: Book3S HV: Fix stack handling in idle_kvm_start_guest()
From: Michael Ellerman @ 2021-10-17 12:28 UTC (permalink / raw)
  To: linuxppc-dev, Michael Ellerman; +Cc: npiggin, kvm-ppc
In-Reply-To: <20211015133929.832061-1-mpe@ellerman.id.au>

On Sat, 16 Oct 2021 00:39:28 +1100, Michael Ellerman wrote:
> In commit 10d91611f426 ("powerpc/64s: Reimplement book3s idle code in
> C") kvm_start_guest() became idle_kvm_start_guest(). The old code
> allocated a stack frame on the emergency stack, but didn't use the
> frame to store anything, and also didn't store anything in its caller's
> frame.
> 
> idle_kvm_start_guest() on the other hand is written more like a normal C
> function, it creates a frame on entry, and also stores CR/LR into its
> callers frame (per the ABI). The problem is that there is no caller
> frame on the emergency stack.
> 
> [...]

Applied to powerpc/fixes.

[1/2] KVM: PPC: Book3S HV: Fix stack handling in idle_kvm_start_guest()
      https://git.kernel.org/powerpc/c/9b4416c5095c20e110c82ae602c254099b83b72f
[2/2] KVM: PPC: Book3S HV: Make idle_kvm_start_guest() return 0 if it went to guest
      https://git.kernel.org/powerpc/c/cdeb5d7d890e14f3b70e8087e745c4a6a7d9f337

cheers

^ permalink raw reply

* Re: [V3 0/4] powerpc/perf: Add instruction and data address registers to extended regs
From: Michael Ellerman @ 2021-10-17 12:32 UTC (permalink / raw)
  To: Athira Rajeev, mpe, jolsa, acme; +Cc: kjain, maddy, linuxppc-dev, rnsastry
In-Reply-To: <20211007065505.27809-1-atrajeev@linux.vnet.ibm.com>

On Thu, 7 Oct 2021 12:25:01 +0530, Athira Rajeev wrote:
> Patch set adds PMU registers namely Sampled Instruction Address Register
> (SIAR) and Sampled Data Address Register (SDAR) as part of extended regs
> in PowerPC. These registers provides the instruction/data address and
> adding these to extended regs helps in debug purposes.
> 
> Patch 1/4 and 2/4 refactors the existing macro definition of
> PERF_REG_PMU_MASK_300 and PERF_REG_PMU_MASK_31 to make it more
> readable.
> Patch 3/4 adds SIAR and SDAR as part of the extended regs mask.
> Patch 4/4 includes perf tools side changes to add the SPRs to
> sample_reg_mask to use with -I? option.
> 
> [...]

Patches 1 and 3 applied to powerpc/next.

[1/4] powerpc/perf: Refactor the code definition of perf reg extended mask
      https://git.kernel.org/powerpc/c/02b182e67482d9167a13a0ff19b55037b70b21ad
[3/4] powerpc/perf: Expose instruction and data address registers as part of extended regs
      https://git.kernel.org/powerpc/c/29908bbf7b8960d261dfdd428bbaa656275e80f3

cheers

^ permalink raw reply

* Re: [V2] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10
From: Michael Ellerman @ 2021-10-17 12:32 UTC (permalink / raw)
  To: Athira Rajeev, mpe; +Cc: kjain, maddy, linuxppc-dev, rnsastry
In-Reply-To: <20211007075121.28497-1-atrajeev@linux.vnet.ibm.com>

On Thu, 7 Oct 2021 13:21:21 +0530, Athira Rajeev wrote:
> From: Athira Rajeev <atrajeev@linux.vnet.ibm.cm>
> 
> In power9 and before platforms, the default event used for cyles and
> instructions is PM_CYC (0x0001e) and PM_INST_CMPL (0x00002) respectively.
> These events uses two programmable PMCs and by default will count
> irrespective of the run latch state. But since it is using programmable
> PMCs, these events will cause multiplexing with basic event set supported
> by perf stat. Hence in power10, performance monitoring unit (PMU) driver
> uses performance monitor counter 5 (PMC5) and performance monitor counter6
> (PMC6) for counting instructions and cycles.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10
      https://git.kernel.org/powerpc/c/8f6aca0e0f26eaaee670cd27896993a45cdc8f9e

cheers

^ permalink raw reply

* Re: [PATCH] powerpc: Set max_mapnr correctly
From: Michael Ellerman @ 2021-10-17 12:32 UTC (permalink / raw)
  To: Paul Mackerras, Benjamin Herrenschmidt, Michael Ellerman,
	Christophe Leroy
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <77d99037782ac4b3c3b0124fc4ae80ce7b760b05.1634035228.git.christophe.leroy@csgroup.eu>

On Tue, 12 Oct 2021 12:40:37 +0200, Christophe Leroy wrote:
> max_mapnr is used by virt_addr_valid() to check if a linear
> address is valid.
> 
> It must only include lowmem PFNs, like other architectures.
> 
> Problem detected on a system with 1G mem (Only 768M are mapped), with
> CONFIG_DEBUG_VIRTUAL and CONFIG_TEST_DEBUG_VIRTUAL, it didn't report
> virt_to_phys(VMALLOC_START), VMALLOC_START being 0xf1000000.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: Set max_mapnr correctly
      https://git.kernel.org/powerpc/c/602946ec2f90d5bd965857753880db29d2d9a1e9

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/boot: Use CONFIG_PPC_POWERNV to compile OPAL support
From: Michael Ellerman @ 2021-10-17 12:32 UTC (permalink / raw)
  To: Cédric Le Goater, linuxppc-dev
In-Reply-To: <20211011070356.99952-1-clg@kaod.org>

On Mon, 11 Oct 2021 09:03:56 +0200, Cédric Le Goater wrote:
> CONFIG_PPC64_BOOT_WRAPPER is selected by CPU_LITTLE_ENDIAN which is
> used to compile support for other platforms such as Microwatt. There
> is no need for OPAL calls on these.
> 
> 

Applied to powerpc/next.

[1/1] powerpc/boot: Use CONFIG_PPC_POWERNV to compile OPAL support
      https://git.kernel.org/powerpc/c/6ffeb56ee2109b30a9e393f7e0c22c9b5030ac38

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/eeh: Fix docstrings in eeh.c
From: Michael Ellerman @ 2021-10-17 12:32 UTC (permalink / raw)
  To: Kai Song, linuxppc-dev; +Cc: paulus, oohall, linux-kernel, dja
In-Reply-To: <20211009041630.4135-1-songkai01@inspur.com>

On Sat, 9 Oct 2021 12:16:30 +0800, Kai Song wrote:
> We fix the following warnings when building kernel with W=1:
> arch/powerpc/kernel/eeh.c:598: warning: Function parameter or member 'function' not described in 'eeh_pci_enable'
> arch/powerpc/kernel/eeh.c:774: warning: Function parameter or member 'edev' not described in 'eeh_set_dev_freset'
> arch/powerpc/kernel/eeh.c:774: warning: expecting prototype for eeh_set_pe_freset(). Prototype was for eeh_set_dev_freset() instead
> arch/powerpc/kernel/eeh.c:814: warning: Function parameter or member 'include_passed' not described in 'eeh_pe_reset_full'
> arch/powerpc/kernel/eeh.c:944: warning: Function parameter or member 'ops' not described in 'eeh_init'
> arch/powerpc/kernel/eeh.c:1451: warning: Function parameter or member 'include_passed' not described in 'eeh_pe_reset'
> arch/powerpc/kernel/eeh.c:1526: warning: Function parameter or member 'func' not described in 'eeh_pe_inject_err'
> arch/powerpc/kernel/eeh.c:1526: warning: Excess function parameter 'function' described in 'eeh_pe_inject_err'
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/eeh: Fix docstrings in eeh.c
      https://git.kernel.org/powerpc/c/b616230e2325e5560330c40172a4d4e4c5da2c59

cheers

^ permalink raw reply

* [GIT PULL] Please pull powerpc/linux.git powerpc-5.15-4 tag
From: Michael Ellerman @ 2021-10-17 12:36 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linuxppc-dev, clg, linux-kernel

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256

Hi Linus,

Please pull some more powerpc fixes for 5.15:

The following changes since commit eb8257a12192f43ffd41bd90932c39dade958042:

  pseries/eeh: Fix the kdump kernel crash during eeh_pseries_init (2021-10-07 23:37:22 +1100)

are available in the git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.15-4

for you to fetch changes up to cdeb5d7d890e14f3b70e8087e745c4a6a7d9f337:

  KVM: PPC: Book3S HV: Make idle_kvm_start_guest() return 0 if it went to guest (2021-10-16 00:40:03 +1100)

- ------------------------------------------------------------------
powerpc fixes for 5.15 #4

Fix a bug where guests on P9 with interrupts passed through could get stuck in
synchronize_irq().

Fix a bug in KVM on P8 where secondary threads entering a guest would write outside their
allocated stack.

Fix a bug in KVM on P8 where secondary threads could confuse the host offline code and
cause the guest or host to crash.

Thanks to: Cédric Le Goater

- ------------------------------------------------------------------
Cédric Le Goater (1):
      powerpc/xive: Discard disabled interrupts in get_irqchip_state()

Michael Ellerman (2):
      KVM: PPC: Book3S HV: Fix stack handling in idle_kvm_start_guest()
      KVM: PPC: Book3S HV: Make idle_kvm_start_guest() return 0 if it went to guest


 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 28 ++++++++++++--------
 arch/powerpc/sysdev/xive/common.c       |  3 ++-
 2 files changed, 19 insertions(+), 12 deletions(-)
-----BEGIN PGP SIGNATURE-----

iQIzBAEBCAAdFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAmFsGG0ACgkQUevqPMjh
pYCAJRAAi8wEtHdIziXc22xYVaI8znMeogpoYnqKmEjRoGhyv79JNzh1/kOsNWyQ
E8hGut8YrKl+0xswbF41E5qdQCLfHiJYhqe8Ltl38ltoSx5YO/FqTtDMuVvc9bO1
SuyNOQWwrfPo6PolAHKtWv+yn7lMV6tP2NBp0lXe6DQkrCC7FEQ/s+12qQFmmnmb
hZ4KQuu27kcWqADTyhqipH+pLS+0nSdWFrBXyxQVtPlAxy0trvwU3m+9rNvcWcob
D07uH8n0TS1nORYs0s8YStxz5Kme0ICUIj64hvox7T1CqcTldl5iMrpzjvHRUa+s
sbuAgjAnyaksCiKyDJjIo8NtpXFYYLq4SeAn0/6dCxb3gtyQsRBDG9oe7F/q5jvO
j0flo6Bmcl4es3dTEz5mX7G/EChniTYyeq4xfVFZpA/56LkXeCdcTkBeyTeopBSe
QLtR40DUFYBZZVBUotBp9K13Grqg8xrvLYFZhUihA5rnC7HRZjwt70Rh3FmZUKcd
i7blHpIssf8obH5ERhEUOF4ITHwmFkLDBgqTh/jlPYlMBz4FYn/2IxWnMfZQpv3u
igC6DHCwJ9FmTI0iGyN2xbxGqW/WrAZrRCTFB3lZSw38O4e31viIXbfi9u+Iib2e
j0oUBZmoroH6POha/udMK/FUdxTlkS8OTiUDM7K2J81L7Cz9F6k=
=a0+j
-----END PGP SIGNATURE-----

^ permalink raw reply

* [PATCH v3 01/12] powerpc: Move and rename func_descr_t
From: Christophe Leroy @ 2021-10-17 12:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Andrew Morton, James E.J. Bottomley, Helge Deller, Arnd Bergmann,
	Kees Cook, Greg Kroah-Hartman
  Cc: linux-arch, linux-ia64, linux-parisc, linux-kernel, linux-mm,
	linuxppc-dev
In-Reply-To: <cover.1634457599.git.christophe.leroy@csgroup.eu>

There are three architectures with function descriptors, try to
have common names for the address they contain in order to
refactor some functions into generic functions later.

powerpc has 'entry'
ia64 has 'ip'
parisc has 'addr'

Vote for 'addr' and update 'func_descr_t' accordingly.

Move it in asm/elf.h to have it at the same place on all
three architectures, remove the typedef which hides its real
type, and change it to a smoother name 'struct func_desc'.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/code-patching.h | 2 +-
 arch/powerpc/include/asm/elf.h           | 6 ++++++
 arch/powerpc/include/asm/types.h         | 6 ------
 arch/powerpc/kernel/signal_64.c          | 8 ++++----
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 4ba834599c4d..c6e805976e6f 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -110,7 +110,7 @@ static inline unsigned long ppc_function_entry(void *func)
 	 * function's descriptor. The first entry in the descriptor is the
 	 * address of the function text.
 	 */
-	return ((func_descr_t *)func)->entry;
+	return ((struct func_desc *)func)->addr;
 #else
 	return (unsigned long)func;
 #endif
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index b8425e3cfd81..971589a21bc0 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -176,4 +176,10 @@ do {									\
 /* Relocate the kernel image to @final_address */
 void relocate(unsigned long final_address);
 
+struct func_desc {
+	unsigned long addr;
+	unsigned long toc;
+	unsigned long env;
+};
+
 #endif /* _ASM_POWERPC_ELF_H */
diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h
index f1630c553efe..97da77bc48c9 100644
--- a/arch/powerpc/include/asm/types.h
+++ b/arch/powerpc/include/asm/types.h
@@ -23,12 +23,6 @@
 
 typedef __vector128 vector128;
 
-typedef struct {
-	unsigned long entry;
-	unsigned long toc;
-	unsigned long env;
-} func_descr_t;
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_TYPES_H */
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 1831bba0582e..36537d7d5191 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -933,11 +933,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		 * descriptor is the entry address of signal and the second
 		 * entry is the TOC value we need to use.
 		 */
-		func_descr_t __user *funct_desc_ptr =
-			(func_descr_t __user *) ksig->ka.sa.sa_handler;
+		struct func_desc __user *ptr =
+			(struct func_desc __user *)ksig->ka.sa.sa_handler;
 
-		err |= get_user(regs->ctr, &funct_desc_ptr->entry);
-		err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
+		err |= get_user(regs->ctr, &ptr->addr);
+		err |= get_user(regs->gpr[2], &ptr->toc);
 	}
 
 	/* enter the signal handler in native-endian mode */
-- 
2.31.1


^ permalink raw reply related

* [PATCH v3 10/12] lkdtm: Really write into kernel text in WRITE_KERN
From: Christophe Leroy @ 2021-10-17 12:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Andrew Morton, James E.J. Bottomley, Helge Deller, Arnd Bergmann,
	Kees Cook, Greg Kroah-Hartman
  Cc: linux-arch, linux-ia64, linux-parisc, linux-kernel, linux-mm,
	linuxppc-dev
In-Reply-To: <cover.1634457599.git.christophe.leroy@csgroup.eu>

WRITE_KERN is supposed to overwrite some kernel text, namely
do_overwritten() function.

But at the time being it overwrites do_overwritten() function
descriptor, not function text.

Fix it by dereferencing the function descriptor to obtain
function text pointer.

And make do_overwritten() noinline so that it is really
do_overwritten() which is called by lkdtm_WRITE_KERN().

Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 drivers/misc/lkdtm/perms.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index 60b3b2fe929d..035fcca441f0 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -10,6 +10,7 @@
 #include <linux/mman.h>
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
+#include <asm/sections.h>
 
 /* Whether or not to fill the target memory area with do_nothing(). */
 #define CODE_WRITE	true
@@ -37,7 +38,7 @@ static noinline void do_nothing(void)
 }
 
 /* Must immediately follow do_nothing for size calculuations to work out. */
-static void do_overwritten(void)
+static noinline void do_overwritten(void)
 {
 	pr_info("do_overwritten wasn't overwritten!\n");
 	return;
@@ -113,8 +114,9 @@ void lkdtm_WRITE_KERN(void)
 	size_t size;
 	volatile unsigned char *ptr;
 
-	size = (unsigned long)do_overwritten - (unsigned long)do_nothing;
-	ptr = (unsigned char *)do_overwritten;
+	size = (unsigned long)dereference_function_descriptor(do_overwritten) -
+	       (unsigned long)dereference_function_descriptor(do_nothing);
+	ptr = dereference_function_descriptor(do_overwritten);
 
 	pr_info("attempting bad %zu byte write at %px\n", size, ptr);
 	memcpy((void *)ptr, (unsigned char *)do_nothing, size);
-- 
2.31.1


^ permalink raw reply related

* [PATCH v3 09/12] lkdtm: Force do_nothing() out of line
From: Christophe Leroy @ 2021-10-17 12:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Andrew Morton, James E.J. Bottomley, Helge Deller, Arnd Bergmann,
	Kees Cook, Greg Kroah-Hartman
  Cc: linux-arch, linux-ia64, linux-parisc, linux-kernel, linux-mm,
	linuxppc-dev
In-Reply-To: <cover.1634457599.git.christophe.leroy@csgroup.eu>

LKDTM tests display that the run do_nothing() at a given
address, but in reality do_nothing() is inlined into the
caller.

Force it out of line so that it really runs text at the
displayed address.

Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 drivers/misc/lkdtm/perms.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index 2dede2ef658f..60b3b2fe929d 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -21,7 +21,7 @@
 /* This is non-const, so it will end up in the .data section. */
 static u8 data_area[EXEC_SIZE];
 
-/* This is cost, so it will end up in the .rodata section. */
+/* This is const, so it will end up in the .rodata section. */
 static const unsigned long rodata = 0xAA55AA55;
 
 /* This is marked __ro_after_init, so it should ultimately be .rodata. */
@@ -31,7 +31,7 @@ static unsigned long ro_after_init __ro_after_init = 0x55AA5500;
  * This just returns to the caller. It is designed to be copied into
  * non-executable memory regions.
  */
-static void do_nothing(void)
+static noinline void do_nothing(void)
 {
 	return;
 }
-- 
2.31.1


^ permalink raw reply related

* [PATCH v3 03/12] powerpc: Remove 'struct ppc64_opd_entry'
From: Christophe Leroy @ 2021-10-17 12:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Andrew Morton, James E.J. Bottomley, Helge Deller, Arnd Bergmann,
	Kees Cook, Greg Kroah-Hartman
  Cc: linux-arch, linux-ia64, linux-parisc, linux-kernel, linux-mm,
	Nicholas Piggin, linuxppc-dev
In-Reply-To: <cover.1634457599.git.christophe.leroy@csgroup.eu>

'struct ppc64_opd_entry' doesn't belong to uapi/asm/elf.h

It was initially in module_64.c and commit 2d291e902791 ("Fix compile
failure with non modular builds") moved it into asm/elf.h

But it was by mistake added outside of __KERNEL__ section,
therefore commit c3617f72036c ("UAPI: (Scripted) Disintegrate
arch/powerpc/include/asm") moved it to uapi/asm/elf.h

Now that it is not used anymore by the kernel, remove it.

Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/uapi/asm/elf.h | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h
index 860c59291bfc..308857123a08 100644
--- a/arch/powerpc/include/uapi/asm/elf.h
+++ b/arch/powerpc/include/uapi/asm/elf.h
@@ -289,12 +289,4 @@ typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG];
 /* Keep this the last entry.  */
 #define R_PPC64_NUM		253
 
-/* There's actually a third entry here, but it's unused */
-struct ppc64_opd_entry
-{
-	unsigned long funcaddr;
-	unsigned long r2;
-};
-
-
 #endif /* _UAPI_ASM_POWERPC_ELF_H */
-- 
2.31.1


^ permalink raw reply related

* [PATCH v3 02/12] powerpc: Use 'struct func_desc' instead of 'struct ppc64_opd_entry'
From: Christophe Leroy @ 2021-10-17 12:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Andrew Morton, James E.J. Bottomley, Helge Deller, Arnd Bergmann,
	Kees Cook, Greg Kroah-Hartman
  Cc: linux-arch, linux-ia64, linux-parisc, linux-kernel, linux-mm,
	Nicholas Piggin, linuxppc-dev, Daniel Axtens
In-Reply-To: <cover.1634457599.git.christophe.leroy@csgroup.eu>

'struct ppc64_opd_entry' is somehow redundant with 'struct func_desc',
the later is more correct/complete as it includes the third
field which is unused.

So use 'struct func_desc' instead of 'struct ppc64_opd_entry'

Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Daniel Axtens <dja@axtens.net>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/sections.h |  4 ++--
 arch/powerpc/kernel/module_64.c     | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 6e4af4492a14..abd2e5213197 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -74,10 +74,10 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end)
 #undef dereference_function_descriptor
 static inline void *dereference_function_descriptor(void *ptr)
 {
-	struct ppc64_opd_entry *desc = ptr;
+	struct func_desc *desc = ptr;
 	void *p;
 
-	if (!get_kernel_nofault(p, (void *)&desc->funcaddr))
+	if (!get_kernel_nofault(p, (void *)&desc->addr))
 		ptr = p;
 	return ptr;
 }
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 6baa676e7cb6..a89da0ee25e2 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -64,19 +64,19 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym)
 #else
 
 /* An address is address of the OPD entry, which contains address of fn. */
-typedef struct ppc64_opd_entry func_desc_t;
+typedef struct func_desc func_desc_t;
 
 static func_desc_t func_desc(unsigned long addr)
 {
-	return *(struct ppc64_opd_entry *)addr;
+	return *(struct func_desc *)addr;
 }
 static unsigned long func_addr(unsigned long addr)
 {
-	return func_desc(addr).funcaddr;
+	return func_desc(addr).addr;
 }
 static unsigned long stub_func_addr(func_desc_t func)
 {
-	return func.funcaddr;
+	return func.addr;
 }
 static unsigned int local_entry_offset(const Elf64_Sym *sym)
 {
@@ -187,7 +187,7 @@ static int relacmp(const void *_x, const void *_y)
 static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
 				    const Elf64_Shdr *sechdrs)
 {
-	/* One extra reloc so it's always 0-funcaddr terminated */
+	/* One extra reloc so it's always 0-addr terminated */
 	unsigned long relocs = 1;
 	unsigned i;
 
-- 
2.31.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox