LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* Re: [PATCH 5/5] Powerpc/Watchpoint: Fix length calculation for unaligned target
From: Michael Neuling @ 2019-06-18 13:32 UTC (permalink / raw)
  To: Ravi Bangoria, mpe
  Cc: linux-kernel, npiggin, paulus, naveen.n.rao, linuxppc-dev
In-Reply-To: <20190618042732.5582-6-ravi.bangoria@linux.ibm.com>

On Tue, 2019-06-18 at 09:57 +0530, Ravi Bangoria wrote:
> Watchpoint match range is always doubleword(8 bytes) aligned on
> powerpc. If the given range is crossing doubleword boundary, we
> need to increase the length such that next doubleword also get
> covered. Ex,
> 
>           address   len = 6 bytes
>                 |=========.
>    |------------v--|------v--------|
>    | | | | | | | | | | | | | | | | |
>    |---------------|---------------|
>     <---8 bytes--->
> 
> In such case, current code configures hw as:
>   start_addr = address & ~HW_BREAKPOINT_ALIGN
>   len = 8 bytes
> 
> And thus read/write in last 4 bytes of the given range is ignored.
> Fix this by including next doubleword in the length. Watchpoint
> exception handler already ignores extraneous exceptions, so no
> changes required for that.

Nice catch. Thanks.

I assume this has been broken forever? Should we be CCing stable? If so, it
would be nice to have this self contained (separate from the refactor) so we can
more easily backport it.

Also, can you update 
tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c to catch this issue?

A couple more comments below.

> 
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/hw_breakpoint.h |  7 ++--
>  arch/powerpc/kernel/hw_breakpoint.c      | 44 +++++++++++++-----------
>  arch/powerpc/kernel/process.c            | 34 ++++++++++++++++--
>  3 files changed, 60 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/hw_breakpoint.h
> b/arch/powerpc/include/asm/hw_breakpoint.h
> index 8acbbdd4a2d5..749a357164d5 100644
> --- a/arch/powerpc/include/asm/hw_breakpoint.h
> +++ b/arch/powerpc/include/asm/hw_breakpoint.h
> @@ -34,6 +34,8 @@ struct arch_hw_breakpoint {
>  #define HW_BRK_TYPE_PRIV_ALL	(HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \
>  				 HW_BRK_TYPE_HYP)
>  
> +#define HW_BREAKPOINT_ALIGN 0x7
> +
>  #ifdef CONFIG_HAVE_HW_BREAKPOINT
>  #include <linux/kdebug.h>
>  #include <asm/reg.h>
> @@ -45,8 +47,6 @@ struct pmu;
>  struct perf_sample_data;
>  struct task_struct;
>  
> -#define HW_BREAKPOINT_ALIGN 0x7
> -
>  extern int hw_breakpoint_slots(int type);
>  extern int arch_bp_generic_fields(int type, int *gen_bp_type);
>  extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
> @@ -76,7 +76,8 @@ static inline void hw_breakpoint_disable(void)
>  }
>  extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
>  int hw_breakpoint_handler(struct die_args *args);
> -
> +extern u16 hw_breakpoint_get_final_len(struct arch_hw_breakpoint *brk,
> +		unsigned long *start_addr, unsigned long *end_addr);
>  extern int set_dawr(struct arch_hw_breakpoint *brk);
>  extern bool dawr_force_enable;
>  static inline bool dawr_enabled(void)
> diff --git a/arch/powerpc/kernel/hw_breakpoint.c
> b/arch/powerpc/kernel/hw_breakpoint.c
> index 36bcf705df65..c122fd55aa44 100644
> --- a/arch/powerpc/kernel/hw_breakpoint.c
> +++ b/arch/powerpc/kernel/hw_breakpoint.c
> @@ -126,6 +126,28 @@ int arch_bp_generic_fields(int type, int *gen_bp_type)
>  	return 0;
>  }
>  
> +/* Maximum len for DABR is 8 bytes and DAWR is 512 bytes */
> +static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
> +{
> +	u16 length_max = 8;
> +	u16 final_len;
> +	unsigned long start_addr, end_addr;
> +
> +	final_len = hw_breakpoint_get_final_len(hw, &start_addr, &end_addr);
> +
> +	if (dawr_enabled()) {
> +		length_max = 512;
> +		/* DAWR region can't cross 512 bytes boundary */
> +		if ((start_addr >> 9) != (end_addr >> 9))
> +			return -EINVAL;
> +	}
> +
> +	if (final_len > length_max)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
>  /*
>   * Validate the arch-specific HW Breakpoint register settings
>   */
> @@ -133,12 +155,10 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
>  			     const struct perf_event_attr *attr,
>  			     struct arch_hw_breakpoint *hw)
>  {
> -	int length_max;
> -
>  	if (!ppc_breakpoint_available())
>  		return -ENODEV;
>  
> -	if (!bp)
> +	if (!bp || !attr->bp_len)
>  		return -EINVAL;
>  
>  	hw->type = HW_BRK_TYPE_TRANSLATE;
> @@ -160,23 +180,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
>  	hw->address = attr->bp_addr;
>  	hw->len = attr->bp_len;
>  
> -	length_max = 8; /* DABR */
> -	if (dawr_enabled()) {
> -		length_max = 512 ; /* 64 doublewords */
> -		/* DAWR region can't cross 512 bytes boundary */
> -		if ((hw->address >> 9) != ((hw->address + hw->len - 1) >> 9))
> -			return -EINVAL;
> -	}
> -
> -	/*
> -	 * Since breakpoint length can be a maximum of length_max and
> -	 * breakpoint addresses are aligned to nearest double-word
> -	 * HW_BREAKPOINT_ALIGN by rounding off to the lower address,
> -	 * the 'symbolsize' should satisfy the check below.
> -	 */
> -	if (hw->len > (length_max - (hw->address & HW_BREAKPOINT_ALIGN)))
> -		return -EINVAL;
> -	return 0;
> +	return hw_breakpoint_validate_len(hw);
>  }
>  
>  /*
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index 265fac9fb3a4..159aaa70de46 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -802,9 +802,39 @@ static int disable_dawr(void)
>  	return 0;
>  }
>  
> +/*
> + * Watchpoint match range is always doubleword(8 bytes) aligned on
> + * powerpc. If the given range is crossing doubleword boundary, we
> + * need to increase the length such that next doubleword also get
> + * covered. Ex,
> + *
> + *          address   len = 6 bytes
> + *                |=========.
> + *   |------------v--|------v--------|
> + *   | | | | | | | | | | | | | | | | |
> + *   |---------------|---------------|
> + *    <---8 bytes--->
> + *
> + * In this case, we should configure hw as:
> + *   start_addr = address & ~HW_BREAKPOINT_ALIGN
> + *   len = 16 bytes
> + *
> + * @start_addr and @end_addr are inclusive.
> + */
> +u16 hw_breakpoint_get_final_len(struct arch_hw_breakpoint *brk,
> +				unsigned long *start_addr,
> +				unsigned long *end_addr)

I don't really like this.  "final" is not a good name. Something like hardware
would be better.

Also, can you put the start_addr and end addr in the arch_hw_breakpoint rather
than doing what you have above.  Call them hw_start_addr, hw_end_addr.

We could even set these two new addresses where we set the set of
arch_hw_breakpoint rather than having this late call.

> +{
> +	*start_addr = brk->address & ~HW_BREAKPOINT_ALIGN;
> +	*end_addr = (brk->address + brk->len - 1) | HW_BREAKPOINT_ALIGN;
> +	return *end_addr - *start_addr + 1;
> +}
> +
>  int set_dawr(struct arch_hw_breakpoint *brk)
>  {
>  	unsigned long dawr, dawrx, mrd;
> +	unsigned long start_addr, end_addr;
> +	u16 final_len;
>  
>  	if (brk->type == HW_BRK_TYPE_DISABLE)
>  		return disable_dawr();
> @@ -815,8 +845,8 @@ int set_dawr(struct arch_hw_breakpoint *brk)
>  	dawrx |= ((brk->type & HW_BRK_TYPE_TRANSLATE) >> 2) << (63 - 59);
>  	dawrx |= (brk->type & HW_BRK_TYPE_PRIV_ALL) >> 3;
>  
> -	/* brk->len is in bytes. */
> -	mrd = ((brk->len + 7) >> 3) - 1;
> +	final_len = hw_breakpoint_get_final_len(brk, &start_addr, &end_addr);

Again, hardware length, or something other than "final"

> +	mrd = ((final_len + 7) >> 3) - 1;
>  	dawrx |= (mrd & 0x3f) << (63 - 53);
>  
>  	if (ppc_md.set_dawr)


^ permalink raw reply

* Re: [PATCH] powerpc/mm: Ensure Huge-page memory is free before allocation
From: Vaibhav Jain @ 2019-06-18 13:27 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev
  Cc: Hari Bathini, Aneesh Kumar K.V, Mahesh J Salgaonkar
In-Reply-To: <87v9x3p04l.fsf@concordia.ellerman.id.au>

Michael Ellerman <mpe@ellerman.id.au> writes:

> Vaibhav Jain <vaibhav@linux.ibm.com> writes:
>> We recently discovered an bug where physical memory meant for
>> allocation of Huge-pages was inadvertently allocated by another component
>> during early boot.
>
> Can you give me some more detail on what that was? You're seemingly the
> only person who's ever hit this :)

The specific bug I investigated was in fadump which was trying to
reserve a large chunk of physically contiguous memory from memblock and
inadvertently stomped into a memory region that was reserved for
allocation of 16G hugepages. This happened because fadump reservation
happens much earlier in prom-init compared to hugepage reservation.

The bug manifested as a panic seen when trying to 'cat
/proc/pagetypeinfo' that dumps the buddy stats for each
zone/migrate-type.

Incidentally fadump after reserving this memory, would carve out a CMA
region that was then entered into the buddy-allocater. This would cause
pagetypeinfo_showfree_print() to fail when it tries to iterate over the
free list of this CMA migrate type as the corresponding memmap for these
pages was never initialized.

>
>> The behavior of memblock_reserve() where it wont
>> indicate whether an existing reserved block overlaps with the
>> requested reservation only makes such bugs hard to investigate.
>>
>> Hence this patch proposes adding a memblock reservation check in
>> htab_dt_scan_hugepage_blocks() just before call to memblock_reserve()
>> to ensure that the physical memory thats being reserved for is not
>> already reserved by someone else. In case this happens we panic the
>> the kernel to ensure that user of this huge-page doesn't accidentally
>> stomp on memory allocated to someone else.
>
> Do we really need to panic? Can't we just leave the block alone and not
> register it as huge page memory? With a big warning obviously.
Possibly yes, but Aneesh pointed out that this memory is supposed to be backed
only by 16G pages mapping due to limitation in phyp for hash page table size. 

>
> cheers
>
>> diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
>> index 28ced26f2a00..a05be3adb8c9 100644
>> --- a/arch/powerpc/mm/book3s64/hash_utils.c
>> +++ b/arch/powerpc/mm/book3s64/hash_utils.c
>> @@ -516,6 +516,11 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
>>  	printk(KERN_INFO "Huge page(16GB) memory: "
>>  			"addr = 0x%lX size = 0x%lX pages = %d\n",
>>  			phys_addr, block_size, expected_pages);
>> +
>> +	/* Ensure no one else has reserved memory for huge pages before */
>> +	BUG_ON(memblock_is_region_reserved(phys_addr,
>> +					   block_size * expected_pages));
>> +
>>  	if (phys_addr + block_size * expected_pages <= memblock_end_of_DRAM()) {
>>  		memblock_reserve(phys_addr, block_size * expected_pages);
>>  		pseries_add_gpage(phys_addr, block_size, expected_pages);
>> -- 
>> 2.21.0
>

-- 
Vaibhav Jain <vaibhav@linux.ibm.com>
Linux Technology Center, IBM India Pvt. Ltd.


^ permalink raw reply

* Re: [PATCH] powerpc/mm: Ensure Huge-page memory is free before allocation
From: Aneesh Kumar K.V @ 2019-06-18 12:58 UTC (permalink / raw)
  To: Michael Ellerman, Vaibhav Jain, linuxppc-dev, Paul Mackerras
  Cc: Hari Bathini, Mahesh J Salgaonkar
In-Reply-To: <87v9x3p04l.fsf@concordia.ellerman.id.au>

On 6/18/19 5:37 PM, Michael Ellerman wrote:
> Vaibhav Jain <vaibhav@linux.ibm.com> writes:
>> We recently discovered an bug where physical memory meant for
>> allocation of Huge-pages was inadvertently allocated by another component
>> during early boot.
> 
> Can you give me some more detail on what that was? You're seemingly the
> only person who's ever hit this :)
> 
>> The behavior of memblock_reserve() where it wont
>> indicate whether an existing reserved block overlaps with the
>> requested reservation only makes such bugs hard to investigate.
>>
>> Hence this patch proposes adding a memblock reservation check in
>> htab_dt_scan_hugepage_blocks() just before call to memblock_reserve()
>> to ensure that the physical memory thats being reserved for is not
>> already reserved by someone else. In case this happens we panic the
>> the kernel to ensure that user of this huge-page doesn't accidentally
>> stomp on memory allocated to someone else.
> 
> Do we really need to panic? Can't we just leave the block alone and not
> register it as huge page memory? With a big warning obviously.
> 

I need to check this again with Paul. But IIRC we have issues w.r.t hash 
page table size, if we use 16G pages as 64K mapped pages. ie, hypervisor 
create hash page table size with the assumptions that these pfns will 
only be mapped as 16G pages. If we try to put 64K hash pte entries  for 
them we will not have enough space in hash page table.

That is one of the reason we never allowed freeing the hugetlb reserved 
pool with 16G pages back to buddy.


Note: We should switch that BUG to panic. I guess using BUG that early 
don't work.

-aneesh


^ permalink raw reply

* Re: [PATCH] powerpc/32s: fix initial setup of segment registers on secondary CPU
From: Christophe Leroy @ 2019-06-18 12:44 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
	erhard_f
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <87h88noz1d.fsf@concordia.ellerman.id.au>



Le 18/06/2019 à 14:31, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>> Le 11/06/2019 à 17:47, Christophe Leroy a écrit :
>>> The patch referenced below moved the loading of segment registers
>>> out of load_up_mmu() in order to do it earlier in the boot sequence.
>>> However, the secondary CPU still needs it to be done when loading up
>>> the MMU.
>>>
>>> Reported-by: Erhard F. <erhard_f@mailbox.org>
>>> Fixes: 215b823707ce ("powerpc/32s: set up an early static hash table for KASAN")
>>
>> Cc: stable@vger.kernel.org
> 
> Sorry patchwork didn't pick that up and I missed it. The AUTOSEL bot
> will probably pick it up anyway though.

Don't worry, this was unneeded because we added KASAN in 5.2.
My mistake.
Christophe

> 
> cheers
> 
>>> diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
>>> index 1d5f1bd0dacd..f255e22184b4 100644
>>> --- a/arch/powerpc/kernel/head_32.S
>>> +++ b/arch/powerpc/kernel/head_32.S
>>> @@ -752,6 +752,7 @@ __secondary_start:
>>>    	stw	r0,0(r3)
>>>    
>>>    	/* load up the MMU */
>>> +	bl	load_segment_registers
>>>    	bl	load_up_mmu
>>>    
>>>    	/* ptr to phys current thread */
>>>

^ permalink raw reply

* Re: [PATCH v4 19/28] docs: powerpc: convert docs to ReST and rename to *.rst
From: Michael Ellerman @ 2019-06-18 12:39 UTC (permalink / raw)
  To: Jonathan Corbet, Mauro Carvalho Chehab
  Cc: Linux Doc Mailing List, linux-pci, Oliver O'Halloran,
	Qiang Zhao, linux-scsi, Jiri Slaby, Linas Vepstas,
	Andrew Donnellan, Mauro Carvalho Chehab, Manoj N. Kumar,
	Bjorn Helgaas, linux-arm-kernel, Matthew R. Ochs, Uma Krishnan,
	Sam Bobroff, Greg Kroah-Hartman, linux-kernel, Li Yang,
	Andrew Donnellan, Frederic Barrat, Paul Mackerras, linuxppc-dev
In-Reply-To: <20190614143635.3aff154d@lwn.net>

Jonathan Corbet <corbet@lwn.net> writes:
> On Wed, 12 Jun 2019 14:52:55 -0300
> Mauro Carvalho Chehab <mchehab+samsung@kernel.org> wrote:
>
>> Convert docs to ReST and add them to the arch-specific
>> book.
>> 
>> The conversion here was trivial, as almost every file there
>> was already using an elegant format close to ReST standard.
>> 
>> The changes were mostly to mark literal blocks and add a few
>> missing section title identifiers.
>> 
>> One note with regards to "--": on Sphinx, this can't be used
>> to identify a list, as it will format it badly. This can be
>> used, however, to identify a long hyphen - and "---" is an
>> even longer one.
>> 
>> At its new index.rst, let's add a :orphan: while this is not linked to
>> the main index.rst file, in order to avoid build warnings.
>> 
>> Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
>> Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com> # cxl
>
> This one fails to apply because ...
>
> [...]
>
>> diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst
>> index 83db42092935..acc21ecca322 100644
>> --- a/Documentation/PCI/pci-error-recovery.rst
>> +++ b/Documentation/PCI/pci-error-recovery.rst
>> @@ -422,3 +422,24 @@ That is, the recovery API only requires that:
>>     - drivers/net/cxgb3
>>     - drivers/net/s2io.c
>>     - drivers/net/qlge
>> +
>> +>>> As of this writing, there is a growing list of device drivers with
>> +>>> patches implementing error recovery. Not all of these patches are in
>> +>>> mainline yet. These may be used as "examples":
>> +>>>
>> +>>> drivers/scsi/ipr
>> +>>> drivers/scsi/sym53c8xx_2
>> +>>> drivers/scsi/qla2xxx
>> +>>> drivers/scsi/lpfc
>> +>>> drivers/next/bnx2.c
>> +>>> drivers/next/e100.c
>> +>>> drivers/net/e1000
>> +>>> drivers/net/e1000e
>> +>>> drivers/net/ixgb
>> +>>> drivers/net/ixgbe
>> +>>> drivers/net/cxgb3
>> +>>> drivers/net/s2io.c
>> +>>> drivers/net/qlge  
>
> ...of this, which has the look of a set of conflict markers that managed
> to get committed...?

I don't think so.

There's some other uses of >>> in that file, eg about line 162:

  >>> The current powerpc implementation assumes that a device driver will
  >>> *not* schedule or semaphore in this routine; the current powerpc
  >>> implementation uses one kernel thread to notify all devices;
  >>> thus, if one device sleeps/schedules, all devices are affected.
  >>> Doing better requires complex multi-threaded logic in the error
  >>> recovery implementation (e.g. waiting for all notification threads
  >>> to "join" before proceeding with recovery.)  This seems excessively
  >>> complex and not worth implementing.


So it's just an odd choice of emphasis device I think.

cheers

^ permalink raw reply

* Re: crypto/NX: Set receive window credits to max number of CRBs in RxFIFO
From: Michael Ellerman @ 2019-06-18 12:35 UTC (permalink / raw)
  To: Haren Myneni, Herbert Xu; +Cc: linuxppc-dev, linux-crypto, stable
In-Reply-To: <1560587942.17547.18.camel@hbabu-laptop>

Haren Myneni <haren@linux.vnet.ibm.com> writes:
>     
> System gets checkstop if RxFIFO overruns with more requests than the
> maximum possible number of CRBs in FIFO at the same time. So find max
> CRBs from FIFO size and set it to receive window credits.
>    
> CC: stable@vger.kernel.org # v4.14+
> Signed-off-by:Haren Myneni <haren@us.ibm.com>

It's helpful to mention the actual commit that's fixed, so that people
with backports can join things up, so should that be:

  Fixes: b0d6c9bab5e4 ("crypto/nx: Add P9 NX support for 842 compression engine")

???

cheers

> diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c
> index 4acbc47..e78ff5c 100644
> --- a/drivers/crypto/nx/nx-842-powernv.c
> +++ b/drivers/crypto/nx/nx-842-powernv.c
> @@ -27,8 +27,6 @@
>  #define WORKMEM_ALIGN	(CRB_ALIGN)
>  #define CSB_WAIT_MAX	(5000) /* ms */
>  #define VAS_RETRIES	(10)
> -/* # of requests allowed per RxFIFO at a time. 0 for unlimited */
> -#define MAX_CREDITS_PER_RXFIFO	(1024)
>  
>  struct nx842_workmem {
>  	/* Below fields must be properly aligned */
> @@ -812,7 +810,11 @@ static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
>  	rxattr.lnotify_lpid = lpid;
>  	rxattr.lnotify_pid = pid;
>  	rxattr.lnotify_tid = tid;
> -	rxattr.wcreds_max = MAX_CREDITS_PER_RXFIFO;
> +	/*
> +	 * Maximum RX window credits can not be more than #CRBs in
> +	 * RxFIFO. Otherwise, can get checkstop if RxFIFO overruns.
> +	 */
> +	rxattr.wcreds_max = fifo_size / CRB_SIZE;
>  
>  	/*
>  	 * Open a VAS receice window which is used to configure RxFIFO

^ permalink raw reply

* Re: [PATCH] selftests/powerpc: Add missing newline at end of file
From: Geert Uytterhoeven @ 2019-06-18 12:33 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Geert Uytterhoeven, linuxppc-dev, Linux Kernel Mailing List,
	Paul Mackerras, open list:KERNEL SELFTEST FRAMEWORK, Shuah Khan
In-Reply-To: <87muifozfd.fsf@concordia.ellerman.id.au>

Hi Michael,

On Tue, Jun 18, 2019 at 2:23 PM Michael Ellerman <mpe@ellerman.id.au> wrote:
> Geert Uytterhoeven <geert+renesas@glider.be> writes:
> > "git diff" says:
> >
> >     \ No newline at end of file
> >
> > after modifying the file.
>
> Is that a problem?
>
> Just curious because it was presumably me that broke it :)

It looks messy ;-)

> > diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
> > index ba919308fe3052f3..16861ab840f57e90 100644
> > --- a/tools/testing/selftests/powerpc/mm/.gitignore
> > +++ b/tools/testing/selftests/powerpc/mm/.gitignore
> > @@ -3,4 +3,4 @@ subpage_prot
> >  tempfile
> >  prot_sao
> >  segv_errors
> > -wild_bctr
> > \ No newline at end of file
> > +wild_bctr

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH] powerpc/32s: fix initial setup of segment registers on secondary CPU
From: Michael Ellerman @ 2019-06-18 12:31 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	erhard_f
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <b60946f5-dc70-61ce-e266-af91890cb702@c-s.fr>

Christophe Leroy <christophe.leroy@c-s.fr> writes:
> Le 11/06/2019 à 17:47, Christophe Leroy a écrit :
>> The patch referenced below moved the loading of segment registers
>> out of load_up_mmu() in order to do it earlier in the boot sequence.
>> However, the secondary CPU still needs it to be done when loading up
>> the MMU.
>> 
>> Reported-by: Erhard F. <erhard_f@mailbox.org>
>> Fixes: 215b823707ce ("powerpc/32s: set up an early static hash table for KASAN")
>
> Cc: stable@vger.kernel.org

Sorry patchwork didn't pick that up and I missed it. The AUTOSEL bot
will probably pick it up anyway though.

cheers

>> diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
>> index 1d5f1bd0dacd..f255e22184b4 100644
>> --- a/arch/powerpc/kernel/head_32.S
>> +++ b/arch/powerpc/kernel/head_32.S
>> @@ -752,6 +752,7 @@ __secondary_start:
>>   	stw	r0,0(r3)
>>   
>>   	/* load up the MMU */
>> +	bl	load_segment_registers
>>   	bl	load_up_mmu
>>   
>>   	/* ptr to phys current thread */
>> 

^ permalink raw reply

* Re: [PATCH kernel] powerpc/pci/of: Parse unassigned resources
From: Benjamin Herrenschmidt @ 2019-06-18 12:29 UTC (permalink / raw)
  To: Michael Ellerman, Alexey Kardashevskiy, linuxppc-dev
  Cc: Sam Bobroff, Alistair Popple, kvm-ppc, Oliver O'Halloran,
	David Gibson
In-Reply-To: <87sgs7ozsa.fsf@concordia.ellerman.id.au>

On Tue, 2019-06-18 at 22:15 +1000, Michael Ellerman wrote:
> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
> > The pseries platform uses the PCI_PROBE_DEVTREE method of PCI probing
> > which is basically reading "assigned-addresses" of every PCI device.
> > However if the property is missing or zero sized, then there is
> > no fallback of any kind and the PCI resources remain undiscovered, i.e.
> > pdev->resource[] array is empty.
> > 
> > This adds a fallback which parses the "reg" property in pretty much same
> > way except it marks resources as "unset" which later makes Linux assign
> > those resources with proper addresses.
> 
> What happens under PowerVM is the big question.
> 
> ie. if we see such a device under PowerVM and then do our own assignment
> what happens?

May or may not work ... EEH will be probably b0rked, but then it
shouldn't happen.

Basically PowerVM itself doesn't do anything special with PCI. It maps
a whole PHB (or virtual PHB) into the guest and doesn't care much
beyond that for MMIOs.

What you see in Linux getting in the way is RTAS. It's the one
assigning BAR values etc... within that region setup by the HV, but
RTAS is running in the guest, from the HV perspective it's all the same
really.

So if such a device did exist, RTAS would lose track but it would still
work from a HW/HV perspective. RTAS-driven services such as EEH would
probably fail though.

But in practice this shouldn't happen bcs RTAS will set assigned-
addresses on everything.

Cheers,
Ben.

^ permalink raw reply

* Re: [PATCH v2] perf ioctl: Add check for the sample_period value
From: Michael Ellerman @ 2019-06-18 12:28 UTC (permalink / raw)
  To: Ravi Bangoria, peterz
  Cc: Ravi Bangoria, maddy, jolsa, linux-kernel, acme, linuxppc-dev
In-Reply-To: <e1d0fcf5-d7f8-44a0-a3b8-339f2b79fb2c@linux.ibm.com>

Ravi Bangoria <ravi.bangoria@linux.ibm.com> writes:
> Peter / mpe,
>
> Is the v2 looks good? If so, can anyone of you please pick this up.

I usually wouldn't take it, it's generic perf code. Unless
peter/ingo/acme tell me otherwise.

It's sort of a bug fix for 0819b2e30ccb, should it have a fixes and/or
stable tag?

  Fixes: 0819b2e30ccb ("perf: Limit perf_event_attr::sample_period to 63 bits")
  Cc: stable@vger.kernel.org # v3.15+

cheers

> On 6/4/19 9:59 AM, Ravi Bangoria wrote:
>> perf_event_open() limits the sample_period to 63 bits. See
>> commit 0819b2e30ccb ("perf: Limit perf_event_attr::sample_period
>> to 63 bits"). Make ioctl() consistent with it.
>> 
>> Also on powerpc, negative sample_period could cause a recursive
>> PMIs leading to a hang (reported when running perf-fuzzer).
>> 
>> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
>> ---
>>  kernel/events/core.c | 3 +++
>>  1 file changed, 3 insertions(+)
>> 
>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>> index abbd4b3b96c2..e44c90378940 100644
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
>> @@ -5005,6 +5005,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
>>  	if (perf_event_check_period(event, value))
>>  		return -EINVAL;
>>  
>> +	if (!event->attr.freq && (value & (1ULL << 63)))
>> +		return -EINVAL;
>> +
>>  	event_function_call(event, __perf_event_period, &value);
>>  
>>  	return 0;
>> 

^ permalink raw reply

* Re: [PATCH] selftests/powerpc: Add missing newline at end of file
From: Michael Ellerman @ 2019-06-18 12:23 UTC (permalink / raw)
  To: Geert Uytterhoeven, Benjamin Herrenschmidt, Paul Mackerras,
	Shuah Khan
  Cc: Geert Uytterhoeven, linuxppc-dev, linux-kernel, linux-kselftest
In-Reply-To: <20190617145204.6810-1-geert+renesas@glider.be>

Geert Uytterhoeven <geert+renesas@glider.be> writes:
> "git diff" says:
>
>     \ No newline at end of file
>
> after modifying the file.

Is that a problem?

Just curious because it was presumably me that broke it :)

cheers

> diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
> index ba919308fe3052f3..16861ab840f57e90 100644
> --- a/tools/testing/selftests/powerpc/mm/.gitignore
> +++ b/tools/testing/selftests/powerpc/mm/.gitignore
> @@ -3,4 +3,4 @@ subpage_prot
>  tempfile
>  prot_sao
>  segv_errors
> -wild_bctr
> \ No newline at end of file
> +wild_bctr
> -- 
> 2.17.1

^ permalink raw reply

* Re: [PATCH v1 1/6] mm: Section numbers use the type "unsigned long"
From: Michael Ellerman @ 2019-06-18 12:17 UTC (permalink / raw)
  To: Andrew Morton, Christophe Leroy
  Cc: Stephen Rothwell, Michal Hocko, Mel Gorman, Baoquan He,
	David Hildenbrand, Greg Kroah-Hartman, Rafael J. Wysocki,
	Pavel Tatashin, linux-kernel, linux-mm, linux-acpi, Mike Rapoport,
	Arun KS, Johannes Weiner, Dan Williams, Wei Yang, linuxppc-dev,
	Vlastimil Babka, Oscar Salvador
In-Reply-To: <20190617185757.b57402b465caff0cf6f85320@linux-foundation.org>

Andrew Morton <akpm@linux-foundation.org> writes:
> On Sat, 15 Jun 2019 10:06:54 +0200 Christophe Leroy <christophe.leroy@c-s.fr> wrote:
>> Le 14/06/2019 à 21:00, Andrew Morton a écrit :
>> > On Fri, 14 Jun 2019 12:01:09 +0200 David Hildenbrand <david@redhat.com> wrote:
>> > 
>> >> We are using a mixture of "int" and "unsigned long". Let's make this
>> >> consistent by using "unsigned long" everywhere. We'll do the same with
>> >> memory block ids next.
>> >>
>> >> ...
>> >>
>> >> -	int i, ret, section_count = 0;
>> >> +	unsigned long i;
>> >>
>> >> ...
>> >>
>> >> -	unsigned int i;
>> >> +	unsigned long i;
>> > 
>> > Maybe I did too much fortran back in the day, but I think the
>> > expectation is that a variable called "i" has type "int".
...
>> Codying style says the following, which makes full sense in my opinion:
>> 
>> LOCAL variable names should be short, and to the point.  If you have
>> some random integer loop counter, it should probably be called ``i``.
>> Calling it ``loop_counter`` is non-productive, if there is no chance of it
>> being mis-understood.
>
> Well.  It did say "integer".  Calling an unsigned long `i' is flat out
> misleading.

I always thought `i` was for loop `index` not `integer`.

But I've never written any Fortran :)

cheers

^ permalink raw reply

* Re: [PATCH 0/1] PPC32: fix ptrace() access to FPU registers
From: Radu Rendec @ 2019-06-18 12:16 UTC (permalink / raw)
  To: Daniel Axtens, linuxppc-dev; +Cc: Paul Mackerras, Oleg Nesterov
In-Reply-To: <87muif2y4l.fsf@dja-thinkpad.axtens.net>

On Tue, 2019-06-18 at 16:42 +1000, Daniel Axtens wrote:
> Radu Rendec <
> radu.rendec@gmail.com
> > writes:
> 
> > On Mon, 2019-06-17 at 11:19 +1000, Daniel Axtens wrote:
> > > Radu Rendec <
> > > radu.rendec@gmail.com
> > > 
> > > > writes:
> > > > Hi Everyone,
> > > > 
> > > > I'm following up on the ptrace() problem that I reported a few days ago.
> > > > I believe my version of the code handles all cases correctly. While the
> > > > problem essentially boils down to dividing the fpidx by 2 on PPC32, it
> > > > becomes tricky when the same code must work correctly on both PPC32 and
> > > > PPC64.
> > > > 
> > > > One other thing that I believe was handled incorrectly in the previous
> > > > version is the unused half of fpscr on PPC32. Note that while PT_FPSCR
> > > > is defined as (PT_FPR0 + 2*32 + 1), making only the upper half visible,
> > > > PT_FPR0 + 2*32 still corresponds to a possible address that userspace
> > > > can pass. In that case, comparing fpidx to (PT_FPSCR - PT_FPR0) would
> > > > cause an invalid access to the FPU registers array.
> > > > 
> > > > I tested the patch on 4.9.179, but that part of the code is identical in
> > > > recent kernels so it should work just the same.
> > > > 
> > > > I wrote a simple test program than can be used to quickly test (on an
> > > > x86_64 host) that all cases are handled correctly for both PPC32/PPC64.
> > > > The code is included below.
> > > > 
> > > > I also tested with gdbserver (test patch included below) and verified
> > > > that it generates two ptrace() calls for each FPU register, with
> > > > addresses between 0xc0 and 0x1bc.
> > > 
> > > Thanks for looking in to this. I can confirm your issue. What I'm
> > > currently wondering is: what is the behaviour with a 32-bit userspace on
> > > a 64-bit kernel? Should they also be going down the 32-bit path as far
> > > as calculating offsets goes?
> > 
> > Thanks for reviewing this. I haven't thought about the 32-bit userspace
> > on a 64-bit kernel, that is a very good question. Userspace passes a
> > pointer, so in theory it could go down either path as long as the
> > pointer points to the right data type.
> > 
> > I will go back to the gdb source code and try to figure out if that case
> > is handled in a special way. If not, it's probably safe to assume that a
> > 32-bit userspace should always go down the 32-bit path regardless of the
> > kernel bitness (in which case I think I have to change my patch).
> 
> It doesn't seem to reproduce on a 64-bit kernel with 32-bit
> userspace. Couldn't tell you why - if you can figure it out from the gdb
> source code I'd love to know! I have, however, tried it - and all the fp
> registers look correct and KASAN doesn't pick up any memory corruption.

I looked at the gdb source code and all it seems to care about is the
architecture it was compiled for. In other words, 32-bit gdb always
assumes 32-bit register layout, regardless of whether it's running on a
32-bit or 64-bit kernel.

So it's no surprise that the problem didn't reproduce and KASAN didn't
pick up anything in your experiment. Since the kernel is 64-bit, it
divides addresses by 8, so all indexes are within bounds. The part that
I don't get is how the FP registers looked correct.

Since you already have a working setup, it would be nice if you could
add a printk to arch_ptrace() to print the address and confirm what I
believe happens (by reading the gdb source code).

So for 32-bit gdb on 64-bit kernel, I think gdb will generate 48 x 4-
byte aligned addresses for the CPU registers, then 32 x 2 x 4-byte
aligned addresses for the FPU registers. The indexes will not overflow,
but access to all registers (CPU and FPU) will be broken because:
 * The kernel always divides by 8. The CPU register address range that
   gdb generates will be half of what the kernel expects and "odd" (i.e.
   non 8-byte aligned) addresses will fail with EIO because the kernel
   code checks that the last 3 bits are all zero.
 * The FPU register indexes will be off by 24. For example, when gdb
   thinks it asks for FPR0, it calculates the address as 4 x 48, but the
   kernel divides by 8, so it will get index 24, which it thinks is a
   CPU register.

When it stops on a breakpoint, gdb seems to read all registers (both CPU
and FPU) in ascending index order. So if you print the address in the
kernel it's actually very easy to verify my theory. I expect addresses
from 0 to 48 x 4 in increments of 4 (for the CPU registers), then
addresses from 48 x 4 to 48 x 4 + 32 x 2 x 4 in increments of 4 (for the
FPU registers).

Best regards,
Radu

^ permalink raw reply

* Re: [PATCH kernel] powerpc/pci/of: Parse unassigned resources
From: Michael Ellerman @ 2019-06-18 12:15 UTC (permalink / raw)
  To: Alexey Kardashevskiy, linuxppc-dev
  Cc: Alexey Kardashevskiy, Alistair Popple, kvm-ppc, Sam Bobroff,
	Oliver O'Halloran, David Gibson
In-Reply-To: <20190614025916.123589-1-aik@ozlabs.ru>

Alexey Kardashevskiy <aik@ozlabs.ru> writes:
> The pseries platform uses the PCI_PROBE_DEVTREE method of PCI probing
> which is basically reading "assigned-addresses" of every PCI device.
> However if the property is missing or zero sized, then there is
> no fallback of any kind and the PCI resources remain undiscovered, i.e.
> pdev->resource[] array is empty.
>
> This adds a fallback which parses the "reg" property in pretty much same
> way except it marks resources as "unset" which later makes Linux assign
> those resources with proper addresses.

What happens under PowerVM is the big question.

ie. if we see such a device under PowerVM and then do our own assignment
what happens?

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/mm: Ensure Huge-page memory is free before allocation
From: Michael Ellerman @ 2019-06-18 12:07 UTC (permalink / raw)
  To: Vaibhav Jain, linuxppc-dev
  Cc: Hari Bathini, Vaibhav Jain, Mahesh J Salgaonkar, Aneesh Kumar K.V
In-Reply-To: <20190618044609.19997-1-vaibhav@linux.ibm.com>

Vaibhav Jain <vaibhav@linux.ibm.com> writes:
> We recently discovered an bug where physical memory meant for
> allocation of Huge-pages was inadvertently allocated by another component
> during early boot.

Can you give me some more detail on what that was? You're seemingly the
only person who's ever hit this :)

> The behavior of memblock_reserve() where it wont
> indicate whether an existing reserved block overlaps with the
> requested reservation only makes such bugs hard to investigate.
>
> Hence this patch proposes adding a memblock reservation check in
> htab_dt_scan_hugepage_blocks() just before call to memblock_reserve()
> to ensure that the physical memory thats being reserved for is not
> already reserved by someone else. In case this happens we panic the
> the kernel to ensure that user of this huge-page doesn't accidentally
> stomp on memory allocated to someone else.

Do we really need to panic? Can't we just leave the block alone and not
register it as huge page memory? With a big warning obviously.

cheers

> diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
> index 28ced26f2a00..a05be3adb8c9 100644
> --- a/arch/powerpc/mm/book3s64/hash_utils.c
> +++ b/arch/powerpc/mm/book3s64/hash_utils.c
> @@ -516,6 +516,11 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
>  	printk(KERN_INFO "Huge page(16GB) memory: "
>  			"addr = 0x%lX size = 0x%lX pages = %d\n",
>  			phys_addr, block_size, expected_pages);
> +
> +	/* Ensure no one else has reserved memory for huge pages before */
> +	BUG_ON(memblock_is_region_reserved(phys_addr,
> +					   block_size * expected_pages));
> +
>  	if (phys_addr + block_size * expected_pages <= memblock_end_of_DRAM()) {
>  		memblock_reserve(phys_addr, block_size * expected_pages);
>  		pseries_add_gpage(phys_addr, block_size, expected_pages);
> -- 
> 2.21.0

^ permalink raw reply

* Re: [PATCH] selftests/powerpc: Fix earlyclobber in tm-vmxcopy
From: Segher Boessenkool @ 2019-06-18 12:00 UTC (permalink / raw)
  To: Gustavo Romero; +Cc: mikey, linuxppc-dev
In-Reply-To: <1560806698-26651-1-git-send-email-gromero@linux.vnet.ibm.com>

On Mon, Jun 17, 2019 at 05:24:58PM -0400, Gustavo Romero wrote:
> In some cases, compiler can allocate the same register for operand 'res'
> and 'vecoutptr', resulting in segfault at 'stxvd2x 40,0,%[vecoutptr]'
> because base register will contain 1, yielding a false-positive.
> 
> This is because output 'res' must be marked as an earlyclobber operand so
> it may not overlap an input operand ('vecoutptr').
> 
> Signed-off-by: Gustavo Romero <gromero@linux.vnet.ibm.com>

Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>


Segher


> ---
>  tools/testing/selftests/powerpc/tm/tm-vmxcopy.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
> index 147c6dc..c1e788a 100644
> --- a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
> +++ b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
> @@ -79,7 +79,7 @@ int test_vmxcopy()
>  
>  		"5:;"
>  		"stxvd2x 40,0,%[vecoutptr];"
> -		: [res]"=r"(aborted)
> +		: [res]"=&r"(aborted)
>  		: [vecinptr]"r"(&vecin),
>  		  [vecoutptr]"r"(&vecout),
>  		  [map]"r"(a)
> -- 
> 2.7.4

^ permalink raw reply

* Re: [PATCH v3 4/9] KVM: PPC: Ultravisor: Add generic ultravisor call handler
From: Segher Boessenkool @ 2019-06-18 11:47 UTC (permalink / raw)
  To: Paul Mackerras
  Cc: Madhavan Srinivasan, Michael Anderson, Ram Pai, Claudio Carvalho,
	kvm-ppc, Bharata B Rao, linuxppc-dev, Sukadev Bhattiprolu,
	Thiago Bauermann, Anshuman Khandual
In-Reply-To: <20190617020632.yywfoqwfinjxs3pb@oak.ozlabs.ibm.com>

Hi Paul,

On Mon, Jun 17, 2019 at 12:06:32PM +1000, Paul Mackerras wrote:
> The thing we need to consider is that when SMFCTRL[E] = 0, a ucall
> instruction becomes a hcall (that is, sc 2 is executed as if it was
> sc 1).  In that case, the first argument to the ucall will be
> interpreted as the hcall number.  Mostly that will happen not to be a
> valid hcall number, but sometimes it might unavoidably be a valid but
> unintended hcall number.

Shouldn't a caller of the ultravisor *know* that it is talking to the
ultravisor in the first place?  And not to the hypervisor.


Segher

^ permalink raw reply

* Re: [PATCH 3/5] Powerpc/hw-breakpoint: Refactor set_dawr()
From: Ravi Bangoria @ 2019-06-18  7:13 UTC (permalink / raw)
  To: Michael Neuling
  Cc: Ravi Bangoria, linux-kernel, npiggin, paulus, naveen.n.rao,
	linuxppc-dev
In-Reply-To: <b8d8682c8cf13d307ca1e936f924f31a9eac3227.camel@neuling.org>



On 6/18/19 11:41 AM, Michael Neuling wrote:
> This is going to collide with this patch 
> https://patchwork.ozlabs.org/patch/1109594/

Yeah, I'm aware of the patch. I just developed this on powerpc/next.
I'll rebase my patches accordingly once mpe picks up that patche.


^ permalink raw reply

* Re: [PATCH 2/5] Powerpc/hw-breakpoint: Refactor hw_breakpoint_arch_parse()
From: Ravi Bangoria @ 2019-06-18  7:10 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Ravi Bangoria, mikey, linux-kernel, npiggin, paulus, naveen.n.rao,
	linuxppc-dev
In-Reply-To: <66e70f57-befa-f241-9476-8e06519bac90@c-s.fr>



On 6/18/19 11:51 AM, Christophe Leroy wrote:
> 
> 
> Le 18/06/2019 à 06:27, Ravi Bangoria a écrit :
>> Move feature availability check at the start of the function.
>> Rearrange comment to it's associated code. Use hw->address and
>> hw->len in the 512 bytes boundary check(to write if statement
>> in a single line). Add spacing between code blocks.
> 
> Are those cosmetic changes in the boundary check worth it since they disappear in the final patch ?

Nope.. not necessary. I was just going bit more patch by patch.
I don't mind keeping the code as it is and then change it in
the final patch.


^ permalink raw reply

* Re: [PATCH kernel v3 0/3] powerpc/ioda2: Yet another attempt to allow DMA masks between 32 and 59
From: Shawn Anastasio @ 2019-06-18  7:00 UTC (permalink / raw)
  To: Alexey Kardashevskiy, linuxppc-dev
  Cc: Sam Bobroff, Alistair Popple, Oliver O'Halloran, David Gibson
In-Reply-To: <d4a8d06e-aa5b-dab7-4b20-d1aa77b5304a@ozlabs.ru>

On 6/18/19 1:39 AM, Alexey Kardashevskiy wrote:
> 
> 
> On 18/06/2019 14:26, Shawn Anastasio wrote:
>> On 6/12/19 2:15 PM, Shawn Anastasio wrote:
>>> On 6/12/19 2:07 AM, Alexey Kardashevskiy wrote:
>>>>
>>>>
>>>> On 12/06/2019 15:05, Shawn Anastasio wrote:
>>>>> On 6/5/19 11:11 PM, Shawn Anastasio wrote:
>>>>>> On 5/30/19 2:03 AM, Alexey Kardashevskiy wrote:
>>>>>>> This is an attempt to allow DMA masks between 32..59 which are not
>>>>>>> large
>>>>>>> enough to use either a PHB3 bypass mode or a sketchy bypass.
>>>>>>> Depending
>>>>>>> on the max order, up to 40 is usually available.
>>>>>>>
>>>>>>>
>>>>>>> This is based on v5.2-rc2.
>>>>>>>
>>>>>>> Please comment. Thanks.
>>>>>>
>>>>>> I have tested this patch set with an AMD GPU that's limited to <64bit
>>>>>> DMA (I believe it's 40 or 42 bit). It successfully allows the card to
>>>>>> operate without falling back to 32-bit DMA mode as it does without
>>>>>> the patches.
>>>>>>
>>>>>> Relevant kernel log message:
>>>>>> ```
>>>>>> [    0.311211] pci 0033:01     : [PE# 00] Enabling 64-bit DMA bypass
>>>>>> ```
>>>>>>
>>>>>> Tested-by: Shawn Anastasio <shawn@anastas.io>
>>>>>
>>>>> After a few days of further testing, I've started to run into stability
>>>>> issues with the patch applied and used with an AMD GPU. Specifically,
>>>>> the system sometimes spontaneously crashes. Not just EEH errors either,
>>>>> the whole system shuts down in what looks like a checkstop.
>>>>>
>>>>> Perhaps some subtle corruption is occurring?
>>>>
>>>> Have you tried this?
>>>>
>>>> https://patchwork.ozlabs.org/patch/1113506/
>>>
>>> I have not. I'll give it a shot and try it out for a few days to see
>>> if I'm able to reproduce the crashes.
>>
>> A few days later and I was able to reproduce the checkstop while
>> watching a video in mpv. At this point the system had ~4 day
>> uptime and this wasn't the first video I watched during that time.
>>
>> This is with https://patchwork.ozlabs.org/patch/1113506/ applied, too.
> 
> 
> Any logs left? What was the reason for the checkstop and what is the
> hardware? "lscpu" and "lspci -vv" for the starter would help. Thanks,

The machine is a Talos II with 2x 8 core DD2.2 Sforza modules.
I've added the output of lscpu and lspci below. As for logs,
it doesn't seem there are any kernel logs of the event.
The opal-gard utility shows some error records which I have
also included below.

opal-gard:
```
$ sudo ./opal-gard show 1
Record ID:    0x00000001
========================
Error ID:     0x9000000b
Error Type:   Fatal (0xe3)
Path Type: physical
 >Sys, Instance #0
  >Node, Instance #0
   >Proc, Instance #1
    >EQ, Instance #0
     >EX, Instance #0

$ sudo ./opal-gard show 2
Record ID:    0x00000002
========================
Error ID:     0x90000021
Error Type:   Fatal (0xe3)
Path Type: physical
 >Sys, Instance #0
  >Node, Instance #0
   >Proc, Instance #1
    >EQ, Instance #2
     >EX, Instance #1

```

lscpu:
```
Architecture:        ppc64le
Byte Order:          Little Endian
CPU(s):              52
On-line CPU(s) list: 0-3,8-31,36-47,52-63
Thread(s) per core:  4
Core(s) per socket:  6
Socket(s):           2
NUMA node(s):        2
Model:               2.2 (pvr 004e 1202)
Model name:          POWER9, altivec supported
CPU max MHz:         3800.0000
CPU min MHz:         2154.0000
L1d cache:           32K
L1i cache:           32K
L2 cache:            512K
L3 cache:            10240K
NUMA node0 CPU(s):   0-3,8-31
NUMA node8 CPU(s):   36-47,52-63

```

lspci -vv:
Output at: https://upaste.anastas.io/IwVQzt

^ permalink raw reply

* Re: [PATCH 5/5] Powerpc/Watchpoint: Fix length calculation for unaligned target
From: Christophe Leroy @ 2019-06-18  6:46 UTC (permalink / raw)
  To: Ravi Bangoria, mpe
  Cc: mikey, linux-kernel, npiggin, paulus, naveen.n.rao, linuxppc-dev
In-Reply-To: <20190618042732.5582-6-ravi.bangoria@linux.ibm.com>



Le 18/06/2019 à 06:27, Ravi Bangoria a écrit :
> Watchpoint match range is always doubleword(8 bytes) aligned on
> powerpc. If the given range is crossing doubleword boundary, we
> need to increase the length such that next doubleword also get
> covered. Ex,
> 
>            address   len = 6 bytes
>                  |=========.
>     |------------v--|------v--------|
>     | | | | | | | | | | | | | | | | |
>     |---------------|---------------|
>      <---8 bytes--->
> 
> In such case, current code configures hw as:
>    start_addr = address & ~HW_BREAKPOINT_ALIGN
>    len = 8 bytes
> 
> And thus read/write in last 4 bytes of the given range is ignored.
> Fix this by including next doubleword in the length. Watchpoint
> exception handler already ignores extraneous exceptions, so no
> changes required for that.
> 
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/hw_breakpoint.h |  7 ++--
>   arch/powerpc/kernel/hw_breakpoint.c      | 44 +++++++++++++-----------
>   arch/powerpc/kernel/process.c            | 34 ++++++++++++++++--
>   3 files changed, 60 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
> index 8acbbdd4a2d5..749a357164d5 100644
> --- a/arch/powerpc/include/asm/hw_breakpoint.h
> +++ b/arch/powerpc/include/asm/hw_breakpoint.h
> @@ -34,6 +34,8 @@ struct arch_hw_breakpoint {
>   #define HW_BRK_TYPE_PRIV_ALL	(HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \
>   				 HW_BRK_TYPE_HYP)
>   
> +#define HW_BREAKPOINT_ALIGN 0x7
> +
>   #ifdef CONFIG_HAVE_HW_BREAKPOINT
>   #include <linux/kdebug.h>
>   #include <asm/reg.h>
> @@ -45,8 +47,6 @@ struct pmu;
>   struct perf_sample_data;
>   struct task_struct;
>   
> -#define HW_BREAKPOINT_ALIGN 0x7
> -
>   extern int hw_breakpoint_slots(int type);
>   extern int arch_bp_generic_fields(int type, int *gen_bp_type);
>   extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
> @@ -76,7 +76,8 @@ static inline void hw_breakpoint_disable(void)
>   }
>   extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
>   int hw_breakpoint_handler(struct die_args *args);
> -
> +extern u16 hw_breakpoint_get_final_len(struct arch_hw_breakpoint *brk,
> +		unsigned long *start_addr, unsigned long *end_addr);
>   extern int set_dawr(struct arch_hw_breakpoint *brk);
>   extern bool dawr_force_enable;
>   static inline bool dawr_enabled(void)
> diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
> index 36bcf705df65..c122fd55aa44 100644
> --- a/arch/powerpc/kernel/hw_breakpoint.c
> +++ b/arch/powerpc/kernel/hw_breakpoint.c
> @@ -126,6 +126,28 @@ int arch_bp_generic_fields(int type, int *gen_bp_type)
>   	return 0;
>   }
>   
> +/* Maximum len for DABR is 8 bytes and DAWR is 512 bytes */
> +static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
> +{
> +	u16 length_max = 8;
> +	u16 final_len;

You should be more consistent in naming. If one is called final_len, the 
other one should be called max_len.

> +	unsigned long start_addr, end_addr;
> +
> +	final_len = hw_breakpoint_get_final_len(hw, &start_addr, &end_addr);
> +
> +	if (dawr_enabled()) {
> +		length_max = 512;
> +		/* DAWR region can't cross 512 bytes boundary */
> +		if ((start_addr >> 9) != (end_addr >> 9))
> +			return -EINVAL;
> +	}
> +
> +	if (final_len > length_max)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +

Is many places, we have those numeric 512 and 9 shift. Could we replace 
them by some symbol, for instance DAWR_SIZE and DAWR_SHIFT ?

>   /*
>    * Validate the arch-specific HW Breakpoint register settings
>    */
> @@ -133,12 +155,10 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
>   			     const struct perf_event_attr *attr,
>   			     struct arch_hw_breakpoint *hw)
>   {
> -	int length_max;
> -
>   	if (!ppc_breakpoint_available())
>   		return -ENODEV;
>   
> -	if (!bp)
> +	if (!bp || !attr->bp_len)
>   		return -EINVAL;
>   
>   	hw->type = HW_BRK_TYPE_TRANSLATE;
> @@ -160,23 +180,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
>   	hw->address = attr->bp_addr;
>   	hw->len = attr->bp_len;
>   
> -	length_max = 8; /* DABR */
> -	if (dawr_enabled()) {
> -		length_max = 512 ; /* 64 doublewords */
> -		/* DAWR region can't cross 512 bytes boundary */
> -		if ((hw->address >> 9) != ((hw->address + hw->len - 1) >> 9))
> -			return -EINVAL;
> -	}
> -
> -	/*
> -	 * Since breakpoint length can be a maximum of length_max and
> -	 * breakpoint addresses are aligned to nearest double-word
> -	 * HW_BREAKPOINT_ALIGN by rounding off to the lower address,
> -	 * the 'symbolsize' should satisfy the check below.
> -	 */
> -	if (hw->len > (length_max - (hw->address & HW_BREAKPOINT_ALIGN)))
> -		return -EINVAL;
> -	return 0;
> +	return hw_breakpoint_validate_len(hw);
>   }
>   
>   /*
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index 265fac9fb3a4..159aaa70de46 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -802,9 +802,39 @@ static int disable_dawr(void)
>   	return 0;
>   }
>   
> +/*
> + * Watchpoint match range is always doubleword(8 bytes) aligned on
> + * powerpc. If the given range is crossing doubleword boundary, we
> + * need to increase the length such that next doubleword also get
> + * covered. Ex,
> + *
> + *          address   len = 6 bytes
> + *                |=========.
> + *   |------------v--|------v--------|
> + *   | | | | | | | | | | | | | | | | |
> + *   |---------------|---------------|
> + *    <---8 bytes--->
> + *
> + * In this case, we should configure hw as:
> + *   start_addr = address & ~HW_BREAKPOINT_ALIGN
> + *   len = 16 bytes
> + *
> + * @start_addr and @end_addr are inclusive.
> + */
> +u16 hw_breakpoint_get_final_len(struct arch_hw_breakpoint *brk,
> +				unsigned long *start_addr,
> +				unsigned long *end_addr)
> +{
> +	*start_addr = brk->address & ~HW_BREAKPOINT_ALIGN;
> +	*end_addr = (brk->address + brk->len - 1) | HW_BREAKPOINT_ALIGN;
> +	return *end_addr - *start_addr + 1;
> +}

This function gives horrible code (a couple of unneeded store/re-read 
and read/re-read).

000006bc <hw_breakpoint_get_final_len>:
      6bc:	81 23 00 00 	lwz     r9,0(r3)
      6c0:	55 29 00 38 	rlwinm  r9,r9,0,0,28
      6c4:	91 24 00 00 	stw     r9,0(r4)
      6c8:	81 43 00 00 	lwz     r10,0(r3)
      6cc:	a1 23 00 06 	lhz     r9,6(r3)
      6d0:	38 6a ff ff 	addi    r3,r10,-1
      6d4:	7c 63 4a 14 	add     r3,r3,r9
      6d8:	60 63 00 07 	ori     r3,r3,7
      6dc:	90 65 00 00 	stw     r3,0(r5)
      6e0:	38 63 00 01 	addi    r3,r3,1
      6e4:	81 24 00 00 	lwz     r9,0(r4)
      6e8:	7c 69 18 50 	subf    r3,r9,r3
      6ec:	54 63 04 3e 	clrlwi  r3,r3,16
      6f0:	4e 80 00 20 	blr

Below code gives something better:

u16 hw_breakpoint_get_final_len(struct arch_hw_breakpoint *brk,
				unsigned long *start_addr,
				unsigned long *end_addr)
{
	unsigned long address = brk->address;
	unsigned long len = brk->len;
	unsigned long start = address & ~HW_BREAKPOINT_ALIGN;
	unsigned long end = (address + len - 1) | HW_BREAKPOINT_ALIGN;

	*start_addr = start;
	*end_addr = end;
	return end - start + 1;
}

000006bc <hw_breakpoint_get_final_len>:
      6bc:	81 43 00 00 	lwz     r10,0(r3)
      6c0:	a1 03 00 06 	lhz     r8,6(r3)
      6c4:	39 2a ff ff 	addi    r9,r10,-1
      6c8:	7d 28 4a 14 	add     r9,r8,r9
      6cc:	55 4a 00 38 	rlwinm  r10,r10,0,0,28
      6d0:	61 29 00 07 	ori     r9,r9,7
      6d4:	91 44 00 00 	stw     r10,0(r4)
      6d8:	20 6a 00 01 	subfic  r3,r10,1
      6dc:	91 25 00 00 	stw     r9,0(r5)
      6e0:	7c 63 4a 14 	add     r3,r3,r9
      6e4:	54 63 04 3e 	clrlwi  r3,r3,16
      6e8:	4e 80 00 20 	blr


And regardless, that's a pitty to have this function using pointers 
which are from local variables in the callers, as we loose the benefit 
of registers. Couldn't this function go in the .h as a static inline ? 
I'm sure the result would be worth it.

Christophe

> +
>   int set_dawr(struct arch_hw_breakpoint *brk)
>   {
>   	unsigned long dawr, dawrx, mrd;
> +	unsigned long start_addr, end_addr;
> +	u16 final_len;
>   
>   	if (brk->type == HW_BRK_TYPE_DISABLE)
>   		return disable_dawr();
> @@ -815,8 +845,8 @@ int set_dawr(struct arch_hw_breakpoint *brk)
>   	dawrx |= ((brk->type & HW_BRK_TYPE_TRANSLATE) >> 2) << (63 - 59);
>   	dawrx |= (brk->type & HW_BRK_TYPE_PRIV_ALL) >> 3;
>   
> -	/* brk->len is in bytes. */
> -	mrd = ((brk->len + 7) >> 3) - 1;
> +	final_len = hw_breakpoint_get_final_len(brk, &start_addr, &end_addr);
> +	mrd = ((final_len + 7) >> 3) - 1;
>   	dawrx |= (mrd & 0x3f) << (63 - 53);
>   
>   	if (ppc_md.set_dawr)
> 

^ permalink raw reply

* Re: [PATCH 0/1] PPC32: fix ptrace() access to FPU registers
From: Daniel Axtens @ 2019-06-18  6:42 UTC (permalink / raw)
  To: Radu Rendec, linuxppc-dev; +Cc: Paul Mackerras, Oleg Nesterov
In-Reply-To: <5fcdb5767b7cf4c7d5b7496c0032021e43115d39.camel@gmail.com>

Radu Rendec <radu.rendec@gmail.com> writes:

> On Mon, 2019-06-17 at 11:19 +1000, Daniel Axtens wrote:
>> Radu Rendec <
>> radu.rendec@gmail.com
>> > writes:
>> 
>> > Hi Everyone,
>> > 
>> > I'm following up on the ptrace() problem that I reported a few days ago.
>> > I believe my version of the code handles all cases correctly. While the
>> > problem essentially boils down to dividing the fpidx by 2 on PPC32, it
>> > becomes tricky when the same code must work correctly on both PPC32 and
>> > PPC64.
>> > 
>> > One other thing that I believe was handled incorrectly in the previous
>> > version is the unused half of fpscr on PPC32. Note that while PT_FPSCR
>> > is defined as (PT_FPR0 + 2*32 + 1), making only the upper half visible,
>> > PT_FPR0 + 2*32 still corresponds to a possible address that userspace
>> > can pass. In that case, comparing fpidx to (PT_FPSCR - PT_FPR0) would
>> > cause an invalid access to the FPU registers array.
>> > 
>> > I tested the patch on 4.9.179, but that part of the code is identical in
>> > recent kernels so it should work just the same.
>> > 
>> > I wrote a simple test program than can be used to quickly test (on an
>> > x86_64 host) that all cases are handled correctly for both PPC32/PPC64.
>> > The code is included below.
>> > 
>> > I also tested with gdbserver (test patch included below) and verified
>> > that it generates two ptrace() calls for each FPU register, with
>> > addresses between 0xc0 and 0x1bc.
>> 
>> Thanks for looking in to this. I can confirm your issue. What I'm
>> currently wondering is: what is the behaviour with a 32-bit userspace on
>> a 64-bit kernel? Should they also be going down the 32-bit path as far
>> as calculating offsets goes?
>
> Thanks for reviewing this. I haven't thought about the 32-bit userspace
> on a 64-bit kernel, that is a very good question. Userspace passes a
> pointer, so in theory it could go down either path as long as the
> pointer points to the right data type.
>
> I will go back to the gdb source code and try to figure out if that case
> is handled in a special way. If not, it's probably safe to assume that a
> 32-bit userspace should always go down the 32-bit path regardless of the
> kernel bitness (in which case I think I have to change my patch).

It doesn't seem to reproduce on a 64-bit kernel with 32-bit
userspace. Couldn't tell you why - if you can figure it out from the gdb
source code I'd love to know! I have, however, tried it - and all the fp
registers look correct and KASAN doesn't pick up any memory corruption.

Regards,
Daniel
>
> Best regards,
> Radu
>
>> > 8<--------------- Makefile ---------------------------------------------
>> > .PHONY: all clean
>> > 
>> > all: ptrace-fpregs-32 ptrace-fpregs-64
>> > 
>> > ptrace-fpregs-32: ptrace-fpregs.c
>> > 	$(CC) -o ptrace-fpregs-32 -Wall -O2 -m32 ptrace-fpregs.c
>> > 
>> > ptrace-fpregs-64: ptrace-fpregs.c
>> > 	$(CC) -o ptrace-fpregs-64 -Wall -O2 ptrace-fpregs.c
>> > 
>> > clean:
>> > 	rm -f ptrace-fpregs-32 ptrace-fpregs-64
>> > 8<--------------- ptrace-fpregs.c --------------------------------------
>> > #include <stdio.h>
>> > #include <errno.h>
>> > 
>> > #define PT_FPR0	48
>> > 
>> > #ifndef __x86_64
>> > 
>> > #define PT_FPR31 (PT_FPR0 + 2*31)
>> > #define PT_FPSCR (PT_FPR0 + 2*32 + 1)
>> > 
>> > #else
>> > 
>> > #define PT_FPSCR (PT_FPR0 + 32)
>> > 
>> > #endif
>> > 
>> > int test_access(unsigned long addr)
>> > {
>> > 	int ret;
>> > 
>> > 	do {
>> > 		unsigned long index, fpidx;
>> > 
>> > 		ret = -EIO;
>> > 
>> > 		/* convert to index and check */
>> > 		index = addr / sizeof(long);
>> > 		if ((addr & (sizeof(long) - 1)) || (index > PT_FPSCR))
>> > 			break;
>> > 
>> > 		if (index < PT_FPR0) {
>> > 			ret = printf("ptrace_put_reg(%lu)", index);
>> > 			break;
>> > 		}
>> > 
>> > 		ret = 0;
>> > #ifndef __x86_64
>> > 		if (index == PT_FPSCR - 1) {
>> > 			/* corner case for PPC32; do nothing */
>> > 			printf("corner_case");
>> > 			break;
>> > 		}
>> > #endif
>> > 		if (index == PT_FPSCR) {
>> > 			printf("fpscr");
>> > 			break;
>> > 		}
>> > 
>> > 		/*
>> > 		 * FPR is always 64-bit; on PPC32, userspace does two 32-bit
>> > 		 * accesses. Add bit2 to allow accessing the upper half on
>> > 		 * 32-bit; on 64-bit, bit2 is always 0 (we validate it above).
>> > 		 */
>> > 		fpidx = (addr - PT_FPR0 * sizeof(long)) / 8;
>> > 		printf("TS_FPR[%lu] + %lu", fpidx, addr & 4);
>> > 		break;
>> > 	} while (0);
>> > 
>> > 	return ret;
>> > }
>> > 
>> > int main(void)
>> > {
>> > 	unsigned long addr;
>> > 	int rc;
>> > 
>> > 	for (addr = 0; addr < PT_FPSCR * sizeof(long) + 16; addr++) {
>> > 		printf("0x%04lx: ", addr);
>> > 		rc = test_access(addr);
>> > 		if (rc < 0)
>> > 			printf("!err!");
>> > 		printf("\t<%d>\n", rc);
>> > 	}
>> > 
>> > 	return 0;
>> > }
>> > 8<--------------- gdb.patch --------------------------------------------
>> > --- gdb/gdbserver/linux-low.c.orig	2019-06-10 11:45:53.810882669 -0400
>> > +++ gdb/gdbserver/linux-low.c	2019-06-10 11:49:32.272929766 -0400
>> > @@ -4262,6 +4262,8 @@ store_register (struct regcache *regcach
>> >    pid = lwpid_of (get_thread_lwp (current_inferior));
>> >    for (i = 0; i < size; i += sizeof (PTRACE_XFER_TYPE))
>> >      {
>> > +      printf("writing register #%d offset %d at address %#x\n",
>> > +             regno, i, (unsigned int)regaddr);
>> >        errno = 0;
>> >        ptrace (PTRACE_POKEUSER, pid,
>> >  	    /* Coerce to a uintptr_t first to avoid potential gcc warning
>> > 8<----------------------------------------------------------------------
>> > 
>> > Radu Rendec (1):
>> >   PPC32: fix ptrace() access to FPU registers
>> > 
>> >  arch/powerpc/kernel/ptrace.c | 85 ++++++++++++++++++++++--------------
>> >  1 file changed, 52 insertions(+), 33 deletions(-)
>> > 
>> > -- 
>> > 2.20.1

^ permalink raw reply

* Re: [PATCH kernel v3 0/3] powerpc/ioda2: Yet another attempt to allow DMA masks between 32 and 59
From: Alexey Kardashevskiy @ 2019-06-18  6:39 UTC (permalink / raw)
  To: Shawn Anastasio, linuxppc-dev
  Cc: Sam Bobroff, Alistair Popple, Oliver O'Halloran, David Gibson
In-Reply-To: <57d69807-a31d-da21-b401-701389fe885b@anastas.io>



On 18/06/2019 14:26, Shawn Anastasio wrote:
> On 6/12/19 2:15 PM, Shawn Anastasio wrote:
>> On 6/12/19 2:07 AM, Alexey Kardashevskiy wrote:
>>>
>>>
>>> On 12/06/2019 15:05, Shawn Anastasio wrote:
>>>> On 6/5/19 11:11 PM, Shawn Anastasio wrote:
>>>>> On 5/30/19 2:03 AM, Alexey Kardashevskiy wrote:
>>>>>> This is an attempt to allow DMA masks between 32..59 which are not
>>>>>> large
>>>>>> enough to use either a PHB3 bypass mode or a sketchy bypass.
>>>>>> Depending
>>>>>> on the max order, up to 40 is usually available.
>>>>>>
>>>>>>
>>>>>> This is based on v5.2-rc2.
>>>>>>
>>>>>> Please comment. Thanks.
>>>>>
>>>>> I have tested this patch set with an AMD GPU that's limited to <64bit
>>>>> DMA (I believe it's 40 or 42 bit). It successfully allows the card to
>>>>> operate without falling back to 32-bit DMA mode as it does without
>>>>> the patches.
>>>>>
>>>>> Relevant kernel log message:
>>>>> ```
>>>>> [    0.311211] pci 0033:01     : [PE# 00] Enabling 64-bit DMA bypass
>>>>> ```
>>>>>
>>>>> Tested-by: Shawn Anastasio <shawn@anastas.io>
>>>>
>>>> After a few days of further testing, I've started to run into stability
>>>> issues with the patch applied and used with an AMD GPU. Specifically,
>>>> the system sometimes spontaneously crashes. Not just EEH errors either,
>>>> the whole system shuts down in what looks like a checkstop.
>>>>
>>>> Perhaps some subtle corruption is occurring?
>>>
>>> Have you tried this?
>>>
>>> https://patchwork.ozlabs.org/patch/1113506/
>>
>> I have not. I'll give it a shot and try it out for a few days to see
>> if I'm able to reproduce the crashes.
> 
> A few days later and I was able to reproduce the checkstop while
> watching a video in mpv. At this point the system had ~4 day
> uptime and this wasn't the first video I watched during that time.
> 
> This is with https://patchwork.ozlabs.org/patch/1113506/ applied, too.


Any logs left? What was the reason for the checkstop and what is the
hardware? "lscpu" and "lspci -vv" for the starter would help. Thanks,


-- 
Alexey

^ permalink raw reply

* Re: [PATCH 4/5] Powerpc/hw-breakpoint: Optimize disable path
From: Christophe Leroy @ 2019-06-18  6:31 UTC (permalink / raw)
  To: Ravi Bangoria, mpe
  Cc: mikey, linux-kernel, npiggin, paulus, naveen.n.rao, linuxppc-dev
In-Reply-To: <20190618042732.5582-5-ravi.bangoria@linux.ibm.com>



Le 18/06/2019 à 06:27, Ravi Bangoria a écrit :
> Directly setting dawr and dawrx with 0 should be enough to
> disable watchpoint. No need to reset individual bits in
> variable and then set in hw.
> 
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/hw_breakpoint.h |  3 ++-
>   arch/powerpc/kernel/process.c            | 12 ++++++++++++
>   2 files changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
> index 78202d5fb13a..8acbbdd4a2d5 100644
> --- a/arch/powerpc/include/asm/hw_breakpoint.h
> +++ b/arch/powerpc/include/asm/hw_breakpoint.h
> @@ -19,6 +19,7 @@ struct arch_hw_breakpoint {
>   /* Note: Don't change the the first 6 bits below as they are in the same order
>    * as the dabr and dabrx.
>    */
> +#define HW_BRK_TYPE_DISABLE		0x00

I'd rather call it HW_BRK_TYPE_NONE

>   #define HW_BRK_TYPE_READ		0x01
>   #define HW_BRK_TYPE_WRITE		0x02
>   #define HW_BRK_TYPE_TRANSLATE		0x04
> @@ -68,7 +69,7 @@ static inline void hw_breakpoint_disable(void)
>   	struct arch_hw_breakpoint brk;
>   
>   	brk.address = 0;
> -	brk.type = 0;
> +	brk.type = HW_BRK_TYPE_DISABLE;
>   	brk.len = 0;
>   	if (ppc_breakpoint_available())
>   		__set_breakpoint(&brk);
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index f002d2ffff86..265fac9fb3a4 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -793,10 +793,22 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk)
>   	return __set_dabr(dabr, dabrx);
>   }
>   
> +static int disable_dawr(void)
> +{
> +	if (ppc_md.set_dawr)
> +		return ppc_md.set_dawr(0, 0);
> +
> +	mtspr(SPRN_DAWRX, 0);

And SPRN_DAWR ?

The above code looks pretty similar to the one at the end of set_dawr(). 
You should factorise it, for instance

static int __set_dawr(int dawr, int dawrx)
{
	if (ppc_md.set_dawr)
		return ppc_md.set_dawr(dawr, dawrx);
	mtspr(SPRN_DAWR, dawr);
	mtspr(SPRN_DAWRX, dawrx);
	return 0;
}

> +	return 0;
> +}
> +
>   int set_dawr(struct arch_hw_breakpoint *brk)
>   {
>   	unsigned long dawr, dawrx, mrd;
>   
> +	if (brk->type == HW_BRK_TYPE_DISABLE)
> +		return disable_dawr();

Then replace by __set_dawr(0, 0);

> +
>   	dawr = brk->address;
>   
>   	dawrx  = (brk->type & HW_BRK_TYPE_RDWR) << (63 - 58);
> 

And use the new helper at the end of the function too.

Christophe

^ permalink raw reply

* Re: [PATCH 3/5] Powerpc/hw-breakpoint: Refactor set_dawr()
From: Christophe Leroy @ 2019-06-18  6:24 UTC (permalink / raw)
  To: Ravi Bangoria, mpe
  Cc: mikey, linux-kernel, npiggin, paulus, naveen.n.rao, linuxppc-dev
In-Reply-To: <20190618042732.5582-4-ravi.bangoria@linux.ibm.com>



Le 18/06/2019 à 06:27, Ravi Bangoria a écrit :
> Remove unnecessary comments. Code itself is self explanatory.
> And, ISA already talks about MRD field. I Don't think we need
> to re-describe it.

In an RFC patch you may "don't think".
But in the final patch you need to make a decision and write it as such.

Ie, you should write: "We don't need to re-describe it."


> 
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
> ---
>   arch/powerpc/kernel/process.c | 17 +++++------------
>   1 file changed, 5 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index f0fbbf6a6a1f..f002d2ffff86 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -799,18 +799,11 @@ int set_dawr(struct arch_hw_breakpoint *brk)
>   
>   	dawr = brk->address;
>   
> -	dawrx  = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE)) \
> -		                   << (63 - 58); //* read/write bits */
> -	dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) \
> -		                   << (63 - 59); //* translate */
> -	dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) \
> -		                   >> 3; //* PRIM bits */
> -	/* dawr length is stored in field MDR bits 48:53.  Matches range in
> -	   doublewords (64 bits) baised by -1 eg. 0b000000=1DW and
> -	   0b111111=64DW.
> -	   brk->len is in bytes.
> -	   This aligns up to double word size, shifts and does the bias.
> -	*/
> +	dawrx  = (brk->type & HW_BRK_TYPE_RDWR) << (63 - 58);
> +	dawrx |= ((brk->type & HW_BRK_TYPE_TRANSLATE) >> 2) << (63 - 59);
> +	dawrx |= (brk->type & HW_BRK_TYPE_PRIV_ALL) >> 3;
> +
> +	/* brk->len is in bytes. */
>   	mrd = ((brk->len + 7) >> 3) - 1;
>   	dawrx |= (mrd & 0x3f) << (63 - 53);
>   
> 

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox