LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 0/2] link vdso with linker
From: Nick Desaulniers @ 2020-09-01 22:25 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin
  Cc: Christophe Leroy, Joe Lawrence, Kees Cook, Fangrui Song,
	Nick Desaulniers, linux-kernel, clang-built-linux, Paul Mackerras,
	linuxppc-dev

Kees Cook is working on series that adds --orphan-section=warn to arm,
arm64, and x86.  I noticed that ppc vdso were still using cc-ldoption
for these which I removed.  It seems this results in that flag being
silently dropped.

I'm very confident with the first patch, but the second needs closer
review around the error mentioned below the fold related to the .got
section.

Nick Desaulniers (2):
  powerpc/vdso64: link vdso64 with linker
  powerpc/vdso32: link vdso64 with linker

 arch/powerpc/include/asm/vdso.h         | 17 ++---------------
 arch/powerpc/kernel/vdso32/Makefile     |  7 +++++--
 arch/powerpc/kernel/vdso32/vdso32.lds.S |  3 ++-
 arch/powerpc/kernel/vdso64/Makefile     |  8 ++++++--
 arch/powerpc/kernel/vdso64/vdso64.lds.S |  1 -
 5 files changed, 15 insertions(+), 21 deletions(-)

-- 
2.28.0.402.g5ffc5be6b7-goog

^ permalink raw reply

* [PATCH 1/2] powerpc/vdso64: link vdso64 with linker
From: Nick Desaulniers @ 2020-09-01 22:25 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin
  Cc: Christophe Leroy, Joe Lawrence, Kees Cook, Fangrui Song,
	Nick Desaulniers, linux-kernel, clang-built-linux, Paul Mackerras,
	linuxppc-dev
In-Reply-To: <20200901222523.1941988-1-ndesaulniers@google.com>

Rather than invoke the compiler as the driver, use the linker. That way
we can check --orphan-handling=warn support correctly, as cc-ldoption
was removed in
commit 055efab3120b ("kbuild: drop support for cc-ldoption").

Painstakingly compared the output between `objdump -a` before and after
this change. Now function symbols have the correct type of FUNC rather
than NONE, and the entry is slightly different (which doesn't matter for
the vdso). Binary size is the same.

Fixes: commit f2af201002a8 ("powerpc/build: vdso linker warning for orphan sections")
Link: https://lore.kernel.org/lkml/CAKwvOdnn3wxYdJomvnveyD_njwRku3fABWT_bS92duihhywLJQ@mail.gmail.com/
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
---
 arch/powerpc/include/asm/vdso.h         | 17 ++---------------
 arch/powerpc/kernel/vdso64/Makefile     |  8 ++++++--
 arch/powerpc/kernel/vdso64/vdso64.lds.S |  1 -
 3 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h
index 2ff884853f97..11b2ecf49f79 100644
--- a/arch/powerpc/include/asm/vdso.h
+++ b/arch/powerpc/include/asm/vdso.h
@@ -24,19 +24,7 @@ int vdso_getcpu_init(void);
 
 #else /* __ASSEMBLY__ */
 
-#ifdef __VDSO64__
-#define V_FUNCTION_BEGIN(name)		\
-	.globl name;			\
-	name:				\
-
-#define V_FUNCTION_END(name)		\
-	.size name,.-name;
-
-#define V_LOCAL_FUNC(name) (name)
-#endif /* __VDSO64__ */
-
-#ifdef __VDSO32__
-
+#if defined(__VDSO32__) || defined (__VDSO64__)
 #define V_FUNCTION_BEGIN(name)		\
 	.globl name;			\
 	.type name,@function; 		\
@@ -46,8 +34,7 @@ int vdso_getcpu_init(void);
 	.size name,.-name;
 
 #define V_LOCAL_FUNC(name) (name)
-
-#endif /* __VDSO32__ */
+#endif /* __VDSO{32|64}__ */
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 38c317f25141..7ea3ce537d0a 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -32,9 +32,13 @@ $(obj)/%.so: OBJCOPYFLAGS := -S
 $(obj)/%.so: $(obj)/%.so.dbg FORCE
 	$(call if_changed,objcopy)
 
+ldflags-y := -shared -soname linux-vdso64.so.1 \
+	$(call ld-option, --eh-frame-hdr) \
+	$(call ld-option, --orphan-handling=warn) -T
+
 # actual build commands
-quiet_cmd_vdso64ld = VDSO64L $@
-      cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn)
+quiet_cmd_vdso64ld = LD      $@
+      cmd_vdso64ld = $(cmd_ld)
 
 # install commands for the unstripped file
 quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S
index 4e3a8d4ee614..58c33b704b6a 100644
--- a/arch/powerpc/kernel/vdso64/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S
@@ -11,7 +11,6 @@ OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle")
 OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", "elf64-powerpc")
 #endif
 OUTPUT_ARCH(powerpc:common64)
-ENTRY(_start)
 
 SECTIONS
 {
-- 
2.28.0.402.g5ffc5be6b7-goog


^ permalink raw reply related

* [PATCH 2/2] powerpc/vdso32: link vdso64 with linker
From: Nick Desaulniers @ 2020-09-01 22:25 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin
  Cc: Christophe Leroy, Joe Lawrence, Kees Cook, Fangrui Song,
	Nick Desaulniers, linux-kernel, clang-built-linux, Paul Mackerras,
	linuxppc-dev
In-Reply-To: <20200901222523.1941988-1-ndesaulniers@google.com>

Rather than invoke the compiler as the driver, use the linker. That way
we can check --orphan-handling=warn support correctly, as cc-ldoption
was removed in
commit 055efab3120b ("kbuild: drop support for cc-ldoption").

Requires dropping the .got section.  I couldn't find how it was used in
the vdso32.

Fixes: commit f2af201002a8 ("powerpc/build: vdso linker warning for orphan sections")
Link: https://lore.kernel.org/lkml/CAKwvOdnn3wxYdJomvnveyD_njwRku3fABWT_bS92duihhywLJQ@mail.gmail.com/
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
---
Not sure removing .got is a good idea or not.  Otherwise I observe the
following link error:
powerpc-linux-gnu-ld: warning: orphan section `.got' from `arch/powerpc/kernel/vdso32/sigtramp.o' being placed in section `.got'
powerpc-linux-gnu-ld: _GLOBAL_OFFSET_TABLE_ not defined in linker created .got
powerpc-linux-gnu-ld: final link failed: bad value

sigtramp.c doesn't mention anything from the GOT AFAICT, and doesn't
look like it contains relocations that do, so I'm not sure where
references to _GLOBAL_OFFSET_TABLE_ are coming from.

 arch/powerpc/kernel/vdso32/Makefile     | 7 +++++--
 arch/powerpc/kernel/vdso32/vdso32.lds.S | 3 ++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 87ab1152d5ce..611a5951945a 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -27,6 +27,9 @@ UBSAN_SANITIZE := n
 ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
 	-Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both
 asflags-y := -D__VDSO32__ -s
+ldflags-y := -shared -soname linux-vdso32.so.1 \
+	$(call ld-option, --eh-frame-hdr) \
+	$(call ld-option, --orphan-handling=warn) -T
 
 obj-y += vdso32_wrapper.o
 extra-y += vdso32.lds
@@ -49,8 +52,8 @@ $(obj-vdso32): %.o: %.S FORCE
 	$(call if_changed_dep,vdso32as)
 
 # actual build commands
-quiet_cmd_vdso32ld = VDSO32L $@
-      cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
+quiet_cmd_vdso32ld = LD      $@
+      cmd_vdso32ld = $(cmd_ld)
 quiet_cmd_vdso32as = VDSO32A $@
       cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $<
 
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 4c985467a668..0ccdebad18b8 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -61,7 +61,6 @@ SECTIONS
 	.fixup		: { *(.fixup) }
 
 	.dynamic	: { *(.dynamic) }		:text	:dynamic
-	.got		: { *(.got) }			:text
 	.plt		: { *(.plt) }
 
 	_end = .;
@@ -108,7 +107,9 @@ SECTIONS
 	.debug_varnames  0 : { *(.debug_varnames) }
 
 	/DISCARD/	: {
+		*(.got)
 		*(.note.GNU-stack)
+		*(.branch_lt)
 		*(.data .data.* .gnu.linkonce.d.* .sdata*)
 		*(.bss .sbss .dynbss .dynsbss)
 		*(.glink .iplt .plt .rela*)
-- 
2.28.0.402.g5ffc5be6b7-goog


^ permalink raw reply related

* Re: [PATCH v1 02/10] powerpc/kernel/iommu: Align size for IOMMU_PAGE_SIZE on iommu_*_coherent()
From: Leonardo Bras @ 2020-09-01 22:34 UTC (permalink / raw)
  To: Alexey Kardashevskiy, Michael Ellerman, Benjamin Herrenschmidt,
	Paul Mackerras, Christophe Leroy, Joel Stanley,
	Thiago Jung Bauermann, Ram Pai, Brian King,
	Murilo Fossa Vicentini, David Dai
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <81f106bd-8962-22f2-f14a-378d3486f57e@ozlabs.ru>

On Mon, 2020-08-31 at 10:47 +1000, Alexey Kardashevskiy wrote:
> > 
> > Maybe testing with host 64k pagesize and IOMMU 16MB pagesize in qemu
> > should be enough, is there any chance to get indirect mapping in qemu
> > like this? (DDW but with smaller DMA window available) 
> 
> You will have to hack the guest kernel to always do indirect mapping or
> hack QEMU's rtas_ibm_query_pe_dma_window() to return a small number of
> available TCEs. But you will be testing QEMU/KVM which behave quite
> differently to pHyp in this particular case.
> 

As you suggested before, building for 4k cpu pagesize should be the
best approach. It would allow testing for both pHyp and qemu scenarios.

> > > > > Because if we want the former (==support), then we'll have to align the
> > > > > size up to the bigger page size when allocating/zeroing system pages,
> > > > > etc. 
> > > > 
> > > > This part I don't understand. Why do we need to align everything to the
> > > > bigger pagesize? 
> > > > 
> > > > I mean, is not that enough that the range [ret, ret + size[ is both
> > > > allocated by mm and mapped on a iommu range?
> > > > 
> > > > Suppose a iommu_alloc_coherent() of 16kB on PAGESIZE = 4k and
> > > > IOMMU_PAGE_SIZE() == 64k.
> > > > Why 4 * cpu_pages mapped by a 64k IOMMU page is not enough? 
> > > > All the space the user asked for is allocated and mapped for DMA.
> > > 
> > > The user asked to map 16K, the rest - 48K - is used for something else
> > > (may be even mapped to another device) but you are making all 64K
> > > accessible by the device which only should be able to access 16K.
> > > 
> > > In practice, if this happens, H_PUT_TCE will simply fail.
> > 
> > I have noticed mlx5 driver getting a few bytes in a buffer, and using
> > iommu_map_page(). It does map a whole page for as few bytes as the user
> 
> Whole 4K system page or whole 64K iommu page?

I tested it in 64k system page + 64k iommu page.

The 64K system page may be used for anything, and a small portion of it
(say 128 bytes) needs to be used for DMA. 
The whole page is mapped by IOMMU, and the driver gets info of the
memory range it should access / modify.

> 
> > wants mapped, and the other bytes get used for something else, or just
> > mapped on another DMA page.
> > It seems to work fine.  
> 
> 
> With 4K system page and 64K IOMMU page? In practice it would take an
> effort or/and bad luck to see it crashing. Thanks,

I haven't tested it yet. On a 64k system page and 4k/64k iommu page, it
works as described above.

I am new to this, so I am trying to understand how a memory page mapped
as DMA, and used for something else could be a problem.

Thanks!

> 
> > > 
> > > > > Bigger pages are not the case here as I understand it.
> > > > 
> > > > I did not get this part, what do you mean?
> > > 
> > > Possible IOMMU page sizes are 4K, 64K, 2M, 16M, 256M, 1GB, and the
> > > supported set of sizes is different for P8/P9 and type of IO (PHB,
> > > NVLink/CAPI).
> > > 
> > > 
> > > > > > Update those functions to guarantee alignment with requested size
> > > > > > using IOMMU_PAGE_ALIGN() before doing iommu_alloc() / iommu_free().
> > > > > > 
> > > > > > Also, on iommu_range_alloc(), replace ALIGN(n, 1 << tbl->it_page_shift)
> > > > > > with IOMMU_PAGE_ALIGN(n, tbl), which seems easier to read.
> > > > > > 
> > > > > > Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
> > > > > > ---
> > > > > >  arch/powerpc/kernel/iommu.c | 17 +++++++++--------
> > > > > >  1 file changed, 9 insertions(+), 8 deletions(-)
> > > > > > 
> > > > > > diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> > > > > > index 9704f3f76e63..d7086087830f 100644
> > > > > > --- a/arch/powerpc/kernel/iommu.c
> > > > > > +++ b/arch/powerpc/kernel/iommu.c
> > > > > > @@ -237,10 +237,9 @@ static unsigned long iommu_range_alloc(struct device *dev,
> > > > > >  	}
> > > > > >  
> > > > > >  	if (dev)
> > > > > > -		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
> > > > > > -				      1 << tbl->it_page_shift);
> > > > > > +		boundary_size = IOMMU_PAGE_ALIGN(dma_get_seg_boundary(dev) + 1, tbl);
> > > > > 
> > > > > Run checkpatch.pl, should complain about a long line.
> > > > 
> > > > It's 86 columns long, which is less than the new limit of 100 columns
> > > > Linus announced a few weeks ago. checkpatch.pl was updated too:
> > > > https://www.phoronix.com/scan.php?page=news_item&px=Linux-Kernel-Deprecates-80-Col
> > > 
> > > Yay finally :) Thanks,
> > 
> > :)
> > 
> > > 
> > > > > >  	else
> > > > > > -		boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift);
> > > > > > +		boundary_size = IOMMU_PAGE_ALIGN(1UL << 32, tbl);
> > > > > >  	/* 4GB boundary for iseries_hv_alloc and iseries_hv_map */
> > > > > >  
> > > > > >  	n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset,
> > > > > > @@ -858,6 +857,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
> > > > > >  	unsigned int order;
> > > > > >  	unsigned int nio_pages, io_order;
> > > > > >  	struct page *page;
> > > > > > +	size_t size_io = size;
> > > > > >  
> > > > > >  	size = PAGE_ALIGN(size);
> > > > > >  	order = get_order(size);
> > > > > > @@ -884,8 +884,9 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
> > > > > >  	memset(ret, 0, size);
> > > > > >  
> > > > > >  	/* Set up tces to cover the allocated range */
> > > > > > -	nio_pages = size >> tbl->it_page_shift;
> > > > > > -	io_order = get_iommu_order(size, tbl);
> > > > > > +	size_io = IOMMU_PAGE_ALIGN(size_io, tbl);
> > > > > > +	nio_pages = size_io >> tbl->it_page_shift;
> > > > > > +	io_order = get_iommu_order(size_io, tbl);
> > > > > >  	mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
> > > > > >  			      mask >> tbl->it_page_shift, io_order, 0);
> > > > > >  	if (mapping == DMA_MAPPING_ERROR) {
> > > > > > @@ -900,11 +901,11 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
> > > > > >  			 void *vaddr, dma_addr_t dma_handle)
> > > > > >  {
> > > > > >  	if (tbl) {
> > > > > > -		unsigned int nio_pages;
> > > > > > +		size_t size_io = IOMMU_PAGE_ALIGN(size, tbl);
> > > > > > +		unsigned int nio_pages = size_io >> tbl->it_page_shift;
> > > > > >  
> > > > > > -		size = PAGE_ALIGN(size);
> > > > > > -		nio_pages = size >> tbl->it_page_shift;
> > > > > >  		iommu_free(tbl, dma_handle, nio_pages);
> > > > > > +
> > > > > 
> > > > > Unrelated new line.
> > > > 
> > > > Will be removed. Thanks!
> > > > 
> > > > > >  		size = PAGE_ALIGN(size);
> > > > > >  		free_pages((unsigned long)vaddr, get_order(size));
> > > > > >  	}
> > > > > > 


^ permalink raw reply

* Re: fsl_espi errors on v5.7.15
From: Chris Packham @ 2020-09-01 23:29 UTC (permalink / raw)
  To: Nicholas Piggin, benh@kernel.crashing.org, broonie@kernel.org,
	Heiner Kallweit, mpe@ellerman.id.au, paulus@samba.org
  Cc: linuxppc-dev@lists.ozlabs.org, linux-kernel@vger.kernel.org,
	linux-spi@vger.kernel.org
In-Reply-To: <1598940515.6e06nwgi0c.astroid@bobo.none>


On 1/09/20 6:14 pm, Nicholas Piggin wrote:
> Excerpts from Chris Packham's message of September 1, 2020 11:25 am:
>> On 1/09/20 12:33 am, Heiner Kallweit wrote:
>>> On 30.08.2020 23:59, Chris Packham wrote:
>>>> On 31/08/20 9:41 am, Heiner Kallweit wrote:
>>>>> On 30.08.2020 23:00, Chris Packham wrote:
>>>>>> On 31/08/20 12:30 am, Nicholas Piggin wrote:
>>>>>>> Excerpts from Chris Packham's message of August 28, 2020 8:07 am:
>>>>>> <snip>
>>>>>>
>>>>>>>>>>>> I've also now seen the RX FIFO not empty error on the T2080RDB
>>>>>>>>>>>>
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
>>>>>>>>>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>>>>>>>>>>>>
>>>>>>>>>>>> With my current workaround of emptying the RX FIFO. It seems
>>>>>>>>>>>> survivable. Interestingly it only ever seems to be 1 extra byte in the
>>>>>>>>>>>> RX FIFO and it seems to be after either a READ_SR or a READ_FSR.
>>>>>>>>>>>>
>>>>>>>>>>>> fsl_espi ffe110000.spi: tx 70
>>>>>>>>>>>> fsl_espi ffe110000.spi: rx 03
>>>>>>>>>>>> fsl_espi ffe110000.spi: Extra RX 00
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
>>>>>>>>>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>>>>>>>>>>>> fsl_espi ffe110000.spi: tx 05
>>>>>>>>>>>> fsl_espi ffe110000.spi: rx 00
>>>>>>>>>>>> fsl_espi ffe110000.spi: Extra RX 03
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
>>>>>>>>>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>>>>>>>>>>>> fsl_espi ffe110000.spi: tx 05
>>>>>>>>>>>> fsl_espi ffe110000.spi: rx 00
>>>>>>>>>>>> fsl_espi ffe110000.spi: Extra RX 03
>>>>>>>>>>>>
>>>>>>>>>>>>      From all the Micron SPI-NOR datasheets I've got access to it is
>>>>>>>>>>>> possible to continually read the SR/FSR. But I've no idea why it
>>>>>>>>>>>> happens some times and not others.
>>>>>>>>>>> So I think I've got a reproduction and I think I've bisected the problem
>>>>>>>>>>> to commit 3282a3da25bd ("powerpc/64: Implement soft interrupt replay in
>>>>>>>>>>> C"). My day is just finishing now so I haven't applied too much scrutiny
>>>>>>>>>>> to this result. Given the various rabbit holes I've been down on this
>>>>>>>>>>> issue already I'd take this information with a good degree of skepticism.
>>>>>>>>>>>
>>>>>>>>>> OK, so an easy test should be to re-test with a 5.4 kernel.
>>>>>>>>>> It doesn't have yet the change you're referring to, and the fsl-espi driver
>>>>>>>>>> is basically the same as in 5.7 (just two small changes in 5.7).
>>>>>>>>> There's 6cc0c16d82f88 and maybe also other interrupt related patches
>>>>>>>>> around this time that could affect book E, so it's good if that exact
>>>>>>>>> patch is confirmed.
>>>>>>>> My confirmation is basically that I can induce the issue in a 5.4 kernel
>>>>>>>> by cherry-picking 3282a3da25bd. I'm also able to "fix" the issue in
>>>>>>>> 5.9-rc2 by reverting that one commit.
>>>>>>>>
>>>>>>>> I both cases it's not exactly a clean cherry-pick/revert so I also
>>>>>>>> confirmed the bisection result by building at 3282a3da25bd (which sees
>>>>>>>> the issue) and the commit just before (which does not).
>>>>>>> Thanks for testing, that confirms it well.
>>>>>>>
>>>>>>> [snip patch]
>>>>>>>
>>>>>>>> I still saw the issue with this change applied. PPC_IRQ_SOFT_MASK_DEBUG
>>>>>>>> didn't report anything (either with or without the change above).
>>>>>>> Okay, it was a bit of a shot in the dark. I still can't see what
>>>>>>> else has changed.
>>>>>>>
>>>>>>> What would cause this, a lost interrupt? A spurious interrupt? Or
>>>>>>> higher interrupt latency?
>>>>>>>
>>>>>>> I don't think the patch should cause significantly worse latency,
>>>>>>> (it's supposed to be a bit better if anything because it doesn't set
>>>>>>> up the full interrupt frame). But it's possible.
>>>>>> My working theory is that the SPI_DON indication is all about the TX
>>>>>> direction an now that the interrupts are faster we're hitting an error
>>>>>> because there is still RX activity going on. Heiner disagrees with my
>>>>>> interpretation of the SPI_DON indication and the fact that it doesn't
>>>>>> happen every time does throw doubt on it.
>>>>>>
>>>>> It's right that the eSPI spec can be interpreted that SPI_DON refers to
>>>>> TX only. However this wouldn't really make sense, because also for RX
>>>>> we program the frame length, and therefore want to be notified once the
>>>>> full frame was received. Also practical experience shows that SPI_DON
>>>>> is set also after RX-only transfers.
>>>>> Typical SPI NOR use case is that you write read command + start address,
>>>>> followed by a longer read. If the TX-only interpretation would be right,
>>>>> we'd always end up with SPI_DON not being set.
>>>>>
>>>>>> I can't really explain the extra RX byte in the fifo. We know how many
>>>>>> bytes to expect and we pull that many from the fifo so it's not as if
>>>>>> we're missing an interrupt causing us to skip the last byte. I've been
>>>>>> looking for some kind of off-by-one calculation but again if it were
>>>>>> something like that it'd happen all the time.
>>>>>>
>>>>> Maybe it helps to know what value this extra byte in the FIFO has. Is it:
>>>>> - a duplicate of the last read byte
>>>>> - or the next byte (at <end address> + 1)
>>>>> - or a fixed value, e.g. always 0x00 or 0xff
>>>> The values were up thread a bit but I'll repeat them here
>>>>
>>>> fsl_espi ffe110000.spi: tx 70
>>>> fsl_espi ffe110000.spi: rx 03
>>>> fsl_espi ffe110000.spi: Extra RX 00
>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>>>> fsl_espi ffe110000.spi: tx 05
>>>> fsl_espi ffe110000.spi: rx 00
>>>> fsl_espi ffe110000.spi: Extra RX 03
>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>>>> fsl_espi ffe110000.spi: tx 05
>>>> fsl_espi ffe110000.spi: rx 00
>>>> fsl_espi ffe110000.spi: Extra RX 03
>>>>
>>>>
>>>> The rx 00 Extra RX 03 is a bit concerning. I've only ever seen them with
>>>> either a READ_SR or a READ_FSR. Never a data read.
>>>>
>>> Just remembered something about SPIE_DON:
>>> Transfers are always full duplex, therefore in case of a read the chip
>>> sends dummy zero's. Having said that in case of a read SPIE_DON means
>>> that the last dummy zero was shifted out.
>>>
>>> READ_SR and READ_FSR are the shortest transfers, 1 byte out and 1 byte in.
>>> So the issue may have a dependency on the length of the transfer.
>>> However I see no good explanation so far. You can try adding a delay of
>>> a few miroseconds between the following to commands in fsl_espi_bufs().
>>>
>>> 	fsl_espi_write_reg(espi, ESPI_SPIM, mask);
>>>
>>> 	/* Prevent filling the fifo from getting interrupted */
>>> 	spin_lock_irq(&espi->lock);
>>>
>>> Maybe enabling interrupts and seeing the SPIE_DON interrupt are too close.
>> I think this might be heading in the right direction. Playing about with
>> a delay does seem to make the two symptoms less likely. Although I have
>> to set it quite high (i.e. msleep(100)) to completely avoid any
>> possibility of seeing either message.
> The patch might replay the interrupt a little bit faster, but it would
> be a few microseconds at most I think (just from improved code).
>
> Would you be able to ftrace the interrupt handler function and see if you
> can see a difference in number or timing of interrupts? I'm at a bit of
> a loss.
This is getting really weird. I was setting up to run with ftrace and 
found I couldn't reproduce it on the tip of Linus's tree (currently 
pointing at e7a522c83b86). But I swear I could last week. Sure enough if 
I checkout 5.9-rc2 (or 5.7.15) I can reproduce the problem again.
> Thanks,
> Nick
>

^ permalink raw reply

* Re: fsl_espi errors on v5.7.15
From: Chris Packham @ 2020-09-01 23:31 UTC (permalink / raw)
  To: Nicholas Piggin, benh@kernel.crashing.org, broonie@kernel.org,
	Heiner Kallweit, mpe@ellerman.id.au, paulus@samba.org
  Cc: linuxppc-dev@lists.ozlabs.org, linux-kernel@vger.kernel.org,
	linux-spi@vger.kernel.org
In-Reply-To: <bfaaa982-33b9-c427-48a4-ddf9dd35e7b9@alliedtelesis.co.nz>


On 2/09/20 11:29 am, Chris Packham wrote:
>
> On 1/09/20 6:14 pm, Nicholas Piggin wrote:
>> Excerpts from Chris Packham's message of September 1, 2020 11:25 am:
>>> On 1/09/20 12:33 am, Heiner Kallweit wrote:
>>>> On 30.08.2020 23:59, Chris Packham wrote:
>>>>> On 31/08/20 9:41 am, Heiner Kallweit wrote:
>>>>>> On 30.08.2020 23:00, Chris Packham wrote:
>>>>>>> On 31/08/20 12:30 am, Nicholas Piggin wrote:
>>>>>>>> Excerpts from Chris Packham's message of August 28, 2020 8:07 am:
>>>>>>> <snip>
>>>>>>>
>>>>>>>>>>>>> I've also now seen the RX FIFO not empty error on the 
>>>>>>>>>>>>> T2080RDB
>>>>>>>>>>>>>
>>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's 
>>>>>>>>>>>>> aren't empty!
>>>>>>>>>>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>>>>>>>>>>>>>
>>>>>>>>>>>>> With my current workaround of emptying the RX FIFO. It seems
>>>>>>>>>>>>> survivable. Interestingly it only ever seems to be 1 extra 
>>>>>>>>>>>>> byte in the
>>>>>>>>>>>>> RX FIFO and it seems to be after either a READ_SR or a 
>>>>>>>>>>>>> READ_FSR.
>>>>>>>>>>>>>
>>>>>>>>>>>>> fsl_espi ffe110000.spi: tx 70
>>>>>>>>>>>>> fsl_espi ffe110000.spi: rx 03
>>>>>>>>>>>>> fsl_espi ffe110000.spi: Extra RX 00
>>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's 
>>>>>>>>>>>>> aren't empty!
>>>>>>>>>>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>>>>>>>>>>>>> fsl_espi ffe110000.spi: tx 05
>>>>>>>>>>>>> fsl_espi ffe110000.spi: rx 00
>>>>>>>>>>>>> fsl_espi ffe110000.spi: Extra RX 03
>>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's 
>>>>>>>>>>>>> aren't empty!
>>>>>>>>>>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>>>>>>>>>>>>> fsl_espi ffe110000.spi: tx 05
>>>>>>>>>>>>> fsl_espi ffe110000.spi: rx 00
>>>>>>>>>>>>> fsl_espi ffe110000.spi: Extra RX 03
>>>>>>>>>>>>>
>>>>>>>>>>>>>      From all the Micron SPI-NOR datasheets I've got 
>>>>>>>>>>>>> access to it is
>>>>>>>>>>>>> possible to continually read the SR/FSR. But I've no idea 
>>>>>>>>>>>>> why it
>>>>>>>>>>>>> happens some times and not others.
>>>>>>>>>>>> So I think I've got a reproduction and I think I've 
>>>>>>>>>>>> bisected the problem
>>>>>>>>>>>> to commit 3282a3da25bd ("powerpc/64: Implement soft 
>>>>>>>>>>>> interrupt replay in
>>>>>>>>>>>> C"). My day is just finishing now so I haven't applied too 
>>>>>>>>>>>> much scrutiny
>>>>>>>>>>>> to this result. Given the various rabbit holes I've been 
>>>>>>>>>>>> down on this
>>>>>>>>>>>> issue already I'd take this information with a good degree 
>>>>>>>>>>>> of skepticism.
>>>>>>>>>>>>
>>>>>>>>>>> OK, so an easy test should be to re-test with a 5.4 kernel.
>>>>>>>>>>> It doesn't have yet the change you're referring to, and the 
>>>>>>>>>>> fsl-espi driver
>>>>>>>>>>> is basically the same as in 5.7 (just two small changes in 
>>>>>>>>>>> 5.7).
>>>>>>>>>> There's 6cc0c16d82f88 and maybe also other interrupt related 
>>>>>>>>>> patches
>>>>>>>>>> around this time that could affect book E, so it's good if 
>>>>>>>>>> that exact
>>>>>>>>>> patch is confirmed.
>>>>>>>>> My confirmation is basically that I can induce the issue in a 
>>>>>>>>> 5.4 kernel
>>>>>>>>> by cherry-picking 3282a3da25bd. I'm also able to "fix" the 
>>>>>>>>> issue in
>>>>>>>>> 5.9-rc2 by reverting that one commit.
>>>>>>>>>
>>>>>>>>> I both cases it's not exactly a clean cherry-pick/revert so I 
>>>>>>>>> also
>>>>>>>>> confirmed the bisection result by building at 3282a3da25bd 
>>>>>>>>> (which sees
>>>>>>>>> the issue) and the commit just before (which does not).
>>>>>>>> Thanks for testing, that confirms it well.
>>>>>>>>
>>>>>>>> [snip patch]
>>>>>>>>
>>>>>>>>> I still saw the issue with this change applied. 
>>>>>>>>> PPC_IRQ_SOFT_MASK_DEBUG
>>>>>>>>> didn't report anything (either with or without the change above).
>>>>>>>> Okay, it was a bit of a shot in the dark. I still can't see what
>>>>>>>> else has changed.
>>>>>>>>
>>>>>>>> What would cause this, a lost interrupt? A spurious interrupt? Or
>>>>>>>> higher interrupt latency?
>>>>>>>>
>>>>>>>> I don't think the patch should cause significantly worse latency,
>>>>>>>> (it's supposed to be a bit better if anything because it 
>>>>>>>> doesn't set
>>>>>>>> up the full interrupt frame). But it's possible.
>>>>>>> My working theory is that the SPI_DON indication is all about 
>>>>>>> the TX
>>>>>>> direction an now that the interrupts are faster we're hitting an 
>>>>>>> error
>>>>>>> because there is still RX activity going on. Heiner disagrees 
>>>>>>> with my
>>>>>>> interpretation of the SPI_DON indication and the fact that it 
>>>>>>> doesn't
>>>>>>> happen every time does throw doubt on it.
>>>>>>>
>>>>>> It's right that the eSPI spec can be interpreted that SPI_DON 
>>>>>> refers to
>>>>>> TX only. However this wouldn't really make sense, because also 
>>>>>> for RX
>>>>>> we program the frame length, and therefore want to be notified 
>>>>>> once the
>>>>>> full frame was received. Also practical experience shows that 
>>>>>> SPI_DON
>>>>>> is set also after RX-only transfers.
>>>>>> Typical SPI NOR use case is that you write read command + start 
>>>>>> address,
>>>>>> followed by a longer read. If the TX-only interpretation would be 
>>>>>> right,
>>>>>> we'd always end up with SPI_DON not being set.
>>>>>>
>>>>>>> I can't really explain the extra RX byte in the fifo. We know 
>>>>>>> how many
>>>>>>> bytes to expect and we pull that many from the fifo so it's not 
>>>>>>> as if
>>>>>>> we're missing an interrupt causing us to skip the last byte. 
>>>>>>> I've been
>>>>>>> looking for some kind of off-by-one calculation but again if it 
>>>>>>> were
>>>>>>> something like that it'd happen all the time.
>>>>>>>
>>>>>> Maybe it helps to know what value this extra byte in the FIFO 
>>>>>> has. Is it:
>>>>>> - a duplicate of the last read byte
>>>>>> - or the next byte (at <end address> + 1)
>>>>>> - or a fixed value, e.g. always 0x00 or 0xff
>>>>> The values were up thread a bit but I'll repeat them here
>>>>>
>>>>> fsl_espi ffe110000.spi: tx 70
>>>>> fsl_espi ffe110000.spi: rx 03
>>>>> fsl_espi ffe110000.spi: Extra RX 00
>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
>>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>>>>> fsl_espi ffe110000.spi: tx 05
>>>>> fsl_espi ffe110000.spi: rx 00
>>>>> fsl_espi ffe110000.spi: Extra RX 03
>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
>>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>>>>> fsl_espi ffe110000.spi: tx 05
>>>>> fsl_espi ffe110000.spi: rx 00
>>>>> fsl_espi ffe110000.spi: Extra RX 03
>>>>>
>>>>>
>>>>> The rx 00 Extra RX 03 is a bit concerning. I've only ever seen 
>>>>> them with
>>>>> either a READ_SR or a READ_FSR. Never a data read.
>>>>>
>>>> Just remembered something about SPIE_DON:
>>>> Transfers are always full duplex, therefore in case of a read the chip
>>>> sends dummy zero's. Having said that in case of a read SPIE_DON means
>>>> that the last dummy zero was shifted out.
>>>>
>>>> READ_SR and READ_FSR are the shortest transfers, 1 byte out and 1 
>>>> byte in.
>>>> So the issue may have a dependency on the length of the transfer.
>>>> However I see no good explanation so far. You can try adding a 
>>>> delay of
>>>> a few miroseconds between the following to commands in 
>>>> fsl_espi_bufs().
>>>>
>>>>     fsl_espi_write_reg(espi, ESPI_SPIM, mask);
>>>>
>>>>     /* Prevent filling the fifo from getting interrupted */
>>>>     spin_lock_irq(&espi->lock);
>>>>
>>>> Maybe enabling interrupts and seeing the SPIE_DON interrupt are too 
>>>> close.
>>> I think this might be heading in the right direction. Playing about 
>>> with
>>> a delay does seem to make the two symptoms less likely. Although I have
>>> to set it quite high (i.e. msleep(100)) to completely avoid any
>>> possibility of seeing either message.
>> The patch might replay the interrupt a little bit faster, but it would
>> be a few microseconds at most I think (just from improved code).
>>
>> Would you be able to ftrace the interrupt handler function and see if 
>> you
>> can see a difference in number or timing of interrupts? I'm at a bit of
>> a loss.
> This is getting really weird. I was setting up to run with ftrace and 
> found I couldn't reproduce it on the tip of Linus's tree (currently 
> pointing at e7a522c83b86). But I swear I could last week. Sure enough 
> if I checkout 5.9-rc2 (or 5.7.15) I can reproduce the problem again.

*Sigh* my master branch still has the interrupt changes reverted.

^ permalink raw reply

* Re: ptrace_syscall_32 is failing
From: Thomas Gleixner @ 2020-09-01 23:50 UTC (permalink / raw)
  To: Andy Lutomirski, Brian Gerst
  Cc: linux-s390, linuxppc-dev, Catalin Marinas, Vasily Gorbik,
	Heiko Carstens, X86 ML, LKML, Christian Borntraeger,
	Paul Mackerras, Andy Lutomirski, Will Deacon, linux-arm-kernel
In-Reply-To: <CALCETrXY1x0MReMoTOG2awcZvr4c7gp99JVNthK37vUUk-kyew@mail.gmail.com>

On Sun, Aug 30 2020 at 08:52, Andy Lutomirski wrote:
>> > [RUN]    SYSCALL
>> > [FAIL]    Initial args are wrong (nr=29, args=0 0 0 0 0 4289172732)
>> > [RUN]    SYSCALL
>> > [OK]    Args after SIGUSR1 are correct (ax = -514)
>> > [OK]    Child got SIGUSR1
>> > [RUN]    Step again
>> > [OK]    pause(2) restarted correctly
>>
>> Bisected to commit 0b085e68f407 ("x86/entry: Consolidate 32/64 bit
>> syscall entry").
>> It looks like it is because syscall_enter_from_user_mode() is called
>> before reading the 6th argument from the user stack.

Bah.I don't know how I managed to miss that part and interestingly
enough that none of the robots caught that either

> Thomas, can we revert the syscall_enter() and syscall_exit() part of
> the series?

Hrm.

> I think that they almost work for x86, but not quite as
> indicated by this bug.  Even if we imagine we can somehow hack around
> this bug, I imagine we're going to find other problems with this
> model, e.g. the potential upcoming exit problem I noted in my review.

What's the upcoming problem?

> I really think the model should be:
>
> void do_syscall_whatever(...)
> {
>   irqentry_enter(...);
>   instrumentation_begin();
>
>   /* Do whatever arch ABI oddities are needed on entry. */
>
>   Then either:
>   syscall_begin(arch, nr, regs);
>   dispatch the syscall;
>   syscall_end(arch, nr, regs);
>
>   Or just:
>   generic_do_syscall(arch, nr, regs);
>
>   /* Do whatever arch ABI oddities are needed on exit from the syscall. */
>
>   instrumentation_end();
>   irqentry_exit(...);
> }

I don't think we want that in general. The current variant is perfectly
fine for everything except the 32bit fast syscall nonsense. Also
irqentry_entry/exit is not equivalent to the syscall_enter/exit
counterparts.

> x86 has an ABI oddity needed on entry: this fast syscall argument
> fixup.  We also might end up with ABI oddities on exit if we ever try
> to make single-stepping of syscalls work fully correctly.  x86 sort of
> gets away without specifying arch because the arch helpers that get
> called for audit, etc can deduce the arch, but this is kind of gross.
> I suppose we could omit arch as an explicit parameter.

I had that in one of the early versions and was advised to drop that.

> Or I suppose we could try to rejigger the API in time for 5.9.
> Fortunately only x86 uses the new APIs so far.  I cc'd a bunch of
> other arch maintainers to see if other architectures fit well in the
> new syscall_enter() model, but I feel like the fact that x86 is
> already broken indicates that we messed it up a bit.

It's not unfixable and the fix is close to what you suggested above
except that it preserves the straight forward stuff for the !32bit fast
syscall case. Completely untested patch below. I run proper tests
tomorrow with brain awake.

Thanks,

        tglx
---
 arch/x86/entry/common.c      |   29 ++++++++++++++++--------
 include/linux/entry-common.h |   51 +++++++++++++++++++++++++++++++++++--------
 kernel/entry/common.c        |   35 ++++++++++++++++++++++++-----
 3 files changed, 91 insertions(+), 24 deletions(-)

--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -60,16 +60,10 @@
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
 {
-	unsigned int nr = (unsigned int)regs->orig_ax;
-
 	if (IS_ENABLED(CONFIG_IA32_EMULATION))
 		current_thread_info()->status |= TS_COMPAT;
-	/*
-	 * Subtlety here: if ptrace pokes something larger than 2^32-1 into
-	 * orig_ax, the unsigned int return value truncates it.  This may
-	 * or may not be necessary, but it matches the old asm behavior.
-	 */
-	return (unsigned int)syscall_enter_from_user_mode(regs, nr);
+
+	return (unsigned int)regs->orig_ax;
 }
 
 /*
@@ -91,15 +85,29 @@ static __always_inline void do_syscall_3
 {
 	unsigned int nr = syscall_32_enter(regs);
 
+	/*
+	 * Subtlety here: if ptrace pokes something larger than 2^32-1 into
+	 * orig_ax, the unsigned int return value truncates it.  This may
+	 * or may not be necessary, but it matches the old asm behavior.
+	 */
+	nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
+
 	do_syscall_32_irqs_on(regs, nr);
 	syscall_exit_to_user_mode(regs);
 }
 
 static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 {
-	unsigned int nr	= syscall_32_enter(regs);
+	unsigned int nr = syscall_32_enter(regs);
 	int res;
 
+	/*
+	 * This cannot use syscall_enter_from_user_mode() as it has to
+	 * fetch EBP before invoking any of the syscall entry work
+	 * functions.
+	 */
+	syscall_enter_from_user_mode_prepare(regs);
+
 	instrumentation_begin();
 	/* Fetch EBP from where the vDSO stashed it. */
 	if (IS_ENABLED(CONFIG_X86_64)) {
@@ -122,6 +130,9 @@ static noinstr bool __do_fast_syscall_32
 		return false;
 	}
 
+	/* The case truncates any ptrace induced syscall nr > 2^32 -1 */
+	nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);
+
 	/* Now this is just like a normal syscall. */
 	do_syscall_32_irqs_on(regs, nr);
 	syscall_exit_to_user_mode(regs);
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -110,15 +110,30 @@ static inline __must_check int arch_sysc
 #endif
 
 /**
- * syscall_enter_from_user_mode - Check and handle work before invoking
- *				 a syscall
+ * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
  * @regs:	Pointer to currents pt_regs
- * @syscall:	The syscall number
  *
  * Invoked from architecture specific syscall entry code with interrupts
  * disabled. The calling code has to be non-instrumentable. When the
- * function returns all state is correct and the subsequent functions can be
- * instrumented.
+ * function returns all state is correct, interrupts are enabled and the
+ * subsequent functions can be instrumented.
+ *
+ * This handles lockdep, RCU (context tracking) and tracing state.
+ *
+ * This is invoked when there is extra architecture specific functionality
+ * to be done between establishing state and handling user mode entry work.
+ */
+void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
+
+/**
+ * syscall_enter_from_user_mode_work - Check and handle work before invoking
+ *				       a syscall
+ * @regs:	Pointer to currents pt_regs
+ * @syscall:	The syscall number
+ *
+ * Invoked from architecture specific syscall entry code with interrupts
+ * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
+ * architecture specific work.
  *
  * Returns: The original or a modified syscall number
  *
@@ -127,12 +142,30 @@ static inline __must_check int arch_sysc
  * syscall_set_return_value() first.  If neither of those are called and -1
  * is returned, then the syscall will fail with ENOSYS.
  *
- * The following functionality is handled here:
+ * It handles the following work items:
  *
- *  1) Establish state (lockdep, RCU (context tracking), tracing)
- *  2) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
+ *  1) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
  *     __secure_computing(), trace_sys_enter()
- *  3) Invocation of audit_syscall_entry()
+ *  2) Invocation of audit_syscall_entry()
+ */
+long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
+
+/**
+ * syscall_enter_from_user_mode - Establish state and check and handle work
+ *				  before invoking a syscall
+ * @regs:	Pointer to currents pt_regs
+ * @syscall:	The syscall number
+ *
+ * Invoked from architecture specific syscall entry code with interrupts
+ * disabled. The calling code has to be non-instrumentable. When the
+ * function returns all state is correct, interrupts are enabled and the
+ * subsequent functions can be instrumented.
+ *
+ * This is combination of syscall_enter_from_user_mode_prepare() and
+ * syscall_enter_from_user_mode_work().
+ *
+ * Returns: The original or a modified syscall number. See
+ * syscall_enter_from_user_mode_work() for further explanation.
  */
 long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
 
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -68,22 +68,45 @@ static long syscall_trace_enter(struct p
 	return ret ? : syscall;
 }
 
-noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
+static __always_inline long
+__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
 {
 	unsigned long ti_work;
 
-	enter_from_user_mode(regs);
-	instrumentation_begin();
-
-	local_irq_enable();
 	ti_work = READ_ONCE(current_thread_info()->flags);
 	if (ti_work & SYSCALL_ENTER_WORK)
 		syscall = syscall_trace_enter(regs, syscall, ti_work);
-	instrumentation_end();
 
 	return syscall;
 }
 
+long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
+{
+	return __syscall_enter_from_user_work(regs, syscall);
+}
+
+noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
+{
+	long ret;
+
+	enter_from_user_mode(regs);
+
+	instrumentation_begin();
+	local_irq_enable();
+	ret = __syscall_enter_from_user_work(regs, syscall);
+	instrumentation_end();
+
+	return ret;
+}
+
+noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
+{
+	enter_from_user_mode(regs);
+	instrumentation_begin();
+	local_irq_enable();
+	instrumentation_end();
+}
+
 /**
  * exit_to_user_mode - Fixup state when exiting to user mode
  *

^ permalink raw reply

* [PATCH v3] powerpc: Warn about use of smt_snooze_delay
From: Joel Stanley @ 2020-09-02  0:00 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gautham R . Shenoy

It's not done anything for a long time. Save the percpu variable, and
emit a warning to remind users to not expect it to do anything.

This uses pr_warn_once instead of pr_warn_ratelimit as testing
'ppc64_cpu --smt=off' on a 24 core / 4 SMT system showed the warning to
be noisy, as the online/offline loop is slow.

Fixes: 3fa8cad82b94 ("powerpc/pseries/cpuidle: smt-snooze-delay cleanup.")
Cc: stable@vger.kernel.org # v3.14
Acked-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Joel Stanley <joel@jms.id.au>
--
v3:
 pr_warn_once instead of pr_warn_ratelimited
 Update meessages with mpe's suggestions
v2:
 Use pr_warn instead of WARN
 Reword and print proccess name with pid in message
 Leave CPU_FTR_SMT test in
---
 arch/powerpc/kernel/sysfs.c | 42 +++++++++++++++----------------------
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 46b4ebc33db7..5dea98fa2f93 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -32,29 +32,27 @@
 
 static DEFINE_PER_CPU(struct cpu, cpu_devices);
 
-/*
- * SMT snooze delay stuff, 64-bit only for now
- */
-
 #ifdef CONFIG_PPC64
 
-/* Time in microseconds we delay before sleeping in the idle loop */
-static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
+/*
+ * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle:
+ * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in
+ * 2014:
+ *
+ *  "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean
+ *  up the kernel code."
+ *
+ * powerpc-utils stopped using it as of 1.3.8. At some point in the future this
+ * code should be removed.
+ */
 
 static ssize_t store_smt_snooze_delay(struct device *dev,
 				      struct device_attribute *attr,
 				      const char *buf,
 				      size_t count)
 {
-	struct cpu *cpu = container_of(dev, struct cpu, dev);
-	ssize_t ret;
-	long snooze;
-
-	ret = sscanf(buf, "%ld", &snooze);
-	if (ret != 1)
-		return -EINVAL;
-
-	per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
+	pr_warn_once("%s (%d) stored to unsupported smt_snooze_delay, which has no effect.\n",
+		     current->comm, current->pid);
 	return count;
 }
 
@@ -62,9 +60,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev,
 				     struct device_attribute *attr,
 				     char *buf)
 {
-	struct cpu *cpu = container_of(dev, struct cpu, dev);
-
-	return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id));
+	pr_warn_once("%s (%d) read from unsupported smt_snooze_delay\n",
+		     current->comm, current->pid);
+	return sprintf(buf, "100\n");
 }
 
 static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
@@ -72,16 +70,10 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
 
 static int __init setup_smt_snooze_delay(char *str)
 {
-	unsigned int cpu;
-	long snooze;
-
 	if (!cpu_has_feature(CPU_FTR_SMT))
 		return 1;
 
-	snooze = simple_strtol(str, NULL, 10);
-	for_each_possible_cpu(cpu)
-		per_cpu(smt_snooze_delay, cpu) = snooze;
-
+	pr_warn("smt-snooze-delay command line option has no effect\n");
 	return 1;
 }
 __setup("smt-snooze-delay=", setup_smt_snooze_delay);
-- 
2.28.0


^ permalink raw reply related

* Re: ptrace_syscall_32 is failing
From: Andy Lutomirski @ 2020-09-02  0:09 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: linux-s390, linuxppc-dev, Vasily Gorbik, Brian Gerst,
	Heiko Carstens, X86 ML, LKML, Christian Borntraeger,
	Paul Mackerras, Catalin Marinas, Andy Lutomirski, Will Deacon,
	linux-arm-kernel
In-Reply-To: <87k0xdjbtt.fsf@nanos.tec.linutronix.de>

On Tue, Sep 1, 2020 at 4:50 PM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> On Sun, Aug 30 2020 at 08:52, Andy Lutomirski wrote:
> >> > [RUN]    SYSCALL
> >> > [FAIL]    Initial args are wrong (nr=29, args=0 0 0 0 0 4289172732)
> >> > [RUN]    SYSCALL
> >> > [OK]    Args after SIGUSR1 are correct (ax = -514)
> >> > [OK]    Child got SIGUSR1
> >> > [RUN]    Step again
> >> > [OK]    pause(2) restarted correctly
> >>
> >> Bisected to commit 0b085e68f407 ("x86/entry: Consolidate 32/64 bit
> >> syscall entry").
> >> It looks like it is because syscall_enter_from_user_mode() is called
> >> before reading the 6th argument from the user stack.
>
> Bah.I don't know how I managed to miss that part and interestingly
> enough that none of the robots caught that either
>
> > Thomas, can we revert the syscall_enter() and syscall_exit() part of
> > the series?
>
> Hrm.
>
> > I think that they almost work for x86, but not quite as
> > indicated by this bug.  Even if we imagine we can somehow hack around
> > this bug, I imagine we're going to find other problems with this
> > model, e.g. the potential upcoming exit problem I noted in my review.
>
> What's the upcoming problem?

If we ever want to get single-stepping fully correct across syscalls,
we might need to inject SIGTRAP on syscall return. This would be more
awkward if we can't run instrumentable code after the syscall part of
the syscall is done.

>
> > I really think the model should be:
> >
> > void do_syscall_whatever(...)
> > {
> >   irqentry_enter(...);
> >   instrumentation_begin();
> >
> >   /* Do whatever arch ABI oddities are needed on entry. */
> >
> >   Then either:
> >   syscall_begin(arch, nr, regs);
> >   dispatch the syscall;
> >   syscall_end(arch, nr, regs);
> >
> >   Or just:
> >   generic_do_syscall(arch, nr, regs);
> >
> >   /* Do whatever arch ABI oddities are needed on exit from the syscall. */
> >
> >   instrumentation_end();
> >   irqentry_exit(...);
> > }
>
> I don't think we want that in general. The current variant is perfectly
> fine for everything except the 32bit fast syscall nonsense. Also
> irqentry_entry/exit is not equivalent to the syscall_enter/exit
> counterparts.

If there are any architectures in which actual work is needed to
figure out whether something is a syscall in the first place, they'll
want to do the usual kernel entry work before the syscall entry work.
Maybe your patch actually makes this possible -- I haven't digested
all the details yet.

Who advised you to drop the arch parameter?

> ---
>  arch/x86/entry/common.c      |   29 ++++++++++++++++--------
>  include/linux/entry-common.h |   51 +++++++++++++++++++++++++++++++++++--------
>  kernel/entry/common.c        |   35 ++++++++++++++++++++++++-----
>  3 files changed, 91 insertions(+), 24 deletions(-)
>
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -60,16 +60,10 @@
>  #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
>  static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
>  {
> -       unsigned int nr = (unsigned int)regs->orig_ax;
> -
>         if (IS_ENABLED(CONFIG_IA32_EMULATION))
>                 current_thread_info()->status |= TS_COMPAT;
> -       /*
> -        * Subtlety here: if ptrace pokes something larger than 2^32-1 into
> -        * orig_ax, the unsigned int return value truncates it.  This may
> -        * or may not be necessary, but it matches the old asm behavior.
> -        */
> -       return (unsigned int)syscall_enter_from_user_mode(regs, nr);
> +
> +       return (unsigned int)regs->orig_ax;
>  }
>
>  /*
> @@ -91,15 +85,29 @@ static __always_inline void do_syscall_3
>  {
>         unsigned int nr = syscall_32_enter(regs);
>
> +       /*
> +        * Subtlety here: if ptrace pokes something larger than 2^32-1 into
> +        * orig_ax, the unsigned int return value truncates it.  This may
> +        * or may not be necessary, but it matches the old asm behavior.
> +        */
> +       nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
> +
>         do_syscall_32_irqs_on(regs, nr);
>         syscall_exit_to_user_mode(regs);
>  }
>
>  static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
>  {
> -       unsigned int nr = syscall_32_enter(regs);
> +       unsigned int nr = syscall_32_enter(regs);
>         int res;
>
> +       /*
> +        * This cannot use syscall_enter_from_user_mode() as it has to
> +        * fetch EBP before invoking any of the syscall entry work
> +        * functions.
> +        */
> +       syscall_enter_from_user_mode_prepare(regs);

I'm getting lost in all these "enter" functions...

^ permalink raw reply

* Re: [PATCH] cpuidle-pseries: Fix CEDE latency conversion from tb to us
From: Joel Stanley @ 2020-09-02  1:08 UTC (permalink / raw)
  To: Gautham R. Shenoy
  Cc: linux-pm, Rafael J. Wysocki, Linux Kernel Mailing List,
	Vaidyanathan Srinivasan, linuxppc-dev
In-Reply-To: <1598969293-29228-1-git-send-email-ego@linux.vnet.ibm.com>

On Tue, 1 Sep 2020 at 14:09, Gautham R. Shenoy <ego@linux.vnet.ibm.com> wrote:
>
> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
>
> commit d947fb4c965c ("cpuidle: pseries: Fixup exit latency for
> CEDE(0)") sets the exit latency of CEDE(0) based on the latency values
> of the Extended CEDE states advertised by the platform. The values
> advertised by the platform are in timebase ticks. However the cpuidle
> framework requires the latency values in microseconds.
>
> If the tb-ticks value advertised by the platform correspond to a value
> smaller than 1us, during the conversion from tb-ticks to microseconds,
> in the current code, the result becomes zero. This is incorrect as it
> puts a CEDE state on par with the snooze state.
>
> This patch fixes this by rounding up the result obtained while
> converting the latency value from tb-ticks to microseconds.
>
> Fixes: commit d947fb4c965c ("cpuidle: pseries: Fixup exit latency for
> CEDE(0)")
>
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>

Reviewed-by: Joel Stanley <joel@jms.id.au>

Should you check for the zero case and print a warning?

> ---
>  drivers/cpuidle/cpuidle-pseries.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
> index ff6d99e..9043358 100644
> --- a/drivers/cpuidle/cpuidle-pseries.c
> +++ b/drivers/cpuidle/cpuidle-pseries.c
> @@ -361,7 +361,7 @@ static void __init fixup_cede0_latency(void)
>         for (i = 0; i < nr_xcede_records; i++) {
>                 struct xcede_latency_record *record = &payload->records[i];
>                 u64 latency_tb = be64_to_cpu(record->latency_ticks);
> -               u64 latency_us = tb_to_ns(latency_tb) / NSEC_PER_USEC;
> +               u64 latency_us = DIV_ROUND_UP_ULL(tb_to_ns(latency_tb), NSEC_PER_USEC);
>
>                 if (latency_us < min_latency_us)
>                         min_latency_us = latency_us;
> --
> 1.9.4
>

^ permalink raw reply

* Re: [PATCH] soc: fsl: Remove bogus packed attributes from qman.h
From: Herbert Xu @ 2020-09-02  1:34 UTC (permalink / raw)
  To: Li Yang
  Cc: linuxppc-dev@lists.ozlabs.org, Linux Kernel Mailing List,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <CADRPPNTt5dCX1pRUp5OenZBuMNJcN+k8jMVmUo5qw5g0VLZ4hQ@mail.gmail.com>

On Tue, Sep 01, 2020 at 04:40:16PM -0500, Li Yang wrote:
>
> Looks like the CAAM driver and dependent QBMAN driver doesn't support
> COMPILE_TEST yet.  Are you trying to add the support for it?

Yes.

> I think this is a valid concern that if the parent structure doesn't
> meet certain alignment requirements, the alignment for the
> sub-structure cannot be guaranteed.  If we just remove the __packed
> attribute from the parent structure, the compiler could try to add
> padding in the parent structure to fulfill the alignment requirements
> of the sub structure which is not good.  I think the following changes
> are a better fix for the warnings:

This works for me.  Thanks!
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* [PATCH v2] powerpc/powernv/pci: Drop pnv_phb->initialized
From: Oliver O'Halloran @ 2020-09-02  1:36 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

The pnv_phb->initialized flag is an odd beast. It was added back in 2012 in
commit db1266c85261 ("powerpc/powernv: Skip check on PE if necessary") to
allow devices to be enabled even if the device had not yet been assigned to
a PE. Allowing the device to be enabled before the PE is configured may
cause spurious EEH events since none of the IOMMU context has been setup.

I'm not entirely sure why this was ever necessary. My best guess is that it
was an workaround for a bug or some other undesireable behaviour from the
PCI core. Either way, it's unnecessary now since as of commit dc3d8f85bb57
("powerpc/powernv/pci: Re-work bus PE configuration") we can guarantee that
the PE will be configured before the PCI core will allow drivers to bind to
the device.

It's also worth pointing out that the ->initialized flag is only set in
pnv_pci_ioda_create_dbgfs(). That function has its entire body wrapped
in #ifdef CONFIG_DEBUG_FS. As a result, for kernels built without debugfs
(i.e. petitboot) the other checks in pnv_pci_enable_device_hook() are
bypassed entirely.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
v2: reword commit message a bit
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 17 -----------------
 arch/powerpc/platforms/powernv/pci.h      |  1 -
 2 files changed, 18 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 023a4f987bb2..6ac3c637b313 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2410,9 +2410,6 @@ static void pnv_pci_ioda_create_dbgfs(void)
 	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
 		phb = hose->private_data;

-		/* Notify initialization of PHB done */
-		phb->initialized = 1;
-
 		sprintf(name, "PCI%04x", hose->global_number);
 		phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);

@@ -2609,17 +2606,8 @@ static resource_size_t pnv_pci_default_alignment(void)
  */
 static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
 {
-	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
 	struct pci_dn *pdn;

-	/* The function is probably called while the PEs have
-	 * not be created yet. For example, resource reassignment
-	 * during PCI probe period. We just skip the check if
-	 * PEs isn't ready.
-	 */
-	if (!phb->initialized)
-		return true;
-
 	pdn = pci_get_pdn(dev);
 	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
 		return false;
@@ -2629,14 +2617,9 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev)

 static bool pnv_ocapi_enable_device_hook(struct pci_dev *dev)
 {
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	struct pnv_phb *phb = hose->private_data;
 	struct pci_dn *pdn;
 	struct pnv_ioda_pe *pe;

-	if (!phb->initialized)
-		return true;
-
 	pdn = pci_get_pdn(dev);
 	if (!pdn)
 		return false;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 739a0b3b72e1..36d22920f5a3 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -119,7 +119,6 @@ struct pnv_phb {
 	int			flags;
 	void __iomem		*regs;
 	u64			regs_phys;
-	int			initialized;
 	spinlock_t		lock;

 #ifdef CONFIG_DEBUG_FS
-- 
2.26.2

^ permalink raw reply related

* Re: [PATCH 1/7] powerpc/watchpoint/kvm: Rename current DAWR macros and variables
From: Paul Mackerras @ 2020-09-02  1:49 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: christophe.leroy, mikey, rogealve, kvm, linux-kernel, npiggin,
	kvm-ppc, linux-kselftest, jniethe5, pedromfc, pbonzini,
	linuxppc-dev
In-Reply-To: <20200723102058.312282-2-ravi.bangoria@linux.ibm.com>

On Thu, Jul 23, 2020 at 03:50:52PM +0530, Ravi Bangoria wrote:
> Power10 is introducing second DAWR. Use real register names (with
> suffix 0) from ISA for current macros and variables used by kvm.

Most of this looks fine, but I think we should not change the existing
names in arch/powerpc/include/uapi/asm/kvm.h (and therefore also
Documentation/virt/kvm/api.rst).

> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 426f94582b7a..4dc18fe6a2bf 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -2219,8 +2219,8 @@ registers, find a list below:
>    PPC     KVM_REG_PPC_BESCR               64
>    PPC     KVM_REG_PPC_TAR                 64
>    PPC     KVM_REG_PPC_DPDES               64
> -  PPC     KVM_REG_PPC_DAWR                64
> -  PPC     KVM_REG_PPC_DAWRX               64
> +  PPC     KVM_REG_PPC_DAWR0               64
> +  PPC     KVM_REG_PPC_DAWRX0              64
>    PPC     KVM_REG_PPC_CIABR               64
>    PPC     KVM_REG_PPC_IC                  64
>    PPC     KVM_REG_PPC_VTB                 64
...
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> index 264e266a85bf..38d61b73f5ed 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -608,8 +608,8 @@ struct kvm_ppc_cpu_char {
>  #define KVM_REG_PPC_BESCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa7)
>  #define KVM_REG_PPC_TAR		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa8)
>  #define KVM_REG_PPC_DPDES	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa9)
> -#define KVM_REG_PPC_DAWR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaa)
> -#define KVM_REG_PPC_DAWRX	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xab)
> +#define KVM_REG_PPC_DAWR0	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaa)
> +#define KVM_REG_PPC_DAWRX0	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xab)
>  #define KVM_REG_PPC_CIABR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xac)
>  #define KVM_REG_PPC_IC		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xad)
>  #define KVM_REG_PPC_VTB		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xae)

The existing names are an API, and if you change them you will break
compilation of existing userspace programs.  I don't see that adding
the '0' on the end is so important that we need to break userspace.

Paul.

^ permalink raw reply

* Re: [PATCH 2/7] powerpc/watchpoint/kvm: Add infrastructure to support 2nd DAWR
From: Paul Mackerras @ 2020-09-02  2:01 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: christophe.leroy, mikey, rogealve, kvm, linux-kernel, npiggin,
	kvm-ppc, linux-kselftest, jniethe5, pedromfc, pbonzini,
	linuxppc-dev
In-Reply-To: <20200723102058.312282-3-ravi.bangoria@linux.ibm.com>

On Thu, Jul 23, 2020 at 03:50:53PM +0530, Ravi Bangoria wrote:
> kvm code assumes single DAWR everywhere. Add code to support 2nd DAWR.
> DAWR is a hypervisor resource and thus H_SET_MODE hcall is used to set/
> unset it. Introduce new case H_SET_MODE_RESOURCE_SET_DAWR1 for 2nd DAWR.

Is this the same interface as will be defined in PAPR and available
under PowerVM, or is it a new/different interface for KVM?

> Also, kvm will support 2nd DAWR only if CPU_FTR_DAWR1 is set.

In general QEMU wants to be able to control all aspects of the virtual
machine presented to the guest, meaning that just because a host has a
particular hardware capability does not mean we should automatically
present that capability to the guest.

In this case, QEMU will want a way to control whether the guest sees
the availability of the second DAWR/X registers or not, i.e. whether a
H_SET_MODE to set DAWR[X]1 will succeed or fail.

Paul.

^ permalink raw reply

* Re: [PATCH 0/7] powerpc/watchpoint: 2nd DAWR kvm enablement + selftests
From: Paul Mackerras @ 2020-09-02  2:32 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: christophe.leroy, mikey, rogealve, kvm, linux-kernel, npiggin,
	kvm-ppc, linux-kselftest, jniethe5, pedromfc, pbonzini,
	linuxppc-dev
In-Reply-To: <20200723102058.312282-1-ravi.bangoria@linux.ibm.com>

On Thu, Jul 23, 2020 at 03:50:51PM +0530, Ravi Bangoria wrote:
> Patch #1, #2 and #3 enables p10 2nd DAWR feature for Book3S kvm guest. DAWR
> is a hypervisor resource and thus H_SET_MODE hcall is used to set/unset it.
> A new case H_SET_MODE_RESOURCE_SET_DAWR1 is introduced in H_SET_MODE hcall
> for setting/unsetting 2nd DAWR. Also, new capability KVM_CAP_PPC_DAWR1 has
> been added to query 2nd DAWR support via kvm ioctl.
> 
> This feature also needs to be enabled in Qemu to really use it. I'll reply
> link to qemu patches once I post them in qemu-devel mailing list.
> 
> Patch #4, #5, #6 and #7 adds selftests to test 2nd DAWR.

If/when you resubmit these patches, please split the KVM patches into
a separate series, since the KVM patches would go via my tree whereas
I expect the selftests/powerpc patches would go through Michael
Ellerman's tree.

Paul.

^ permalink raw reply

* Re: [PATCH 2/2] powerpc/vdso32: link vdso64 with linker
From: Kees Cook @ 2020-09-02  2:58 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: Christophe Leroy, Joe Lawrence, Fangrui Song, linux-kernel,
	Nicholas Piggin, clang-built-linux, Paul Mackerras, linuxppc-dev
In-Reply-To: <20200901222523.1941988-3-ndesaulniers@google.com>

I think $subject needs a typo update... vdso32...

On Tue, Sep 01, 2020 at 03:25:23PM -0700, Nick Desaulniers wrote:
> Rather than invoke the compiler as the driver, use the linker. That way
> we can check --orphan-handling=warn support correctly, as cc-ldoption
> was removed in
> commit 055efab3120b ("kbuild: drop support for cc-ldoption").
> 
> Requires dropping the .got section.  I couldn't find how it was used in
> the vdso32.
> 
> Fixes: commit f2af201002a8 ("powerpc/build: vdso linker warning for orphan sections")
> Link: https://lore.kernel.org/lkml/CAKwvOdnn3wxYdJomvnveyD_njwRku3fABWT_bS92duihhywLJQ@mail.gmail.com/
> Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
> ---
> Not sure removing .got is a good idea or not.  Otherwise I observe the
> following link error:
> powerpc-linux-gnu-ld: warning: orphan section `.got' from `arch/powerpc/kernel/vdso32/sigtramp.o' being placed in section `.got'
> powerpc-linux-gnu-ld: _GLOBAL_OFFSET_TABLE_ not defined in linker created .got
> powerpc-linux-gnu-ld: final link failed: bad value

If it's like the x86 and arm toolchains, I think you'll be required to
keep .got, but you can assert it to a 0 size, e.g.:

	/*
	 * Sections that should stay zero sized, which is safer to
	 * explicitly check instead of blindly discarding.
	 */
	.got : {
		*(.got)
	}
	ASSERT(SIZEOF(.got) == 0, "Unexpected GOT entries detected!")

(and put that at the end of the linker script)


-Kees

> 
> sigtramp.c doesn't mention anything from the GOT AFAICT, and doesn't
> look like it contains relocations that do, so I'm not sure where
> references to _GLOBAL_OFFSET_TABLE_ are coming from.
> 
>  arch/powerpc/kernel/vdso32/Makefile     | 7 +++++--
>  arch/powerpc/kernel/vdso32/vdso32.lds.S | 3 ++-
>  2 files changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
> index 87ab1152d5ce..611a5951945a 100644
> --- a/arch/powerpc/kernel/vdso32/Makefile
> +++ b/arch/powerpc/kernel/vdso32/Makefile
> @@ -27,6 +27,9 @@ UBSAN_SANITIZE := n
>  ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
>  	-Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both
>  asflags-y := -D__VDSO32__ -s
> +ldflags-y := -shared -soname linux-vdso32.so.1 \
> +	$(call ld-option, --eh-frame-hdr) \
> +	$(call ld-option, --orphan-handling=warn) -T
>  
>  obj-y += vdso32_wrapper.o
>  extra-y += vdso32.lds
> @@ -49,8 +52,8 @@ $(obj-vdso32): %.o: %.S FORCE
>  	$(call if_changed_dep,vdso32as)
>  
>  # actual build commands
> -quiet_cmd_vdso32ld = VDSO32L $@
> -      cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
> +quiet_cmd_vdso32ld = LD      $@
> +      cmd_vdso32ld = $(cmd_ld)
>  quiet_cmd_vdso32as = VDSO32A $@
>        cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $<
>  
> diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
> index 4c985467a668..0ccdebad18b8 100644
> --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
> +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
> @@ -61,7 +61,6 @@ SECTIONS
>  	.fixup		: { *(.fixup) }
>  
>  	.dynamic	: { *(.dynamic) }		:text	:dynamic
> -	.got		: { *(.got) }			:text
>  	.plt		: { *(.plt) }
>  
>  	_end = .;
> @@ -108,7 +107,9 @@ SECTIONS
>  	.debug_varnames  0 : { *(.debug_varnames) }
>  
>  	/DISCARD/	: {
> +		*(.got)
>  		*(.note.GNU-stack)
> +		*(.branch_lt)
>  		*(.data .data.* .gnu.linkonce.d.* .sdata*)
>  		*(.bss .sbss .dynbss .dynsbss)
>  		*(.glink .iplt .plt .rela*)
> -- 
> 2.28.0.402.g5ffc5be6b7-goog
> 

-- 
Kees Cook

^ permalink raw reply

* Re: [PATCH] powerpc: Fix random segfault when freeing hugetlb range
From: Aneesh Kumar K.V @ 2020-09-02  3:23 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <f0cb2a5477cd87d1eaadb128042e20aeb2bc2859.1598860677.git.christophe.leroy@csgroup.eu>

Christophe Leroy <christophe.leroy@csgroup.eu> writes:

> The following random segfault is observed from time to time with
> map_hugetlb selftest:
>
> root@localhost:~# ./map_hugetlb 1 19
> 524288 kB hugepages
> Mapping 1 Mbytes
> Segmentation fault
>
> [   31.219972] map_hugetlb[365]: segfault (11) at 117 nip 77974f8c lr 779a6834 code 1 in ld-2.23.so[77966000+21000]
> [   31.220192] map_hugetlb[365]: code: 9421ffc0 480318d1 93410028 90010044 9361002c 93810030 93a10034 93c10038
> [   31.220307] map_hugetlb[365]: code: 93e1003c 93210024 8123007c 81430038 <80e90004> 814a0004 7f443a14 813a0004
> [   31.221911] BUG: Bad rss-counter state mm:(ptrval) type:MM_FILEPAGES val:33
> [   31.229362] BUG: Bad rss-counter state mm:(ptrval) type:MM_ANONPAGES val:5
>
> This fault is due to hugetlb_free_pgd_range() freeing page tables
> that are also used by regular pages.
>
> As explain in the comment at the beginning of
> hugetlb_free_pgd_range(), the verification done in free_pgd_range()
> on floor and ceiling is not done here, which means
> hugetlb_free_pte_range() can free outside the expected range.
>
> As the verification cannot be done in hugetlb_free_pgd_range(), it
> must be done in hugetlb_free_pte_range().
>

Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

> Fixes: b250c8c08c79 ("powerpc/8xx: Manage 512k huge pages as standard pages.")
> Cc: stable@vger.kernel.org
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
>  arch/powerpc/mm/hugetlbpage.c | 18 ++++++++++++++++--
>  1 file changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
> index 26292544630f..e7ae2a2c4545 100644
> --- a/arch/powerpc/mm/hugetlbpage.c
> +++ b/arch/powerpc/mm/hugetlbpage.c
> @@ -330,10 +330,24 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
>  				 get_hugepd_cache_index(pdshift - shift));
>  }
>  
> -static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr)
> +static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
> +				   unsigned long addr, unsigned long end,
> +				   unsigned long floor, unsigned long ceiling)
>  {
> +	unsigned long start = addr;
>  	pgtable_t token = pmd_pgtable(*pmd);
>  
> +	start &= PMD_MASK;
> +	if (start < floor)
> +		return;
> +	if (ceiling) {
> +		ceiling &= PMD_MASK;
> +		if (!ceiling)
> +			return;
> +	}
> +	if (end - 1 > ceiling - 1)
> +		return;
> +

We do repeat that for pud/pmd/pte hugetlb_free_range. Can we consolidate
that with comment explaining we are checking if the pgtable entry is
mapping outside the range?

>  	pmd_clear(pmd);
>  	pte_free_tlb(tlb, token, addr);
>  	mm_dec_nr_ptes(tlb->mm);
> @@ -363,7 +377,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
>  			 */
>  			WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx));
>  
> -			hugetlb_free_pte_range(tlb, pmd, addr);
> +			hugetlb_free_pte_range(tlb, pmd, addr, end, floor, ceiling);
>  
>  			continue;
>  		}
> -- 
> 2.25.0

^ permalink raw reply

* [powerpc:fixes-test] BUILD SUCCESS fc1f178cdb31783ff37296ecae817a1045a1a513
From: kernel test robot @ 2020-09-02  3:38 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  fixes-test
branch HEAD: fc1f178cdb31783ff37296ecae817a1045a1a513  selftests/powerpc: Skip PROT_SAO test in guests/LPARS

elapsed time: 804m

configs tested: 166
configs skipped: 13

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
sh                           se7206_defconfig
mips                 pnx8335_stb225_defconfig
arm                            mmp2_defconfig
sh                           sh2007_defconfig
sh                        edosk7705_defconfig
mips                      bmips_stb_defconfig
sh                             espt_defconfig
mips                           rs90_defconfig
c6x                        evmc6474_defconfig
powerpc                           allnoconfig
arm                     eseries_pxa_defconfig
arm                      footbridge_defconfig
sh                            migor_defconfig
arm                         shannon_defconfig
riscv                             allnoconfig
sh                          rsk7264_defconfig
powerpc                      mgcoge_defconfig
sh                          r7780mp_defconfig
sh                           se7712_defconfig
sparc                       sparc64_defconfig
powerpc                             defconfig
s390                       zfcpdump_defconfig
arm                             pxa_defconfig
arm                          gemini_defconfig
microblaze                    nommu_defconfig
h8300                       h8s-sim_defconfig
powerpc                  storcenter_defconfig
arm                   milbeaut_m10v_defconfig
mips                            e55_defconfig
arm                        clps711x_defconfig
powerpc                    mvme5100_defconfig
sh                   sh7770_generic_defconfig
sh                           se7343_defconfig
mips                           jazz_defconfig
mips                malta_qemu_32r6_defconfig
mips                      pistachio_defconfig
c6x                        evmc6472_defconfig
arm                           efm32_defconfig
arm                        shmobile_defconfig
sh                         microdev_defconfig
arm                             rpc_defconfig
powerpc                      ppc40x_defconfig
powerpc                  mpc885_ads_defconfig
arm                       imx_v6_v7_defconfig
c6x                                 defconfig
ia64                         bigsur_defconfig
mips                        bcm63xx_defconfig
sh                          rsk7269_defconfig
ia64                                defconfig
sh                  sh7785lcr_32bit_defconfig
arm                         hackkit_defconfig
arm                        mvebu_v5_defconfig
xtensa                       common_defconfig
m68k                        m5407c3_defconfig
mips                         db1xxx_defconfig
arm                          ixp4xx_defconfig
sh                              ul2_defconfig
sh                         ecovec24_defconfig
mips                        nlm_xlp_defconfig
arm                            lart_defconfig
arm                      pxa255-idp_defconfig
arm                         mv78xx0_defconfig
arm                         s3c2410_defconfig
arm                              alldefconfig
riscv                    nommu_k210_defconfig
nios2                         3c120_defconfig
m68k                             alldefconfig
m68k                       m5475evb_defconfig
sh                ecovec24-romimage_defconfig
mips                         tb0287_defconfig
mips                     cu1000-neo_defconfig
mips                          malta_defconfig
powerpc                    gamecube_defconfig
arm                          pcm027_defconfig
arm                          simpad_defconfig
mips                      malta_kvm_defconfig
m68k                       m5249evb_defconfig
x86_64                              defconfig
sh                          kfr2r09_defconfig
arm                       aspeed_g5_defconfig
m68k                        stmark2_defconfig
nds32                               defconfig
sh                          r7785rp_defconfig
mips                         tb0226_defconfig
xtensa                         virt_defconfig
mips                         tb0219_defconfig
arm                          moxart_defconfig
arm                        magician_defconfig
arc                              allyesconfig
nds32                            alldefconfig
mips                    maltaup_xpa_defconfig
arm                            qcom_defconfig
mips                          rm200_defconfig
arc                         haps_hs_defconfig
powerpc                      ppc64e_defconfig
mips                         cobalt_defconfig
powerpc                       maple_defconfig
sh                          rsk7201_defconfig
arm                         nhk8815_defconfig
mips                         bigsur_defconfig
arm                        realview_defconfig
ia64                             allmodconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
nds32                             allnoconfig
c6x                              allyesconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
xtensa                           allyesconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
x86_64               randconfig-a004-20200901
x86_64               randconfig-a006-20200901
x86_64               randconfig-a003-20200901
x86_64               randconfig-a005-20200901
x86_64               randconfig-a001-20200901
x86_64               randconfig-a002-20200901
i386                 randconfig-a004-20200901
i386                 randconfig-a005-20200901
i386                 randconfig-a006-20200901
i386                 randconfig-a002-20200901
i386                 randconfig-a001-20200901
i386                 randconfig-a003-20200901
i386                 randconfig-a016-20200901
i386                 randconfig-a015-20200901
i386                 randconfig-a011-20200901
i386                 randconfig-a013-20200901
i386                 randconfig-a014-20200901
i386                 randconfig-a012-20200901
riscv                            allyesconfig
riscv                               defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a013-20200901
x86_64               randconfig-a016-20200901
x86_64               randconfig-a011-20200901
x86_64               randconfig-a012-20200901
x86_64               randconfig-a015-20200901
x86_64               randconfig-a014-20200901

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* [powerpc:merge] BUILD SUCCESS 35f066fda170dde0a31f1447547a5d30b83c3920
From: kernel test robot @ 2020-09-02  3:45 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  merge
branch HEAD: 35f066fda170dde0a31f1447547a5d30b83c3920  Automatic merge of 'master', 'next' and 'fixes' (2020-09-01 14:36)

elapsed time: 809m

configs tested: 147
configs skipped: 11

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
sh                           se7206_defconfig
mips                 pnx8335_stb225_defconfig
arm                            mmp2_defconfig
sh                           sh2007_defconfig
sh                        edosk7705_defconfig
mips                      bmips_stb_defconfig
sh                             espt_defconfig
mips                           rs90_defconfig
c6x                        evmc6474_defconfig
powerpc                           allnoconfig
arm                         shannon_defconfig
arm                     eseries_pxa_defconfig
arm                      footbridge_defconfig
riscv                             allnoconfig
sh                            migor_defconfig
powerpc                      pasemi_defconfig
sh                ecovec24-romimage_defconfig
arm                        mvebu_v5_defconfig
mips                      maltasmvp_defconfig
s390                       zfcpdump_defconfig
arm                             pxa_defconfig
arm                          gemini_defconfig
microblaze                    nommu_defconfig
h8300                       h8s-sim_defconfig
powerpc                  storcenter_defconfig
arm                   milbeaut_m10v_defconfig
mips                            e55_defconfig
arm                        clps711x_defconfig
powerpc                    mvme5100_defconfig
sh                   sh7770_generic_defconfig
sh                           se7343_defconfig
arm                           efm32_defconfig
powerpc                             defconfig
arm                        shmobile_defconfig
sh                         microdev_defconfig
arm                             rpc_defconfig
powerpc                      ppc40x_defconfig
sh                          rsk7269_defconfig
ia64                                defconfig
sh                  sh7785lcr_32bit_defconfig
arm                            lart_defconfig
arm                      pxa255-idp_defconfig
arm                         mv78xx0_defconfig
arm                         s3c2410_defconfig
arm                              alldefconfig
riscv                    nommu_k210_defconfig
nios2                         3c120_defconfig
m68k                             alldefconfig
m68k                       m5475evb_defconfig
mips                         tb0287_defconfig
mips                     cu1000-neo_defconfig
mips                          malta_defconfig
powerpc                    gamecube_defconfig
mips                      malta_kvm_defconfig
m68k                       m5249evb_defconfig
x86_64                              defconfig
sh                          kfr2r09_defconfig
c6x                                 defconfig
arm                       aspeed_g5_defconfig
m68k                        stmark2_defconfig
xtensa                         virt_defconfig
mips                         tb0219_defconfig
arm                          moxart_defconfig
arm                        magician_defconfig
nds32                            alldefconfig
mips                    maltaup_xpa_defconfig
arm                            dove_defconfig
powerpc                     mpc512x_defconfig
arm                            qcom_defconfig
mips                          rm200_defconfig
arc                         haps_hs_defconfig
powerpc                      ppc64e_defconfig
arm                          ixp4xx_defconfig
mips                         cobalt_defconfig
powerpc                       maple_defconfig
arm                          simpad_defconfig
sh                          rsk7201_defconfig
arm                         nhk8815_defconfig
mips                         bigsur_defconfig
arm                        realview_defconfig
ia64                             allmodconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
c6x                              allyesconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
x86_64               randconfig-a004-20200901
x86_64               randconfig-a006-20200901
x86_64               randconfig-a003-20200901
x86_64               randconfig-a005-20200901
x86_64               randconfig-a001-20200901
x86_64               randconfig-a002-20200901
i386                 randconfig-a004-20200901
i386                 randconfig-a005-20200901
i386                 randconfig-a006-20200901
i386                 randconfig-a002-20200901
i386                 randconfig-a001-20200901
i386                 randconfig-a003-20200901
i386                 randconfig-a016-20200901
i386                 randconfig-a015-20200901
i386                 randconfig-a011-20200901
i386                 randconfig-a013-20200901
i386                 randconfig-a014-20200901
i386                 randconfig-a012-20200901
riscv                            allyesconfig
riscv                               defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a013-20200901
x86_64               randconfig-a016-20200901
x86_64               randconfig-a011-20200901
x86_64               randconfig-a012-20200901
x86_64               randconfig-a015-20200901
x86_64               randconfig-a014-20200901

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* [powerpc:next-test] BUILD SUCCESS 64cc8701ff3161ff1c257f90375a1a3a8a083d78
From: kernel test robot @ 2020-09-02  3:45 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  next-test
branch HEAD: 64cc8701ff3161ff1c257f90375a1a3a8a083d78  powerpc/powernv: Print helpful message when cores guarded

elapsed time: 807m

configs tested: 155
configs skipped: 10

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
sh                           se7206_defconfig
mips                 pnx8335_stb225_defconfig
arm                            mmp2_defconfig
sh                           sh2007_defconfig
sh                        edosk7705_defconfig
mips                      bmips_stb_defconfig
sh                             espt_defconfig
mips                           rs90_defconfig
c6x                        evmc6474_defconfig
powerpc                           allnoconfig
arm                     eseries_pxa_defconfig
arm                      footbridge_defconfig
sh                            migor_defconfig
arm                         shannon_defconfig
riscv                             allnoconfig
powerpc                      pasemi_defconfig
sh                ecovec24-romimage_defconfig
arm                        mvebu_v5_defconfig
mips                      maltasmvp_defconfig
sh                          rsk7264_defconfig
powerpc                      mgcoge_defconfig
sh                          r7780mp_defconfig
sh                           se7712_defconfig
sparc                       sparc64_defconfig
powerpc                             defconfig
h8300                       h8s-sim_defconfig
powerpc                  storcenter_defconfig
arm                   milbeaut_m10v_defconfig
mips                            e55_defconfig
arm                           efm32_defconfig
arm                        shmobile_defconfig
sh                         microdev_defconfig
arm                             rpc_defconfig
powerpc                      ppc40x_defconfig
powerpc                  mpc885_ads_defconfig
arm                       imx_v6_v7_defconfig
c6x                                 defconfig
ia64                         bigsur_defconfig
mips                        bcm63xx_defconfig
sh                          rsk7269_defconfig
ia64                                defconfig
sh                  sh7785lcr_32bit_defconfig
s390                       zfcpdump_defconfig
arm                         hackkit_defconfig
xtensa                       common_defconfig
arm                            lart_defconfig
arm                      pxa255-idp_defconfig
arm                         mv78xx0_defconfig
arm                         s3c2410_defconfig
arm                              alldefconfig
riscv                    nommu_k210_defconfig
nios2                         3c120_defconfig
m68k                             alldefconfig
m68k                       m5475evb_defconfig
mips                         tb0287_defconfig
mips                     cu1000-neo_defconfig
mips                          malta_defconfig
powerpc                    gamecube_defconfig
mips                      malta_kvm_defconfig
m68k                       m5249evb_defconfig
x86_64                              defconfig
nds32                               defconfig
sh                          r7785rp_defconfig
mips                         tb0226_defconfig
xtensa                         virt_defconfig
mips                         tb0219_defconfig
arm                          moxart_defconfig
arm                        magician_defconfig
nds32                            alldefconfig
mips                    maltaup_xpa_defconfig
arm                            qcom_defconfig
mips                          rm200_defconfig
arc                         haps_hs_defconfig
powerpc                      ppc64e_defconfig
arm                          ixp4xx_defconfig
mips                         cobalt_defconfig
powerpc                       maple_defconfig
arm                          simpad_defconfig
sh                          rsk7201_defconfig
arm                         nhk8815_defconfig
arm                             pxa_defconfig
mips                         bigsur_defconfig
arm                        realview_defconfig
ia64                             allmodconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
c6x                              allyesconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
x86_64               randconfig-a004-20200901
x86_64               randconfig-a006-20200901
x86_64               randconfig-a003-20200901
x86_64               randconfig-a005-20200901
x86_64               randconfig-a001-20200901
x86_64               randconfig-a002-20200901
i386                 randconfig-a004-20200901
i386                 randconfig-a005-20200901
i386                 randconfig-a006-20200901
i386                 randconfig-a002-20200901
i386                 randconfig-a001-20200901
i386                 randconfig-a003-20200901
i386                 randconfig-a016-20200901
i386                 randconfig-a015-20200901
i386                 randconfig-a011-20200901
i386                 randconfig-a013-20200901
i386                 randconfig-a014-20200901
i386                 randconfig-a012-20200901
i386                 randconfig-a016-20200902
i386                 randconfig-a015-20200902
i386                 randconfig-a011-20200902
i386                 randconfig-a013-20200902
i386                 randconfig-a014-20200902
i386                 randconfig-a012-20200902
riscv                            allyesconfig
riscv                               defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a013-20200901
x86_64               randconfig-a016-20200901
x86_64               randconfig-a011-20200901
x86_64               randconfig-a012-20200901
x86_64               randconfig-a015-20200901
x86_64               randconfig-a014-20200901

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [PATCH v3 03/13] mm/debug_vm_pgtable/ppc64: Avoid setting top bits in radom value
From: Anshuman Khandual @ 2020-09-02  3:45 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, akpm
  Cc: linux-arch, linux-s390, Christophe Leroy, x86, Mike Rapoport,
	Qian Cai, Gerald Schaefer, Vineet Gupta, linux-snps-arc,
	linuxppc-dev, linux-arm-kernel
In-Reply-To: <3f20130a-f9fc-db9d-50a9-76aca5a1a6d7@linux.ibm.com>



On 09/01/2020 01:25 PM, Aneesh Kumar K.V wrote:
> On 9/1/20 1:16 PM, Anshuman Khandual wrote:
>>
>>
>> On 09/01/2020 01:06 PM, Aneesh Kumar K.V wrote:
>>> On 9/1/20 1:02 PM, Anshuman Khandual wrote:
>>>>
>>>>
>>>> On 09/01/2020 11:51 AM, Aneesh Kumar K.V wrote:
>>>>> On 9/1/20 8:45 AM, Anshuman Khandual wrote:
>>>>>>
>>>>>>
>>>>>> On 08/27/2020 01:34 PM, Aneesh Kumar K.V wrote:
>>>>>>> ppc64 use bit 62 to indicate a pte entry (_PAGE_PTE). Avoid setting that bit in
>>>>>>> random value.
>>>>>>>
>>>>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>>>>>>> ---
>>>>>>>     mm/debug_vm_pgtable.c | 13 ++++++++++---
>>>>>>>     1 file changed, 10 insertions(+), 3 deletions(-)
>>>>>>>
>>>>>>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
>>>>>>> index 086309fb9b6f..bbf9df0e64c6 100644
>>>>>>> --- a/mm/debug_vm_pgtable.c
>>>>>>> +++ b/mm/debug_vm_pgtable.c
>>>>>>> @@ -44,10 +44,17 @@
>>>>>>>      * entry type. But these bits might affect the ability to clear entries with
>>>>>>>      * pxx_clear() because of how dynamic page table folding works on s390. So
>>>>>>>      * while loading up the entries do not change the lower 4 bits. It does not
>>>>>>> - * have affect any other platform.
>>>>>>> + * have affect any other platform. Also avoid the 62nd bit on ppc64 that is
>>>>>>> + * used to mark a pte entry.
>>>>>>>      */
>>>>>>> -#define S390_MASK_BITS    4
>>>>>>> -#define RANDOM_ORVALUE    GENMASK(BITS_PER_LONG - 1, S390_MASK_BITS)
>>>>>>> +#define S390_SKIP_MASK        GENMASK(3, 0)
>>>>>>> +#ifdef CONFIG_PPC_BOOK3S_64
>>>>>>> +#define PPC64_SKIP_MASK        GENMASK(62, 62)
>>>>>>> +#else
>>>>>>> +#define PPC64_SKIP_MASK        0x0
>>>>>>> +#endif
>>>>>>
>>>>>> Please drop the #ifdef CONFIG_PPC_BOOK3S_64 here. We already accommodate skip
>>>>>> bits for a s390 platform requirement and can also do so for ppc64 as well. As
>>>>>> mentioned before, please avoid adding any platform specific constructs in the
>>>>>> test.
>>>>>>
>>>>>
>>>>>
>>>>> that is needed so that it can be built on 32 bit architectures.I did face build errors with arch-linux
>>>>
>>>> Could not (#if __BITS_PER_LONG == 32) be used instead or something like
>>>> that. But should be a generic conditional check identifying 32 bit arch
>>>> not anything platform specific.
>>>>
>>>
>>> that _PAGE_PTE bit is pretty much specific to PPC BOOK3S_64.  Not sure why other architectures need to bothered about ignoring bit 62.
>>
>> Thats okay as long it does not adversely affect other architectures, ignoring
>> some more bits is acceptable. Like existing S390_MASK_BITS gets ignored on all
>> other platforms even if it is a s390 specific constraint. Not having platform
>> specific #ifdef here, is essential.
>>
> 
> Why is it essential?
IIRC, I might have already replied on this couple of times. But let me try once more.
It is a generic test aimed at finding inconsistencies between different architectures
in terms of the page table helper semantics. Any platform specific construct here, to
'make things work' has the potential to hide such inconsistencies and defeat the very
purpose. The test/file here follows this rule consistently i.e there is not a single
platform specific #ifdef right now and would really like to continue maintaining this
property, unless until absolutely necessary. Current situation here wrt 32 bit archs
can easily be accommodated with a generic check such as __BITS_PER_LONG.

^ permalink raw reply

* Re: [PATCH v3 08/13] mm/debug_vm_pgtable/thp: Use page table depost/withdraw with THP
From: Anshuman Khandual @ 2020-09-02  3:45 UTC (permalink / raw)
  To: Christophe Leroy, Aneesh Kumar K.V, linux-mm, akpm
  Cc: linux-arch, linux-s390, x86, Mike Rapoport, Qian Cai,
	Gerald Schaefer, Vineet Gupta, linux-snps-arc, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <5d25b02a-887a-432e-7ecd-cc5cbcea9b02@csgroup.eu>



On 09/01/2020 01:21 PM, Christophe Leroy wrote:
> 
> 
> Le 01/09/2020 à 09:40, Aneesh Kumar K.V a écrit :
>> On 9/1/20 12:20 PM, Christophe Leroy wrote:
>>>
>>>
>>> Le 01/09/2020 à 08:25, Aneesh Kumar K.V a écrit :
>>>> On 9/1/20 8:52 AM, Anshuman Khandual wrote:
>>>>>
>>>>>
>>>>>
>>>>> There is a checkpatch.pl warning here.
>>>>>
>>>>> WARNING: Possible unwrapped commit description (prefer a maximum 75 chars per line)
>>>>> #7:
>>>>> Architectures like ppc64 use deposited page table while updating the huge pte
>>>>>
>>>>> total: 0 errors, 1 warnings, 40 lines checked
>>>>>
>>>>
>>>> I will ignore all these, because they are not really important IMHO.
>>>>
>>>
>>> When doing a git log in a 80 chars terminal window, having wrapping lines is not really convenient. It should be easy to avoid it.
>>>
>>
>> We have been ignoring that for a long time  isn't it?
>>
>> For example ppc64 checkpatch already had
>> --max-line-length=90
>>
>>
>> There was also recent discussion whether 80 character limit is valid any more. But I do keep it restricted to 80 character where ever it is easy/make sense.
>>
> 
> Here we are not talking about the code, but the commit log.
> 
> As far as I know, the discussions about 80 character lines, 90 lines in powerpc etc ... is for the code.
> 
> We still aim at keeping lines not longer than 75 chars in the commit log.

Agreed.

^ permalink raw reply

* Re: [PATCH v3 13/13] mm/debug_vm_pgtable: populate a pte entry before fetching it
From: Anshuman Khandual @ 2020-09-02  3:49 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, akpm
  Cc: linux-arch, linux-s390, Christophe Leroy, x86, Mike Rapoport,
	Qian Cai, Gerald Schaefer, Vineet Gupta, linux-snps-arc,
	linuxppc-dev, linux-arm-kernel
In-Reply-To: <7ef7c302-e7e6-570e-3100-5dd1bf9551be@linux.ibm.com>



On 09/01/2020 03:28 PM, Aneesh Kumar K.V wrote:
> On 9/1/20 1:08 PM, Anshuman Khandual wrote:
>>
>>
>> On 09/01/2020 12:07 PM, Aneesh Kumar K.V wrote:
>>> On 9/1/20 8:55 AM, Anshuman Khandual wrote:
>>>>
>>>>
>>>> On 08/27/2020 01:34 PM, Aneesh Kumar K.V wrote:
>>>>> pte_clear_tests operate on an existing pte entry. Make sure that is not a none
>>>>> pte entry.
>>>>>
>>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>>>>> ---
>>>>>    mm/debug_vm_pgtable.c | 6 ++++--
>>>>>    1 file changed, 4 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
>>>>> index 21329c7d672f..8527ebb75f2c 100644
>>>>> --- a/mm/debug_vm_pgtable.c
>>>>> +++ b/mm/debug_vm_pgtable.c
>>>>> @@ -546,7 +546,7 @@ static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
>>>>>    static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
>>>>>                       unsigned long vaddr)
>>>>>    {
>>>>> -    pte_t pte = ptep_get(ptep);
>>>>> +    pte_t pte =  ptep_get_and_clear(mm, vaddr, ptep);
>>>>
>>>> Seems like ptep_get_and_clear() here just clears the entry in preparation
>>>> for a following set_pte_at() which otherwise would have been a problem on
>>>> ppc64 as you had pointed out earlier i.e set_pte_at() should not update an
>>>> existing valid entry. So the commit message here is bit misleading.
>>>>
>>>
>>> and also fetch the pte value which is used further.
>>>
>>>
>>>>>          pr_debug("Validating PTE clear\n");
>>>>>        pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
>>>>> @@ -944,7 +944,7 @@ static int __init debug_vm_pgtable(void)
>>>>>        p4d_t *p4dp, *saved_p4dp;
>>>>>        pud_t *pudp, *saved_pudp;
>>>>>        pmd_t *pmdp, *saved_pmdp, pmd;
>>>>> -    pte_t *ptep;
>>>>> +    pte_t *ptep, pte;
>>>>>        pgtable_t saved_ptep;
>>>>>        pgprot_t prot, protnone;
>>>>>        phys_addr_t paddr;
>>>>> @@ -1049,6 +1049,8 @@ static int __init debug_vm_pgtable(void)
>>>>>         */
>>>>>          ptep = pte_alloc_map_lock(mm, pmdp, vaddr, &ptl);
>>>>> +    pte = pfn_pte(pte_aligned, prot);
>>>>> +    set_pte_at(mm, vaddr, ptep, pte);
>>>>
>>>> Not here, creating and populating an entry must be done in respective
>>>> test functions itself. Besides, this seems bit redundant as well. The
>>>> test pte_clear_tests() with the above change added, already
>>>>
>>>> - Clears the PTEP entry with ptep_get_and_clear()
>>>
>>> and fetch the old value set previously.
>>
>> In that case, please move above two lines i.e
>>
>> pte = pfn_pte(pte_aligned, prot);
>> set_pte_at(mm, vaddr, ptep, pte);
>>
>> from debug_vm_pgtable() to pte_clear_tests() and update it's arguments
>> as required.
>>
> 
> Frankly, I don't understand what these tests are testing. It all looks like some random clear and set.

The idea here is to have some value with some randomness preferably, in
a given PTEP before attempting to clear the entry, in order to make sure
that pte_clear() is indeed clearing something of non-zero value.

> 
> static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
>                    unsigned long vaddr, unsigned long pfn,
>                    pgprot_t prot)
> {
> 
>     pte_t pte = pfn_pte(pfn, prot);
>     set_pte_at(mm, vaddr, ptep, pte);
> 
>     pte =  ptep_get_and_clear(mm, vaddr, ptep);

Looking at this again, this preceding pfn_pte() followed by set_pte_at()
is not really required. Its reasonable to start with what ever was there
in the PTEP as a seed value which anyway gets added with RANDOM_ORVALUE.
s/ptep_get/ptep_get_and_clear is sufficient to take care of the powerpc
set_pte_at() constraint.

^ permalink raw reply

* [PATCH] powerpc/pci: Delete traverse_pci_dn()
From: Oliver O'Halloran @ 2020-09-02  3:51 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

Nothing uses it.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/include/asm/ppc-pci.h |  3 ---
 arch/powerpc/kernel/pci_dn.c       | 40 ------------------------------
 2 files changed, 43 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 0745422a8e57..2b9edbf6e929 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -28,9 +28,6 @@ struct pci_dn;
 void *pci_traverse_device_nodes(struct device_node *start,
 				void *(*fn)(struct device_node *, void *),
 				void *data);
-void *traverse_pci_dn(struct pci_dn *root,
-		      void *(*fn)(struct pci_dn *, void *),
-		      void *data);
 extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);
 
 /* From rtas_pci.h */
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index e99b7c547d7e..54e240597fd9 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -443,46 +443,6 @@ void *pci_traverse_device_nodes(struct device_node *start,
 }
 EXPORT_SYMBOL_GPL(pci_traverse_device_nodes);
 
-static struct pci_dn *pci_dn_next_one(struct pci_dn *root,
-				      struct pci_dn *pdn)
-{
-	struct list_head *next = pdn->child_list.next;
-
-	if (next != &pdn->child_list)
-		return list_entry(next, struct pci_dn, list);
-
-	while (1) {
-		if (pdn == root)
-			return NULL;
-
-		next = pdn->list.next;
-		if (next != &pdn->parent->child_list)
-			break;
-
-		pdn = pdn->parent;
-	}
-
-	return list_entry(next, struct pci_dn, list);
-}
-
-void *traverse_pci_dn(struct pci_dn *root,
-		      void *(*fn)(struct pci_dn *, void *),
-		      void *data)
-{
-	struct pci_dn *pdn = root;
-	void *ret;
-
-	/* Only scan the child nodes */
-	for (pdn = pci_dn_next_one(root, pdn); pdn;
-	     pdn = pci_dn_next_one(root, pdn)) {
-		ret = fn(pdn, data);
-		if (ret)
-			return ret;
-	}
-
-	return NULL;
-}
-
 static void *add_pdn(struct device_node *dn, void *data)
 {
 	struct pci_controller *hose = data;
-- 
2.26.2


^ permalink raw reply related

* [PATCH] powerpc/pci: Remove unimplemented prototypes
From: Oliver O'Halloran @ 2020-09-02  3:51 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

The corresponding definitions were deleted in commit 3d5134ee8341
("[POWERPC] Rewrite IO allocation & mapping on powerpc64") which
was merged a mere 13 years ago.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/include/asm/ppc-pci.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 7f4be5a05eb3..0745422a8e57 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -13,10 +13,6 @@
 
 extern unsigned long isa_io_base;
 
-extern void pci_setup_phb_io(struct pci_controller *hose, int primary);
-extern void pci_setup_phb_io_dynamic(struct pci_controller *hose, int primary);
-
-
 extern struct list_head hose_list;
 
 extern struct pci_dev *isa_bridge_pcidev;	/* may be NULL if no ISA bus */
-- 
2.26.2


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox