* Re: [PATCH 3/3] powerpc: rewrite atomics to use ARCH_ATOMIC
From: Boqun Feng @ 2020-11-13 15:30 UTC (permalink / raw)
To: Nicholas Piggin
Cc: Christophe Leroy, linux-arch, Arnd Bergmann, Peter Zijlstra,
linuxppc-dev, linux-kernel, Alexey Kardashevskiy, Will Deacon
In-Reply-To: <20201111110723.3148665-4-npiggin@gmail.com>
Hi Nicholas,
On Wed, Nov 11, 2020 at 09:07:23PM +1000, Nicholas Piggin wrote:
> All the cool kids are doing it.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> arch/powerpc/include/asm/atomic.h | 681 ++++++++++-------------------
> arch/powerpc/include/asm/cmpxchg.h | 62 +--
> 2 files changed, 248 insertions(+), 495 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
> index 8a55eb8cc97b..899aa2403ba7 100644
> --- a/arch/powerpc/include/asm/atomic.h
> +++ b/arch/powerpc/include/asm/atomic.h
> @@ -11,185 +11,285 @@
> #include <asm/cmpxchg.h>
> #include <asm/barrier.h>
>
> +#define ARCH_ATOMIC
> +
> +#ifndef CONFIG_64BIT
> +#include <asm-generic/atomic64.h>
> +#endif
> +
> /*
> * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with
> * a "bne-" instruction at the end, so an isync is enough as a acquire barrier
> * on the platform without lwsync.
> */
> #define __atomic_acquire_fence() \
> - __asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory")
> + asm volatile(PPC_ACQUIRE_BARRIER "" : : : "memory")
>
> #define __atomic_release_fence() \
> - __asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
> + asm volatile(PPC_RELEASE_BARRIER "" : : : "memory")
>
> -static __inline__ int atomic_read(const atomic_t *v)
> -{
> - int t;
> +#define __atomic_pre_full_fence smp_mb
>
> - __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
> +#define __atomic_post_full_fence smp_mb
>
Do you need to define __atomic_{pre,post}_full_fence for PPC? IIRC, they
are default smp_mb__{before,atomic}_atomic(), so are smp_mb() defautly
on PPC.
> - return t;
> +#define arch_atomic_read(v) __READ_ONCE((v)->counter)
> +#define arch_atomic_set(v, i) __WRITE_ONCE(((v)->counter), (i))
> +#ifdef CONFIG_64BIT
> +#define ATOMIC64_INIT(i) { (i) }
> +#define arch_atomic64_read(v) __READ_ONCE((v)->counter)
> +#define arch_atomic64_set(v, i) __WRITE_ONCE(((v)->counter), (i))
> +#endif
> +
[...]
>
> +#define ATOMIC_FETCH_OP_UNLESS_RELAXED(name, type, dtype, width, asm_op) \
> +static inline int arch_##name##_relaxed(type *v, dtype a, dtype u) \
I don't think we have atomic_fetch_*_unless_relaxed() at atomic APIs,
ditto for:
atomic_fetch_add_unless_relaxed()
atomic_inc_not_zero_relaxed()
atomic_dec_if_positive_relaxed()
, and we don't have the _acquire() and _release() variants for them
either, and if you don't define their fully-ordered version (e.g.
atomic_inc_not_zero()), atomic-arch-fallback.h will use read and cmpxchg
to implement them, and I think not what we want.
[...]
>
> #endif /* __KERNEL__ */
> #endif /* _ASM_POWERPC_ATOMIC_H_ */
> diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
> index cf091c4c22e5..181f7e8b3281 100644
> --- a/arch/powerpc/include/asm/cmpxchg.h
> +++ b/arch/powerpc/include/asm/cmpxchg.h
> @@ -192,7 +192,7 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
> (unsigned long)_x_, sizeof(*(ptr))); \
> })
>
> -#define xchg_relaxed(ptr, x) \
> +#define arch_xchg_relaxed(ptr, x) \
> ({ \
> __typeof__(*(ptr)) _x_ = (x); \
> (__typeof__(*(ptr))) __xchg_relaxed((ptr), \
> @@ -448,35 +448,7 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
> return old;
> }
>
> -static __always_inline unsigned long
> -__cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
> - unsigned int size)
> -{
> - switch (size) {
> - case 1:
> - return __cmpxchg_u8_acquire(ptr, old, new);
> - case 2:
> - return __cmpxchg_u16_acquire(ptr, old, new);
> - case 4:
> - return __cmpxchg_u32_acquire(ptr, old, new);
> -#ifdef CONFIG_PPC64
> - case 8:
> - return __cmpxchg_u64_acquire(ptr, old, new);
> -#endif
> - }
> - BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire");
> - return old;
> -}
> -#define cmpxchg(ptr, o, n) \
> - ({ \
> - __typeof__(*(ptr)) _o_ = (o); \
> - __typeof__(*(ptr)) _n_ = (n); \
> - (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \
> - (unsigned long)_n_, sizeof(*(ptr))); \
> - })
> -
> -
If you remove {atomic_}_cmpxchg_{,_acquire}() and use the version
provided by atomic-arch-fallback.h, then a fail cmpxchg or
cmpxchg_acquire() will still result into a full barrier or a acquire
barrier after the RMW operation, the barrier is not necessary and
probably this is not what we want?
Regards,
Boqun
> -#define cmpxchg_local(ptr, o, n) \
> +#define arch_cmpxchg_local(ptr, o, n) \
> ({ \
> __typeof__(*(ptr)) _o_ = (o); \
> __typeof__(*(ptr)) _n_ = (n); \
> @@ -484,7 +456,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
> (unsigned long)_n_, sizeof(*(ptr))); \
> })
>
> -#define cmpxchg_relaxed(ptr, o, n) \
> +#define arch_cmpxchg_relaxed(ptr, o, n) \
> ({ \
> __typeof__(*(ptr)) _o_ = (o); \
> __typeof__(*(ptr)) _n_ = (n); \
> @@ -493,38 +465,20 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
> sizeof(*(ptr))); \
> })
>
> -#define cmpxchg_acquire(ptr, o, n) \
> -({ \
> - __typeof__(*(ptr)) _o_ = (o); \
> - __typeof__(*(ptr)) _n_ = (n); \
> - (__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \
> - (unsigned long)_o_, (unsigned long)_n_, \
> - sizeof(*(ptr))); \
> -})
> #ifdef CONFIG_PPC64
> -#define cmpxchg64(ptr, o, n) \
> - ({ \
> - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
> - cmpxchg((ptr), (o), (n)); \
> - })
> -#define cmpxchg64_local(ptr, o, n) \
> +#define arch_cmpxchg64_local(ptr, o, n) \
> ({ \
> BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
> - cmpxchg_local((ptr), (o), (n)); \
> + arch_cmpxchg_local((ptr), (o), (n)); \
> })
> -#define cmpxchg64_relaxed(ptr, o, n) \
> -({ \
> - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
> - cmpxchg_relaxed((ptr), (o), (n)); \
> -})
> -#define cmpxchg64_acquire(ptr, o, n) \
> +#define arch_cmpxchg64_relaxed(ptr, o, n) \
> ({ \
> BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
> - cmpxchg_acquire((ptr), (o), (n)); \
> + arch_cmpxchg_relaxed((ptr), (o), (n)); \
> })
> #else
> #include <asm-generic/cmpxchg-local.h>
> -#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
> +#define arch_cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
> #endif
>
> #endif /* __KERNEL__ */
> --
> 2.23.0
>
^ permalink raw reply
* [PATCH] arch: pgtable: define MAX_POSSIBLE_PHYSMEM_BITS where needed
From: Arnd Bergmann @ 2020-11-13 14:59 UTC (permalink / raw)
To: linux-mm
Cc: linux-arch, linux-snps-arc, Arnd Bergmann, Vineet Gupta,
Albert Ou, Paul Walmsley, Russell King, Stefan Agner, linux-mips,
Minchan Kim, Thomas Bogendoerfer, Paul Mackerras,
Kirill A . Shutemov, Palmer Dabbelt, linux-riscv, Nitin Gupta,
linuxppc-dev, Mike Rapoport, linux-arm-kernel
From: Arnd Bergmann <arnd@arndb.de>
Stefan Agner reported a bug when using zsram on 32-bit Arm machines
with RAM above the 4GB address boundary:
Unable to handle kernel NULL pointer dereference at virtual address 00000000
pgd = a27bd01c
[00000000] *pgd=236a0003, *pmd=1ffa64003
Internal error: Oops: 207 [#1] SMP ARM
Modules linked in: mdio_bcm_unimac(+) brcmfmac cfg80211 brcmutil raspberrypi_hwmon hci_uart crc32_arm_ce bcm2711_thermal phy_generic genet
CPU: 0 PID: 123 Comm: mkfs.ext4 Not tainted 5.9.6 #1
Hardware name: BCM2711
PC is at zs_map_object+0x94/0x338
LR is at zram_bvec_rw.constprop.0+0x330/0xa64
pc : [<c0602b38>] lr : [<c0bda6a0>] psr: 60000013
sp : e376bbe0 ip : 00000000 fp : c1e2921c
r10: 00000002 r9 : c1dda730 r8 : 00000000
r7 : e8ff7a00 r6 : 00000000 r5 : 02f9ffa0 r4 : e3710000
r3 : 000fdffe r2 : c1e0ce80 r1 : ebf979a0 r0 : 00000000
Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user
Control: 30c5383d Table: 235c2a80 DAC: fffffffd
Process mkfs.ext4 (pid: 123, stack limit = 0x495a22e6)
Stack: (0xe376bbe0 to 0xe376c000)
As it turns out, zsram needs to know the maximum memory size, which
is defined in MAX_PHYSMEM_BITS when CONFIG_SPARSEMEM is set, or in
MAX_POSSIBLE_PHYSMEM_BITS on the x86 architecture.
The same problem will be hit on all 32-bit architectures that have a
physical address space larger than 4GB and happen to not enable sparsemem
and include asm/sparsemem.h from asm/pgtable.h.
After the initial discussion, I suggested just always defining
MAX_POSSIBLE_PHYSMEM_BITS whenever CONFIG_PHYS_ADDR_T_64BIT is
set, or provoking a build error otherwise. This addresses all
configurations that can currently have this runtime bug, but
leaves all other configurations unchanged.
I looked up the possible number of bits in source code and
datasheets, here is what I found:
- on ARC, CONFIG_ARC_HAS_PAE40 controls whether 32 or 40 bits are used
- on ARM, CONFIG_LPAE enables 40 bit addressing, without it we never
support more than 32 bits, even though supersections in theory allow
up to 40 bits as well.
- on MIPS, some MIPS32r1 or later chips support 36 bits, and MIPS32r5
XPA supports up to 60 bits in theory, but 40 bits are more than
anyone will ever ship
- On PowerPC, there are three different implementations of 36 bit
addressing, but 32-bit is used without CONFIG_PTE_64BIT
- On RISC-V, the normal page table format can support 34 bit
addressing. There is no highmem support on RISC-V, so anything
above 2GB is unused, but it might be useful to eventually support
CONFIG_ZRAM for high pages.
Fixes: 61989a80fb3a ("staging: zsmalloc: zsmalloc memory allocation library")
Fixes: 02390b87a945 ("mm/zsmalloc: Prepare to variable MAX_PHYSMEM_BITS")
Cc: Stefan Agner <stefan@agner.ch>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: linux-snps-arc@lists.infradead.org
Cc: Russell King <linux@armlinux.org.uk>
Cc: linux-arm-kernel@lists.infradead.org
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: linux-mips@vger.kernel.org
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: linux-riscv@lists.infradead.org
Link: https://lore.kernel.org/linux-mm/bdfa44bf1c570b05d6c70898e2bbb0acf234ecdf.1604762181.git.stefan@agner.ch/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
If everyone is happy with this version, I would suggest merging this as
a bugfix through my asm-generic tree for linux-5.10. I originally
said I'd send individual patches for each architecture tree, but
I now think this is easier and better documents what is going on.
---
arch/arc/include/asm/pgtable.h | 2 ++
arch/arm/include/asm/pgtable-2level.h | 2 ++
arch/arm/include/asm/pgtable-3level.h | 2 ++
arch/mips/include/asm/pgtable-32.h | 3 +++
arch/powerpc/include/asm/book3s/32/pgtable.h | 2 ++
arch/powerpc/include/asm/nohash/32/pgtable.h | 2 ++
arch/riscv/include/asm/pgtable-32.h | 2 ++
include/linux/pgtable.h | 13 +++++++++++++
8 files changed, 28 insertions(+)
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index f1ed17edb085..163641726a2b 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -134,8 +134,10 @@
#ifdef CONFIG_ARC_HAS_PAE40
#define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
+#define MAX_POSSIBLE_PHYSMEM_BITS 40
#else
#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE)
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
#endif
/**************************************************************************
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index 3502c2f746ca..baf7d0204eb5 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -75,6 +75,8 @@
#define PTE_HWTABLE_OFF (PTE_HWTABLE_PTRS * sizeof(pte_t))
#define PTE_HWTABLE_SIZE (PTRS_PER_PTE * sizeof(u32))
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
+
/*
* PMD_SHIFT determines the size of the area a second-level page table can map
* PGDIR_SHIFT determines what a third-level page table entry can map
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index fbb6693c3352..2b85d175e999 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -25,6 +25,8 @@
#define PTE_HWTABLE_OFF (0)
#define PTE_HWTABLE_SIZE (PTRS_PER_PTE * sizeof(u64))
+#define MAX_POSSIBLE_PHYSMEM_BITS 40
+
/*
* PGDIR_SHIFT determines the size a top-level page table entry can map.
*/
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h
index a950fc1ddb4d..6c0532d7b211 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -154,6 +154,7 @@ static inline void pmd_clear(pmd_t *pmdp)
#if defined(CONFIG_XPA)
+#define MAX_POSSIBLE_PHYSMEM_BITS 40
#define pte_pfn(x) (((unsigned long)((x).pte_high >> _PFN_SHIFT)) | (unsigned long)((x).pte_low << _PAGE_PRESENT_SHIFT))
static inline pte_t
pfn_pte(unsigned long pfn, pgprot_t prot)
@@ -169,6 +170,7 @@ pfn_pte(unsigned long pfn, pgprot_t prot)
#elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
+#define MAX_POSSIBLE_PHYSMEM_BITS 36
#define pte_pfn(x) ((unsigned long)((x).pte_high >> 6))
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
@@ -183,6 +185,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
#else
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
#ifdef CONFIG_CPU_VR41XX
#define pte_pfn(x) ((unsigned long)((x).pte >> (PAGE_SHIFT + 2)))
#define pfn_pte(pfn, prot) __pte(((pfn) << (PAGE_SHIFT + 2)) | pgprot_val(prot))
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 36443cda8dcf..1376be95e975 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -36,8 +36,10 @@ static inline bool pte_user(pte_t pte)
*/
#ifdef CONFIG_PTE_64BIT
#define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 36
#else
#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
#endif
/*
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index ee2243ba96cf..96522f7f0618 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -153,8 +153,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
*/
#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
#define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 36
#else
#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
#endif
/*
diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h
index b0ab66e5fdb1..5b2e79e5bfa5 100644
--- a/arch/riscv/include/asm/pgtable-32.h
+++ b/arch/riscv/include/asm/pgtable-32.h
@@ -14,4 +14,6 @@
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 34
+
#endif /* _ASM_RISCV_PGTABLE_32_H */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 71125a4676c4..e237004d498d 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1427,6 +1427,19 @@ typedef unsigned int pgtbl_mod_mask;
#endif /* !__ASSEMBLY__ */
+#if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT)
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+/*
+ * ZSMALLOC needs to know the highest PFN on 32-bit architectures
+ * with physical address space extension, but falls back to
+ * BITS_PER_LONG otherwise.
+ */
+#error Missing MAX_POSSIBLE_PHYSMEM_BITS definition
+#else
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
+#endif
+#endif
+
#ifndef has_transparent_hugepage
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define has_transparent_hugepage() 1
--
2.27.0
^ permalink raw reply related
* Re: [PATCH 0/5] perf/mm: Fix PERF_SAMPLE_*_PAGE_SIZE
From: Christophe Leroy @ 2020-11-13 13:44 UTC (permalink / raw)
To: Peter Zijlstra, kan.liang, mingo, acme, mark.rutland,
alexander.shishkin, jolsa, eranian
Cc: linux-arch, ak, catalin.marinas, linuxppc-dev, willy,
linux-kernel, dave.hansen, npiggin, aneesh.kumar, sparclinux,
will, davem, kirill.shutemov
In-Reply-To: <20201113111901.743573013@infradead.org>
Hi
Le 13/11/2020 à 12:19, Peter Zijlstra a écrit :
> Hi,
>
> These patches provide generic infrastructure to determine TLB page size from
> page table entries alone. Perf will use this (for either data or code address)
> to aid in profiling TLB issues.
>
> While most architectures only have page table aligned large pages, some
> (notably ARM64, Sparc64 and Power) provide non page table aligned large pages
> and need to provide their own implementation of these functions.
>
> I've provided (completely untested) implementations for ARM64 and Sparc64, but
> failed to penetrate the _many_ Power MMUs. I'm hoping Nick or Aneesh can help
> me out there.
>
I can help with powerpc 8xx. It is a 32 bits powerpc. The PGD has 1024 entries, that means each
entry maps 4M.
Page sizes are 4k, 16k, 512k and 8M.
For the 8M pages we use hugepd with a single entry. The two related PGD entries point to the same
hugepd.
For the other sizes, they are in standard page tables. 16k pages appear 4 times in the page table.
512k entries appear 128 times in the page table.
When the PGD entry has _PMD_PAGE_8M bits, the PMD entry points to a hugepd with holds the single 8M
entry.
In the PTE, we have two bits: _PAGE_SPS and _PAGE_HUGE
_PAGE_HUGE means it is a 512k page
_PAGE_SPS means it is not a 4k page
The kernel can by build either with 4k pages as standard page size, or 16k pages. It doesn't change
the page table layout though.
Hope this is clear. Now I don't really know to wire that up to your series.
Christophe
^ permalink raw reply
* Re: [RFC PATCH kernel 2/2] powerpc/pci: Remove LSI mappings on device teardown
From: Cédric Le Goater @ 2020-11-13 12:06 UTC (permalink / raw)
To: Alexey Kardashevskiy, linuxppc-dev
Cc: Rob Herring, Marc Zyngier, linux-kernel, Oliver O'Halloran,
Frederic Barrat, Qian Cai, Thomas Gleixner, Michal Suchánek
In-Reply-To: <20201027090655.14118-3-aik@ozlabs.ru>
On 10/27/20 10:06 AM, Alexey Kardashevskiy wrote:
> From: Oliver O'Halloran <oohall@gmail.com>
>
> When a passthrough IO adapter is removed from a pseries machine using hash
> MMU and the XIVE interrupt mode, the POWER hypervisor expects the guest OS
> to clear all page table entries related to the adapter. If some are still
> present, the RTAS call which isolates the PCI slot returns error 9001
> "valid outstanding translations" and the removal of the IO adapter fails.
> This is because when the PHBs are scanned, Linux maps automatically the
> INTx interrupts in the Linux interrupt number space but these are never
> removed.
>
> This problem can be fixed by adding the corresponding unmap operation when
> the device is removed. There's no pcibios_* hook for the remove case, but
> the same effect can be achieved using a bus notifier.
>
> Cc: Cédric Le Goater <clg@kaod.org>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Thanks taking care of this.
C.
> ---
> arch/powerpc/kernel/pci-common.c | 21 +++++++++++++++++++++
> 1 file changed, 21 insertions(+)
>
> diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
> index be108616a721..95f4e173368a 100644
> --- a/arch/powerpc/kernel/pci-common.c
> +++ b/arch/powerpc/kernel/pci-common.c
> @@ -404,6 +404,27 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
> return 0;
> }
>
> +static int ppc_pci_unmap_irq_line(struct notifier_block *nb,
> + unsigned long action, void *data)
> +{
> + struct pci_dev *pdev = to_pci_dev(data);
> +
> + if (action == BUS_NOTIFY_DEL_DEVICE)
> + irq_dispose_mapping(pdev->irq);
> +
> + return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block ppc_pci_unmap_irq_notifier = {
> + .notifier_call = ppc_pci_unmap_irq_line,
> +};
> +
> +static int ppc_pci_register_irq_notifier(void)
> +{
> + return bus_register_notifier(&pci_bus_type, &ppc_pci_unmap_irq_notifier);
> +}
> +arch_initcall(ppc_pci_register_irq_notifier);
> +
> /*
> * Platform support for /proc/bus/pci/X/Y mmap()s.
> * -- paulus.
>
^ permalink raw reply
* Re: [PATCH 2/5] mm: Introduce pXX_leaf_size()
From: Peter Zijlstra @ 2020-11-13 11:45 UTC (permalink / raw)
To: kan.liang, mingo, acme, mark.rutland, alexander.shishkin, jolsa,
eranian
Cc: linux-arch, ak, catalin.marinas, linuxppc-dev, willy,
linux-kernel, dave.hansen, npiggin, aneesh.kumar, sparclinux,
will, davem, kirill.shutemov
In-Reply-To: <20201113113426.465239104@infradead.org>
On Fri, Nov 13, 2020 at 12:19:03PM +0100, Peter Zijlstra wrote:
> A number of architectures have non-pagetable aligned huge/large pages.
> For such architectures a leaf can actually be part of a larger TLB
> entry.
>
> Provide generic helpers to determine the TLB size of a page-table
> leaf.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
> include/linux/pgtable.h | 16 ++++++++++++++++
> 1 file changed, 16 insertions(+)
>
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -1536,4 +1536,20 @@ typedef unsigned int pgtbl_mod_mask;
> #define pmd_leaf(x) 0
> #endif
>
> +#ifndef pgd_leaf_size
> +#define pgd_leaf_size(x) PGD_SIZE
Argh, I lost a refresh, that should've been:
+#define pgd_leaf_size(x) (1ULL << PGDIR_SHIFT)
> +#endif
> +#ifndef p4d_leaf_size
> +#define p4d_leaf_size(x) P4D_SIZE
> +#endif
> +#ifndef pud_leaf_size
> +#define pud_leaf_size(x) PUD_SIZE
> +#endif
> +#ifndef pmd_leaf_size
> +#define pmd_leaf_size(x) PMD_SIZE
> +#endif
> +#ifndef pte_leaf_size
> +#define pte_leaf_size(x) PAGE_SIZE
> +#endif
> +
> #endif /* _LINUX_PGTABLE_H */
>
>
^ permalink raw reply
* [PATCH 5/5] sparc64/mm: Implement pXX_leaf_size() support
From: Peter Zijlstra @ 2020-11-13 11:19 UTC (permalink / raw)
To: kan.liang, mingo, acme, mark.rutland, alexander.shishkin, jolsa,
eranian
Cc: linux-arch, ak, catalin.marinas, peterz, linuxppc-dev, willy,
linux-kernel, dave.hansen, npiggin, aneesh.kumar, sparclinux,
will, davem, kirill.shutemov
In-Reply-To: <20201113111901.743573013@infradead.org>
Sparc64 has non-pagetable aligned large page support; wire up the
pXX_leaf_size() functions to report the correct TLB page size.
This enables PERF_SAMPLE_{DATA,CODE}_PAGE_SIZE to report accurate TLB
page sizes.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/sparc/include/asm/pgtable_64.h | 13 +++++++++++++
arch/sparc/mm/hugetlbpage.c | 19 +++++++++++++------
2 files changed, 26 insertions(+), 6 deletions(-)
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -1121,6 +1121,19 @@ extern unsigned long cmdline_memory_size
asmlinkage void do_sparc64_fault(struct pt_regs *regs);
+#ifdef CONFIG_HUGETLB_PAGE
+
+#define pud_leaf_size pud_leaf_size
+extern unsigned long pud_leaf_size(pud_t pud);
+
+#define pmd_leaf_size pmd_leaf_size
+extern unsigned long pmd_leaf_size(pmd_t pmd);
+
+#define pte_leaf_size pte_leaf_size
+extern unsigned long pte_leaf_size(pte_t pte);
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
#endif /* !(__ASSEMBLY__) */
#endif /* !(_SPARC64_PGTABLE_H) */
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -247,14 +247,17 @@ static unsigned int sun4u_huge_tte_to_sh
return shift;
}
-static unsigned int huge_tte_to_shift(pte_t entry)
+static unsigned long tte_to_shift(pte_t entry)
{
- unsigned long shift;
-
if (tlb_type == hypervisor)
- shift = sun4v_huge_tte_to_shift(entry);
- else
- shift = sun4u_huge_tte_to_shift(entry);
+ return sun4v_huge_tte_to_shift(entry);
+
+ return sun4u_huge_tte_to_shift(entry);
+}
+
+static unsigned int huge_tte_to_shift(pte_t entry)
+{
+ unsigned long shift = tte_to_shift(entry);
if (shift == PAGE_SHIFT)
WARN_ONCE(1, "tto_to_shift: invalid hugepage tte=0x%lx\n",
@@ -272,6 +275,10 @@ static unsigned long huge_tte_to_size(pt
return size;
}
+unsigned long pud_leaf_size(pud_t pud) { return 1UL << tte_to_shift((pte_t)pud); }
+unsigned long pmd_leaf_size(pmd_t pmd) { return 1UL << tte_to_shift((pte_t)pmd); }
+unsigned long pte_leaf_size(pte_t pte) { return 1UL << tte_to_shift((pte_t)pte); }
+
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
^ permalink raw reply
* [PATCH 0/5] perf/mm: Fix PERF_SAMPLE_*_PAGE_SIZE
From: Peter Zijlstra @ 2020-11-13 11:19 UTC (permalink / raw)
To: kan.liang, mingo, acme, mark.rutland, alexander.shishkin, jolsa,
eranian
Cc: linux-arch, ak, catalin.marinas, peterz, linuxppc-dev, willy,
linux-kernel, dave.hansen, npiggin, aneesh.kumar, sparclinux,
will, davem, kirill.shutemov
Hi,
These patches provide generic infrastructure to determine TLB page size from
page table entries alone. Perf will use this (for either data or code address)
to aid in profiling TLB issues.
While most architectures only have page table aligned large pages, some
(notably ARM64, Sparc64 and Power) provide non page table aligned large pages
and need to provide their own implementation of these functions.
I've provided (completely untested) implementations for ARM64 and Sparc64, but
failed to penetrate the _many_ Power MMUs. I'm hoping Nick or Aneesh can help
me out there.
^ permalink raw reply
* [PATCH 3/5] perf/core: Fix arch_perf_get_page_size()
From: Peter Zijlstra @ 2020-11-13 11:19 UTC (permalink / raw)
To: kan.liang, mingo, acme, mark.rutland, alexander.shishkin, jolsa,
eranian
Cc: linux-arch, ak, catalin.marinas, peterz, linuxppc-dev, willy,
linux-kernel, dave.hansen, npiggin, aneesh.kumar, sparclinux,
will, davem, kirill.shutemov
In-Reply-To: <20201113111901.743573013@infradead.org>
The (new) page-table walker in arch_perf_get_page_size() is broken in
various ways. Specifically while it is used in a locless manner, it
doesn't depend on CONFIG_HAVE_FAST_GUP nor uses the proper _lockless
offset methods, nor is careful to only read each entry only once.
Also the hugetlb support is broken due to calling pte_page() without
first checking pte_special().
Rewrite the whole thing to be a proper lockless page-table walker and
employ the new pXX_leaf_size() pgtable functions to determine the TLB
size without looking at the page-frames.
Fixes: 51b646b2d9f8 ("perf,mm: Handle non-page-table-aligned hugetlbfs")
Fixes: 8d97e71811aa ("perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/arm64/include/asm/pgtable.h | 3 +
arch/sparc/include/asm/pgtable_64.h | 13 ++++
arch/sparc/mm/hugetlbpage.c | 19 ++++--
include/linux/pgtable.h | 16 +++++
kernel/events/core.c | 102 +++++++++++++-----------------------
5 files changed, 82 insertions(+), 71 deletions(-)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7001,90 +7001,62 @@ static u64 perf_virt_to_phys(u64 virt)
return phys_addr;
}
-#ifdef CONFIG_MMU
-
/*
- * Return the MMU page size of a given virtual address.
- *
- * This generic implementation handles page-table aligned huge pages, as well
- * as non-page-table aligned hugetlbfs compound pages.
- *
- * If an architecture supports and uses non-page-table aligned pages in their
- * kernel mapping it will need to provide it's own implementation of this
- * function.
+ * Return the MMU/TLB page size of a given virtual address.
*/
-__weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
+static u64 perf_get_tlb_page_size(struct mm_struct *mm, unsigned long addr)
{
- struct page *page;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ u64 size = 0;
- pgd = pgd_offset(mm, addr);
- if (pgd_none(*pgd))
- return 0;
+#ifdef CONFIG_HAVE_FAST_GUP
+ pgd_t *pgdp, pgd;
+ p4d_t *p4dp, p4d;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
+ pte_t *ptep, pte;
- p4d = p4d_offset(pgd, addr);
- if (!p4d_present(*p4d))
+ pgdp = pgd_offset(mm, addr);
+ pgd = READ_ONCE(*pgdp);
+ if (pgd_none(pgd))
return 0;
- if (p4d_leaf(*p4d))
- return 1ULL << P4D_SHIFT;
+ if (pgd_leaf(pgd))
+ return pgd_leaf_size(pgd);
- pud = pud_offset(p4d, addr);
- if (!pud_present(*pud))
+ p4dp = p4d_offset_lockless(pgdp, pgd, addr);
+ p4d = READ_ONCE(*p4dp);
+ if (!p4d_present(p4d))
return 0;
- if (pud_leaf(*pud)) {
-#ifdef pud_page
- page = pud_page(*pud);
- if (PageHuge(page))
- return page_size(compound_head(page));
-#endif
- return 1ULL << PUD_SHIFT;
- }
+ if (p4d_leaf(p4d))
+ return p4d_leaf_size(p4d);
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd))
+ pudp = pud_offset_lockless(p4dp, p4d, addr);
+ pud = READ_ONCE(*pudp);
+ if (!pud_present(pud))
return 0;
- if (pmd_leaf(*pmd)) {
-#ifdef pmd_page
- page = pmd_page(*pmd);
- if (PageHuge(page))
- return page_size(compound_head(page));
-#endif
- return 1ULL << PMD_SHIFT;
- }
+ if (pud_leaf(pud))
+ return pud_leaf_size(pud);
- pte = pte_offset_map(pmd, addr);
- if (!pte_present(*pte)) {
- pte_unmap(pte);
+ pmdp = pmd_offset_lockless(pudp, pud, addr);
+ pmd = READ_ONCE(*pmdp);
+ if (!pmd_present(pmd))
return 0;
- }
- page = pte_page(*pte);
- if (PageHuge(page)) {
- u64 size = page_size(compound_head(page));
- pte_unmap(pte);
- return size;
- }
-
- pte_unmap(pte);
- return PAGE_SIZE;
-}
+ if (pmd_leaf(pmd))
+ return pmd_leaf_size(pmd);
-#else
+ ptep = pte_offset_map(&pmd, addr);
+ pte = ptep_get_lockless(ptep);
+ if (pte_present(pte))
+ size = pte_leaf_size(pte);
+ pte_unmap(ptep);
+#endif /* CONFIG_HAVE_FAST_GUP */
-static u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
-{
- return 0;
+ return size;
}
-#endif
-
static u64 perf_get_page_size(unsigned long addr)
{
struct mm_struct *mm;
@@ -7109,7 +7081,7 @@ static u64 perf_get_page_size(unsigned l
mm = &init_mm;
}
- size = arch_perf_get_page_size(mm, addr);
+ size = perf_get_tlb_page_size(mm, addr);
local_irq_restore(flags);
^ permalink raw reply
* [PATCH 1/5] mm/gup: Provide gup_get_pte() more generic
From: Peter Zijlstra @ 2020-11-13 11:19 UTC (permalink / raw)
To: kan.liang, mingo, acme, mark.rutland, alexander.shishkin, jolsa,
eranian
Cc: linux-arch, ak, catalin.marinas, peterz, linuxppc-dev, willy,
linux-kernel, dave.hansen, npiggin, aneesh.kumar, sparclinux,
will, davem, kirill.shutemov
In-Reply-To: <20201113111901.743573013@infradead.org>
In order to write another lockless page-table walker, we need
gup_get_pte() exposed. While doing that, rename it to
ptep_get_lockless() to match the existing ptep_get() naming.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
include/linux/pgtable.h | 55 +++++++++++++++++++++++++++++++++++++++++++++
mm/gup.c | 58 ------------------------------------------------
2 files changed, 56 insertions(+), 57 deletions(-)
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -258,6 +258,61 @@ static inline pte_t ptep_get(pte_t *ptep
}
#endif
+#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
+/*
+ * WARNING: only to be used in the get_user_pages_fast() implementation.
+ *
+ * With get_user_pages_fast(), we walk down the pagetables without taking any
+ * locks. For this we would like to load the pointers atomically, but sometimes
+ * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What
+ * we do have is the guarantee that a PTE will only either go from not present
+ * to present, or present to not present or both -- it will not switch to a
+ * completely different present page without a TLB flush in between; something
+ * that we are blocking by holding interrupts off.
+ *
+ * Setting ptes from not present to present goes:
+ *
+ * ptep->pte_high = h;
+ * smp_wmb();
+ * ptep->pte_low = l;
+ *
+ * And present to not present goes:
+ *
+ * ptep->pte_low = 0;
+ * smp_wmb();
+ * ptep->pte_high = 0;
+ *
+ * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'.
+ * We load pte_high *after* loading pte_low, which ensures we don't see an older
+ * value of pte_high. *Then* we recheck pte_low, which ensures that we haven't
+ * picked up a changed pte high. We might have gotten rubbish values from
+ * pte_low and pte_high, but we are guaranteed that pte_low will not have the
+ * present bit set *unless* it is 'l'. Because get_user_pages_fast() only
+ * operates on present ptes we're safe.
+ */
+static inline pte_t ptep_get_lockless(pte_t *ptep)
+{
+ pte_t pte;
+
+ do {
+ pte.pte_low = ptep->pte_low;
+ smp_rmb();
+ pte.pte_high = ptep->pte_high;
+ smp_rmb();
+ } while (unlikely(pte.pte_low != ptep->pte_low));
+
+ return pte;
+}
+#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
+/*
+ * We require that the PTE can be read atomically.
+ */
+static inline pte_t ptep_get_lockless(pte_t *ptep)
+{
+ return ptep_get(ptep);
+}
+#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2079,62 +2079,6 @@ static void put_compound_head(struct pag
put_page(page);
}
-#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
-
-/*
- * WARNING: only to be used in the get_user_pages_fast() implementation.
- *
- * With get_user_pages_fast(), we walk down the pagetables without taking any
- * locks. For this we would like to load the pointers atomically, but sometimes
- * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What
- * we do have is the guarantee that a PTE will only either go from not present
- * to present, or present to not present or both -- it will not switch to a
- * completely different present page without a TLB flush in between; something
- * that we are blocking by holding interrupts off.
- *
- * Setting ptes from not present to present goes:
- *
- * ptep->pte_high = h;
- * smp_wmb();
- * ptep->pte_low = l;
- *
- * And present to not present goes:
- *
- * ptep->pte_low = 0;
- * smp_wmb();
- * ptep->pte_high = 0;
- *
- * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'.
- * We load pte_high *after* loading pte_low, which ensures we don't see an older
- * value of pte_high. *Then* we recheck pte_low, which ensures that we haven't
- * picked up a changed pte high. We might have gotten rubbish values from
- * pte_low and pte_high, but we are guaranteed that pte_low will not have the
- * present bit set *unless* it is 'l'. Because get_user_pages_fast() only
- * operates on present ptes we're safe.
- */
-static inline pte_t gup_get_pte(pte_t *ptep)
-{
- pte_t pte;
-
- do {
- pte.pte_low = ptep->pte_low;
- smp_rmb();
- pte.pte_high = ptep->pte_high;
- smp_rmb();
- } while (unlikely(pte.pte_low != ptep->pte_low));
-
- return pte;
-}
-#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
-/*
- * We require that the PTE can be read atomically.
- */
-static inline pte_t gup_get_pte(pte_t *ptep)
-{
- return ptep_get(ptep);
-}
-#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
-
static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
unsigned int flags,
struct page **pages)
@@ -2160,7 +2104,7 @@ static int gup_pte_range(pmd_t pmd, unsi
ptem = ptep = pte_offset_map(&pmd, addr);
do {
- pte_t pte = gup_get_pte(ptep);
+ pte_t pte = ptep_get_lockless(ptep);
struct page *head, *page;
/*
^ permalink raw reply
* [PATCH 4/5] arm64/mm: Implement pXX_leaf_size() support
From: Peter Zijlstra @ 2020-11-13 11:19 UTC (permalink / raw)
To: kan.liang, mingo, acme, mark.rutland, alexander.shishkin, jolsa,
eranian
Cc: linux-arch, ak, catalin.marinas, peterz, linuxppc-dev, willy,
linux-kernel, dave.hansen, npiggin, aneesh.kumar, sparclinux,
will, davem, kirill.shutemov
In-Reply-To: <20201113111901.743573013@infradead.org>
ARM64 has non-pagetable aligned large page support with PTE_CONT, when
this bit is set the page is part of a super-page. Match the hugetlb
code and support these super pages for PTE and PMD levels.
This enables PERF_SAMPLE_{DATA,CODE}_PAGE_SIZE to report accurate TLB
page sizes.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/arm64/include/asm/pgtable.h | 3 +++
1 file changed, 3 insertions(+)
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -503,6 +503,9 @@ extern pgprot_t phys_mem_access_prot(str
PMD_TYPE_SECT)
#define pmd_leaf(pmd) pmd_sect(pmd)
+#define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
+#define pte_leaf_size(pte) (pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)
+
#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
static inline bool pud_sect(pud_t pud) { return false; }
static inline bool pud_table(pud_t pud) { return true; }
^ permalink raw reply
* [PATCH 2/5] mm: Introduce pXX_leaf_size()
From: Peter Zijlstra @ 2020-11-13 11:19 UTC (permalink / raw)
To: kan.liang, mingo, acme, mark.rutland, alexander.shishkin, jolsa,
eranian
Cc: linux-arch, ak, catalin.marinas, peterz, linuxppc-dev, willy,
linux-kernel, dave.hansen, npiggin, aneesh.kumar, sparclinux,
will, davem, kirill.shutemov
In-Reply-To: <20201113111901.743573013@infradead.org>
A number of architectures have non-pagetable aligned huge/large pages.
For such architectures a leaf can actually be part of a larger TLB
entry.
Provide generic helpers to determine the TLB size of a page-table
leaf.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
include/linux/pgtable.h | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1536,4 +1536,20 @@ typedef unsigned int pgtbl_mod_mask;
#define pmd_leaf(x) 0
#endif
+#ifndef pgd_leaf_size
+#define pgd_leaf_size(x) PGD_SIZE
+#endif
+#ifndef p4d_leaf_size
+#define p4d_leaf_size(x) P4D_SIZE
+#endif
+#ifndef pud_leaf_size
+#define pud_leaf_size(x) PUD_SIZE
+#endif
+#ifndef pmd_leaf_size
+#define pmd_leaf_size(x) PMD_SIZE
+#endif
+#ifndef pte_leaf_size
+#define pte_leaf_size(x) PAGE_SIZE
+#endif
+
#endif /* _LINUX_PGTABLE_H */
^ permalink raw reply
* [PATCH] macintosh: windfarm: Use NULL to compare with pointer-typed value rather than 0
From: Xu Wang @ 2020-11-13 7:33 UTC (permalink / raw)
To: benh, linuxppc-dev; +Cc: linux-kernel
Compare pointer-typed values to NULL rather than 0.
Signed-off-by: Xu Wang <vulab@iscas.ac.cn>
---
drivers/macintosh/windfarm_pm121.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/macintosh/windfarm_pm121.c b/drivers/macintosh/windfarm_pm121.c
index ab467b9c31be..62826844b584 100644
--- a/drivers/macintosh/windfarm_pm121.c
+++ b/drivers/macintosh/windfarm_pm121.c
@@ -650,7 +650,7 @@ static void pm121_create_cpu_fans(void)
/* First, locate the PID params in SMU SBD */
hdr = smu_get_sdb_partition(SMU_SDB_CPUPIDDATA_ID, NULL);
- if (hdr == 0) {
+ if (hdr == NULL) {
printk(KERN_WARNING "pm121: CPU PID fan config not found.\n");
goto fail;
}
@@ -969,7 +969,7 @@ static int pm121_init_pm(void)
const struct smu_sdbp_header *hdr;
hdr = smu_get_sdb_partition(SMU_SDB_SENSORTREE_ID, NULL);
- if (hdr != 0) {
+ if (hdr != NULL) {
struct smu_sdbp_sensortree *st =
(struct smu_sdbp_sensortree *)&hdr[1];
pm121_mach_model = st->model_id;
--
2.17.1
^ permalink raw reply related
* Re: [PATCH 11/25] soc: fsl: qe: qe_common: Fix misnamed function attribute 'addr'
From: Lee Jones @ 2020-11-13 7:15 UTC (permalink / raw)
To: Leo Li
Cc: Software, Inc, linux-kernel@vger.kernel.org, act, Dan Malek,
Vitaly Bordug, Scott Wood, linuxppc-dev@lists.ozlabs.org,
linux-arm-kernel@lists.infradead.org, Qiang Zhao
In-Reply-To: <VE1PR04MB66877659A67152AE02CF443F8FE70@VE1PR04MB6687.eurprd04.prod.outlook.com>
On Thu, 12 Nov 2020, Leo Li wrote:
>
>
> > -----Original Message-----
> > From: Lee Jones <lee.jones@linaro.org>
> > Sent: Thursday, November 12, 2020 4:33 AM
> > To: linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org;
> > Qiang Zhao <qiang.zhao@nxp.com>; Leo Li <leoyang.li@nxp.com>; Scott
> > Wood <scottwood@freescale.com>; act <dmalek@jlc.net>; Dan Malek
> > <dan@embeddedalley.com>; Software, Inc <source@mvista.com>; Vitaly
> > Bordug <vbordug@ru.mvista.com>; linuxppc-dev@lists.ozlabs.org
> > Subject: Re: [PATCH 11/25] soc: fsl: qe: qe_common: Fix misnamed function
> > attribute 'addr'
> >
> > On Tue, 03 Nov 2020, Lee Jones wrote:
> >
> > > Fixes the following W=1 kernel build warning(s):
> > >
> > > drivers/soc/fsl/qe/qe_common.c:237: warning: Function parameter or
> > member 'addr' not described in 'cpm_muram_dma'
> > > drivers/soc/fsl/qe/qe_common.c:237: warning: Excess function parameter
> > 'offset' description in 'cpm_muram_dma'
> > >
> > > Cc: Qiang Zhao <qiang.zhao@nxp.com>
> > > Cc: Li Yang <leoyang.li@nxp.com>
> > > Cc: Scott Wood <scottwood@freescale.com>
> > > Cc: act <dmalek@jlc.net>
> > > Cc: Dan Malek <dan@embeddedalley.com>
> > > Cc: "Software, Inc" <source@mvista.com>
> > > Cc: Vitaly Bordug <vbordug@ru.mvista.com>
> > > Cc: linuxppc-dev@lists.ozlabs.org
> > > Signed-off-by: Lee Jones <lee.jones@linaro.org>
> > > ---
> > > drivers/soc/fsl/qe/qe_common.c | 2 +-
> > > 1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/soc/fsl/qe/qe_common.c
> > > b/drivers/soc/fsl/qe/qe_common.c index 75075591f6308..497a7e0fd0272
> > > 100644
> > > --- a/drivers/soc/fsl/qe/qe_common.c
> > > +++ b/drivers/soc/fsl/qe/qe_common.c
> > > @@ -231,7 +231,7 @@ EXPORT_SYMBOL(cpm_muram_offset);
> > >
> > > /**
> > > * cpm_muram_dma - turn a muram virtual address into a DMA address
> > > - * @offset: virtual address from cpm_muram_addr() to convert
> > > + * @addr: virtual address from cpm_muram_addr() to convert
> > > */
> > > dma_addr_t cpm_muram_dma(void __iomem *addr) {
> >
> > Any idea who will pick this up?
>
> I can pick them up through my tree, but I haven't created the
> for-next branch for the next kernel yet. Will look through this
> series soon. Thanks.
Thank you Leo.
There's not rush. Just trying to ensure they don't get forgotten.
--
Lee Jones [李琼斯]
Senior Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog
^ permalink raw reply
* Re: [PATCH v4 01/18] dt-bindings: usb: usb-hcd: Detach generic USB controller properties
From: Chunfeng Yun @ 2020-11-13 6:04 UTC (permalink / raw)
To: Serge Semin
Cc: Neil Armstrong, Bjorn Andersson, Pavel Parkhomenko, Kevin Hilman,
Krzysztof Kozlowski, Andy Gross, linux-snps-arc, devicetree,
Mathias Nyman, Martin Blumenstingl, Lad Prabhakar, Alexey Malahov,
Rob Herring, linux-arm-kernel, Roger Quadros, Felipe Balbi,
Greg Kroah-Hartman, Yoshihiro Shimoda, linux-usb, linux-mips,
Serge Semin, linux-kernel, Manu Gautam, linuxppc-dev
In-Reply-To: <20201111090853.14112-2-Sergey.Semin@baikalelectronics.ru>
On Wed, 2020-11-11 at 12:08 +0300, Serge Semin wrote:
> There can be three distinctive types of the USB controllers: USB hosts,
> USB peripherals/gadgets and USB OTG, which can switch from one role to
> another. In order to have that hierarchy handled in the DT binding files,
> we need to collect common properties in a common DT schema and specific
> properties in dedicated schemas. Seeing the usb-hcd.yaml DT schema is
> dedicated for the USB host controllers only, let's move some common
> properties from there into the usb.yaml schema. So the later would be
> available to evaluate all currently supported types of the USB
> controllers.
>
> While at it add an explicit "additionalProperties: true" into the
> usb-hcd.yaml as setting the additionalProperties/unevaluateProperties
> properties is going to be get mandatory soon.
>
> Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
>
> ---
>
> Changelog v4:
> - This is a new patch created as a result of the comment left
> by Chunfeng Yun in v3
> ---
> .../devicetree/bindings/usb/usb-hcd.yaml | 14 ++-------
> .../devicetree/bindings/usb/usb.yaml | 29 +++++++++++++++++++
> 2 files changed, 32 insertions(+), 11 deletions(-)
> create mode 100644 Documentation/devicetree/bindings/usb/usb.yaml
>
> diff --git a/Documentation/devicetree/bindings/usb/usb-hcd.yaml b/Documentation/devicetree/bindings/usb/usb-hcd.yaml
> index 7263b7f2b510..81f3ad1419d8 100644
> --- a/Documentation/devicetree/bindings/usb/usb-hcd.yaml
> +++ b/Documentation/devicetree/bindings/usb/usb-hcd.yaml
> @@ -9,18 +9,10 @@ title: Generic USB Host Controller Device Tree Bindings
> maintainers:
> - Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>
> -properties:
> - $nodename:
> - pattern: "^usb(@.*)?"
> +allOf:
> + - $ref: usb.yaml#
>
> - phys:
> - $ref: /schemas/types.yaml#/definitions/phandle-array
> - description:
> - List of all the USB PHYs on this HCD
> -
> - phy-names:
> - description:
> - Name specifier for the USB PHY
> +additionalProperties: true
This seems already added by the applied patch 6a0e321ea735 "dt-bindings:
Explicitly allow additional properties in common schemas"
>
> examples:
> - |
> diff --git a/Documentation/devicetree/bindings/usb/usb.yaml b/Documentation/devicetree/bindings/usb/usb.yaml
> new file mode 100644
> index 000000000000..941ad59fbac5
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/usb/usb.yaml
> @@ -0,0 +1,29 @@
> +# SPDX-License-Identifier: GPL-2.0
> +%YAML 1.2
> +---
> +$id: http://devicetree.org/schemas/usb/usb.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: Generic USB Controller Device Tree Bindings
> +
> +maintainers:
> + - Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> +
> +select: false
> +
> +properties:
> + $nodename:
> + pattern: "^usb(@.*)?"
> +
> + phys:
> + $ref: /schemas/types.yaml#/definitions/phandle-array
> + description:
> + List of all the USB PHYs on this HCD
> +
> + phy-names:
> + description:
> + Name specifier for the USB PHY
> +
> +additionalProperties: true
> +
> +...
^ permalink raw reply
* Re: [PATCH net-next 12/12] ibmvnic: Do not replenish RX buffers after every polling loop
From: drt @ 2020-11-13 5:52 UTC (permalink / raw)
To: Thomas Falcon
Cc: cforno12, netdev, ljp, ricklind, dnbanerg, brking, sukadev,
linuxppc-dev, Linuxppc-dev
In-Reply-To: <1605208207-1896-13-git-send-email-tlfalcon@linux.ibm.com>
On 2020-11-12 11:10, Thomas Falcon wrote:
> From: "Dwip N. Banerjee" <dnbanerg@us.ibm.com>
>
> Reduce the amount of time spent replenishing RX buffers by
> only doing so once available buffers has fallen under a certain
> threshold, in this case half of the total number of buffers, or
> if the polling loop exits before the packets processed is less
> than its budget.
>
> Signed-off-by: Dwip N. Banerjee <dnbanerg@us.ibm.com>
Acked-by: Dany Madden <drt@linux.ibm.com>
> ---
> drivers/net/ethernet/ibm/ibmvnic.c | 5 ++++-
> 1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c
> b/drivers/net/ethernet/ibm/ibmvnic.c
> index 0791dbf1cba8..66f8068bee5a 100644
> --- a/drivers/net/ethernet/ibm/ibmvnic.c
> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
> @@ -2476,7 +2476,10 @@ static int ibmvnic_poll(struct napi_struct
> *napi, int budget)
> frames_processed++;
> }
>
> - if (adapter->state != VNIC_CLOSING)
> + if (adapter->state != VNIC_CLOSING &&
> + ((atomic_read(&adapter->rx_pool[scrq_num].available) <
> + adapter->req_rx_add_entries_per_subcrq / 2) ||
> + frames_processed < budget))
> replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]);
> if (frames_processed < budget) {
> if (napi_complete_done(napi, frames_processed)) {
^ permalink raw reply
* Re: [PATCH net-next 01/12] ibmvnic: Ensure that subCRQ entry reads are ordered
From: drt @ 2020-11-13 5:45 UTC (permalink / raw)
To: Thomas Falcon
Cc: cforno12, netdev, ljp, ricklind, dnbanerg, brking, sukadev,
linuxppc-dev
In-Reply-To: <1605208207-1896-2-git-send-email-tlfalcon@linux.ibm.com>
On 2020-11-12 11:09, Thomas Falcon wrote:
> Ensure that received Subordinate Command-Response Queue
> entries are properly read in order by the driver.
>
> Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
Acked-by: Dany Madden <drt@linux.ibm.com>
> ---
> drivers/net/ethernet/ibm/ibmvnic.c | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c
> b/drivers/net/ethernet/ibm/ibmvnic.c
> index da15913879f8..5647f54bf387 100644
> --- a/drivers/net/ethernet/ibm/ibmvnic.c
> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
> @@ -2391,6 +2391,8 @@ static int ibmvnic_poll(struct napi_struct
> *napi, int budget)
>
> if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num]))
> break;
> + /* ensure that we do not prematurely exit the polling loop */
> + dma_rmb();
> next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]);
> rx_buff =
> (struct ibmvnic_rx_buff *)be64_to_cpu(next->
> @@ -3087,6 +3089,8 @@ static int ibmvnic_complete_tx(struct
> ibmvnic_adapter *adapter,
> int num_entries = 0;
>
> next = ibmvnic_next_scrq(adapter, scrq);
> + /* ensure that we are reading the correct queue entry */
> + dma_rmb();
> for (i = 0; i < next->tx_comp.num_comps; i++) {
> if (next->tx_comp.rcs[i]) {
> dev_err(dev, "tx error %x\n",
^ permalink raw reply
* Re: [PATCH kernel] vfio_pci_nvlink2: Do not attempt NPU2 setup on old P8's NPU
From: Andrew Donnellan @ 2020-11-13 5:30 UTC (permalink / raw)
To: Alexey Kardashevskiy, linuxppc-dev; +Cc: Alex Williamson, kvm, David Gibson
In-Reply-To: <20201113050632.74124-1-aik@ozlabs.ru>
On 13/11/20 4:06 pm, Alexey Kardashevskiy wrote:
> We execute certain NPU2 setup code (such as mapping an LPID to a device
> in NPU2) unconditionally if an Nvlink bridge is detected. However this
> cannot succeed on P8+ machines as the init helpers return an error other
> than ENODEV which means the device is there is and setup failed so
> vfio_pci_enable() fails and pass through is not possible.
>
> This changes the two NPU2 related init helpers to return -ENODEV if
> there is no "memory-region" device tree property as this is
> the distinction between NPU and NPU2.
>
> Fixes: 7f92891778df ("vfio_pci: Add NVIDIA GV100GL [Tesla V100 SXM2] subdriver")
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Should this be Cc: stable?
Andrew
--
Andrew Donnellan OzLabs, ADL Canberra
ajd@linux.ibm.com IBM Australia Limited
^ permalink raw reply
* [PATCH kernel] vfio_pci_nvlink2: Do not attempt NPU2 setup on old P8's NPU
From: Alexey Kardashevskiy @ 2020-11-13 5:06 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Alexey Kardashevskiy, Alex Williamson, kvm, David Gibson
We execute certain NPU2 setup code (such as mapping an LPID to a device
in NPU2) unconditionally if an Nvlink bridge is detected. However this
cannot succeed on P8+ machines as the init helpers return an error other
than ENODEV which means the device is there is and setup failed so
vfio_pci_enable() fails and pass through is not possible.
This changes the two NPU2 related init helpers to return -ENODEV if
there is no "memory-region" device tree property as this is
the distinction between NPU and NPU2.
Fixes: 7f92891778df ("vfio_pci: Add NVIDIA GV100GL [Tesla V100 SXM2] subdriver")
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
drivers/vfio/pci/vfio_pci_nvlink2.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci_nvlink2.c b/drivers/vfio/pci/vfio_pci_nvlink2.c
index 65c61710c0e9..9adcf6a8f888 100644
--- a/drivers/vfio/pci/vfio_pci_nvlink2.c
+++ b/drivers/vfio/pci/vfio_pci_nvlink2.c
@@ -231,7 +231,7 @@ int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev)
return -EINVAL;
if (of_property_read_u32(npu_node, "memory-region", &mem_phandle))
- return -EINVAL;
+ return -ENODEV;
mem_node = of_find_node_by_phandle(mem_phandle);
if (!mem_node)
@@ -393,7 +393,7 @@ int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
int ret;
struct vfio_pci_npu2_data *data;
struct device_node *nvlink_dn;
- u32 nvlink_index = 0;
+ u32 nvlink_index = 0, mem_phandle = 0;
struct pci_dev *npdev = vdev->pdev;
struct device_node *npu_node = pci_device_to_OF_node(npdev);
struct pci_controller *hose = pci_bus_to_host(npdev->bus);
@@ -408,6 +408,9 @@ int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
if (!pnv_pci_get_gpu_dev(vdev->pdev))
return -ENODEV;
+ if (of_property_read_u32(npu_node, "memory-region", &mem_phandle))
+ return -ENODEV;
+
/*
* NPU2 normally has 8 ATSD registers (for concurrency) and 6 links
* so we can allocate one register per link, using nvlink index as
--
2.17.1
^ permalink raw reply related
* Re: [PATCH 3/3] powerpc: rewrite atomics to use ARCH_ATOMIC
From: kernel test robot @ 2020-11-13 5:05 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev
Cc: Christophe Leroy, kbuild-all, Arnd Bergmann, Peter Zijlstra,
Boqun Feng, linux-kernel, Nicholas Piggin, Alexey Kardashevskiy,
Will Deacon
In-Reply-To: <20201111110723.3148665-4-npiggin@gmail.com>
[-- Attachment #1: Type: text/plain, Size: 7933 bytes --]
Hi Nicholas,
I love your patch! Perhaps something to improve:
[auto build test WARNING on powerpc/next]
[also build test WARNING on asm-generic/master linus/master v5.10-rc3 next-20201112]
[cannot apply to scottwood/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-convert-to-use-ARCH_ATOMIC/20201111-190941
base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc64-randconfig-s031-20201111 (attached as .config)
compiler: powerpc-linux-gcc (GCC) 9.3.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.3-107-gaf3512a6-dirty
# https://github.com/0day-ci/linux/commit/9e1bec8fe216b0745c647e52c40d1f0033fb4efd
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Nicholas-Piggin/powerpc-convert-to-use-ARCH_ATOMIC/20201111-190941
git checkout 9e1bec8fe216b0745c647e52c40d1f0033fb4efd
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=powerpc64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
"sparse warnings: (new ones prefixed by >>)"
>> drivers/gpu/drm/drm_lock.c:75:24: sparse: sparse: incorrect type in argument 1 (different modifiers) @@ expected void *ptr @@ got unsigned int volatile *__ai_ptr @@
drivers/gpu/drm/drm_lock.c:75:24: sparse: expected void *ptr
>> drivers/gpu/drm/drm_lock.c:75:24: sparse: got unsigned int volatile *__ai_ptr
>> drivers/gpu/drm/drm_lock.c:75:24: sparse: sparse: incorrect type in argument 1 (different modifiers) @@ expected void *ptr @@ got unsigned int volatile *__ai_ptr @@
drivers/gpu/drm/drm_lock.c:75:24: sparse: expected void *ptr
>> drivers/gpu/drm/drm_lock.c:75:24: sparse: got unsigned int volatile *__ai_ptr
drivers/gpu/drm/drm_lock.c:118:24: sparse: sparse: incorrect type in argument 1 (different modifiers) @@ expected void *ptr @@ got unsigned int volatile *__ai_ptr @@
drivers/gpu/drm/drm_lock.c:118:24: sparse: expected void *ptr
drivers/gpu/drm/drm_lock.c:118:24: sparse: got unsigned int volatile *__ai_ptr
drivers/gpu/drm/drm_lock.c:118:24: sparse: sparse: incorrect type in argument 1 (different modifiers) @@ expected void *ptr @@ got unsigned int volatile *__ai_ptr @@
drivers/gpu/drm/drm_lock.c:118:24: sparse: expected void *ptr
drivers/gpu/drm/drm_lock.c:118:24: sparse: got unsigned int volatile *__ai_ptr
drivers/gpu/drm/drm_lock.c:141:24: sparse: sparse: incorrect type in argument 1 (different modifiers) @@ expected void *ptr @@ got unsigned int volatile *__ai_ptr @@
drivers/gpu/drm/drm_lock.c:141:24: sparse: expected void *ptr
drivers/gpu/drm/drm_lock.c:141:24: sparse: got unsigned int volatile *__ai_ptr
drivers/gpu/drm/drm_lock.c:141:24: sparse: sparse: incorrect type in argument 1 (different modifiers) @@ expected void *ptr @@ got unsigned int volatile *__ai_ptr @@
drivers/gpu/drm/drm_lock.c:141:24: sparse: expected void *ptr
drivers/gpu/drm/drm_lock.c:141:24: sparse: got unsigned int volatile *__ai_ptr
drivers/gpu/drm/drm_lock.c:319:40: sparse: sparse: incorrect type in argument 1 (different modifiers) @@ expected void *ptr @@ got unsigned int volatile *__ai_ptr @@
drivers/gpu/drm/drm_lock.c:319:40: sparse: expected void *ptr
drivers/gpu/drm/drm_lock.c:319:40: sparse: got unsigned int volatile *__ai_ptr
drivers/gpu/drm/drm_lock.c:319:40: sparse: sparse: incorrect type in argument 1 (different modifiers) @@ expected void *ptr @@ got unsigned int volatile *__ai_ptr @@
drivers/gpu/drm/drm_lock.c:319:40: sparse: expected void *ptr
drivers/gpu/drm/drm_lock.c:319:40: sparse: got unsigned int volatile *__ai_ptr
vim +75 drivers/gpu/drm/drm_lock.c
4ac5ec40ec70022 Daniel Vetter 2010-08-23 48
bd50d4a2168370b Benjamin Gaignard 2020-03-06 49 /*
1a75a222f5ca106 Daniel Vetter 2016-06-14 50 * Take the heavyweight lock.
1a75a222f5ca106 Daniel Vetter 2016-06-14 51 *
1a75a222f5ca106 Daniel Vetter 2016-06-14 52 * \param lock lock pointer.
1a75a222f5ca106 Daniel Vetter 2016-06-14 53 * \param context locking context.
1a75a222f5ca106 Daniel Vetter 2016-06-14 54 * \return one if the lock is held, or zero otherwise.
1a75a222f5ca106 Daniel Vetter 2016-06-14 55 *
1a75a222f5ca106 Daniel Vetter 2016-06-14 56 * Attempt to mark the lock as held by the given context, via the \p cmpxchg instruction.
1a75a222f5ca106 Daniel Vetter 2016-06-14 57 */
1a75a222f5ca106 Daniel Vetter 2016-06-14 58 static
1a75a222f5ca106 Daniel Vetter 2016-06-14 59 int drm_lock_take(struct drm_lock_data *lock_data,
1a75a222f5ca106 Daniel Vetter 2016-06-14 60 unsigned int context)
1a75a222f5ca106 Daniel Vetter 2016-06-14 61 {
1a75a222f5ca106 Daniel Vetter 2016-06-14 62 unsigned int old, new, prev;
1a75a222f5ca106 Daniel Vetter 2016-06-14 63 volatile unsigned int *lock = &lock_data->hw_lock->lock;
1a75a222f5ca106 Daniel Vetter 2016-06-14 64
1a75a222f5ca106 Daniel Vetter 2016-06-14 65 spin_lock_bh(&lock_data->spinlock);
1a75a222f5ca106 Daniel Vetter 2016-06-14 66 do {
1a75a222f5ca106 Daniel Vetter 2016-06-14 67 old = *lock;
1a75a222f5ca106 Daniel Vetter 2016-06-14 68 if (old & _DRM_LOCK_HELD)
1a75a222f5ca106 Daniel Vetter 2016-06-14 69 new = old | _DRM_LOCK_CONT;
1a75a222f5ca106 Daniel Vetter 2016-06-14 70 else {
1a75a222f5ca106 Daniel Vetter 2016-06-14 71 new = context | _DRM_LOCK_HELD |
1a75a222f5ca106 Daniel Vetter 2016-06-14 72 ((lock_data->user_waiters + lock_data->kernel_waiters > 1) ?
1a75a222f5ca106 Daniel Vetter 2016-06-14 73 _DRM_LOCK_CONT : 0);
1a75a222f5ca106 Daniel Vetter 2016-06-14 74 }
1a75a222f5ca106 Daniel Vetter 2016-06-14 @75 prev = cmpxchg(lock, old, new);
1a75a222f5ca106 Daniel Vetter 2016-06-14 76 } while (prev != old);
1a75a222f5ca106 Daniel Vetter 2016-06-14 77 spin_unlock_bh(&lock_data->spinlock);
1a75a222f5ca106 Daniel Vetter 2016-06-14 78
1a75a222f5ca106 Daniel Vetter 2016-06-14 79 if (_DRM_LOCKING_CONTEXT(old) == context) {
1a75a222f5ca106 Daniel Vetter 2016-06-14 80 if (old & _DRM_LOCK_HELD) {
1a75a222f5ca106 Daniel Vetter 2016-06-14 81 if (context != DRM_KERNEL_CONTEXT) {
1a75a222f5ca106 Daniel Vetter 2016-06-14 82 DRM_ERROR("%d holds heavyweight lock\n",
1a75a222f5ca106 Daniel Vetter 2016-06-14 83 context);
1a75a222f5ca106 Daniel Vetter 2016-06-14 84 }
1a75a222f5ca106 Daniel Vetter 2016-06-14 85 return 0;
1a75a222f5ca106 Daniel Vetter 2016-06-14 86 }
1a75a222f5ca106 Daniel Vetter 2016-06-14 87 }
1a75a222f5ca106 Daniel Vetter 2016-06-14 88
1a75a222f5ca106 Daniel Vetter 2016-06-14 89 if ((_DRM_LOCKING_CONTEXT(new)) == context && (new & _DRM_LOCK_HELD)) {
1a75a222f5ca106 Daniel Vetter 2016-06-14 90 /* Have lock */
1a75a222f5ca106 Daniel Vetter 2016-06-14 91 return 1;
1a75a222f5ca106 Daniel Vetter 2016-06-14 92 }
1a75a222f5ca106 Daniel Vetter 2016-06-14 93 return 0;
1a75a222f5ca106 Daniel Vetter 2016-06-14 94 }
1a75a222f5ca106 Daniel Vetter 2016-06-14 95
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 37481 bytes --]
^ permalink raw reply
* [PATCH kernel] powerpc/powernv/npu: Do not attempt NPU2 setup on old P8's NPU
From: Alexey Kardashevskiy @ 2020-11-13 5:04 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Alexey Kardashevskiy, David Gibson
We execute certain NPU2 setup code (such as mapping an LPID to a device
in NPU2) unconditionally if an Nvlink bridge is detected. However this
cannot succeed on P8+ machines and errors appear in dmesg. This is
harmless as skiboot returns an error and the only place we check it is
vfio-pci but that code does not get called on P8+ either.
This adds a check if pnv_npu2_xxx helpers are called on a machine with
NPU2 which initializes pnv_phb::npu in pnv_npu2_init();
pnv_phb::npu==NULL on P8+.
Fixes: 1b785611e119 ("powerpc/powernv/npu: Add release_ownership hook")
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
arch/powerpc/platforms/powernv/npu-dma.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index abeaa533b976..7b28b284a594 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -623,6 +623,11 @@ int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
return -ENODEV;
hose = pci_bus_to_host(npdev->bus);
+ if (hose->npu == NULL) {
+ dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
+ return 0;
+ }
+
nphb = hose->private_data;
dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n",
@@ -670,6 +675,11 @@ int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev)
return -ENODEV;
hose = pci_bus_to_host(npdev->bus);
+ if (hose->npu == NULL) {
+ dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
+ return 0;
+ }
+
nphb = hose->private_data;
dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n",
--
2.17.1
^ permalink raw reply related
* RE: [PATCH 11/25] soc: fsl: qe: qe_common: Fix misnamed function attribute 'addr'
From: Leo Li @ 2020-11-12 23:12 UTC (permalink / raw)
To: Lee Jones, linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org, Qiang Zhao, Scott Wood, act,
Dan Malek, Software, Inc, Vitaly Bordug,
linuxppc-dev@lists.ozlabs.org
In-Reply-To: <20201112103300.GE1997862@dell>
> -----Original Message-----
> From: Lee Jones <lee.jones@linaro.org>
> Sent: Thursday, November 12, 2020 4:33 AM
> To: linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org;
> Qiang Zhao <qiang.zhao@nxp.com>; Leo Li <leoyang.li@nxp.com>; Scott
> Wood <scottwood@freescale.com>; act <dmalek@jlc.net>; Dan Malek
> <dan@embeddedalley.com>; Software, Inc <source@mvista.com>; Vitaly
> Bordug <vbordug@ru.mvista.com>; linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH 11/25] soc: fsl: qe: qe_common: Fix misnamed function
> attribute 'addr'
>
> On Tue, 03 Nov 2020, Lee Jones wrote:
>
> > Fixes the following W=1 kernel build warning(s):
> >
> > drivers/soc/fsl/qe/qe_common.c:237: warning: Function parameter or
> member 'addr' not described in 'cpm_muram_dma'
> > drivers/soc/fsl/qe/qe_common.c:237: warning: Excess function parameter
> 'offset' description in 'cpm_muram_dma'
> >
> > Cc: Qiang Zhao <qiang.zhao@nxp.com>
> > Cc: Li Yang <leoyang.li@nxp.com>
> > Cc: Scott Wood <scottwood@freescale.com>
> > Cc: act <dmalek@jlc.net>
> > Cc: Dan Malek <dan@embeddedalley.com>
> > Cc: "Software, Inc" <source@mvista.com>
> > Cc: Vitaly Bordug <vbordug@ru.mvista.com>
> > Cc: linuxppc-dev@lists.ozlabs.org
> > Signed-off-by: Lee Jones <lee.jones@linaro.org>
> > ---
> > drivers/soc/fsl/qe/qe_common.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/soc/fsl/qe/qe_common.c
> > b/drivers/soc/fsl/qe/qe_common.c index 75075591f6308..497a7e0fd0272
> > 100644
> > --- a/drivers/soc/fsl/qe/qe_common.c
> > +++ b/drivers/soc/fsl/qe/qe_common.c
> > @@ -231,7 +231,7 @@ EXPORT_SYMBOL(cpm_muram_offset);
> >
> > /**
> > * cpm_muram_dma - turn a muram virtual address into a DMA address
> > - * @offset: virtual address from cpm_muram_addr() to convert
> > + * @addr: virtual address from cpm_muram_addr() to convert
> > */
> > dma_addr_t cpm_muram_dma(void __iomem *addr) {
>
> Any idea who will pick this up?
I can pick them up through my tree, but I haven't created the for-next branch for the next kernel yet. Will look through this series soon. Thanks.
>
> --
> Lee Jones [李琼斯]
> Senior Technical Lead - Developer Services Linaro.org │ Open source
> software for Arm SoCs Follow Linaro: Facebook | Twitter | Blog
^ permalink raw reply
* [PATCH net-next 12/12] ibmvnic: Do not replenish RX buffers after every polling loop
From: Thomas Falcon @ 2020-11-12 19:10 UTC (permalink / raw)
To: netdev
Cc: cforno12, ljp, ricklind, dnbanerg, tlfalcon, drt, brking, sukadev,
linuxppc-dev
In-Reply-To: <1605208207-1896-1-git-send-email-tlfalcon@linux.ibm.com>
From: "Dwip N. Banerjee" <dnbanerg@us.ibm.com>
Reduce the amount of time spent replenishing RX buffers by
only doing so once available buffers has fallen under a certain
threshold, in this case half of the total number of buffers, or
if the polling loop exits before the packets processed is less
than its budget.
Signed-off-by: Dwip N. Banerjee <dnbanerg@us.ibm.com>
---
drivers/net/ethernet/ibm/ibmvnic.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 0791dbf1cba8..66f8068bee5a 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2476,7 +2476,10 @@ static int ibmvnic_poll(struct napi_struct *napi, int budget)
frames_processed++;
}
- if (adapter->state != VNIC_CLOSING)
+ if (adapter->state != VNIC_CLOSING &&
+ ((atomic_read(&adapter->rx_pool[scrq_num].available) <
+ adapter->req_rx_add_entries_per_subcrq / 2) ||
+ frames_processed < budget))
replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]);
if (frames_processed < budget) {
if (napi_complete_done(napi, frames_processed)) {
--
2.26.2
^ permalink raw reply related
* [PATCH net-next 11/12] ibmvnic: Use netdev_alloc_skb instead of alloc_skb to replenish RX buffers
From: Thomas Falcon @ 2020-11-12 19:10 UTC (permalink / raw)
To: netdev
Cc: cforno12, ljp, ricklind, dnbanerg, tlfalcon, drt, brking, sukadev,
linuxppc-dev
In-Reply-To: <1605208207-1896-1-git-send-email-tlfalcon@linux.ibm.com>
From: "Dwip N. Banerjee" <dnbanerg@us.ibm.com>
Take advantage of the additional optimizations in netdev_alloc_skb when
allocating socket buffers to be used for packet reception.
Signed-off-by: Dwip N. Banerjee <dnbanerg@us.ibm.com>
---
drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index e48a44d8884c..0791dbf1cba8 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -323,7 +323,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
rx_scrq = adapter->rx_scrq[pool->index];
ind_bufp = &rx_scrq->ind_buf;
for (i = 0; i < count; ++i) {
- skb = alloc_skb(pool->buff_size, GFP_ATOMIC);
+ skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);
if (!skb) {
dev_err(dev, "Couldn't replenish rx buff\n");
adapter->replenish_no_mem++;
--
2.26.2
^ permalink raw reply related
* [PATCH net-next 10/12] ibmvnic: Correctly re-enable interrupts in NAPI polling routine
From: Thomas Falcon @ 2020-11-12 19:10 UTC (permalink / raw)
To: netdev
Cc: cforno12, ljp, ricklind, dnbanerg, tlfalcon, drt, brking, sukadev,
linuxppc-dev
In-Reply-To: <1605208207-1896-1-git-send-email-tlfalcon@linux.ibm.com>
From: "Dwip N. Banerjee" <dnbanerg@us.ibm.com>
If the current NAPI polling loop exits without completing it's
budget, only re-enable interrupts if there are no entries remaining
in the queue and napi_complete_done is successful. If there are entries
remaining on the queue that were missed, restart the polling loop.
Signed-off-by: Dwip N. Banerjee <dnbanerg@us.ibm.com>
---
drivers/net/ethernet/ibm/ibmvnic.c | 37 +++++++++++++++++++-----------
1 file changed, 23 insertions(+), 14 deletions(-)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index dc42bdc6d3e1..e48a44d8884c 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2387,10 +2387,17 @@ static void remove_buff_from_pool(struct ibmvnic_adapter *adapter,
static int ibmvnic_poll(struct napi_struct *napi, int budget)
{
- struct net_device *netdev = napi->dev;
- struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- int scrq_num = (int)(napi - adapter->napi);
- int frames_processed = 0;
+ struct ibmvnic_sub_crq_queue *rx_scrq;
+ struct ibmvnic_adapter *adapter;
+ struct net_device *netdev;
+ int frames_processed;
+ int scrq_num;
+
+ netdev = napi->dev;
+ adapter = netdev_priv(netdev);
+ scrq_num = (int)(napi - adapter->napi);
+ frames_processed = 0;
+ rx_scrq = adapter->rx_scrq[scrq_num];
restart_poll:
while (frames_processed < budget) {
@@ -2403,16 +2410,16 @@ static int ibmvnic_poll(struct napi_struct *napi, int budget)
if (unlikely(test_bit(0, &adapter->resetting) &&
adapter->reset_reason != VNIC_RESET_NON_FATAL)) {
- enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]);
+ enable_scrq_irq(adapter, rx_scrq);
napi_complete_done(napi, frames_processed);
return frames_processed;
}
- if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num]))
+ if (!pending_scrq(adapter, rx_scrq))
break;
/* ensure that we do not prematurely exit the polling loop */
dma_rmb();
- next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]);
+ next = ibmvnic_next_scrq(adapter, rx_scrq);
rx_buff =
(struct ibmvnic_rx_buff *)be64_to_cpu(next->
rx_comp.correlator);
@@ -2471,14 +2478,16 @@ static int ibmvnic_poll(struct napi_struct *napi, int budget)
if (adapter->state != VNIC_CLOSING)
replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]);
-
if (frames_processed < budget) {
- enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]);
- napi_complete_done(napi, frames_processed);
- if (pending_scrq(adapter, adapter->rx_scrq[scrq_num]) &&
- napi_reschedule(napi)) {
- disable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]);
- goto restart_poll;
+ if (napi_complete_done(napi, frames_processed)) {
+ enable_scrq_irq(adapter, rx_scrq);
+ if (pending_scrq(adapter, rx_scrq)) {
+ rmb();
+ if (napi_reschedule(napi)) {
+ disable_scrq_irq(adapter, rx_scrq);
+ goto restart_poll;
+ }
+ }
}
}
return frames_processed;
--
2.26.2
^ permalink raw reply related
* [PATCH net-next 06/12] ibmvnic: Clean up TX code and TX buffer data structure
From: Thomas Falcon @ 2020-11-12 19:10 UTC (permalink / raw)
To: netdev
Cc: cforno12, ljp, ricklind, dnbanerg, tlfalcon, drt, brking, sukadev,
linuxppc-dev
In-Reply-To: <1605208207-1896-1-git-send-email-tlfalcon@linux.ibm.com>
Remove unused and superfluous code and members in
existing TX implementation and data structures.
Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
---
drivers/net/ethernet/ibm/ibmvnic.c | 31 +++++++++++-------------------
drivers/net/ethernet/ibm/ibmvnic.h | 8 --------
2 files changed, 11 insertions(+), 28 deletions(-)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index c9437b2d1aa8..b523da20bffc 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1496,17 +1496,18 @@ static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len,
* L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect.
*/
-static void build_hdr_descs_arr(struct ibmvnic_tx_buff *txbuff,
+static void build_hdr_descs_arr(struct sk_buff *skb,
+ union sub_crq *indir_arr,
int *num_entries, u8 hdr_field)
{
int hdr_len[3] = {0, 0, 0};
+ u8 hdr_data[140] = {0};
int tot_len;
- u8 *hdr_data = txbuff->hdr_data;
- tot_len = build_hdr_data(hdr_field, txbuff->skb, hdr_len,
- txbuff->hdr_data);
+ tot_len = build_hdr_data(hdr_field, skb, hdr_len,
+ hdr_data);
*num_entries += create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len,
- txbuff->indir_arr + 1);
+ indir_arr + 1);
}
static int ibmvnic_xmit_workarounds(struct sk_buff *skb,
@@ -1537,6 +1538,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
unsigned int tx_send_failed = 0;
netdev_tx_t ret = NETDEV_TX_OK;
unsigned int tx_map_failed = 0;
+ union sub_crq indir_arr[16];
unsigned int tx_dropped = 0;
unsigned int tx_packets = 0;
unsigned int tx_bytes = 0;
@@ -1620,11 +1622,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_buff = &tx_pool->tx_buff[index];
tx_buff->skb = skb;
- tx_buff->data_dma[0] = data_dma_addr;
- tx_buff->data_len[0] = skb->len;
tx_buff->index = index;
tx_buff->pool_index = queue_num;
- tx_buff->last_frag = true;
memset(&tx_crq, 0, sizeof(tx_crq));
tx_crq.v1.first = IBMVNIC_CRQ_CMD;
@@ -1671,7 +1670,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
}
if ((*hdrs >> 7) & 1)
- build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
+ build_hdr_descs_arr(skb, indir_arr, &num_entries, *hdrs);
netdev_tx_sent_queue(txq, skb->len);
@@ -1688,8 +1687,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
ind_bufp->index = 0;
}
- tx_buff->indir_arr[0] = tx_crq;
- memcpy(&ind_bufp->indir_arr[ind_bufp->index], tx_buff->indir_arr,
+ indir_arr[0] = tx_crq;
+ memcpy(&ind_bufp->indir_arr[ind_bufp->index], &indir_arr[0],
num_entries * sizeof(struct ibmvnic_generic_scrq));
ind_bufp->index += num_entries;
if (!netdev_xmit_more() || netif_xmit_stopped(txq) ||
@@ -3140,7 +3139,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
struct netdev_queue *txq;
union sub_crq *next;
int index;
- int i, j;
+ int i;
restart_loop:
while (pending_scrq(adapter, scrq)) {
@@ -3169,14 +3168,6 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
}
txbuff = &tx_pool->tx_buff[index];
-
- for (j = 0; j < IBMVNIC_MAX_FRAGS_PER_CRQ; j++) {
- if (!txbuff->data_dma[j])
- continue;
-
- txbuff->data_dma[j] = 0;
- }
-
num_packets++;
num_entries += txbuff->num_entries;
if (txbuff->skb) {
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 05bf212d387d..11af1f29210b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -225,8 +225,6 @@ struct ibmvnic_tx_comp_desc {
#define IBMVNIC_TCP_CHKSUM 0x20
#define IBMVNIC_UDP_CHKSUM 0x08
-#define IBMVNIC_MAX_FRAGS_PER_CRQ 3
-
struct ibmvnic_tx_desc {
u8 first;
u8 type;
@@ -897,14 +895,8 @@ struct ibmvnic_long_term_buff {
struct ibmvnic_tx_buff {
struct sk_buff *skb;
- dma_addr_t data_dma[IBMVNIC_MAX_FRAGS_PER_CRQ];
- unsigned int data_len[IBMVNIC_MAX_FRAGS_PER_CRQ];
int index;
int pool_index;
- bool last_frag;
- union sub_crq indir_arr[6];
- u8 hdr_data[140];
- dma_addr_t indir_dma;
int num_entries;
};
--
2.26.2
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox