* [PATCH v2 2/4] powerpc/xive: fix hcall H_INT_RESET to support long busy delays
From: Cédric Le Goater @ 2018-05-08 7:05 UTC (permalink / raw)
To: linuxppc-dev
Cc: Michael Ellerman, Benjamin Herrenschmidt, Cédric Le Goater
In-Reply-To: <20180508070517.947-1-clg@kaod.org>
The hcall H_INT_RESET can take some time to complete and in such cases
it returns H_LONG_BUSY_* codes requiring the machine to sleep for a
while before retrying.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
Changes since v1:
- replaced msleep() with mdelay() as some calling paths are under lock.
arch/powerpc/sysdev/xive/spapr.c | 52 ++++++++++++++++++++++++++++++++++++----
1 file changed, 47 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index 3cf5f8bf4c29..31dc73cacd45 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -19,6 +19,7 @@
#include <linux/spinlock.h>
#include <linux/cpumask.h>
#include <linux/mm.h>
+#include <linux/delay.h>
#include <asm/prom.h>
#include <asm/io.h>
@@ -108,6 +109,51 @@ static void xive_irq_bitmap_free(int irq)
}
}
+
+/* Based on the similar routines in RTAS */
+static unsigned int plpar_busy_delay_time(long rc)
+{
+ unsigned int ms = 0;
+
+ if (H_IS_LONG_BUSY(rc)) {
+ ms = get_longbusy_msecs(rc);
+ } else if (rc == H_BUSY) {
+ ms = 10; /* seems appropriate for XIVE hcalls */
+ }
+
+ return ms;
+}
+
+static unsigned int plpar_busy_delay(int rc)
+{
+ unsigned int ms;
+
+ ms = plpar_busy_delay_time(rc);
+ if (ms)
+ mdelay(ms);
+
+ return ms;
+}
+
+/*
+ * Note: this call has a partition wide scope and can take a while to
+ * complete. If it returns H_LONG_BUSY_* it should be retried
+ * periodically.
+ */
+static long plpar_int_reset(unsigned long flags)
+{
+ long rc;
+
+ do {
+ rc = plpar_hcall_norets(H_INT_RESET, flags);
+ } while (plpar_busy_delay(rc));
+
+ if (rc)
+ pr_err("H_INT_RESET failed %ld\n", rc);
+
+ return rc;
+}
+
static long plpar_int_get_source_info(unsigned long flags,
unsigned long lisn,
unsigned long *src_flags,
@@ -433,11 +479,7 @@ static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc)
static void xive_spapr_shutdown(void)
{
- long rc;
-
- rc = plpar_hcall_norets(H_INT_RESET, 0);
- if (rc)
- pr_err("H_INT_RESET failed %ld\n", rc);
+ plpar_int_reset(0);
}
/*
--
2.13.6
^ permalink raw reply related
* [PATCH v2 4/4] powerpc/xive: prepare all hcalls to support long busy delays
From: Cédric Le Goater @ 2018-05-08 7:05 UTC (permalink / raw)
To: linuxppc-dev
Cc: Michael Ellerman, Benjamin Herrenschmidt, Cédric Le Goater
In-Reply-To: <20180508070517.947-1-clg@kaod.org>
This is not the case for the moment, but future releases of pHyp might
need to introduce some synchronisation routines under the hood which
would make the XIVE hcalls take longer to complete.
As this was done for H_INT_RESET, let's wrap the other hcalls in a
loop catching the H_LONG_BUSY_* codes.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
arch/powerpc/sysdev/xive/spapr.c | 36 ++++++++++++++++++++++++++++--------
1 file changed, 28 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index 31dc73cacd45..730284f838c8 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -164,7 +164,10 @@ static long plpar_int_get_source_info(unsigned long flags,
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
long rc;
- rc = plpar_hcall(H_INT_GET_SOURCE_INFO, retbuf, flags, lisn);
+ do {
+ rc = plpar_hcall(H_INT_GET_SOURCE_INFO, retbuf, flags, lisn);
+ } while (plpar_busy_delay(rc));
+
if (rc) {
pr_err("H_INT_GET_SOURCE_INFO lisn=%ld failed %ld\n", lisn, rc);
return rc;
@@ -194,8 +197,11 @@ static long plpar_int_set_source_config(unsigned long flags,
flags, lisn, target, prio, sw_irq);
- rc = plpar_hcall_norets(H_INT_SET_SOURCE_CONFIG, flags, lisn,
- target, prio, sw_irq);
+ do {
+ rc = plpar_hcall_norets(H_INT_SET_SOURCE_CONFIG, flags, lisn,
+ target, prio, sw_irq);
+ } while (plpar_busy_delay(rc));
+
if (rc) {
pr_err("H_INT_SET_SOURCE_CONFIG lisn=%ld target=%lx prio=%lx failed %ld\n",
lisn, target, prio, rc);
@@ -214,7 +220,11 @@ static long plpar_int_get_queue_info(unsigned long flags,
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
long rc;
- rc = plpar_hcall(H_INT_GET_QUEUE_INFO, retbuf, flags, target, priority);
+ do {
+ rc = plpar_hcall(H_INT_GET_QUEUE_INFO, retbuf, flags, target,
+ priority);
+ } while (plpar_busy_delay(rc));
+
if (rc) {
pr_err("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld failed %ld\n",
target, priority, rc);
@@ -241,8 +251,11 @@ static long plpar_int_set_queue_config(unsigned long flags,
pr_devel("H_INT_SET_QUEUE_CONFIG flags=%lx target=%lx priority=%lx qpage=%lx qsize=%lx\n",
flags, target, priority, qpage, qsize);
- rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target,
- priority, qpage, qsize);
+ do {
+ rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target,
+ priority, qpage, qsize);
+ } while (plpar_busy_delay(rc));
+
if (rc) {
pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=%lx returned %ld\n",
target, priority, qpage, rc);
@@ -256,7 +269,10 @@ static long plpar_int_sync(unsigned long flags, unsigned long lisn)
{
long rc;
- rc = plpar_hcall_norets(H_INT_SYNC, flags, lisn);
+ do {
+ rc = plpar_hcall_norets(H_INT_SYNC, flags, lisn);
+ } while (plpar_busy_delay(rc));
+
if (rc) {
pr_err("H_INT_SYNC lisn=%ld returned %ld\n", lisn, rc);
return rc;
@@ -277,7 +293,11 @@ static long plpar_int_esb(unsigned long flags,
pr_devel("H_INT_ESB flags=%lx lisn=%lx offset=%lx in=%lx\n",
flags, lisn, offset, in_data);
- rc = plpar_hcall(H_INT_ESB, retbuf, flags, lisn, offset, in_data);
+ do {
+ rc = plpar_hcall(H_INT_ESB, retbuf, flags, lisn, offset,
+ in_data);
+ } while (plpar_busy_delay(rc));
+
if (rc) {
pr_err("H_INT_ESB lisn=%ld offset=%ld returned %ld\n",
lisn, offset, rc);
--
2.13.6
^ permalink raw reply related
* Re: [PATCH 01/17] powerpc/nohash: remove hash related code from nohash headers.
From: Aneesh Kumar K.V @ 2018-05-08 8:25 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <7f5c8870b0ad4b64cae354ad2e09ebdefa79bd06.1525435203.git.christophe.leroy@c-s.fr>
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> When nohash and book3s header were split, some hash related stuff
> remained in the nohash header. This patch removes them.
>
Thanks for doing this. This was on the TODO list for a long time. When
we did the split for book3s, I mostly copied the generic headers to
nohash.
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
> Removed the call to pte_young() as it fails, back to using PAGE_ACCESSED directly.
>
> arch/powerpc/include/asm/nohash/32/pgtable.h | 29 +++------------------
> arch/powerpc/include/asm/nohash/64/pgtable.h | 16 ++----------
> arch/powerpc/include/asm/nohash/pgtable.h | 38 +++-------------------------
> arch/powerpc/include/asm/nohash/pte-book3e.h | 1 -
> 4 files changed, 10 insertions(+), 74 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
> index 03bbd1149530..140f8e74b478 100644
> --- a/arch/powerpc/include/asm/nohash/32/pgtable.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
> @@ -133,7 +133,7 @@ extern int icache_44x_need_flush;
> #ifndef __ASSEMBLY__
>
> #define pte_clear(mm, addr, ptep) \
> - do { pte_update(ptep, ~_PAGE_HASHPTE, 0); } while (0)
> + do { pte_update(ptep, ~0, 0); } while (0)
>
> #define pmd_none(pmd) (!pmd_val(pmd))
> #define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD)
> @@ -146,21 +146,6 @@ static inline void pmd_clear(pmd_t *pmdp)
>
>
> /*
> - * When flushing the tlb entry for a page, we also need to flush the hash
> - * table entry. flush_hash_pages is assembler (for speed) in hashtable.S.
> - */
> -extern int flush_hash_pages(unsigned context, unsigned long va,
> - unsigned long pmdval, int count);
> -
> -/* Add an HPTE to the hash table */
> -extern void add_hash_page(unsigned context, unsigned long va,
> - unsigned long pmdval);
> -
> -/* Flush an entry from the TLB/hash table */
> -extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep,
> - unsigned long address);
> -
> -/*
> * PTE updates. This function is called whenever an existing
> * valid PTE is updated. This does -not- include set_pte_at()
> * which nowadays only sets a new PTE.
> @@ -246,12 +231,6 @@ static inline int __ptep_test_and_clear_young(unsigned int context, unsigned lon
> {
> unsigned long old;
> old = pte_update(ptep, _PAGE_ACCESSED, 0);
> -#if _PAGE_HASHPTE != 0
> - if (old & _PAGE_HASHPTE) {
> - unsigned long ptephys = __pa(ptep) & PAGE_MASK;
> - flush_hash_pages(context, addr, ptephys, 1);
> - }
> -#endif
> return (old & _PAGE_ACCESSED) != 0;
> }
> #define ptep_test_and_clear_young(__vma, __addr, __ptep) \
> @@ -261,7 +240,7 @@ static inline int __ptep_test_and_clear_young(unsigned int context, unsigned lon
> static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
> pte_t *ptep)
> {
> - return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0));
> + return __pte(pte_update(ptep, ~0, 0));
> }
>
> #define __HAVE_ARCH_PTEP_SET_WRPROTECT
> @@ -289,7 +268,7 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
> }
>
> #define __HAVE_ARCH_PTE_SAME
> -#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)
> +#define pte_same(A,B) ((pte_val(A) ^ pte_val(B)) == 0)
>
> /*
> * Note that on Book E processors, the pmd contains the kernel virtual
> @@ -330,7 +309,7 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
> /*
> * Encode and decode a swap entry.
> * Note that the bits we use in a PTE for representing a swap entry
> - * must not include the _PAGE_PRESENT bit or the _PAGE_HASHPTE bit (if used).
> + * must not include the _PAGE_PRESENT bit.
> * -- paulus
> */
> #define __swp_type(entry) ((entry).val & 0x1f)
> diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
> index 5c5f75d005ad..4f6f5a27bfb5 100644
> --- a/arch/powerpc/include/asm/nohash/64/pgtable.h
> +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
> @@ -173,8 +173,6 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
> /* to find an entry in a kernel page-table-directory */
> /* This now only contains the vmalloc pages */
> #define pgd_offset_k(address) pgd_offset(&init_mm, address)
> -extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
> - pte_t *ptep, unsigned long pte, int huge);
>
> /* Atomic PTE updates */
> static inline unsigned long pte_update(struct mm_struct *mm,
> @@ -205,11 +203,6 @@ static inline unsigned long pte_update(struct mm_struct *mm,
> if (!huge)
> assert_pte_locked(mm, addr);
>
> -#ifdef CONFIG_PPC_BOOK3S_64
> - if (old & _PAGE_HASHPTE)
> - hpte_need_flush(mm, addr, ptep, old, huge);
> -#endif
> -
> return old;
> }
>
> @@ -218,7 +211,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
> {
> unsigned long old;
>
> - if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
> + if ((pte_val(*ptep) & _PAGE_ACCESSED) == 0)
> return 0;
> old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
> return (old & _PAGE_ACCESSED) != 0;
> @@ -312,7 +305,7 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
> }
>
> #define __HAVE_ARCH_PTE_SAME
> -#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
> +#define pte_same(A,B) ((pte_val(A) ^ pte_val(B)) == 0)
>
> #define pte_ERROR(e) \
> pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
> @@ -324,11 +317,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
> /* Encode and de-code a swap entry */
> #define MAX_SWAPFILES_CHECK() do { \
> BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
> - /* \
> - * Don't have overlapping bits with _PAGE_HPTEFLAGS \
> - * We filter HPTEFLAGS on set_pte. \
> - */ \
> - BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
> } while (0)
> /*
> * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
> diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
> index c56de1e8026f..f2fe3cbe90af 100644
> --- a/arch/powerpc/include/asm/nohash/pgtable.h
> +++ b/arch/powerpc/include/asm/nohash/pgtable.h
> @@ -148,37 +148,16 @@ extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
> static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
> pte_t *ptep, pte_t pte, int percpu)
> {
> -#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
> - /* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
> - * helper pte_update() which does an atomic update. We need to do that
> - * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
> - * per-CPU PTE such as a kmap_atomic, we do a simple update preserving
> - * the hash bits instead (ie, same as the non-SMP case)
> - */
> - if (percpu)
> - *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
> - | (pte_val(pte) & ~_PAGE_HASHPTE));
> - else
> - pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
> -
> -#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
> +#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
> /* Second case is 32-bit with 64-bit PTE. In this case, we
> * can just store as long as we do the two halves in the right order
> - * with a barrier in between. This is possible because we take care,
> - * in the hash code, to pre-invalidate if the PTE was already hashed,
> - * which synchronizes us with any concurrent invalidation.
> - * In the percpu case, we also fallback to the simple update preserving
> - * the hash bits
> + * with a barrier in between.
> + * In the percpu case, we also fallback to the simple update
> */
> if (percpu) {
> - *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
> - | (pte_val(pte) & ~_PAGE_HASHPTE));
> + *ptep = pte;
> return;
> }
> -#if _PAGE_HASHPTE != 0
> - if (pte_val(*ptep) & _PAGE_HASHPTE)
> - flush_hash_entry(mm, ptep, addr);
> -#endif
> __asm__ __volatile__("\
> stw%U0%X0 %2,%0\n\
> eieio\n\
> @@ -186,15 +165,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
> : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
> : "r" (pte) : "memory");
>
> -#elif defined(CONFIG_PPC_STD_MMU_32)
> - /* Third case is 32-bit hash table in UP mode, we need to preserve
> - * the _PAGE_HASHPTE bit since we may not have invalidated the previous
> - * translation in the hash yet (done in a subsequent flush_tlb_xxx())
> - * and see we need to keep track that this PTE needs invalidating
> - */
> - *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
> - | (pte_val(pte) & ~_PAGE_HASHPTE));
> -
> #else
> /* Anything else just stores the PTE normally. That covers all 64-bit
> * cases, and 32-bit non-hash with 32-bit PTEs.
> diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h
> index ccee8eb509bb..9ff51b4c0cac 100644
> --- a/arch/powerpc/include/asm/nohash/pte-book3e.h
> +++ b/arch/powerpc/include/asm/nohash/pte-book3e.h
> @@ -57,7 +57,6 @@
> #define _PAGE_USER (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */
> #define _PAGE_PRIVILEGED (_PAGE_BAP_SR)
>
> -#define _PAGE_HASHPTE 0
> #define _PAGE_BUSY 0
>
> #define _PAGE_SPECIAL _PAGE_SW0
> --
> 2.13.3
^ permalink raw reply
* Re: [PATCH 02/17] powerpc/nohash: remove _PAGE_BUSY
From: Aneesh Kumar K.V @ 2018-05-08 8:26 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <7332568467307f9fc79eeef47507909ddc423c4b.1525435203.git.christophe.leroy@c-s.fr>
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> _PAGE_BUSY is always 0, remove it
>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
> arch/powerpc/include/asm/nohash/64/pgtable.h | 10 +++-------
> arch/powerpc/include/asm/nohash/pte-book3e.h | 5 -----
> 2 files changed, 3 insertions(+), 12 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
> index 4f6f5a27bfb5..c3559d7a94fb 100644
> --- a/arch/powerpc/include/asm/nohash/64/pgtable.h
> +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
> @@ -186,14 +186,12 @@ static inline unsigned long pte_update(struct mm_struct *mm,
>
> __asm__ __volatile__(
> "1: ldarx %0,0,%3 # pte_update\n\
> - andi. %1,%0,%6\n\
> - bne- 1b \n\
> andc %1,%0,%4 \n\
> - or %1,%1,%7\n\
> + or %1,%1,%6\n\
> stdcx. %1,0,%3 \n\
> bne- 1b"
> : "=&r" (old), "=&r" (tmp), "=m" (*ptep)
> - : "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
> + : "r" (ptep), "r" (clr), "m" (*ptep), "r" (set)
> : "cc" );
> #else
> unsigned long old = pte_val(*ptep);
> @@ -290,13 +288,11 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
>
> __asm__ __volatile__(
> "1: ldarx %0,0,%4\n\
> - andi. %1,%0,%6\n\
> - bne- 1b \n\
> or %0,%3,%0\n\
> stdcx. %0,0,%4\n\
> bne- 1b"
> :"=&r" (old), "=&r" (tmp), "=m" (*ptep)
> - :"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
> + :"r" (bits), "r" (ptep), "m" (*ptep)
> :"cc");
> #else
> unsigned long old = pte_val(*ptep);
> diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h
> index 9ff51b4c0cac..12730b81cd98 100644
> --- a/arch/powerpc/include/asm/nohash/pte-book3e.h
> +++ b/arch/powerpc/include/asm/nohash/pte-book3e.h
> @@ -57,13 +57,8 @@
> #define _PAGE_USER (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */
> #define _PAGE_PRIVILEGED (_PAGE_BAP_SR)
>
> -#define _PAGE_BUSY 0
> -
> #define _PAGE_SPECIAL _PAGE_SW0
>
> -/* Flags to be preserved on PTE modifications */
> -#define _PAGE_HPTEFLAGS _PAGE_BUSY
> -
> /* Base page size */
> #ifdef CONFIG_PPC_64K_PAGES
> #define _PAGE_PSIZE _PAGE_PSIZE_64K
> --
> 2.13.3
^ permalink raw reply
* Re: [PATCH v3] ppc64le livepatch: implement reliable stacktrace for newer consistency models
From: Torsten Duwe @ 2018-05-08 8:38 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: Michael Ellerman, Jiri Kosina, linuxppc-dev, linux-kernel,
Nicholas Piggin, live-patching
In-Reply-To: <20180507154208.jo4s43olrf5a3hw4@treble>
On Mon, 7 May 2018 10:42:08 -0500
Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> The subject doesn't actively describe what the patch does, maybe
> change it to something like:
>
> powerpc: Add support for HAVE_RELIABLE_STACKTRACE
>
> or maybe
>
> powerpc: Add support for livepatch consistency model
Maybe $SUBJECT? You're absolutely right, the old subject was just a
leftover of my original attempt to just set the flag, before Miroslav
corrected me. I just kept on copying it without a second thought. Thanks
for noting.
> Otherwise it looks great to me.
>
> Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
>
Thanks.
Torsten
^ permalink raw reply
* Re: [PATCH 09/17] powerpc: make __ioremap_caller() common to PPC32 and PPC64
From: Aneesh Kumar K.V @ 2018-05-08 9:56 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman
Cc: linux-kernel, linuxppc-dev
In-Reply-To: <457781f2de403852ba2a60257c3d9aca75c4d2c8.1525435203.git.christophe.leroy@c-s.fr>
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
> arch/powerpc/include/asm/book3s/64/pgtable.h | 1 +
> arch/powerpc/mm/ioremap.c | 126 +++++++--------------------
> 2 files changed, 34 insertions(+), 93 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index c5c6ead06bfb..2bebdd8302cb 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -18,6 +18,7 @@
> #define _PAGE_RO 0
> #define _PAGE_USER 0
> #define _PAGE_HWWRITE 0
> +#define _PAGE_COHERENT 0
This is something I was trying to avoid when I split the headers. We do
support _PAGE_USER; it is !_PAGE_PRIVILEGED. It gets really confusing
when we have these conflicting names because we are trying to make code
common across platforms.
>
> #define _PAGE_EXEC 0x00001 /* execute permission */
> #define _PAGE_WRITE 0x00002 /* write access allowed */
> diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c
> index 65d611d44d38..59be5dfcb3e9 100644
> --- a/arch/powerpc/mm/ioremap.c
> +++ b/arch/powerpc/mm/ioremap.c
> @@ -33,95 +33,6 @@ unsigned long ioremap_bot;
> unsigned long ioremap_bot = IOREMAP_BASE;
> #endif
>
> -#ifdef CONFIG_PPC32
> -
> -void __iomem *
> -__ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
> - void *caller)
> -{
> - unsigned long v, i;
> - phys_addr_t p;
> - int err;
> -
> - /* Make sure we have the base flags */
> - if ((flags & _PAGE_PRESENT) == 0)
> - flags |= pgprot_val(PAGE_KERNEL);
> -
> - /* Non-cacheable page cannot be coherent */
> - if (flags & _PAGE_NO_CACHE)
> - flags &= ~_PAGE_COHERENT;
> -
> - /*
> - * Choose an address to map it to.
> - * Once the vmalloc system is running, we use it.
> - * Before then, we use space going up from IOREMAP_BASE
> - * (ioremap_bot records where we're up to).
> - */
> - p = addr & PAGE_MASK;
> - size = PAGE_ALIGN(addr + size) - p;
> -
> - /*
> - * If the address lies within the first 16 MB, assume it's in ISA
> - * memory space
> - */
> - if (p < 16*1024*1024)
> - p += _ISA_MEM_BASE;
> -
> -#ifndef CONFIG_CRASH_DUMP
> - /*
> - * Don't allow anybody to remap normal RAM that we're using.
> - * mem_init() sets high_memory so only do the check after that.
> - */
> - if (slab_is_available() && (p < virt_to_phys(high_memory)) &&
> - page_is_ram(__phys_to_pfn(p))) {
> - printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n",
> - (unsigned long long)p, __builtin_return_address(0));
> - return NULL;
> - }
> -#endif
> -
> - if (size == 0)
> - return NULL;
> -
> - /*
> - * Is it already mapped? Perhaps overlapped by a previous
> - * mapping.
> - */
> - v = p_block_mapped(p);
> - if (v)
> - goto out;
> -
> - if (slab_is_available()) {
> - struct vm_struct *area;
> - area = get_vm_area_caller(size, VM_IOREMAP, caller);
> - if (area == 0)
> - return NULL;
> - area->phys_addr = p;
> - v = (unsigned long) area->addr;
> - } else {
> - v = ioremap_bot;
> - ioremap_bot += size;
> - }
> -
> - /*
> - * Should check if it is a candidate for a BAT mapping
> - */
> -
> - err = 0;
> - for (i = 0; i < size && err == 0; i += PAGE_SIZE)
> - err = map_kernel_page(v+i, p+i, flags);
> - if (err) {
> - if (slab_is_available())
> - vunmap((void *)v);
> - return NULL;
> - }
> -
> -out:
> - return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK));
> -}
> -
> -#else
> -
> /**
> * __ioremap_at - Low level function to establish the page tables
> * for an IO mapping
> @@ -135,6 +46,10 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
> if ((flags & _PAGE_PRESENT) == 0)
> flags |= pgprot_val(PAGE_KERNEL);
>
> + /* Non-cacheable page cannot be coherent */
> + if (flags & _PAGE_NO_CACHE)
> + flags &= ~_PAGE_COHERENT;
> +
> /* We don't support the 4K PFN hack with ioremap */
> if (flags & H_PAGE_4K_PFN)
> return NULL;
> @@ -187,6 +102,33 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
> if ((size == 0) || (paligned == 0))
> return NULL;
>
> + /*
> + * If the address lies within the first 16 MB, assume it's in ISA
> + * memory space
> + */
> + if (IS_ENABLED(CONFIG_PPC32) && paligned < 16*1024*1024)
> + paligned += _ISA_MEM_BASE;
> +
> + /*
> + * Don't allow anybody to remap normal RAM that we're using.
> + * mem_init() sets high_memory so only do the check after that.
> + */
> + if (!IS_ENABLED(CONFIG_CRASH_DUMP) &&
> + slab_is_available() && (paligned < virt_to_phys(high_memory)) &&
> + page_is_ram(__phys_to_pfn(paligned))) {
> + printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n",
> + (u64)paligned, __builtin_return_address(0));
> + return NULL;
> + }
> +
> + /*
> + * Is it already mapped? Perhaps overlapped by a previous
> + * mapping.
> + */
> + ret = (void __iomem *)p_block_mapped(paligned);
> + if (ret)
> + goto out;
> +
> if (slab_is_available()) {
> struct vm_struct *area;
>
> @@ -205,14 +147,12 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
> if (ret)
> ioremap_bot += size;
> }
> -
> +out:
> if (ret)
> - ret += addr & ~PAGE_MASK;
> + ret += (unsigned long)addr & ~PAGE_MASK;
> return ret;
> }
>
> -#endif
> -
> /*
> * Unmap an IO region and remove it from imalloc'd list.
> * Access to IO memory should be serialized by driver.
> --
> 2.13.3
^ permalink raw reply
* [PATCH v2 3/4] powerpc/xive: shutdown XIVE when kexec or kdump is performed
From: Cédric Le Goater @ 2018-05-08 7:05 UTC (permalink / raw)
To: linuxppc-dev
Cc: Michael Ellerman, Benjamin Herrenschmidt, Cédric Le Goater
In-Reply-To: <20180508070517.947-1-clg@kaod.org>
The hcall H_INT_RESET should be called to make sure XIVE is fully
reset.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
arch/powerpc/platforms/pseries/kexec.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 3fe126796975..46fbaef69a59 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -57,8 +57,11 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
}
}
- if (xive_enabled())
+ if (xive_enabled()) {
xive_kexec_teardown_cpu(secondary);
- else
+
+ if (!secondary)
+ xive_shutdown();
+ } else
xics_kexec_teardown_cpu(secondary);
}
--
2.13.6
^ permalink raw reply related
* [PATCH v2 1/4] powerpc/64/kexec: fix race in kexec when XIVE is shutdown
From: Cédric Le Goater @ 2018-05-08 7:05 UTC (permalink / raw)
To: linuxppc-dev
Cc: Michael Ellerman, Benjamin Herrenschmidt, Cédric Le Goater
In-Reply-To: <20180508070517.947-1-clg@kaod.org>
The kexec_state KEXEC_STATE_IRQS_OFF barrier is reached by all
secondary CPUs before the kexec_cpu_down() operation is called on
secondaries. This can raise conflicts and provoke errors in the XIVE
hcalls when XIVE is shut down with H_INT_RESET on the primary CPU.
To synchronize the kexec_cpu_down() operations and make sure the
secondaries have completed their task before the primary starts doing
the same, let's move the primary kexec_cpu_down() after the
KEXEC_STATE_REAL_MODE barrier.
This change of the ending sequence of kexec is mostly useful on the
pseries platform but it impacts also the powernv, ps3 and 85xx
platforms. powernv can be easily tested and fixed but some caution is
required for the other two.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
arch/powerpc/kernel/machine_kexec_64.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 1044bf15d5ed..a0f6f45005bd 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -231,16 +231,16 @@ static void kexec_prepare_cpus(void)
/* we are sure every CPU has IRQs off at this point */
kexec_all_irq_disabled = 1;
- /* after we tell the others to go down */
- if (ppc_md.kexec_cpu_down)
- ppc_md.kexec_cpu_down(0, 0);
-
/*
* Before removing MMU mappings make sure all CPUs have entered real
* mode:
*/
kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
+ /* after we tell the others to go down */
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(0, 0);
+
put_cpu();
}
--
2.13.6
^ permalink raw reply related
* Re: [PATCH v10 12/25] mm: cache some VMA fields in the vm_fault structure
From: Minchan Kim @ 2018-05-08 10:56 UTC (permalink / raw)
To: Laurent Dufour
Cc: akpm, mhocko, peterz, kirill, ak, dave, jack, Matthew Wilcox,
benh, mpe, paulus, Thomas Gleixner, Ingo Molnar, hpa, Will Deacon,
Sergey Senozhatsky, Andrea Arcangeli, Alexei Starovoitov,
kemi.wang, sergey.senozhatsky.work, Daniel Jordan, David Rientjes,
Jerome Glisse, Ganesh Mahendran, linux-kernel, linux-mm, haren,
khandual, npiggin, bsingharora, paulmck, Tim Chen, linuxppc-dev,
x86
In-Reply-To: <580c2760-2157-61fe-01ff-f928516fa23f@linux.vnet.ibm.com>
On Fri, May 04, 2018 at 11:10:54AM +0200, Laurent Dufour wrote:
> On 03/05/2018 17:42, Minchan Kim wrote:
> > On Thu, May 03, 2018 at 02:25:18PM +0200, Laurent Dufour wrote:
> >> On 23/04/2018 09:42, Minchan Kim wrote:
> >>> On Tue, Apr 17, 2018 at 04:33:18PM +0200, Laurent Dufour wrote:
> >>>> When handling speculative page fault, the vma->vm_flags and
> >>>> vma->vm_page_prot fields are read once the page table lock is released. So
> >>>> there is no more guarantee that these fields would not change behind our back.
> >>>> They will be saved in the vm_fault structure before the VMA is checked for
> >>>> changes.
> >>>
> >>> Sorry. I cannot understand.
> >>> If it is changed under us, what happens? If it's critical, why cannot we
> >>> check with seqcounter?
> >>> Clearly, I'm not understanding the logic here. However, it's a global
> >>> change without CONFIG_SPF so I want to be more careful.
> >>> It would be better to describe why we need to snapshot those values
> >>> into vm_fault rather than preventing the race.
> >>
> >> The idea is to go forward processing the page fault using the VMA's fields
> >> values saved in the vm_fault structure. Then once the pte are locked, the
> >> vma->sequence_counter is checked again and if something has changed in our back
> >> the speculative page fault processing is aborted.
> >
> > Sorry, still I don't understand why we should capture some fields to vm_fault.
> > If we found vma->seq_cnt is changed under pte lock, can't we just bail out and
> > fallback to classic fault handling?
> >
> > Maybe, I'm missing something clear now. It would be really helpful to understand
> > if you give some example.
>
> I'd rather say that I was not clear enough ;)
>
> Here is the point, when we deal with a speculative page fault, the mmap_sem is
> not taken, so parallel VMA changes can occur. When a VMA change is done
> which will impact the page fault processing, we assumed that the VMA sequence
> counter will be changed.
>
> In the page fault processing, at the time the PTE is locked, we checked the VMA
> sequence counter to detect changes done in our back. If no change is detected
> we can continue further. But this doesn't prevent the VMA from being changed
> behind our back while the PTE is locked. So VMA fields which are used while the PTE
> is locked must be saved to ensure that we are using *static* values.
> This is important since the PTE changes will be made on regards to these VMA
> fields and they need to be consistent. This concerns the vma->vm_flags and
> vma->vm_page_prot VMA fields.
>
> I hope I make this clear enough this time.
It's clearer at this point. Please include this nice explanation in the description.
Now, I am wondering how you synchronize those static value and vma's seqcount.
It must be in next patchset. I hope to grab a time to read it, asap.
Thanks.
^ permalink raw reply
* Re: [PATCH v10 02/25] x86/mm: define ARCH_SUPPORTS_SPECULATIVE_PAGE_FAULT
From: Punit Agrawal @ 2018-05-08 11:04 UTC (permalink / raw)
To: Laurent Dufour
Cc: akpm, mhocko, peterz, kirill, ak, dave, jack, Matthew Wilcox,
benh, mpe, paulus, Thomas Gleixner, Ingo Molnar, hpa, Will Deacon,
Sergey Senozhatsky, Andrea Arcangeli, Alexei Starovoitov,
kemi.wang, sergey.senozhatsky.work, Daniel Jordan, David Rientjes,
Jerome Glisse, Ganesh Mahendran, linux-kernel, linux-mm, haren,
khandual, npiggin, bsingharora, paulmck, Tim Chen, linuxppc-dev,
x86
In-Reply-To: <1523975611-15978-3-git-send-email-ldufour@linux.vnet.ibm.com>
Hi Laurent,
Laurent Dufour <ldufour@linux.vnet.ibm.com> writes:
> Set ARCH_SUPPORTS_SPECULATIVE_PAGE_FAULT which turns on the
> Speculative Page Fault handler when building for 64bit.
>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
> ---
> arch/x86/Kconfig | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index d8983df5a2bc..ebdeb48e4a4a 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -30,6 +30,7 @@ config X86_64
> select MODULES_USE_ELF_RELA
> select X86_DEV_DMA_OPS
> select ARCH_HAS_SYSCALL_WRAPPER
> + select ARCH_SUPPORTS_SPECULATIVE_PAGE_FAULT
I'd suggest merging this patch with the one making changes to the
architectural fault handler towards the end of the series.
The Kconfig change is closely tied to the architectural support for SPF
and makes sense to be in a single patch.
If there's a good reason to keep them as separate patches, please move
the architecture Kconfig changes after the patch adding fault handler
changes.
It's better to enable the feature once the core infrastructure is merged
rather than at the beginning of the series to avoid potential bad
fallout from incomplete functionality during bisection.
All the comments here definitely hold for the arm64 patches that you
plan to include with the next update.
Thanks,
Punit
>
> #
> # Arch settings
^ permalink raw reply
* Re: [PATCH] powerpc/pseries: hcall_exit tracepoint retval should be signed
From: Anton Blanchard @ 2018-05-08 11:57 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
In-Reply-To: <20180507130355.25115-1-mpe@ellerman.id.au>
Hi Michael,
> The hcall_exit() tracepoint has retval defined as unsigned long. That
> leads to humorous results like:
>
> bash-3686 [009] d..2 854.134094: hcall_entry: opcode=24
> bash-3686 [009] d..2 854.134095: hcall_exit: opcode=24
> retval=18446744073709551609
>
> It's normal for some hcalls to return negative values, displaying them
> as unsigned isn't very helpful. So change it to signed.
>
> bash-3711 [001] d..2 471.691008: hcall_entry: opcode=24
> bash-3711 [001] d..2 471.691008: hcall_exit: opcode=24 retval=-7
>
> Which can be more easily compared to H_NOT_FOUND in hvcall.h
Much nicer.
Acked-by: Anton Blanchard <anton@samba.org>
Anton
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
> arch/powerpc/include/asm/asm-prototypes.h | 3 +--
> arch/powerpc/include/asm/trace.h | 7 +++----
> arch/powerpc/platforms/pseries/hvCall_inst.c | 2 +-
> arch/powerpc/platforms/pseries/lpar.c | 3 +--
> 4 files changed, 6 insertions(+), 9 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/asm-prototypes.h
> b/arch/powerpc/include/asm/asm-prototypes.h index
> d9713ad62e3c..068760d61e7e 100644 ---
> a/arch/powerpc/include/asm/asm-prototypes.h +++
> b/arch/powerpc/include/asm/asm-prototypes.h @@ -36,8 +36,7 @@ void
> kexec_copy_flush(struct kimage *image); /* pseries hcall tracing */
> extern struct static_key hcall_tracepoint_key;
> void __trace_hcall_entry(unsigned long opcode, unsigned long *args);
> -void __trace_hcall_exit(long opcode, unsigned long retval,
> - unsigned long *retbuf);
> +void __trace_hcall_exit(long opcode, long retval, unsigned long
> *retbuf); /* OPAL tracing */
> #ifdef HAVE_JUMP_LABEL
> extern struct static_key opal_tracepoint_key;
> diff --git a/arch/powerpc/include/asm/trace.h
> b/arch/powerpc/include/asm/trace.h index 33f3b479138b..d018e8602694
> 100644 --- a/arch/powerpc/include/asm/trace.h
> +++ b/arch/powerpc/include/asm/trace.h
> @@ -81,8 +81,7 @@ TRACE_EVENT_FN_COND(hcall_entry,
>
> TRACE_EVENT_FN_COND(hcall_exit,
>
> - TP_PROTO(unsigned long opcode, unsigned long retval,
> - unsigned long *retbuf),
> + TP_PROTO(unsigned long opcode, long retval, unsigned long
> *retbuf),
> TP_ARGS(opcode, retval, retbuf),
>
> @@ -90,7 +89,7 @@ TRACE_EVENT_FN_COND(hcall_exit,
>
> TP_STRUCT__entry(
> __field(unsigned long, opcode)
> - __field(unsigned long, retval)
> + __field(long, retval)
> ),
>
> TP_fast_assign(
> @@ -98,7 +97,7 @@ TRACE_EVENT_FN_COND(hcall_exit,
> __entry->retval = retval;
> ),
>
> - TP_printk("opcode=%lu retval=%lu", __entry->opcode,
> __entry->retval),
> + TP_printk("opcode=%lu retval=%ld", __entry->opcode,
> __entry->retval),
> hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
> );
> diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c
> b/arch/powerpc/platforms/pseries/hvCall_inst.c index
> 89b7ce807e70..6da320c786cd 100644 ---
> a/arch/powerpc/platforms/pseries/hvCall_inst.c +++
> b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -125,7 +125,7 @@
> static void probe_hcall_entry(void *ignored, unsigned long opcode,
> unsigned long h->purr_start = mfspr(SPRN_PURR); }
>
> -static void probe_hcall_exit(void *ignored, unsigned long opcode,
> unsigned long retval, +static void probe_hcall_exit(void *ignored,
> unsigned long opcode, long retval, unsigned long *retbuf)
> {
> struct hcall_stats *h;
> diff --git a/arch/powerpc/platforms/pseries/lpar.c
> b/arch/powerpc/platforms/pseries/lpar.c index
> adb996ed51e1..5a392e40f3d2 100644 ---
> a/arch/powerpc/platforms/pseries/lpar.c +++
> b/arch/powerpc/platforms/pseries/lpar.c @@ -902,8 +902,7 @@ void
> __trace_hcall_entry(unsigned long opcode, unsigned long *args)
> local_irq_restore(flags); }
>
> -void __trace_hcall_exit(long opcode, unsigned long retval,
> - unsigned long *retbuf)
> +void __trace_hcall_exit(long opcode, long retval, unsigned long
> *retbuf) {
> unsigned long flags;
> unsigned int *depth;
^ permalink raw reply
* Re: [PATCH] pkeys: Introduce PKEY_ALLOC_SIGNALINHERIT and change signal semantics
From: Florian Weimer @ 2018-05-08 12:40 UTC (permalink / raw)
To: Andy Lutomirski
Cc: linuxram, Dave Hansen, Linux-MM, Linux API, linux-x86_64,
linux-arch, X86 ML, linuxppc-dev
In-Reply-To: <CALCETrX46wR_MDW=m9SVm=ejQmPAmD3+2oC3iapf75bPhnEAWQ@mail.gmail.com>
On 05/08/2018 04:49 AM, Andy Lutomirski wrote:
> On Mon, May 7, 2018 at 2:48 AM Florian Weimer <fweimer@redhat.com> wrote:
>
>> On 05/03/2018 06:05 AM, Andy Lutomirski wrote:
>>> On Wed, May 2, 2018 at 7:11 PM Ram Pai <linuxram@us.ibm.com> wrote:
>>>
>>>> On Wed, May 02, 2018 at 09:23:49PM +0000, Andy Lutomirski wrote:
>>>>>
>>>>>> If I recall correctly, the POWER maintainer did express a strong
>>> desire
>>>>>> back then for (what is, I believe) their current semantics, which my
>>>>>> PKEY_ALLOC_SIGNALINHERIT patch implements for x86, too.
>>>>>
>>>>> Ram, I really really don't like the POWER semantics. Can you give
> some
>>>>> justification for them? Does POWER at least have an atomic way for
>>>>> userspace to modify just the key it wants to modify or, even better,
>>>>> special load and store instructions to use alternate keys?
>>>
>>>> I wouldn't call it POWER semantics. The way I implemented it on power
>>>> lead to the semantics, given that nothing was explicitly stated
>>>> about how the semantics should work within a signal handler.
>>>
>>> I think that this is further evidence that we should introduce a new
>>> pkey_alloc() mode and deprecate the old. To the extent possible, this
>>> thing should work the same way on x86 and POWER.
>
>> Do you propose to change POWER or to change x86?
>
> Sorry for being slow to reply. I propose to introduce a new
> PKEY_ALLOC_something variant on x86 and POWER and to make the behavior
> match on both.
So basically implement PKEY_ALLOC_SETSIGNAL for POWER, and keep the
existing (different) behavior without the flag?
Ram, would you be okay with that? Could you give me a hand if
necessary? (I assume we have silicon in-house because it's a
long-standing feature of the POWER platform which was simply dormant on
Linux until now.)
> It should at least update the values loaded when a signal
> is delivered and it should probably also update it for new threads.
I think we should keep inheritance for new threads and fork. pkey_alloc
only has a single access rights argument, which makes it hard to reuse
this interface if there are two (three) separate sets of access rights.
Is there precedent for process state reverting on fork, besides
MADV_WIPEONFORK? My gut feeling is that we should avoid that.
> For glibc, for example, I assume that you want signals to be delivered with
> write access disabled to the GOT. Otherwise you would fail to protect
> against exploits that occur in signal context. Glibc controls thread
> creation, so the initial state on thread startup doesn't really matter, but
> there will be more users than just glibc.
glibc does not control thread, or more precisely, subprocess creation.
Otherwise we wouldn't have faced that many issues with our PID cache. 8-/
Thanks,
Florian
^ permalink raw reply
* Re: seccomp_bpf.c:2880:global.get_metadata:Expected 0 (0) == seccomp(1, 2, &prog) (4294967295)
From: Michael Ellerman @ 2018-05-08 14:34 UTC (permalink / raw)
To: Mathieu Malaterre, linuxppc-dev
In-Reply-To: <CA+7wUsxwRi3tLRR_+-=h_y2Xj_RfJS58E0vpa8K1KPtRAQpr_g@mail.gmail.com>
Mathieu Malaterre <malat@debian.org> writes:
> Hi there,
>
> Quick question (I have not investigated the root cause): is support for
> seccomp complete on ppc32 ?
Doesn't look like it does it :)
> $ make KBUILD_OUTPUT=/tmp/kselftest TARGETS=seccomp kselftest
> ...
> seccomp_bpf.c:1804:TRACE_syscall.ptrace_syscall_dropped:Expected 1 (1)
> == syscall(286) (4294967295)
> TRACE_syscall.ptrace_syscall_dropped: Test failed at step #13
> [ FAIL ] TRACE_syscall.ptrace_syscall_dropped
> ...
> [ RUN ] global.get_metadata
> seccomp_bpf.c:2880:global.get_metadata:Expected 0 (0) == seccomp(1, 2,
> &prog) (4294967295)
> seccomp_bpf.c:2892:global.get_metadata:Expected 1 (1) ==
> read(pipefd[0], &buf, 1) (0)
> global.get_metadata: Test terminated by assertion
> [ FAIL ] global.get_metadata
I'm not sure sorry.
That could be a test case bug, hard to say without looking at the
details.
cheers
^ permalink raw reply
* Re: [PATCH v13 0/3] mm, x86, powerpc: Enhancements to Memory Protection Keys.
From: Michael Ellerman @ 2018-05-08 14:39 UTC (permalink / raw)
To: Ram Pai, mingo, akpm
Cc: linuxppc-dev, linux-mm, x86, linux-arch, linux-kernel,
dave.hansen, benh, paulus, khandual, aneesh.kumar, bsingharora,
hbabu, mhocko, bauerman, ebiederm, linuxram, corbet, arnd
In-Reply-To: <1525471183-21277-1-git-send-email-linuxram@us.ibm.com>
Ram Pai <linuxram@us.ibm.com> writes:
> This patch series provides arch-neutral enhancements to
> enable memory-keys on new architectures, and the corresponding
> changes in x86 and powerpc specific code to support that.
>
> a) Provides ability to support up to 32 keys. PowerPC
> can handle 32 keys and hence needs this.
>
> b) Arch-neutral code; and not the arch-specific code,
> determines the format of the string, that displays the key
> for each vma in smaps.
>
> History:
> -------
> version 14:
This doesn't match the patch subjects, which is a little confusing :)
> (1) made VM_PKEY_BIT4 unusable on x86, #defined it to 0
> -- comment by Dave Hansen
> (2) due to some reason this patch series continue to
> break some or the other build. The last series
> passed everything but created a merge
> conflict followed by build failure for
> Michael Ellerman. :(
I have a fix, it involved some cleanup of headers prior to the smaps
change.
Will post it.
cheers
^ permalink raw reply
* Re: [PATCH 2/2] powerpc/ptrace: Disable array-bounds warning with gcc8
From: Michael Ellerman @ 2018-05-08 14:40 UTC (permalink / raw)
To: Khem Raj, linuxppc-dev; +Cc: Khem Raj
In-Reply-To: <20180504192313.18625-2-raj.khem@gmail.com>
Khem Raj <raj.khem@gmail.com> writes:
> This masks the new gcc8 warning
>
> regset.h:270:4: error: 'memcpy' offset [-527, -529] is out
> of the bounds [0, 16] of object 'vrsave' with type 'union <anonymous>'
Why would we want to mask the warning rather than fix it?
cheers
> Signed-off-by: Khem Raj <raj.khem@gmail.com>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: linuxppc-dev@lists.ozlabs.org
> ---
> arch/powerpc/kernel/Makefile | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
> index 7ac5a68ad6b1..ab159a34704a 100644
> --- a/arch/powerpc/kernel/Makefile
> +++ b/arch/powerpc/kernel/Makefile
> @@ -4,6 +4,7 @@
> #
>
> CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' $(call cc-disable-warning, attribute-alias)
> +CFLAGS_ptrace.o += $(call cc-disable-warning, array-bounds)
> CFLAGS_syscalls.o += $(call cc-disable-warning, attribute-alias)
>
> subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
> --
> 2.17.0
^ permalink raw reply
* Re: [PATCH-next] powerpc: remove retired sbc834x support
From: Michael Ellerman @ 2018-05-08 14:52 UTC (permalink / raw)
To: Paul Gortmaker, Benjamin Herrenschmidt, Paul Mackerras
Cc: Paul Gortmaker, linuxppc-dev
In-Reply-To: <20171211032913.31253-1-paul.gortmaker@windriver.com>
On Mon, 2017-12-11 at 03:29:13 UTC, Paul Gortmaker wrote:
> I no longer have a functional version of this board for even the most
> basic sanity boot testing, and they have not been available for purchase
> for quite some years now.
>
> There is no point in adding a burden to testing coverage that does
> walk all the possible defconfigs, so with all the above in mind, it
> makes sense to remove it. Of course it will remain in the git history
> for anyone who happens to stumble on one and wants to tinker with it.
>
> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Applied to powerpc next, thanks.
https://git.kernel.org/powerpc/c/3bc6cf5a86e5e506250c03dfcdf971
cheers
^ permalink raw reply
* Re: powerpc: Only support DYNAMIC_FTRACE not static
From: Michael Ellerman @ 2018-05-08 14:52 UTC (permalink / raw)
To: Michael Ellerman, linuxppc-dev; +Cc: naveen.n.rao, rostedt
In-Reply-To: <20180327042906.32461-1-mpe@ellerman.id.au>
On Tue, 2018-03-27 at 04:29:06 UTC, Michael Ellerman wrote:
> We've had dynamic ftrace support for over 9 years since Steve first
> wrote it, all the distros use dynamic, and static is basically
> untested these days, so drop support for static ftrace.
>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Applied to powerpc next.
https://git.kernel.org/powerpc/c/0c0c52306f4792a41d8a86e7c5d30c
cheers
^ permalink raw reply
* Re: [v2, 1/2] powerpc/fadump: exclude memory holes while reserving memory in second kernel
From: Michael Ellerman @ 2018-05-08 14:52 UTC (permalink / raw)
To: Hari Bathini
Cc: Mahesh J Salgaonkar, Anshuman Khandual, Aneesh Kumar K.V,
linuxppc-dev
In-Reply-To: <152336766167.8374.13811759102783227353.stgit@hbathini.in.ibm.com>
On Tue, 2018-04-10 at 13:41:16 UTC, Hari Bathini wrote:
> From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
>
> The second kernel, during early boot after the crash, reserves rest of
> the memory above boot memory size to make sure it does not touch any of the
> dump memory area. It uses memblock_reserve() that reserves the specified
> memory region irrespective of memory holes present within that region.
> There are chances where previous kernel would have hot removed some of
> its memory leaving memory holes behind. In such cases fadump kernel reports
> incorrect number of reserved pages through arch_reserved_kernel_pages()
> hook causing kernel to hang or panic.
>
> Fix this by excluding memory holes while reserving rest of the memory
> above boot memory size during second kernel boot after crash.
>
> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
Series applied to powerpc next, thanks.
https://git.kernel.org/powerpc/c/b71a693d3db3abd1ddf7d29be967a1
cheers
^ permalink raw reply
* Re: [v5, 01/10] powerpc64/ftrace: Add a field in paca to disable ftrace in unsafe code paths
From: Michael Ellerman @ 2018-05-08 14:52 UTC (permalink / raw)
To: Naveen N. Rao; +Cc: Satheesh Rajendran, linuxppc-dev, Steven Rostedt
In-Reply-To: <47c79b5591208f332876fc548186b5b1e3b68e52.1524121038.git.naveen.n.rao@linux.vnet.ibm.com>
On Thu, 2018-04-19 at 07:04:00 UTC, "Naveen N. Rao" wrote:
> We have some C code that we call into from real mode where we cannot
> take any exceptions. Though the C functions themselves are mostly safe,
> if these functions are traced, there is a possibility that we may take
> an exception. For instance, in certain conditions, the ftrace code uses
> WARN(), which uses a 'trap' to do its job.
>
> For such scenarios, introduce a new field in paca 'ftrace_enabled',
> which is checked on ftrace entry before continuing. This field can then
> be set to zero to disable/pause ftrace, and set to a non-zero value to
> resume ftrace.
>
> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Series applied to powerpc next, thanks.
https://git.kernel.org/powerpc/c/ea678ac627e01daf5b4f1da24bf1d0
cheers
^ permalink raw reply
* Re: [1/3] powerpc/nohash: remove hash related code from nohash headers.
From: Michael Ellerman @ 2018-05-08 14:52 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <02633d43f29e1ba01865cd334216dc8efb8b4b11.1524587425.git.christophe.leroy@c-s.fr>
On Tue, 2018-04-24 at 16:31:28 UTC, Christophe Leroy wrote:
> When nohash and book3s header were split, some hash related stuff
> remained in the nohash header. This patch removes them.
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Series applied to powerpc next, thanks.
https://git.kernel.org/powerpc/c/45201c8794693d16e8e634188a92e9
cheers
^ permalink raw reply
* Re: powerpc/fadump: Unregister fadump on kexec down path.
From: Michael Ellerman @ 2018-05-08 14:52 UTC (permalink / raw)
To: Mahesh J Salgaonkar, linuxppc-dev
In-Reply-To: <152481017041.9024.9087742951572073802.stgit@jupiter.in.ibm.com>
On Fri, 2018-04-27 at 06:23:18 UTC, Mahesh J Salgaonkar wrote:
> From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
>
> Unregister fadump on kexec down path otherwise the fadump registration in
> new kexec-ed kernel complains that fadump is already registered. This
> makes new kernel to continue using fadump registered by previous kernel
> which may lead to invalid vmcore generation. Hence this patch fixes this
> issue by un-registering fadump in fadump_cleanup() which is called during
> kexec path so that new kernel can register fadump with new valid values.
>
> Fixes: b500afff11f6 ("fadump: Invalidate registration and release reserved memory for general use.")
> Cc: stable@vger.kernel.org
> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Applied to powerpc next, thanks.
https://git.kernel.org/powerpc/c/722cde76d68e8cc4f3de42e71c82fd
cheers
^ permalink raw reply
* Re: [resend] Revert "powerpc/powernv: Increase memory block size to 1GB on radix"
From: Michael Ellerman @ 2018-05-08 14:52 UTC (permalink / raw)
To: Balbir Singh, linuxppc-dev; +Cc: Michael Neuling
In-Reply-To: <20180501025725.7441-1-bsingharora@gmail.com>
On Tue, 2018-05-01 at 02:57:25 UTC, Balbir Singh wrote:
> This commit was a stop-gap to prevent crashes on hotunplug, caused by
> the mismatch between the 1G mappings used for the linear mapping and the
> memory block size. Those issues are now resolved because we split the
> linear mapping at hotunplug time if necessary, as implemented in commit
> 4dd5f8a99e79 ("powerpc/mm/radix: Split linear mapping on hot-unplug").
>
> Signed-off-by: Balbir Singh <bsingharora@gmail.com>
> Signed-off-by: Michael Neuling <mikey@neuling.org>
> Tested-by: Rashmica Gupta <rashmica.g@gmail.com>
> Tested-by: Balbir Singh <bsingharora@gmail.com>
Applied to powerpc next, thanks.
https://git.kernel.org/powerpc/c/7acf50e4efa60270edcb95107f660f
cheers
^ permalink raw reply
* Re: tracing: Remove PPC32 wart from config TRACING_SUPPORT
From: Michael Ellerman @ 2018-05-08 14:52 UTC (permalink / raw)
To: Michael Ellerman, rostedt, mingo; +Cc: linuxppc-dev, linux-kernel
In-Reply-To: <20180502112948.1747-1-mpe@ellerman.id.au>
On Wed, 2018-05-02 at 11:29:48 UTC, Michael Ellerman wrote:
> config TRACING_SUPPORT has an exception for PPC32, because PPC32
> didn't have irqflags tracing support.
>
> But that hasn't been true since commit 5d38902c4838 ("powerpc: Add
> irqtrace support for 32-bit powerpc") (Jun 2009).
>
> So remove the exception for PPC32 and the comment.
>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Applied to powerpc next.
https://git.kernel.org/powerpc/c/0ea5ee035133aeb549883ddc604a65
cheers
^ permalink raw reply
* [PATCH 0/8] mm, x86, powerpc: Consolidate pkey code
From: Michael Ellerman @ 2018-05-08 14:59 UTC (permalink / raw)
To: linuxram; +Cc: mingo, linuxppc-dev, linux-mm, x86, linux-kernel, dave.hansen
This is a rework of Ram's series, which broke the build on both arches at
various points due to the differing header dependencies.
The actual pkey changes are basically the same, this just has some rework to
get the headers cleaned up a bit beforehand.
If no one objects I'll ask Stephen to put these in a topic branch in
linux-next, and I or someone else can merge them for 4.18.
cheers
Ram's original:
http://patchwork.ozlabs.org/patch/909066/
http://patchwork.ozlabs.org/patch/909067/
http://patchwork.ozlabs.org/patch/909068/
Michael Ellerman (5):
mm/pkeys: Remove include of asm/mmu_context.h from pkeys.h
mm/pkeys, powerpc, x86: Provide an empty vma_pkey() in linux/pkeys.h
x86/pkeys: Move vma_pkey() into asm/pkeys.h
x86/pkeys: Add arch_pkeys_enabled()
mm/pkeys: Add an empty arch_pkeys_enabled()
Ram Pai (3):
mm, powerpc, x86: define VM_PKEY_BITx bits if CONFIG_ARCH_HAS_PKEYS is
enabled
mm, powerpc, x86: introduce an additional vma bit for powerpc pkey
mm/pkeys, x86, powerpc: Display pkey in smaps if arch supports pkeys
arch/powerpc/include/asm/mmu_context.h | 5 -----
arch/powerpc/include/asm/pkeys.h | 2 ++
arch/x86/include/asm/mmu_context.h | 15 ---------------
arch/x86/include/asm/pkeys.h | 13 +++++++++++++
arch/x86/kernel/setup.c | 8 --------
fs/proc/task_mmu.c | 13 +++++++------
include/linux/mm.h | 12 +++++++-----
include/linux/pkeys.h | 13 +++++++++++--
8 files changed, 40 insertions(+), 41 deletions(-)
--
2.14.1
^ permalink raw reply
* [PATCH 1/8] mm, powerpc, x86: define VM_PKEY_BITx bits if CONFIG_ARCH_HAS_PKEYS is enabled
From: Michael Ellerman @ 2018-05-08 14:59 UTC (permalink / raw)
To: linuxram; +Cc: mingo, linuxppc-dev, linux-mm, x86, linux-kernel, dave.hansen
In-Reply-To: <20180508145948.9492-1-mpe@ellerman.id.au>
From: Ram Pai <linuxram@us.ibm.com>
VM_PKEY_BITx are defined only if CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
is enabled. Powerpc also needs these bits. Hence lets define the
VM_PKEY_BITx bits for any architecture that enables
CONFIG_ARCH_HAS_PKEYS.
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
arch/powerpc/include/asm/pkeys.h | 2 ++
fs/proc/task_mmu.c | 4 ++--
include/linux/mm.h | 9 +++++----
3 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
index 0409c80c32c0..18ef59a9886d 100644
--- a/arch/powerpc/include/asm/pkeys.h
+++ b/arch/powerpc/include/asm/pkeys.h
@@ -26,6 +26,8 @@ extern u32 initial_allocation_mask; /* bits set for reserved keys */
# define VM_PKEY_BIT2 VM_HIGH_ARCH_2
# define VM_PKEY_BIT3 VM_HIGH_ARCH_3
# define VM_PKEY_BIT4 VM_HIGH_ARCH_4
+#elif !defined(VM_PKEY_BIT4)
+# define VM_PKEY_BIT4 VM_HIGH_ARCH_4
#endif
#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c486ad4b43f0..541392a62608 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -673,13 +673,13 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_MERGEABLE)] = "mg",
[ilog2(VM_UFFD_MISSING)]= "um",
[ilog2(VM_UFFD_WP)] = "uw",
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+#ifdef CONFIG_ARCH_HAS_PKEYS
/* These come out via ProtectionKey: */
[ilog2(VM_PKEY_BIT0)] = "",
[ilog2(VM_PKEY_BIT1)] = "",
[ilog2(VM_PKEY_BIT2)] = "",
[ilog2(VM_PKEY_BIT3)] = "",
-#endif
+#endif /* CONFIG_ARCH_HAS_PKEYS */
};
size_t i;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1ac1f06a4be6..c6a6f2492c1b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -228,15 +228,16 @@ extern unsigned int kobjsize(const void *objp);
#define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4)
#endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
-#if defined(CONFIG_X86)
-# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */
-#if defined (CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS)
+#ifdef CONFIG_ARCH_HAS_PKEYS
# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0
# define VM_PKEY_BIT0 VM_HIGH_ARCH_0 /* A protection key is a 4-bit value */
# define VM_PKEY_BIT1 VM_HIGH_ARCH_1
# define VM_PKEY_BIT2 VM_HIGH_ARCH_2
# define VM_PKEY_BIT3 VM_HIGH_ARCH_3
-#endif
+#endif /* CONFIG_ARCH_HAS_PKEYS */
+
+#if defined(CONFIG_X86)
+# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */
#elif defined(CONFIG_PPC)
# define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */
#elif defined(CONFIG_PARISC)
--
2.14.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox