LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* Re: [PATCH] powerpc/32s: fix booting with CONFIG_PPC_EARLY_DEBUG_BOOTX
From: Mathieu Malaterre @ 2019-06-07  6:16 UTC (permalink / raw)
  To: Christophe Leroy; +Cc: linuxppc-dev, Paul Mackerras, LKML
In-Reply-To: <CA+7wUswvw3JJ2dLCn877tNbTd==O5c9LxHGezOm+y5otQZnS2w@mail.gmail.com>

On Wed, Jun 5, 2019 at 1:32 PM Mathieu Malaterre <malat@debian.org> wrote:
>
> On Mon, Jun 3, 2019 at 3:00 PM Christophe Leroy <christophe.leroy@c-s.fr> wrote:
> >
> > When booting through OF, setup_disp_bat() does nothing because
> > disp_BAT are not set. By change, it used to work because BOOTX
> > buffer is mapped 1:1 at address 0x81000000 by the bootloader, and
> > btext_setup_display() sets virt addr same as phys addr.
> >
> > But since commit 215b823707ce ("powerpc/32s: set up an early static
> > hash table for KASAN."), a temporary page table overrides the
> > bootloader mapping.
> >
> > This 0x81000000 is also problematic with the newly implemented
> > Kernel Userspace Access Protection (KUAP) because it is within user
> > address space.
> >
> > This patch fixes those issues by properly setting disp_BAT through
> > a call to btext_prepare_BAT(), allowing setup_disp_bat() to
> > properly setup BAT3 for early bootx screen buffer access.
> >
> > Reported-by: Mathieu Malaterre <malat@debian.org>
> > Fixes: 215b823707ce ("powerpc/32s: set up an early static hash table for KASAN.")
> > Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>
> The patch below does fix the symptoms I reported. Tested with CONFIG_KASAN=n :
>
> Tested-by: Mathieu Malaterre <malat@debian.org>

Link: https://bugzilla.kernel.org/show_bug.cgi?id=203699

>
> Thanks !
>
> > ---
> >  arch/powerpc/include/asm/btext.h       | 4 ++++
> >  arch/powerpc/kernel/prom_init.c        | 1 +
> >  arch/powerpc/kernel/prom_init_check.sh | 2 +-
> >  3 files changed, 6 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/powerpc/include/asm/btext.h b/arch/powerpc/include/asm/btext.h
> > index 3ffad030393c..461b0f193864 100644
> > --- a/arch/powerpc/include/asm/btext.h
> > +++ b/arch/powerpc/include/asm/btext.h
> > @@ -13,7 +13,11 @@ extern void btext_update_display(unsigned long phys, int width, int height,
> >                                  int depth, int pitch);
> >  extern void btext_setup_display(int width, int height, int depth, int pitch,
> >                                 unsigned long address);
> > +#ifdef CONFIG_PPC32
> >  extern void btext_prepare_BAT(void);
> > +#else
> > +static inline void btext_prepare_BAT(void) { }
> > +#endif
> >  extern void btext_map(void);
> >  extern void btext_unmap(void);
> >
> > diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
> > index 3555cad7bdde..ed446b7ea164 100644
> > --- a/arch/powerpc/kernel/prom_init.c
> > +++ b/arch/powerpc/kernel/prom_init.c
> > @@ -2336,6 +2336,7 @@ static void __init prom_check_displays(void)
> >                         prom_printf("W=%d H=%d LB=%d addr=0x%x\n",
> >                                     width, height, pitch, addr);
> >                         btext_setup_display(width, height, 8, pitch, addr);
> > +                       btext_prepare_BAT();
> >                 }
> >  #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
> >         }
> > diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
> > index 518d416971c1..160bef0d553d 100644
> > --- a/arch/powerpc/kernel/prom_init_check.sh
> > +++ b/arch/powerpc/kernel/prom_init_check.sh
> > @@ -24,7 +24,7 @@ fi
> >  WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
> >  _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
> >  __secondary_hold_acknowledge __secondary_hold_spinloop __start
> > -logo_linux_clut224
> > +logo_linux_clut224 btext_prepare_BAT
> >  reloc_got2 kernstart_addr memstart_addr linux_banner _stext
> >  __prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
> >
> > --
> > 2.13.3
> >

^ permalink raw reply

* [PATCH 1/2] powerpc/64s/radix: Enable HAVE_ARCH_HUGE_VMAP
From: Nicholas Piggin @ 2019-06-07  6:19 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

This sets the HAVE_ARCH_HUGE_VMAP option, and defines the required
page table functions.

This will not enable huge iomaps, because powerpc/64 ioremap does not
call ioremap_page_range. That is done in a later change.

HAVE_ARCH_HUGE_VMAP facilities will be used to enable huge pages for
vmalloc memory in a set of generic kernel changes. Combined, this
improves cached `git diff` performance by about 5% on a 2-node POWER9
with 32MB dentry cache hash, by allowing the dentry/inode hashes to
be mapped with 2MB pages:

  Profiling git diff dTLB misses with a vanilla kernel:

  81.75%  git      [kernel.vmlinux]    [k] __d_lookup_rcu
   7.21%  git      [kernel.vmlinux]    [k] strncpy_from_user
   1.77%  git      [kernel.vmlinux]    [k] find_get_entry
   1.59%  git      [kernel.vmlinux]    [k] kmem_cache_free

            40,168      dTLB-miss
       0.100342754 seconds time elapsed

  With powerpc huge vmap and generic huge vmap vmalloc:

             2,987      dTLB-miss
       0.095933138 seconds time elapsed

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/Kconfig                     |  1 +
 arch/powerpc/mm/book3s64/radix_pgtable.c | 93 ++++++++++++++++++++++++
 2 files changed, 94 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8c1c636308c8..f0e5b38d52e8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -167,6 +167,7 @@ config PPC
 	select GENERIC_STRNLEN_USER
 	select GENERIC_TIME_VSYSCALL
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_HUGE_VMAP		if PPC_BOOK3S_64 && PPC_RADIX_MMU
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KASAN			if PPC32
 	select HAVE_ARCH_KGDB
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index c9bcf428dd2b..3bc9ade56277 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -1122,3 +1122,96 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
 
 	set_pte_at(mm, addr, ptep, pte);
 }
+
+int __init arch_ioremap_pud_supported(void)
+{
+	return radix_enabled();
+}
+
+int __init arch_ioremap_pmd_supported(void)
+{
+	return radix_enabled();
+}
+
+int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
+{
+	return 0;
+}
+
+int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
+{
+	pte_t *ptep = (pte_t *)pud;
+	pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot);
+
+	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud);
+
+	return 1;
+}
+
+int pud_clear_huge(pud_t *pud)
+{
+	if (pud_huge(*pud)) {
+		pud_clear(pud);
+		return 1;
+	}
+
+	return 0;
+}
+
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
+{
+	pmd_t *pmd;
+	int i;
+
+	pmd = (pmd_t *)pud_page_vaddr(*pud);
+	pud_clear(pud);
+
+	flush_tlb_kernel_range(addr, addr + PUD_SIZE);
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		if (!pmd_none(pmd[i])) {
+			pte_t *pte;
+			pte = (pte_t *)pmd_page_vaddr(pmd[i]);
+
+			pte_free_kernel(&init_mm, pte);
+		}
+	}
+
+	pmd_free(&init_mm, pmd);
+
+	return 1;
+}
+
+int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
+{
+	pte_t *ptep = (pte_t *)pmd;
+	pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot);
+
+	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd);
+
+	return 1;
+}
+
+int pmd_clear_huge(pmd_t *pmd)
+{
+	if (pmd_huge(*pmd)) {
+		pmd_clear(pmd);
+		return 1;
+	}
+
+	return 0;
+}
+
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
+{
+	pte_t *pte;
+
+	pte = (pte_t *)pmd_page_vaddr(*pmd);
+	pmd_clear(pmd);
+
+	flush_tlb_kernel_range(addr, addr + PMD_SIZE);
+
+	pte_free_kernel(&init_mm, pte);
+
+	return 1;
+}
-- 
2.20.1


^ permalink raw reply related

* [PATCH 2/2] powerpc/64s/radix: ioremap use huge page mappings
From: Nicholas Piggin @ 2019-06-07  6:19 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20190607061922.20542-1-npiggin@gmail.com>

powerpc/64s does not use ioremap_page_range, so it does not get huge
vmap iomap mappings automatically. The radix kernel mapping function
already allows larger page mappings that work with huge vmap, so wire
that up to allow huge pages to be used for ioremap mappings.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h |  8 +++
 arch/powerpc/mm/pgtable_64.c                 | 58 ++++++++++++++++++--
 include/linux/io.h                           |  1 +
 lib/ioremap.c                                |  2 +-
 4 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index ccf00a8b98c6..d7a4f2d80598 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -274,6 +274,14 @@ extern unsigned long __vmalloc_end;
 #define VMALLOC_START	__vmalloc_start
 #define VMALLOC_END	__vmalloc_end
 
+static inline unsigned int ioremap_max_order(void)
+{
+	if (radix_enabled())
+		return PUD_SHIFT;
+	return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */
+}
+#define IOREMAP_MAX_ORDER ({ ioremap_max_order();})
+
 extern unsigned long __kernel_virt_start;
 extern unsigned long __kernel_virt_size;
 extern unsigned long __kernel_io_start;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index d2d976ff8a0e..cf02b67eee55 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -112,7 +112,7 @@ unsigned long ioremap_bot = IOREMAP_BASE;
  * __ioremap_at - Low level function to establish the page tables
  *                for an IO mapping
  */
-void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
+static void __iomem * hash__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
 {
 	unsigned long i;
 
@@ -120,6 +120,54 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_
 	if (pgprot_val(prot) & H_PAGE_4K_PFN)
 		return NULL;
 
+	for (i = 0; i < size; i += PAGE_SIZE)
+		if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
+			return NULL;
+
+	return (void __iomem *)ea;
+}
+
+static int radix__ioremap_page_range(unsigned long addr, unsigned long end,
+		       phys_addr_t phys_addr, pgprot_t prot)
+{
+	while (addr != end) {
+		if (unlikely(ioremap_huge_disabled))
+			goto use_small_page;
+
+		if (!(addr & ~PUD_MASK) && !(phys_addr & ~PUD_MASK) &&
+				end - addr >= PUD_SIZE) {
+			if (radix__map_kernel_page(addr, phys_addr, prot, PUD_SIZE))
+				return -ENOMEM;
+			addr += PUD_SIZE;
+			phys_addr += PUD_SIZE;
+
+		} else if (!(addr & ~PMD_MASK) && !(phys_addr & ~PMD_MASK) &&
+				end - addr >= PMD_SIZE) {
+			if (radix__map_kernel_page(addr, phys_addr, prot, PMD_SIZE))
+				return -ENOMEM;
+			addr += PMD_SIZE;
+			phys_addr += PMD_SIZE;
+
+		} else {
+use_small_page:
+			if (radix__map_kernel_page(addr, phys_addr, prot, PAGE_SIZE))
+				return -ENOMEM;
+			addr += PAGE_SIZE;
+			phys_addr += PAGE_SIZE;
+		}
+	}
+	return 0;
+}
+
+static void __iomem * radix__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
+{
+	if (radix__ioremap_page_range((unsigned long)ea, (unsigned long)ea + size, pa, prot))
+		return NULL;
+	return ea;
+}
+
+void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
+{
 	if ((ea + size) >= (void *)IOREMAP_END) {
 		pr_warn("Outside the supported range\n");
 		return NULL;
@@ -129,11 +177,9 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_
 	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
 	WARN_ON(size & ~PAGE_MASK);
 
-	for (i = 0; i < size; i += PAGE_SIZE)
-		if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
-			return NULL;
-
-	return (void __iomem *)ea;
+	if (radix_enabled())
+		return radix__ioremap_at(pa, ea, size, prot);
+	return hash__ioremap_at(pa, ea, size, prot);
 }
 
 /**
diff --git a/include/linux/io.h b/include/linux/io.h
index 32e30e8fb9db..423c4294aaa3 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -44,6 +44,7 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end,
 #endif
 
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+extern int ioremap_huge_disabled;
 void __init ioremap_huge_init(void);
 int arch_ioremap_pud_supported(void);
 int arch_ioremap_pmd_supported(void);
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 063213685563..386ff956755f 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -18,7 +18,7 @@
 static int __read_mostly ioremap_p4d_capable;
 static int __read_mostly ioremap_pud_capable;
 static int __read_mostly ioremap_pmd_capable;
-static int __read_mostly ioremap_huge_disabled;
+int __read_mostly ioremap_huge_disabled;
 
 static int __init set_nohugeiomap(char *str)
 {
-- 
2.20.1


^ permalink raw reply related

* Re: [PATCH] Powerpc/Watchpoint: Restore nvgprs while returning from exception
From: Ravi Bangoria @ 2019-06-07  6:26 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Ravi Bangoria, mikey, linux-kernel, npiggin, paulus, mahesh,
	linuxppc-dev
In-Reply-To: <87ftom0wrm.fsf@concordia.ellerman.id.au>



On 6/7/19 11:20 AM, Michael Ellerman wrote:
> Ravi Bangoria <ravi.bangoria@linux.ibm.com> writes:
> 
>> Powerpc hw triggers watchpoint before executing the instruction.
>> To make trigger-after-execute behavior, kernel emulates the
>> instruction. If the instruction is 'load something into non-
>> volatile register', exception handler should restore emulated
>> register state while returning back, otherwise there will be
>> register state corruption. Ex, Adding a watchpoint on a list
>> can corrput the list:
>>
>>   # cat /proc/kallsyms | grep kthread_create_list
>>   c00000000121c8b8 d kthread_create_list
>>
>> Add watchpoint on kthread_create_list->next:
>>
>>   # perf record -e mem:0xc00000000121c8c0
>>
>> Run some workload such that new kthread gets invoked. Ex, I
>> just logged out from console:
>>
>>   list_add corruption. next->prev should be prev (c000000001214e00), \
>> 	but was c00000000121c8b8. (next=c00000000121c8b8).
>>   WARNING: CPU: 59 PID: 309 at lib/list_debug.c:25 __list_add_valid+0xb4/0xc0
>>   CPU: 59 PID: 309 Comm: kworker/59:0 Kdump: loaded Not tainted 5.1.0-rc7+ #69
>>   ...
>>   NIP __list_add_valid+0xb4/0xc0
>>   LR __list_add_valid+0xb0/0xc0
>>   Call Trace:
>>   __list_add_valid+0xb0/0xc0 (unreliable)
>>   __kthread_create_on_node+0xe0/0x260
>>   kthread_create_on_node+0x34/0x50
>>   create_worker+0xe8/0x260
>>   worker_thread+0x444/0x560
>>   kthread+0x160/0x1a0
>>   ret_from_kernel_thread+0x5c/0x70
> 
> This all depends on what code the compiler generates for the list
> access.

True. list corruption is just an example. But any load instruction that uses
non-volatile register and hits a watchpoint, will result in register state
corruption.

> Can you include a disassembly of the relevant code in your
> kernel so we have an example of the bad case.

Register state from WARN_ON():

  GPR00: c00000000059a3a0 c000007ff23afb50 c000000001344e00 0000000000000075
  GPR04: 0000000000000000 0000000000000000 0000001852af8bc1 0000000000000000
  GPR08: 0000000000000001 0000000000000007 0000000000000006 00000000000004aa
  GPR12: 0000000000000000 c000007ffffeb080 c000000000137038 c000005ff62aaa00
  GPR16: 0000000000000000 0000000000000000 c000007fffbe7600 c000007fffbe7370
  GPR20: c000007fffbe7320 c000007fffbe7300 c000000001373a00 0000000000000000
  GPR24: fffffffffffffef7 c00000000012e320 c000007ff23afcb0 c000000000cb8628
  GPR28: c00000000121c8b8 c000000001214e00 c000007fef5b17e8 c000007fef5b17c0

Snippet from __kthread_create_on_node:

  c000000000136be8:       ed ff a2 3f     addis   r29,r2,-19
  c000000000136bec:       c0 7a bd eb     ld      r29,31424(r29)
          if (!__list_add_valid(new, prev, next))
  c000000000136bf0:       78 f3 c3 7f     mr      r3,r30
  c000000000136bf4:       78 e3 85 7f     mr      r5,r28
  c000000000136bf8:       78 eb a4 7f     mr      r4,r29
  c000000000136bfc:       fd 36 46 48     bl      c00000000059a2f8 <__list_add_valid+0x8>

Watchpoint hit at 0xc000000000136bec. 

  addis   r29,r2,-19
   => r29 = 0xc000000001344e00 + (-19 << 16)
   => r29 = 0xc000000001214e00

  ld      r29,31424(r29)
   => r29 = *(0xc000000001214e00 + 31424)
   => r29 = *(0xc00000000121c8c0)

0xc00000000121c8c0 is where we placed a watchpoint and thus this instruction was
emulated by emulate_step. But because handle_dabr_fault did not restore emulated
register state, r29 still contains stale value in above register state.


^ permalink raw reply

* [Bug 203837] Booting kernel under KVM immediately freezes host
From: bugzilla-daemon @ 2019-06-07  6:29 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-203837-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=203837

--- Comment #2 from Paul Mackerras (paulus@ozlabs.org) ---
Just tried 5.1.7 in the host and got the guest locking up during boot. In xmon
I see one cpu in pmdp_invalidate and another in handle_mm_fault. It seems very
possible this is the bug that Nick Piggin's recent patch series fixes
("powerpc/64s: Fix THP PMD collapse serialisation"):

http://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=112348

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* Re: [PATCH 2/2] powerpc/64s: __find_linux_pte synchronization vs pmdp_invalidate
From: Nicholas Piggin @ 2019-06-07  6:31 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev; +Cc: Aneesh Kumar K . V
In-Reply-To: <46295970-4740-5648-efb4-513ab6a5c1c0@c-s.fr>

Christophe Leroy's on June 7, 2019 3:35 pm:
> 
> 
> Le 07/06/2019 à 05:56, Nicholas Piggin a écrit :
>> The change to pmdp_invalidate to mark the pmd with _PAGE_INVALID broke
>> the synchronisation against lock free lookups, __find_linux_pte's
>> pmd_none check no longer returns true for such cases.
>> 
>> Fix this by adding a check for this condition as well.
>> 
>> Fixes: da7ad366b497 ("powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit")
>> Cc: Christophe Leroy <christophe.leroy@c-s.fr>
>> Suggested-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>   arch/powerpc/mm/pgtable.c | 16 ++++++++++++++--
>>   1 file changed, 14 insertions(+), 2 deletions(-)
>> 
>> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
>> index db4a6253df92..533fc6fa6726 100644
>> --- a/arch/powerpc/mm/pgtable.c
>> +++ b/arch/powerpc/mm/pgtable.c
>> @@ -372,13 +372,25 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
>>   	pdshift = PMD_SHIFT;
>>   	pmdp = pmd_offset(&pud, ea);
>>   	pmd  = READ_ONCE(*pmdp);
>> +
>>   	/*
>> -	 * A hugepage collapse is captured by pmd_none, because
>> -	 * it mark the pmd none and do a hpte invalidate.
>> +	 * A hugepage collapse is captured by this condition, see
>> +	 * pmdp_collapse_flush.
>>   	 */
>>   	if (pmd_none(pmd))
>>   		return NULL;
>>   
>> +#ifdef CONFIG_PPC_BOOK3S_64
>> +	/*
>> +	 * A hugepage split is captured by this condition, see
>> +	 * pmdp_invalidate.
>> +	 *
>> +	 * Huge page modification can be caught here too.
>> +	 */
>> +	if (pmd_is_serializing(pmd))
>> +		return NULL;
>> +#endif
>> +
> 
> Could get rid of that #ifdef by adding the following in book3s32 and 
> nohash pgtable.h:
> 
> static inline bool pmd_is_serializing()  { return false; }

I don't mind either way. If it's an isolated case like this, sometimes 
I'm against polluting the sub arch code with it.

It's up to you I can change that if you prefer.

Thanks,
Nick


^ permalink raw reply

* Re: [PATCH 1/2] powerpc/64s: Fix THP PMD collapse serialisation
From: Nicholas Piggin @ 2019-06-07  6:40 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev; +Cc: Aneesh Kumar K . V
In-Reply-To: <696d26a4-90e2-cb16-975f-d00cf6a01b67@c-s.fr>

Christophe Leroy's on June 7, 2019 3:34 pm:
> 
> 
> Le 07/06/2019 à 05:56, Nicholas Piggin a écrit :
>> Commit 1b2443a547f9 ("powerpc/book3s64: Avoid multiple endian conversion
>> in pte helpers") changed the actual bitwise tests in pte_access_permitted
>> by using pte_write() and pte_present() helpers rather than raw bitwise
>> testing _PAGE_WRITE and _PAGE_PRESENT bits.
>> 
>> The pte_present change now returns true for ptes which are !_PAGE_PRESENT
>> and _PAGE_INVALID, which is the combination used by pmdp_invalidate to
>> synchronize access from lock-free lookups. pte_access_permitted is used by
>> pmd_access_permitted, so allowing GUP lock free access to proceed with
>> such PTEs breaks this synchronisation.
>> 
>> This bug has been observed on HPT host, with random crashes and corruption
>> in guests, usually together with bad PMD messages in the host.
>> 
>> Fix this by adding an explicit check in pmd_access_permitted, and
>> documenting the condition explicitly.
>> 
>> The pte_write() change should be okay, and would prevent GUP from falling
>> back to the slow path when encountering savedwrite ptes, which matches
>> what x86 (that does not implement savedwrite) does.
>> 
>> Fixes: 1b2443a547f9 ("powerpc/book3s64: Avoid multiple endian conversion in pte helpers")
>> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> Cc: Christophe Leroy <christophe.leroy@c-s.fr>
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> 
>> I accounted for Aneesh's and Christophe's feedback, except I couldn't
>> find a good way to replace the ifdef with IS_ENABLED because of
>> _PAGE_INVALID etc., but at least cleaned that up a bit nicer.
> 
> I guess the standard way is to add a pmd_is_serializing() which return 
> always false in book3s/32/pgtable.h and in nohash/pgtable.h


> 
>> 
>> Patch 1 solves a problem I can hit quite reliably running HPT/HPT KVM.
>> Patch 2 was noticed by Aneesh when inspecting code for similar bugs.
>> They should probably both be merged in stable kernels after upstream.
>> 
>>   arch/powerpc/include/asm/book3s/64/pgtable.h | 30 ++++++++++++++++++++
>>   arch/powerpc/mm/book3s64/pgtable.c           |  3 ++
>>   2 files changed, 33 insertions(+)
>> 
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index 7dede2e34b70..ccf00a8b98c6 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -876,6 +876,23 @@ static inline int pmd_present(pmd_t pmd)
>>   	return false;
>>   }
>>   
>> +static inline int pmd_is_serializing(pmd_t pmd)
> 
> should be static inline bool instead of int ?

I think just about all the p?d_blah boolean functions in the tree are
int at the moment, so I followed that pattern.

May be a good tree wide change to make at some point.

Thanks,
Nick



^ permalink raw reply

* [PATCH] powerpc/scm: Mark the region volatile if cache flush not required
From: Aneesh Kumar K.V @ 2019-06-07  6:44 UTC (permalink / raw)
  To: npiggin, paulus, mpe, oohall; +Cc: Aneesh Kumar K.V, linuxppc-dev

The device tree node is documented as below:

“ibm,cache-flush-required”:
property name indicates Cache Flush Required for this Persistent Memory Segment to persist memory
prop-encoded-array: None, this is a name only property.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/platforms/pseries/papr_scm.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 96c53b23e58f..0176ce66673f 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -28,6 +28,7 @@ struct papr_scm_priv {
 	uint64_t blocks;
 	uint64_t block_size;
 	int metadata_size;
+	bool is_volatile;
 
 	uint64_t bound_addr;
 
@@ -248,7 +249,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	ndr_desc.nd_set = &p->nd_set;
 	set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
 
-	p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
+	if (p->is_volatile)
+		p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
+	else
+		p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
 	if (!p->region) {
 		dev_err(dev, "Error registering region %pR from %pOF\n",
 				ndr_desc.res, p->dn);
@@ -293,6 +297,7 @@ static int papr_scm_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
+
 	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
@@ -304,6 +309,7 @@ static int papr_scm_probe(struct platform_device *pdev)
 	p->drc_index = drc_index;
 	p->block_size = block_size;
 	p->blocks = blocks;
+	p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
 
 	/* We just need to ensure that set cookies are unique across */
 	uuid_parse(uuid_str, (uuid_t *) uuid);
-- 
2.21.0


^ permalink raw reply related

* [PATCH v2] powerpc/nvdimm: Add support for multibyte read/write for metadata
From: Aneesh Kumar K.V @ 2019-06-07  6:45 UTC (permalink / raw)
  To: npiggin, paulus, mpe, oohall; +Cc: Aneesh Kumar K.V, linuxppc-dev

SCM_READ/WRITE_MEATADATA hcall supports multibyte read/write. This patch
updates the metadata read/write to use 1, 2, 4 or 8 byte read/write as
mentioned in PAPR document.

READ/WRITE_METADATA hcall supports the 1, 2, 4, or 8 bytes read/write.
For other values hcall results H_P3.

Hypervisor stores the metadata contents in big-endian format and in-order
to enable read/write in different granularity, we need to switch the contents
to big-endian before calling HCALL.

Based on an patch from Oliver O'Halloran <oohall@gmail.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/platforms/pseries/papr_scm.c | 104 +++++++++++++++++-----
 1 file changed, 82 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 0176ce66673f..80fbab118ef1 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -97,42 +97,102 @@ static int drc_pmem_unbind(struct papr_scm_priv *p)
 }
 
 static int papr_scm_meta_get(struct papr_scm_priv *p,
-			struct nd_cmd_get_config_data_hdr *hdr)
+			     struct nd_cmd_get_config_data_hdr *hdr)
 {
 	unsigned long data[PLPAR_HCALL_BUFSIZE];
+	unsigned long offset, data_offset;
+	int len, read;
 	int64_t ret;
 
-	if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
+	if ((hdr->in_offset + hdr->in_length) >= p->metadata_size)
 		return -EINVAL;
 
-	ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index,
-			hdr->in_offset, 1);
-
-	if (ret == H_PARAMETER) /* bad DRC index */
-		return -ENODEV;
-	if (ret)
-		return -EINVAL; /* other invalid parameter */
-
-	hdr->out_buf[0] = data[0] & 0xff;
-
+	for (len = hdr->in_length; len; len -= read) {
+
+		data_offset = hdr->in_length - len;
+		offset = hdr->in_offset + data_offset;
+
+		if (len >= 8)
+			read = 8;
+		else if (len >= 4)
+			read = 4;
+		else if (len >= 2)
+			read = 2;
+		else
+			read = 1;
+
+		ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index,
+				  offset, read);
+
+		if (ret == H_PARAMETER) /* bad DRC index */
+			return -ENODEV;
+		if (ret)
+			return -EINVAL; /* other invalid parameter */
+
+		switch (read) {
+		case 8:
+			*(uint64_t *)(hdr->out_buf + data_offset) = be64_to_cpu(data[0]);
+			break;
+		case 4:
+			*(uint32_t *)(hdr->out_buf + data_offset) = be32_to_cpu(data[0] & 0xffffffff);
+			break;
+
+		case 2:
+			*(uint16_t *)(hdr->out_buf + data_offset) = be16_to_cpu(data[0] & 0xffff);
+			break;
+
+		case 1:
+			*(uint8_t *)(hdr->out_buf + data_offset) = (data[0] & 0xff);
+			break;
+		}
+	}
 	return 0;
 }
 
 static int papr_scm_meta_set(struct papr_scm_priv *p,
-			struct nd_cmd_set_config_hdr *hdr)
+			     struct nd_cmd_set_config_hdr *hdr)
 {
+	unsigned long offset, data_offset;
+	int len, wrote;
+	unsigned long data;
+	__be64 data_be;
 	int64_t ret;
 
-	if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
+	if ((hdr->in_offset + hdr->in_length) >= p->metadata_size)
 		return -EINVAL;
 
-	ret = plpar_hcall_norets(H_SCM_WRITE_METADATA,
-			p->drc_index, hdr->in_offset, hdr->in_buf[0], 1);
-
-	if (ret == H_PARAMETER) /* bad DRC index */
-		return -ENODEV;
-	if (ret)
-		return -EINVAL; /* other invalid parameter */
+	for (len = hdr->in_length; len; len -= wrote) {
+
+		data_offset = hdr->in_length - len;
+		offset = hdr->in_offset + data_offset;
+
+		if (len >= 8) {
+			data = *(uint64_t *)(hdr->in_buf + data_offset);
+			data_be = cpu_to_be64(data);
+			wrote = 8;
+		} else if (len >= 4) {
+			data = *(uint32_t *)(hdr->in_buf + data_offset);
+			data &= 0xffffffff;
+			data_be = cpu_to_be32(data);
+			wrote = 4;
+		} else if (len >= 2) {
+			data = *(uint16_t *)(hdr->in_buf + data_offset);
+			data &= 0xffff;
+			data_be = cpu_to_be16(data);
+			wrote = 2;
+		} else {
+			data_be = *(uint8_t *)(hdr->in_buf + data_offset);
+			data_be &= 0xff;
+			wrote = 1;
+		}
+
+		ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, p->drc_index,
+					 offset, data_be, wrote);
+		if (ret == H_PARAMETER) /* bad DRC index */
+			return -ENODEV;
+		if (ret)
+			return -EINVAL; /* other invalid parameter */
+	}
 
 	return 0;
 }
@@ -154,7 +214,7 @@ int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		get_size_hdr = buf;
 
 		get_size_hdr->status = 0;
-		get_size_hdr->max_xfer = 1;
+		get_size_hdr->max_xfer = 8;
 		get_size_hdr->config_size = p->metadata_size;
 		*cmd_rc = 0;
 		break;
-- 
2.21.0


^ permalink raw reply related

* [PATCH] powerpc/scm: Use a specific endian format for storing uuid from the device tree
From: Aneesh Kumar K.V @ 2019-06-07  6:47 UTC (permalink / raw)
  To: npiggin, paulus, mpe, oohall; +Cc: Aneesh Kumar K.V, linuxppc-dev

We used uuid_parse to convert uuid string from device tree to two u64
components. We want to make sure we look at the uuid read from device
tree in an endian-neutral fashion. For now, I am picking little-endian
to be format so that we don't end up doing an additional conversion.

The reason to store in a specific endian format is to enable reading
the namespace created with a little-endian kernel config on a big-endian kernel.
We do store the device tree uuid string as a 64-bit little-endian cookie in the
label area. When booting the kernel we also compare this cookie
against what is read from the device tree. For this, to work we have
to store and compare these values in a CPU endian config independent fashion.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/platforms/pseries/papr_scm.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 80fbab118ef1..c8ec670ee924 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -373,8 +373,15 @@ static int papr_scm_probe(struct platform_device *pdev)

 	/* We just need to ensure that set cookies are unique across */
 	uuid_parse(uuid_str, (uuid_t *) uuid);
-	p->nd_set.cookie1 = uuid[0];
-	p->nd_set.cookie2 = uuid[1];
+	/*
+	 * cookie1 and cookie2 are not really little endian
+	 * we store a little endian representation of the
+	 * uuid str so that we can compare this with the label
+	 * area cookie irrespective of the endian config with which
+	 * the kernel is built.
+	 */
+	p->nd_set.cookie1 = cpu_to_le64(uuid[0]);
+	p->nd_set.cookie2 = cpu_to_le64(uuid[1]);

 	/* might be zero */
 	p->metadata_size = metadata_size;
-- 
2.21.0

^ permalink raw reply related

* [PATCH] mm/nvdimm: Fix endian conversion issues 
From: Aneesh Kumar K.V @ 2019-06-07  6:47 UTC (permalink / raw)
  To: dan.j.williams; +Cc: linux-mm, linuxppc-dev, Aneesh Kumar K.V, linux-nvdimm

nd_label->dpa issue was observed when trying to enable the namespace created
with little-endian kernel on a big-endian kernel. That made me run
`sparse` on the rest of the code and other changes are the result of that.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 drivers/nvdimm/btt.c            | 8 ++++----
 drivers/nvdimm/namespace_devs.c | 7 ++++---
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 4671776f5623..4ac0f5dde467 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -400,9 +400,9 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
 	arena->freelist[lane].sub = 1 - arena->freelist[lane].sub;
 	if (++(arena->freelist[lane].seq) == 4)
 		arena->freelist[lane].seq = 1;
-	if (ent_e_flag(ent->old_map))
+	if (ent_e_flag(le32_to_cpu(ent->old_map)))
 		arena->freelist[lane].has_err = 1;
-	arena->freelist[lane].block = le32_to_cpu(ent_lba(ent->old_map));
+	arena->freelist[lane].block = ent_lba(le32_to_cpu(ent->old_map));
 
 	return ret;
 }
@@ -568,8 +568,8 @@ static int btt_freelist_init(struct arena_info *arena)
 		 * FIXME: if error clearing fails during init, we want to make
 		 * the BTT read-only
 		 */
-		if (ent_e_flag(log_new.old_map) &&
-				!ent_normal(log_new.old_map)) {
+		if (ent_e_flag(le32_to_cpu(log_new.old_map)) &&
+		    !ent_normal(le32_to_cpu(log_new.old_map))) {
 			arena->freelist[i].has_err = 1;
 			ret = arena_clear_freelist_error(arena, i);
 			if (ret)
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index c4c5a191b1d6..500c37db496a 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1995,7 +1995,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 		nd_mapping = &nd_region->mapping[i];
 		label_ent = list_first_entry_or_null(&nd_mapping->labels,
 				typeof(*label_ent), list);
-		label0 = label_ent ? label_ent->label : 0;
+		label0 = label_ent ? label_ent->label : NULL;
 
 		if (!label0) {
 			WARN_ON(1);
@@ -2330,8 +2330,9 @@ static struct device **scan_labels(struct nd_region *nd_region)
 			continue;
 
 		/* skip labels that describe extents outside of the region */
-		if (nd_label->dpa < nd_mapping->start || nd_label->dpa > map_end)
-			continue;
+		if (__le64_to_cpu(nd_label->dpa) < nd_mapping->start ||
+		    __le64_to_cpu(nd_label->dpa) > map_end)
+				continue;
 
 		i = add_namespace_resource(nd_region, nd_label, devs, count);
 		if (i < 0)
-- 
2.21.0


^ permalink raw reply related

* Re: [PATCH 2/2] powerpc/64s/radix: ioremap use huge page mappings
From: Christophe Leroy @ 2019-06-07  6:56 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev
In-Reply-To: <20190607061922.20542-2-npiggin@gmail.com>



Le 07/06/2019 à 08:19, Nicholas Piggin a écrit :
> powerpc/64s does not use ioremap_page_range, so it does not get huge
> vmap iomap mappings automatically. The radix kernel mapping function
> already allows larger page mappings that work with huge vmap, so wire
> that up to allow huge pages to be used for ioremap mappings.

Argh ... I was on the way to merge pgtable_64.c and pgtable_32.c. This 
will complicate the task ... Anyway this looks a good improvment.

Any reason to limit that to Radix ?

> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   arch/powerpc/include/asm/book3s/64/pgtable.h |  8 +++
>   arch/powerpc/mm/pgtable_64.c                 | 58 ++++++++++++++++++--
>   include/linux/io.h                           |  1 +
>   lib/ioremap.c                                |  2 +-
>   4 files changed, 62 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index ccf00a8b98c6..d7a4f2d80598 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -274,6 +274,14 @@ extern unsigned long __vmalloc_end;
>   #define VMALLOC_START	__vmalloc_start
>   #define VMALLOC_END	__vmalloc_end
>   
> +static inline unsigned int ioremap_max_order(void)
> +{
> +	if (radix_enabled())
> +		return PUD_SHIFT;
> +	return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */
> +}
> +#define IOREMAP_MAX_ORDER ({ ioremap_max_order();})

Following form doesn't work ?

#define IOREMAP_MAX_ORDER	ioremap_max_order()

> +
>   extern unsigned long __kernel_virt_start;
>   extern unsigned long __kernel_virt_size;
>   extern unsigned long __kernel_io_start;
> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
> index d2d976ff8a0e..cf02b67eee55 100644
> --- a/arch/powerpc/mm/pgtable_64.c
> +++ b/arch/powerpc/mm/pgtable_64.c
> @@ -112,7 +112,7 @@ unsigned long ioremap_bot = IOREMAP_BASE;
>    * __ioremap_at - Low level function to establish the page tables
>    *                for an IO mapping
>    */
> -void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
> +static void __iomem * hash__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)

Is this the correct name ?

As far as I understand, this function will be used by nohash/64, looks 
strange to call hash__something() a function used by nohash platforms.

>   {
>   	unsigned long i;
>   
> @@ -120,6 +120,54 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_
>   	if (pgprot_val(prot) & H_PAGE_4K_PFN)
>   		return NULL;
>   
> +	for (i = 0; i < size; i += PAGE_SIZE)
> +		if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
> +			return NULL;
> +
> +	return (void __iomem *)ea;
> +}
> +
> +static int radix__ioremap_page_range(unsigned long addr, unsigned long end,
> +		       phys_addr_t phys_addr, pgprot_t prot)
> +{
> +	while (addr != end) {
> +		if (unlikely(ioremap_huge_disabled))
> +			goto use_small_page;

I don't like too much a goto in the middle of an if/else set inside a loop.

Couldn't we have two while() loops, one for the !ioremap_huge_disabled() 
and one for the ioremap_huge_disabled() case ? It would duplicate some 
code but that's only 3 small lines.

Or, when ioremap_huge_disabled(), couldn't it just fallback to the 
hash__ioremap_at() function ?

> +
> +		if (!(addr & ~PUD_MASK) && !(phys_addr & ~PUD_MASK) &&
> +				end - addr >= PUD_SIZE) {
> +			if (radix__map_kernel_page(addr, phys_addr, prot, PUD_SIZE))
> +				return -ENOMEM;
> +			addr += PUD_SIZE;
> +			phys_addr += PUD_SIZE;
> +
> +		} else if (!(addr & ~PMD_MASK) && !(phys_addr & ~PMD_MASK) &&
> +				end - addr >= PMD_SIZE) {
> +			if (radix__map_kernel_page(addr, phys_addr, prot, PMD_SIZE))
> +				return -ENOMEM;
> +			addr += PMD_SIZE;
> +			phys_addr += PMD_SIZE;
> +
> +		} else {
> +use_small_page:
> +			if (radix__map_kernel_page(addr, phys_addr, prot, PAGE_SIZE))
> +				return -ENOMEM;
> +			addr += PAGE_SIZE;
> +			phys_addr += PAGE_SIZE;
> +		}
> +	}
> +	return 0;
> +}
> +
> +static void __iomem * radix__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
> +{
> +	if (radix__ioremap_page_range((unsigned long)ea, (unsigned long)ea + size, pa, prot))
> +		return NULL;
> +	return ea;
> +}
> +
> +void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
> +{
>   	if ((ea + size) >= (void *)IOREMAP_END) {
>   		pr_warn("Outside the supported range\n");
>   		return NULL;
> @@ -129,11 +177,9 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_
>   	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
>   	WARN_ON(size & ~PAGE_MASK);
>   
> -	for (i = 0; i < size; i += PAGE_SIZE)
> -		if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
> -			return NULL;
> -
> -	return (void __iomem *)ea;
> +	if (radix_enabled())

What about  if (radix_enabled() && !ioremap_huge_disabled())  instead ?

> +		return radix__ioremap_at(pa, ea, size, prot);
> +	return hash__ioremap_at(pa, ea, size, prot);

Can't we just leave the no radix stuff here instead of making that 
hash__ioremap_at() function ?

Christophe


>   }
>   
>   /**
> diff --git a/include/linux/io.h b/include/linux/io.h
> index 32e30e8fb9db..423c4294aaa3 100644
> --- a/include/linux/io.h
> +++ b/include/linux/io.h
> @@ -44,6 +44,7 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end,
>   #endif
>   
>   #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
> +extern int ioremap_huge_disabled;
>   void __init ioremap_huge_init(void);
>   int arch_ioremap_pud_supported(void);
>   int arch_ioremap_pmd_supported(void);
> diff --git a/lib/ioremap.c b/lib/ioremap.c
> index 063213685563..386ff956755f 100644
> --- a/lib/ioremap.c
> +++ b/lib/ioremap.c
> @@ -18,7 +18,7 @@
>   static int __read_mostly ioremap_p4d_capable;
>   static int __read_mostly ioremap_pud_capable;
>   static int __read_mostly ioremap_pmd_capable;
> -static int __read_mostly ioremap_huge_disabled;
> +int __read_mostly ioremap_huge_disabled;
>   
>   static int __init set_nohugeiomap(char *str)
>   {
> 

^ permalink raw reply

* Re: [PATCH 2/2] powerpc/64s/radix: ioremap use huge page mappings
From: Nicholas Piggin @ 2019-06-07  7:24 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev
In-Reply-To: <f842646a-c9a1-61a8-9c2b-befb4d6313fa@c-s.fr>

Christophe Leroy's on June 7, 2019 4:56 pm:
> 
> 
> Le 07/06/2019 à 08:19, Nicholas Piggin a écrit :
>> powerpc/64s does not use ioremap_page_range, so it does not get huge
>> vmap iomap mappings automatically. The radix kernel mapping function
>> already allows larger page mappings that work with huge vmap, so wire
>> that up to allow huge pages to be used for ioremap mappings.
> 
> Argh ... I was on the way to merge pgtable_64.c and pgtable_32.c. This 
> will complicate the task ... Anyway this looks a good improvment.

I can put the radix code into book3s64, at least then you have less
gunk in the common code.

> Any reason to limit that to Radix ?

Just that the others don't have a page size easily exposed for their
map_kernel_page.

It would be nice to make this a bit more generic so other sub archs
can enable the larger mappings just by implementing map_kernel_page.

> 
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>   arch/powerpc/include/asm/book3s/64/pgtable.h |  8 +++
>>   arch/powerpc/mm/pgtable_64.c                 | 58 ++++++++++++++++++--
>>   include/linux/io.h                           |  1 +
>>   lib/ioremap.c                                |  2 +-
>>   4 files changed, 62 insertions(+), 7 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index ccf00a8b98c6..d7a4f2d80598 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -274,6 +274,14 @@ extern unsigned long __vmalloc_end;
>>   #define VMALLOC_START	__vmalloc_start
>>   #define VMALLOC_END	__vmalloc_end
>>   
>> +static inline unsigned int ioremap_max_order(void)
>> +{
>> +	if (radix_enabled())
>> +		return PUD_SHIFT;
>> +	return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */
>> +}
>> +#define IOREMAP_MAX_ORDER ({ ioremap_max_order();})
> 
> Following form doesn't work ?
> 
> #define IOREMAP_MAX_ORDER	ioremap_max_order()

I suppose it would. I'm not sure why I did that.

>> +
>>   extern unsigned long __kernel_virt_start;
>>   extern unsigned long __kernel_virt_size;
>>   extern unsigned long __kernel_io_start;
>> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
>> index d2d976ff8a0e..cf02b67eee55 100644
>> --- a/arch/powerpc/mm/pgtable_64.c
>> +++ b/arch/powerpc/mm/pgtable_64.c
>> @@ -112,7 +112,7 @@ unsigned long ioremap_bot = IOREMAP_BASE;
>>    * __ioremap_at - Low level function to establish the page tables
>>    *                for an IO mapping
>>    */
>> -void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
>> +static void __iomem * hash__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
> 
> Is this the correct name ?
> 
> As far as I understand, this function will be used by nohash/64, looks 
> strange to call hash__something() a function used by nohash platforms.

Yeah you're right, I'll fix that.

>>   {
>>   	unsigned long i;
>>   
>> @@ -120,6 +120,54 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_
>>   	if (pgprot_val(prot) & H_PAGE_4K_PFN)
>>   		return NULL;
>>   
>> +	for (i = 0; i < size; i += PAGE_SIZE)
>> +		if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
>> +			return NULL;
>> +
>> +	return (void __iomem *)ea;
>> +}
>> +
>> +static int radix__ioremap_page_range(unsigned long addr, unsigned long end,
>> +		       phys_addr_t phys_addr, pgprot_t prot)
>> +{
>> +	while (addr != end) {
>> +		if (unlikely(ioremap_huge_disabled))
>> +			goto use_small_page;
> 
> I don't like too much a goto in the middle of an if/else set inside a loop.
> 
> Couldn't we have two while() loops, one for the !ioremap_huge_disabled() 
> and one for the ioremap_huge_disabled() case ? It would duplicate some 
> code but that's only 3 small lines.
> 
> Or, when ioremap_huge_disabled(), couldn't it just fallback to the 
> hash__ioremap_at() function ?

Yeah okay. I'll see how the code looks after I move it into 
radix_pgtable.c, it might be best to keep all radix code there together
even for the disabled case.

>> +		return radix__ioremap_at(pa, ea, size, prot);
>> +	return hash__ioremap_at(pa, ea, size, prot);
> 
> Can't we just leave the no radix stuff here instead of making that 
> hash__ioremap_at() function ?

Yeah that'll probably work better.

Thanks,
Nick


^ permalink raw reply

* [Bug 203839] Kernel 5.2-rc3 fails to boot on a PowerMac G4 3,6: systemd[1]: Failed to bump fs.file-max, ignoring: invalid argument
From: bugzilla-daemon @ 2019-06-07 10:29 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-203839-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=203839

--- Comment #3 from Christophe Leroy (christophe.leroy@c-s.fr) ---
Could you try and revert the following commits ?

38b4564cf042 powerpc/32: don't do syscall stuff in transfer_to_handler
1a4b739bbb4f powerpc/32: implement fast entry for syscalls on BOOKE
b86fb88855ea powerpc/32: implement fast entry for syscalls on non BOOKE

Thanks
Christophe

On 06/07/2019 12:03 AM, bugzilla-daemon@bugzilla.kernel.org wrote:
> https://bugzilla.kernel.org/show_bug.cgi?id=203839
> 
> --- Comment #2 from Erhard F. (erhard_f@mailbox.org) ---
> Created attachment 283139
>    --> https://bugzilla.kernel.org/attachment.cgi?id=283139&action=edit
> kernel .config (5.2-rc3, G4 MDD)
>

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* Re: [Bug 203839] Kernel 5.2-rc3 fails to boot on a PowerMac G4 3,6: systemd[1]: Failed to bump fs.file-max, ignoring: invalid argument
From: Christophe Leroy @ 2019-06-07 10:29 UTC (permalink / raw)
  To: bugzilla-daemon, linuxppc-dev
In-Reply-To: <bug-203839-206035-rblwljoM5C@https.bugzilla.kernel.org/>

Could you try and revert the following commits ?

38b4564cf042 powerpc/32: don't do syscall stuff in transfer_to_handler
1a4b739bbb4f powerpc/32: implement fast entry for syscalls on BOOKE
b86fb88855ea powerpc/32: implement fast entry for syscalls on non BOOKE

Thanks
Christophe

On 06/07/2019 12:03 AM, bugzilla-daemon@bugzilla.kernel.org wrote:
> https://bugzilla.kernel.org/show_bug.cgi?id=203839
> 
> --- Comment #2 from Erhard F. (erhard_f@mailbox.org) ---
> Created attachment 283139
>    --> https://bugzilla.kernel.org/attachment.cgi?id=283139&action=edit
> kernel .config (5.2-rc3, G4 MDD)
> 

^ permalink raw reply

* [RFC V3] mm: Generalize and rename notify_page_fault() as kprobe_page_fault()
From: Anshuman Khandual @ 2019-06-07 10:34 UTC (permalink / raw)
  To: linux-kernel, linux-mm
  Cc: Mark Rutland, Michal Hocko, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Paul Mackerras,
	sparclinux, Stephen Rothwell, linux-s390, Yoshinori Sato, x86,
	Russell King, Matthew Wilcox, Ingo Molnar, Fenghua Yu,
	Anshuman Khandual, Andrey Konovalov, Andy Lutomirski,
	Thomas Gleixner, linux-arm-kernel, Tony Luck, Heiko Carstens,
	Martin Schwidefsky, Andrew Morton, linuxppc-dev, David S. Miller

Very similar definitions for notify_page_fault() are being used by multiple
architectures duplicating much of the same code. This attempts to unify all
of them into a generic implementation, rename it as kprobe_page_fault() and
then move it to a common header.

kprobes_built_in() can detect CONFIG_KPROBES, hence new kprobe_page_fault()
need not be wrapped again within CONFIG_KPROBES. Trap number argument can
now contain upto an 'unsigned int' accommodating all possible platforms.

kprobe_page_fault() goes the x86 way while dealing with preemption context.
As explained in these following commits the invoking context in itself must
be non-preemptible for kprobes processing context irrespective of whether
kprobe_running() or perhaps smp_processor_id() is safe or not. It does not
make much sense to continue when original context is preemptible. Instead
just bail out earlier.

commit a980c0ef9f6d
("x86/kprobes: Refactor kprobes_fault() like kprobe_exceptions_notify()")

commit b506a9d08bae ("x86: code clarification patch to Kprobes arch code")

Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-ia64@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-s390@vger.kernel.org
Cc: linux-sh@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Cc: x86@kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>

Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
Testing:

- Build and boot tested on arm64 and x86
- Build tested on some other archs (arm, sparc64, alpha, powerpc etc)

Changes in RFC V3:

- Updated the commit message with an explaination for new preemption behaviour
- Moved notify_page_fault() to kprobes.h with 'static nokprobe_inline' per Matthew
- Changed notify_page_fault() return type from int to bool per Michael Ellerman
- Renamed notify_page_fault() as kprobe_page_fault() per Peterz

Changes in RFC V2: (https://patchwork.kernel.org/patch/10974221/)

- Changed generic notify_page_fault() per Mathew Wilcox
- Changed x86 to use new generic notify_page_fault()
- s/must not/need not/ in commit message per Matthew Wilcox

Changes in RFC V1: (https://patchwork.kernel.org/patch/10968273/)

 arch/arm/mm/fault.c      | 24 +-----------------------
 arch/arm64/mm/fault.c    | 24 +-----------------------
 arch/ia64/mm/fault.c     | 24 +-----------------------
 arch/powerpc/mm/fault.c  | 23 ++---------------------
 arch/s390/mm/fault.c     | 16 +---------------
 arch/sh/mm/fault.c       | 18 ++----------------
 arch/sparc/mm/fault_64.c | 16 +---------------
 arch/x86/mm/fault.c      | 21 ++-------------------
 include/linux/kprobes.h  | 16 ++++++++++++++++
 9 files changed, 27 insertions(+), 155 deletions(-)

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 58f69fa..94a97a4 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -30,28 +30,6 @@
 
 #ifdef CONFIG_MMU
 
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
-{
-	int ret = 0;
-
-	if (!user_mode(regs)) {
-		/* kprobe_running() needs smp_processor_id() */
-		preempt_disable();
-		if (kprobe_running() && kprobe_fault_handler(regs, fsr))
-			ret = 1;
-		preempt_enable();
-	}
-
-	return ret;
-}
-#else
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
-{
-	return 0;
-}
-#endif
-
 /*
  * This is useful to dump out the page tables associated with
  * 'addr' in mm 'mm'.
@@ -266,7 +244,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	vm_fault_t fault;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
-	if (notify_page_fault(regs, fsr))
+	if (kprobe_page_fault(regs, fsr))
 		return 0;
 
 	tsk = current;
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index a30818e..8fe4bbc 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -70,28 +70,6 @@ static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr)
 	return debug_fault_info + DBG_ESR_EVT(esr);
 }
 
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
-{
-	int ret = 0;
-
-	/* kprobe_running() needs smp_processor_id() */
-	if (!user_mode(regs)) {
-		preempt_disable();
-		if (kprobe_running() && kprobe_fault_handler(regs, esr))
-			ret = 1;
-		preempt_enable();
-	}
-
-	return ret;
-}
-#else
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
-{
-	return 0;
-}
-#endif
-
 static void data_abort_decode(unsigned int esr)
 {
 	pr_alert("Data abort info:\n");
@@ -446,7 +424,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	unsigned long vm_flags = VM_READ | VM_WRITE;
 	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
-	if (notify_page_fault(regs, esr))
+	if (kprobe_page_fault(regs, esr))
 		return 0;
 
 	tsk = current;
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 5baeb02..22582f8 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -21,28 +21,6 @@
 
 extern int die(char *, struct pt_regs *, long);
 
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs, int trap)
-{
-	int ret = 0;
-
-	if (!user_mode(regs)) {
-		/* kprobe_running() needs smp_processor_id() */
-		preempt_disable();
-		if (kprobe_running() && kprobe_fault_handler(regs, trap))
-			ret = 1;
-		preempt_enable();
-	}
-
-	return ret;
-}
-#else
-static inline int notify_page_fault(struct pt_regs *regs, int trap)
-{
-	return 0;
-}
-#endif
-
 /*
  * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
  * (inside region 5, on ia64) and that page is present.
@@ -116,7 +94,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 	/*
 	 * This is to handle the kprobes on user space access instructions
 	 */
-	if (notify_page_fault(regs, TRAP_BRKPT))
+	if (kprobe_page_fault(regs, TRAP_BRKPT))
 		return;
 
 	if (user_mode(regs))
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index ec6b7ad..f20ee668 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -42,26 +42,6 @@
 #include <asm/debug.h>
 #include <asm/kup.h>
 
-static inline bool notify_page_fault(struct pt_regs *regs)
-{
-	bool ret = false;
-
-#ifdef CONFIG_KPROBES
-	/* kprobe_running() needs smp_processor_id() */
-	if (!user_mode(regs)) {
-		preempt_disable();
-		if (kprobe_running() && kprobe_fault_handler(regs, 11))
-			ret = true;
-		preempt_enable();
-	}
-#endif /* CONFIG_KPROBES */
-
-	if (unlikely(debugger_fault_handler(regs)))
-		ret = true;
-
-	return ret;
-}
-
 /*
  * Check whether the instruction inst is a store using
  * an update addressing form which will update r1.
@@ -462,8 +442,9 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 	int is_write = page_fault_is_write(error_code);
 	vm_fault_t fault, major = 0;
 	bool must_retry = false;
+	int kprobe_fault = kprobe_page_fault(regs, 11);
 
-	if (notify_page_fault(regs))
+	if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
 		return 0;
 
 	if (unlikely(page_fault_is_bad(error_code))) {
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 91ce03f..bb77a2c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -67,20 +67,6 @@ static int __init fault_init(void)
 }
 early_initcall(fault_init);
 
-static inline int notify_page_fault(struct pt_regs *regs)
-{
-	int ret = 0;
-
-	/* kprobe_running() needs smp_processor_id() */
-	if (kprobes_built_in() && !user_mode(regs)) {
-		preempt_disable();
-		if (kprobe_running() && kprobe_fault_handler(regs, 14))
-			ret = 1;
-		preempt_enable();
-	}
-	return ret;
-}
-
 /*
  * Find out which address space caused the exception.
  * Access register mode is impossible, ignore space == 3.
@@ -411,7 +397,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
 	 */
 	clear_pt_regs_flag(regs, PIF_PER_TRAP);
 
-	if (notify_page_fault(regs))
+	if (kprobe_page_fault(regs, 14))
 		return 0;
 
 	mm = tsk->mm;
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 6defd2c6..74cd4ac 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -24,20 +24,6 @@
 #include <asm/tlbflush.h>
 #include <asm/traps.h>
 
-static inline int notify_page_fault(struct pt_regs *regs, int trap)
-{
-	int ret = 0;
-
-	if (kprobes_built_in() && !user_mode(regs)) {
-		preempt_disable();
-		if (kprobe_running() && kprobe_fault_handler(regs, trap))
-			ret = 1;
-		preempt_enable();
-	}
-
-	return ret;
-}
-
 static void
 force_sig_info_fault(int si_signo, int si_code, unsigned long address,
 		     struct task_struct *tsk)
@@ -415,14 +401,14 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	if (unlikely(fault_in_kernel_space(address))) {
 		if (vmalloc_fault(address) >= 0)
 			return;
-		if (notify_page_fault(regs, vec))
+		if (kprobe_page_fault(regs, vec))
 			return;
 
 		bad_area_nosemaphore(regs, error_code, address);
 		return;
 	}
 
-	if (unlikely(notify_page_fault(regs, vec)))
+	if (unlikely(kprobe_page_fault(regs, vec)))
 		return;
 
 	/* Only enable interrupts if they were on before the fault */
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 8f8a604..6865f9c 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -38,20 +38,6 @@
 
 int show_unhandled_signals = 1;
 
-static inline __kprobes int notify_page_fault(struct pt_regs *regs)
-{
-	int ret = 0;
-
-	/* kprobe_running() needs smp_processor_id() */
-	if (kprobes_built_in() && !user_mode(regs)) {
-		preempt_disable();
-		if (kprobe_running() && kprobe_fault_handler(regs, 0))
-			ret = 1;
-		preempt_enable();
-	}
-	return ret;
-}
-
 static void __kprobes unhandled_fault(unsigned long address,
 				      struct task_struct *tsk,
 				      struct pt_regs *regs)
@@ -285,7 +271,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
 
 	fault_code = get_thread_fault_code();
 
-	if (notify_page_fault(regs))
+	if (kprobe_page_fault(regs, 0))
 		goto exit_exception;
 
 	si_code = SEGV_MAPERR;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 46df4c6..5400f4e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -46,23 +46,6 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
 	return 0;
 }
 
-static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
-{
-	if (!kprobes_built_in())
-		return 0;
-	if (user_mode(regs))
-		return 0;
-	/*
-	 * To be potentially processing a kprobe fault and to be allowed to call
-	 * kprobe_running(), we have to be non-preemptible.
-	 */
-	if (preemptible())
-		return 0;
-	if (!kprobe_running())
-		return 0;
-	return kprobe_fault_handler(regs, X86_TRAP_PF);
-}
-
 /*
  * Prefetch quirks:
  *
@@ -1280,7 +1263,7 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
 		return;
 
 	/* kprobes don't want to hook the spurious faults: */
-	if (kprobes_fault(regs))
+	if (kprobe_page_fault(regs, X86_TRAP_PF))
 		return;
 
 	/*
@@ -1311,7 +1294,7 @@ void do_user_addr_fault(struct pt_regs *regs,
 	mm = tsk->mm;
 
 	/* kprobes don't want to hook the spurious faults: */
-	if (unlikely(kprobes_fault(regs)))
+	if (unlikely(kprobe_page_fault(regs, X86_TRAP_PF)))
 		return;
 
 	/*
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 443d980..064dd15 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -458,4 +458,20 @@ static inline bool is_kprobe_optinsn_slot(unsigned long addr)
 }
 #endif
 
+static nokprobe_inline bool kprobe_page_fault(struct pt_regs *regs,
+					      unsigned int trap)
+{
+	int ret = 0;
+
+	/*
+	 * To be potentially processing a kprobe fault and to be allowed
+	 * to call kprobe_running(), we have to be non-preemptible.
+	 */
+	if (kprobes_built_in() && !preemptible() && !user_mode(regs)) {
+		if (kprobe_running() && kprobe_fault_handler(regs, trap))
+			ret = 1;
+	}
+	return ret;
+}
+
 #endif /* _LINUX_KPROBES_H */
-- 
2.7.4


^ permalink raw reply related

* Re: [RFC/RFT PATCH] Revert "ASoC: fsl_esai: ETDR and TX0~5 registers are non volatile"
From: Mark Brown @ 2019-06-07 11:12 UTC (permalink / raw)
  To: Nicolin Chen
  Cc: alsa-devel, timur, Xiubo.Lee, linuxppc-dev, shengjiu.wang, tiwai,
	lgirdwood, perex, festevam, linux-kernel
In-Reply-To: <20190606230105.4385-1-nicoleotsuka@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 708 bytes --]

On Thu, Jun 06, 2019 at 04:01:05PM -0700, Nicolin Chen wrote:
> This reverts commit 8973112aa41b8ad956a5b47f2fe17bc2a5cf2645.

Please use subject lines matching the style for the subsystem.  This
makes it easier for people to identify relevant patches.

> 1) Though ETDR and TX0~5 are not volatile but write-only registers,
>    they should not be cached either. According to the definition of
>    "volatile_reg", one should be put in the volatile list if it can
>    not be cached.

There's no problem with caching write only registers, having a cache
allows one to do read/modify/write cycles on them and can help with
debugging.  The original reason we had cache code in ASoC was for write
only devices.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [RFC V3] mm: Generalize and rename notify_page_fault() as kprobe_page_fault()
From: Stephen Rothwell @ 2019-06-07 12:03 UTC (permalink / raw)
  To: Anshuman Khandual
  Cc: Mark Rutland, Michal Hocko, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, linux-mm,
	Paul Mackerras, sparclinux, linux-s390, Yoshinori Sato, x86,
	Russell King, Matthew Wilcox, Ingo Molnar, Fenghua Yu,
	Andrey Konovalov, Andy Lutomirski, Thomas Gleixner,
	linux-arm-kernel, Tony Luck, Heiko Carstens, linux-kernel,
	Martin Schwidefsky, Andrew Morton, linuxppc-dev, David S. Miller
In-Reply-To: <1559903655-5609-1-git-send-email-anshuman.khandual@arm.com>

[-- Attachment #1: Type: text/plain, Size: 722 bytes --]

Hi Anshuman,

On Fri,  7 Jun 2019 16:04:15 +0530 Anshuman Khandual <anshuman.khandual@arm.com> wrote:
>
> +static nokprobe_inline bool kprobe_page_fault(struct pt_regs *regs,
> +					      unsigned int trap)
> +{
> +	int ret = 0;
> +
> +	/*
> +	 * To be potentially processing a kprobe fault and to be allowed
> +	 * to call kprobe_running(), we have to be non-preemptible.
> +	 */
> +	if (kprobes_built_in() && !preemptible() && !user_mode(regs)) {
> +		if (kprobe_running() && kprobe_fault_handler(regs, trap))
> +			ret = 1;
> +	}
> +	return ret;
> +}

Since this is now declared as "bool" (thanks for that), you should make
"ret" be bool and use true and false;

-- 
Cheers,
Stephen Rothwell

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH v3 7/9] KVM: PPC: Ultravisor: Restrict LDBAR access
From: Claudio Carvalho @ 2019-06-07 12:34 UTC (permalink / raw)
  To: Madhavan Srinivasan, linuxppc-dev
  Cc: Michael Anderson, Ram Pai, kvm-ppc, Bharata B Rao,
	Sukadev Bhattiprolu, Anshuman Khandual
In-Reply-To: <be82d6c0-826b-a8e8-8d8c-2518404e33a7@linux.vnet.ibm.com>


On 6/7/19 1:48 AM, Madhavan Srinivasan wrote:
>
> On 06/06/19 11:06 PM, Claudio Carvalho wrote:
>> When the ultravisor firmware is available, it takes control over the
>> LDBAR register. In this case, thread-imc updates and save/restore
>> operations on the LDBAR register are handled by ultravisor.
>>
>> Signed-off-by: Claudio Carvalho <cclaudio@linux.ibm.com>
>> Signed-off-by: Ram Pai <linuxram@us.ibm.com>
>> ---
>>   arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 2 ++
>>   arch/powerpc/platforms/powernv/idle.c     | 6 ++++--
>>   arch/powerpc/platforms/powernv/opal-imc.c | 7 +++++++
>>   3 files changed, 13 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> index f9b2620fbecd..cffb365d9d02 100644
>> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> @@ -375,8 +375,10 @@ BEGIN_FTR_SECTION
>>       mtspr    SPRN_RPR, r0
>>       ld    r0, KVM_SPLIT_PMMAR(r6)
>>       mtspr    SPRN_PMMAR, r0
>> +BEGIN_FW_FTR_SECTION_NESTED(70)
>>       ld    r0, KVM_SPLIT_LDBAR(r6)
>>       mtspr    SPRN_LDBAR, r0
>> +END_FW_FTR_SECTION_NESTED(FW_FEATURE_ULTRAVISOR, 0, 70)
>>       isync
>>   FTR_SECTION_ELSE
>>       /* On P9 we use the split_info for coordinating LPCR changes */
>> diff --git a/arch/powerpc/platforms/powernv/idle.c
>> b/arch/powerpc/platforms/powernv/idle.c
>> index c9133f7908ca..fd62435e3267 100644
>> --- a/arch/powerpc/platforms/powernv/idle.c
>> +++ b/arch/powerpc/platforms/powernv/idle.c
>> @@ -679,7 +679,8 @@ static unsigned long power9_idle_stop(unsigned long
>> psscr, bool mmu_on)
>>           sprs.ptcr    = mfspr(SPRN_PTCR);
>>           sprs.rpr    = mfspr(SPRN_RPR);
>>           sprs.tscr    = mfspr(SPRN_TSCR);
>> -        sprs.ldbar    = mfspr(SPRN_LDBAR);
>> +        if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
>> +            sprs.ldbar    = mfspr(SPRN_LDBAR);
>>
>>           sprs_saved = true;
>>
>> @@ -762,7 +763,8 @@ static unsigned long power9_idle_stop(unsigned long
>> psscr, bool mmu_on)
>>       mtspr(SPRN_PTCR,    sprs.ptcr);
>>       mtspr(SPRN_RPR,        sprs.rpr);
>>       mtspr(SPRN_TSCR,    sprs.tscr);
>> -    mtspr(SPRN_LDBAR,    sprs.ldbar);
>> +    if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
>> +        mtspr(SPRN_LDBAR,    sprs.ldbar);
>>
>>       if (pls >= pnv_first_tb_loss_level) {
>>           /* TB loss */
>> diff --git a/arch/powerpc/platforms/powernv/opal-imc.c
>> b/arch/powerpc/platforms/powernv/opal-imc.c
>> index 1b6932890a73..e9b641d313fb 100644
>> --- a/arch/powerpc/platforms/powernv/opal-imc.c
>> +++ b/arch/powerpc/platforms/powernv/opal-imc.c
>> @@ -254,6 +254,13 @@ static int opal_imc_counters_probe(struct
>> platform_device *pdev)
>>       bool core_imc_reg = false, thread_imc_reg = false;
>>       u32 type;
>>
>> +    /*
>> +     * When the Ultravisor is enabled, it is responsible for thread-imc
>> +     * updates
>> +     */
>
> Would prefer the comment to be "Disable IMC devices, when Ultravisor is
> enabled"
> Rest looks good.
> Acked-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>


Thanks Maddy. I applied that change to the next version.

Claudio


>
>> +    if (firmware_has_feature(FW_FEATURE_ULTRAVISOR))
>> +        return -EACCES;
>> +
>>       /*
>>        * Check whether this is kdump kernel. If yes, force the engines to
>>        * stop and return.


^ permalink raw reply

* Re: [PATCH] powerpc/pseries: fix oops in hotplug memory notifier
From: Michael Ellerman @ 2019-06-07 13:08 UTC (permalink / raw)
  To: Nathan Lynch, linuxppc-dev
In-Reply-To: <20190607050407.25444-1-nathanl@linux.ibm.com>

Nathan Lynch <nathanl@linux.ibm.com> writes:

> During post-migration device tree updates, we can oops in
> pseries_update_drconf_memory if the source device tree has an
> ibm,dynamic-memory-v2 property and the destination has a
> ibm,dynamic_memory (v1) property. The notifier processes an "update"
> for the ibm,dynamic-memory property but it's really an add in this
> scenario. So make sure the old property object is there before
> dereferencing it.
>
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>

Can you pinpoint a commit that introduced the bug? Should we backport
this to stable?

Perhaps?
  Fixes: 2b31e3aec1db ("powerpc/drmem: Add support for ibm, dynamic-memory-v2 property")

cheers

> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index 47087832f8b2..e6bd172bcf30 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -980,6 +980,9 @@ static int pseries_update_drconf_memory(struct of_reconfig_data *pr)
>  	if (!memblock_size)
>  		return -EINVAL;
>  
> +	if (!pr->old_prop)
> +		return 0;
> +
>  	p = (__be32 *) pr->old_prop->value;
>  	if (!p)
>  		return -EINVAL;
> -- 
> 2.20.1

^ permalink raw reply

* [RFC PATCH 1/1] powerpc/pseries/svm: Unshare all pages before kexecing a new kernel
From: Ram Pai @ 2019-06-07 14:47 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Anshuman Khandual, Alexey Kardashevskiy, Mike Anderson,
	linux-kernel, Claudio Carvalho, Ryan Grimm, Paul Mackerras,
	Christoph Hellwig, Thiago Jung Bauermann
In-Reply-To: <20190521044912.1375-13-bauerman@linux.ibm.com>

powerpc/pseries/svm: Unshare all pages before kexecing a new kernel.
    
A new kernel deserves a clean slate. Any pages shared with the
hypervisor is unshared before invoking the new kernel. However there are
exceptions.  If the new kernel is invoked to dump the current kernel, or
if there is a explicit request to preserve the state of the current
kernel, unsharing of pages is skipped.
 
NOTE: Reserve atleast 256M for crashkernel.  Otherwise SWIOTLB
allocation fails and crash kernel fails to boot.
 
Signed-off-by: Ram Pai <linuxram@us.ibm.com>

diff --git a/arch/powerpc/include/asm/ultravisor-api.h b/arch/powerpc/include/asm/ultravisor-api.h
index 8a6c5b4d..c8dd470 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -31,5 +31,6 @@
 #define UV_UNSHARE_PAGE			0xF134
 #define UV_PAGE_INVAL			0xF138
 #define UV_SVM_TERMINATE		0xF13C
+#define UV_UNSHARE_ALL_PAGES		0xF140
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h
index bf5ac05..73c44ff 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -120,6 +120,12 @@ static inline int uv_unshare_page(u64 pfn, u64 npages)
 	return ucall(UV_UNSHARE_PAGE, retbuf, pfn, npages);
 }
 
+static inline int uv_unshare_all_pages(void)
+{
+	unsigned long retbuf[UCALL_BUFSIZE];
+
+	return ucall(UV_UNSHARE_ALL_PAGES, retbuf);
+}
 #endif /* !__ASSEMBLY__ */
 
 #endif	/* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 75692c3..a93e3ab 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -329,6 +329,13 @@ void default_machine_kexec(struct kimage *image)
 #ifdef CONFIG_PPC_PSERIES
 	kexec_paca.lppaca_ptr = NULL;
 #endif
+
+	if (is_svm_platform() && !(image->preserve_context ||
+				   image->type == KEXEC_TYPE_CRASH)) {
+		uv_unshare_all_pages();
+		printk("kexec: Unshared all shared pages.\n");
+	}
+
 	paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
 
 	setup_paca(&kexec_paca);


^ permalink raw reply related

* Re: [PATCH] powerpc/pseries: fix oops in hotplug memory notifier
From: Nathan Lynch @ 2019-06-07 14:59 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev
In-Reply-To: <87tvd1h7bi.fsf@concordia.ellerman.id.au>

Michael Ellerman <mpe@ellerman.id.au> writes:

> Nathan Lynch <nathanl@linux.ibm.com> writes:
>
>> During post-migration device tree updates, we can oops in
>> pseries_update_drconf_memory if the source device tree has an
>> ibm,dynamic-memory-v2 property and the destination has a
>> ibm,dynamic_memory (v1) property. The notifier processes an "update"
>> for the ibm,dynamic-memory property but it's really an add in this
>> scenario. So make sure the old property object is there before
>> dereferencing it.
>>
>> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
>
> Can you pinpoint a commit that introduced the bug? Should we backport
> this to stable?
>
> Perhaps?
>   Fixes: 2b31e3aec1db ("powerpc/drmem: Add support for ibm, dynamic-memory-v2 property")

I guess I had considered it a latent bug, but you're right - we wouldn't
hit it until the v2 support was added. So that commit is correct. Let me
know if I should resend.


^ permalink raw reply

* Re: [RFC V3] mm: Generalize and rename notify_page_fault() as kprobe_page_fault()
From: Dave Hansen @ 2019-06-07 15:06 UTC (permalink / raw)
  To: Anshuman Khandual, linux-kernel, linux-mm
  Cc: Mark Rutland, Michal Hocko, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Will Deacon, Paul Mackerras,
	sparclinux, linux-s390, Yoshinori Sato, x86, Russell King,
	Matthew Wilcox, Ingo Molnar, Fenghua Yu, Stephen Rothwell,
	Andrey Konovalov, Andy Lutomirski, Thomas Gleixner,
	linux-arm-kernel, Tony Luck, Heiko Carstens, Martin Schwidefsky,
	Andrew Morton, linuxppc-dev, David S. Miller
In-Reply-To: <1559903655-5609-1-git-send-email-anshuman.khandual@arm.com>

On 6/7/19 3:34 AM, Anshuman Khandual wrote:
> +static nokprobe_inline bool kprobe_page_fault(struct pt_regs *regs,
> +					      unsigned int trap)
> +{
> +	int ret = 0;
> +
> +	/*
> +	 * To be potentially processing a kprobe fault and to be allowed
> +	 * to call kprobe_running(), we have to be non-preemptible.
> +	 */
> +	if (kprobes_built_in() && !preemptible() && !user_mode(regs)) {
> +		if (kprobe_running() && kprobe_fault_handler(regs, trap))
> +			ret = 1;
> +	}
> +	return ret;
> +}

Nits: Other that taking the nice, readable, x86 one and globbing it onto
a single line, looks OK to me.  It does seem a _bit_ silly to go to the
trouble of converting to 'bool' and then using 0/1 and an 'int'
internally instead of true/false and a bool, though.  It's also not a
horrible thing to add a single line comment to this sucker to say:

/* returns true if kprobes handled the fault */

In any case, and even if you don't clean any of this up:

Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>

^ permalink raw reply

* Re: [RFC V3] mm: Generalize and rename notify_page_fault() as kprobe_page_fault()
From: Christophe Leroy @ 2019-06-07 15:31 UTC (permalink / raw)
  To: Anshuman Khandual, linux-kernel, linux-mm
  Cc: Mark Rutland, Michal Hocko, linux-ia64, linux-sh, Peter Zijlstra,
	Catalin Marinas, Dave Hansen, Heiko Carstens, Paul Mackerras,
	Matthew Wilcox, sparclinux, linux-s390, Yoshinori Sato, x86,
	Russell King, Will Deacon, Ingo Molnar, Fenghua Yu,
	Stephen Rothwell, Andrey Konovalov, Andy Lutomirski,
	Thomas Gleixner, linux-arm-kernel, Tony Luck, Martin Schwidefsky,
	Andrew Morton, linuxppc-dev, David S. Miller
In-Reply-To: <1559903655-5609-1-git-send-email-anshuman.khandual@arm.com>



Le 07/06/2019 à 12:34, Anshuman Khandual a écrit :
> Very similar definitions for notify_page_fault() are being used by multiple
> architectures duplicating much of the same code. This attempts to unify all
> of them into a generic implementation, rename it as kprobe_page_fault() and
> then move it to a common header.
> 
> kprobes_built_in() can detect CONFIG_KPROBES, hence new kprobe_page_fault()
> need not be wrapped again within CONFIG_KPROBES. Trap number argument can
> now contain upto an 'unsigned int' accommodating all possible platforms.
> 
> kprobe_page_fault() goes the x86 way while dealing with preemption context.
> As explained in these following commits the invoking context in itself must
> be non-preemptible for kprobes processing context irrespective of whether
> kprobe_running() or perhaps smp_processor_id() is safe or not. It does not
> make much sense to continue when original context is preemptible. Instead
> just bail out earlier.
> 
> commit a980c0ef9f6d
> ("x86/kprobes: Refactor kprobes_fault() like kprobe_exceptions_notify()")
> 
> commit b506a9d08bae ("x86: code clarification patch to Kprobes arch code")
> 
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linux-ia64@vger.kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-s390@vger.kernel.org
> Cc: linux-sh@vger.kernel.org
> Cc: sparclinux@vger.kernel.org
> Cc: x86@kernel.org
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Matthew Wilcox <willy@infradead.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Christophe Leroy <christophe.leroy@c-s.fr>
> Cc: Stephen Rothwell <sfr@canb.auug.org.au>
> Cc: Andrey Konovalov <andreyknvl@google.com>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Russell King <linux@armlinux.org.uk>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Tony Luck <tony.luck@intel.com>
> Cc: Fenghua Yu <fenghua.yu@intel.com>
> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
> Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
> Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Andy Lutomirski <luto@kernel.org>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>
> 
> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> ---
> Testing:
> 
> - Build and boot tested on arm64 and x86
> - Build tested on some other archs (arm, sparc64, alpha, powerpc etc)
> 
> Changes in RFC V3:
> 
> - Updated the commit message with an explaination for new preemption behaviour
> - Moved notify_page_fault() to kprobes.h with 'static nokprobe_inline' per Matthew
> - Changed notify_page_fault() return type from int to bool per Michael Ellerman
> - Renamed notify_page_fault() as kprobe_page_fault() per Peterz
> 
> Changes in RFC V2: (https://patchwork.kernel.org/patch/10974221/)
> 
> - Changed generic notify_page_fault() per Mathew Wilcox
> - Changed x86 to use new generic notify_page_fault()
> - s/must not/need not/ in commit message per Matthew Wilcox
> 
> Changes in RFC V1: (https://patchwork.kernel.org/patch/10968273/)
> 
>   arch/arm/mm/fault.c      | 24 +-----------------------
>   arch/arm64/mm/fault.c    | 24 +-----------------------
>   arch/ia64/mm/fault.c     | 24 +-----------------------
>   arch/powerpc/mm/fault.c  | 23 ++---------------------
>   arch/s390/mm/fault.c     | 16 +---------------
>   arch/sh/mm/fault.c       | 18 ++----------------
>   arch/sparc/mm/fault_64.c | 16 +---------------
>   arch/x86/mm/fault.c      | 21 ++-------------------
>   include/linux/kprobes.h  | 16 ++++++++++++++++
>   9 files changed, 27 insertions(+), 155 deletions(-)
> 

[...]

> diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
> index 443d980..064dd15 100644
> --- a/include/linux/kprobes.h
> +++ b/include/linux/kprobes.h
> @@ -458,4 +458,20 @@ static inline bool is_kprobe_optinsn_slot(unsigned long addr)
>   }
>   #endif
>   
> +static nokprobe_inline bool kprobe_page_fault(struct pt_regs *regs,
> +					      unsigned int trap)
> +{
> +	int ret = 0;

ret is pointless.

> +
> +	/*
> +	 * To be potentially processing a kprobe fault and to be allowed
> +	 * to call kprobe_running(), we have to be non-preemptible.
> +	 */
> +	if (kprobes_built_in() && !preemptible() && !user_mode(regs)) {
> +		if (kprobe_running() && kprobe_fault_handler(regs, trap))

don't need an 'if A if B', can do 'if A && B'

> +			ret = 1;

can do 'return true;' directly here

> +	}
> +	return ret;

And 'return false' here.

Christophe

> +}
> +
>   #endif /* _LINUX_KPROBES_H */
> 

^ permalink raw reply

* Re: [BISECTED REGRESSION] b43legacy broken on G4 PowerBook
From: Larry Finger @ 2019-06-07 17:25 UTC (permalink / raw)
  To: Aaro Koskinen, Christoph Hellwig, Christian Zigotzky,
	Michael Ellerman
  Cc: linuxppc-dev, linux-wireless, linux-kernel
In-Reply-To: <20190605225059.GA9953@darkstar.musicnaut.iki.fi>

[-- Attachment #1: Type: text/plain, Size: 1169 bytes --]

On 6/5/19 5:50 PM, Aaro Koskinen wrote:
> Hi,
> 
> When upgrading from v5.0 -> v5.1 on G4 PowerBook, I noticed WLAN does
> not work anymore:
> 
> [   42.004303] b43legacy-phy0: Loading firmware version 0x127, patch level 14 (2005-04-18 02:36:27)
> [   42.184837] b43legacy-phy0 debug: Chip initialized
> [   42.184873] b43legacy-phy0 ERROR: The machine/kernel does not support the required 30-bit DMA mask
> 
> The same happens with the current mainline.
> 
> Bisected to:
> 
> 	commit 65a21b71f948406201e4f62e41f06513350ca390
> 	Author: Christoph Hellwig <hch@lst.de>
> 	Date:   Wed Feb 13 08:01:26 2019 +0100
> 
> 	    powerpc/dma: remove dma_nommu_dma_supported
> 
> 	    This function is largely identical to the generic version used
> 	    everywhere else.  Replace it with the generic version.
> 
> 	    Signed-off-by: Christoph Hellwig <hch@lst.de>
> 	    Tested-by: Christian Zigotzky <chzigotzky@xenosoft.de>
> 	    Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

Aaro,

Please try the attached patch. I'm not really pleased with it and I will 
continue to determine why the fallback to a 30-bit mask fails, but at least this 
one works for me.

Larry



[-- Attachment #2: 0001-b43legacy-Fix-DMA-breakage-from-commit-commit-65a21b.patch --]
[-- Type: text/x-patch, Size: 2674 bytes --]

From 25e2f50273e785598b6bd9a8aee28cf825d3fe9f Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Fri, 7 Jun 2019 12:04:16 -0500
Subject: [PATCH] b43legacy: Fix DMA breakage from commit commit 65a21b71f948
To: kvalo@codeaurora.org
Cc: linux-wireless@vger.kernel.org,
    pkshih@realtek.com

Following commit 65a21b71f948 ("powerpc/dma: remove dma_nommu_dma_supported"),
this driver errors with a message that "The machine/kernel does not
support the required 30-bit DMA mask." Indeed, the hardware only allows
31-bit masks. This solution is to change the fallback mask from 30-
to 31-bits for 32-bit PPC systems.

Fixes: 65a21b71f948 ("powerpc/dma: remove dma_nommu_dma_supported")
Reported-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
---
 drivers/net/wireless/broadcom/b43legacy/dma.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/broadcom/b43legacy/dma.c b/drivers/net/wireless/broadcom/b43legacy/dma.c
index 1cc25f44dd9a..75613f516e50 100644
--- a/drivers/net/wireless/broadcom/b43legacy/dma.c
+++ b/drivers/net/wireless/broadcom/b43legacy/dma.c
@@ -27,6 +27,15 @@
 #include <linux/slab.h>
 #include <net/dst.h>
 
+/* Special handling for PPC32 - The maximum DMA mask is 31 bits, and
+ * the fallback to 30 bits fails. Set the fallback at 31.
+ */
+#ifdef CONFIG_PPC32
+#define FB_DMA	31
+#else
+#define FB_DMA	30
+#endif
+
 /* 32bit DMA ops. */
 static
 struct b43legacy_dmadesc32 *op32_idx2desc(struct b43legacy_dmaring *ring,
@@ -418,7 +427,7 @@ static bool b43legacy_dma_mapping_error(struct b43legacy_dmaring *ring,
 
 	switch (ring->type) {
 	case B43legacy_DMA_30BIT:
-		if ((u64)addr + buffersize > (1ULL << 30))
+		if ((u64)addr + buffersize > (1ULL << FB_DMA))
 			goto address_error;
 		break;
 	case B43legacy_DMA_32BIT:
@@ -617,12 +626,12 @@ static u64 supported_dma_mask(struct b43legacy_wldev *dev)
 	if (tmp & B43legacy_DMA32_TXADDREXT_MASK)
 		return DMA_BIT_MASK(32);
 
-	return DMA_BIT_MASK(30);
+	return DMA_BIT_MASK(FB_DMA);
 }
 
 static enum b43legacy_dmatype dma_mask_to_engine_type(u64 dmamask)
 {
-	if (dmamask == DMA_BIT_MASK(30))
+	if (dmamask == DMA_BIT_MASK(FB_DMA))
 		return B43legacy_DMA_30BIT;
 	if (dmamask == DMA_BIT_MASK(32))
 		return B43legacy_DMA_32BIT;
@@ -802,7 +811,7 @@ static int b43legacy_dma_set_mask(struct b43legacy_wldev *dev, u64 mask)
 			continue;
 		}
 		if (mask == DMA_BIT_MASK(32)) {
-			mask = DMA_BIT_MASK(30);
+			mask = DMA_BIT_MASK(FB_DMA);
 			fallback = true;
 			continue;
 		}
-- 
2.21.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox