LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 01/22] powerpc/pkeys: Avoid using lockless page table walk
From: Aneesh Kumar K.V @ 2020-05-05  7:17 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: Aneesh Kumar K.V, Ram Pai, npiggin
In-Reply-To: <20200505071729.54912-1-aneesh.kumar@linux.ibm.com>

Fetch pkey from vma instead of linux page table. Also document the fact that in
some cases the pkey returned in siginfo won't be the same as the one we took
keyfault on. Even with linux page table walk, we can end up in a similar scenario.

Cc: Ram Pai <linuxram@linux.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/mmu.h        |  9 ---
 arch/powerpc/mm/book3s64/hash_utils.c | 24 --------
 arch/powerpc/mm/fault.c               | 83 +++++++++++++++++++--------
 3 files changed, 60 insertions(+), 56 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 0699cfeeb8c9..cf2a08bfd5cd 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -291,15 +291,6 @@ static inline bool early_radix_enabled(void)
 }
 #endif
 
-#ifdef CONFIG_PPC_MEM_KEYS
-extern u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address);
-#else
-static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
-{
-	return 0;
-}
-#endif /* CONFIG_PPC_MEM_KEYS */
-
 #ifdef CONFIG_STRICT_KERNEL_RWX
 static inline bool strict_kernel_rwx_enabled(void)
 {
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 8ed2411c3f39..e951e87a974d 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1671,30 +1671,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 	hash_preload(vma->vm_mm, address, is_exec, trap);
 }
 
-#ifdef CONFIG_PPC_MEM_KEYS
-/*
- * Return the protection key associated with the given address and the
- * mm_struct.
- */
-u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
-{
-	pte_t *ptep;
-	u16 pkey = 0;
-	unsigned long flags;
-
-	if (!mm || !mm->pgd)
-		return 0;
-
-	local_irq_save(flags);
-	ptep = find_linux_pte(mm->pgd, address, NULL, NULL);
-	if (ptep)
-		pkey = pte_to_pkey_bits(pte_val(READ_ONCE(*ptep)));
-	local_irq_restore(flags);
-
-	return pkey;
-}
-#endif /* CONFIG_PPC_MEM_KEYS */
-
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 static inline void tm_flush_hash_page(int local)
 {
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 84af6c8eecf7..8e529e4708e1 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -118,9 +118,34 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address)
 	return __bad_area(regs, address, SEGV_MAPERR);
 }
 
-static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address,
-				    int pkey)
+#ifdef CONFIG_PPC_MEM_KEYS
+static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
+				    struct vm_area_struct *vma)
 {
+	struct mm_struct *mm = current->mm;
+	int pkey;
+
+	/*
+	 * We don't try to fetch the pkey from page table because reading
+	 * page table without locking doesn't guarantee stable pte value.
+	 * Hence the pkey value that we return to userspace can be different
+	 * from the pkey that actually caused access error.
+	 *
+	 * It does *not* guarantee that the VMA we find here
+	 * was the one that we faulted on.
+	 *
+	 * 1. T1   : mprotect_key(foo, PAGE_SIZE, pkey=4);
+	 * 2. T1   : set AMR to deny access to pkey=4, touches, page
+	 * 3. T1   : faults...
+	 * 4.    T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
+	 * 5. T1   : enters fault handler, takes mmap_sem, etc...
+	 * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
+	 *	     faulted on a pte with its pkey=4.
+	 */
+	pkey = vma_pkey(vma);
+
+	up_read(&mm->mmap_sem);
+
 	/*
 	 * If we are in kernel mode, bail out with a SEGV, this will
 	 * be caught by the assembly which will restore the non-volatile
@@ -133,6 +158,7 @@ static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address,
 
 	return 0;
 }
+#endif
 
 static noinline int bad_access(struct pt_regs *regs, unsigned long address)
 {
@@ -289,8 +315,31 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
 	return false;
 }
 
-static bool access_error(bool is_write, bool is_exec,
-			 struct vm_area_struct *vma)
+#ifdef CONFIG_PPC_MEM_KEYS
+static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
+			      struct vm_area_struct *vma)
+{
+	/*
+	 * Read or write was blocked by protection keys.  This is
+	 * always an unconditional error and can never result in
+	 * a follow-up action to resolve the fault, like a COW.
+	 */
+	if (is_pkey)
+		return true;
+
+	/*
+	 * Make sure to check the VMA so that we do not perform
+	 * faults just to hit a pkey fault as soon as we fill in a
+	 * page. Only called for current mm, hence foreign == 0
+	 */
+	if (!arch_vma_access_permitted(vma, is_write, is_exec, 0))
+		return true;
+
+	return false;
+}
+#endif
+
+static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma)
 {
 	/*
 	 * Allow execution from readable areas if the MMU does not
@@ -483,10 +532,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
-	if (error_code & DSISR_KEYFAULT)
-		return bad_key_fault_exception(regs, address,
-					       get_mm_addr_key(mm, address));
-
 	/*
 	 * We want to do this outside mmap_sem, because reading code around nip
 	 * can result in fault, which will cause a deadlock when called with
@@ -555,6 +600,13 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 		return bad_area(regs, address);
 
 good_area:
+
+#ifdef CONFIG_PPC_MEM_KEYS
+	if (unlikely(access_pkey_error(is_write, is_exec,
+				       (error_code & DSISR_KEYFAULT), vma)))
+		return bad_access_pkey(regs, address, vma);
+#endif /* CONFIG_PPC_MEM_KEYS */
+
 	if (unlikely(access_error(is_write, is_exec, vma)))
 		return bad_access(regs, address);
 
@@ -565,21 +617,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 	 */
 	fault = handle_mm_fault(vma, address, flags);
 
-#ifdef CONFIG_PPC_MEM_KEYS
-	/*
-	 * we skipped checking for access error due to key earlier.
-	 * Check that using handle_mm_fault error return.
-	 */
-	if (unlikely(fault & VM_FAULT_SIGSEGV) &&
-		!arch_vma_access_permitted(vma, is_write, is_exec, 0)) {
-
-		int pkey = vma_pkey(vma);
-
-		up_read(&mm->mmap_sem);
-		return bad_key_fault_exception(regs, address, pkey);
-	}
-#endif /* CONFIG_PPC_MEM_KEYS */
-
 	major |= fault & VM_FAULT_MAJOR;
 
 	if (fault_signal_pending(fault, regs))
-- 
2.26.2


^ permalink raw reply related

* [PATCH v4 00/22] Avoid IPI while updating page table entries.
From: Aneesh Kumar K.V @ 2020-05-05  7:17 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: Aneesh Kumar K.V, npiggin

Problem Summary:
Slow termination of KVM guest with large guest RAM config due to a large number
of IPIs that were caused by clearing level 1 PTE entries (THP) entries.
This is shown in the stack trace below.


- qemu-system-ppc  [kernel.vmlinux]            [k] smp_call_function_many
   - smp_call_function_many
      - 36.09% smp_call_function_many
           serialize_against_pte_lookup
           radix__pmdp_huge_get_and_clear
           zap_huge_pmd
           unmap_page_range
           unmap_vmas
           unmap_region
           __do_munmap
           __vm_munmap
           sys_munmap
          system_call
           __munmap
           qemu_ram_munmap
           qemu_anon_ram_free
           reclaim_ramblock
           call_rcu_thread
           qemu_thread_start
           start_thread
           __clone

Why we need to do IPI when clearing PMD entries:
This was added as part of commit: 13bd817bb884 ("powerpc/thp: Serialize pmd clear against a linux page table walk")

serialize_against_pte_lookup makes sure that all parallel lockless page table
walk completes before we convert a PMD pte entry to regular pmd entry.
We end up doing that conversion in the below scenarios

1) __split_huge_zero_page_pmd
2) do_huge_pmd_wp_page_fallback
3) MADV_DONTNEED running parallel to page faults.

local_irq_disable and lockless page table walk:

The lockless page table walk work with the assumption that we can dereference
the page table contents without holding a lock. For this to work, we need to
make sure we read the page table contents atomically and page table pages are
not going to be freed/released while we are walking the
table pages. We can achieve by using a rcu based freeing for page table pages or
if the architecture implements broadcast tlbie, we can block the IPI as we walk the
page table pages.

To support both the above framework, lockless page table walk is done with
irq disabled instead of rcu_read_lock()

We do have two interface for lockless page table walk, gup fast and __find_linux_pte.
This patch series makes __find_linux_pte table walk safe against the conversion of PMD PTE
to regular PMD.

gup fast:

gup fast is already safe against THP split because kernel now differentiate between a pmd
split and a compound page split. gup fast can run parallel to a pmd split and we prevent
a parallel gup fast to a hugepage split, by freezing the page refcount and failing the
speculative page ref increment.


Similar to how gup is safe against parallel pmd split, this patch series updates the
__find_linux_pte callers to be safe against a parallel pmd split. We do that by enforcing
the following rules.

1) Don't reload the pte value, because that can be updated in parallel.
2) Code should be able to work with a stale PTE value and not the recent one. ie,
the pte value that we are looking at may not be the latest value in the page table.
3) Before looking at pte value check for _PAGE_PTE bit. We now do this as part of pte_present()
check.

Performance:

This speeds up Qemu guest RAM del/unplug time as below
128 core, 496GB guest:

Without patch:
munmap start: timer = 13162 ms, PID=7684
munmap finish: timer = 95312 ms, PID=7684 - delta = 82150 ms

With patch (upto removing IPI)
munmap start: timer = 196449 ms, PID=6681
munmap finish: timer = 196488 ms, PID=6681 - delta = 39ms

With patch (with adding the tlb invalidate in pmdp_huge_get_and_clear_full)
munmap start: timer = 196345 ms, PID=6879
munmap finish: timer = 196714 ms, PID=6879 - delta = 369ms

Changes from V3:
* Rebase to latest kernel

Changes from V2:
* Rebase to lastest kernel

Changes from V1:
* Update commit messages
* Qemu Performance numbers


Aneesh Kumar K.V (22):
  powerpc/pkeys: Avoid using lockless page table walk
  powerpc/pkeys: Check vma before returning key fault error to the user
  powerpc/mm/hash64: use _PAGE_PTE when checking for pte_present
  powerpc/hash64: Restrict page table lookup using init_mm with
    __flush_hash_table_range
  powerpc/book3s64/hash: Use the pte_t address from the caller
  powerpc/mce: Don't reload pte val in addr_to_pfn
  powerpc/perf/callchain: Use __get_user_pages_fast in
    read_user_stack_slow
  powerpc/kvm/book3s: switch from raw_spin_*lock to arch_spin_lock.
  powerpc/kvm/book3s: Add helper to walk partition scoped linux page
    table.
  powerpc/kvm/nested: Add helper to walk nested shadow linux page table.
  powerpc/kvm/book3s: Use kvm helpers to walk shadow or secondary table
  powerpc/kvm/book3s: Add helper for host page table walk
  powerpc/kvm/book3s: Use find_kvm_host_pte in page fault handler
  powerpc/kvm/book3s: Use find_kvm_host_pte in h_enter
  powerpc/kvm/book3s: use find_kvm_host_pte in pute_tce functions
  powerpc/kvm/book3s: Avoid using rmap to protect parallel page table
    update.
  powerpc/kvm/book3s: use find_kvm_host_pte in
    kvmppc_book3s_instantiate_page
  powerpc/kvm/book3s: Use find_kvm_host_pte in kvmppc_get_hpa
  powerpc/kvm/book3s: Use pte_present instead of opencoding
    _PAGE_PRESENT check
  powerpc/mm/book3s64: Avoid sending IPI on clearing PMD
  mm: change pmdp_huge_get_and_clear_full take vm_area_struct as arg
  powerpc/mm/book3s64: Fix MADV_DONTNEED and parallel page fault race

 arch/powerpc/include/asm/book3s/64/pgtable.h  | 20 +++--
 .../include/asm/book3s/64/tlbflush-hash.h     |  3 +-
 arch/powerpc/include/asm/kvm_book3s.h         |  2 +-
 arch/powerpc/include/asm/kvm_book3s_64.h      | 34 ++++++++-
 arch/powerpc/include/asm/mmu.h                |  9 ---
 arch/powerpc/kernel/mce_power.c               | 14 ++--
 arch/powerpc/kernel/pci_64.c                  |  2 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c           | 13 ++--
 arch/powerpc/kvm/book3s_64_mmu_radix.c        | 38 +++++-----
 arch/powerpc/kvm/book3s_64_vio_hv.c           | 64 ++++++++--------
 arch/powerpc/kvm/book3s_hv_nested.c           | 37 ++++++---
 arch/powerpc/kvm/book3s_hv_rm_mmu.c           | 58 +++++---------
 arch/powerpc/mm/book3s64/hash_pgtable.c       | 11 ---
 arch/powerpc/mm/book3s64/hash_tlb.c           | 16 +---
 arch/powerpc/mm/book3s64/hash_utils.c         | 62 ++++-----------
 arch/powerpc/mm/book3s64/pgtable.c            | 24 ++++--
 arch/powerpc/mm/book3s64/radix_pgtable.c      | 19 ++---
 arch/powerpc/mm/fault.c                       | 75 +++++++++++++------
 arch/powerpc/perf/callchain_64.c              | 46 ++++--------
 arch/s390/include/asm/pgtable.h               |  4 +-
 include/asm-generic/pgtable.h                 |  4 +-
 mm/huge_memory.c                              |  4 +-
 22 files changed, 273 insertions(+), 286 deletions(-)

-- 
2.26.2


^ permalink raw reply

* Re: [PATCH v7 26/28] powerpc: Support prefixed instructions in alignment handler
From: Alistair Popple @ 2020-05-05  7:17 UTC (permalink / raw)
  To: Jordan Niethe; +Cc: npiggin, bala24, naveen.n.rao, linuxppc-dev, dja
In-Reply-To: <20200501034220.8982-27-jniethe5@gmail.com>

Reviewed-by: Alistair Popple <alistair@popple.id.au>

On Friday, 1 May 2020 1:42:18 PM AEST Jordan Niethe wrote:
> If a prefixed instruction results in an alignment exception, the
> SRR1_PREFIXED bit is set. The handler attempts to emulate the
> responsible instruction and then increment the NIP past it. Use
> SRR1_PREFIXED to determine by how much the NIP should be incremented.
> 
> Prefixed instructions are not permitted to cross 64-byte boundaries. If
> they do the alignment interrupt is invoked with SRR1 BOUNDARY bit set.
> If this occurs send a SIGBUS to the offending process if in user mode.
> If in kernel mode call bad_page_fault().
> 
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
> v2: - Move __get_user_instr() and __get_user_instr_inatomic() to this
> commit (previously in "powerpc sstep: Prepare to support prefixed
> instructions").
>     - Rename sufx to suffix
>     - Use a macro for calculating instruction length
> v3: Move __get_user_{instr(), instr_inatomic()} up with the other
> get_user definitions and remove nested if.
> v4: Rolled into "Add prefixed instructions to instruction data type"
> v5: Only one definition of inst_length()
> ---
>  arch/powerpc/kernel/traps.c | 19 ++++++++++++++++++-
>  1 file changed, 18 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
> index 493a3fa0ac1a..105242cc2f28 100644
> --- a/arch/powerpc/kernel/traps.c
> +++ b/arch/powerpc/kernel/traps.c
> @@ -583,6 +583,8 @@ static inline int check_io_access(struct pt_regs *regs)
>  #define REASON_ILLEGAL		(ESR_PIL | ESR_PUO)
>  #define REASON_PRIVILEGED	ESR_PPR
>  #define REASON_TRAP		ESR_PTR
> +#define REASON_PREFIXED		0
> +#define REASON_BOUNDARY		0
> 
>  /* single-step stuff */
>  #define single_stepping(regs)	(current->thread.debug.dbcr0 & DBCR0_IC)
> @@ -597,12 +599,16 @@ static inline int check_io_access(struct pt_regs
> *regs) #define REASON_ILLEGAL		SRR1_PROGILL
>  #define REASON_PRIVILEGED	SRR1_PROGPRIV
>  #define REASON_TRAP		SRR1_PROGTRAP
> +#define REASON_PREFIXED		SRR1_PREFIXED
> +#define REASON_BOUNDARY		SRR1_BOUNDARY
> 
>  #define single_stepping(regs)	((regs)->msr & MSR_SE)
>  #define clear_single_step(regs)	((regs)->msr &= ~MSR_SE)
>  #define clear_br_trace(regs)	((regs)->msr &= ~MSR_BE)
>  #endif
> 
> +#define inst_length(reason)	(((reason) & REASON_PREFIXED) ? 8 : 4)
> +
>  #if defined(CONFIG_E500)
>  int machine_check_e500mc(struct pt_regs *regs)
>  {
> @@ -1593,11 +1599,20 @@ void alignment_exception(struct pt_regs *regs)
>  {
>  	enum ctx_state prev_state = exception_enter();
>  	int sig, code, fixed = 0;
> +	unsigned long  reason;
> 
>  	/* We restore the interrupt state now */
>  	if (!arch_irq_disabled_regs(regs))
>  		local_irq_enable();
> 
> +	reason = get_reason(regs);
> +
> +	if (reason & REASON_BOUNDARY) {
> +		sig = SIGBUS;
> +		code = BUS_ADRALN;
> +		goto bad;
> +	}
> +
>  	if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
>  		goto bail;
> 
> @@ -1606,7 +1621,8 @@ void alignment_exception(struct pt_regs *regs)
>  		fixed = fix_alignment(regs);
> 
>  	if (fixed == 1) {
> -		regs->nip += 4;	/* skip over emulated instruction */
> +		/* skip over emulated instruction */
> +		regs->nip += inst_length(reason);
>  		emulate_single_step(regs);
>  		goto bail;
>  	}
> @@ -1619,6 +1635,7 @@ void alignment_exception(struct pt_regs *regs)
>  		sig = SIGBUS;
>  		code = BUS_ADRALN;
>  	}
> +bad:
>  	if (user_mode(regs))
>  		_exception(sig, regs, code, regs->dar);
>  	else





^ permalink raw reply

* Re: [PATCH v7 25/28] powerpc: Test prefixed instructions in feature fixups
From: Alistair Popple @ 2020-05-05  7:15 UTC (permalink / raw)
  To: Jordan Niethe; +Cc: npiggin, bala24, naveen.n.rao, linuxppc-dev, dja
In-Reply-To: <20200501034220.8982-26-jniethe5@gmail.com>

Hmm, I was hoping to add a tested by but I'm seeing the following failure in 
Mambo:

[    1.475459] feature-fixups: test failed at line 730

Based on the name of the test it looks like you probably made a copy/paste 
error in ftr_fixup_prefix2_expected. I suspect you probably meant to use the alt 
fixup:

globl(ftr_fixup_prefix2_expected)
	or	1,1,1
	.long 0x7000000
	.long 0x0000001
	or	2,2,2

Also for some reason these tests (and one of the code-patching tests) aren't 
passing on big endian.

- Alistair

On Friday, 1 May 2020 1:42:17 PM AEST Jordan Niethe wrote:
> Expand the feature-fixups self-tests to includes tests for prefixed
> instructions.
> 
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
> v6: New to series
> ---
>  arch/powerpc/lib/feature-fixups-test.S | 68 +++++++++++++++++++++++
>  arch/powerpc/lib/feature-fixups.c      | 74 ++++++++++++++++++++++++++
>  2 files changed, 142 insertions(+)
> 
> diff --git a/arch/powerpc/lib/feature-fixups-test.S
> b/arch/powerpc/lib/feature-fixups-test.S index b12168c2447a..6e2da9123a9b
> 100644
> --- a/arch/powerpc/lib/feature-fixups-test.S
> +++ b/arch/powerpc/lib/feature-fixups-test.S
> @@ -791,3 +791,71 @@ globl(lwsync_fixup_test_expected_SYNC)
>  1:	or	1,1,1
>  	sync
> 
> +globl(ftr_fixup_prefix1)
> +	or	1,1,1
> +	.long 1 << 26
> +	.long 0x0000000
> +	or	2,2,2
> +globl(end_ftr_fixup_prefix1)
> +
> +globl(ftr_fixup_prefix1_orig)
> +	or	1,1,1
> +	.long 1 << 26
> +	.long 0x0000000
> +	or	2,2,2
> +
> +globl(ftr_fixup_prefix1_expected)
> +	or	1,1,1
> +	nop
> +	nop
> +	or	2,2,2
> +
> +globl(ftr_fixup_prefix2)
> +	or	1,1,1
> +	.long 1 << 26
> +	.long 0x0000000
> +	or	2,2,2
> +globl(end_ftr_fixup_prefix2)
> +
> +globl(ftr_fixup_prefix2_orig)
> +	or	1,1,1
> +	.long 1 << 26
> +	.long 0x0000000
> +	or	2,2,2
> +
> +globl(ftr_fixup_prefix2_alt)
> +	.long 0x7000000
> +	.long 0x0000001
> +
> +globl(ftr_fixup_prefix2_expected)
> +	or	1,1,1
> +	.long 1 << 26
> +	.long 0x0000001
> +	or	2,2,2
> +
> +globl(ftr_fixup_prefix3)
> +	or	1,1,1
> +	.long 1 << 26
> +	.long 0x0000000
> +	or	2,2,2
> +	or	3,3,3
> +globl(end_ftr_fixup_prefix3)
> +
> +globl(ftr_fixup_prefix3_orig)
> +	or	1,1,1
> +	.long 1 << 26
> +	.long 0x0000000
> +	or	2,2,2
> +	or	3,3,3
> +
> +globl(ftr_fixup_prefix3_alt)
> +	.long 1 << 26
> +	.long 0x0000001
> +	nop
> +
> +globl(ftr_fixup_prefix3_expected)
> +	or	1,1,1
> +	.long 1 << 26
> +	.long 0x0000001
> +	nop
> +	or	3,3,3
> diff --git a/arch/powerpc/lib/feature-fixups.c
> b/arch/powerpc/lib/feature-fixups.c index 243011f85287..6fc499b1d63e 100644
> --- a/arch/powerpc/lib/feature-fixups.c
> +++ b/arch/powerpc/lib/feature-fixups.c
> @@ -687,6 +687,75 @@ static void test_lwsync_macros(void)
>  	}
>  }
> 
> +#ifdef __powerpc64__
> +static void __init test_prefix_patching(void)
> +{
> +	extern unsigned int ftr_fixup_prefix1[];
> +	extern unsigned int end_ftr_fixup_prefix1[];
> +	extern unsigned int ftr_fixup_prefix1_orig[];
> +	extern unsigned int ftr_fixup_prefix1_expected[];
> +	int size = sizeof(unsigned int) * (end_ftr_fixup_prefix1 -
> ftr_fixup_prefix1); +
> +	fixup.value = fixup.mask = 8;
> +	fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix1 + 1);
> +	fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix1 + 3);
> +	fixup.alt_start_off = fixup.alt_end_off = 0;
> +
> +	/* Sanity check */
> +	check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) == 0);
> +
> +	patch_feature_section(0, &fixup);
> +	check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_expected, size) == 0);
> +	check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) != 0);
> +}
> +
> +static void __init test_prefix_alt_patching(void)
> +{
> +	extern unsigned int ftr_fixup_prefix2[];
> +	extern unsigned int end_ftr_fixup_prefix2[];
> +	extern unsigned int ftr_fixup_prefix2_orig[];
> +	extern unsigned int ftr_fixup_prefix2_expected[];
> +	extern unsigned int ftr_fixup_prefix2_alt[];
> +	int size = sizeof(unsigned int) * (end_ftr_fixup_prefix2 -
> ftr_fixup_prefix2); +
> +	fixup.value = fixup.mask = 8;
> +	fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix2 + 1);
> +	fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix2 + 3);
> +	fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix2_alt);
> +	fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix2_alt + 2);
> +	/* Sanity check */
> +	check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) == 0);
> +
> +	patch_feature_section(0, &fixup);
> +	check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_expected, size) == 0);
> +	patch_feature_section(0, &fixup);
> +	check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) != 0);
> +}
> +
> +static void __init test_prefix_word_alt_patching(void)
> +{
> +	extern unsigned int ftr_fixup_prefix3[];
> +	extern unsigned int end_ftr_fixup_prefix3[];
> +	extern unsigned int ftr_fixup_prefix3_orig[];
> +	extern unsigned int ftr_fixup_prefix3_expected[];
> +	extern unsigned int ftr_fixup_prefix3_alt[];
> +	int size = sizeof(unsigned int) * (end_ftr_fixup_prefix3 -
> ftr_fixup_prefix3); +
> +	fixup.value = fixup.mask = 8;
> +	fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix3 + 1);
> +	fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix3 + 4);
> +	fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix3_alt);
> +	fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix3_alt + 3);
> +	/* Sanity check */
> +	check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) == 0);
> +
> +	patch_feature_section(0, &fixup);
> +	check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_expected, size) == 0);
> +	patch_feature_section(0, &fixup);
> +	check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) != 0);
> +}
> +#endif /* __powerpc64__ */
> +
>  static int __init test_feature_fixups(void)
>  {
>  	printk(KERN_DEBUG "Running feature fixup self-tests ...\n");
> @@ -701,6 +770,11 @@ static int __init test_feature_fixups(void)
>  	test_cpu_macros();
>  	test_fw_macros();
>  	test_lwsync_macros();
> +#ifdef __powerpc64__
> +	test_prefix_patching();
> +	test_prefix_alt_patching();
> +	test_prefix_word_alt_patching();
> +#endif
> 
>  	return 0;
>  }





^ permalink raw reply

* Re: [PATCH v7 04/28] powerpc/xmon: Use bitwise calculations in_breakpoint_table()
From: Michael Ellerman @ 2020-05-05  7:08 UTC (permalink / raw)
  To: Jordan Niethe, linuxppc-dev
  Cc: alistair, npiggin, bala24, Jordan Niethe, naveen.n.rao, dja
In-Reply-To: <20200501034220.8982-5-jniethe5@gmail.com>

Jordan Niethe <jniethe5@gmail.com> writes:
> A modulo operation is used for calculating the current offset from a
> breakpoint within the breakpoint table. As instruction lengths are
> always a power of 2, this can be replaced with a bitwise 'and'. The
> current check for word alignment can be replaced with checking that the
> lower 2 bits are not set.
>
> Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
> v6: New to series
> ---
>  arch/powerpc/xmon/xmon.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> index bbfea22f4a96..e122f0c8a044 100644
> --- a/arch/powerpc/xmon/xmon.c
> +++ b/arch/powerpc/xmon/xmon.c
> @@ -857,8 +857,8 @@ static struct bpt *in_breakpoint_table(unsigned long nip, unsigned long *offp)
>  	off = nip - (unsigned long) bpt_table;
>  	if (off >= sizeof(bpt_table))
>  		return NULL;
> -	*offp = off % BPT_SIZE;
> -	if (*offp != 0 && *offp != 4)
> +	*offp = off & (BPT_SIZE - 1);
> +	if (off & 3)
>  		return NULL;

It would be even better if you didn't hard code the 3 wouldn't it?

eg:

+	*offp = off & (BPT_SIZE - 1);
+	if (off & (BPT_SIZE - 1))
 		return NULL;

cheers

^ permalink raw reply

* Re: [PATCH v7 5/5] powerpc/hv-24x7: Update post_mobility_fixup() to handle migration
From: Michael Ellerman @ 2020-05-05  7:05 UTC (permalink / raw)
  To: kajoljain, acme, linuxppc-dev, sukadev
  Cc: mark.rutland, ravi.bangoria, maddy, tglx, jmario, mpetlan, peterz,
	gregkh, linux-kernel, alexander.shishkin, linux-perf-users, ak,
	yao.jin, anju, mamatha4, jolsa, namhyung, mingo, kan.liang
In-Reply-To: <9586f8ef-1f3e-d45c-e0dc-665889a4f190@linux.ibm.com>

kajoljain <kjain@linux.ibm.com> writes:
> On 4/29/20 5:07 PM, Michael Ellerman wrote:
>> Kajol Jain <kjain@linux.ibm.com> writes:
>>> Function 'read_sys_info_pseries()' is added to get system parameter
>>> values like number of sockets and chips per socket.
>>> and it gets these details via rtas_call with token
>>> "PROCESSOR_MODULE_INFO".
>>>
>>> Incase lpar migrate from one system to another, system
>>> parameter details like chips per sockets or number of sockets might
>>> change. So, it needs to be re-initialized otherwise, these values
>>> corresponds to previous system values.
>>> This patch adds a call to 'read_sys_info_pseries()' from
>>> 'post-mobility_fixup()' to re-init the physsockets and physchips values.
>>>
>>> Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
>>> ---
>>>  arch/powerpc/platforms/pseries/mobility.c | 12 ++++++++++++
>>>  1 file changed, 12 insertions(+)
>>>
>>> diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
>>> index b571285f6c14..226accd6218b 100644
>>> --- a/arch/powerpc/platforms/pseries/mobility.c
>>> +++ b/arch/powerpc/platforms/pseries/mobility.c
>>> @@ -371,6 +371,18 @@ void post_mobility_fixup(void)
>>>  	/* Possibly switch to a new RFI flush type */
>>>  	pseries_setup_rfi_flush();
>>>  
>>> +	/*
>>> +	 * Incase lpar migrate from one system to another, system
>> 
>> In case an LPAR migrates
>> 
>>> +	 * parameter details like chips per sockets and number of sockets
>>> +	 * might change. So, it needs to be re-initialized otherwise these
>>                              ^                                       ^
>>                              they need                               the
>>> +	 * values corresponds to previous system.
>>                   ^
>>                   will correspond to the
>> 
>>> +	 * Here, adding a call to read_sys_info_pseries() declared in
>> 
>> Adding is the wrong tense in a comment. When someone reads the comment
>> the code has already been added. Past tense would be right, but really
>> the comment shouldn't say what you did, it should say why.
>> 
>>> +	 * platforms/pseries/pseries.h to re-init the physsockets and
>>> +	 * physchips value.
>> 
>> Call read_sys_info_pseries() to reinitialise the values.
>> 
>>> +	 */
>>> +	if (IS_ENABLED(CONFIG_HV_PERF_CTRS) && IS_ENABLED(CONFIG_PPC_RTAS))
>>> +		read_sys_info_pseries();
>> 
>> The RTAS check is not needed. pseries always selects RTAS.
>> 
>> You shouldn't need the IS_ENABLED() check here though, do it with an
>> empty version in the header when CONFIG_HV_PERF_CTRS is not enabled.
>> 
>
> Hi Michael,
> 	Thanks for reviewing the patch. Is something like this you are suggesting. Please
> let me know if my understanding is fine.
>
> +#ifndef CONFIG_HV_PERF_CTRS
> +#define read_sys_info_pseries() 
> +#endif

It should be an empty static inline. So more like:

#ifdef CONFIG_HV_PERF_CTRS
void read_sys_info_pseries(void);
#else
static inline void read_sys_info_pseries(void) { }
#endif

cheers

^ permalink raw reply

* Re: [PATCH v7 5/5] powerpc/hv-24x7: Update post_mobility_fixup() to handle migration
From: kajoljain @ 2020-05-05  6:57 UTC (permalink / raw)
  To: Michael Ellerman, acme, linuxppc-dev, sukadev
  Cc: mark.rutland, ravi.bangoria, maddy, tglx, jmario, mpetlan, peterz,
	gregkh, linux-kernel, alexander.shishkin, linux-perf-users, ak,
	yao.jin, anju, mamatha4, jolsa, namhyung, mingo, kan.liang
In-Reply-To: <877dxyfrpz.fsf@mpe.ellerman.id.au>



On 4/29/20 5:07 PM, Michael Ellerman wrote:
> Kajol Jain <kjain@linux.ibm.com> writes:
>> Function 'read_sys_info_pseries()' is added to get system parameter
>> values like number of sockets and chips per socket.
>> and it gets these details via rtas_call with token
>> "PROCESSOR_MODULE_INFO".
>>
>> Incase lpar migrate from one system to another, system
>> parameter details like chips per sockets or number of sockets might
>> change. So, it needs to be re-initialized otherwise, these values
>> corresponds to previous system values.
>> This patch adds a call to 'read_sys_info_pseries()' from
>> 'post-mobility_fixup()' to re-init the physsockets and physchips values.
>>
>> Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
>> ---
>>  arch/powerpc/platforms/pseries/mobility.c | 12 ++++++++++++
>>  1 file changed, 12 insertions(+)
>>
>> diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
>> index b571285f6c14..226accd6218b 100644
>> --- a/arch/powerpc/platforms/pseries/mobility.c
>> +++ b/arch/powerpc/platforms/pseries/mobility.c
>> @@ -371,6 +371,18 @@ void post_mobility_fixup(void)
>>  	/* Possibly switch to a new RFI flush type */
>>  	pseries_setup_rfi_flush();
>>  
>> +	/*
>> +	 * Incase lpar migrate from one system to another, system
> 
> In case an LPAR migrates
> 
>> +	 * parameter details like chips per sockets and number of sockets
>> +	 * might change. So, it needs to be re-initialized otherwise these
>                              ^                                       ^
>                              they need                               the
>> +	 * values corresponds to previous system.
>                   ^
>                   will correspond to the
> 
>> +	 * Here, adding a call to read_sys_info_pseries() declared in
> 
> Adding is the wrong tense in a comment. When someone reads the comment
> the code has already been added. Past tense would be right, but really
> the comment shouldn't say what you did, it should say why.
> 
>> +	 * platforms/pseries/pseries.h to re-init the physsockets and
>> +	 * physchips value.
> 
> Call read_sys_info_pseries() to reinitialise the values.
> 
>> +	 */
>> +	if (IS_ENABLED(CONFIG_HV_PERF_CTRS) && IS_ENABLED(CONFIG_PPC_RTAS))
>> +		read_sys_info_pseries();
> 
> The RTAS check is not needed. pseries always selects RTAS.
> 
> You shouldn't need the IS_ENABLED() check here though, do it with an
> empty version in the header when CONFIG_HV_PERF_CTRS is not enabled.
> 

Hi Michael,
	Thanks for reviewing the patch. Is something like this you are suggesting. Please
let me know if my understanding is fine.

+#ifndef CONFIG_HV_PERF_CTRS
+#define read_sys_info_pseries() 
+#endif

Thanks,
Kajol Jain
> cheers
> 

^ permalink raw reply

* Re: [PATCH v2 2/2] clk: qoriq: add cpufreq platform device
From: Stephen Boyd @ 2020-05-05  6:11 UTC (permalink / raw)
  To: Mian Yousaf Kaukab, andy.tang, leoyang.li, linux-clk, linux-pm,
	rjw, shawnguo
  Cc: Mian Yousaf Kaukab, viresh.kumar, linuxppc-dev, linux-kernel,
	linux-arm-kernel
In-Reply-To: <20200421083000.16740-2-ykaukab@suse.de>

Quoting Mian Yousaf Kaukab (2020-04-21 01:30:00)
> Add a platform device for qoirq-cpufreq driver for the compatible
> clockgen blocks.
> 
> Reviewed-by: Yuantian Tang <andy.tang@nxp.com>
> Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
> Signed-off-by: Mian Yousaf Kaukab <ykaukab@suse.de>
> ---

Acked-by: Stephen Boyd <sboyd@kernel.org>

^ permalink raw reply

* Re: [PATCH v7 24/28] powerpc: Test prefixed code patching
From: Alistair Popple @ 2020-05-05  6:08 UTC (permalink / raw)
  To: Jordan Niethe; +Cc: npiggin, bala24, naveen.n.rao, linuxppc-dev, dja
In-Reply-To: <20200501034220.8982-25-jniethe5@gmail.com>

Reviewed-by: Alistair Popple <alistair@popple.id.au>

On Friday, 1 May 2020 1:42:16 PM AEST Jordan Niethe wrote:
> Expand the code-patching self-tests to includes tests for patching
> prefixed instructions.
> 
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
> v6: New to series
> ---
>  arch/powerpc/lib/Makefile             |  2 +-
>  arch/powerpc/lib/code-patching.c      | 21 +++++++++++++++++++++
>  arch/powerpc/lib/test_code-patching.S | 19 +++++++++++++++++++
>  3 files changed, 41 insertions(+), 1 deletion(-)
>  create mode 100644 arch/powerpc/lib/test_code-patching.S
> 
> diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
> index 546591848219..5e994cda8e40 100644
> --- a/arch/powerpc/lib/Makefile
> +++ b/arch/powerpc/lib/Makefile
> @@ -16,7 +16,7 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
>  CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
>  endif
> 
> -obj-y += alloc.o code-patching.o feature-fixups.o pmem.o inst.o
> +obj-y += alloc.o code-patching.o feature-fixups.o pmem.o inst.o
> test_code-patching.o
> 
>  ifndef CONFIG_KASAN
>  obj-y	+=	string.o memcmp_$(BITS).o
> diff --git a/arch/powerpc/lib/code-patching.c
> b/arch/powerpc/lib/code-patching.c index b32fa707725e..7107c6d01261 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -699,6 +699,24 @@ static void __init test_translate_branch(void)
>  	vfree(buf);
>  }
> 
> +#ifdef __powerpc64__
> +static void __init test_prefixed_patching(void)
> +{
> +	extern unsigned int code_patching_test1[];
> +	extern unsigned int code_patching_test1_expected[];
> +	extern unsigned int end_code_patching_test1[];
> +
> +	__patch_instruction((struct ppc_inst *)code_patching_test1,
> +			    ppc_inst_prefix(1 << 26, 0x00000000),
> +			    (struct ppc_inst *)code_patching_test1);
> +
> +	check(!memcmp(code_patching_test1,
> +		      code_patching_test1_expected,
> +		      sizeof(unsigned int) *
> +		      (end_code_patching_test1 - code_patching_test1)));
> +}
> +#endif
> +
>  static int __init test_code_patching(void)
>  {
>  	printk(KERN_DEBUG "Running code patching self-tests ...\n");
> @@ -707,6 +725,9 @@ static int __init test_code_patching(void)
>  	test_branch_bform();
>  	test_create_function_call();
>  	test_translate_branch();
> +#ifdef __powerpc64__
> +	test_prefixed_patching();
> +#endif
> 
>  	return 0;
>  }
> diff --git a/arch/powerpc/lib/test_code-patching.S
> b/arch/powerpc/lib/test_code-patching.S new file mode 100644
> index 000000000000..91aab208a804
> --- /dev/null
> +++ b/arch/powerpc/lib/test_code-patching.S
> @@ -0,0 +1,19 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2020 IBM Corporation
> + */
> +
> +	.text
> +
> +#define globl(x)		\
> +	.globl x;	\
> +x:
> +
> +globl(code_patching_test1)
> +	nop
> +	nop
> +globl(end_code_patching_test1)
> +
> +globl(code_patching_test1_expected)
> +	.long 1 << 26
> +	.long 0x0000000





^ permalink raw reply

* Re: [PATCH v7 23/28] powerpc: Add prefixed instructions to instruction data type
From: Alistair Popple @ 2020-05-05  6:04 UTC (permalink / raw)
  To: Jordan Niethe; +Cc: npiggin, bala24, naveen.n.rao, linuxppc-dev, dja
In-Reply-To: <20200501034220.8982-24-jniethe5@gmail.com>

When reviewing earlier patches in this series I assumed the data type would 
eventually change size (on PPC64 at least) so I was looking for any possible 
side effects this may cause, but I didn't notice any so I think this should be 
ok:

Reviewed-by: Alistair Popple <alistair@popple.id.au>

However I haven't dug deeply enough into the optprobes code to fully 
understand/comment on the changes there (although they look correct afaict).

On Friday, 1 May 2020 1:42:15 PM AEST Jordan Niethe wrote:
> For powerpc64, redefine the ppc_inst type so both word and prefixed
> instructions can be represented. On powerpc32 the type will remain the
> same.  Update places which had assumed instructions to be 4 bytes long.
> 
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
> v4: New to series
> v5:  - Distinguish normal instructions from prefixed instructions with a
>        0xff marker for the suffix.
>      - __patch_instruction() using std for prefixed instructions
> v6:  - Return false instead of 0 in ppc_inst_prefixed()
>      - Fix up types for ppc32 so it compiles
>      - remove ppc_inst_write()
>      - __patching_instruction(): move flush out of condition
> ---
>  arch/powerpc/include/asm/inst.h      | 68 +++++++++++++++++++++++++---
>  arch/powerpc/include/asm/kprobes.h   |  2 +-
>  arch/powerpc/include/asm/uaccess.h   | 32 ++++++++++++-
>  arch/powerpc/include/asm/uprobes.h   |  2 +-
>  arch/powerpc/kernel/optprobes.c      | 42 +++++++++--------
>  arch/powerpc/kernel/optprobes_head.S |  3 ++
>  arch/powerpc/lib/code-patching.c     | 13 ++++--
>  arch/powerpc/lib/feature-fixups.c    |  5 +-
>  arch/powerpc/lib/inst.c              | 40 ++++++++++++++++
>  arch/powerpc/lib/sstep.c             |  4 +-
>  arch/powerpc/xmon/xmon.c             |  4 +-
>  arch/powerpc/xmon/xmon_bpts.S        |  2 +
>  12 files changed, 180 insertions(+), 37 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/inst.h
> b/arch/powerpc/include/asm/inst.h index 2f3c9d5bcf7c..1e743635c214 100644
> --- a/arch/powerpc/include/asm/inst.h
> +++ b/arch/powerpc/include/asm/inst.h
> @@ -8,23 +8,72 @@
> 
>  struct ppc_inst {
>  	u32 val;
> +#ifdef __powerpc64__
> +	u32 suffix;
> +#endif /* __powerpc64__ */
>  } __packed;
> 
> -#define ppc_inst(x) ((struct ppc_inst){ .val = x })
> -
>  static inline u32 ppc_inst_val(struct ppc_inst x)
>  {
>  	return x.val;
>  }
> 
> -static inline int ppc_inst_len(struct ppc_inst x)
> +static inline int ppc_inst_primary_opcode(struct ppc_inst x)
>  {
> -	return sizeof(struct ppc_inst);
> +	return ppc_inst_val(x) >> 26;
>  }
> 
> -static inline int ppc_inst_primary_opcode(struct ppc_inst x)
> +#ifdef __powerpc64__
> +#define ppc_inst(x) ((struct ppc_inst){ .val = (x), .suffix = 0xff })
> +
> +#define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y)
> }) +
> +static inline u32 ppc_inst_suffix(struct ppc_inst x)
>  {
> -	return ppc_inst_val(x) >> 26;
> +	return x.suffix;
> +}
> +
> +static inline bool ppc_inst_prefixed(struct ppc_inst x)
> +{
> +	return (ppc_inst_primary_opcode(x) == 1) && ppc_inst_suffix(x) != 0xff;
> +}
> +
> +static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x)
> +{
> +	return ppc_inst_prefix(swab32(ppc_inst_val(x)),
> +			       swab32(ppc_inst_suffix(x)));
> +}
> +
> +static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr)
> +{
> +	u32 val, suffix;
> +
> +	val = *(u32 *)ptr;
> +	if ((val >> 26) == 1) {
> +		suffix = *((u32 *)ptr + 1);
> +		return ppc_inst_prefix(val, suffix);
> +	} else {
> +		return ppc_inst(val);
> +	}
> +}
> +
> +static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y)
> +{
> +	return *(u64 *)&x == *(u64 *)&y;
> +}
> +
> +#else
> +
> +#define ppc_inst(x) ((struct ppc_inst){ .val = x })
> +
> +static inline bool ppc_inst_prefixed(struct ppc_inst x)
> +{
> +	return false;
> +}
> +
> +static inline u32 ppc_inst_suffix(struct ppc_inst x)
> +{
> +	return 0;
>  }
> 
>  static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x)
> @@ -42,6 +91,13 @@ static inline bool ppc_inst_equal(struct ppc_inst x,
> struct ppc_inst y) return ppc_inst_val(x) == ppc_inst_val(y);
>  }
> 
> +#endif /* __powerpc64__ */
> +
> +static inline int ppc_inst_len(struct ppc_inst x)
> +{
> +	return (ppc_inst_prefixed(x)) ? 8  : 4;
> +}
> +
>  int probe_user_read_inst(struct ppc_inst *inst,
>  			 struct ppc_inst *nip);
>  int probe_kernel_read_inst(struct ppc_inst *inst,
> diff --git a/arch/powerpc/include/asm/kprobes.h
> b/arch/powerpc/include/asm/kprobes.h index 66b3f2983b22..4fc0e15e23a5
> 100644
> --- a/arch/powerpc/include/asm/kprobes.h
> +++ b/arch/powerpc/include/asm/kprobes.h
> @@ -43,7 +43,7 @@ extern kprobe_opcode_t optprobe_template_ret[];
>  extern kprobe_opcode_t optprobe_template_end[];
> 
>  /* Fixed instruction size for powerpc */
> -#define MAX_INSN_SIZE		1
> +#define MAX_INSN_SIZE		2
>  #define MAX_OPTIMIZED_LENGTH	sizeof(kprobe_opcode_t)	/* 4 bytes */
>  #define MAX_OPTINSN_SIZE	(optprobe_template_end - optprobe_template_entry)
>  #define RELATIVEJUMP_SIZE	sizeof(kprobe_opcode_t)	/* 4 bytes */
> diff --git a/arch/powerpc/include/asm/uaccess.h
> b/arch/powerpc/include/asm/uaccess.h index c0a35e4586a5..12e52aa179b6
> 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -105,11 +105,41 @@ static inline int __access_ok(unsigned long addr,
> unsigned long size, #define __put_user_inatomic(x, ptr) \
>  	__put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
> 
> +#ifdef __powerpc64__
> +#define __get_user_instr(x, ptr)			\
> +({							\
> +	long __gui_ret = 0;				\
> +	unsigned int prefix, suffix;			\
> +	__gui_ret = __get_user(prefix, (unsigned int __user *)ptr);		\
> +	if (!__gui_ret && (prefix >> 26) == 1) {	\
> +		__gui_ret = __get_user(suffix, (unsigned int __user *)ptr + 1);	\
> +		(x) = ppc_inst_prefix(prefix, suffix);	\
> +	} else {					\
> +		(x) = ppc_inst(prefix);			\
> +	}						\
> +	__gui_ret;					\
> +})
> +
> +#define __get_user_instr_inatomic(x, ptr)		\
> +({							\
> +	long __gui_ret = 0;				\
> +	unsigned int prefix, suffix;			\
> +	__gui_ret = __get_user_inatomic(prefix, (unsigned int __user *)ptr);		\
> +	if (!__gui_ret && (prefix >> 26) == 1) {	\
> +		__gui_ret = __get_user_inatomic(suffix, (unsigned int __user *)ptr +
> 1);	\ +		(x) = ppc_inst_prefix(prefix, suffix);	\
> +	} else {					\
> +		(x) = ppc_inst(prefix);			\
> +	}						\
> +	__gui_ret;					\
> +})
> +#else
>  #define __get_user_instr(x, ptr) \
>  	__get_user_nocheck((x).val, (u32 *)(ptr), sizeof(u32), true)
> -
>  #define __get_user_instr_inatomic(x, ptr) \
>  	__get_user_nosleep((x).val, (u32 *)(ptr), sizeof(u32))
> +#endif
> +
>  extern long __put_user_bad(void);
> 
>  /*
> diff --git a/arch/powerpc/include/asm/uprobes.h
> b/arch/powerpc/include/asm/uprobes.h index 7e3b329ba2d3..5bf65f5d44a9
> 100644
> --- a/arch/powerpc/include/asm/uprobes.h
> +++ b/arch/powerpc/include/asm/uprobes.h
> @@ -15,7 +15,7 @@
> 
>  typedef ppc_opcode_t uprobe_opcode_t;
> 
> -#define MAX_UINSN_BYTES		4
> +#define MAX_UINSN_BYTES		8
>  #define UPROBE_XOL_SLOT_BYTES	(MAX_UINSN_BYTES)
> 
>  /* The following alias is needed for reference from arch-agnostic code */
> diff --git a/arch/powerpc/kernel/optprobes.c
> b/arch/powerpc/kernel/optprobes.c index d704f9598f48..a67c5288cf50 100644
> --- a/arch/powerpc/kernel/optprobes.c
> +++ b/arch/powerpc/kernel/optprobes.c
> @@ -159,38 +159,38 @@ void patch_imm32_load_insns(unsigned int val,
> kprobe_opcode_t *addr)
> 
>  /*
>   * Generate instructions to load provided immediate 64-bit value
> - * to register 'r3' and patch these instructions at 'addr'.
> + * to register 'reg' and patch these instructions at 'addr'.
>   */
> -void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr)
> +void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t
> *addr) {
> -	/* lis r3,(op)@highest */
> -	patch_instruction((struct ppc_inst *)addr, ppc_inst(PPC_INST_ADDIS |
> ___PPC_RT(3) | +	/* lis reg,(op)@highest */
> +	patch_instruction((struct ppc_inst *)addr, ppc_inst(PPC_INST_ADDIS |
> ___PPC_RT(reg) | ((val >> 48) & 0xffff)));
>  	addr++;
> 
> -	/* ori r3,r3,(op)@higher */
> -	patch_instruction((struct ppc_inst *)addr, ppc_inst(PPC_INST_ORI |
> ___PPC_RA(3) | -			  ___PPC_RS(3) | ((val >> 32) & 0xffff)));
> +	/* ori reg,reg,(op)@higher */
> +	patch_instruction((struct ppc_inst *)addr, ppc_inst(PPC_INST_ORI |
> ___PPC_RA(reg) | +			  ___PPC_RS(reg) | ((val >> 32) & 0xffff)));
>  	addr++;
> 
> -	/* rldicr r3,r3,32,31 */
> -	patch_instruction((struct ppc_inst *)addr, ppc_inst(PPC_INST_RLDICR |
> ___PPC_RA(3) | -			  ___PPC_RS(3) | __PPC_SH64(32) | __PPC_ME64(31)));
> +	/* rldicr reg,reg,32,31 */
> +	patch_instruction((struct ppc_inst *)addr, ppc_inst(PPC_INST_RLDICR |
> ___PPC_RA(reg) | +			  ___PPC_RS(reg) | __PPC_SH64(32) | 
__PPC_ME64(31)));
>  	addr++;
> 
> -	/* oris r3,r3,(op)@h */
> -	patch_instruction((struct ppc_inst *)addr, ppc_inst(PPC_INST_ORIS |
> ___PPC_RA(3) | -			  ___PPC_RS(3) | ((val >> 16) & 0xffff)));
> +	/* oris reg,reg,(op)@h */
> +	patch_instruction((struct ppc_inst *)addr, ppc_inst(PPC_INST_ORIS |
> ___PPC_RA(reg) | +			  ___PPC_RS(reg) | ((val >> 16) & 0xffff)));
>  	addr++;
> 
> -	/* ori r3,r3,(op)@l */
> -	patch_instruction((struct ppc_inst *)addr, ppc_inst(PPC_INST_ORI |
> ___PPC_RA(3) | -			  ___PPC_RS(3) | (val & 0xffff)));
> +	/* ori reg,reg,(op)@l */
> +	patch_instruction((struct ppc_inst *)addr, ppc_inst(PPC_INST_ORI |
> ___PPC_RA(reg) | +			  ___PPC_RS(reg) | (val & 0xffff)));
>  }
> 
>  int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct
> kprobe *p) {
> -	struct ppc_inst branch_op_callback, branch_emulate_step;
> +	struct ppc_inst branch_op_callback, branch_emulate_step, temp;
>  	kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff;
>  	long b_offset;
>  	unsigned long nip, size;
> @@ -240,7 +240,7 @@ int arch_prepare_optimized_kprobe(struct
> optimized_kprobe *op, struct kprobe *p) * Fixup the template with
> instructions to:
>  	 * 1. load the address of the actual probepoint
>  	 */
> -	patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX);
> +	patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX);
> 
>  	/*
>  	 * 2. branch to optimized_callback() and emulate_step()
> @@ -271,7 +271,11 @@ int arch_prepare_optimized_kprobe(struct
> optimized_kprobe *op, struct kprobe *p) /*
>  	 * 3. load instruction to be emulated into relevant register, and
>  	 */
> -	patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX);
> +	temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn);
> +	patch_imm64_load_insns(ppc_inst_val(temp) |
> +			       ((u64)ppc_inst_suffix(temp) << 32),
> +			       4,
> +			       buff + TMPL_INSN_IDX);
> 
>  	/*
>  	 * 4. branch back from trampoline
> diff --git a/arch/powerpc/kernel/optprobes_head.S
> b/arch/powerpc/kernel/optprobes_head.S index cf383520843f..ff8ba4d3824d
> 100644
> --- a/arch/powerpc/kernel/optprobes_head.S
> +++ b/arch/powerpc/kernel/optprobes_head.S
> @@ -94,6 +94,9 @@ optprobe_template_insn:
>  	/* 2, Pass instruction to be emulated in r4 */
>  	nop
>  	nop
> +	nop
> +	nop
> +	nop
> 
>  	.global optprobe_template_call_emulate
>  optprobe_template_call_emulate:
> diff --git a/arch/powerpc/lib/code-patching.c
> b/arch/powerpc/lib/code-patching.c index 5b2f66d06b1e..b32fa707725e 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -24,13 +24,18 @@ static int __patch_instruction(struct ppc_inst
> *exec_addr, struct ppc_inst instr {
>  	int err = 0;
> 
> -	__put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw");
> -	if (err)
> -		return err;
> +	if (!ppc_inst_prefixed(instr)) {
> +		__put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw");
> +		if (err)
> +			return err;
> +	} else {
> +		__put_user_asm((u64)ppc_inst_suffix(instr) << 32 | ppc_inst_val(instr),
> patch_addr, err, "std"); +		if (err)
> +			return err;
> +	}
> 
>  	asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
>  							    "r" (exec_addr));
> -
>  	return 0;
>  }
> 
> diff --git a/arch/powerpc/lib/feature-fixups.c
> b/arch/powerpc/lib/feature-fixups.c index f4845e740338..243011f85287 100644
> --- a/arch/powerpc/lib/feature-fixups.c
> +++ b/arch/powerpc/lib/feature-fixups.c
> @@ -84,12 +84,13 @@ static int patch_feature_section(unsigned long value,
> struct fixup_entry *fcur) src = alt_start;
>  	dest = start;
> 
> -	for (; src < alt_end; src++, dest++) {
> +	for (; src < alt_end; src = (void *)src +
> ppc_inst_len(ppc_inst_read(src)), +	     (dest = (void *)dest +
> ppc_inst_len(ppc_inst_read(dest)))) { if (patch_alt_instruction(src, dest,
> alt_start, alt_end))
>  			return 1;
>  	}
> 
> -	for (; dest < end; dest++)
> +	for (; dest < end; dest = (void *)dest +
> ppc_inst_len(ppc_inst(PPC_INST_NOP))) raw_patch_instruction(dest,
> ppc_inst(PPC_INST_NOP));
> 
>  	return 0;
> diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c
> index 08dedd927268..71101791edcc 100644
> --- a/arch/powerpc/lib/inst.c
> +++ b/arch/powerpc/lib/inst.c
> @@ -6,6 +6,45 @@
>  #include <linux/uaccess.h>
>  #include <asm/inst.h>
> 
> +#ifdef __powerpc64__
> +int probe_user_read_inst(struct ppc_inst *inst,
> +			 struct ppc_inst *nip)
> +{
> +	unsigned int val, suffix;
> +	int err;
> +
> +	err = probe_user_read(&val, nip, sizeof(val));
> +	if (err)
> +		return err;
> +	if ((val >> 26) == 1) {
> +		err = probe_user_read(&suffix, (void *)nip+4,
> +				      sizeof(unsigned int));
> +		*inst = ppc_inst_prefix(val, suffix);
> +	} else {
> +		*inst = ppc_inst(val);
> +	}
> +	return err;
> +}
> +
> +int probe_kernel_read_inst(struct ppc_inst *inst,
> +			   struct ppc_inst *src)
> +{
> +	unsigned int val, suffix;
> +	int err;
> +
> +	err = probe_kernel_read(&val, src, sizeof(val));
> +	if (err)
> +		return err;
> +	if ((val >> 26) == 1) {
> +		err = probe_kernel_read(&suffix, (void *)src+4,
> +				      sizeof(unsigned int));
> +		*inst = ppc_inst_prefix(val, suffix);
> +	} else {
> +		*inst = ppc_inst(val);
> +	}
> +	return err;
> +}
> +#else
>  int probe_user_read_inst(struct ppc_inst *inst,
>  			 struct ppc_inst *nip)
>  {
> @@ -27,3 +66,4 @@ int probe_kernel_read_inst(struct ppc_inst *inst,
>  	*inst = ppc_inst(val);
>  	return err;
>  }
> +#endif /* __powerpc64__ */
> diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
> index 95a56bb1ba3f..ecd756c346fd 100644
> --- a/arch/powerpc/lib/sstep.c
> +++ b/arch/powerpc/lib/sstep.c
> @@ -1169,10 +1169,12 @@ int analyse_instr(struct instruction_op *op, const
> struct pt_regs *regs, unsigned long int imm;
>  	unsigned long int val, val2;
>  	unsigned int mb, me, sh;
> -	unsigned int word;
> +	unsigned int word, suffix;
>  	long ival;
> 
>  	word = ppc_inst_val(instr);
> +	suffix = ppc_inst_suffix(instr);
> +
>  	op->type = COMPUTE;
> 
>  	opcode = ppc_inst_primary_opcode(instr);
> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> index fb2563079046..1d6e66eb2dab 100644
> --- a/arch/powerpc/xmon/xmon.c
> +++ b/arch/powerpc/xmon/xmon.c
> @@ -758,8 +758,8 @@ static int xmon_bpt(struct pt_regs *regs)
> 
>  	/* Are we at the trap at bp->instr[1] for some bp? */
>  	bp = in_breakpoint_table(regs->nip, &offset);
> -	if (bp != NULL && offset == 4) {
> -		regs->nip = bp->address + 4;
> +	if (bp != NULL && (offset == 4 || offset == 8)) {
> +		regs->nip = bp->address + offset;
>  		atomic_dec(&bp->ref_count);
>  		return 1;
>  	}
> diff --git a/arch/powerpc/xmon/xmon_bpts.S b/arch/powerpc/xmon/xmon_bpts.S
> index f3ad0ab50854..69726814cd27 100644
> --- a/arch/powerpc/xmon/xmon_bpts.S
> +++ b/arch/powerpc/xmon/xmon_bpts.S
> @@ -4,6 +4,8 @@
>  #include <asm/asm-offsets.h>
>  #include "xmon_bpts.h"
> 
> +/* Prefixed instructions can not cross 64 byte boundaries */
> +.align 6
>  .global bpt_table
>  bpt_table:
>  	.space NBPTS * BPT_SIZE





^ permalink raw reply

* Re: [PATCH 1/4] dma-mapping: move the remaining DMA API calls out of line
From: Alexey Kardashevskiy @ 2020-05-05  4:18 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Greg Kroah-Hartman, Joerg Roedel, linuxppc-dev, linux-kernel,
	iommu, Robin Murphy, Lu Baolu
In-Reply-To: <20200417075852.GA20049@lst.de>



On 17/04/2020 17:58, Christoph Hellwig wrote:
> On Wed, Apr 15, 2020 at 09:21:37PM +1000, Alexey Kardashevskiy wrote:
>> And the fact they were exported leaves possibility that there is a
>> driver somewhere relying on these symbols or distro kernel won't build
>> because the symbol disappeared from exports (I do not know what KABI
>> guarantees or if mainline kernel cares).
> 
> We absolutely do not care.  In fact for abuses of APIs that drivers
> should not use we almost care to make them private and break people
> abusing them.

ok :)

>> I do not care in particular but
>> some might, a line separated with empty lines in the commit log would do.
> 
> I'll add a blurb for the next version.


Has it gone anywhere? Thanks,


-- 
Alexey

^ permalink raw reply

* Re: [PATCH v4 0/7] clean up redundant 'kvm_run' parameters
From: Tianjia Zhang @ 2020-05-05  4:15 UTC (permalink / raw)
  To: pbonzini, tsbogend, paulus, mpe, benh, borntraeger, frankja,
	david, cohuck, heiko.carstens, gor, sean.j.christopherson,
	vkuznets, wanpengli, jmattson, joro, tglx, mingo, bp, x86, hpa,
	maz, james.morse, julien.thierry.kdev, suzuki.poulose,
	christoffer.dall, peterx, thuth, chenhuacai
  Cc: linux-s390, kvm, linux-mips, kvm-ppc, linux-kernel, linuxppc-dev,
	kvmarm, linux-arm-kernel
In-Reply-To: <20200427043514.16144-1-tianjia.zhang@linux.alibaba.com>

Paolo Bonzini, any opinion on this?

Thanks and best,
Tianjia

On 2020/4/27 12:35, Tianjia Zhang wrote:
> In the current kvm version, 'kvm_run' has been included in the 'kvm_vcpu'
> structure. For historical reasons, many kvm-related function parameters
> retain the 'kvm_run' and 'kvm_vcpu' parameters at the same time. This
> patch does a unified cleanup of these remaining redundant parameters.
> 
> This series of patches has completely cleaned the architecture of
> arm64, mips, ppc, and s390 (no such redundant code on x86). Due to
> the large number of modified codes, a separate patch is made for each
> platform. On the ppc platform, there is also a redundant structure
> pointer of 'kvm_run' in 'vcpu_arch', which has also been cleaned
> separately.
> 
> ---
> v4 change:
>    mips: fixes two errors in entry.c.
> 
> v3 change:
>    Keep the existing `vcpu->run` in the function body unchanged.
> 
> v2 change:
>    s390 retains the original variable name and minimizes modification.
> 
> Tianjia Zhang (7):
>    KVM: s390: clean up redundant 'kvm_run' parameters
>    KVM: arm64: clean up redundant 'kvm_run' parameters
>    KVM: PPC: Remove redundant kvm_run from vcpu_arch
>    KVM: PPC: clean up redundant 'kvm_run' parameters
>    KVM: PPC: clean up redundant kvm_run parameters in assembly
>    KVM: MIPS: clean up redundant 'kvm_run' parameters
>    KVM: MIPS: clean up redundant kvm_run parameters in assembly
> 
>   arch/arm64/include/asm/kvm_coproc.h      |  12 +--
>   arch/arm64/include/asm/kvm_host.h        |  11 +--
>   arch/arm64/include/asm/kvm_mmu.h         |   2 +-
>   arch/arm64/kvm/handle_exit.c             |  36 +++----
>   arch/arm64/kvm/sys_regs.c                |  13 ++-
>   arch/mips/include/asm/kvm_host.h         |  32 +------
>   arch/mips/kvm/emulate.c                  |  59 ++++--------
>   arch/mips/kvm/entry.c                    |  21 ++---
>   arch/mips/kvm/mips.c                     |  14 +--
>   arch/mips/kvm/trap_emul.c                | 114 ++++++++++-------------
>   arch/mips/kvm/vz.c                       |  26 ++----
>   arch/powerpc/include/asm/kvm_book3s.h    |  16 ++--
>   arch/powerpc/include/asm/kvm_host.h      |   1 -
>   arch/powerpc/include/asm/kvm_ppc.h       |  27 +++---
>   arch/powerpc/kvm/book3s.c                |   4 +-
>   arch/powerpc/kvm/book3s.h                |   2 +-
>   arch/powerpc/kvm/book3s_64_mmu_hv.c      |  12 +--
>   arch/powerpc/kvm/book3s_64_mmu_radix.c   |   4 +-
>   arch/powerpc/kvm/book3s_emulate.c        |  10 +-
>   arch/powerpc/kvm/book3s_hv.c             |  64 ++++++-------
>   arch/powerpc/kvm/book3s_hv_nested.c      |  12 +--
>   arch/powerpc/kvm/book3s_interrupts.S     |  17 ++--
>   arch/powerpc/kvm/book3s_paired_singles.c |  72 +++++++-------
>   arch/powerpc/kvm/book3s_pr.c             |  33 ++++---
>   arch/powerpc/kvm/booke.c                 |  39 ++++----
>   arch/powerpc/kvm/booke.h                 |   8 +-
>   arch/powerpc/kvm/booke_emulate.c         |   2 +-
>   arch/powerpc/kvm/booke_interrupts.S      |   9 +-
>   arch/powerpc/kvm/bookehv_interrupts.S    |  10 +-
>   arch/powerpc/kvm/e500_emulate.c          |  15 ++-
>   arch/powerpc/kvm/emulate.c               |  10 +-
>   arch/powerpc/kvm/emulate_loadstore.c     |  32 +++----
>   arch/powerpc/kvm/powerpc.c               |  72 +++++++-------
>   arch/powerpc/kvm/trace_hv.h              |   6 +-
>   arch/s390/kvm/kvm-s390.c                 |  23 +++--
>   virt/kvm/arm/arm.c                       |   6 +-
>   virt/kvm/arm/mmio.c                      |  11 ++-
>   virt/kvm/arm/mmu.c                       |   5 +-
>   38 files changed, 392 insertions(+), 470 deletions(-)
> 

^ permalink raw reply

* Re: [PATCH 3/3] mm/hugetlb: Introduce HAVE_ARCH_CLEAR_HUGEPAGE_FLAGS
From: Anshuman Khandual @ 2020-05-05  2:51 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Rich Felker, linux-ia64, linux-sh, Catalin Marinas,
	Heiko Carstens, linux-kernel, James E.J. Bottomley, linux-mm,
	Paul Mackerras, H. Peter Anvin, sparclinux, linux-riscv,
	Will Deacon, linux-arch, linux-s390, Yoshinori Sato, Helge Deller,
	x86, Russell King, Christian Borntraeger, Ingo Molnar, Fenghua Yu,
	Vasily Gorbik, Thomas Bogendoerfer, Borislav Petkov,
	Paul Walmsley, Thomas Gleixner, linux-arm-kernel, Tony Luck,
	linux-parisc, linux-mips, Palmer Dabbelt, linuxppc-dev,
	David S. Miller, Mike Kravetz
In-Reply-To: <20200425200124.20d0c75fcaef05d062d3667c@linux-foundation.org>



On 04/26/2020 08:31 AM, Andrew Morton wrote:
> On Sun, 26 Apr 2020 08:13:17 +0530 Anshuman Khandual <anshuman.khandual@arm.com> wrote:
> 
>>
>>
>> On 04/26/2020 06:25 AM, Andrew Morton wrote:
>>> On Tue, 14 Apr 2020 17:14:30 +0530 Anshuman Khandual <anshuman.khandual@arm.com> wrote:
>>>
>>>> There are multiple similar definitions for arch_clear_hugepage_flags() on
>>>> various platforms. This introduces HAVE_ARCH_CLEAR_HUGEPAGE_FLAGS for those
>>>> platforms that need to define their own arch_clear_hugepage_flags() while
>>>> also providing a generic fallback definition for others to use. This help
>>>> reduce code duplication.
>>>>
>>>> ...
>>>>
>>>> --- a/include/linux/hugetlb.h
>>>> +++ b/include/linux/hugetlb.h
>>>> @@ -544,6 +544,10 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
>>>>  }
>>>>  #endif
>>>>  
>>>> +#ifndef HAVE_ARCH_CLEAR_HUGEPAGE_FLAGS
>>>> +static inline void arch_clear_hugepage_flags(struct page *page) { }
>>>> +#endif
>>>> +
>>>>  #ifndef arch_make_huge_pte
>>>>  static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
>>>>  				       struct page *page, int writable)
>>>
>>> This is the rather old-school way of doing it.  The Linus-suggested way is
>>>
>>> #ifndef arch_clear_hugepage_flags
>>> static inline void arch_clear_hugepage_flags(struct page *page)
>>> {
>>> }
>>> #define arch_clear_hugepage_flags arch_clear_hugepage_flags
>>
>> Do we need that above line here ? Is not that implicit.
> 
> It depends if other header files want to test whether
> arch_clear_hugepage_flags is already defined.  If the header heorarchy
> is well-defined and working properly, they shouldn't need to, because
> we're reliably indluding the relevant arch header before (or early
> within) include/linux/hugetlb.h.
> 
> It would be nice if
> 
> #define arch_clear_hugepage_flags arch_clear_hugepage_flags
> #define arch_clear_hugepage_flags arch_clear_hugepage_flags
> 
> were to generate an compiler error but it doesn't.  If it did we could
> detect these incorrect inclusion orders.
> 
>>> #endif
>>>
>>> And the various arch headers do
>>>
>>> static inline void arch_clear_hugepage_flags(struct page *page)
>>> {
>>> 	<some implementation>
>>> }
>>> #define arch_clear_hugepage_flags arch_clear_hugepage_flags
>>>
>>> It's a small difference - mainly to avoid adding two variables to the
>>> overall namespace where one would do.
>>
>> Understood, will change and resend.
> 
> That's OK - I've queued up that fix.
>

Hello Andrew,

I might not have searched all the relevant trees or might have just searched
earlier than required. But I dont see these patches (or your proposed fixes)
either in mmotm (2020-04-29-23-04) or in next-20200504. Wondering if you are
waiting on a V2 for this series accommodating the changes you had proposed.

- Anshuman

^ permalink raw reply

* Re: [PATCH v7 22/28] powerpc: Define new SRR1 bits for a future ISA version
From: Alistair Popple @ 2020-05-05  2:49 UTC (permalink / raw)
  To: Jordan Niethe; +Cc: npiggin, bala24, naveen.n.rao, linuxppc-dev, dja
In-Reply-To: <20200501034220.8982-23-jniethe5@gmail.com>

Reviewed-by: Alistair Popple <alistair@popple.id.au>

On Friday, 1 May 2020 1:42:14 PM AEST Jordan Niethe wrote:
> Add the BOUNDARY SRR1 bit definition for when the cause of an alignment
> exception is a prefixed instruction that crosses a 64-byte boundary.
> Add the PREFIXED SRR1 bit definition for exceptions caused by prefixed
> instructions.
> 
> Bit 35 of SRR1 is called SRR1_ISI_N_OR_G. This name comes from it being
> used to indicate that an ISI was due to the access being no-exec or
> guarded. A future ISA version adds another purpose. It is also set if
> there is an access in a cache-inhibited location for prefixed
> instruction.  Rename from SRR1_ISI_N_OR_G to SRR1_ISI_N_G_OR_CIP.
> 
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
> v2: Combined all the commits concerning SRR1 bits.
> ---
>  arch/powerpc/include/asm/reg.h      | 4 +++-
>  arch/powerpc/kvm/book3s_hv_nested.c | 2 +-
>  arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +-
>  3 files changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index 773f76402392..f95eb8f97756 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -762,7 +762,7 @@
>  #endif
> 
>  #define   SRR1_ISI_NOPT		0x40000000 /* ISI: Not found in hash */
> -#define   SRR1_ISI_N_OR_G	0x10000000 /* ISI: Access is no-exec or G */
> +#define   SRR1_ISI_N_G_OR_CIP	0x10000000 /* ISI: Access is no-exec or G or
> CI for a prefixed instruction */ #define   SRR1_ISI_PROT		0x08000000 /*
> ISI: Other protection fault */ #define   SRR1_WAKEMASK		0x00380000 /*
> reason for wakeup */
>  #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9
> */ @@ -789,6 +789,8 @@
>  #define   SRR1_PROGADDR		0x00010000 /* SRR0 contains subsequent addr */
> 
>  #define   SRR1_MCE_MCP		0x00080000 /* Machine check signal caused 
interrupt
> */ +#define   SRR1_BOUNDARY		0x10000000 /* Prefixed instruction crosses
> 64-byte boundary */ +#define   SRR1_PREFIXED		0x20000000 /* Exception
> caused by prefixed instruction */
> 
>  #define SPRN_HSRR0	0x13A	/* Save/Restore Register 0 */
>  #define SPRN_HSRR1	0x13B	/* Save/Restore Register 1 */
> diff --git a/arch/powerpc/kvm/book3s_hv_nested.c
> b/arch/powerpc/kvm/book3s_hv_nested.c index dc97e5be76f6..6ab685227574
> 100644
> --- a/arch/powerpc/kvm/book3s_hv_nested.c
> +++ b/arch/powerpc/kvm/book3s_hv_nested.c
> @@ -1169,7 +1169,7 @@ static int kvmhv_translate_addr_nested(struct kvm_vcpu
> *vcpu, } else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) { /*
> Can we execute? */
>  			if (!gpte_p->may_execute) {
> -				flags |= SRR1_ISI_N_OR_G;
> +				flags |= SRR1_ISI_N_G_OR_CIP;
>  				goto forward_to_l1;
>  			}
>  		} else {
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 220305454c23..b53a9f1c1a46
> 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -1260,7 +1260,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu,
> unsigned long addr, status &= ~DSISR_NOHPTE;	/* DSISR_NOHPTE ==
> SRR1_ISI_NOPT */
>  	if (!data) {
>  		if (gr & (HPTE_R_N | HPTE_R_G))
> -			return status | SRR1_ISI_N_OR_G;
> +			return status | SRR1_ISI_N_G_OR_CIP;
>  		if (!hpte_read_permission(pp, slb_v & key))
>  			return status | SRR1_ISI_PROT;
>  	} else if (status & DSISR_ISSTORE) {





^ permalink raw reply

* Re: [PATCH v7 20/28] powerpc: Make test_translate_branch() independent of instruction length
From: Alistair Popple @ 2020-05-05  2:40 UTC (permalink / raw)
  To: Jordan Niethe; +Cc: npiggin, bala24, naveen.n.rao, linuxppc-dev, dja
In-Reply-To: <20200501034220.8982-21-jniethe5@gmail.com>

I guess this could change if there were prefixed branch instructions, but there 
aren't so:

Reviewed-by: Alistair Popple <alistair@popple.id.au>

On Friday, 1 May 2020 1:42:12 PM AEST Jordan Niethe wrote:
> test_translate_branch() uses two pointers to instructions within a
> buffer, p and q, to test patch_branch(). The pointer arithmetic done on
> them assumes a size of 4. This will not work if the instruction length
> changes. Instead do the arithmetic relative to the void * to the buffer.
> 
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
> v4: New to series
> ---
>  arch/powerpc/lib/code-patching.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/lib/code-patching.c
> b/arch/powerpc/lib/code-patching.c index 110f710500c8..5b2f66d06b1e 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -569,7 +569,7 @@ static void __init test_branch_bform(void)
>  static void __init test_translate_branch(void)
>  {
>  	unsigned long addr;
> -	struct ppc_inst *p, *q;
> +	void *p, *q;
>  	struct ppc_inst instr;
>  	void *buf;
> 
> @@ -583,7 +583,7 @@ static void __init test_translate_branch(void)
>  	addr = (unsigned long)p;
>  	patch_branch(p, addr, 0);
>  	check(instr_is_branch_to_addr(p, addr));
> -	q = p + 1;
> +	q = p + 4;
>  	translate_branch(&instr, q, p);
>  	patch_instruction(q, instr);
>  	check(instr_is_branch_to_addr(q, addr));
> @@ -639,7 +639,7 @@ static void __init test_translate_branch(void)
>  	create_cond_branch(&instr, p, addr, 0);
>  	patch_instruction(p, instr);
>  	check(instr_is_branch_to_addr(p, addr));
> -	q = p + 1;
> +	q = buf + 4;
>  	translate_branch(&instr, q, p);
>  	patch_instruction(q, instr);
>  	check(instr_is_branch_to_addr(q, addr));





^ permalink raw reply

* Re: [PATCH v7 19/28] powerpc/xmon: Move insertion of breakpoint for xol'ing
From: Alistair Popple @ 2020-05-05  2:19 UTC (permalink / raw)
  To: Jordan Niethe; +Cc: npiggin, bala24, naveen.n.rao, linuxppc-dev, dja
In-Reply-To: <20200501034220.8982-20-jniethe5@gmail.com>

I can't see any side-effects from patching both instructions at the same time.

Reviewed-by: Alistair Popple <alistair@popple.id.au>

On Friday, 1 May 2020 1:42:11 PM AEST Jordan Niethe wrote:
> When a new breakpoint is created, the second instruction of that
> breakpoint is patched with a trap instruction. This assumes the length
> of the instruction is always the same. In preparation for prefixed
> instructions, remove this assumption. Insert the trap instruction at the
> same time the first instruction is inserted.
> 
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
>  arch/powerpc/xmon/xmon.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> index 1947821e425d..fb2563079046 100644
> --- a/arch/powerpc/xmon/xmon.c
> +++ b/arch/powerpc/xmon/xmon.c
> @@ -878,7 +878,6 @@ static struct bpt *new_breakpoint(unsigned long a)
>  		if (!bp->enabled && atomic_read(&bp->ref_count) == 0) {
>  			bp->address = a;
>  			bp->instr = (void *)(bpt_table + ((bp - bpts) * BPT_WORDS));
> -			patch_instruction(bp->instr + 1, ppc_inst(bpinstr));
>  			return bp;
>  		}
>  	}
> @@ -910,6 +909,7 @@ static void insert_bpts(void)
>  			continue;
>  		}
>  		patch_instruction(bp->instr, instr);
> +		patch_instruction((void *)bp->instr + ppc_inst_len(instr),
> ppc_inst(bpinstr)); if (bp->enabled & BP_CIABR)
>  			continue;
>  		if (patch_instruction((struct ppc_inst *)bp->address,





^ permalink raw reply

* Re: [PATCH v7 18/28] powerpc/xmon: Use a function for reading instructions
From: Alistair Popple @ 2020-05-05  2:07 UTC (permalink / raw)
  To: Jordan Niethe; +Cc: npiggin, bala24, naveen.n.rao, linuxppc-dev, dja
In-Reply-To: <20200501034220.8982-19-jniethe5@gmail.com>

Shouldn't change anything and will be correct once prefix instructions are 
defined.

Reviewed-by: Alistair Popple <alistair@popple.id.au>

On Friday, 1 May 2020 1:42:10 PM AEST Jordan Niethe wrote:
> Currently in xmon, mread() is used for reading instructions. In
> preparation for prefixed instructions, create and use a new function,
> mread_instr(), especially for reading instructions.
> 
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
> v5: New to series, seperated from "Add prefixed instructions to
>     instruction data type"
> v6: mread_instr(): correctly return error status
> ---
>  arch/powerpc/xmon/xmon.c | 28 ++++++++++++++++++++++++----
>  1 file changed, 24 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> index cde733a82366..1947821e425d 100644
> --- a/arch/powerpc/xmon/xmon.c
> +++ b/arch/powerpc/xmon/xmon.c
> @@ -122,6 +122,7 @@ static unsigned bpinstr = 0x7fe00008;	/* trap */
>  static int cmds(struct pt_regs *);
>  static int mread(unsigned long, void *, int);
>  static int mwrite(unsigned long, void *, int);
> +static int mread_instr(unsigned long, struct ppc_inst *);
>  static int handle_fault(struct pt_regs *);
>  static void byterev(unsigned char *, int);
>  static void memex(void);
> @@ -896,7 +897,7 @@ static void insert_bpts(void)
>  	for (i = 0; i < NBPTS; ++i, ++bp) {
>  		if ((bp->enabled & (BP_TRAP|BP_CIABR)) == 0)
>  			continue;
> -		if (mread(bp->address, &instr, 4) != 4) {
> +		if (!mread_instr(bp->address, &instr)) {
>  			printf("Couldn't read instruction at %lx, "
>  			       "disabling breakpoint there\n", bp->address);
>  			bp->enabled = 0;
> @@ -946,7 +947,7 @@ static void remove_bpts(void)
>  	for (i = 0; i < NBPTS; ++i, ++bp) {
>  		if ((bp->enabled & (BP_TRAP|BP_CIABR)) != BP_TRAP)
>  			continue;
> -		if (mread(bp->address, &instr, 4) == 4
> +		if (mread_instr(bp->address, &instr)
>  		    && ppc_inst_equal(instr, ppc_inst(bpinstr))
>  		    && patch_instruction(
>  			(struct ppc_inst *)bp->address, ppc_inst_read(bp->instr)) != 0)
> @@ -1162,7 +1163,7 @@ static int do_step(struct pt_regs *regs)
>  	force_enable_xmon();
>  	/* check we are in 64-bit kernel mode, translation enabled */
>  	if ((regs->msr & (MSR_64BIT|MSR_PR|MSR_IR)) == (MSR_64BIT|MSR_IR)) {
> -		if (mread(regs->nip, &instr, 4) == 4) {
> +		if (mread_instr(regs->nip, &instr)) {
>  			stepped = emulate_step(regs, instr);
>  			if (stepped < 0) {
>  				printf("Couldn't single-step %s instruction\n",
> @@ -1329,7 +1330,7 @@ static long check_bp_loc(unsigned long addr)
>  		printf("Breakpoints may only be placed at kernel addresses\n");
>  		return 0;
>  	}
> -	if (!mread(addr, &instr, sizeof(instr))) {
> +	if (!mread_instr(addr, &instr)) {
>  		printf("Can't read instruction at address %lx\n", addr);
>  		return 0;
>  	}
> @@ -2122,6 +2123,25 @@ mwrite(unsigned long adrs, void *buf, int size)
>  	return n;
>  }
> 
> +static int
> +mread_instr(unsigned long adrs, struct ppc_inst *instr)
> +{
> +	volatile int n;
> +
> +	n = 0;
> +	if (setjmp(bus_error_jmp) == 0) {
> +		catch_memory_errors = 1;
> +		sync();
> +		*instr = ppc_inst_read((struct ppc_inst *)adrs);
> +		sync();
> +		/* wait a little while to see if we get a machine check */
> +		__delay(200);
> +		n = ppc_inst_len(*instr);
> +	}
> +	catch_memory_errors = 0;
> +	return n;
> +}
> +
>  static int fault_type;
>  static int fault_except;
>  static char *fault_chars[] = { "--", "**", "##" };





^ permalink raw reply

* Re: [PATCH v7 17/28] powerpc: Introduce a function for reporting instruction length
From: Alistair Popple @ 2020-05-05  2:02 UTC (permalink / raw)
  To: Jordan Niethe; +Cc: npiggin, bala24, naveen.n.rao, linuxppc-dev, dja
In-Reply-To: <20200501034220.8982-18-jniethe5@gmail.com>

Looks good,

Reviewed-by: Alistair Popple <alistair@popple.id.au>

On Friday, 1 May 2020 1:42:09 PM AEST Jordan Niethe wrote:
> Currently all instructions have the same length, but in preparation for
> prefixed instructions introduce a function for returning instruction
> length.
> 
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
> v6: - feature-fixups.c: do_final_fixups(): use here
>     - ppc_inst_len(): change return type from bool to int
>     - uprobes: Use ppc_inst_read() before calling ppc_inst_len()
> ---
>  arch/powerpc/include/asm/inst.h   |  5 +++++
>  arch/powerpc/kernel/kprobes.c     |  6 ++++--
>  arch/powerpc/kernel/uprobes.c     |  2 +-
>  arch/powerpc/lib/feature-fixups.c | 14 +++++++-------
>  4 files changed, 17 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/inst.h
> b/arch/powerpc/include/asm/inst.h index 0d581b332c20..2f3c9d5bcf7c 100644
> --- a/arch/powerpc/include/asm/inst.h
> +++ b/arch/powerpc/include/asm/inst.h
> @@ -17,6 +17,11 @@ static inline u32 ppc_inst_val(struct ppc_inst x)
>  	return x.val;
>  }
> 
> +static inline int ppc_inst_len(struct ppc_inst x)
> +{
> +	return sizeof(struct ppc_inst);
> +}
> +
>  static inline int ppc_inst_primary_opcode(struct ppc_inst x)
>  {
>  	return ppc_inst_val(x) >> 26;
> diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
> index a72c8e1a42ad..33d54b091c70 100644
> --- a/arch/powerpc/kernel/kprobes.c
> +++ b/arch/powerpc/kernel/kprobes.c
> @@ -462,14 +462,16 @@ NOKPROBE_SYMBOL(trampoline_probe_handler);
>   */
>  int kprobe_post_handler(struct pt_regs *regs)
>  {
> +	int len;
>  	struct kprobe *cur = kprobe_running();
>  	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
> 
>  	if (!cur || user_mode(regs))
>  		return 0;
> 
> +	len = ppc_inst_len(ppc_inst_read((struct ppc_inst *)cur->ainsn.insn));
>  	/* make sure we got here for instruction we have a kprobe on */
> -	if (((unsigned long)cur->ainsn.insn + 4) != regs->nip)
> +	if (((unsigned long)cur->ainsn.insn + len) != regs->nip)
>  		return 0;
> 
>  	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
> @@ -478,7 +480,7 @@ int kprobe_post_handler(struct pt_regs *regs)
>  	}
> 
>  	/* Adjust nip to after the single-stepped instruction */
> -	regs->nip = (unsigned long)cur->addr + 4;
> +	regs->nip = (unsigned long)cur->addr + len;
>  	regs->msr |= kcb->kprobe_saved_msr;
> 
>  	/*Restore back the original saved kprobes variables and continue. */
> diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
> index 6893d40a48c5..83e883e1a42d 100644
> --- a/arch/powerpc/kernel/uprobes.c
> +++ b/arch/powerpc/kernel/uprobes.c
> @@ -112,7 +112,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe,
> struct pt_regs *regs) * support doesn't exist and have to fix-up the next
> instruction * to be executed.
>  	 */
> -	regs->nip = utask->vaddr + MAX_UINSN_BYTES;
> +	regs->nip = utask->vaddr + ppc_inst_len(ppc_inst_read(&auprobe->insn));
> 
>  	user_disable_single_step(current);
>  	return 0;
> diff --git a/arch/powerpc/lib/feature-fixups.c
> b/arch/powerpc/lib/feature-fixups.c index 13ec3264a565..f4845e740338 100644
> --- a/arch/powerpc/lib/feature-fixups.c
> +++ b/arch/powerpc/lib/feature-fixups.c
> @@ -390,20 +390,20 @@ void do_lwsync_fixups(unsigned long value, void
> *fixup_start, void *fixup_end) static void do_final_fixups(void)
>  {
>  #if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE)
> -	struct ppc_inst *src, *dest;
> -	unsigned long length;
> +	struct ppc_inst inst, *src, *dest, *end;
> 
>  	if (PHYSICAL_START == 0)
>  		return;
> 
>  	src = (struct ppc_inst *)(KERNELBASE + PHYSICAL_START);
>  	dest = (struct ppc_inst *)KERNELBASE;
> -	length = (__end_interrupts - _stext) / sizeof(struct ppc_inst);
> +	end = (void *)src + (__end_interrupts - _stext);
> 
> -	while (length--) {
> -		raw_patch_instruction(dest, ppc_inst_read(src));
> -		src++;
> -		dest++;
> +	while (src < end) {
> +		inst = ppc_inst_read(src);
> +		raw_patch_instruction(dest, inst);
> +		src = (void *)src + ppc_inst_len(inst);
> +		dest = (void *)dest + ppc_inst_len(inst);
>  	}
>  #endif
>  }





^ permalink raw reply

* Re: [PATCH v7 16/28] powerpc: Define and use __get_user_instr{, inatomic}()
From: Alistair Popple @ 2020-05-05  1:46 UTC (permalink / raw)
  To: Jordan Niethe; +Cc: npiggin, bala24, naveen.n.rao, linuxppc-dev, dja
In-Reply-To: <20200501034220.8982-17-jniethe5@gmail.com>

Doesn't change any behaviour from what I can see.

Reviewed-by: Alistair Popple <alistair@popple.id.au>

On Friday, 1 May 2020 1:42:08 PM AEST Jordan Niethe wrote:
> Define specific __get_user_instr() and __get_user_instr_inatomic()
> macros for reading instructions from user space.
> 
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
>  arch/powerpc/include/asm/uaccess.h  | 5 +++++
>  arch/powerpc/kernel/align.c         | 2 +-
>  arch/powerpc/kernel/hw_breakpoint.c | 2 +-
>  arch/powerpc/kernel/vecemu.c        | 2 +-
>  4 files changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/uaccess.h
> b/arch/powerpc/include/asm/uaccess.h index 2f500debae21..c0a35e4586a5
> 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -105,6 +105,11 @@ static inline int __access_ok(unsigned long addr,
> unsigned long size, #define __put_user_inatomic(x, ptr) \
>  	__put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
> 
> +#define __get_user_instr(x, ptr) \
> +	__get_user_nocheck((x).val, (u32 *)(ptr), sizeof(u32), true)
> +
> +#define __get_user_instr_inatomic(x, ptr) \
> +	__get_user_nosleep((x).val, (u32 *)(ptr), sizeof(u32))
>  extern long __put_user_bad(void);
> 
>  /*
> diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
> index 9e66e6c62354..b8f56052c6fe 100644
> --- a/arch/powerpc/kernel/align.c
> +++ b/arch/powerpc/kernel/align.c
> @@ -304,7 +304,7 @@ int fix_alignment(struct pt_regs *regs)
>  	 */
>  	CHECK_FULL_REGS(regs);
> 
> -	if (unlikely(__get_user(instr.val, (unsigned int __user *)regs->nip)))
> +	if (unlikely(__get_user_instr(instr, (void __user *)regs->nip)))
>  		return -EFAULT;
>  	if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
>  		/* We don't handle PPC little-endian any more... */
> diff --git a/arch/powerpc/kernel/hw_breakpoint.c
> b/arch/powerpc/kernel/hw_breakpoint.c index 2db9a7ac7bcb..423603c92c0f
> 100644
> --- a/arch/powerpc/kernel/hw_breakpoint.c
> +++ b/arch/powerpc/kernel/hw_breakpoint.c
> @@ -249,7 +249,7 @@ static bool stepping_handler(struct pt_regs *regs,
> struct perf_event *bp, struct instruction_op op;
>  	unsigned long addr = info->address;
> 
> -	if (__get_user_inatomic(instr.val, (unsigned int *)regs->nip))
> +	if (__get_user_instr_inatomic(instr, (void __user *)regs->nip))
>  		goto fail;
> 
>  	ret = analyse_instr(&op, regs, instr);
> diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c
> index bb262707fb5c..adcdba6d534e 100644
> --- a/arch/powerpc/kernel/vecemu.c
> +++ b/arch/powerpc/kernel/vecemu.c
> @@ -266,7 +266,7 @@ int emulate_altivec(struct pt_regs *regs)
>  	unsigned int va, vb, vc, vd;
>  	vector128 *vrs;
> 
> -	if (get_user(instr.val, (unsigned int __user *) regs->nip))
> +	if (__get_user_instr(instr, (void __user *) regs->nip))
>  		return -EFAULT;
> 
>  	word = ppc_inst_val(instr);





^ permalink raw reply

* Re: [PATCH v7 15/28] powerpc/kprobes: Use patch_instruction()
From: Alistair Popple @ 2020-05-05  1:41 UTC (permalink / raw)
  To: Jordan Niethe; +Cc: npiggin, bala24, naveen.n.rao, linuxppc-dev, dja
In-Reply-To: <20200501034220.8982-16-jniethe5@gmail.com>

Without CONFIG_STRICT_KERNEL_RWX this boils down to doing the same thing 
(although with a few more safety checks along the way), and with 
CONFIG_STRICT_KERNEL_RWX this should make it actually work (although perhaps 
there was some other mechanism that made it work anyway).

Reviewed-by: Alistair Popple <alistair@popple.id.au>

On Friday, 1 May 2020 1:42:07 PM AEST Jordan Niethe wrote:
> Instead of using memcpy() and flush_icache_range() use
> patch_instruction() which not only accomplishes both of these steps but
> will also make it easier to add support for prefixed instructions.
> 
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
> v6: New to series.
> ---
>  arch/powerpc/kernel/kprobes.c | 5 +----
>  1 file changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
> index f64312dca84f..a72c8e1a42ad 100644
> --- a/arch/powerpc/kernel/kprobes.c
> +++ b/arch/powerpc/kernel/kprobes.c
> @@ -125,11 +125,8 @@ int arch_prepare_kprobe(struct kprobe *p)
>  	}
> 
>  	if (!ret) {
> -		memcpy(p->ainsn.insn, p->addr,
> -				MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
> +		patch_instruction((struct ppc_inst *)p->ainsn.insn, insn);
>  		p->opcode = ppc_inst_val(insn);
> -		flush_icache_range((unsigned long)p->ainsn.insn,
> -			(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
>  	}
> 
>  	p->ainsn.boostable = 0;





^ permalink raw reply

* Re: [PATCH V2 00/11] Subject: Remove duplicated kmap code
From: Ira Weiny @ 2020-05-04 23:27 UTC (permalink / raw)
  To: Al Viro
  Cc: Peter Zijlstra, Dave Hansen, dri-devel, linux-mips,
	James E.J. Bottomley, Max Filippov, Huang Rui, Paul Mackerras,
	H. Peter Anvin, sparclinux, Dan Williams, Helge Deller, x86,
	linux-csky, Ingo Molnar, linux-snps-arc, linux-xtensa,
	Borislav Petkov, Andy Lutomirski, Thomas Gleixner,
	linux-arm-kernel, Chris Zankel, Thomas Bogendoerfer, linux-parisc,
	linux-kernel, Christian Koenig, Andrew Morton, linuxppc-dev,
	David S. Miller
In-Reply-To: <20200504210225.GW23230@ZenIV.linux.org.uk>

On Mon, May 04, 2020 at 10:02:25PM +0100, Al Viro wrote:
> On Mon, May 04, 2020 at 01:17:41PM -0700, Ira Weiny wrote:
> 
> > > || * arm: much, much worse.  We have several files that pull linux/highmem.h:
> > > || arch/arm/mm/cache-feroceon-l2.c, arch/arm/mm/cache-xsc3l2.c,
> > > || arch/arm/mm/copypage-*.c, arch/arm/mm/dma-mapping.c, arch/arm/mm/flush.c,
> > > || arch/arm/mm/highmem.c, arch/arm/probes/uprobes/core.c,
> > > || arch/arm/include/asm/kvm_mmu.h (kmap_atomic_pfn()).
> > > || Those are fine, but we also have this:
> > > || arch/arm/include/asm/pgtable.h:200:#define __pte_map(pmd)               (pte_t *)kmap_atomic(pmd_page(*(pmd)))
> > > || arch/arm/include/asm/pgtable.h:208:#define pte_offset_map(pmd,addr)     (__pte_map(pmd) + pte_index(addr))
> > > || and sure as hell, asm/pgtable.h does *NOT* pull linux/highmem.h.
> > 
> > It does not pull asm/highmem.h either...
> 
> No, but the users of those macros need to be considered.

Agreed, I was trying to point out that highmem.h was being pulled from
somewhere else prior to my series, sorry.

> 
> > > || #define pte_offset_map(dir, addr)               \
> > > ||         ((pte_t *) kmap_atomic(pmd_page(*(dir))) + pte_index(addr))
> > > ||         One pte_offset_map user in arch/microblaze:
> > > || arch/microblaze/kernel/signal.c:207:    ptep = pte_offset_map(pmdp, address);
> > > || Messy, but doesn't require any changes (we have asm/pgalloc.h included
> > > || there, and that pull linux/highmem.h).
> > 
> > AFAICS asm/pgtable.h does not include asm/highmem.h here...
> > 
> > So looks like arch/microblaze/kernel/signal.c will need linux/highmem.h
> 
> See above - line 39 in there is
> #include <asm/pgalloc.h>
> and line 14 in arch/microblaze/include/asm/pgalloc.h is
> #include <linux/highmem.h>
> It's conditional upon CONFIG_MMU in there, but so's the use of
> pte_offset_map() in arch/microblaze/kernel/signal.c 
> 
> So it shouldn't be a problem.

Ah ok, I did not see that one.  Ok I'll drop that change and this series should
be good.

I was setting up to submit another version with 3 more patches you have
suggested:

kmap: Remove kmap_atomic_to_page()
parisc/kmap: Remove duplicate kmap code
sparc: Remove unnecessary includes

Would you like to see those folded in?  I submitted 2 of the above as a
separate series already.

> 
> > > || * xtensa: users in arch/xtensa/kernel/pci-dma.c, arch/xtensa/mm/highmem.c,
> > > || arch/xtensa/mm/cache.c and arch/xtensa/platforms/iss/simdisk.c (all pull
> > > || linux/highmem.h).
> > 
> > Actually
> > 
> > arch/xtensa/mm/cache.c gets linux/highmem.h from linux/pagemap.h
> > 
> > arch/xtensa/platforms/iss/simdisk.c may have an issue?
> > 	linux/blkdev.h -> CONFIG_BLOCK -> linux/pagemap.h -> linux/highmem.h
> > 	But simdisk.c requires BLK_DEV_SIMDISK -> CONFIG_BLOCK...
> > 	<sigh>
> 
> Yep - see above re major chain of indirect includes conditional upon CONFIG_BLOCK
> and its uses in places that only build with such configs.  There's a plenty of
> similar considerations outside of arch/*, unfortunately...

Indeed.

FWIW the last 2 versions of this series have had no build failures with 0-day.

This series in particular just finished 164 configs without issue.

Would you like me to submit a new series?  With your additional patches?

Ira

^ permalink raw reply

* Re: [PATCH v2 2/5] stats_fs API: create, add and remove stats_fs sources and values
From: Randy Dunlap @ 2020-05-04 22:11 UTC (permalink / raw)
  To: Emanuele Giuseppe Esposito, kvm
  Cc: linux-s390, David Hildenbrand, Cornelia Huck,
	Emanuele Giuseppe Esposito, linux-kernel, kvm-ppc, linux-mips,
	Christian Borntraeger, Alexander Viro, linux-fsdevel,
	Paolo Bonzini, Vitaly Kuznetsov, linuxppc-dev, Jim Mattson
In-Reply-To: <20200504110344.17560-3-eesposit@redhat.com>

On 5/4/20 4:03 AM, Emanuele Giuseppe Esposito wrote:
> diff --git a/fs/Kconfig b/fs/Kconfig
> index f08fbbfafd9a..1b0de0f19e96 100644
> --- a/fs/Kconfig
> +++ b/fs/Kconfig
> @@ -328,4 +328,10 @@ source "fs/unicode/Kconfig"
>  config IO_WQ
>  	bool
>  
> +config STATS_FS
> +	bool "Statistics Filesystem"
> +	help
> +	  stats_fs is a virtual file system that provides counters and
> +	  other statistics about the running kernel.
> +
>  endmenu

Hi,

This kconfig entry should be under (inside) "Pseudo filesystems",
i.e., between 'menu "Pseudo filesystems"' and its corresponding
"endmenu".

Thanks.
-- 
~Randy


^ permalink raw reply

* [powerpc:next] BUILD SUCCESS 140777a3d8dfdb3d3f20ea7707c0f1c0ce1b0aa5
From: kbuild test robot @ 2020-05-04 22:02 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  next
branch HEAD: 140777a3d8dfdb3d3f20ea7707c0f1c0ce1b0aa5  powerpc/fadump: consider reserved ranges while reserving memory

elapsed time: 481m

configs tested: 205
configs skipped: 0

The following configs have been built successfully.
More configs may be tested in the coming days.

arm64                            allyesconfig
arm                              allyesconfig
arm64                            allmodconfig
arm                              allmodconfig
arm64                             allnoconfig
arm                               allnoconfig
arm                           efm32_defconfig
arm                         at91_dt_defconfig
arm                        shmobile_defconfig
arm64                               defconfig
arm                          exynos_defconfig
arm                        multi_v5_defconfig
arm                           sunxi_defconfig
arm                        multi_v7_defconfig
sparc                            allyesconfig
ia64                        generic_defconfig
riscv                             allnoconfig
parisc                           allmodconfig
parisc                            allnoconfig
powerpc                       holly_defconfig
s390                             allmodconfig
s390                          debug_defconfig
ia64                             allyesconfig
i386                              allnoconfig
i386                             allyesconfig
i386                             alldefconfig
i386                                defconfig
i386                              debian-10.3
ia64                             allmodconfig
ia64                                defconfig
ia64                              allnoconfig
ia64                          tiger_defconfig
ia64                         bigsur_defconfig
ia64                             alldefconfig
m68k                       m5475evb_defconfig
m68k                             allmodconfig
m68k                       bvme6000_defconfig
m68k                           sun3_defconfig
m68k                          multi_defconfig
nios2                         3c120_defconfig
nios2                         10m50_defconfig
c6x                        evmc6678_defconfig
c6x                              allyesconfig
openrisc                 simple_smp_defconfig
openrisc                    or1ksim_defconfig
nds32                               defconfig
nds32                             allnoconfig
csky                                defconfig
alpha                               defconfig
h8300                       h8s-sim_defconfig
h8300                     edosk2674_defconfig
xtensa                          iss_defconfig
h8300                    h8300h-sim_defconfig
xtensa                       common_defconfig
arc                                 defconfig
arc                              allyesconfig
microblaze                      mmu_defconfig
microblaze                    nommu_defconfig
mips                      fuloong2e_defconfig
mips                      malta_kvm_defconfig
mips                            ar7_defconfig
mips                             allyesconfig
mips                         64r6el_defconfig
mips                              allnoconfig
mips                           32r2_defconfig
mips                             allmodconfig
mips                malta_kvm_guest_defconfig
mips                         tb0287_defconfig
mips                       capcella_defconfig
mips                           ip32_defconfig
mips                  decstation_64_defconfig
mips                      loongson3_defconfig
mips                          ath79_defconfig
mips                        bcm63xx_defconfig
parisc                generic-64bit_defconfig
parisc                generic-32bit_defconfig
parisc                           allyesconfig
powerpc                      chrp32_defconfig
powerpc                             defconfig
powerpc                       ppc64_defconfig
powerpc                          rhel-kconfig
powerpc                           allnoconfig
powerpc                  mpc866_ads_defconfig
powerpc                    amigaone_defconfig
powerpc                    adder875_defconfig
powerpc                     ep8248e_defconfig
powerpc                          g5_defconfig
powerpc                     mpc512x_defconfig
m68k                 randconfig-a001-20200505
mips                 randconfig-a001-20200505
nds32                randconfig-a001-20200505
parisc               randconfig-a001-20200505
alpha                randconfig-a001-20200505
riscv                randconfig-a001-20200505
m68k                 randconfig-a001-20200503
mips                 randconfig-a001-20200503
nds32                randconfig-a001-20200503
alpha                randconfig-a001-20200503
parisc               randconfig-a001-20200503
riscv                randconfig-a001-20200503
m68k                 randconfig-a001-20200502
mips                 randconfig-a001-20200502
nds32                randconfig-a001-20200502
alpha                randconfig-a001-20200502
parisc               randconfig-a001-20200502
riscv                randconfig-a001-20200502
h8300                randconfig-a001-20200503
nios2                randconfig-a001-20200503
microblaze           randconfig-a001-20200503
c6x                  randconfig-a001-20200503
sparc64              randconfig-a001-20200503
s390                 randconfig-a001-20200504
xtensa               randconfig-a001-20200504
sh                   randconfig-a001-20200504
openrisc             randconfig-a001-20200504
csky                 randconfig-a001-20200504
s390                 randconfig-a001-20200505
xtensa               randconfig-a001-20200505
sh                   randconfig-a001-20200505
openrisc             randconfig-a001-20200505
csky                 randconfig-a001-20200505
xtensa               randconfig-a001-20200503
sh                   randconfig-a001-20200503
openrisc             randconfig-a001-20200503
csky                 randconfig-a001-20200503
s390                 randconfig-a001-20200430
xtensa               randconfig-a001-20200430
csky                 randconfig-a001-20200430
openrisc             randconfig-a001-20200430
sh                   randconfig-a001-20200430
i386                 randconfig-b003-20200503
x86_64               randconfig-b002-20200503
i386                 randconfig-b001-20200503
x86_64               randconfig-b003-20200503
x86_64               randconfig-b001-20200503
i386                 randconfig-b002-20200503
x86_64               randconfig-c002-20200502
i386                 randconfig-c002-20200502
i386                 randconfig-c001-20200502
i386                 randconfig-c003-20200502
i386                 randconfig-d003-20200502
i386                 randconfig-d001-20200502
x86_64               randconfig-d002-20200502
i386                 randconfig-d002-20200502
x86_64               randconfig-e003-20200502
i386                 randconfig-e003-20200502
x86_64               randconfig-e001-20200502
i386                 randconfig-e002-20200502
i386                 randconfig-e001-20200502
x86_64               randconfig-e003-20200503
x86_64               randconfig-e002-20200503
i386                 randconfig-e003-20200503
x86_64               randconfig-e001-20200503
i386                 randconfig-e002-20200503
i386                 randconfig-e001-20200503
i386                 randconfig-f003-20200505
x86_64               randconfig-f001-20200505
x86_64               randconfig-f003-20200505
i386                 randconfig-f001-20200505
i386                 randconfig-f002-20200505
x86_64               randconfig-a003-20200505
x86_64               randconfig-a001-20200505
i386                 randconfig-a001-20200505
i386                 randconfig-a003-20200505
i386                 randconfig-a002-20200505
ia64                 randconfig-a001-20200503
arm64                randconfig-a001-20200503
arc                  randconfig-a001-20200503
arm                  randconfig-a001-20200503
sparc                randconfig-a001-20200503
ia64                 randconfig-a001-20200505
arc                  randconfig-a001-20200505
powerpc              randconfig-a001-20200505
arm                  randconfig-a001-20200505
sparc                randconfig-a001-20200505
riscv                            allyesconfig
riscv                    nommu_virt_defconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
s390                       zfcpdump_defconfig
s390                             allyesconfig
s390                              allnoconfig
s390                             alldefconfig
s390                                defconfig
sh                          rsk7269_defconfig
sh                               allmodconfig
sh                            titan_defconfig
sh                  sh7785lcr_32bit_defconfig
sh                                allnoconfig
sparc                               defconfig
sparc64                             defconfig
sparc64                           allnoconfig
sparc64                          allyesconfig
sparc64                          allmodconfig
um                           x86_64_defconfig
um                             i386_defconfig
um                                  defconfig
x86_64                                   rhel
x86_64                               rhel-7.6
x86_64                    rhel-7.6-kselftests
x86_64                         rhel-7.2-clear
x86_64                                    lkp
x86_64                              fedora-25
x86_64                                  kexec

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* [RFC][PATCH 2/2] Add support for ima buffer pass using reserved memory arm64
From: Prakhar Srivastava @ 2020-05-04 20:38 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, linuxppc-dev, devicetree,
	linux-integrity, linux-security-module
  Cc: kstewart, mark.rutland, catalin.marinas, bhsharma, tao.li, zohar,
	paulus, vincenzo.frascino, frowand.list, nramas, masahiroy,
	jmorris, takahiro.akashi, serge, pasha.tatashin, will, prsriva,
	robh+dt, hsinyi, tusharsu, tglx, allison, mbrugger, balajib,
	dmitry.kasatkin, james.morse, gregkh
In-Reply-To: <20200504203829.6330-1-prsriva@linux.microsoft.com>

 Add support for ima buffer pass using reserved memory for
 arm64 kexec. Update the arch sepcific code path in kexec file load to store
 the ima buffer in the reserved memory. The same reserved memory is read on
 kexec or cold boot.

Signed-off-by: Prakhar Srivastava <prsriva@linux.microsoft.com>
---
 arch/arm64/Kconfig                     |  1 +
 arch/arm64/include/asm/ima.h           | 22 +++++++++
 arch/arm64/include/asm/kexec.h         |  5 ++
 arch/arm64/kernel/Makefile             |  1 +
 arch/arm64/kernel/ima_kexec.c          | 64 ++++++++++++++++++++++++++
 arch/arm64/kernel/machine_kexec_file.c |  1 +
 arch/powerpc/include/asm/ima.h         |  3 +-
 arch/powerpc/kexec/ima.c               | 14 +++++-
 security/integrity/ima/ima_kexec.c     | 15 ++++--
 9 files changed, 119 insertions(+), 7 deletions(-)
 create mode 100644 arch/arm64/include/asm/ima.h
 create mode 100644 arch/arm64/kernel/ima_kexec.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 40fb05d96c60..bc9e1a91686b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1069,6 +1069,7 @@ config KEXEC
 config KEXEC_FILE
 	bool "kexec file based system call"
 	select KEXEC_CORE
+	select HAVE_IMA_KEXEC
 	help
 	  This is new version of kexec system call. This system call is
 	  file based and takes file descriptors as system call argument
diff --git a/arch/arm64/include/asm/ima.h b/arch/arm64/include/asm/ima.h
new file mode 100644
index 000000000000..58033b427e59
--- /dev/null
+++ b/arch/arm64/include/asm/ima.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM64_IMA_H
+#define _ASM_ARM64_IMA_H
+
+struct kimage;
+
+int is_ima_memory_reserved(void);
+int ima_get_kexec_buffer(void **addr, size_t *size);
+int ima_free_kexec_buffer(void);
+
+#ifdef CONFIG_IMA_KEXEC
+int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
+			      void *buffer, size_t size);
+
+#else
+int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
+			      void *buffer, size_t size)
+{
+	return 0;
+}
+#endif /* CONFIG_IMA_KEXEC */
+#endif /* _ASM_ARM64_IMA_H */
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index d24b527e8c00..2bd19ccb6c43 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -100,6 +100,11 @@ struct kimage_arch {
 	void *elf_headers;
 	unsigned long elf_headers_mem;
 	unsigned long elf_headers_sz;
+
+#ifdef CONFIG_IMA_KEXEC
+	phys_addr_t ima_buffer_addr;
+	size_t ima_buffer_size;
+#endif
 };
 
 extern const struct kexec_file_ops kexec_image_ops;
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 4e5b8ee31442..cd3cb7690d51 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -55,6 +55,7 @@ obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o
 obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
 obj-$(CONFIG_KEXEC_CORE)		+= machine_kexec.o relocate_kernel.o	\
 					   cpu-reset.o
+obj-$(CONFIG_HAVE_IMA_KEXEC)		+= ima_kexec.o
 obj-$(CONFIG_KEXEC_FILE)		+= machine_kexec_file.o kexec_image.o
 obj-$(CONFIG_ARM64_RELOC_TEST)		+= arm64-reloc-test.o
 arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
diff --git a/arch/arm64/kernel/ima_kexec.c b/arch/arm64/kernel/ima_kexec.c
new file mode 100644
index 000000000000..ff5649333c7c
--- /dev/null
+++ b/arch/arm64/kernel/ima_kexec.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Microsoft Corporation.
+ *
+ * Authors:
+ * Prakhar Srivastava <prsriva@linux.microsoft.com>
+ */
+
+#include <linux/kexec.h>
+#include <linux/of.h>
+
+
+/**
+ * is_ima_memory_reserved - check if memory is reserved via device
+ *			    tree.
+ *	Return: negative or zero when memory is not reserved.
+ *	positive number on success.
+ *
+ */
+int is_ima_memory_reserved(void)
+{
+	return of_is_ima_memory_reserved();
+}
+
+/**
+ * ima_get_kexec_buffer - get IMA buffer from the previous kernel
+ * @addr:	On successful return, set to point to the buffer contents.
+ * @size:	On successful return, set to the buffer size.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int ima_get_kexec_buffer(void **addr, size_t *size)
+{
+	return of_get_ima_buffer(addr, size);
+}
+
+/**
+ * ima_free_kexec_buffer - free memory used by the IMA buffer
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int ima_free_kexec_buffer(void)
+{
+	return of_remove_ima_buffer();
+}
+
+#ifdef CONFIG_IMA_KEXEC
+/**
+ * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA
+ *	measurement log.
+ * @image: - pointer to the kimage, to store the address and size of the
+ *	IMA measurement log.
+ * @load_addr: - the address where the IMA measurement log is stored.
+ * @size - size of the IMA measurement log.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
+			      void *buffer, size_t size)
+{
+	of_ima_write_buffer(buffer, size);
+	return 0;
+}
+#endif /* CONFIG_IMA_KEXEC */
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index b40c3b0def92..8dc25511142d 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -22,6 +22,7 @@
 #include <linux/types.h>
 #include <linux/vmalloc.h>
 #include <asm/byteorder.h>
+#include <asm/ima.h>
 
 /* relevant device tree properties */
 #define FDT_PROP_KEXEC_ELFHDR	"linux,elfcorehdr"
diff --git a/arch/powerpc/include/asm/ima.h b/arch/powerpc/include/asm/ima.h
index ead488cf3981..a8febc620b42 100644
--- a/arch/powerpc/include/asm/ima.h
+++ b/arch/powerpc/include/asm/ima.h
@@ -4,6 +4,7 @@
 
 struct kimage;
 
+int is_ima_memory_reserved(void);
 int ima_get_kexec_buffer(void **addr, size_t *size);
 int ima_free_kexec_buffer(void);
 
@@ -15,7 +16,7 @@ static inline void remove_ima_buffer(void *fdt, int chosen_node) {}
 
 #ifdef CONFIG_IMA_KEXEC
 int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
-			      size_t size);
+			      void *buffer, size_t size);
 
 int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node);
 #else
diff --git a/arch/powerpc/kexec/ima.c b/arch/powerpc/kexec/ima.c
index 720e50e490b6..3823539d4e07 100644
--- a/arch/powerpc/kexec/ima.c
+++ b/arch/powerpc/kexec/ima.c
@@ -46,6 +46,18 @@ static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr,
 	return 0;
 }
 
+/**
+ * is_ima_memory_reserved - check if memory is reserved via device
+ *			    tree.
+ *	Return: negative or zero when memory is not reserved.
+ *	positive number on success.
+ *
+ */
+int is_ima_memory_reserved(void)
+{
+	return -EOPNOTSUPP;
+}
+
 /**
  * ima_get_kexec_buffer - get IMA buffer from the previous kernel
  * @addr:	On successful return, set to point to the buffer contents.
@@ -137,7 +149,7 @@ void remove_ima_buffer(void *fdt, int chosen_node)
  * Return: 0 on success, negative errno on error.
  */
 int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
-			      size_t size)
+			      void *buffer, size_t size)
 {
 	image->arch.ima_buffer_addr = load_addr;
 	image->arch.ima_buffer_size = size;
diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
index 121de3e04af2..3749472c7e18 100644
--- a/security/integrity/ima/ima_kexec.c
+++ b/security/integrity/ima/ima_kexec.c
@@ -116,13 +116,18 @@ void ima_add_kexec_buffer(struct kimage *image)
 	kbuf.buffer = kexec_buffer;
 	kbuf.bufsz = kexec_buffer_size;
 	kbuf.memsz = kexec_segment_size;
-	ret = kexec_add_buffer(&kbuf);
-	if (ret) {
-		pr_err("Error passing over kexec measurement buffer.\n");
-		return;
+
+	if (!is_ima_memory_reserved()) {
+
+		ret = kexec_add_buffer(&kbuf);
+		if (ret) {
+			pr_err("Error passing over kexec measurement buffer.\n");
+			return;
+		}
 	}
 
-	ret = arch_ima_add_kexec_buffer(image, kbuf.mem, kexec_segment_size);
+	ret = arch_ima_add_kexec_buffer(image, kbuf.mem, kexec_buffer,
+					kexec_segment_size);
 	if (ret) {
 		pr_err("Error passing over kexec measurement buffer.\n");
 		return;
-- 
2.25.1


^ permalink raw reply related

* [RFC][PATCH 0/2] Add support for using reserved memory for ima buffer pass
From: Prakhar Srivastava @ 2020-05-04 20:38 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, linuxppc-dev, devicetree,
	linux-integrity, linux-security-module
  Cc: kstewart, mark.rutland, catalin.marinas, bhsharma, tao.li, zohar,
	paulus, vincenzo.frascino, frowand.list, nramas, masahiroy,
	jmorris, takahiro.akashi, serge, pasha.tatashin, will, prsriva,
	robh+dt, hsinyi, tusharsu, tglx, allison, mbrugger, balajib,
	dmitry.kasatkin, james.morse, gregkh

IMA during kexec(kexec file load) verifies the kernel signature and measures
the signature of the kernel. The signature in the logs can be used to verfiy the 
authenticity of the kernel. The logs don not get carried over kexec and thus
remote attesation cannot verify the signature of the running kernel.

Introduce an ABI to carry forward the ima logs over kexec.
Memory reserved via device tree reservation can be used to store and read
via the of_* functions.

Reserved memory stores the size(sizeof(size_t)) of the buffer in the starting
address, followed by the IMA log contents.

Tested on:
  arm64 with Uboot

Prakhar Srivastava (2):
  Add a layer of abstraction to use the memory reserved by device tree
    for ima buffer pass.
  Add support for ima buffer pass using reserved memory for arm64 kexec.
    Update the arch sepcific code path in kexec file load to store the
    ima buffer in the reserved memory. The same reserved memory is read
    on kexec or cold boot.

 arch/arm64/Kconfig                     |   1 +
 arch/arm64/include/asm/ima.h           |  22 ++++
 arch/arm64/include/asm/kexec.h         |   5 +
 arch/arm64/kernel/Makefile             |   1 +
 arch/arm64/kernel/ima_kexec.c          |  64 ++++++++++
 arch/arm64/kernel/machine_kexec_file.c |   1 +
 arch/powerpc/include/asm/ima.h         |   3 +-
 arch/powerpc/kexec/ima.c               |  14 ++-
 drivers/of/Kconfig                     |   6 +
 drivers/of/Makefile                    |   1 +
 drivers/of/of_ima.c                    | 165 +++++++++++++++++++++++++
 include/linux/of.h                     |  34 +++++
 security/integrity/ima/ima_kexec.c     |  15 ++-
 13 files changed, 325 insertions(+), 7 deletions(-)
 create mode 100644 arch/arm64/include/asm/ima.h
 create mode 100644 arch/arm64/kernel/ima_kexec.c
 create mode 100644 drivers/of/of_ima.c

-- 
2.25.1


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox