LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH 0/1] PPC32: fix ptrace() access to FPU registers
From: Daniel Axtens @ 2019-06-19  0:36 UTC (permalink / raw)
  To: Andreas Schwab, Radu Rendec; +Cc: Paul Mackerras, linuxppc-dev, Oleg Nesterov
In-Reply-To: <875zp2rcip.fsf@igel.home>

Andreas Schwab <schwab@linux-m68k.org> writes:

> On Jun 18 2019, Radu Rendec <radu.rendec@gmail.com> wrote:
>
>> Since you already have a working setup, it would be nice if you could
>> add a printk to arch_ptrace() to print the address and confirm what I
>> believe happens (by reading the gdb source code).
>
> A ppc32 ptrace syscall goes through compat_arch_ptrace.


Ah right, and that (in ptrace32.c) contains code that will work:


			/*
			 * the user space code considers the floating point
			 * to be an array of unsigned int (32 bits) - the
			 * index passed in is based on this assumption.
			 */
			tmp = ((unsigned int *)child->thread.fp_state.fpr)
				[FPRINDEX(index)];

FPRINDEX is defined above to deal with the various manipulations you
need to do.

Radu: I think we want to copy that working code back into ptrace.c. The
challenge will be unpicking the awful mess of ifdefs in ptrace.c and
making it somewhat more comprehensible.

Regards,
Daniel

>
> Andreas.
>
> -- 
> Andreas Schwab, schwab@linux-m68k.org
> GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
> "And now for something completely different."

^ permalink raw reply

* Re: [PATCH 3/4] powerpc/powernv: remove dead NPU DMA code
From: Alexey Kardashevskiy @ 2019-06-19  0:34 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Paul Mackerras, linuxppc-dev, linux-kernel
In-Reply-To: <20190523074924.19659-4-hch@lst.de>



On 23/05/2019 17:49, Christoph Hellwig wrote:
> None of these routines were ever used since they were added to the
> kernel.


It is still being used exactly in the way as it was explained before in
previous respins. Thanks.


> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  arch/powerpc/include/asm/book3s/64/mmu.h |   2 -
>  arch/powerpc/include/asm/powernv.h       |  22 -
>  arch/powerpc/mm/book3s64/mmu_context.c   |   1 -
>  arch/powerpc/platforms/powernv/npu-dma.c | 556 -----------------------
>  4 files changed, 581 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
> index 74d24201fc4f..23b83d3593e2 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
> @@ -116,8 +116,6 @@ typedef struct {
>  	/* Number of users of the external (Nest) MMU */
>  	atomic_t copros;
>  
> -	/* NPU NMMU context */
> -	struct npu_context *npu_context;
>  	struct hash_mm_context *hash_context;
>  
>  	unsigned long vdso_base;
> diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
> index 05b552418519..40f868c5e93c 100644
> --- a/arch/powerpc/include/asm/powernv.h
> +++ b/arch/powerpc/include/asm/powernv.h
> @@ -11,35 +11,13 @@
>  #define _ASM_POWERNV_H
>  
>  #ifdef CONFIG_PPC_POWERNV
> -#define NPU2_WRITE 1
>  extern void powernv_set_nmmu_ptcr(unsigned long ptcr);
> -extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
> -			unsigned long flags,
> -			void (*cb)(struct npu_context *, void *),
> -			void *priv);
> -extern void pnv_npu2_destroy_context(struct npu_context *context,
> -				struct pci_dev *gpdev);
> -extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
> -				unsigned long *flags, unsigned long *status,
> -				int count);
>  
>  void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val);
>  
>  void pnv_tm_init(void);
>  #else
>  static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { }
> -static inline struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
> -			unsigned long flags,
> -			struct npu_context *(*cb)(struct npu_context *, void *),
> -			void *priv) { return ERR_PTR(-ENODEV); }
> -static inline void pnv_npu2_destroy_context(struct npu_context *context,
> -					struct pci_dev *gpdev) { }
> -
> -static inline int pnv_npu2_handle_fault(struct npu_context *context,
> -					uintptr_t *ea, unsigned long *flags,
> -					unsigned long *status, int count) {
> -	return -ENODEV;
> -}
>  
>  static inline void pnv_tm_init(void) { }
>  #endif
> diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
> index cb2b08635508..0dd3e631cf3e 100644
> --- a/arch/powerpc/mm/book3s64/mmu_context.c
> +++ b/arch/powerpc/mm/book3s64/mmu_context.c
> @@ -140,7 +140,6 @@ static int radix__init_new_context(struct mm_struct *mm)
>  	 */
>  	asm volatile("ptesync;isync" : : : "memory");
>  
> -	mm->context.npu_context = NULL;
>  	mm->context.hash_context = NULL;
>  
>  	return index;
> diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
> index 495550432f3d..4ed24132bb7c 100644
> --- a/arch/powerpc/platforms/powernv/npu-dma.c
> +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> @@ -22,12 +22,6 @@
>  
>  #include "pci.h"
>  
> -/*
> - * spinlock to protect initialisation of an npu_context for a particular
> - * mm_struct.
> - */
> -static DEFINE_SPINLOCK(npu_context_lock);
> -
>  static struct pci_dev *get_pci_dev(struct device_node *dn)
>  {
>  	struct pci_dn *pdn = PCI_DN(dn);
> @@ -362,15 +356,6 @@ struct npu_comp {
>  /* An NPU descriptor, valid for POWER9 only */
>  struct npu {
>  	int index;
> -	__be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS];
> -	unsigned int mmio_atsd_count;
> -
> -	/* Bitmask for MMIO register usage */
> -	unsigned long mmio_atsd_usage;
> -
> -	/* Do we need to explicitly flush the nest mmu? */
> -	bool nmmu_flush;
> -
>  	struct npu_comp npucomp;
>  };
>  
> @@ -627,534 +612,8 @@ struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
>  }
>  #endif /* CONFIG_IOMMU_API */
>  
> -/* Maximum number of nvlinks per npu */
> -#define NV_MAX_LINKS 6
> -
> -/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */
> -static int max_npu2_index;
> -
> -struct npu_context {
> -	struct mm_struct *mm;
> -	struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS];
> -	struct mmu_notifier mn;
> -	struct kref kref;
> -	bool nmmu_flush;
> -
> -	/* Callback to stop translation requests on a given GPU */
> -	void (*release_cb)(struct npu_context *context, void *priv);
> -
> -	/*
> -	 * Private pointer passed to the above callback for usage by
> -	 * device drivers.
> -	 */
> -	void *priv;
> -};
> -
> -struct mmio_atsd_reg {
> -	struct npu *npu;
> -	int reg;
> -};
> -
> -/*
> - * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC
> - * if none are available.
> - */
> -static int get_mmio_atsd_reg(struct npu *npu)
> -{
> -	int i;
> -
> -	for (i = 0; i < npu->mmio_atsd_count; i++) {
> -		if (!test_bit(i, &npu->mmio_atsd_usage))
> -			if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage))
> -				return i;
> -	}
> -
> -	return -ENOSPC;
> -}
> -
> -static void put_mmio_atsd_reg(struct npu *npu, int reg)
> -{
> -	clear_bit_unlock(reg, &npu->mmio_atsd_usage);
> -}
> -
> -/* MMIO ATSD register offsets */
> -#define XTS_ATSD_LAUNCH 0
> -#define XTS_ATSD_AVA    1
> -#define XTS_ATSD_STAT   2
> -
> -static unsigned long get_atsd_launch_val(unsigned long pid, unsigned long psize)
> -{
> -	unsigned long launch = 0;
> -
> -	if (psize == MMU_PAGE_COUNT) {
> -		/* IS set to invalidate entire matching PID */
> -		launch |= PPC_BIT(12);
> -	} else {
> -		/* AP set to invalidate region of psize */
> -		launch |= (u64)mmu_get_ap(psize) << PPC_BITLSHIFT(17);
> -	}
> -
> -	/* PRS set to process-scoped */
> -	launch |= PPC_BIT(13);
> -
> -	/* PID */
> -	launch |= pid << PPC_BITLSHIFT(38);
> -
> -	/* Leave "No flush" (bit 39) 0 so every ATSD performs a flush */
> -
> -	return launch;
> -}
> -
> -static void mmio_atsd_regs_write(struct mmio_atsd_reg
> -			mmio_atsd_reg[NV_MAX_NPUS], unsigned long offset,
> -			unsigned long val)
> -{
> -	struct npu *npu;
> -	int i, reg;
> -
> -	for (i = 0; i <= max_npu2_index; i++) {
> -		reg = mmio_atsd_reg[i].reg;
> -		if (reg < 0)
> -			continue;
> -
> -		npu = mmio_atsd_reg[i].npu;
> -		__raw_writeq_be(val, npu->mmio_atsd_regs[reg] + offset);
> -	}
> -}
> -
> -static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
> -				unsigned long pid)
> -{
> -	unsigned long launch = get_atsd_launch_val(pid, MMU_PAGE_COUNT);
> -
> -	/* Invalidating the entire process doesn't use a va */
> -	mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
> -}
> -
> -static void mmio_invalidate_range(struct mmio_atsd_reg
> -			mmio_atsd_reg[NV_MAX_NPUS], unsigned long pid,
> -			unsigned long start, unsigned long psize)
> -{
> -	unsigned long launch = get_atsd_launch_val(pid, psize);
> -
> -	/* Write all VAs first */
> -	mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_AVA, start);
> -
> -	/* Issue one barrier for all address writes */
> -	eieio();
> -
> -	/* Launch */
> -	mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
> -}
> -
> -#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
> -
> -static void mmio_invalidate_wait(
> -	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
> -{
> -	struct npu *npu;
> -	int i, reg;
> -
> -	/* Wait for all invalidations to complete */
> -	for (i = 0; i <= max_npu2_index; i++) {
> -		if (mmio_atsd_reg[i].reg < 0)
> -			continue;
> -
> -		/* Wait for completion */
> -		npu = mmio_atsd_reg[i].npu;
> -		reg = mmio_atsd_reg[i].reg;
> -		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
> -			cpu_relax();
> -	}
> -}
> -
> -/*
> - * Acquires all the address translation shootdown (ATSD) registers required to
> - * launch an ATSD on all links this npu_context is active on.
> - */
> -static void acquire_atsd_reg(struct npu_context *npu_context,
> -			struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
> -{
> -	int i, j;
> -	struct npu *npu;
> -	struct pci_dev *npdev;
> -
> -	for (i = 0; i <= max_npu2_index; i++) {
> -		mmio_atsd_reg[i].reg = -1;
> -		for (j = 0; j < NV_MAX_LINKS; j++) {
> -			/*
> -			 * There are no ordering requirements with respect to
> -			 * the setup of struct npu_context, but to ensure
> -			 * consistent behaviour we need to ensure npdev[][] is
> -			 * only read once.
> -			 */
> -			npdev = READ_ONCE(npu_context->npdev[i][j]);
> -			if (!npdev)
> -				continue;
> -
> -			npu = pci_bus_to_host(npdev->bus)->npu;
> -			if (!npu)
> -				continue;
> -
> -			mmio_atsd_reg[i].npu = npu;
> -			mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
> -			while (mmio_atsd_reg[i].reg < 0) {
> -				mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
> -				cpu_relax();
> -			}
> -			break;
> -		}
> -	}
> -}
> -
> -/*
> - * Release previously acquired ATSD registers. To avoid deadlocks the registers
> - * must be released in the same order they were acquired above in
> - * acquire_atsd_reg.
> - */
> -static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
> -{
> -	int i;
> -
> -	for (i = 0; i <= max_npu2_index; i++) {
> -		/*
> -		 * We can't rely on npu_context->npdev[][] being the same here
> -		 * as when acquire_atsd_reg() was called, hence we use the
> -		 * values stored in mmio_atsd_reg during the acquire phase
> -		 * rather than re-reading npdev[][].
> -		 */
> -		if (mmio_atsd_reg[i].reg < 0)
> -			continue;
> -
> -		put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg);
> -	}
> -}
> -
> -/*
> - * Invalidate a virtual address range
> - */
> -static void mmio_invalidate(struct npu_context *npu_context,
> -			unsigned long start, unsigned long size)
> -{
> -	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
> -	unsigned long pid = npu_context->mm->context.id;
> -	unsigned long atsd_start = 0;
> -	unsigned long end = start + size - 1;
> -	int atsd_psize = MMU_PAGE_COUNT;
> -
> -	/*
> -	 * Convert the input range into one of the supported sizes. If the range
> -	 * doesn't fit, use the next larger supported size. Invalidation latency
> -	 * is high, so over-invalidation is preferred to issuing multiple
> -	 * invalidates.
> -	 *
> -	 * A 4K page size isn't supported by NPU/GPU ATS, so that case is
> -	 * ignored.
> -	 */
> -	if (size == SZ_64K) {
> -		atsd_start = start;
> -		atsd_psize = MMU_PAGE_64K;
> -	} else if (ALIGN_DOWN(start, SZ_2M) == ALIGN_DOWN(end, SZ_2M)) {
> -		atsd_start = ALIGN_DOWN(start, SZ_2M);
> -		atsd_psize = MMU_PAGE_2M;
> -	} else if (ALIGN_DOWN(start, SZ_1G) == ALIGN_DOWN(end, SZ_1G)) {
> -		atsd_start = ALIGN_DOWN(start, SZ_1G);
> -		atsd_psize = MMU_PAGE_1G;
> -	}
> -
> -	if (npu_context->nmmu_flush)
> -		/*
> -		 * Unfortunately the nest mmu does not support flushing specific
> -		 * addresses so we have to flush the whole mm once before
> -		 * shooting down the GPU translation.
> -		 */
> -		flush_all_mm(npu_context->mm);
> -
> -	/*
> -	 * Loop over all the NPUs this process is active on and launch
> -	 * an invalidate.
> -	 */
> -	acquire_atsd_reg(npu_context, mmio_atsd_reg);
> -
> -	if (atsd_psize == MMU_PAGE_COUNT)
> -		mmio_invalidate_pid(mmio_atsd_reg, pid);
> -	else
> -		mmio_invalidate_range(mmio_atsd_reg, pid, atsd_start,
> -					atsd_psize);
> -
> -	mmio_invalidate_wait(mmio_atsd_reg);
> -
> -	/*
> -	 * The GPU requires two flush ATSDs to ensure all entries have been
> -	 * flushed. We use PID 0 as it will never be used for a process on the
> -	 * GPU.
> -	 */
> -	mmio_invalidate_pid(mmio_atsd_reg, 0);
> -	mmio_invalidate_wait(mmio_atsd_reg);
> -	mmio_invalidate_pid(mmio_atsd_reg, 0);
> -	mmio_invalidate_wait(mmio_atsd_reg);
> -
> -	release_atsd_reg(mmio_atsd_reg);
> -}
> -
> -static void pnv_npu2_mn_release(struct mmu_notifier *mn,
> -				struct mm_struct *mm)
> -{
> -	struct npu_context *npu_context = mn_to_npu_context(mn);
> -
> -	/* Call into device driver to stop requests to the NMMU */
> -	if (npu_context->release_cb)
> -		npu_context->release_cb(npu_context, npu_context->priv);
> -
> -	/*
> -	 * There should be no more translation requests for this PID, but we
> -	 * need to ensure any entries for it are removed from the TLB.
> -	 */
> -	mmio_invalidate(npu_context, 0, ~0UL);
> -}
> -
> -static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
> -					struct mm_struct *mm,
> -					unsigned long start, unsigned long end)
> -{
> -	struct npu_context *npu_context = mn_to_npu_context(mn);
> -	mmio_invalidate(npu_context, start, end - start);
> -}
> -
> -static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
> -	.release = pnv_npu2_mn_release,
> -	.invalidate_range = pnv_npu2_mn_invalidate_range,
> -};
> -
> -/*
> - * Call into OPAL to setup the nmmu context for the current task in
> - * the NPU. This must be called to setup the context tables before the
> - * GPU issues ATRs. pdev should be a pointed to PCIe GPU device.
> - *
> - * A release callback should be registered to allow a device driver to
> - * be notified that it should not launch any new translation requests
> - * as the final TLB invalidate is about to occur.
> - *
> - * Returns an error if there no contexts are currently available or a
> - * npu_context which should be passed to pnv_npu2_handle_fault().
> - *
> - * mmap_sem must be held in write mode and must not be called from interrupt
> - * context.
> - */
> -struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
> -			unsigned long flags,
> -			void (*cb)(struct npu_context *, void *),
> -			void *priv)
> -{
> -	int rc;
> -	u32 nvlink_index;
> -	struct device_node *nvlink_dn;
> -	struct mm_struct *mm = current->mm;
> -	struct npu *npu;
> -	struct npu_context *npu_context;
> -	struct pci_controller *hose;
> -
> -	/*
> -	 * At present we don't support GPUs connected to multiple NPUs and I'm
> -	 * not sure the hardware does either.
> -	 */
> -	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
> -
> -	if (!npdev)
> -		/* No nvlink associated with this GPU device */
> -		return ERR_PTR(-ENODEV);
> -
> -	/* We only support DR/PR/HV in pnv_npu2_map_lpar_dev() */
> -	if (flags & ~(MSR_DR | MSR_PR | MSR_HV))
> -		return ERR_PTR(-EINVAL);
> -
> -	nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
> -	if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
> -							&nvlink_index)))
> -		return ERR_PTR(-ENODEV);
> -
> -	if (!mm || mm->context.id == 0) {
> -		/*
> -		 * Kernel thread contexts are not supported and context id 0 is
> -		 * reserved on the GPU.
> -		 */
> -		return ERR_PTR(-EINVAL);
> -	}
> -
> -	hose = pci_bus_to_host(npdev->bus);
> -	npu = hose->npu;
> -	if (!npu)
> -		return ERR_PTR(-ENODEV);
> -
> -	/*
> -	 * We store the npu pci device so we can more easily get at the
> -	 * associated npus.
> -	 */
> -	spin_lock(&npu_context_lock);
> -	npu_context = mm->context.npu_context;
> -	if (npu_context) {
> -		if (npu_context->release_cb != cb ||
> -			npu_context->priv != priv) {
> -			spin_unlock(&npu_context_lock);
> -			return ERR_PTR(-EINVAL);
> -		}
> -
> -		WARN_ON(!kref_get_unless_zero(&npu_context->kref));
> -	}
> -	spin_unlock(&npu_context_lock);
> -
> -	if (!npu_context) {
> -		/*
> -		 * We can set up these fields without holding the
> -		 * npu_context_lock as the npu_context hasn't been returned to
> -		 * the caller meaning it can't be destroyed. Parallel allocation
> -		 * is protected against by mmap_sem.
> -		 */
> -		rc = -ENOMEM;
> -		npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
> -		if (npu_context) {
> -			kref_init(&npu_context->kref);
> -			npu_context->mm = mm;
> -			npu_context->mn.ops = &nv_nmmu_notifier_ops;
> -			rc = __mmu_notifier_register(&npu_context->mn, mm);
> -		}
> -
> -		if (rc) {
> -			kfree(npu_context);
> -			return ERR_PTR(rc);
> -		}
> -
> -		mm->context.npu_context = npu_context;
> -	}
> -
> -	npu_context->release_cb = cb;
> -	npu_context->priv = priv;
> -
> -	/*
> -	 * npdev is a pci_dev pointer setup by the PCI code. We assign it to
> -	 * npdev[][] to indicate to the mmu notifiers that an invalidation
> -	 * should also be sent over this nvlink. The notifiers don't use any
> -	 * other fields in npu_context, so we just need to ensure that when they
> -	 * deference npu_context->npdev[][] it is either a valid pointer or
> -	 * NULL.
> -	 */
> -	WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev);
> -
> -	if (!npu->nmmu_flush) {
> -		/*
> -		 * If we're not explicitly flushing ourselves we need to mark
> -		 * the thread for global flushes
> -		 */
> -		npu_context->nmmu_flush = false;
> -		mm_context_add_copro(mm);
> -	} else
> -		npu_context->nmmu_flush = true;
> -
> -	return npu_context;
> -}
> -EXPORT_SYMBOL(pnv_npu2_init_context);
> -
> -static void pnv_npu2_release_context(struct kref *kref)
> -{
> -	struct npu_context *npu_context =
> -		container_of(kref, struct npu_context, kref);
> -
> -	if (!npu_context->nmmu_flush)
> -		mm_context_remove_copro(npu_context->mm);
> -
> -	npu_context->mm->context.npu_context = NULL;
> -}
> -
> -/*
> - * Destroy a context on the given GPU. May free the npu_context if it is no
> - * longer active on any GPUs. Must not be called from interrupt context.
> - */
> -void pnv_npu2_destroy_context(struct npu_context *npu_context,
> -			struct pci_dev *gpdev)
> -{
> -	int removed;
> -	struct npu *npu;
> -	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
> -	struct device_node *nvlink_dn;
> -	u32 nvlink_index;
> -	struct pci_controller *hose;
> -
> -	if (WARN_ON(!npdev))
> -		return;
> -
> -	hose = pci_bus_to_host(npdev->bus);
> -	npu = hose->npu;
> -	if (!npu)
> -		return;
> -	nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
> -	if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
> -							&nvlink_index)))
> -		return;
> -	WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
> -	spin_lock(&npu_context_lock);
> -	removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
> -	spin_unlock(&npu_context_lock);
> -
> -	/*
> -	 * We need to do this outside of pnv_npu2_release_context so that it is
> -	 * outside the spinlock as mmu_notifier_destroy uses SRCU.
> -	 */
> -	if (removed) {
> -		mmu_notifier_unregister(&npu_context->mn,
> -					npu_context->mm);
> -
> -		kfree(npu_context);
> -	}
> -
> -}
> -EXPORT_SYMBOL(pnv_npu2_destroy_context);
> -
> -/*
> - * Assumes mmap_sem is held for the contexts associated mm.
> - */
> -int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
> -			unsigned long *flags, unsigned long *status, int count)
> -{
> -	u64 rc = 0, result = 0;
> -	int i, is_write;
> -	struct page *page[1];
> -	const char __user *u;
> -	char c;
> -
> -	/* mmap_sem should be held so the struct_mm must be present */
> -	struct mm_struct *mm = context->mm;
> -
> -	WARN_ON(!rwsem_is_locked(&mm->mmap_sem));
> -
> -	for (i = 0; i < count; i++) {
> -		is_write = flags[i] & NPU2_WRITE;
> -		rc = get_user_pages_remote(NULL, mm, ea[i], 1,
> -					is_write ? FOLL_WRITE : 0,
> -					page, NULL, NULL);
> -
> -		if (rc != 1) {
> -			status[i] = rc;
> -			result = -EFAULT;
> -			continue;
> -		}
> -
> -		/* Make sure partition scoped tree gets a pte */
> -		u = page_address(page[0]);
> -		if (__get_user(c, u))
> -			result = -EFAULT;
> -
> -		status[i] = 0;
> -		put_page(page[0]);
> -	}
> -
> -	return result;
> -}
> -EXPORT_SYMBOL(pnv_npu2_handle_fault);
> -
>  int pnv_npu2_init(struct pci_controller *hose)
>  {
> -	unsigned int i;
> -	u64 mmio_atsd;
>  	static int npu_index;
>  	struct npu *npu;
>  	int ret;
> @@ -1163,33 +622,18 @@ int pnv_npu2_init(struct pci_controller *hose)
>  	if (!npu)
>  		return -ENOMEM;
>  
> -	npu->nmmu_flush = of_property_read_bool(hose->dn, "ibm,nmmu-flush");
> -
> -	for (i = 0; i < ARRAY_SIZE(npu->mmio_atsd_regs) &&
> -			!of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
> -				i, &mmio_atsd); i++)
> -		npu->mmio_atsd_regs[i] = ioremap(mmio_atsd, 32);
> -
> -	pr_info("NPU%d: Found %d MMIO ATSD registers", hose->global_number, i);
> -	npu->mmio_atsd_count = i;
> -	npu->mmio_atsd_usage = 0;
>  	npu_index++;
>  	if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
>  		ret = -ENOSPC;
>  		goto fail_exit;
>  	}
> -	max_npu2_index = npu_index;
>  	npu->index = npu_index;
>  	hose->npu = npu;
>  
>  	return 0;
>  
>  fail_exit:
> -	for (i = 0; i < npu->mmio_atsd_count; ++i)
> -		iounmap(npu->mmio_atsd_regs[i]);
> -
>  	kfree(npu);
> -
>  	return ret;
>  }
>  
> 

-- 
Alexey

^ permalink raw reply

* Re: [PATCH v4 3/6] kvmppc: H_SVM_INIT_START and H_SVM_INIT_DONE hcalls
From: Thiago Jung Bauermann @ 2019-06-18 23:05 UTC (permalink / raw)
  To: Bharata B Rao
  Cc: linuxram, cclaudio, kvm-ppc, linux-mm, jglisse, aneesh.kumar,
	paulus, sukadev, linuxppc-dev
In-Reply-To: <20190528064933.23119-4-bharata@linux.ibm.com>


Hello Bharata,

Bharata B Rao <bharata@linux.ibm.com> writes:

> diff --git a/arch/powerpc/include/asm/kvm_book3s_hmm.h b/arch/powerpc/include/asm/kvm_book3s_hmm.h
> index 21f3de5f2acb..3e13dab7f690 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_hmm.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_hmm.h
> @@ -11,6 +11,8 @@ extern unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
>  					  unsigned long gra,
>  					  unsigned long flags,
>  					  unsigned long page_shift);
> +extern unsigned long kvmppc_h_svm_init_start(struct kvm *kvm);
> +extern unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
>  #else
>  static inline unsigned long
>  kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
> @@ -25,5 +27,15 @@ kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra,
>  {
>  	return H_UNSUPPORTED;
>  }
> +
> +static inine unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
> +{
> +	return H_UNSUPPORTED;
> +}
> +
> +static inine unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
> +{
> +	return H_UNSUPPORTED;
> +}
>  #endif /* CONFIG_PPC_UV */
>  #endif /* __POWERPC_KVM_PPC_HMM_H__ */

This patch won't build when CONFIG_PPC_UV isn't set because of two
typos: "inine" and the ';' at the end of kvmppc_h_svm_init_done()
function prototype.

-- 
Thiago Jung Bauermann
IBM Linux Technology Center


^ permalink raw reply

* Re: [PATCH v4 1/3] PM: wakeup: Add routine to help fetch wakeup source object.
From: Rafael J. Wysocki @ 2019-06-18 22:44 UTC (permalink / raw)
  To: Ran Wang
  Cc: Mark Rutland, Len Brown, devicetree, Greg Kroah-Hartman, linux-pm,
	linux-kernel, Li Yang, Rob Herring, Pavel Machek, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20190520095238.29210-1-ran.wang_1@nxp.com>

On Monday, May 20, 2019 11:52:36 AM CEST Ran Wang wrote:
> Some user might want to go through all registered wakeup sources
> and doing things accordingly. For example, SoC PM driver might need to
> do HW programming to prevent powering down specific IP which wakeup
> source depending on. And is user's responsibility to identify if this
> wakeup source he is interested in.

I guess the idea here is that you need to walk wakeup devices and you noticed
that there was a wakeup source object for each of them and those wakeup
source objects were on a list, so you could walk wakeup devices by walking
the list of wakeup source objects.

That is fair enough, but the changelog above doesn't even talk about that.
 
> Signed-off-by: Ran Wang <ran.wang_1@nxp.com>
> ---
> Change in v4:
> 	- None.
> 
> Change in v3:
> 	- Adjust indentation of *attached_dev;.
> 
> Change in v2:
> 	- None.
> 
>  drivers/base/power/wakeup.c |   18 ++++++++++++++++++
>  include/linux/pm_wakeup.h   |    3 +++
>  2 files changed, 21 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
> index 5b2b6a0..6904485 100644
> --- a/drivers/base/power/wakeup.c
> +++ b/drivers/base/power/wakeup.c
> @@ -14,6 +14,7 @@
>  #include <linux/suspend.h>
>  #include <linux/seq_file.h>
>  #include <linux/debugfs.h>
> +#include <linux/of_device.h>
>  #include <linux/pm_wakeirq.h>
>  #include <trace/events/power.h>
>  
> @@ -226,6 +227,22 @@ void wakeup_source_unregister(struct wakeup_source *ws)
>  	}
>  }
>  EXPORT_SYMBOL_GPL(wakeup_source_unregister);
> +/**
> + * wakeup_source_get_next - Get next wakeup source from the list
> + * @ws: Previous wakeup source object, null means caller want first one.
> + */
> +struct wakeup_source *wakeup_source_get_next(struct wakeup_source *ws)
> +{
> +	struct list_head *ws_head = &wakeup_sources;
> +
> +	if (ws)
> +		return list_next_or_null_rcu(ws_head, &ws->entry,
> +				struct wakeup_source, entry);
> +	else
> +		return list_entry_rcu(ws_head->next,
> +				struct wakeup_source, entry);
> +}
> +EXPORT_SYMBOL_GPL(wakeup_source_get_next);

This needs to be arranged along the lines of wakeup_sources_stats_seq_start/next/stop()
because of the SRCU protection of the list.

>  
>  /**
>   * device_wakeup_attach - Attach a wakeup source object to a device object.
> @@ -242,6 +259,7 @@ static int device_wakeup_attach(struct device *dev, struct wakeup_source *ws)
>  		return -EEXIST;
>  	}
>  	dev->power.wakeup = ws;
> +	ws->attached_dev = dev;
>  	if (dev->power.wakeirq)
>  		device_wakeup_attach_irq(dev, dev->power.wakeirq);
>  	spin_unlock_irq(&dev->power.lock);
> diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
> index 0ff134d..913b2fb 100644
> --- a/include/linux/pm_wakeup.h
> +++ b/include/linux/pm_wakeup.h
> @@ -50,6 +50,7 @@
>   * @wakeup_count: Number of times the wakeup source might abort suspend.
>   * @active: Status of the wakeup source.
>   * @has_timeout: The wakeup source has been activated with a timeout.
> + * @attached_dev: The device it attached to
>   */
>  struct wakeup_source {
>  	const char 		*name;
> @@ -70,6 +71,7 @@ struct wakeup_source {
>  	unsigned long		wakeup_count;
>  	bool			active:1;
>  	bool			autosleep_enabled:1;
> +	struct device		*attached_dev;

Please (a) call it just dev and (b) move it up (before wakeirq, say).

>  };
>  
>  #ifdef CONFIG_PM_SLEEP
> @@ -101,6 +103,7 @@ static inline void device_set_wakeup_path(struct device *dev)
>  extern void wakeup_source_remove(struct wakeup_source *ws);
>  extern struct wakeup_source *wakeup_source_register(const char *name);
>  extern void wakeup_source_unregister(struct wakeup_source *ws);
> +extern struct wakeup_source *wakeup_source_get_next(struct wakeup_source *ws);
>  extern int device_wakeup_enable(struct device *dev);
>  extern int device_wakeup_disable(struct device *dev);
>  extern void device_set_wakeup_capable(struct device *dev, bool capable);
> 





^ permalink raw reply

* [PATCH v1 22/22] admin-guide: add kdump documentation into it
From: Mauro Carvalho Chehab @ 2019-06-18 21:05 UTC (permalink / raw)
  To: Linux Doc Mailing List
  Cc: Rich Felker, linux-sh, Catalin Marinas, Will Deacon,
	Jerry Hoemann, Harry Wei, Paul Mackerras, H. Peter Anvin,
	Mauro Carvalho Chehab, Alex Shi, Yoshinori Sato, Jonathan Corbet,
	x86, Russell King, Ingo Molnar, Dave Young, Guenter Roeck,
	linux-watchdog, Mauro Carvalho Chehab, Borislav Petkov,
	Thomas Gleixner, Wim Van Sebroeck, linux-arm-kernel, Baoquan He,
	kexec, linux-kernel, Vivek Goyal, linuxppc-dev
In-Reply-To: <cover.1560891322.git.mchehab+samsung@kernel.org>

The Kdump documentation describes procedures with admins use
in order to solve issues on their systems.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/admin-guide/bug-hunting.rst            | 4 ++--
 Documentation/admin-guide/index.rst                  | 1 +
 Documentation/{ => admin-guide}/kdump/gdbmacros.txt  | 0
 Documentation/{ => admin-guide}/kdump/index.rst      | 1 -
 Documentation/{ => admin-guide}/kdump/kdump.rst      | 0
 Documentation/{ => admin-guide}/kdump/vmcoreinfo.rst | 0
 Documentation/admin-guide/kernel-parameters.txt      | 6 +++---
 Documentation/powerpc/firmware-assisted-dump.rst     | 2 +-
 Documentation/translations/zh_CN/oops-tracing.txt    | 4 ++--
 Documentation/watchdog/hpwdt.rst                     | 2 +-
 MAINTAINERS                                          | 2 +-
 arch/arm/Kconfig                                     | 2 +-
 arch/arm64/Kconfig                                   | 2 +-
 arch/sh/Kconfig                                      | 2 +-
 arch/x86/Kconfig                                     | 4 ++--
 15 files changed, 16 insertions(+), 16 deletions(-)
 rename Documentation/{ => admin-guide}/kdump/gdbmacros.txt (100%)
 rename Documentation/{ => admin-guide}/kdump/index.rst (97%)
 rename Documentation/{ => admin-guide}/kdump/kdump.rst (100%)
 rename Documentation/{ => admin-guide}/kdump/vmcoreinfo.rst (100%)

diff --git a/Documentation/admin-guide/bug-hunting.rst b/Documentation/admin-guide/bug-hunting.rst
index b761aa2a51d2..44b8a4edd348 100644
--- a/Documentation/admin-guide/bug-hunting.rst
+++ b/Documentation/admin-guide/bug-hunting.rst
@@ -90,9 +90,9 @@ the disk is not available then you have three options:
     run a null modem to a second machine and capture the output there
     using your favourite communication program.  Minicom works well.
 
-(3) Use Kdump (see Documentation/kdump/kdump.rst),
+(3) Use Kdump (see Documentation/admin-guide/kdump/kdump.rst),
     extract the kernel ring buffer from old memory with using dmesg
-    gdbmacro in Documentation/kdump/gdbmacros.txt.
+    gdbmacro in Documentation/admin-guide/kdump/gdbmacros.txt.
 
 Finding the bug's location
 --------------------------
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index e4f0cb2a02bd..9f6820a7e8f8 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -39,6 +39,7 @@ problems and bugs in particular.
    ramoops
    dynamic-debug-howto
    init
+   kdump/index
 
 This is the beginning of a section with information of interest to
 application developers.  Documents covering various aspects of the kernel
diff --git a/Documentation/kdump/gdbmacros.txt b/Documentation/admin-guide/kdump/gdbmacros.txt
similarity index 100%
rename from Documentation/kdump/gdbmacros.txt
rename to Documentation/admin-guide/kdump/gdbmacros.txt
diff --git a/Documentation/kdump/index.rst b/Documentation/admin-guide/kdump/index.rst
similarity index 97%
rename from Documentation/kdump/index.rst
rename to Documentation/admin-guide/kdump/index.rst
index 2b17fcf6867a..8e2ebd0383cd 100644
--- a/Documentation/kdump/index.rst
+++ b/Documentation/admin-guide/kdump/index.rst
@@ -1,4 +1,3 @@
-:orphan:
 
 ================================================================
 Documentation for Kdump - The kexec-based Crash Dumping Solution
diff --git a/Documentation/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst
similarity index 100%
rename from Documentation/kdump/kdump.rst
rename to Documentation/admin-guide/kdump/kdump.rst
diff --git a/Documentation/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
similarity index 100%
rename from Documentation/kdump/vmcoreinfo.rst
rename to Documentation/admin-guide/kdump/vmcoreinfo.rst
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 69a9e2e66dfb..1f3fc445c78d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -708,14 +708,14 @@
 			[KNL, x86_64] select a region under 4G first, and
 			fall back to reserve region above 4G when '@offset'
 			hasn't been specified.
-			See Documentation/kdump/kdump.rst for further details.
+			See Documentation/admin-guide/kdump/kdump.rst for further details.
 
 	crashkernel=range1:size1[,range2:size2,...][@offset]
 			[KNL] Same as above, but depends on the memory
 			in the running system. The syntax of range is
 			start-[end] where start and end are both
 			a memory unit (amount[KMG]). See also
-			Documentation/kdump/kdump.rst for an example.
+			Documentation/admin-guide/kdump/kdump.rst for an example.
 
 	crashkernel=size[KMG],high
 			[KNL, x86_64] range could be above 4G. Allow kernel
@@ -1207,7 +1207,7 @@
 			Specifies physical address of start of kernel core
 			image elf header and optionally the size. Generally
 			kexec loader will pass this option to capture kernel.
-			See Documentation/kdump/kdump.rst for details.
+			See Documentation/admin-guide/kdump/kdump.rst for details.
 
 	enable_mtrr_cleanup [X86]
 			The kernel tries to adjust MTRR layout from continuous
diff --git a/Documentation/powerpc/firmware-assisted-dump.rst b/Documentation/powerpc/firmware-assisted-dump.rst
index d7fa7c35dd12..9ca12830a48e 100644
--- a/Documentation/powerpc/firmware-assisted-dump.rst
+++ b/Documentation/powerpc/firmware-assisted-dump.rst
@@ -61,7 +61,7 @@ as follows:
          the default calculated size. Use this option if default
          boot memory size is not sufficient for second kernel to
          boot successfully. For syntax of crashkernel= parameter,
-         refer to Documentation/kdump/kdump.rst. If any offset is
+         refer to Documentation/admin-guide/kdump/kdump.rst. If any offset is
          provided in crashkernel= parameter, it will be ignored
          as fadump uses a predefined offset to reserve memory
          for boot memory dump preservation in case of a crash.
diff --git a/Documentation/translations/zh_CN/oops-tracing.txt b/Documentation/translations/zh_CN/oops-tracing.txt
index 368ddd05b304..c5f3bda7abcb 100644
--- a/Documentation/translations/zh_CN/oops-tracing.txt
+++ b/Documentation/translations/zh_CN/oops-tracing.txt
@@ -53,8 +53,8 @@ cat /proc/kmsg > file, 然而你必须介入中止传输, kmsg是一个“
 (2)用串口终端启动(请参看Documentation/admin-guide/serial-console.rst),运行一个null
 modem到另一台机器并用你喜欢的通讯工具获取输出。Minicom工作地很好。
 
-(3)使用Kdump(请参看Documentation/kdump/kdump.rst),
-使用在Documentation/kdump/gdbmacros.txt中定义的dmesg gdb宏,从旧的内存中提取内核
+(3)使用Kdump(请参看Documentation/admin-guide/kdump/kdump.rst),
+使用在Documentation/admin-guide/kdump/gdbmacros.txt中定义的dmesg gdb宏,从旧的内存中提取内核
 环形缓冲区。
 
 完整信息
diff --git a/Documentation/watchdog/hpwdt.rst b/Documentation/watchdog/hpwdt.rst
index 437456bd91a4..9a75720dd51a 100644
--- a/Documentation/watchdog/hpwdt.rst
+++ b/Documentation/watchdog/hpwdt.rst
@@ -59,7 +59,7 @@ Last reviewed: 08/20/2018
  and loop forever.  This is generally not what a watchdog user wants.
 
  For those wishing to learn more please see:
-	Documentation/kdump/kdump.rst
+	Documentation/admin-guide/kdump/kdump.rst
 	Documentation/admin-guide/kernel-parameters.txt (panic=)
 	Your Linux Distribution specific documentation.
 
diff --git a/MAINTAINERS b/MAINTAINERS
index ab170522ec55..071b9e5a1664 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8571,7 +8571,7 @@ R:	Vivek Goyal <vgoyal@redhat.com>
 L:	kexec@lists.infradead.org
 W:	http://lse.sourceforge.net/kdump/
 S:	Maintained
-F:	Documentation/kdump/
+F:	Documentation/admin-guide/kdump/
 
 KEENE FM RADIO TRANSMITTER DRIVER
 M:	Hans Verkuil <hverkuil@xs4all.nl>
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ff0e247573d8..fc495004f148 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2009,7 +2009,7 @@ config CRASH_DUMP
 	  kdump/kexec. The crash dump kernel must be compiled to a
 	  memory address not used by the main kernel
 
-	  For more details see Documentation/kdump/kdump.rst
+	  For more details see Documentation/admin-guide/kdump/kdump.rst
 
 config AUTO_ZRELADDR
 	bool "Auto calculation of the decompressed kernel image address"
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a8a5fe91c579..9c2275b22904 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -995,7 +995,7 @@ config CRASH_DUMP
 	  reserved region and then later executed after a crash by
 	  kdump/kexec.
 
-	  For more details see Documentation/kdump/kdump.rst
+	  For more details see Documentation/admin-guide/kdump/kdump.rst
 
 config XEN_DOM0
 	def_bool y
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 7440639510a0..b731d22c5b9d 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -624,7 +624,7 @@ config CRASH_DUMP
 	  to a memory address not used by the main kernel using
 	  PHYSICAL_START.
 
-	  For more details see Documentation/kdump/kdump.rst
+	  For more details see Documentation/admin-guide/kdump/kdump.rst
 
 config KEXEC_JUMP
 	bool "kexec jump (EXPERIMENTAL)"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 586dd3529d14..8ed2e47ef4ba 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2052,7 +2052,7 @@ config CRASH_DUMP
 	  to a memory address not used by the main kernel or BIOS using
 	  PHYSICAL_START, or it must be built as a relocatable image
 	  (CONFIG_RELOCATABLE=y).
-	  For more details see Documentation/kdump/kdump.rst
+	  For more details see Documentation/admin-guide/kdump/kdump.rst
 
 config KEXEC_JUMP
 	bool "kexec jump"
@@ -2089,7 +2089,7 @@ config PHYSICAL_START
 	  the reserved region.  In other words, it can be set based on
 	  the "X" value as specified in the "crashkernel=YM@XM"
 	  command line boot parameter passed to the panic-ed
-	  kernel. Please take a look at Documentation/kdump/kdump.rst
+	  kernel. Please take a look at Documentation/admin-guide/kdump/kdump.rst
 	  for more details about crash dumps.
 
 	  Usage of bzImage for capturing the crash dump is recommended as
-- 
2.21.0


^ permalink raw reply related

* [PATCH V2] crypto/NX: Set receive window credits to max number of CRBs in RxFIFO
From: Haren Myneni @ 2019-06-18 19:09 UTC (permalink / raw)
  To: mpe, herbert; +Cc: linuxppc-dev, linux-crypto, stable

    
System gets checkstop if RxFIFO overruns with more requests than the
maximum possible number of CRBs in FIFO at the same time. The max number
of requests per window is controlled by window credits. So find max
CRBs from FIFO size and set it to receive window credits.

Fixes: b0d6c9bab5e4 ("crypto/nx: Add P9 NX support for 842 compression engine")
CC: stable@vger.kernel.org # v4.14+   
Signed-off-by:Haren Myneni <haren@us.ibm.com>

diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c
index 4acbc47..e78ff5c 100644
--- a/drivers/crypto/nx/nx-842-powernv.c
+++ b/drivers/crypto/nx/nx-842-powernv.c
@@ -27,8 +27,6 @@
 #define WORKMEM_ALIGN	(CRB_ALIGN)
 #define CSB_WAIT_MAX	(5000) /* ms */
 #define VAS_RETRIES	(10)
-/* # of requests allowed per RxFIFO at a time. 0 for unlimited */
-#define MAX_CREDITS_PER_RXFIFO	(1024)
 
 struct nx842_workmem {
 	/* Below fields must be properly aligned */
@@ -812,7 +810,11 @@ static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
 	rxattr.lnotify_lpid = lpid;
 	rxattr.lnotify_pid = pid;
 	rxattr.lnotify_tid = tid;
-	rxattr.wcreds_max = MAX_CREDITS_PER_RXFIFO;
+	/*
+	 * Maximum RX window credits can not be more than #CRBs in
+	 * RxFIFO. Otherwise, can get checkstop if RxFIFO overruns.
+	 */
+	rxattr.wcreds_max = fifo_size / CRB_SIZE;
 
 	/*
 	 * Open a VAS receice window which is used to configure RxFIFO
    



^ permalink raw reply related

* Re: [EXTERNAL] Re: crypto/NX: Set receive window credits to max number of CRBs in RxFIFO
From: Haren Myneni @ 2019-06-18 18:49 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, Herbert Xu, stable, linux-crypto
In-Reply-To: <87ef3royva.fsf@concordia.ellerman.id.au>

On 06/18/2019 05:35 AM, Michael Ellerman wrote:
> Haren Myneni <haren@linux.vnet.ibm.com> writes:
>>     
>> System gets checkstop if RxFIFO overruns with more requests than the
>> maximum possible number of CRBs in FIFO at the same time. So find max
>> CRBs from FIFO size and set it to receive window credits.
>>    
>> CC: stable@vger.kernel.org # v4.14+
>> Signed-off-by:Haren Myneni <haren@us.ibm.com>
> 
> It's helpful to mention the actual commit that's fixed, so that people
> with backports can join things up, so should that be:
> 
>   Fixes: b0d6c9bab5e4 ("crypto/nx: Add P9 NX support for 842 compression engine")
> 
> ???

Sorry, Yes, We use 1K for Rx window credits in b0d6c9bab5e41d07f (crypto/nx: Add P9 NX support for 842 compression engine). But credits should be based on FIFO size. 

I will repost the patch.  

> 
> cheers
> 
>> diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c
>> index 4acbc47..e78ff5c 100644
>> --- a/drivers/crypto/nx/nx-842-powernv.c
>> +++ b/drivers/crypto/nx/nx-842-powernv.c
>> @@ -27,8 +27,6 @@
>>  #define WORKMEM_ALIGN	(CRB_ALIGN)
>>  #define CSB_WAIT_MAX	(5000) /* ms */
>>  #define VAS_RETRIES	(10)
>> -/* # of requests allowed per RxFIFO at a time. 0 for unlimited */
>> -#define MAX_CREDITS_PER_RXFIFO	(1024)
>>  
>>  struct nx842_workmem {
>>  	/* Below fields must be properly aligned */
>> @@ -812,7 +810,11 @@ static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
>>  	rxattr.lnotify_lpid = lpid;
>>  	rxattr.lnotify_pid = pid;
>>  	rxattr.lnotify_tid = tid;
>> -	rxattr.wcreds_max = MAX_CREDITS_PER_RXFIFO;
>> +	/*
>> +	 * Maximum RX window credits can not be more than #CRBs in
>> +	 * RxFIFO. Otherwise, can get checkstop if RxFIFO overruns.
>> +	 */
>> +	rxattr.wcreds_max = fifo_size / CRB_SIZE;
>>  
>>  	/*
>>  	 * Open a VAS receice window which is used to configure RxFIFO
> 


^ permalink raw reply

* Re: [PATCH 5/7] powerpc/ftrace: Update ftrace_location() for powerpc -mprofile-kernel
From: Steven Rostedt @ 2019-06-18 18:32 UTC (permalink / raw)
  To: Naveen N. Rao
  Cc: linux-kernel, Nicholas Piggin, Masami Hiramatsu, linuxppc-dev,
	Ingo Molnar
In-Reply-To: <1560881840.vz9llflvnf.naveen@linux.ibm.com>

On Tue, 18 Jun 2019 23:53:11 +0530
"Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> wrote:

> Naveen N. Rao wrote:
> > Steven Rostedt wrote:  
> >> On Tue, 18 Jun 2019 20:17:04 +0530
> >> "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> wrote:
> >>   
> >>> @@ -1551,7 +1551,7 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end)
> >>>  	key.flags = end;	/* overload flags, as it is unsigned long */
> >>>  
> >>>  	for (pg = ftrace_pages_start; pg; pg = pg->next) {
> >>> -		if (end < pg->records[0].ip ||
> >>> +		if (end <= pg->records[0].ip ||  
> >> 
> >> This breaks the algorithm. "end" is inclusive. That is, if you look for
> >> a single byte, where "start" and "end" are the same, and it happens to
> >> be the first ip on the pg page, it will be skipped, and not found.  
> > 
> > Thanks. It looks like I should be over-riding ftrace_location() instead.  
> > I will update this patch.  
> 
> I think I will have ftrace own the two instruction range, regardless of 
> whether the preceding instruction is a 'mflr r0' or not. This simplifies 
> things and I don't see an issue with it as of now. I will do more 
> testing to confirm.
> 
> - Naveen
> 
> 
> --- a/arch/powerpc/kernel/trace/ftrace.c
> +++ b/arch/powerpc/kernel/trace/ftrace.c
> @@ -951,6 +951,16 @@ void arch_ftrace_update_code(int command)
>  }
>  
>  #ifdef CONFIG_MPROFILE_KERNEL
> +/*
> + * We consider two instructions -- 'mflr r0', 'bl _mcount' -- to be part
> + * of ftrace. When checking for the first instruction, we want to include
> + * the next instruction in the range check.
> + */
> +unsigned long ftrace_location(unsigned long ip)
> +{
> +	return ftrace_location_range(ip, ip + MCOUNT_INSN_SIZE);
> +}
> +
>  /* Returns 1 if we patched in the mflr */
>  static int __ftrace_make_call_prep(struct dyn_ftrace *rec)
>  {
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 21d8e201ee80..122e2bb4a739 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1573,7 +1573,7 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end)
>   * the function tracer. It checks the ftrace internal tables to
>   * determine if the address belongs or not.
>   */
> -unsigned long ftrace_location(unsigned long ip)
> +unsigned long __weak ftrace_location(unsigned long ip)
>  {
>  	return ftrace_location_range(ip, ip);
>  }

Actually, instead of making this a weak function, let's do this:


In include/ftrace.h:

#ifndef FTRACE_IP_EXTENSION
# define FTRACE_IP_EXTENSION	0
#endif


In arch/powerpc/include/asm/ftrace.h

#define FTRACE_IP_EXTENSION	MCOUNT_INSN_SIZE


Then we can just have:

unsigned long ftrace_location(unsigned long ip)
{
	return ftrace_location_range(ip, ip + FTRACE_IP_EXTENSION);
}

-- Steve

^ permalink raw reply

* Re: [PATCH 5/7] powerpc/ftrace: Update ftrace_location() for powerpc -mprofile-kernel
From: Naveen N. Rao @ 2019-06-18 18:23 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Nicholas Piggin, Masami Hiramatsu, linuxppc-dev,
	Ingo Molnar
In-Reply-To: <1560881411.p0i6a1dkwk.naveen@linux.ibm.com>

Naveen N. Rao wrote:
> Steven Rostedt wrote:
>> On Tue, 18 Jun 2019 20:17:04 +0530
>> "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> wrote:
>> 
>>> @@ -1551,7 +1551,7 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end)
>>>  	key.flags = end;	/* overload flags, as it is unsigned long */
>>>  
>>>  	for (pg = ftrace_pages_start; pg; pg = pg->next) {
>>> -		if (end < pg->records[0].ip ||
>>> +		if (end <= pg->records[0].ip ||
>> 
>> This breaks the algorithm. "end" is inclusive. That is, if you look for
>> a single byte, where "start" and "end" are the same, and it happens to
>> be the first ip on the pg page, it will be skipped, and not found.
> 
> Thanks. It looks like I should be over-riding ftrace_location() instead.  
> I will update this patch.

I think I will have ftrace own the two instruction range, regardless of 
whether the preceding instruction is a 'mflr r0' or not. This simplifies 
things and I don't see an issue with it as of now. I will do more 
testing to confirm.

- Naveen


--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -951,6 +951,16 @@ void arch_ftrace_update_code(int command)
 }
 
 #ifdef CONFIG_MPROFILE_KERNEL
+/*
+ * We consider two instructions -- 'mflr r0', 'bl _mcount' -- to be part
+ * of ftrace. When checking for the first instruction, we want to include
+ * the next instruction in the range check.
+ */
+unsigned long ftrace_location(unsigned long ip)
+{
+	return ftrace_location_range(ip, ip + MCOUNT_INSN_SIZE);
+}
+
 /* Returns 1 if we patched in the mflr */
 static int __ftrace_make_call_prep(struct dyn_ftrace *rec)
 {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 21d8e201ee80..122e2bb4a739 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1573,7 +1573,7 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end)
  * the function tracer. It checks the ftrace internal tables to
  * determine if the address belongs or not.
  */
-unsigned long ftrace_location(unsigned long ip)
+unsigned long __weak ftrace_location(unsigned long ip)
 {
 	return ftrace_location_range(ip, ip);
 }



^ permalink raw reply related

* Re: [PATCH 5/7] powerpc/ftrace: Update ftrace_location() for powerpc -mprofile-kernel
From: Naveen N. Rao @ 2019-06-18 18:11 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Nicholas Piggin, Masami Hiramatsu, linuxppc-dev,
	Ingo Molnar
In-Reply-To: <20190618114509.5b1acbe5@gandalf.local.home>

Steven Rostedt wrote:
> On Tue, 18 Jun 2019 20:17:04 +0530
> "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> wrote:
> 
>> @@ -1551,7 +1551,7 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end)
>>  	key.flags = end;	/* overload flags, as it is unsigned long */
>>  
>>  	for (pg = ftrace_pages_start; pg; pg = pg->next) {
>> -		if (end < pg->records[0].ip ||
>> +		if (end <= pg->records[0].ip ||
> 
> This breaks the algorithm. "end" is inclusive. That is, if you look for
> a single byte, where "start" and "end" are the same, and it happens to
> be the first ip on the pg page, it will be skipped, and not found.

Thanks. It looks like I should be over-riding ftrace_location() instead.  
I will update this patch.

- Naveen



^ permalink raw reply

* Re: [PATCH 0/1] PPC32: fix ptrace() access to FPU registers
From: Andreas Schwab @ 2019-06-18 18:09 UTC (permalink / raw)
  To: Radu Rendec; +Cc: Paul Mackerras, linuxppc-dev, Oleg Nesterov, Daniel Axtens
In-Reply-To: <fbf9f9cbb99fc40c7d7af86fee3984427c61b799.camel__46559.9162316479$1560860409$gmane$org@gmail.com>

On Jun 18 2019, Radu Rendec <radu.rendec@gmail.com> wrote:

> Since you already have a working setup, it would be nice if you could
> add a printk to arch_ptrace() to print the address and confirm what I
> believe happens (by reading the gdb source code).

A ppc32 ptrace syscall goes through compat_arch_ptrace.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

^ permalink raw reply

* Re: [PATCH] ps3_gelic: Use [] to denote a flexible array member
From: David Miller @ 2019-06-18 17:43 UTC (permalink / raw)
  To: geert+renesas; +Cc: geoff, linux-kernel, paulus, netdev, linuxppc-dev
In-Reply-To: <20190617115044.4406-1-geert+renesas@glider.be>

From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 17 Jun 2019 13:50:44 +0200

> Flexible array members should be denoted using [] instead of [0], else
> gcc will not warn when they are no longer at the end of a struct.
> 
> Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>

Applied to net-next, thanks.

^ permalink raw reply

* Re: [PATCH v5 2/2] powerpc: Fix compile issue with force DAWR
From: Christophe Leroy @ 2019-06-18 16:28 UTC (permalink / raw)
  To: Michael Neuling, mpe; +Cc: Mathieu Malaterre, hch, linuxppc-dev
In-Reply-To: <20190604030037.9424-2-mikey@neuling.org>



Le 04/06/2019 à 05:00, Michael Neuling a écrit :
> If you compile with KVM but without CONFIG_HAVE_HW_BREAKPOINT you fail
> at linking with:
>    arch/powerpc/kvm/book3s_hv_rmhandlers.o:(.text+0x708): undefined reference to `dawr_force_enable'
> 
> This was caused by commit c1fe190c0672 ("powerpc: Add force enable of
> DAWR on P9 option").
> 
> This moves a bunch of code around to fix this. It moves a lot of the
> DAWR code in a new file and creates a new CONFIG_PPC_DAWR to enable
> compiling it.

After looking at all this once more, I'm just wondering: why are we 
creating stuff specific to DAWR ?

In the old days, we only add DABR, and everything was named on DABR.
When DAWR was introduced some years ago we renamed stuff like do_dabr() 
to do_break() so that we could regroup things together. And now we are 
taking dawr() out of the rest. Why not keep dabr() stuff and dawr() 
stuff all together in something dedicated to breakpoints, and try to 
regroup all breakpoint stuff in a single place ? I see some 
breakpointing stuff done in kernel/process.c and other things done in 
hw_breakpoint.c, to common functions call from one file to the other, 
preventing GCC to fully optimise, etc ...

Also, behing this thinking, I have the idea that we could easily 
implement 512 bytes breakpoints on the 8xx too. The 8xx have neither 
DABR nor DAWR, but is using a set of comparators. And as you can see in 
the 8xx version of __set_dabr() in kernel/process.c, we emulate the DABR 
behaviour by setting two comparators. By using the same comparators with 
a different setup, we should be able to implement breakpoints on larger 
ranges of address.

Christophe

> 
> Fixes: c1fe190c0672 ("powerpc: Add force enable of DAWR on P9 option")
> Signed-off-by: Michael Neuling <mikey@neuling.org>
> --
> v5:
>    - Changes based on comments by hch
>      - Change // to /* comments
>      - Change return code from -1 to -ENODEV
>      - Remove unneeded default n in new Kconfig option
>      - Remove setting to default value
>      - Remove unnecessary braces
> 
> v4:
>    - Fix merge conflict with patch from Mathieu Malaterre:
>       powerpc: silence a -Wcast-function-type warning in dawr_write_file_bool
>    - Fixed checkpatch issues noticed by Christophe Leroy.
> 
> v3:
>    Fixes based on Christophe Leroy's comments:
>    - Fix Kconfig options to better reflect reality
>    - Reorder alphabetically
>    - Inline vs #define
>    - Fixed default return for dawr_enabled() when CONFIG_PPC_DAWR=N
> 
> V2:
>    Fixes based on Christophe Leroy's comments:
>    - Fix commit message formatting
>    - Move more DAWR code into dawr.c
> ---
>   arch/powerpc/Kconfig                     |  4 +
>   arch/powerpc/include/asm/hw_breakpoint.h | 21 +++--
>   arch/powerpc/kernel/Makefile             |  1 +
>   arch/powerpc/kernel/dawr.c               | 98 ++++++++++++++++++++++++
>   arch/powerpc/kernel/hw_breakpoint.c      | 61 ---------------
>   arch/powerpc/kernel/process.c            | 28 -------
>   arch/powerpc/kvm/Kconfig                 |  1 +
>   7 files changed, 118 insertions(+), 96 deletions(-)
>   create mode 100644 arch/powerpc/kernel/dawr.c
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 8c1c636308..9d61b36df4 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -234,6 +234,7 @@ config PPC
>   	select OLD_SIGSUSPEND
>   	select PCI_DOMAINS			if PCI
>   	select PCI_SYSCALL			if PCI
> +	select PPC_DAWR				if PPC64
>   	select RTC_LIB
>   	select SPARSE_IRQ
>   	select SYSCTL_EXCEPTION_TRACE
> @@ -370,6 +371,9 @@ config PPC_ADV_DEBUG_DAC_RANGE
>   	depends on PPC_ADV_DEBUG_REGS && 44x
>   	default y
>   
> +config PPC_DAWR
> +	bool
> +
>   config ZONE_DMA
>   	bool
>   	default y if PPC_BOOK3E_64
> diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
> index 0fe8c1e46b..41abdae6d0 100644
> --- a/arch/powerpc/include/asm/hw_breakpoint.h
> +++ b/arch/powerpc/include/asm/hw_breakpoint.h
> @@ -90,18 +90,25 @@ static inline void hw_breakpoint_disable(void)
>   extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
>   int hw_breakpoint_handler(struct die_args *args);
>   
> -extern int set_dawr(struct arch_hw_breakpoint *brk);
> +#else	/* CONFIG_HAVE_HW_BREAKPOINT */
> +static inline void hw_breakpoint_disable(void) { }
> +static inline void thread_change_pc(struct task_struct *tsk,
> +					struct pt_regs *regs) { }
> +
> +#endif	/* CONFIG_HAVE_HW_BREAKPOINT */
> +
> +
> +#ifdef CONFIG_PPC_DAWR
>   extern bool dawr_force_enable;
>   static inline bool dawr_enabled(void)
>   {
>   	return dawr_force_enable;
>   }
> -
> -#else	/* CONFIG_HAVE_HW_BREAKPOINT */
> -static inline void hw_breakpoint_disable(void) { }
> -static inline void thread_change_pc(struct task_struct *tsk,
> -					struct pt_regs *regs) { }
> +int set_dawr(struct arch_hw_breakpoint *brk);
> +#else
>   static inline bool dawr_enabled(void) { return false; }
> -#endif	/* CONFIG_HAVE_HW_BREAKPOINT */
> +static inline int set_dawr(struct arch_hw_breakpoint *brk) { return -1; }
> +#endif
> +
>   #endif	/* __KERNEL__ */
>   #endif	/* _PPC_BOOK3S_64_HW_BREAKPOINT_H */
> diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
> index 0ea6c4aa3a..56dfa7a2a6 100644
> --- a/arch/powerpc/kernel/Makefile
> +++ b/arch/powerpc/kernel/Makefile
> @@ -56,6 +56,7 @@ obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
>   obj-$(CONFIG_VDSO32)		+= vdso32/
>   obj-$(CONFIG_PPC_WATCHDOG)	+= watchdog.o
>   obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
> +obj-$(CONFIG_PPC_DAWR)		+= dawr.o
>   obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
>   obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o
>   obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o
> diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
> new file mode 100644
> index 0000000000..ae3bd6abac
> --- /dev/null
> +++ b/arch/powerpc/kernel/dawr.c
> @@ -0,0 +1,98 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * DAWR infrastructure
> + *
> + * Copyright 2019, Michael Neuling, IBM Corporation.
> + */
> +
> +#include <linux/types.h>
> +#include <linux/export.h>
> +#include <linux/fs.h>
> +#include <linux/debugfs.h>
> +#include <asm/debugfs.h>
> +#include <asm/machdep.h>
> +#include <asm/hvcall.h>
> +
> +bool dawr_force_enable;
> +EXPORT_SYMBOL_GPL(dawr_force_enable);
> +
> +int set_dawr(struct arch_hw_breakpoint *brk)
> +{
> +	unsigned long dawr, dawrx, mrd;
> +
> +	dawr = brk->address;
> +
> +	dawrx  = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE))
> +		<< (63 - 58);
> +	dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) << (63 - 59);
> +	dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) >> 3;
> +	/* dawr length is stored in field MDR bits 48:53.  Matches range in
> +	 * doublewords (64 bits) baised by -1 eg. 0b000000=1DW and
> +	 * 0b111111=64DW.
> +	 * brk->len is in bytes.
> +	 * This aligns up to double word size, shifts and does the bias.
> +	 */
> +	mrd = ((brk->len + 7) >> 3) - 1;
> +	dawrx |= (mrd & 0x3f) << (63 - 53);
> +
> +	if (ppc_md.set_dawr)
> +		return ppc_md.set_dawr(dawr, dawrx);
> +	mtspr(SPRN_DAWR, dawr);
> +	mtspr(SPRN_DAWRX, dawrx);
> +	return 0;
> +}
> +
> +static void set_dawr_cb(void *info)
> +{
> +	set_dawr(info);
> +}
> +
> +static ssize_t dawr_write_file_bool(struct file *file,
> +				    const char __user *user_buf,
> +				    size_t count, loff_t *ppos)
> +{
> +	struct arch_hw_breakpoint null_brk = {0, 0, 0};
> +	size_t rc;
> +
> +	/* Send error to user if they hypervisor won't allow us to write DAWR */
> +	if (!dawr_force_enable &&
> +	    firmware_has_feature(FW_FEATURE_LPAR) &&
> +	    set_dawr(&null_brk) != H_SUCCESS)
> +		return -ENODEV;
> +
> +	rc = debugfs_write_file_bool(file, user_buf, count, ppos);
> +	if (rc)
> +		return rc;
> +
> +	/* If we are clearing, make sure all CPUs have the DAWR cleared */
> +	if (!dawr_force_enable)
> +		smp_call_function(set_dawr_cb, &null_brk, 0);
> +
> +	return rc;
> +}
> +
> +static const struct file_operations dawr_enable_fops = {
> +	.read =		debugfs_read_file_bool,
> +	.write =	dawr_write_file_bool,
> +	.open =		simple_open,
> +	.llseek =	default_llseek,
> +};
> +
> +static int __init dawr_force_setup(void)
> +{
> +	if (cpu_has_feature(CPU_FTR_DAWR)) {
> +		/* Don't setup sysfs file for user control on P8 */
> +		dawr_force_enable = true;
> +		return 0;
> +	}
> +
> +	if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
> +		/* Turn DAWR off by default, but allow admin to turn it on */
> +		debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
> +					   powerpc_debugfs_root,
> +					   &dawr_force_enable,
> +					   &dawr_enable_fops);
> +	}
> +	return 0;
> +}
> +arch_initcall(dawr_force_setup);
> diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
> index ca3a2358b7..95605a9c9a 100644
> --- a/arch/powerpc/kernel/hw_breakpoint.c
> +++ b/arch/powerpc/kernel/hw_breakpoint.c
> @@ -380,64 +380,3 @@ void hw_breakpoint_pmu_read(struct perf_event *bp)
>   {
>   	/* TODO */
>   }
> -
> -bool dawr_force_enable;
> -EXPORT_SYMBOL_GPL(dawr_force_enable);
> -
> -static void set_dawr_cb(void *info)
> -{
> -	set_dawr(info);
> -}
> -
> -static ssize_t dawr_write_file_bool(struct file *file,
> -				    const char __user *user_buf,
> -				    size_t count, loff_t *ppos)
> -{
> -	struct arch_hw_breakpoint null_brk = {0, 0, 0};
> -	size_t rc;
> -
> -	/* Send error to user if they hypervisor won't allow us to write DAWR */
> -	if ((!dawr_force_enable) &&
> -	    (firmware_has_feature(FW_FEATURE_LPAR)) &&
> -	    (set_dawr(&null_brk) != H_SUCCESS))
> -		return -1;
> -
> -	rc = debugfs_write_file_bool(file, user_buf, count, ppos);
> -	if (rc)
> -		return rc;
> -
> -	/* If we are clearing, make sure all CPUs have the DAWR cleared */
> -	if (!dawr_force_enable)
> -		smp_call_function(set_dawr_cb, &null_brk, 0);
> -
> -	return rc;
> -}
> -
> -static const struct file_operations dawr_enable_fops = {
> -	.read =		debugfs_read_file_bool,
> -	.write =	dawr_write_file_bool,
> -	.open =		simple_open,
> -	.llseek =	default_llseek,
> -};
> -
> -static int __init dawr_force_setup(void)
> -{
> -	dawr_force_enable = false;
> -
> -	if (cpu_has_feature(CPU_FTR_DAWR)) {
> -		/* Don't setup sysfs file for user control on P8 */
> -		dawr_force_enable = true;
> -		return 0;
> -	}
> -
> -	if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
> -		/* Turn DAWR off by default, but allow admin to turn it on */
> -		dawr_force_enable = false;
> -		debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
> -					   powerpc_debugfs_root,
> -					   &dawr_force_enable,
> -					   &dawr_enable_fops);
> -	}
> -	return 0;
> -}
> -arch_initcall(dawr_force_setup);
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index 87da401299..03a2da35ce 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -797,34 +797,6 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk)
>   	return __set_dabr(dabr, dabrx);
>   }
>   
> -int set_dawr(struct arch_hw_breakpoint *brk)
> -{
> -	unsigned long dawr, dawrx, mrd;
> -
> -	dawr = brk->address;
> -
> -	dawrx  = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE)) \
> -		                   << (63 - 58); //* read/write bits */
> -	dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) \
> -		                   << (63 - 59); //* translate */
> -	dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) \
> -		                   >> 3; //* PRIM bits */
> -	/* dawr length is stored in field MDR bits 48:53.  Matches range in
> -	   doublewords (64 bits) baised by -1 eg. 0b000000=1DW and
> -	   0b111111=64DW.
> -	   brk->len is in bytes.
> -	   This aligns up to double word size, shifts and does the bias.
> -	*/
> -	mrd = ((brk->len + 7) >> 3) - 1;
> -	dawrx |= (mrd & 0x3f) << (63 - 53);
> -
> -	if (ppc_md.set_dawr)
> -		return ppc_md.set_dawr(dawr, dawrx);
> -	mtspr(SPRN_DAWR, dawr);
> -	mtspr(SPRN_DAWRX, dawrx);
> -	return 0;
> -}
> -
>   void __set_breakpoint(struct arch_hw_breakpoint *brk)
>   {
>   	memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
> diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
> index f53997a8ca..b8e13d5a4a 100644
> --- a/arch/powerpc/kvm/Kconfig
> +++ b/arch/powerpc/kvm/Kconfig
> @@ -38,6 +38,7 @@ config KVM_BOOK3S_32_HANDLER
>   config KVM_BOOK3S_64_HANDLER
>   	bool
>   	select KVM_BOOK3S_HANDLER
> +	select PPC_DAWR_FORCE_ENABLE
>   
>   config KVM_BOOK3S_PR_POSSIBLE
>   	bool
> 

^ permalink raw reply

* Re: [PATCH 00/15] kbuild: refactor headers_install and support compile-test of UAPI headers
From: Masahiro Yamada @ 2019-06-18 15:46 UTC (permalink / raw)
  To: Linux Kbuild mailing list
  Cc: Song Liu, open list:DOCUMENTATION, Palmer Dabbelt, Heiko Carstens,
	Alexei Starovoitov, David Howells, Paul Mackerras, linux-riscv,
	Vincent Chen, Sam Ravnborg, linux-s390, Arnd Bergmann,
	Daniel Borkmann, Jonathan Corbet, Helge Deller,
	Christian Borntraeger, Yonghong Song, arcml, Albert Ou,
	Vasily Gorbik, Jani Nikula, Greentime Hu, James E.J. Bottomley,
	Michal Marek, linux-parisc, Vineet Gupta, Randy Dunlap,
	Linux Kernel Mailing List, Networking, bpf, linuxppc-dev,
	Martin KaFai Lau
In-Reply-To: <20190604101409.2078-1-yamada.masahiro@socionext.com>

On Tue, Jun 4, 2019 at 7:15 PM Masahiro Yamada
<yamada.masahiro@socionext.com> wrote:
>
>
> Multiple people have suggested to compile-test UAPI headers.
>
> Currently, Kbuild provides simple sanity checks by headers_check
> but they are not enough to catch bugs.
>
> The most recent patch I know is David Howells' work:
> https://patchwork.kernel.org/patch/10590203/
>
> I agree that we need better tests for UAPI headers,
> but I want to integrate it in a clean way.
>
> The idea that has been in my mind is to compile each header
> to make sure the selfcontainedness.
>
> Recently, Jani Nikula proposed a new syntax 'header-test-y'.
> https://patchwork.kernel.org/patch/10947005/
>
> So, I implemented UAPI compile-testing on top of that.
>
> When adding a new feature, cleaning the code first is a
> good practice.
>
> [1] Remove headers_install_all
>
> This target installs UAPI headers of all architectures
> in a single tree.
> It does not make sense to compile test of headers from
> multiple arches at the same time. Hence, removed.
>
> [2] Split header installation into 'make headers' and 'make headers_install'
>
> To compile-test UAPI headers, we need a work-directory somewhere
> to save objects and .*.cmd files.
>
> usr/include/ will be the work-directory.
>
> Since we cannot pollute the final destination of headers_install,
>
> I split the header installation into two stages.
>
> 'make headers' will build up
> the ready-to-install headers in usr/include,
> which will be also used as a work-directory for the compile-test.
>
> 'make headers_install' will copy headers
> from usr/include to $(INSTALL_HDR_PATH)/include.
>
> [3] Support compile-test of UAPI headers
>
> This is implemented in usr/include/Makefile
>
>
> Jani Nikula (1):
>   kbuild: add support for ensuring headers are self-contained
>
> Masahiro Yamada (14):
>   kbuild: remove headers_{install,check}_all
>   kbuild: remove stale dependency between Documentation/ and
>     headers_install
>   kbuild: make gdb_script depend on prepare0 instead of prepare
>   kbuild: fix Kconfig prompt of CONFIG_HEADERS_CHECK
>   kbuild: add CONFIG_HEADERS_INSTALL and loosen the dependency of
>     samples
>   kbuild: remove build_unifdef target in scripts/Makefile
>   kbuild: build all prerequisite of headers_install simultaneously
>   kbuild: add 'headers' target to build up ready-to-install uapi headers
>   kbuild: re-implement Makefile.headersinst without directory descending
>   kbuild: move hdr-inst shorthand to top Makefile
>   kbuild: simplify scripts/headers_install.sh
>   kbuild: deb-pkg: do not run headers_check
>   fixup: kbuild: add support for ensuring headers are self-contained
>   kbuild: compile test UAPI headers to ensure they are self-contained

Series, applied to linux-kbuild.


>  Documentation/kbuild/headers_install.txt |   7 --
>  Documentation/kbuild/makefiles.txt       |  13 ++-
>  Makefile                                 |  56 +++++-----
>  arch/arc/configs/tb10x_defconfig         |   1 +
>  arch/nds32/configs/defconfig             |   1 +
>  arch/parisc/configs/a500_defconfig       |   1 +
>  arch/parisc/configs/b180_defconfig       |   1 +
>  arch/parisc/configs/c3000_defconfig      |   1 +
>  arch/parisc/configs/default_defconfig    |   1 +
>  arch/powerpc/configs/ppc6xx_defconfig    |   1 +
>  arch/s390/configs/debug_defconfig        |   1 +
>  include/uapi/{linux => }/Kbuild          |   6 +-
>  init/Kconfig                             |  20 ++++
>  lib/Kconfig.debug                        |  25 +++--
>  samples/Kconfig                          |  14 ++-
>  samples/Makefile                         |   4 +-
>  scripts/Kbuild.include                   |   6 --
>  scripts/Makefile                         |   5 -
>  scripts/Makefile.build                   |   9 ++
>  scripts/Makefile.headersinst             | 132 ++++++++++-------------
>  scripts/Makefile.lib                     |   3 +
>  scripts/cc-system-headers.sh             |   8 ++
>  scripts/headers.sh                       |  29 -----
>  scripts/headers_install.sh               |  48 ++++-----
>  scripts/package/builddeb                 |   2 +-
>  usr/.gitignore                           |   1 -
>  usr/Makefile                             |   2 +
>  usr/include/.gitignore                   |   3 +
>  usr/include/Makefile                     | 132 +++++++++++++++++++++++
>  29 files changed, 329 insertions(+), 204 deletions(-)
>  rename include/uapi/{linux => }/Kbuild (77%)
>  create mode 100755 scripts/cc-system-headers.sh
>  delete mode 100755 scripts/headers.sh
>  create mode 100644 usr/include/.gitignore
>  create mode 100644 usr/include/Makefile
>
> --
> 2.17.1
>
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv



-- 
Best Regards
Masahiro Yamada

^ permalink raw reply

* Re: [PATCH 5/7] powerpc/ftrace: Update ftrace_location() for powerpc -mprofile-kernel
From: Steven Rostedt @ 2019-06-18 15:45 UTC (permalink / raw)
  To: Naveen N. Rao
  Cc: linux-kernel, Nicholas Piggin, Masami Hiramatsu, linuxppc-dev,
	Ingo Molnar
In-Reply-To: <186656540d3e6225abd98374e791a13d10d86fab.1560868106.git.naveen.n.rao@linux.vnet.ibm.com>

On Tue, 18 Jun 2019 20:17:04 +0530
"Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> wrote:

> @@ -1551,7 +1551,7 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end)
>  	key.flags = end;	/* overload flags, as it is unsigned long */
>  
>  	for (pg = ftrace_pages_start; pg; pg = pg->next) {
> -		if (end < pg->records[0].ip ||
> +		if (end <= pg->records[0].ip ||

This breaks the algorithm. "end" is inclusive. That is, if you look for
a single byte, where "start" and "end" are the same, and it happens to
be the first ip on the pg page, it will be skipped, and not found.

-- Steve

>  		    start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
>  			continue;
>  		rec = bsearch(&key, pg->records, pg->index,

^ permalink raw reply

* Re: Re: [PATCH v3 4/9] KVM: PPC: Ultravisor: Add generic ultravisor call handler
From: Ram Pai @ 2019-06-18 15:25 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: Madhavan Srinivasan, Michael Anderson, Claudio Carvalho, kvm-ppc,
	Bharata B Rao, linuxppc-dev, Sukadev Bhattiprolu,
	Thiago Bauermann, Anshuman Khandual
In-Reply-To: <20190618114701.GH7313@gate.crashing.org>

On Tue, Jun 18, 2019 at 06:47:01AM -0500, Segher Boessenkool wrote:
> Hi Paul,
> 
> On Mon, Jun 17, 2019 at 12:06:32PM +1000, Paul Mackerras wrote:
> > The thing we need to consider is that when SMFCTRL[E] = 0, a ucall
> > instruction becomes a hcall (that is, sc 2 is executed as if it was
> > sc 1).  In that case, the first argument to the ucall will be
> > interpreted as the hcall number.  Mostly that will happen not to be a
> > valid hcall number, but sometimes it might unavoidably be a valid but
> > unintended hcall number.
> 
> Shouldn't a caller of the ultravisor *know* that it is talking to the
> ultravisor in the first place?  And not to the hypervisor.

It may or may not.  But if it knows and still decides to make the ucall,
   the hypervisor must gracefully handle it.  
   
 We can't control who makes a ucall. A normal process within the VM could
 make a ucall too. Or a normal process running on top of the hypervisor
 could make a ucall.

RP


^ permalink raw reply

* [PATCH 3/7] ftrace: Expose __ftrace_replace_code()
From: Naveen N. Rao @ 2019-06-18 14:47 UTC (permalink / raw)
  To: Michael Ellerman, Steven Rostedt, Masami Hiramatsu, Ingo Molnar,
	Nicholas Piggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1560868106.git.naveen.n.rao@linux.vnet.ibm.com>

While over-riding ftrace_replace_code(), we still want to reuse the
existing __ftrace_replace_code() function. Rename the function and
make it available for other kernel code.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
 include/linux/ftrace.h | 1 +
 kernel/trace/ftrace.c  | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e97789c95c4e..fa653a561da5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -456,6 +456,7 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
 /* defined in arch */
 extern int ftrace_ip_converted(unsigned long ip);
 extern int ftrace_dyn_arch_init(void);
+extern int ftrace_replace_code_rec(struct dyn_ftrace *rec, int enable);
 extern void ftrace_replace_code(int enable);
 extern int ftrace_update_ftrace_func(ftrace_func_t func);
 extern void ftrace_caller(void);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5710a6b3edc1..21d8e201ee80 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2351,8 +2351,8 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
 		return (unsigned long)FTRACE_ADDR;
 }
 
-static int
-__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+int
+ftrace_replace_code_rec(struct dyn_ftrace *rec, int enable)
 {
 	unsigned long ftrace_old_addr;
 	unsigned long ftrace_addr;
@@ -2403,7 +2403,7 @@ void __weak ftrace_replace_code(int mod_flags)
 		if (rec->flags & FTRACE_FL_DISABLED)
 			continue;
 
-		failed = __ftrace_replace_code(rec, enable);
+		failed = ftrace_replace_code_rec(rec, enable);
 		if (failed) {
 			ftrace_bug(failed, rec);
 			/* Stop processing */
@@ -5827,7 +5827,7 @@ void ftrace_module_enable(struct module *mod)
 		rec->flags = cnt;
 
 		if (ftrace_start_up && cnt) {
-			int failed = __ftrace_replace_code(rec, 1);
+			int failed = ftrace_replace_code_rec(rec, 1);
 			if (failed) {
 				ftrace_bug(failed, rec);
 				goto out_loop;
-- 
2.22.0


^ permalink raw reply related

* [PATCH 5/7] powerpc/ftrace: Update ftrace_location() for powerpc -mprofile-kernel
From: Naveen N. Rao @ 2019-06-18 14:47 UTC (permalink / raw)
  To: Michael Ellerman, Steven Rostedt, Masami Hiramatsu, Ingo Molnar,
	Nicholas Piggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1560868106.git.naveen.n.rao@linux.vnet.ibm.com>

Now that we are patching the preceding 'mflr r0' instruction with
-mprofile-kernel, we need to update ftrace_location[_range]() to
recognise that as being part of ftrace. To do this, we make a small
change to ftrace_location_range() and convert ftrace_cmp_recs() into a
weak function. We implement a custom version of ftrace_cmp_recs() which
looks at the instruction preceding the branch to _mcount() and marks
that instruction as belonging to ftrace if it is a 'nop' or 'mflr r0'.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/trace/ftrace.c | 31 ++++++++++++++++++++++++++++++
 include/linux/ftrace.h             |  1 +
 kernel/trace/ftrace.c              |  4 ++--
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 5e2b29808af1..b84046e43207 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -951,6 +951,37 @@ void arch_ftrace_update_code(int command)
 }
 
 #ifdef CONFIG_MPROFILE_KERNEL
+/*
+ * We need to check if the previous instruction is a 'nop' or 'mflr r0'.
+ * If so, we will patch those subsequently and that instruction must be
+ * considered as part of ftrace.
+ */
+int ftrace_cmp_recs(const void *a, const void *b)
+{
+	const struct dyn_ftrace *key = a;
+	const struct dyn_ftrace *rec = b;
+	unsigned int op;
+
+	if (key->flags < rec->ip - MCOUNT_INSN_SIZE)
+		return -1;
+	if (key->ip >= rec->ip + MCOUNT_INSN_SIZE)
+		return 1;
+
+	if (key->flags > rec->ip)
+		return 0;
+
+	/* check the previous instruction */
+	if (probe_kernel_read(&op, (void *)rec->ip - MCOUNT_INSN_SIZE,
+				sizeof(op)))
+		/* assume we own it */
+		return 0;
+
+	if (op != PPC_INST_NOP && op != PPC_INST_MFLR)
+		return -1;
+
+	return 0;
+}
+
 /* Returns 1 if we patched in the mflr */
 static int __ftrace_make_call_prep(struct dyn_ftrace *rec)
 {
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fa653a561da5..9941987bf510 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -435,6 +435,7 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
 int ftrace_update_record(struct dyn_ftrace *rec, int enable);
 int ftrace_test_record(struct dyn_ftrace *rec, int enable);
 void ftrace_run_stop_machine(int command);
+int ftrace_cmp_recs(const void *a, const void *b);
 unsigned long ftrace_location(unsigned long ip);
 unsigned long ftrace_location_range(unsigned long start, unsigned long end);
 unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 21d8e201ee80..b5c61db0b452 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1517,7 +1517,7 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
 	}
 
 
-static int ftrace_cmp_recs(const void *a, const void *b)
+int __weak ftrace_cmp_recs(const void *a, const void *b)
 {
 	const struct dyn_ftrace *key = a;
 	const struct dyn_ftrace *rec = b;
@@ -1551,7 +1551,7 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end)
 	key.flags = end;	/* overload flags, as it is unsigned long */
 
 	for (pg = ftrace_pages_start; pg; pg = pg->next) {
-		if (end < pg->records[0].ip ||
+		if (end <= pg->records[0].ip ||
 		    start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
 			continue;
 		rec = bsearch(&key, pg->records, pg->index,
-- 
2.22.0


^ permalink raw reply related

* [PATCH 6/7] kprobes/ftrace: Use ftrace_location() when [dis]arming probes
From: Naveen N. Rao @ 2019-06-18 14:47 UTC (permalink / raw)
  To: Michael Ellerman, Steven Rostedt, Masami Hiramatsu, Ingo Molnar,
	Nicholas Piggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1560868106.git.naveen.n.rao@linux.vnet.ibm.com>

Ftrace location could include more than a single instruction in case of
some architectures (powerpc64, for now). In this case, kprobe is
permitted on any of those instructions, and uses ftrace infrastructure
for functioning.

However, [dis]arm_kprobe_ftrace() uses the kprobe address when setting
up ftrace filter IP. This won't work if the address points to any
instruction apart from the one that has a branch to _mcount(). To
resolve this, have [dis]arm_kprobe_ftrace() use ftrace_function() to
identify the filter IP.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
 kernel/kprobes.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 445337c107e0..282ee704e2d8 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -978,10 +978,10 @@ static int prepare_kprobe(struct kprobe *p)
 /* Caller must lock kprobe_mutex */
 static int arm_kprobe_ftrace(struct kprobe *p)
 {
+	unsigned long ftrace_ip = ftrace_location((unsigned long)p->addr);
 	int ret = 0;
 
-	ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
-				   (unsigned long)p->addr, 0, 0);
+	ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, ftrace_ip, 0, 0);
 	if (ret) {
 		pr_debug("Failed to arm kprobe-ftrace at %pS (%d)\n",
 			 p->addr, ret);
@@ -1005,13 +1005,14 @@ static int arm_kprobe_ftrace(struct kprobe *p)
 	 * non-empty filter_hash for IPMODIFY ops, we're safe from an accidental
 	 * empty filter_hash which would undesirably trace all functions.
 	 */
-	ftrace_set_filter_ip(&kprobe_ftrace_ops, (unsigned long)p->addr, 1, 0);
+	ftrace_set_filter_ip(&kprobe_ftrace_ops, ftrace_ip, 1, 0);
 	return ret;
 }
 
 /* Caller must lock kprobe_mutex */
 static int disarm_kprobe_ftrace(struct kprobe *p)
 {
+	unsigned long ftrace_ip = ftrace_location((unsigned long)p->addr);
 	int ret = 0;
 
 	if (kprobe_ftrace_enabled == 1) {
@@ -1022,8 +1023,7 @@ static int disarm_kprobe_ftrace(struct kprobe *p)
 
 	kprobe_ftrace_enabled--;
 
-	ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
-			   (unsigned long)p->addr, 1, 0);
+	ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, ftrace_ip, 1, 0);
 	WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (%d)\n",
 		  p->addr, ret);
 	return ret;
-- 
2.22.0


^ permalink raw reply related

* [PATCH 7/7] powerpc/kprobes: Allow probing on any ftrace address
From: Naveen N. Rao @ 2019-06-18 14:47 UTC (permalink / raw)
  To: Michael Ellerman, Steven Rostedt, Masami Hiramatsu, Ingo Molnar,
	Nicholas Piggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1560868106.git.naveen.n.rao@linux.vnet.ibm.com>

With KPROBES_ON_FTRACE, kprobe is allowed to be inserted on instructions
that branch to _mcount (referred to as ftrace location). With
-mprofile-kernel, we now include the preceding 'mflr r0' as being part
of the ftrace location.

However, by default, probing on an instruction that is not actually the
branch to _mcount() is prohibited, as that is considered to not be at an
instruction boundary. This is not the case on powerpc, so allow the same
by overriding arch_check_ftrace_location()

In addition, we update kprobe_ftrace_handler() to detect this scenarios
and to pass the proper nip to the pre and post probe handlers.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/kprobes-ftrace.c | 30 ++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
index 972cb28174b2..6a0bd3c16cb6 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -12,14 +12,34 @@
 #include <linux/preempt.h>
 #include <linux/ftrace.h>
 
+/*
+ * With -mprofile-kernel, we patch two instructions -- the branch to _mcount
+ * as well as the preceding 'mflr r0'. Both these instructions are claimed
+ * by ftrace and we should allow probing on either instruction.
+ */
+int arch_check_ftrace_location(struct kprobe *p)
+{
+	if (ftrace_location((unsigned long)p->addr))
+		p->flags |= KPROBE_FLAG_FTRACE;
+	return 0;
+}
+
 /* Ftrace callback handler for kprobes */
 void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
 			   struct ftrace_ops *ops, struct pt_regs *regs)
 {
 	struct kprobe *p;
+	int mflr_kprobe = 0;
 	struct kprobe_ctlblk *kcb;
 
 	p = get_kprobe((kprobe_opcode_t *)nip);
+	if (unlikely(!p)) {
+		p = get_kprobe((kprobe_opcode_t *)(nip - MCOUNT_INSN_SIZE));
+		if (!p)
+			return;
+		mflr_kprobe = 1;
+	}
+
 	if (unlikely(!p) || kprobe_disabled(p))
 		return;
 
@@ -33,6 +53,9 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
 		 */
 		regs->nip -= MCOUNT_INSN_SIZE;
 
+		if (mflr_kprobe)
+			regs->nip -= MCOUNT_INSN_SIZE;
+
 		__this_cpu_write(current_kprobe, p);
 		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
 		if (!p->pre_handler || !p->pre_handler(p, regs)) {
@@ -45,6 +68,8 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
 				kcb->kprobe_status = KPROBE_HIT_SSDONE;
 				p->post_handler(p, regs, 0);
 			}
+			if (mflr_kprobe)
+				regs->nip += MCOUNT_INSN_SIZE;
 		}
 		/*
 		 * If pre_handler returns !0, it changes regs->nip. We have to
@@ -57,6 +82,11 @@ NOKPROBE_SYMBOL(kprobe_ftrace_handler);
 
 int arch_prepare_kprobe_ftrace(struct kprobe *p)
 {
+	if ((unsigned long)p->addr & 0x03) {
+		printk("Attempt to register kprobe at an unaligned address\n");
+		return -EILSEQ;
+	}
+
 	p->ainsn.insn = NULL;
 	p->ainsn.boostable = -1;
 	return 0;
-- 
2.22.0


^ permalink raw reply related

* [PATCH 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel
From: Naveen N. Rao @ 2019-06-18 14:47 UTC (permalink / raw)
  To: Michael Ellerman, Steven Rostedt, Masami Hiramatsu, Ingo Molnar,
	Nicholas Piggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1560868106.git.naveen.n.rao@linux.vnet.ibm.com>

With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to
enable function tracing and profiling. So far, with dynamic ftrace, we
used to only patch out the branch to _mcount(). However, mflr is
executed by the branch unit that can only execute one per cycle on
POWER9 and shared with branches, so it would be nice to avoid it where
possible.

We cannot simply nop out the mflr either. When enabling function
tracing, there can be a race if tracing is enabled when some thread was
interrupted after executing a nop'ed out mflr. In this case, the thread
would execute the now-patched-in branch to _mcount() without having
executed the preceding mflr.

To solve this, we now enable function tracing in 2 steps: patch in the
mflr instruction, use synchronize_rcu_tasks() to ensure all existing
threads make progress, and then patch in the branch to _mcount(). We
override ftrace_replace_code() with a powerpc64 variant for this
purpose.

Suggested-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/trace/ftrace.c | 241 ++++++++++++++++++++++++++---
 1 file changed, 219 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 517662a56bdc..5e2b29808af1 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -125,7 +125,7 @@ __ftrace_make_nop(struct module *mod,
 {
 	unsigned long entry, ptr, tramp;
 	unsigned long ip = rec->ip;
-	unsigned int op, pop;
+	unsigned int op;
 
 	/* read where this goes */
 	if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
@@ -160,8 +160,6 @@ __ftrace_make_nop(struct module *mod,
 
 #ifdef CONFIG_MPROFILE_KERNEL
 	/* When using -mkernel_profile there is no load to jump over */
-	pop = PPC_INST_NOP;
-
 	if (probe_kernel_read(&op, (void *)(ip - 4), 4)) {
 		pr_err("Fetching instruction at %lx failed.\n", ip - 4);
 		return -EFAULT;
@@ -169,26 +167,23 @@ __ftrace_make_nop(struct module *mod,
 
 	/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
 	if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) {
-		pr_err("Unexpected instruction %08x around bl _mcount\n", op);
+		pr_err("Unexpected instruction %08x before bl _mcount\n", op);
 		return -EINVAL;
 	}
-#else
-	/*
-	 * Our original call site looks like:
-	 *
-	 * bl <tramp>
-	 * ld r2,XX(r1)
-	 *
-	 * Milton Miller pointed out that we can not simply nop the branch.
-	 * If a task was preempted when calling a trace function, the nops
-	 * will remove the way to restore the TOC in r2 and the r2 TOC will
-	 * get corrupted.
-	 *
-	 * Use a b +8 to jump over the load.
-	 */
 
-	pop = PPC_INST_BRANCH | 8;	/* b +8 */
+	/* We should patch out the bl to _mcount first */
+	if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) {
+		pr_err("Patching NOP failed.\n");
+		return -EPERM;
+	}
 
+	/* then, nop out the preceding 'mflr r0' as an optimization */
+	if (op == PPC_INST_MFLR &&
+		patch_instruction((unsigned int *)(ip - 4), PPC_INST_NOP)) {
+		pr_err("Patching NOP failed.\n");
+		return -EPERM;
+	}
+#else
 	/*
 	 * Check what is in the next instruction. We can see ld r2,40(r1), but
 	 * on first pass after boot we will see mflr r0.
@@ -202,12 +197,25 @@ __ftrace_make_nop(struct module *mod,
 		pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op);
 		return -EINVAL;
 	}
-#endif /* CONFIG_MPROFILE_KERNEL */
 
-	if (patch_instruction((unsigned int *)ip, pop)) {
+	/*
+	 * Our original call site looks like:
+	 *
+	 * bl <tramp>
+	 * ld r2,XX(r1)
+	 *
+	 * Milton Miller pointed out that we can not simply nop the branch.
+	 * If a task was preempted when calling a trace function, the nops
+	 * will remove the way to restore the TOC in r2 and the r2 TOC will
+	 * get corrupted.
+	 *
+	 * Use a b +8 to jump over the load.
+	 */
+	if (patch_instruction((unsigned int *)ip, PPC_INST_BRANCH | 8)) {
 		pr_err("Patching NOP failed.\n");
 		return -EPERM;
 	}
+#endif /* CONFIG_MPROFILE_KERNEL */
 
 	return 0;
 }
@@ -421,6 +429,26 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
 		return -EPERM;
 	}
 
+#ifdef CONFIG_MPROFILE_KERNEL
+	/* Nop out the preceding 'mflr r0' as an optimization */
+	if (probe_kernel_read(&op, (void *)(ip - 4), 4)) {
+		pr_err("Fetching instruction at %lx failed.\n", ip - 4);
+		return -EFAULT;
+	}
+
+	/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
+	if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) {
+		pr_err("Unexpected instruction %08x before bl _mcount\n", op);
+		return -EINVAL;
+	}
+
+	if (op == PPC_INST_MFLR &&
+		patch_instruction((unsigned int *)(ip - 4), PPC_INST_NOP)) {
+		pr_err("Patching NOP failed.\n");
+		return -EPERM;
+	}
+#endif
+
 	return 0;
 }
 
@@ -429,6 +457,7 @@ int ftrace_make_nop(struct module *mod,
 {
 	unsigned long ip = rec->ip;
 	unsigned int old, new;
+	int rc;
 
 	/*
 	 * If the calling address is more that 24 bits away,
@@ -439,7 +468,34 @@ int ftrace_make_nop(struct module *mod,
 		/* within range */
 		old = ftrace_call_replace(ip, addr, 1);
 		new = PPC_INST_NOP;
-		return ftrace_modify_code(ip, old, new);
+		rc = ftrace_modify_code(ip, old, new);
+#ifdef CONFIG_MPROFILE_KERNEL
+		if (rc)
+			return rc;
+
+		/*
+		 * For -mprofile-kernel, we patch out the preceding 'mflr r0'
+		 * instruction, as an optimization. It is important to nop out
+		 * the branch to _mcount() first, as a lone 'mflr r0' is
+		 * harmless.
+		 */
+		if (probe_kernel_read(&old, (void *)(ip - 4), 4)) {
+			pr_err("Fetching instruction at %lx failed.\n", ip - 4);
+			return -EFAULT;
+		}
+
+		/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
+		if (old != PPC_INST_MFLR && old != PPC_INST_STD_LR) {
+			pr_err("Unexpected instruction %08x before bl _mcount\n",
+					old);
+			return -EINVAL;
+		}
+
+		if (old == PPC_INST_MFLR)
+			rc = patch_instruction((unsigned int *)(ip - 4),
+					PPC_INST_NOP);
+#endif
+		return rc;
 	} else if (core_kernel_text(ip))
 		return __ftrace_make_nop_kernel(rec, addr);
 
@@ -567,6 +623,37 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		return -EINVAL;
 	}
 
+#ifdef CONFIG_MPROFILE_KERNEL
+	/*
+	 * We could end up here without having called __ftrace_make_call_prep()
+	 * if function tracing is enabled before a module is loaded.
+	 *
+	 * ftrace_module_enable() --> ftrace_replace_code_rec() -->
+	 *	ftrace_make_call() --> __ftrace_make_call()
+	 *
+	 * In this scenario, the previous instruction will be a NOP. It is
+	 * safe to patch it with a 'mflr r0' since we know for a fact that
+	 * this code is not yet being run.
+	 */
+	ip -= MCOUNT_INSN_SIZE;
+
+	/* read where this goes */
+	if (probe_kernel_read(op, ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/*
+	 * nothing to do if this is using the older -mprofile-kernel
+	 * instruction sequence
+	 */
+	if (op[0] != PPC_INST_NOP)
+		return 0;
+
+	if (patch_instruction((unsigned int *)ip, PPC_INST_MFLR)) {
+		pr_err("Patching MFLR failed.\n");
+		return -EPERM;
+	}
+#endif
+
 	return 0;
 }
 
@@ -863,6 +950,116 @@ void arch_ftrace_update_code(int command)
 	ftrace_modify_all_code(command);
 }
 
+#ifdef CONFIG_MPROFILE_KERNEL
+/* Returns 1 if we patched in the mflr */
+static int __ftrace_make_call_prep(struct dyn_ftrace *rec)
+{
+	void *ip = (void *)rec->ip - MCOUNT_INSN_SIZE;
+	unsigned int op[2];
+
+	/* read where this goes */
+	if (probe_kernel_read(op, ip, sizeof(op)))
+		return -EFAULT;
+
+	if (op[1] != PPC_INST_NOP) {
+		pr_err("Unexpected call sequence at %p: %x %x\n",
+							ip, op[0], op[1]);
+		return -EINVAL;
+	}
+
+	/*
+	 * nothing to do if this is using the older -mprofile-kernel
+	 * instruction sequence
+	 */
+	if (op[0] != PPC_INST_NOP)
+		return 0;
+
+	if (patch_instruction((unsigned int *)ip, PPC_INST_MFLR)) {
+		pr_err("Patching MFLR failed.\n");
+		return -EPERM;
+	}
+
+	return 1;
+}
+
+/*
+ * When enabling function tracing for -mprofile-kernel that uses a
+ * 2-instruction sequence of 'mflr r0, bl _mcount()', we use a 2 step process:
+ * 1. Patch in the 'mflr r0' instruction
+ * 1a. synchronize_rcu_tasks() to ensure that any threads that had executed
+ *     the earlier nop there make progress (and hence do not branch into
+ *     ftrace without executing the preceding mflr)
+ * 2. Patch in the branch to ftrace
+ */
+void ftrace_replace_code(int mod_flags)
+{
+	int enable = mod_flags & FTRACE_MODIFY_ENABLE_FL;
+	int schedulable = mod_flags & FTRACE_MODIFY_MAY_SLEEP_FL;
+	int ret, failed, make_call = 0;
+	struct ftrace_rec_iter *iter;
+	struct dyn_ftrace *rec;
+
+	if (unlikely(!ftrace_enabled))
+		return;
+
+	for_ftrace_rec_iter(iter) {
+		rec = ftrace_rec_iter_record(iter);
+
+		if (rec->flags & FTRACE_FL_DISABLED)
+			continue;
+
+		failed = 0;
+		ret = ftrace_test_record(rec, enable);
+		if (ret == FTRACE_UPDATE_MAKE_CALL) {
+			failed = __ftrace_make_call_prep(rec);
+			if (failed < 0) {
+				ftrace_bug(failed, rec);
+				return;
+			} else if (failed == 1)
+				make_call++;
+		}
+
+		if (!failed) {
+			/* We can patch the record directly */
+			failed = ftrace_replace_code_rec(rec, enable);
+			if (failed) {
+				ftrace_bug(failed, rec);
+				return;
+			}
+		}
+
+		if (schedulable)
+			cond_resched();
+	}
+
+	if (!make_call)
+		/* No records needed patching a preceding mflr */
+		return;
+
+	synchronize_rcu_tasks();
+
+	for_ftrace_rec_iter(iter) {
+		rec = ftrace_rec_iter_record(iter);
+
+		if (rec->flags & FTRACE_FL_DISABLED)
+			continue;
+
+		ret = ftrace_test_record(rec, enable);
+		if (ret == FTRACE_UPDATE_MAKE_CALL)
+			failed = ftrace_replace_code_rec(rec, enable);
+
+		if (failed) {
+			ftrace_bug(failed, rec);
+			return;
+		}
+
+		if (schedulable)
+			cond_resched();
+	}
+
+}
+#endif
+
 #ifdef CONFIG_PPC64
 #define PACATOC offsetof(struct paca_struct, kernel_toc)
 
-- 
2.22.0


^ permalink raw reply related

* [PATCH 2/7] x86/ftrace: Fix use of flags in ftrace_replace_code()
From: Naveen N. Rao @ 2019-06-18 14:47 UTC (permalink / raw)
  To: Michael Ellerman, Steven Rostedt, Masami Hiramatsu, Ingo Molnar,
	Nicholas Piggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1560868106.git.naveen.n.rao@linux.vnet.ibm.com>

In commit a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be
schedulable), the generic ftrace_replace_code() function was modified to
accept a flags argument in place of a single 'enable' flag. However, the
x86 version of this function was not updated. Fix the same.

Fixes: a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be schedulable")
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
 arch/x86/kernel/ftrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 0927bb158ffc..f34005a17051 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -573,8 +573,9 @@ static void run_sync(void)
 		local_irq_disable();
 }
 
-void ftrace_replace_code(int enable)
+void ftrace_replace_code(int mod_flags)
 {
+	int enable = mod_flags & FTRACE_MODIFY_ENABLE_FL;
 	struct ftrace_rec_iter *iter;
 	struct dyn_ftrace *rec;
 	const char *report = "adding breakpoints";
-- 
2.22.0


^ permalink raw reply related

* [PATCH 1/7] ftrace: Expose flags used for ftrace_replace_code()
From: Naveen N. Rao @ 2019-06-18 14:47 UTC (permalink / raw)
  To: Michael Ellerman, Steven Rostedt, Masami Hiramatsu, Ingo Molnar,
	Nicholas Piggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1560868106.git.naveen.n.rao@linux.vnet.ibm.com>

Since ftrace_replace_code() is a __weak function and can be overridden,
we need to expose the flags that can be set. So, move the flags enum to
the header file.

Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
 include/linux/ftrace.h | 5 +++++
 kernel/trace/ftrace.c  | 5 -----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 25e2995d4a4c..e97789c95c4e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -162,6 +162,11 @@ enum {
 	FTRACE_OPS_FL_TRACE_ARRAY		= 1 << 15,
 };
 
+enum {
+	FTRACE_MODIFY_ENABLE_FL		= (1 << 0),
+	FTRACE_MODIFY_MAY_SLEEP_FL	= (1 << 1),
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* The hash used to know what functions callbacks trace */
 struct ftrace_ops_hash {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 38277af44f5c..5710a6b3edc1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -75,11 +75,6 @@
 #define INIT_OPS_HASH(opsname)
 #endif
 
-enum {
-	FTRACE_MODIFY_ENABLE_FL		= (1 << 0),
-	FTRACE_MODIFY_MAY_SLEEP_FL	= (1 << 1),
-};
-
 struct ftrace_ops ftrace_list_end __read_mostly = {
 	.func		= ftrace_stub,
 	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
-- 
2.22.0


^ permalink raw reply related

* [PATCH 0/7] powerpc/ftrace: Patch out -mprofile-kernel instructions
From: Naveen N. Rao @ 2019-06-18 14:46 UTC (permalink / raw)
  To: Michael Ellerman, Steven Rostedt, Masami Hiramatsu, Ingo Molnar,
	Nicholas Piggin
  Cc: linuxppc-dev, linux-kernel

Changes since RFC:
- Patches 1 and 2: No changes
- Patch 3: rename function to ftrace_replace_code_rec() to signify that 
  it acts on a ftrace record.
- Patch 4: numerous small changes, including those suggested by Nick 
  Piggin.
- Patch 4: additionally handle scenario where __ftrace_make_call() can 
  be invoked without having called __ftrace_make_call_prep(), when a 
  kernel module is loaded while function tracing is active.
- Patch 5 to 7: new, to have ftrace claim ownership of two instructions, 
  and to have kprobes work properly with this.


--
On powerpc64, -mprofile-kernel results in two instructions being 
emitted: 'mflr r0' and 'bl _mcount'. So far, we were only nop'ing out 
the branch to _mcount(). This series implements an approach to also nop 
out the preceding mflr.


- Naveen


Naveen N. Rao (7):
  ftrace: Expose flags used for ftrace_replace_code()
  x86/ftrace: Fix use of flags in ftrace_replace_code()
  ftrace: Expose __ftrace_replace_code()
  powerpc/ftrace: Additionally nop out the preceding mflr with
    -mprofile-kernel
  powerpc/ftrace: Update ftrace_location() for powerpc -mprofile-kernel
  kprobes/ftrace: Use ftrace_location() when [dis]arming probes
  powerpc/kprobes: Allow probing on any ftrace address

 arch/powerpc/kernel/kprobes-ftrace.c |  30 +++
 arch/powerpc/kernel/trace/ftrace.c   | 272 ++++++++++++++++++++++++---
 arch/x86/kernel/ftrace.c             |   3 +-
 include/linux/ftrace.h               |   7 +
 kernel/kprobes.c                     |  10 +-
 kernel/trace/ftrace.c                |  17 +-
 6 files changed, 300 insertions(+), 39 deletions(-)

-- 
2.22.0


^ permalink raw reply

* Re: [PATCH 5/5] Powerpc/Watchpoint: Fix length calculation for unaligned target
From: Michael Neuling @ 2019-06-18 13:32 UTC (permalink / raw)
  To: Ravi Bangoria, mpe
  Cc: linux-kernel, npiggin, paulus, naveen.n.rao, linuxppc-dev
In-Reply-To: <20190618042732.5582-6-ravi.bangoria@linux.ibm.com>

On Tue, 2019-06-18 at 09:57 +0530, Ravi Bangoria wrote:
> Watchpoint match range is always doubleword(8 bytes) aligned on
> powerpc. If the given range is crossing doubleword boundary, we
> need to increase the length such that next doubleword also get
> covered. Ex,
> 
>           address   len = 6 bytes
>                 |=========.
>    |------------v--|------v--------|
>    | | | | | | | | | | | | | | | | |
>    |---------------|---------------|
>     <---8 bytes--->
> 
> In such case, current code configures hw as:
>   start_addr = address & ~HW_BREAKPOINT_ALIGN
>   len = 8 bytes
> 
> And thus read/write in last 4 bytes of the given range is ignored.
> Fix this by including next doubleword in the length. Watchpoint
> exception handler already ignores extraneous exceptions, so no
> changes required for that.

Nice catch. Thanks.

I assume this has been broken forever? Should we be CCing stable? If so, it
would be nice to have this self contained (separate from the refactor) so we can
more easily backport it.

Also, can you update 
tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c to catch this issue?

A couple more comments below.

> 
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/hw_breakpoint.h |  7 ++--
>  arch/powerpc/kernel/hw_breakpoint.c      | 44 +++++++++++++-----------
>  arch/powerpc/kernel/process.c            | 34 ++++++++++++++++--
>  3 files changed, 60 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/hw_breakpoint.h
> b/arch/powerpc/include/asm/hw_breakpoint.h
> index 8acbbdd4a2d5..749a357164d5 100644
> --- a/arch/powerpc/include/asm/hw_breakpoint.h
> +++ b/arch/powerpc/include/asm/hw_breakpoint.h
> @@ -34,6 +34,8 @@ struct arch_hw_breakpoint {
>  #define HW_BRK_TYPE_PRIV_ALL	(HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \
>  				 HW_BRK_TYPE_HYP)
>  
> +#define HW_BREAKPOINT_ALIGN 0x7
> +
>  #ifdef CONFIG_HAVE_HW_BREAKPOINT
>  #include <linux/kdebug.h>
>  #include <asm/reg.h>
> @@ -45,8 +47,6 @@ struct pmu;
>  struct perf_sample_data;
>  struct task_struct;
>  
> -#define HW_BREAKPOINT_ALIGN 0x7
> -
>  extern int hw_breakpoint_slots(int type);
>  extern int arch_bp_generic_fields(int type, int *gen_bp_type);
>  extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
> @@ -76,7 +76,8 @@ static inline void hw_breakpoint_disable(void)
>  }
>  extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
>  int hw_breakpoint_handler(struct die_args *args);
> -
> +extern u16 hw_breakpoint_get_final_len(struct arch_hw_breakpoint *brk,
> +		unsigned long *start_addr, unsigned long *end_addr);
>  extern int set_dawr(struct arch_hw_breakpoint *brk);
>  extern bool dawr_force_enable;
>  static inline bool dawr_enabled(void)
> diff --git a/arch/powerpc/kernel/hw_breakpoint.c
> b/arch/powerpc/kernel/hw_breakpoint.c
> index 36bcf705df65..c122fd55aa44 100644
> --- a/arch/powerpc/kernel/hw_breakpoint.c
> +++ b/arch/powerpc/kernel/hw_breakpoint.c
> @@ -126,6 +126,28 @@ int arch_bp_generic_fields(int type, int *gen_bp_type)
>  	return 0;
>  }
>  
> +/* Maximum len for DABR is 8 bytes and DAWR is 512 bytes */
> +static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
> +{
> +	u16 length_max = 8;
> +	u16 final_len;
> +	unsigned long start_addr, end_addr;
> +
> +	final_len = hw_breakpoint_get_final_len(hw, &start_addr, &end_addr);
> +
> +	if (dawr_enabled()) {
> +		length_max = 512;
> +		/* DAWR region can't cross 512 bytes boundary */
> +		if ((start_addr >> 9) != (end_addr >> 9))
> +			return -EINVAL;
> +	}
> +
> +	if (final_len > length_max)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
>  /*
>   * Validate the arch-specific HW Breakpoint register settings
>   */
> @@ -133,12 +155,10 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
>  			     const struct perf_event_attr *attr,
>  			     struct arch_hw_breakpoint *hw)
>  {
> -	int length_max;
> -
>  	if (!ppc_breakpoint_available())
>  		return -ENODEV;
>  
> -	if (!bp)
> +	if (!bp || !attr->bp_len)
>  		return -EINVAL;
>  
>  	hw->type = HW_BRK_TYPE_TRANSLATE;
> @@ -160,23 +180,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
>  	hw->address = attr->bp_addr;
>  	hw->len = attr->bp_len;
>  
> -	length_max = 8; /* DABR */
> -	if (dawr_enabled()) {
> -		length_max = 512 ; /* 64 doublewords */
> -		/* DAWR region can't cross 512 bytes boundary */
> -		if ((hw->address >> 9) != ((hw->address + hw->len - 1) >> 9))
> -			return -EINVAL;
> -	}
> -
> -	/*
> -	 * Since breakpoint length can be a maximum of length_max and
> -	 * breakpoint addresses are aligned to nearest double-word
> -	 * HW_BREAKPOINT_ALIGN by rounding off to the lower address,
> -	 * the 'symbolsize' should satisfy the check below.
> -	 */
> -	if (hw->len > (length_max - (hw->address & HW_BREAKPOINT_ALIGN)))
> -		return -EINVAL;
> -	return 0;
> +	return hw_breakpoint_validate_len(hw);
>  }
>  
>  /*
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index 265fac9fb3a4..159aaa70de46 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -802,9 +802,39 @@ static int disable_dawr(void)
>  	return 0;
>  }
>  
> +/*
> + * Watchpoint match range is always doubleword(8 bytes) aligned on
> + * powerpc. If the given range is crossing doubleword boundary, we
> + * need to increase the length such that next doubleword also get
> + * covered. Ex,
> + *
> + *          address   len = 6 bytes
> + *                |=========.
> + *   |------------v--|------v--------|
> + *   | | | | | | | | | | | | | | | | |
> + *   |---------------|---------------|
> + *    <---8 bytes--->
> + *
> + * In this case, we should configure hw as:
> + *   start_addr = address & ~HW_BREAKPOINT_ALIGN
> + *   len = 16 bytes
> + *
> + * @start_addr and @end_addr are inclusive.
> + */
> +u16 hw_breakpoint_get_final_len(struct arch_hw_breakpoint *brk,
> +				unsigned long *start_addr,
> +				unsigned long *end_addr)

I don't really like this.  "final" is not a good name. Something like hardware
would be better.

Also, can you put the start_addr and end addr in the arch_hw_breakpoint rather
than doing what you have above.  Call them hw_start_addr, hw_end_addr.

We could even set these two new addresses where we set the set of
arch_hw_breakpoint rather than having this late call.

> +{
> +	*start_addr = brk->address & ~HW_BREAKPOINT_ALIGN;
> +	*end_addr = (brk->address + brk->len - 1) | HW_BREAKPOINT_ALIGN;
> +	return *end_addr - *start_addr + 1;
> +}
> +
>  int set_dawr(struct arch_hw_breakpoint *brk)
>  {
>  	unsigned long dawr, dawrx, mrd;
> +	unsigned long start_addr, end_addr;
> +	u16 final_len;
>  
>  	if (brk->type == HW_BRK_TYPE_DISABLE)
>  		return disable_dawr();
> @@ -815,8 +845,8 @@ int set_dawr(struct arch_hw_breakpoint *brk)
>  	dawrx |= ((brk->type & HW_BRK_TYPE_TRANSLATE) >> 2) << (63 - 59);
>  	dawrx |= (brk->type & HW_BRK_TYPE_PRIV_ALL) >> 3;
>  
> -	/* brk->len is in bytes. */
> -	mrd = ((brk->len + 7) >> 3) - 1;
> +	final_len = hw_breakpoint_get_final_len(brk, &start_addr, &end_addr);

Again, hardware length, or something other than "final"

> +	mrd = ((final_len + 7) >> 3) - 1;
>  	dawrx |= (mrd & 0x3f) << (63 - 53);
>  
>  	if (ppc_md.set_dawr)


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox