LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH v13 03/12] swiotlb: Set dev->dma_io_tlb_mem to the swiotlb pool used
From: Stefano Stabellini @ 2021-06-17 23:30 UTC (permalink / raw)
  To: Claire Chang
  Cc: heikki.krogerus, thomas.hellstrom, peterz, joonas.lahtinen,
	dri-devel, chris, grant.likely, paulus, Frank Rowand, mingo,
	Marek Szyprowski, sstabellini, Saravana Kannan, Joerg Roedel,
	Rafael J . Wysocki, Christoph Hellwig, Bartosz Golaszewski,
	bskeggs, linux-pci, xen-devel, Thierry Reding, intel-gfx,
	matthew.auld, linux-devicetree, jxgao, daniel, Will Deacon,
	Konrad Rzeszutek Wilk, maarten.lankhorst, airlied, Dan Williams,
	linuxppc-dev, jani.nikula, Rob Herring, rodrigo.vivi, bhelgaas,
	boris.ostrovsky, Andy Shevchenko, jgross, Nicolas Boichat,
	Greg KH, Randy Dunlap, lkml, tfiga, list@263.net:IOMMU DRIVERS,
	Jim Quinlan, xypron.glpk, Robin Murphy, bauerman
In-Reply-To: <20210617062635.1660944-4-tientzu@chromium.org>

On Thu, 17 Jun 2021, Claire Chang wrote:
> Always have the pointer to the swiotlb pool used in struct device. This
> could help simplify the code for other pools.
> 
> Signed-off-by: Claire Chang <tientzu@chromium.org>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Tested-by: Stefano Stabellini <sstabellini@kernel.org>
> Tested-by: Will Deacon <will@kernel.org>

Acked-by: Stefano Stabellini <sstabellini@kernel.org>

> ---
>  drivers/base/core.c    | 4 ++++
>  include/linux/device.h | 4 ++++
>  kernel/dma/swiotlb.c   | 8 ++++----
>  3 files changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/base/core.c b/drivers/base/core.c
> index f29839382f81..cb3123e3954d 100644
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -27,6 +27,7 @@
>  #include <linux/netdevice.h>
>  #include <linux/sched/signal.h>
>  #include <linux/sched/mm.h>
> +#include <linux/swiotlb.h>
>  #include <linux/sysfs.h>
>  #include <linux/dma-map-ops.h> /* for dma_default_coherent */
>  
> @@ -2736,6 +2737,9 @@ void device_initialize(struct device *dev)
>      defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
>  	dev->dma_coherent = dma_default_coherent;
>  #endif
> +#ifdef CONFIG_SWIOTLB
> +	dev->dma_io_tlb_mem = io_tlb_default_mem;
> +#endif
>  }
>  EXPORT_SYMBOL_GPL(device_initialize);
>  
> diff --git a/include/linux/device.h b/include/linux/device.h
> index ba660731bd25..240d652a0696 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -416,6 +416,7 @@ struct dev_links_info {
>   * @dma_pools:	Dma pools (if dma'ble device).
>   * @dma_mem:	Internal for coherent mem override.
>   * @cma_area:	Contiguous memory area for dma allocations
> + * @dma_io_tlb_mem: Pointer to the swiotlb pool used.  Not for driver use.
>   * @archdata:	For arch-specific additions.
>   * @of_node:	Associated device tree node.
>   * @fwnode:	Associated device node supplied by platform firmware.
> @@ -518,6 +519,9 @@ struct device {
>  #ifdef CONFIG_DMA_CMA
>  	struct cma *cma_area;		/* contiguous memory area for dma
>  					   allocations */
> +#endif
> +#ifdef CONFIG_SWIOTLB
> +	struct io_tlb_mem *dma_io_tlb_mem;
>  #endif
>  	/* arch specific additions */
>  	struct dev_archdata	archdata;
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 2dba659a1e73..de79e9437030 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -340,7 +340,7 @@ void __init swiotlb_exit(void)
>  static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size,
>  			   enum dma_data_direction dir)
>  {
> -	struct io_tlb_mem *mem = io_tlb_default_mem;
> +	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
>  	int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
>  	unsigned int offset = (tlb_addr - mem->start) & (IO_TLB_SIZE - 1);
>  	phys_addr_t orig_addr = mem->slots[index].orig_addr;
> @@ -431,7 +431,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index)
>  static int find_slots(struct device *dev, phys_addr_t orig_addr,
>  		size_t alloc_size)
>  {
> -	struct io_tlb_mem *mem = io_tlb_default_mem;
> +	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
>  	unsigned long boundary_mask = dma_get_seg_boundary(dev);
>  	dma_addr_t tbl_dma_addr =
>  		phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
> @@ -508,7 +508,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
>  		size_t mapping_size, size_t alloc_size,
>  		enum dma_data_direction dir, unsigned long attrs)
>  {
> -	struct io_tlb_mem *mem = io_tlb_default_mem;
> +	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
>  	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
>  	unsigned int i;
>  	int index;
> @@ -559,7 +559,7 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
>  			      size_t mapping_size, enum dma_data_direction dir,
>  			      unsigned long attrs)
>  {
> -	struct io_tlb_mem *mem = io_tlb_default_mem;
> +	struct io_tlb_mem *mem = hwdev->dma_io_tlb_mem;
>  	unsigned long flags;
>  	unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
>  	int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
> -- 
> 2.32.0.288.g62a8d224e6-goog
> 

^ permalink raw reply

* Re: [PATCH v13 04/12] swiotlb: Update is_swiotlb_buffer to add a struct device argument
From: Stefano Stabellini @ 2021-06-17 23:30 UTC (permalink / raw)
  To: Claire Chang
  Cc: heikki.krogerus, thomas.hellstrom, peterz, joonas.lahtinen,
	dri-devel, chris, grant.likely, paulus, Frank Rowand, mingo,
	Marek Szyprowski, sstabellini, Saravana Kannan, Joerg Roedel,
	Rafael J . Wysocki, Christoph Hellwig, Bartosz Golaszewski,
	bskeggs, linux-pci, xen-devel, Thierry Reding, intel-gfx,
	matthew.auld, linux-devicetree, jxgao, daniel, Will Deacon,
	Konrad Rzeszutek Wilk, maarten.lankhorst, airlied, Dan Williams,
	linuxppc-dev, jani.nikula, Rob Herring, rodrigo.vivi, bhelgaas,
	boris.ostrovsky, Andy Shevchenko, jgross, Nicolas Boichat,
	Greg KH, Randy Dunlap, lkml, tfiga, list@263.net:IOMMU DRIVERS,
	Jim Quinlan, xypron.glpk, Robin Murphy, bauerman
In-Reply-To: <20210617062635.1660944-5-tientzu@chromium.org>

On Thu, 17 Jun 2021, Claire Chang wrote:
> Update is_swiotlb_buffer to add a struct device argument. This will be
> useful later to allow for different pools.
> 
> Signed-off-by: Claire Chang <tientzu@chromium.org>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Tested-by: Stefano Stabellini <sstabellini@kernel.org>
> Tested-by: Will Deacon <will@kernel.org>

Acked-by: Stefano Stabellini <sstabellini@kernel.org>


> ---
>  drivers/iommu/dma-iommu.c | 12 ++++++------
>  drivers/xen/swiotlb-xen.c |  2 +-
>  include/linux/swiotlb.h   |  7 ++++---
>  kernel/dma/direct.c       |  6 +++---
>  kernel/dma/direct.h       |  6 +++---
>  5 files changed, 17 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 3087d9fa6065..10997ef541f8 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -507,7 +507,7 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
>  
>  	__iommu_dma_unmap(dev, dma_addr, size);
>  
> -	if (unlikely(is_swiotlb_buffer(phys)))
> +	if (unlikely(is_swiotlb_buffer(dev, phys)))
>  		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
>  }
>  
> @@ -578,7 +578,7 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
>  	}
>  
>  	iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
> -	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
> +	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
>  		swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
>  	return iova;
>  }
> @@ -749,7 +749,7 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
>  	if (!dev_is_dma_coherent(dev))
>  		arch_sync_dma_for_cpu(phys, size, dir);
>  
> -	if (is_swiotlb_buffer(phys))
> +	if (is_swiotlb_buffer(dev, phys))
>  		swiotlb_sync_single_for_cpu(dev, phys, size, dir);
>  }
>  
> @@ -762,7 +762,7 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
>  		return;
>  
>  	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
> -	if (is_swiotlb_buffer(phys))
> +	if (is_swiotlb_buffer(dev, phys))
>  		swiotlb_sync_single_for_device(dev, phys, size, dir);
>  
>  	if (!dev_is_dma_coherent(dev))
> @@ -783,7 +783,7 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
>  		if (!dev_is_dma_coherent(dev))
>  			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
>  
> -		if (is_swiotlb_buffer(sg_phys(sg)))
> +		if (is_swiotlb_buffer(dev, sg_phys(sg)))
>  			swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
>  						    sg->length, dir);
>  	}
> @@ -800,7 +800,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
>  		return;
>  
>  	for_each_sg(sgl, sg, nelems, i) {
> -		if (is_swiotlb_buffer(sg_phys(sg)))
> +		if (is_swiotlb_buffer(dev, sg_phys(sg)))
>  			swiotlb_sync_single_for_device(dev, sg_phys(sg),
>  						       sg->length, dir);
>  
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index 4c89afc0df62..0c6ed09f8513 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -100,7 +100,7 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
>  	 * in our domain. Therefore _only_ check address within our domain.
>  	 */
>  	if (pfn_valid(PFN_DOWN(paddr)))
> -		return is_swiotlb_buffer(paddr);
> +		return is_swiotlb_buffer(dev, paddr);
>  	return 0;
>  }
>  
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index 216854a5e513..d1f3d95881cd 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -2,6 +2,7 @@
>  #ifndef __LINUX_SWIOTLB_H
>  #define __LINUX_SWIOTLB_H
>  
> +#include <linux/device.h>
>  #include <linux/dma-direction.h>
>  #include <linux/init.h>
>  #include <linux/types.h>
> @@ -101,9 +102,9 @@ struct io_tlb_mem {
>  };
>  extern struct io_tlb_mem *io_tlb_default_mem;
>  
> -static inline bool is_swiotlb_buffer(phys_addr_t paddr)
> +static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
>  {
> -	struct io_tlb_mem *mem = io_tlb_default_mem;
> +	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
>  
>  	return mem && paddr >= mem->start && paddr < mem->end;
>  }
> @@ -115,7 +116,7 @@ bool is_swiotlb_active(void);
>  void __init swiotlb_adjust_size(unsigned long size);
>  #else
>  #define swiotlb_force SWIOTLB_NO_FORCE
> -static inline bool is_swiotlb_buffer(phys_addr_t paddr)
> +static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
>  {
>  	return false;
>  }
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index f737e3347059..84c9feb5474a 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -343,7 +343,7 @@ void dma_direct_sync_sg_for_device(struct device *dev,
>  	for_each_sg(sgl, sg, nents, i) {
>  		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
>  
> -		if (unlikely(is_swiotlb_buffer(paddr)))
> +		if (unlikely(is_swiotlb_buffer(dev, paddr)))
>  			swiotlb_sync_single_for_device(dev, paddr, sg->length,
>  						       dir);
>  
> @@ -369,7 +369,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
>  		if (!dev_is_dma_coherent(dev))
>  			arch_sync_dma_for_cpu(paddr, sg->length, dir);
>  
> -		if (unlikely(is_swiotlb_buffer(paddr)))
> +		if (unlikely(is_swiotlb_buffer(dev, paddr)))
>  			swiotlb_sync_single_for_cpu(dev, paddr, sg->length,
>  						    dir);
>  
> @@ -504,7 +504,7 @@ size_t dma_direct_max_mapping_size(struct device *dev)
>  bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr)
>  {
>  	return !dev_is_dma_coherent(dev) ||
> -		is_swiotlb_buffer(dma_to_phys(dev, dma_addr));
> +	       is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr));
>  }
>  
>  /**
> diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
> index 50afc05b6f1d..13e9e7158d94 100644
> --- a/kernel/dma/direct.h
> +++ b/kernel/dma/direct.h
> @@ -56,7 +56,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
>  {
>  	phys_addr_t paddr = dma_to_phys(dev, addr);
>  
> -	if (unlikely(is_swiotlb_buffer(paddr)))
> +	if (unlikely(is_swiotlb_buffer(dev, paddr)))
>  		swiotlb_sync_single_for_device(dev, paddr, size, dir);
>  
>  	if (!dev_is_dma_coherent(dev))
> @@ -73,7 +73,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
>  		arch_sync_dma_for_cpu_all();
>  	}
>  
> -	if (unlikely(is_swiotlb_buffer(paddr)))
> +	if (unlikely(is_swiotlb_buffer(dev, paddr)))
>  		swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
>  
>  	if (dir == DMA_FROM_DEVICE)
> @@ -113,7 +113,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
>  	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
>  		dma_direct_sync_single_for_cpu(dev, addr, size, dir);
>  
> -	if (unlikely(is_swiotlb_buffer(phys)))
> +	if (unlikely(is_swiotlb_buffer(dev, phys)))
>  		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
>  }
>  #endif /* _KERNEL_DMA_DIRECT_H */
> -- 
> 2.32.0.288.g62a8d224e6-goog
> 

^ permalink raw reply

* Re: [PATCH v13 05/12] swiotlb: Update is_swiotlb_active to add a struct device argument
From: Stefano Stabellini @ 2021-06-17 23:30 UTC (permalink / raw)
  To: Claire Chang
  Cc: heikki.krogerus, thomas.hellstrom, peterz, joonas.lahtinen,
	dri-devel, chris, grant.likely, paulus, Frank Rowand, mingo,
	Marek Szyprowski, sstabellini, Saravana Kannan, Joerg Roedel,
	Rafael J . Wysocki, Christoph Hellwig, Bartosz Golaszewski,
	bskeggs, linux-pci, xen-devel, Thierry Reding, intel-gfx,
	matthew.auld, linux-devicetree, jxgao, daniel, Will Deacon,
	Konrad Rzeszutek Wilk, maarten.lankhorst, airlied, Dan Williams,
	linuxppc-dev, jani.nikula, Rob Herring, rodrigo.vivi, bhelgaas,
	boris.ostrovsky, Andy Shevchenko, jgross, Nicolas Boichat,
	Greg KH, Randy Dunlap, lkml, tfiga, list@263.net:IOMMU DRIVERS,
	Jim Quinlan, xypron.glpk, Robin Murphy, bauerman
In-Reply-To: <20210617062635.1660944-6-tientzu@chromium.org>

On Thu, 17 Jun 2021, Claire Chang wrote:
> Update is_swiotlb_active to add a struct device argument. This will be
> useful later to allow for different pools.
> 
> Signed-off-by: Claire Chang <tientzu@chromium.org>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Tested-by: Stefano Stabellini <sstabellini@kernel.org>
> Tested-by: Will Deacon <will@kernel.org>

Acked-by: Stefano Stabellini <sstabellini@kernel.org>


> ---
>  drivers/gpu/drm/i915/gem/i915_gem_internal.c | 2 +-
>  drivers/gpu/drm/nouveau/nouveau_ttm.c        | 2 +-
>  drivers/pci/xen-pcifront.c                   | 2 +-
>  include/linux/swiotlb.h                      | 4 ++--
>  kernel/dma/direct.c                          | 2 +-
>  kernel/dma/swiotlb.c                         | 4 ++--
>  6 files changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
> index a9d65fc8aa0e..4b7afa0fc85d 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
> @@ -42,7 +42,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
>  
>  	max_order = MAX_ORDER;
>  #ifdef CONFIG_SWIOTLB
> -	if (is_swiotlb_active()) {
> +	if (is_swiotlb_active(obj->base.dev->dev)) {
>  		unsigned int max_segment;
>  
>  		max_segment = swiotlb_max_segment();
> diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
> index 9662522aa066..be15bfd9e0ee 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
> @@ -321,7 +321,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
>  	}
>  
>  #if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86)
> -	need_swiotlb = is_swiotlb_active();
> +	need_swiotlb = is_swiotlb_active(dev->dev);
>  #endif
>  
>  	ret = ttm_bo_device_init(&drm->ttm.bdev, &nouveau_bo_driver,
> diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
> index b7a8f3a1921f..0d56985bfe81 100644
> --- a/drivers/pci/xen-pcifront.c
> +++ b/drivers/pci/xen-pcifront.c
> @@ -693,7 +693,7 @@ static int pcifront_connect_and_init_dma(struct pcifront_device *pdev)
>  
>  	spin_unlock(&pcifront_dev_lock);
>  
> -	if (!err && !is_swiotlb_active()) {
> +	if (!err && !is_swiotlb_active(&pdev->xdev->dev)) {
>  		err = pci_xen_swiotlb_init_late();
>  		if (err)
>  			dev_err(&pdev->xdev->dev, "Could not setup SWIOTLB!\n");
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index d1f3d95881cd..dd1c30a83058 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -112,7 +112,7 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
>  void __init swiotlb_exit(void);
>  unsigned int swiotlb_max_segment(void);
>  size_t swiotlb_max_mapping_size(struct device *dev);
> -bool is_swiotlb_active(void);
> +bool is_swiotlb_active(struct device *dev);
>  void __init swiotlb_adjust_size(unsigned long size);
>  #else
>  #define swiotlb_force SWIOTLB_NO_FORCE
> @@ -132,7 +132,7 @@ static inline size_t swiotlb_max_mapping_size(struct device *dev)
>  	return SIZE_MAX;
>  }
>  
> -static inline bool is_swiotlb_active(void)
> +static inline bool is_swiotlb_active(struct device *dev)
>  {
>  	return false;
>  }
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index 84c9feb5474a..7a88c34d0867 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -495,7 +495,7 @@ int dma_direct_supported(struct device *dev, u64 mask)
>  size_t dma_direct_max_mapping_size(struct device *dev)
>  {
>  	/* If SWIOTLB is active, use its maximum mapping size */
> -	if (is_swiotlb_active() &&
> +	if (is_swiotlb_active(dev) &&
>  	    (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE))
>  		return swiotlb_max_mapping_size(dev);
>  	return SIZE_MAX;
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index de79e9437030..409694d7a8ad 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -664,9 +664,9 @@ size_t swiotlb_max_mapping_size(struct device *dev)
>  	return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE;
>  }
>  
> -bool is_swiotlb_active(void)
> +bool is_swiotlb_active(struct device *dev)
>  {
> -	return io_tlb_default_mem != NULL;
> +	return dev->dma_io_tlb_mem != NULL;
>  }
>  EXPORT_SYMBOL_GPL(is_swiotlb_active);
>  
> -- 
> 2.32.0.288.g62a8d224e6-goog
> 

^ permalink raw reply

* Re: [PATCH v13 06/12] swiotlb: Use is_swiotlb_force_bounce for swiotlb data bouncing
From: Stefano Stabellini @ 2021-06-17 23:31 UTC (permalink / raw)
  To: Claire Chang
  Cc: heikki.krogerus, thomas.hellstrom, peterz, joonas.lahtinen,
	dri-devel, chris, grant.likely, paulus, Frank Rowand, mingo,
	Marek Szyprowski, sstabellini, Saravana Kannan, Joerg Roedel,
	Rafael J . Wysocki, Christoph Hellwig, Bartosz Golaszewski,
	bskeggs, linux-pci, xen-devel, Thierry Reding, intel-gfx,
	matthew.auld, linux-devicetree, jxgao, daniel, Will Deacon,
	Konrad Rzeszutek Wilk, maarten.lankhorst, airlied, Dan Williams,
	linuxppc-dev, jani.nikula, Rob Herring, rodrigo.vivi, bhelgaas,
	boris.ostrovsky, Andy Shevchenko, jgross, Nicolas Boichat,
	Greg KH, Randy Dunlap, lkml, tfiga, list@263.net:IOMMU DRIVERS,
	Jim Quinlan, xypron.glpk, Robin Murphy, bauerman
In-Reply-To: <20210617062635.1660944-7-tientzu@chromium.org>

On Thu, 17 Jun 2021, Claire Chang wrote:
> Propagate the swiotlb_force into io_tlb_default_mem->force_bounce and
> use it to determine whether to bounce the data or not. This will be
> useful later to allow for different pools.
> 
> Signed-off-by: Claire Chang <tientzu@chromium.org>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Tested-by: Stefano Stabellini <sstabellini@kernel.org>
> Tested-by: Will Deacon <will@kernel.org>

Acked-by: Stefano Stabellini <sstabellini@kernel.org>


> ---
>  drivers/xen/swiotlb-xen.c |  2 +-
>  include/linux/swiotlb.h   | 11 +++++++++++
>  kernel/dma/direct.c       |  2 +-
>  kernel/dma/direct.h       |  2 +-
>  kernel/dma/swiotlb.c      |  4 ++++
>  5 files changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index 0c6ed09f8513..4730a146fa35 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -369,7 +369,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
>  	if (dma_capable(dev, dev_addr, size, true) &&
>  	    !range_straddles_page_boundary(phys, size) &&
>  		!xen_arch_need_swiotlb(dev, phys, dev_addr) &&
> -		swiotlb_force != SWIOTLB_FORCE)
> +		!is_swiotlb_force_bounce(dev))
>  		goto done;
>  
>  	/*
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index dd1c30a83058..8d8855c77d9a 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -84,6 +84,7 @@ extern enum swiotlb_force swiotlb_force;
>   *		unmap calls.
>   * @debugfs:	The dentry to debugfs.
>   * @late_alloc:	%true if allocated using the page allocator
> + * @force_bounce: %true if swiotlb bouncing is forced
>   */
>  struct io_tlb_mem {
>  	phys_addr_t start;
> @@ -94,6 +95,7 @@ struct io_tlb_mem {
>  	spinlock_t lock;
>  	struct dentry *debugfs;
>  	bool late_alloc;
> +	bool force_bounce;
>  	struct io_tlb_slot {
>  		phys_addr_t orig_addr;
>  		size_t alloc_size;
> @@ -109,6 +111,11 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
>  	return mem && paddr >= mem->start && paddr < mem->end;
>  }
>  
> +static inline bool is_swiotlb_force_bounce(struct device *dev)
> +{
> +	return dev->dma_io_tlb_mem->force_bounce;
> +}
> +
>  void __init swiotlb_exit(void);
>  unsigned int swiotlb_max_segment(void);
>  size_t swiotlb_max_mapping_size(struct device *dev);
> @@ -120,6 +127,10 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
>  {
>  	return false;
>  }
> +static inline bool is_swiotlb_force_bounce(struct device *dev)
> +{
> +	return false;
> +}
>  static inline void swiotlb_exit(void)
>  {
>  }
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index 7a88c34d0867..a92465b4eb12 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -496,7 +496,7 @@ size_t dma_direct_max_mapping_size(struct device *dev)
>  {
>  	/* If SWIOTLB is active, use its maximum mapping size */
>  	if (is_swiotlb_active(dev) &&
> -	    (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE))
> +	    (dma_addressing_limited(dev) || is_swiotlb_force_bounce(dev)))
>  		return swiotlb_max_mapping_size(dev);
>  	return SIZE_MAX;
>  }
> diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
> index 13e9e7158d94..4632b0f4f72e 100644
> --- a/kernel/dma/direct.h
> +++ b/kernel/dma/direct.h
> @@ -87,7 +87,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
>  	phys_addr_t phys = page_to_phys(page) + offset;
>  	dma_addr_t dma_addr = phys_to_dma(dev, phys);
>  
> -	if (unlikely(swiotlb_force == SWIOTLB_FORCE))
> +	if (is_swiotlb_force_bounce(dev))
>  		return swiotlb_map(dev, phys, size, dir, attrs);
>  
>  	if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 409694d7a8ad..13891d5de8c9 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -179,6 +179,10 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
>  	mem->end = mem->start + bytes;
>  	mem->index = 0;
>  	mem->late_alloc = late_alloc;
> +
> +	if (swiotlb_force == SWIOTLB_FORCE)
> +		mem->force_bounce = true;
> +
>  	spin_lock_init(&mem->lock);
>  	for (i = 0; i < mem->nslabs; i++) {
>  		mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
> -- 
> 2.32.0.288.g62a8d224e6-goog
> 

^ permalink raw reply

* Re: [PATCH v13 09/12] swiotlb: Add restricted DMA alloc/free support
From: Stefano Stabellini @ 2021-06-17 23:31 UTC (permalink / raw)
  To: Claire Chang
  Cc: heikki.krogerus, thomas.hellstrom, peterz, joonas.lahtinen,
	dri-devel, chris, grant.likely, paulus, Frank Rowand, mingo,
	Marek Szyprowski, sstabellini, Saravana Kannan, Joerg Roedel,
	Rafael J . Wysocki, Christoph Hellwig, Bartosz Golaszewski,
	bskeggs, linux-pci, xen-devel, Thierry Reding, intel-gfx,
	matthew.auld, linux-devicetree, jxgao, daniel, Will Deacon,
	Konrad Rzeszutek Wilk, maarten.lankhorst, airlied, Dan Williams,
	linuxppc-dev, jani.nikula, Rob Herring, rodrigo.vivi, bhelgaas,
	boris.ostrovsky, Andy Shevchenko, jgross, Nicolas Boichat,
	Greg KH, Randy Dunlap, lkml, tfiga, list@263.net:IOMMU DRIVERS,
	Jim Quinlan, xypron.glpk, Robin Murphy, bauerman
In-Reply-To: <20210617062635.1660944-10-tientzu@chromium.org>

On Thu, 17 Jun 2021, Claire Chang wrote:
> Add the functions, swiotlb_{alloc,free} and is_swiotlb_for_alloc to
> support the memory allocation from restricted DMA pool.
> 
> The restricted DMA pool is preferred if available.
> 
> Note that since coherent allocation needs remapping, one must set up
> another device coherent pool by shared-dma-pool and use
> dma_alloc_from_dev_coherent instead for atomic coherent allocation.
> 
> Signed-off-by: Claire Chang <tientzu@chromium.org>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Tested-by: Stefano Stabellini <sstabellini@kernel.org>
> Tested-by: Will Deacon <will@kernel.org>

Acked-by: Stefano Stabellini <sstabellini@kernel.org>


> ---
>  include/linux/swiotlb.h | 26 ++++++++++++++++++++++
>  kernel/dma/direct.c     | 49 +++++++++++++++++++++++++++++++----------
>  kernel/dma/swiotlb.c    | 38 ++++++++++++++++++++++++++++++--
>  3 files changed, 99 insertions(+), 14 deletions(-)
> 
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index 8d8855c77d9a..a73fad460162 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -85,6 +85,7 @@ extern enum swiotlb_force swiotlb_force;
>   * @debugfs:	The dentry to debugfs.
>   * @late_alloc:	%true if allocated using the page allocator
>   * @force_bounce: %true if swiotlb bouncing is forced
> + * @for_alloc:  %true if the pool is used for memory allocation
>   */
>  struct io_tlb_mem {
>  	phys_addr_t start;
> @@ -96,6 +97,7 @@ struct io_tlb_mem {
>  	struct dentry *debugfs;
>  	bool late_alloc;
>  	bool force_bounce;
> +	bool for_alloc;
>  	struct io_tlb_slot {
>  		phys_addr_t orig_addr;
>  		size_t alloc_size;
> @@ -156,4 +158,28 @@ static inline void swiotlb_adjust_size(unsigned long size)
>  extern void swiotlb_print_info(void);
>  extern void swiotlb_set_max_segment(unsigned int);
>  
> +#ifdef CONFIG_DMA_RESTRICTED_POOL
> +struct page *swiotlb_alloc(struct device *dev, size_t size);
> +bool swiotlb_free(struct device *dev, struct page *page, size_t size);
> +
> +static inline bool is_swiotlb_for_alloc(struct device *dev)
> +{
> +	return dev->dma_io_tlb_mem->for_alloc;
> +}
> +#else
> +static inline struct page *swiotlb_alloc(struct device *dev, size_t size)
> +{
> +	return NULL;
> +}
> +static inline bool swiotlb_free(struct device *dev, struct page *page,
> +				size_t size)
> +{
> +	return false;
> +}
> +static inline bool is_swiotlb_for_alloc(struct device *dev)
> +{
> +	return false;
> +}
> +#endif /* CONFIG_DMA_RESTRICTED_POOL */
> +
>  #endif /* __LINUX_SWIOTLB_H */
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index a92465b4eb12..2de33e5d302b 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -75,6 +75,15 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
>  		min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
>  }
>  
> +static void __dma_direct_free_pages(struct device *dev, struct page *page,
> +				    size_t size)
> +{
> +	if (IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL) &&
> +	    swiotlb_free(dev, page, size))
> +		return;
> +	dma_free_contiguous(dev, page, size);
> +}
> +
>  static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
>  		gfp_t gfp)
>  {
> @@ -86,6 +95,16 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
>  
>  	gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
>  					   &phys_limit);
> +	if (IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL) &&
> +	    is_swiotlb_for_alloc(dev)) {
> +		page = swiotlb_alloc(dev, size);
> +		if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
> +			__dma_direct_free_pages(dev, page, size);
> +			return NULL;
> +		}
> +		return page;
> +	}
> +
>  	page = dma_alloc_contiguous(dev, size, gfp);
>  	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
>  		dma_free_contiguous(dev, page, size);
> @@ -142,7 +161,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
>  		gfp |= __GFP_NOWARN;
>  
>  	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
> -	    !force_dma_unencrypted(dev)) {
> +	    !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) {
>  		page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO);
>  		if (!page)
>  			return NULL;
> @@ -155,18 +174,23 @@ void *dma_direct_alloc(struct device *dev, size_t size,
>  	}
>  
>  	if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
> -	    !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> -	    !dev_is_dma_coherent(dev))
> +	    !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev) &&
> +	    !is_swiotlb_for_alloc(dev))
>  		return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
>  
>  	/*
>  	 * Remapping or decrypting memory may block. If either is required and
>  	 * we can't block, allocate the memory from the atomic pools.
> +	 * If restricted DMA (i.e., is_swiotlb_for_alloc) is required, one must
> +	 * set up another device coherent pool by shared-dma-pool and use
> +	 * dma_alloc_from_dev_coherent instead.
>  	 */
>  	if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
>  	    !gfpflags_allow_blocking(gfp) &&
>  	    (force_dma_unencrypted(dev) ||
> -	     (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev))))
> +	     (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> +	      !dev_is_dma_coherent(dev))) &&
> +	    !is_swiotlb_for_alloc(dev))
>  		return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
>  
>  	/* we always manually zero the memory once we are done */
> @@ -237,7 +261,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
>  			return NULL;
>  	}
>  out_free_pages:
> -	dma_free_contiguous(dev, page, size);
> +	__dma_direct_free_pages(dev, page, size);
>  	return NULL;
>  }
>  
> @@ -247,15 +271,15 @@ void dma_direct_free(struct device *dev, size_t size,
>  	unsigned int page_order = get_order(size);
>  
>  	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
> -	    !force_dma_unencrypted(dev)) {
> +	    !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) {
>  		/* cpu_addr is a struct page cookie, not a kernel address */
>  		dma_free_contiguous(dev, cpu_addr, size);
>  		return;
>  	}
>  
>  	if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
> -	    !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> -	    !dev_is_dma_coherent(dev)) {
> +	    !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev) &&
> +	    !is_swiotlb_for_alloc(dev)) {
>  		arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
>  		return;
>  	}
> @@ -273,7 +297,7 @@ void dma_direct_free(struct device *dev, size_t size,
>  	else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED))
>  		arch_dma_clear_uncached(cpu_addr, size);
>  
> -	dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size);
> +	__dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size);
>  }
>  
>  struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
> @@ -283,7 +307,8 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
>  	void *ret;
>  
>  	if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
> -	    force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp))
> +	    force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp) &&
> +	    !is_swiotlb_for_alloc(dev))
>  		return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
>  
>  	page = __dma_direct_alloc_pages(dev, size, gfp);
> @@ -310,7 +335,7 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
>  	*dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
>  	return page;
>  out_free_pages:
> -	dma_free_contiguous(dev, page, size);
> +	__dma_direct_free_pages(dev, page, size);
>  	return NULL;
>  }
>  
> @@ -329,7 +354,7 @@ void dma_direct_free_pages(struct device *dev, size_t size,
>  	if (force_dma_unencrypted(dev))
>  		set_memory_encrypted((unsigned long)vaddr, 1 << page_order);
>  
> -	dma_free_contiguous(dev, page, size);
> +	__dma_direct_free_pages(dev, page, size);
>  }
>  
>  #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index ff09341bb9f5..6499cfbfe95f 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -463,8 +463,9 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
>  
>  	index = wrap = wrap_index(mem, ALIGN(mem->index, stride));
>  	do {
> -		if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
> -		    (orig_addr & iotlb_align_mask)) {
> +		if (orig_addr &&
> +		    (slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
> +			    (orig_addr & iotlb_align_mask)) {
>  			index = wrap_index(mem, index + 1);
>  			continue;
>  		}
> @@ -703,3 +704,36 @@ static int __init swiotlb_create_default_debugfs(void)
>  late_initcall(swiotlb_create_default_debugfs);
>  
>  #endif
> +
> +#ifdef CONFIG_DMA_RESTRICTED_POOL
> +struct page *swiotlb_alloc(struct device *dev, size_t size)
> +{
> +	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> +	phys_addr_t tlb_addr;
> +	int index;
> +
> +	if (!mem)
> +		return NULL;
> +
> +	index = swiotlb_find_slots(dev, 0, size);
> +	if (index == -1)
> +		return NULL;
> +
> +	tlb_addr = slot_addr(mem->start, index);
> +
> +	return pfn_to_page(PFN_DOWN(tlb_addr));
> +}
> +
> +bool swiotlb_free(struct device *dev, struct page *page, size_t size)
> +{
> +	phys_addr_t tlb_addr = page_to_phys(page);
> +
> +	if (!is_swiotlb_buffer(dev, tlb_addr))
> +		return false;
> +
> +	swiotlb_release_slots(dev, tlb_addr);
> +
> +	return true;
> +}
> +
> +#endif /* CONFIG_DMA_RESTRICTED_POOL */
> -- 
> 2.32.0.288.g62a8d224e6-goog
> 

^ permalink raw reply

* Re: [PATCH v6 13/17] powerpc/pseries/vas: Setup IRQ and fault handling
From: Nicholas Piggin @ 2021-06-17 23:34 UTC (permalink / raw)
  To: Haren Myneni, herbert, linux-crypto, linuxppc-dev, mpe
In-Reply-To: <b8fc66dcb783d06a099a303e5cfc69087bb3357a.camel@linux.ibm.com>

Excerpts from Haren Myneni's message of June 18, 2021 6:37 am:
> 
> NX generates an interrupt when sees a fault on the user space
> buffer and the hypervisor forwards that interrupt to OS. Then
> the kernel handles the interrupt by issuing H_GET_NX_FAULT hcall
> to retrieve the fault CRB information.
> 
> This patch also adds changes to setup and free IRQ per each
> window and also handles the fault by updating the CSB.

In as much as this pretty well corresponds to the PowerNV code AFAIKS,
it looks okay to me.

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>

Could you have an irq handler in your ops vector and have 
the core code set up the irq and call your handler, so the Linux irq
handling is in one place? Not something for this series, I was just
wondering.

Thanks,
Nick

> 
> Signed-off-by: Haren Myneni <haren@linux.ibm.com>
> ---
>  arch/powerpc/platforms/pseries/vas.c | 102 +++++++++++++++++++++++++++
>  1 file changed, 102 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
> index f5a44f2f0e99..3385b5400cc6 100644
> --- a/arch/powerpc/platforms/pseries/vas.c
> +++ b/arch/powerpc/platforms/pseries/vas.c
> @@ -11,6 +11,7 @@
>  #include <linux/types.h>
>  #include <linux/delay.h>
>  #include <linux/slab.h>
> +#include <linux/interrupt.h>
>  #include <asm/machdep.h>
>  #include <asm/hvcall.h>
>  #include <asm/plpar_wrappers.h>
> @@ -155,6 +156,50 @@ int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
>  }
>  EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
>  
> +/*
> + * hcall to get fault CRB from the hypervisor.
> + */
> +static int h_get_nx_fault(u32 winid, u64 buffer)
> +{
> +	long rc;
> +
> +	rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
> +
> +	if (rc == H_SUCCESS)
> +		return 0;
> +
> +	pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
> +		rc, winid, buffer);
> +	return -EIO;
> +
> +}
> +
> +/*
> + * Handle the fault interrupt.
> + * When the fault interrupt is received for each window, query the
> + * hypervisor to get the fault CRB on the specific fault. Then
> + * process the CRB by updating CSB or send signal if the user space
> + * CSB is invalid.
> + * Note: The hypervisor forwards an interrupt for each fault request.
> + *	So one fault CRB to process for each H_GET_NX_FAULT hcall.
> + */
> +irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
> +{
> +	struct pseries_vas_window *txwin = data;
> +	struct coprocessor_request_block crb;
> +	struct vas_user_win_ref *tsk_ref;
> +	int rc;
> +
> +	rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
> +	if (!rc) {
> +		tsk_ref = &txwin->vas_win.task_ref;
> +		vas_dump_crb(&crb);
> +		vas_update_csb(&crb, tsk_ref);
> +	}
> +
> +	return IRQ_HANDLED;
> +}
> +
>  /*
>   * Allocate window and setup IRQ mapping.
>   */
> @@ -166,10 +211,51 @@ static int allocate_setup_window(struct pseries_vas_window *txwin,
>  	rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
>  	if (rc)
>  		return rc;
> +	/*
> +	 * On PowerVM, the hypervisor setup and forwards the fault
> +	 * interrupt per window. So the IRQ setup and fault handling
> +	 * will be done for each open window separately.
> +	 */
> +	txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
> +	if (!txwin->fault_virq) {
> +		pr_err("Failed irq mapping %d\n", txwin->fault_irq);
> +		rc = -EINVAL;
> +		goto out_win;
> +	}
> +
> +	txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
> +				txwin->vas_win.winid);
> +	if (!txwin->name) {
> +		rc = -ENOMEM;
> +		goto out_irq;
> +	}
> +
> +	rc = request_threaded_irq(txwin->fault_virq, NULL,
> +				  pseries_vas_fault_thread_fn, IRQF_ONESHOT,
> +				  txwin->name, txwin);
> +	if (rc) {
> +		pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
> +		       txwin->vas_win.winid, txwin->fault_virq, rc);
> +		goto out_free;
> +	}
>  
>  	txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
>  
>  	return 0;
> +out_free:
> +	kfree(txwin->name);
> +out_irq:
> +	irq_dispose_mapping(txwin->fault_virq);
> +out_win:
> +	h_deallocate_vas_window(txwin->vas_win.winid);
> +	return rc;
> +}
> +
> +static inline void free_irq_setup(struct pseries_vas_window *txwin)
> +{
> +	free_irq(txwin->fault_virq, txwin);
> +	kfree(txwin->name);
> +	irq_dispose_mapping(txwin->fault_virq);
>  }
>  
>  static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
> @@ -284,6 +370,11 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
>  	return &txwin->vas_win;
>  
>  out_free:
> +	/*
> +	 * Window is not operational. Free IRQ before closing
> +	 * window so that do not have to hold mutex.
> +	 */
> +	free_irq_setup(txwin);
>  	h_deallocate_vas_window(txwin->vas_win.winid);
>  out:
>  	atomic_dec(&cop_feat_caps->used_lpar_creds);
> @@ -303,7 +394,18 @@ static int deallocate_free_window(struct pseries_vas_window *win)
>  {
>  	int rc = 0;
>  
> +	/*
> +	 * The hypervisor waits for all requests including faults
> +	 * are processed before closing the window - Means all
> +	 * credits have to be returned. In the case of fault
> +	 * request, a credit is returned after OS issues
> +	 * H_GET_NX_FAULT hcall.
> +	 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
> +	 * hcall.
> +	 */
>  	rc = h_deallocate_vas_window(win->vas_win.winid);
> +	if (!rc)
> +		free_irq_setup(win);
>  
>  	return rc;
>  }
> -- 
> 2.18.2
> 
> 
> 

^ permalink raw reply

* Re: [PATCH v6 15/17] crypto/nx: Get NX capabilities for GZIP coprocessor type
From: Nicholas Piggin @ 2021-06-17 23:35 UTC (permalink / raw)
  To: Haren Myneni, herbert, linux-crypto, linuxppc-dev, mpe
In-Reply-To: <f2b6a1fb8b6112595a73d81c67a35af4e7f5d0a3.camel@linux.ibm.com>

Excerpts from Haren Myneni's message of June 18, 2021 6:38 am:
> 
> The hypervisor provides different NX capabilities that it
> supports. These capabilities such as recommended minimum
> compression / decompression lengths and the maximum request
> buffer size in bytes are used to define the user space NX
> request.
> 
> NX will reject the request if the buffer size is more than
> the maximum buffer size. Whereas compression / decompression
> lengths are recommended values for better performance.
> 
> Changes to get NX overall capabilities which points to the
> specific features that the hypervisor supports. Then retrieve
> the capabilities for the specific feature (available only
> for NXGZIP).
> 
> Signed-off-by: Haren Myneni <haren@linux.ibm.com>
> Acked-by: Herbert Xu <herbert@gondor.apana.org.au>

Acked-by: Nicholas Piggin <npiggin@gmail.com>

> ---
>  drivers/crypto/nx/nx-common-pseries.c | 87 +++++++++++++++++++++++++++
>  1 file changed, 87 insertions(+)
> 
> diff --git a/drivers/crypto/nx/nx-common-pseries.c b/drivers/crypto/nx/nx-common-pseries.c
> index cc8dd3072b8b..9fc2abb56019 100644
> --- a/drivers/crypto/nx/nx-common-pseries.c
> +++ b/drivers/crypto/nx/nx-common-pseries.c
> @@ -9,6 +9,8 @@
>   */
>  
>  #include <asm/vio.h>
> +#include <asm/hvcall.h>
> +#include <asm/vas.h>
>  
>  #include "nx-842.h"
>  #include "nx_csbcpb.h" /* struct nx_csbcpb */
> @@ -19,6 +21,29 @@ MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
>  MODULE_ALIAS_CRYPTO("842");
>  MODULE_ALIAS_CRYPTO("842-nx");
>  
> +/*
> + * Coprocessor type specific capabilities from the hypervisor.
> + */
> +struct hv_nx_cop_caps {
> +	__be64	descriptor;
> +	__be64	req_max_processed_len;	/* Max bytes in one GZIP request */
> +	__be64	min_compress_len;	/* Min compression size in bytes */
> +	__be64	min_decompress_len;	/* Min decompression size in bytes */
> +} __packed __aligned(0x1000);
> +
> +/*
> + * Coprocessor type specific capabilities.
> + */
> +struct nx_cop_caps {
> +	u64	descriptor;
> +	u64	req_max_processed_len;	/* Max bytes in one GZIP request */
> +	u64	min_compress_len;	/* Min compression in bytes */
> +	u64	min_decompress_len;	/* Min decompression in bytes */
> +};
> +
> +static u64 caps_feat;
> +static struct nx_cop_caps nx_cop_caps;
> +
>  static struct nx842_constraints nx842_pseries_constraints = {
>  	.alignment =	DDE_BUFFER_ALIGN,
>  	.multiple =	DDE_BUFFER_LAST_MULT,
> @@ -1065,6 +1090,64 @@ static void nx842_remove(struct vio_dev *viodev)
>  	kfree(old_devdata);
>  }
>  
> +/*
> + * Get NX capabilities from the hypervisor.
> + * Only NXGZIP capabilities are provided by the hypersvisor right
> + * now and these values are available to user space with sysfs.
> + */
> +static void __init nxcop_get_capabilities(void)
> +{
> +	struct hv_vas_all_caps *hv_caps;
> +	struct hv_nx_cop_caps *hv_nxc;
> +	int rc;
> +
> +	hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
> +	if (!hv_caps)
> +		return;
> +	/*
> +	 * Get NX overall capabilities with feature type=0
> +	 */
> +	rc = h_query_vas_capabilities(H_QUERY_NX_CAPABILITIES, 0,
> +					  (u64)virt_to_phys(hv_caps));
> +	if (rc)
> +		goto out;
> +
> +	caps_feat = be64_to_cpu(hv_caps->feat_type);
> +	/*
> +	 * NX-GZIP feature available
> +	 */
> +	if (caps_feat & VAS_NX_GZIP_FEAT_BIT) {
> +		hv_nxc = kmalloc(sizeof(*hv_nxc), GFP_KERNEL);
> +		if (!hv_nxc)
> +			goto out;
> +		/*
> +		 * Get capabilities for NX-GZIP feature
> +		 */
> +		rc = h_query_vas_capabilities(H_QUERY_NX_CAPABILITIES,
> +						  VAS_NX_GZIP_FEAT,
> +						  (u64)virt_to_phys(hv_nxc));
> +	} else {
> +		pr_err("NX-GZIP feature is not available\n");
> +		rc = -EINVAL;
> +	}
> +
> +	if (!rc) {
> +		nx_cop_caps.descriptor = be64_to_cpu(hv_nxc->descriptor);
> +		nx_cop_caps.req_max_processed_len =
> +				be64_to_cpu(hv_nxc->req_max_processed_len);
> +		nx_cop_caps.min_compress_len =
> +				be64_to_cpu(hv_nxc->min_compress_len);
> +		nx_cop_caps.min_decompress_len =
> +				be64_to_cpu(hv_nxc->min_decompress_len);
> +	} else {
> +		caps_feat = 0;
> +	}
> +
> +	kfree(hv_nxc);
> +out:
> +	kfree(hv_caps);
> +}
> +
>  static const struct vio_device_id nx842_vio_driver_ids[] = {
>  	{"ibm,compression-v1", "ibm,compression"},
>  	{"", ""},
> @@ -1092,6 +1175,10 @@ static int __init nx842_pseries_init(void)
>  		return -ENOMEM;
>  
>  	RCU_INIT_POINTER(devdata, new_devdata);
> +	/*
> +	 * Get NX capabilities from the hypervisor.
> +	 */
> +	nxcop_get_capabilities();
>  
>  	ret = vio_register_driver(&nx842_vio_driver);
>  	if (ret) {
> -- 
> 2.18.2
> 
> 
> 

^ permalink raw reply

* Re: [PATCH v6 16/17] crypto/nx: Add sysfs interface to export NX capabilities
From: Nicholas Piggin @ 2021-06-17 23:41 UTC (permalink / raw)
  To: Haren Myneni, herbert, linux-crypto, linuxppc-dev, mpe
In-Reply-To: <510da86abbd904878d5f13d74aba72603c37d783.camel@linux.ibm.com>

Excerpts from Haren Myneni's message of June 18, 2021 6:39 am:
> 
> Export NX-GZIP capabilities to usrespace in sysfs
> /sys/devices/vio/ibm,compression-v1/nx_gzip_caps directory.
> These are queried by userspace accelerator libraries to set
> minimum length heuristics and maximum limits on request sizes.
> 
> NX-GZIP capabilities:
> min_compress_len  /*Recommended minimum compress length in bytes*/
> min_decompress_len /*Recommended minimum decompress length in bytes*/
> req_max_processed_len /* Maximum number of bytes processed in one
> 			request */
> 
> NX will return RMA_Reject if the request buffer size is greater
> than req_max_processed_len.
> 
> Signed-off-by: Haren Myneni <haren@linux.ibm.com>
> Acked-by: Herbert Xu <herbert@gondor.apana.org.au>

Again, if you could just move those ^^ C style comments into the
code. Possibly have a small comment with the sysfs stuff saying that 
it's ABI which userspace code queries when using the accelerator (or
if you have an ABI documentation somewhere, point the comments to that).

sysfs is ABI, but some bits are more ABI than others, it would just be
good to be clear that it is actually used, and can't be changed.

Aside from that,

Acked-by: Nicholas Piggin <npiggin@gmail.com>

Thanks,
Nick

> ---
>  drivers/crypto/nx/nx-common-pseries.c | 43 +++++++++++++++++++++++++++
>  1 file changed, 43 insertions(+)
> 
> diff --git a/drivers/crypto/nx/nx-common-pseries.c b/drivers/crypto/nx/nx-common-pseries.c
> index 9fc2abb56019..f51a50d40504 100644
> --- a/drivers/crypto/nx/nx-common-pseries.c
> +++ b/drivers/crypto/nx/nx-common-pseries.c
> @@ -967,6 +967,36 @@ static struct attribute_group nx842_attribute_group = {
>  	.attrs = nx842_sysfs_entries,
>  };
>  
> +#define	nxcop_caps_read(_name)						\
> +static ssize_t nxcop_##_name##_show(struct device *dev,			\
> +			struct device_attribute *attr, char *buf)	\
> +{									\
> +	return sprintf(buf, "%lld\n", nx_cop_caps._name);		\
> +}
> +
> +#define NXCT_ATTR_RO(_name)						\
> +	nxcop_caps_read(_name);						\
> +	static struct device_attribute dev_attr_##_name = __ATTR(_name,	\
> +						0444,			\
> +						nxcop_##_name##_show,	\
> +						NULL);
> +
> +NXCT_ATTR_RO(req_max_processed_len);
> +NXCT_ATTR_RO(min_compress_len);
> +NXCT_ATTR_RO(min_decompress_len);
> +
> +static struct attribute *nxcop_caps_sysfs_entries[] = {
> +	&dev_attr_req_max_processed_len.attr,
> +	&dev_attr_min_compress_len.attr,
> +	&dev_attr_min_decompress_len.attr,
> +	NULL,
> +};
> +
> +static struct attribute_group nxcop_caps_attr_group = {
> +	.name	=	"nx_gzip_caps",
> +	.attrs	=	nxcop_caps_sysfs_entries,
> +};
> +
>  static struct nx842_driver nx842_pseries_driver = {
>  	.name =		KBUILD_MODNAME,
>  	.owner =	THIS_MODULE,
> @@ -1056,6 +1086,16 @@ static int nx842_probe(struct vio_dev *viodev,
>  		goto error;
>  	}
>  
> +	if (caps_feat) {
> +		if (sysfs_create_group(&viodev->dev.kobj,
> +					&nxcop_caps_attr_group)) {
> +			dev_err(&viodev->dev,
> +				"Could not create sysfs NX capability entries\n");
> +			ret = -1;
> +			goto error;
> +		}
> +	}
> +
>  	return 0;
>  
>  error_unlock:
> @@ -1075,6 +1115,9 @@ static void nx842_remove(struct vio_dev *viodev)
>  	pr_info("Removing IBM Power 842 compression device\n");
>  	sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group);
>  
> +	if (caps_feat)
> +		sysfs_remove_group(&viodev->dev.kobj, &nxcop_caps_attr_group);
> +
>  	crypto_unregister_alg(&nx842_pseries_alg);
>  
>  	spin_lock_irqsave(&devdata_mutex, flags);
> -- 
> 2.18.2
> 
> 
> 

^ permalink raw reply

* [Bug 213079] [bisected] IRQ problems and crashes on a PowerMac G5 with 5.12.3
From: bugzilla-daemon @ 2021-06-17 23:48 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-213079-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=213079

--- Comment #7 from Erhard F. (erhard_f@mailbox.org) ---
(In reply to Oliver O'Halloran from comment #5)
> Could you add "debug" to the kernel command line and post the dmesg output
> for a boot with the patch applied and reverted?
Ok, on top of 5.13-rc6 I reverted fbbefb3, which went fine execpt the
"pci-ioda.c"-part where I needed to manually apple the old code.

Here's the vanilla debug dmesg and the debug dmesg with the patch reverted.

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* [Bug 213079] [bisected] IRQ problems and crashes on a PowerMac G5 with 5.12.3
From: bugzilla-daemon @ 2021-06-17 23:51 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-213079-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=213079

--- Comment #8 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 297435
  --> https://bugzilla.kernel.org/attachment.cgi?id=297435&action=edit
dmesg (5.13-rc6 + debug, PowerMac G5 11,2)

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* [Bug 213079] [bisected] IRQ problems and crashes on a PowerMac G5 with 5.12.3
From: bugzilla-daemon @ 2021-06-17 23:52 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-213079-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=213079

--- Comment #9 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 297437
  --> https://bugzilla.kernel.org/attachment.cgi?id=297437&action=edit
dmesg (5.13-rc6 w. patch fbbefb3 reverted + debug, PowerMac G5 11,2)

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* [Bug 213079] [bisected] IRQ problems and crashes on a PowerMac G5 with 5.12.3
From: bugzilla-daemon @ 2021-06-17 23:54 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-213079-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=213079

Erhard F. (erhard_f@mailbox.org) changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
 Attachment #296759|0                           |1
        is obsolete|                            |
 Attachment #296761|0                           |1
        is obsolete|                            |

--- Comment #10 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 297439
  --> https://bugzilla.kernel.org/attachment.cgi?id=297439&action=edit
kernel .config (5.13-rc6, PowerMac G5 11,2)

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* Re: [PATCH 8/8] membarrier: Rewrite sync_core_before_usermode() and improve documentation
From: Andy Lutomirski @ 2021-06-18  0:12 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Catalin Marinas, Will Deacon, linux-mm, Peter Zijlstra, x86,
	linux-kernel, Nicholas Piggin, Dave Hansen, Paul Mackerras,
	stable, Andrew Morton, linuxppc-dev, linux-arm-kernel
In-Reply-To: <827549827.10547.1623941277868.JavaMail.zimbra@efficios.com>

On 6/17/21 7:47 AM, Mathieu Desnoyers wrote:

> Please change back this #ifndef / #else / #endif within function for
> 
> if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE)) {
>   ...
> } else {
>   ...
> }
> 
> I don't think mixing up preprocessor and code logic makes it more readable.

I agree, but I don't know how to make the result work well.
membarrier_sync_core_before_usermode() isn't defined in the !IS_ENABLED
case, so either I need to fake up a definition or use #ifdef.

If I faked up a definition, I would want to assert, at build time, that
it isn't called.  I don't think we can do:

static void membarrier_sync_core_before_usermode()
{
    BUILD_BUG_IF_REACHABLE();
}


^ permalink raw reply

* Re: [PATCH 8/8] membarrier: Rewrite sync_core_before_usermode() and improve documentation
From: Andy Lutomirski @ 2021-06-18  0:13 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Catalin Marinas, Will Deacon, linux-mm, Peter Zijlstra, x86,
	linux-kernel, Nicholas Piggin, Dave Hansen, Paul Mackerras,
	stable, Andrew Morton, linuxppc-dev, linux-arm-kernel
In-Reply-To: <1939350496.10696.1623943014767.JavaMail.zimbra@efficios.com>

On 6/17/21 8:16 AM, Mathieu Desnoyers wrote:
> ----- On Jun 15, 2021, at 11:21 PM, Andy Lutomirski luto@kernel.org wrote:
> 
> [...]
> 
>> +# An architecture that wants to support
>> +# MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE needs to define precisely what it
>> +# is supposed to do and implement membarrier_sync_core_before_usermode() to
>> +# make it do that.  Then it can select ARCH_HAS_MEMBARRIER_SYNC_CORE via
>> +# Kconfig.Unfortunately, MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE is not a
>> +# fantastic API and may not make sense on all architectures.  Once an
>> +# architecture meets these requirements,
> 
> Can we please remove the editorial comment about the quality of the membarrier
> sync-core's API ?

Done
>> +#
>> +# On x86, a program can safely modify code, issue
>> +# MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, and then execute that code, via
>> +# the modified address or an alias, from any thread in the calling process.
>> +#
>> +# On arm64, a program can modify code, flush the icache as needed, and issue
>> +# MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE to force a "context synchronizing
>> +# event", aka pipeline flush on all CPUs that might run the calling process.
>> +# Then the program can execute the modified code as long as it is executed
>> +# from an address consistent with the icache flush and the CPU's cache type.
>> +#
>> +# On powerpc, a program can use MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
>> +# similarly to arm64.  It would be nice if the powerpc maintainers could
>> +# add a more clear explanantion.
> 
> We should document the requirements on ARMv7 as well.

Done.

> 
> Thanks,
> 
> Mathieu
> 


^ permalink raw reply

* Re: [PATCH v6 13/17] powerpc/pseries/vas: Setup IRQ and fault handling
From: Haren Myneni @ 2021-06-18  2:09 UTC (permalink / raw)
  To: Nicholas Piggin, herbert, linux-crypto, linuxppc-dev, mpe
In-Reply-To: <1623972635.u8jj6g26re.astroid@bobo.none>

On Fri, 2021-06-18 at 09:34 +1000, Nicholas Piggin wrote:
> Excerpts from Haren Myneni's message of June 18, 2021 6:37 am:
> > NX generates an interrupt when sees a fault on the user space
> > buffer and the hypervisor forwards that interrupt to OS. Then
> > the kernel handles the interrupt by issuing H_GET_NX_FAULT hcall
> > to retrieve the fault CRB information.
> > 
> > This patch also adds changes to setup and free IRQ per each
> > window and also handles the fault by updating the CSB.
> 
> In as much as this pretty well corresponds to the PowerNV code
> AFAIKS,
> it looks okay to me.
> 
> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
> 
> Could you have an irq handler in your ops vector and have 
> the core code set up the irq and call your handler, so the Linux irq
> handling is in one place? Not something for this series, I was just
> wondering.

Not possible to have common core code for IRQ  setup. 

PowerNV: Every VAS instance will be having IRQ and this setup will be
done during initialization (system boot). A fault FIFO will be assigned
for each instance and registered to VAS so that VAS/NX writes fault CRB
into this FIFO.  

PowerVM: Each window will have an IRQ and the setup will be done during
window open. 

Thanks
Haren

> 
> Thanks,
> Nick
> 
> > Signed-off-by: Haren Myneni <haren@linux.ibm.com>
> > ---
> >  arch/powerpc/platforms/pseries/vas.c | 102
> > +++++++++++++++++++++++++++
> >  1 file changed, 102 insertions(+)
> > 
> > diff --git a/arch/powerpc/platforms/pseries/vas.c
> > b/arch/powerpc/platforms/pseries/vas.c
> > index f5a44f2f0e99..3385b5400cc6 100644
> > --- a/arch/powerpc/platforms/pseries/vas.c
> > +++ b/arch/powerpc/platforms/pseries/vas.c
> > @@ -11,6 +11,7 @@
> >  #include <linux/types.h>
> >  #include <linux/delay.h>
> >  #include <linux/slab.h>
> > +#include <linux/interrupt.h>
> >  #include <asm/machdep.h>
> >  #include <asm/hvcall.h>
> >  #include <asm/plpar_wrappers.h>
> > @@ -155,6 +156,50 @@ int h_query_vas_capabilities(const u64 hcall,
> > u8 query_type, u64 result)
> >  }
> >  EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
> >  
> > +/*
> > + * hcall to get fault CRB from the hypervisor.
> > + */
> > +static int h_get_nx_fault(u32 winid, u64 buffer)
> > +{
> > +	long rc;
> > +
> > +	rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
> > +
> > +	if (rc == H_SUCCESS)
> > +		return 0;
> > +
> > +	pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
> > +		rc, winid, buffer);
> > +	return -EIO;
> > +
> > +}
> > +
> > +/*
> > + * Handle the fault interrupt.
> > + * When the fault interrupt is received for each window, query the
> > + * hypervisor to get the fault CRB on the specific fault. Then
> > + * process the CRB by updating CSB or send signal if the user
> > space
> > + * CSB is invalid.
> > + * Note: The hypervisor forwards an interrupt for each fault
> > request.
> > + *	So one fault CRB to process for each H_GET_NX_FAULT hcall.
> > + */
> > +irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
> > +{
> > +	struct pseries_vas_window *txwin = data;
> > +	struct coprocessor_request_block crb;
> > +	struct vas_user_win_ref *tsk_ref;
> > +	int rc;
> > +
> > +	rc = h_get_nx_fault(txwin->vas_win.winid,
> > (u64)virt_to_phys(&crb));
> > +	if (!rc) {
> > +		tsk_ref = &txwin->vas_win.task_ref;
> > +		vas_dump_crb(&crb);
> > +		vas_update_csb(&crb, tsk_ref);
> > +	}
> > +
> > +	return IRQ_HANDLED;
> > +}
> > +
> >  /*
> >   * Allocate window and setup IRQ mapping.
> >   */
> > @@ -166,10 +211,51 @@ static int allocate_setup_window(struct
> > pseries_vas_window *txwin,
> >  	rc = h_allocate_vas_window(txwin, domain, wintype,
> > DEF_WIN_CREDS);
> >  	if (rc)
> >  		return rc;
> > +	/*
> > +	 * On PowerVM, the hypervisor setup and forwards the fault
> > +	 * interrupt per window. So the IRQ setup and fault handling
> > +	 * will be done for each open window separately.
> > +	 */
> > +	txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
> > +	if (!txwin->fault_virq) {
> > +		pr_err("Failed irq mapping %d\n", txwin->fault_irq);
> > +		rc = -EINVAL;
> > +		goto out_win;
> > +	}
> > +
> > +	txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
> > +				txwin->vas_win.winid);
> > +	if (!txwin->name) {
> > +		rc = -ENOMEM;
> > +		goto out_irq;
> > +	}
> > +
> > +	rc = request_threaded_irq(txwin->fault_virq, NULL,
> > +				  pseries_vas_fault_thread_fn,
> > IRQF_ONESHOT,
> > +				  txwin->name, txwin);
> > +	if (rc) {
> > +		pr_err("VAS-Window[%d]: Request IRQ(%u) failed with
> > %d\n",
> > +		       txwin->vas_win.winid, txwin->fault_virq, rc);
> > +		goto out_free;
> > +	}
> >  
> >  	txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
> >  
> >  	return 0;
> > +out_free:
> > +	kfree(txwin->name);
> > +out_irq:
> > +	irq_dispose_mapping(txwin->fault_virq);
> > +out_win:
> > +	h_deallocate_vas_window(txwin->vas_win.winid);
> > +	return rc;
> > +}
> > +
> > +static inline void free_irq_setup(struct pseries_vas_window
> > *txwin)
> > +{
> > +	free_irq(txwin->fault_virq, txwin);
> > +	kfree(txwin->name);
> > +	irq_dispose_mapping(txwin->fault_virq);
> >  }
> >  
> >  static struct vas_window *vas_allocate_window(int vas_id, u64
> > flags,
> > @@ -284,6 +370,11 @@ static struct vas_window
> > *vas_allocate_window(int vas_id, u64 flags,
> >  	return &txwin->vas_win;
> >  
> >  out_free:
> > +	/*
> > +	 * Window is not operational. Free IRQ before closing
> > +	 * window so that do not have to hold mutex.
> > +	 */
> > +	free_irq_setup(txwin);
> >  	h_deallocate_vas_window(txwin->vas_win.winid);
> >  out:
> >  	atomic_dec(&cop_feat_caps->used_lpar_creds);
> > @@ -303,7 +394,18 @@ static int deallocate_free_window(struct
> > pseries_vas_window *win)
> >  {
> >  	int rc = 0;
> >  
> > +	/*
> > +	 * The hypervisor waits for all requests including faults
> > +	 * are processed before closing the window - Means all
> > +	 * credits have to be returned. In the case of fault
> > +	 * request, a credit is returned after OS issues
> > +	 * H_GET_NX_FAULT hcall.
> > +	 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
> > +	 * hcall.
> > +	 */
> >  	rc = h_deallocate_vas_window(win->vas_win.winid);
> > +	if (!rc)
> > +		free_irq_setup(win);
> >  
> >  	return rc;
> >  }
> > -- 
> > 2.18.2
> > 
> > 
> > 


^ permalink raw reply

* Re: [PATCH 02/11] powerpc: Add Microwatt device tree
From: Paul Mackerras @ 2021-06-18  2:58 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev
In-Reply-To: <8735thrtl3.fsf@mpe.ellerman.id.au>

On Thu, Jun 17, 2021 at 02:41:28PM +1000, Michael Ellerman wrote:
> Paul Mackerras <paulus@ozlabs.org> writes:
> >
> 
> Little bit of change log never hurts :)
> 
> > Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
> > ---
> >  arch/powerpc/boot/dts/microwatt.dts | 105 ++++++++++++++++++++++++++++
> >  1 file changed, 105 insertions(+)
> >  create mode 100644 arch/powerpc/boot/dts/microwatt.dts
> >
> > diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts
> > new file mode 100644
> > index 000000000000..9b2e64da9432
> > --- /dev/null
> > +++ b/arch/powerpc/boot/dts/microwatt.dts
> > @@ -0,0 +1,105 @@
> > +/dts-v1/;
> > +
> > +/ {
> > +	#size-cells = <0x02>;
> > +	#address-cells = <0x02>;
> > +	model-name = "microwatt";
> > +	compatible = "microwatt-soc";
> > +
> > +	reserved-memory {
> > +		#size-cells = <0x02>;
> > +		#address-cells = <0x02>;
> > +		ranges;
> > +	};
> > +
> > +	memory@0 {
> > +		device_type = "memory";
> > +		reg = <0x00000000 0x00000000 0x00000000 0x10000000>;
> > +	};
> > +
> > +	cpus {
> > +		#size-cells = <0x00>;
> > +		#address-cells = <0x01>;
> > +
> > +		ibm,powerpc-cpu-features {
> > +			display-name = "Microwatt";
> > +			isa = <3000>;
> > +			device_type = "cpu-features";
> > +			compatible = "ibm,powerpc-cpu-features";
> > +
> > +			mmu-radix {
> > +				isa = <3000>;
> > +				usable-privilege = <2>;
> 
> skiboot says 6?

That's for a machine with hypervisor mode - if I make it 6 here, then
the kernel prints a message about "HV feature passed to guest" and
then another about "missing dependency" and ends up not enabling the
feature.

Note that microwatt usually has MSR[HV] = 0 (you can set it to 1 but
it doesn't do anything).  Arguably it should force it to 1 always, but
if I do that, then the kernel starts trying to execute hrfid
instructions, which microwatt doesn't have (for example in
masked_Hinterrupt).

> > +				os-support = <0x00>;
> > +			};
> > +
> > +			little-endian {
> > +				isa = <0>;
> 
> I guess you just copied that from skiboot.
> 
> The binding says it's required, but AFAICS the kernel doesn't use it.
>
> And isa = 0 mean ISA_BASE, according to the skiboot source.

I changed it to 2050 since true little-endian mode was introduced for
POWER6.

> > +		PowerPC,Microwatt@0 {
> > +			i-cache-sets = <2>;
> > +			ibm,dec-bits = <64>;
> > +			reservation-granule-size = <64>;
> 
> Never seen that one before.

It's in PAPR+ (D.6.1.4, CPU Node Properties).

> > +			clock-frequency = <100000000>;
> > +			timebase-frequency = <100000000>;
> 
> Those seem quite high?

No, 100MHz is correct.

> > +			i-tlb-sets = <1>;
> > +			ibm,ppc-interrupt-server#s = <0>;
> > +			i-cache-block-size = <64>;
> > +			d-cache-block-size = <64>;
> 
> The kernel reads those, but also hard codes 128 in places.

Interesting, because it all seems to work.  I assume the critical
thing is doing the right dcbz's.

> See L1_CACHE_BYTES.
> 
> > +			ibm,pa-features = [40 00 c2 27 00 00 00 80 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 00 80 00 80 00 00 00 80 00 80 00 00 00 80 00 80 00 80 00 80 00 80 00 80 00 80 00 80 00 80 00 80 00 80 00 80 00 80 00];
> 
> Do you need that?
> 
> You shouldn't, if we've done things right with the cpu-features support.

Turns out I don't need it.

> > +			d-cache-sets = <2>;
> > +			ibm,pir = <0x3c>;
> 
> Needed?

Nope.

> > +			i-tlb-size = <64>;
> > +			cpu-version = <0x990000>;
> > +			status = "okay";
> > +			i-cache-size = <0x1000>;
> > +			ibm,processor-radix-AP-encodings = <0x0c 0xa0000010 0x20000015 0x4000001e>;
> > +			tlb-size = <0>;
> > +			tlb-sets = <0>;
> 
> Does the kernel use those? I can't find it.
> 
> > +			device_type = "cpu";
> > +			d-tlb-size = <128>;
> > +			d-tlb-sets = <2>;
> > +			reg = <0>;
> > +			general-purpose;
> > +			64-bit;
> > +			d-cache-size = <0x1000>;
> > +			ibm,chip-id = <0x00>;
> > +		};
> > +	};
> > +
> > +	chosen {
> > +		bootargs = "";
> > +		ibm,architecture-vec-5 = [19 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 40 00 40];
> 
> Do you need that?
> 
> I assume you run with MSR[HV] = 1 (you don't say anywhere), in which
> case we never look at that property.

I do need that given we're running with MSR[HV] = 0; without that the
kernel assumes HPT mode.

Paul.

^ permalink raw reply

* Re: [PATCH 01/18] mm: add a kunmap_local_dirty helper
From: Ira Weiny @ 2021-06-18  3:01 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, linux-block, Thomas Bogendoerfer, Herbert Xu,
	Mike Snitzer, David S. Miller, Geoff Levand, ceph-devel,
	linux-mips, Dongsheng Yang, linux-kernel, James E.J. Bottomley,
	dm-devel, linux-arch, Thomas Gleixner, linuxppc-dev, Ilya Dryomov
In-Reply-To: <20210615132456.753241-2-hch@lst.de>

On Tue, Jun 15, 2021 at 03:24:39PM +0200, Christoph Hellwig wrote:
> Add a helper that calls flush_kernel_dcache_page before unmapping the
> local mapping.  flush_kernel_dcache_page is required for all pages
> potentially mapped into userspace that were written to using kmap*,
> so having a helper that does the right thing can be very convenient.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  include/linux/highmem-internal.h | 7 +++++++
>  include/linux/highmem.h          | 4 ++++
>  2 files changed, 11 insertions(+)
> 
> diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
> index 7902c7d8b55f..bd37706db147 100644
> --- a/include/linux/highmem-internal.h
> +++ b/include/linux/highmem-internal.h
> @@ -224,4 +224,11 @@ do {								\
>  	__kunmap_local(__addr);					\
>  } while (0)
>  
> +#define kunmap_local_dirty(__page, __addr)			\

I think having to store the page and addr to return to kunmap_local_dirty() is
going to be a pain in some code paths.  Not a show stopper but see below...

> +do {								\
> +	if (!PageSlab(__page))					\

Was there some clarification why the page can't be a Slab page?  Or is this
just an optimization?

> +		flush_kernel_dcache_page(__page);		\

Is this required on 32bit systems?  Why is kunmap_flush_on_unmap() not
sufficient on 64bit systems?  The normal kunmap_local() path does that.

I'm sorry but I did not see a conclusion to my query on V1. Herbert implied the
he just copied from the crypto code.[1]  I'm concerned that this _dirty() call
is just going to confuse the users of kmap even more.  So why can't we get to
the bottom of why flush_kernel_dcache_page() needs so much logic around it
before complicating the general kernel users.

I would like to see it go away if possible.

Ira

[1] https://lore.kernel.org/lkml/20210615050258.GA5208@gondor.apana.org.au/

> +	kunmap_local(__addr);					\
> +} while (0)
> +
>  #endif
> diff --git a/include/linux/highmem.h b/include/linux/highmem.h
> index 832b49b50c7b..65f548db4f2d 100644
> --- a/include/linux/highmem.h
> +++ b/include/linux/highmem.h
> @@ -93,6 +93,10 @@ static inline void kmap_flush_unused(void);
>   * On HIGHMEM enabled systems mapping a highmem page has the side effect of
>   * disabling migration in order to keep the virtual address stable across
>   * preemption. No caller of kmap_local_page() can rely on this side effect.
> + *
> + * If data is written to the returned kernel mapping, the callers needs to
> + * unmap the mapping using kunmap_local_dirty(), else kunmap_local() should
> + * be used.
>   */
>  static inline void *kmap_local_page(struct page *page);
>  
> -- 
> 2.30.2
> 

^ permalink raw reply

* Re: [RFC PATCH 8/8] powerpc/papr_scm: Use FORM2 associativity details
From: Aneesh Kumar K.V @ 2021-06-18  3:18 UTC (permalink / raw)
  To: Daniel Henrique Barboza, David Gibson; +Cc: Nathan Lynch, linuxppc-dev
In-Reply-To: <5e180af8-9d48-7519-0b35-967065f8e3e1@gmail.com>

On 6/18/21 1:30 AM, Daniel Henrique Barboza wrote:
> 
> 
> On 6/17/21 8:11 AM, Aneesh Kumar K.V wrote:
>> Daniel Henrique Barboza <danielhb413@gmail.com> writes:
>>
>>> On 6/17/21 4:46 AM, David Gibson wrote:
>>>> On Tue, Jun 15, 2021 at 12:35:17PM +0530, Aneesh Kumar K.V wrote:
>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>

> 
> 
> In fact, the more I speak about this PMEM scenario the more I wonder:
> why doesn't the PMEM driver, when switching from persistent to regular
> memory and vice-versa, take care of all the necessary updates in the
> numa-distance-table and kernel internals to reflect the current distances
> of its current mode? Is this a technical limitation?
> 
> 

I sent v4 doing something similar to this .

-aneesh


^ permalink raw reply

* Re: [PATCH 01/18] mm: add a kunmap_local_dirty helper
From: Herbert Xu @ 2021-06-18  3:37 UTC (permalink / raw)
  To: Ira Weiny
  Cc: Jens Axboe, linux-block, Thomas Bogendoerfer, Mike Snitzer,
	David S. Miller, Geoff Levand, Christoph Lameter, ceph-devel,
	linux-mips, Dongsheng Yang, linux-kernel, James E.J. Bottomley,
	dm-devel, linux-arch, Thomas Gleixner, Ilya Dryomov, linuxppc-dev,
	Christoph Hellwig
In-Reply-To: <20210618030157.GA1905674@iweiny-DESK2.sc.intel.com>

On Thu, Jun 17, 2021 at 08:01:57PM -0700, Ira Weiny wrote:
>
> > +		flush_kernel_dcache_page(__page);		\
> 
> Is this required on 32bit systems?  Why is kunmap_flush_on_unmap() not
> sufficient on 64bit systems?  The normal kunmap_local() path does that.
> 
> I'm sorry but I did not see a conclusion to my query on V1. Herbert implied the
> he just copied from the crypto code.[1]  I'm concerned that this _dirty() call
> is just going to confuse the users of kmap even more.  So why can't we get to
> the bottom of why flush_kernel_dcache_page() needs so much logic around it
> before complicating the general kernel users.
> 
> I would like to see it go away if possible.

This thread may be related:

https://lwn.net/Articles/240249/

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [PATCH v3] lockdown,selinux: fix wrong subject in some SELinux lockdown checks
From: Paul Moore @ 2021-06-18  3:40 UTC (permalink / raw)
  To: Ondrej Mosnacek
  Cc: linux-efi, linux-pci, linux-cxl, Steffen Klassert, Herbert Xu,
	x86, James Morris, linux-acpi, Ingo Molnar, linux-serial,
	linux-pm, selinux, Steven Rostedt, Casey Schaufler, netdev,
	Stephen Smalley, kexec, linux-kernel, linux-security-module,
	linux-fsdevel, bpf, linuxppc-dev, David S . Miller
In-Reply-To: <20210616085118.1141101-1-omosnace@redhat.com>

On Wed, Jun 16, 2021 at 4:51 AM Ondrej Mosnacek <omosnace@redhat.com> wrote:
>
> Commit 59438b46471a ("security,lockdown,selinux: implement SELinux
> lockdown") added an implementation of the locked_down LSM hook to
> SELinux, with the aim to restrict which domains are allowed to perform
> operations that would breach lockdown.
>
> However, in several places the security_locked_down() hook is called in
> situations where the current task isn't doing any action that would
> directly breach lockdown, leading to SELinux checks that are basically
> bogus.
>
> To fix this, add an explicit struct cred pointer argument to
> security_lockdown() and define NULL as a special value to pass instead
> of current_cred() in such situations. LSMs that take the subject
> credentials into account can then fall back to some default or ignore
> such calls altogether. In the SELinux lockdown hook implementation, use
> SECINITSID_KERNEL in case the cred argument is NULL.
>
> Most of the callers are updated to pass current_cred() as the cred
> pointer, thus maintaining the same behavior. The following callers are
> modified to pass NULL as the cred pointer instead:
> 1. arch/powerpc/xmon/xmon.c
>      Seems to be some interactive debugging facility. It appears that
>      the lockdown hook is called from interrupt context here, so it
>      should be more appropriate to request a global lockdown decision.
> 2. fs/tracefs/inode.c:tracefs_create_file()
>      Here the call is used to prevent creating new tracefs entries when
>      the kernel is locked down. Assumes that locking down is one-way -
>      i.e. if the hook returns non-zero once, it will never return zero
>      again, thus no point in creating these files. Also, the hook is
>      often called by a module's init function when it is loaded by
>      userspace, where it doesn't make much sense to do a check against
>      the current task's creds, since the task itself doesn't actually
>      use the tracing functionality (i.e. doesn't breach lockdown), just
>      indirectly makes some new tracepoints available to whoever is
>      authorized to use them.
> 3. net/xfrm/xfrm_user.c:copy_to_user_*()
>      Here a cryptographic secret is redacted based on the value returned
>      from the hook. There are two possible actions that may lead here:
>      a) A netlink message XFRM_MSG_GETSA with NLM_F_DUMP set - here the
>         task context is relevant, since the dumped data is sent back to
>         the current task.
>      b) When adding/deleting/updating an SA via XFRM_MSG_xxxSA, the
>         dumped SA is broadcasted to tasks subscribed to XFRM events -
>         here the current task context is not relevant as it doesn't
>         represent the tasks that could potentially see the secret.
>      It doesn't seem worth it to try to keep using the current task's
>      context in the a) case, since the eventual data leak can be
>      circumvented anyway via b), plus there is no way for the task to
>      indicate that it doesn't care about the actual key value, so the
>      check could generate a lot of "false alert" denials with SELinux.
>      Thus, let's pass NULL instead of current_cred() here faute de
>      mieux.
>
> Improvements-suggested-by: Casey Schaufler <casey@schaufler-ca.com>
> Improvements-suggested-by: Paul Moore <paul@paul-moore.com>
> Fixes: 59438b46471a ("security,lockdown,selinux: implement SELinux lockdown")
> Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>

This seems reasonable to me, but before I merge it into the SELinux
tree I think it would be good to get some ACKs from the relevant
subsystem folks.  I don't believe we ever saw a response to the last
question for the PPC folks, did we?

> ---
>
> v3:
> - add the cred argument to security_locked_down() and adapt all callers
> - keep using current_cred() in BPF, as the hook calls have been shifted
>   to program load time (commit ff40e51043af ("bpf, lockdown, audit: Fix
>   buggy SELinux lockdown permission checks"))
> - in SELinux, don't ignore hook calls where cred == NULL, but use
>   SECINITSID_KERNEL as the subject instead
> - update explanations in the commit message
>
> v2: https://lore.kernel.org/lkml/20210517092006.803332-1-omosnace@redhat.com/
> - change to a single hook based on suggestions by Casey Schaufler
>
> v1: https://lore.kernel.org/lkml/20210507114048.138933-1-omosnace@redhat.com/
>
>  arch/powerpc/xmon/xmon.c             |  4 ++--
>  arch/x86/kernel/ioport.c             |  4 ++--
>  arch/x86/kernel/msr.c                |  4 ++--
>  arch/x86/mm/testmmiotrace.c          |  2 +-
>  drivers/acpi/acpi_configfs.c         |  2 +-
>  drivers/acpi/custom_method.c         |  2 +-
>  drivers/acpi/osl.c                   |  3 ++-
>  drivers/acpi/tables.c                |  2 +-
>  drivers/char/mem.c                   |  2 +-
>  drivers/cxl/mem.c                    |  2 +-
>  drivers/firmware/efi/efi.c           |  2 +-
>  drivers/firmware/efi/test/efi_test.c |  2 +-
>  drivers/pci/pci-sysfs.c              |  6 +++---
>  drivers/pci/proc.c                   |  6 +++---
>  drivers/pci/syscall.c                |  2 +-
>  drivers/pcmcia/cistpl.c              |  2 +-
>  drivers/tty/serial/serial_core.c     |  2 +-
>  fs/debugfs/file.c                    |  2 +-
>  fs/debugfs/inode.c                   |  2 +-
>  fs/proc/kcore.c                      |  2 +-
>  fs/tracefs/inode.c                   |  2 +-
>  include/linux/lsm_hook_defs.h        |  2 +-
>  include/linux/lsm_hooks.h            |  1 +
>  include/linux/security.h             |  4 ++--
>  kernel/bpf/helpers.c                 | 10 ++++++----
>  kernel/events/core.c                 |  2 +-
>  kernel/kexec.c                       |  2 +-
>  kernel/kexec_file.c                  |  2 +-
>  kernel/module.c                      |  2 +-
>  kernel/params.c                      |  2 +-
>  kernel/power/hibernate.c             |  3 ++-
>  kernel/trace/bpf_trace.c             | 20 ++++++++++++--------
>  kernel/trace/ftrace.c                |  4 ++--
>  kernel/trace/ring_buffer.c           |  2 +-
>  kernel/trace/trace.c                 | 10 +++++-----
>  kernel/trace/trace_events.c          |  2 +-
>  kernel/trace/trace_events_hist.c     |  4 ++--
>  kernel/trace/trace_events_synth.c    |  2 +-
>  kernel/trace/trace_events_trigger.c  |  2 +-
>  kernel/trace/trace_kprobe.c          |  6 +++---
>  kernel/trace/trace_printk.c          |  2 +-
>  kernel/trace/trace_stack.c           |  2 +-
>  kernel/trace/trace_stat.c            |  2 +-
>  kernel/trace/trace_uprobe.c          |  4 ++--
>  net/xfrm/xfrm_user.c                 | 11 +++++++++--
>  security/lockdown/lockdown.c         |  3 ++-
>  security/security.c                  |  4 ++--
>  security/selinux/hooks.c             |  7 +++++--
>  48 files changed, 97 insertions(+), 77 deletions(-)

-- 
paul moore
www.paul-moore.com

^ permalink raw reply

* Re: [PATCH] powerpc/mem: Add back missing header to fix 'no previous prototype' error
From: Michael Ellerman @ 2021-06-18  3:50 UTC (permalink / raw)
  To: Michael Ellerman, Paul Mackerras, Christophe Leroy,
	Benjamin Herrenschmidt
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <3e5b63bb3daab54a1eb9c20221c2e9528c4db9b3.1622883330.git.christophe.leroy@csgroup.eu>

On Sat, 5 Jun 2021 08:56:09 +0000 (UTC), Christophe Leroy wrote:
> Commit b26e8f27253a ("powerpc/mem: Move cache flushing functions into
> mm/cacheflush.c") removed asm/sparsemem.h which is required when
> CONFIG_MEMORY_HOTPLUG is selected to get the declaration of
> create_section_mapping().
> 
> Add it back.

Applied to powerpc/fixes.

[1/1] powerpc/mem: Add back missing header to fix 'no previous prototype' error
      https://git.kernel.org/powerpc/c/8e11d62e2e8769fe29d1ae98b44b23c7233eb8a2

cheers

^ permalink raw reply

* Re: [PATCH] powerpc: Fix initrd corruption with relative jump labels
From: Michael Ellerman @ 2021-06-18  3:50 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: r.bolshakov, a.kovaleva, groug, dja
In-Reply-To: <20210614131440.312360-1-mpe@ellerman.id.au>

On Mon, 14 Jun 2021 23:14:40 +1000, Michael Ellerman wrote:
> Commit b0b3b2c78ec0 ("powerpc: Switch to relative jump labels") switched
> us to using relative jump labels. That involves changing the code,
> target and key members in struct jump_entry to be relative to the
> address of the jump_entry, rather than absolute addresses.
> 
> We have two static inlines that create a struct jump_entry,
> arch_static_branch() and arch_static_branch_jump(), as well as an asm
> macro ARCH_STATIC_BRANCH, which is used by the pseries-only hypervisor
> tracing code.
> 
> [...]

Applied to powerpc/fixes.

[1/1] powerpc: Fix initrd corruption with relative jump labels
      https://git.kernel.org/powerpc/c/478036c4cd1a16e613a2f883d79c03cf187faacb

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/signal64: Copy siginfo before changing regs->nip
From: Michael Ellerman @ 2021-06-18  3:50 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: dja, cmr
In-Reply-To: <20210608134605.2783677-1-mpe@ellerman.id.au>

On Tue, 8 Jun 2021 23:46:05 +1000, Michael Ellerman wrote:
> In commit 96d7a4e06fab ("powerpc/signal64: Rewrite handle_rt_signal64()
> to minimise uaccess switches") the 64-bit signal code was rearranged to
> use user_write_access_begin/end().
> 
> As part of that change the call to copy_siginfo_to_user() was moved
> later in the function, so that it could be done after the
> user_write_access_end().
> 
> [...]

Applied to powerpc/fixes.

[1/1] powerpc/signal64: Copy siginfo before changing regs->nip
      https://git.kernel.org/powerpc/c/e41d6c3f4f9b4804e53ca87aba8ee11ada606c77

cheers

^ permalink raw reply

* [PATCH v2 7/9] powerpc/microwatt: Add microwatt_defconfig
From: Paul Mackerras @ 2021-06-18  3:48 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <YMwWPcsaWzMlDPqQ@thinks.paulus.ozlabs.org>

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/configs/microwatt_defconfig | 98 ++++++++++++++++++++++++
 1 file changed, 98 insertions(+)
 create mode 100644 arch/powerpc/configs/microwatt_defconfig

diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig
new file mode 100644
index 000000000000..a08b739123da
--- /dev/null
+++ b/arch/powerpc/configs/microwatt_defconfig
@@ -0,0 +1,98 @@
+# CONFIG_SWAP is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT_VOLUNTARY=y
+CONFIG_TICK_CPU_ACCOUNTING=y
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SLAB_MERGE_DEFAULT is not set
+CONFIG_PPC64=y
+# CONFIG_PPC_KUEP is not set
+# CONFIG_PPC_KUAP is not set
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_NR_IRQS=64
+CONFIG_PANIC_TIMEOUT=10
+# CONFIG_PPC_POWERNV is not set
+# CONFIG_PPC_PSERIES is not set
+CONFIG_PPC_MICROWATT=y
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_CPU_FREQ=y
+CONFIG_HZ_100=y
+# CONFIG_PPC_MEM_KEYS is not set
+# CONFIG_SECCOMP is not set
+# CONFIG_MQ_IOSCHED_KYBER is not set
+# CONFIG_COREDUMP is not set
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_INET=y
+CONFIG_INET_UDP_DIAG=y
+CONFIG_INET_RAW_DIAG=y
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_PARTITIONED_MASTER=y
+CONFIG_MTD_SPI_NOR=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_NETDEVICES=y
+# CONFIG_WLAN is not set
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SERIAL_NONSTANDARD=y
+# CONFIG_NVRAM is not set
+CONFIG_RANDOM_TRUST_CPU=y
+CONFIG_SPI=y
+CONFIG_SPI_DEBUG=y
+CONFIG_SPI_BITBANG=y
+CONFIG_SPI_SPIDEV=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_VIRTIO_MENU is not set
+# CONFIG_IOMMU_SUPPORT is not set
+# CONFIG_NVMEM is not set
+CONFIG_EXT4_FS=y
+# CONFIG_FILE_LOCKING is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY_USER is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_CRYPTO_HW is not set
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
+CONFIG_PRINTK_TIME=y
+# CONFIG_SYMBOLIC_ERRNAME is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_MISC is not set
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_FTRACE is not set
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_PPC_DISABLE_WERROR=y
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+# CONFIG_XMON_DEFAULT_RO_MODE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
-- 
2.31.1


^ permalink raw reply related

* [PATCH v2 5/9] powerpc/microwatt: Use standard 16550 UART for console
From: Paul Mackerras @ 2021-06-18  3:46 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <YMwWPcsaWzMlDPqQ@thinks.paulus.ozlabs.org>

From: Benjamin Herrenschmidt <benh@kernel.crashing.org>

This adds support to the Microwatt platform to use the standard
16550-style UART which available in the standalone Microwatt FPGA.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/boot/dts/microwatt.dts      | 27 ++++++++++++----
 arch/powerpc/kernel/udbg_16550.c         | 39 ++++++++++++++++++++++++
 arch/powerpc/platforms/microwatt/Kconfig |  1 +
 arch/powerpc/platforms/microwatt/setup.c |  2 ++
 4 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts
index 04e5dd92270e..974abbdda249 100644
--- a/arch/powerpc/boot/dts/microwatt.dts
+++ b/arch/powerpc/boot/dts/microwatt.dts
@@ -6,6 +6,10 @@ / {
 	model-name = "microwatt";
 	compatible = "microwatt-soc";
 
+	aliases {
+		serial0 = &UART0;
+	};
+
 	reserved-memory {
 		#size-cells = <0x02>;
 		#address-cells = <0x02>;
@@ -89,12 +93,6 @@ PowerPC,Microwatt@0 {
 		};
 	};
 
-	chosen {
-		bootargs = "";
-		ibm,architecture-vec-5 = [19 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00
-					  00 00 00 00 00 00 00 00 40 00 40];
-	};
-
 	soc@c0000000 {
 		compatible = "simple-bus";
 		#address-cells = <1>;
@@ -119,5 +117,22 @@ ICS: interrupt-controller@5000 {
 			#interrupt-cells = <2>;
 		};
 
+		UART0: serial@2000 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x2000 0x8>;
+			clock-frequency = <100000000>;
+			current-speed = <115200>;
+			reg-shift = <2>;
+			fifo-size = <16>;
+			interrupts = <0x10 0x1>;
+		};
+	};
+
+	chosen {
+		bootargs = "";
+		ibm,architecture-vec-5 = [19 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00
+					  00 00 00 00 00 00 00 00 40 00 40];
+		stdout-path = &UART0;
 	};
 };
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 9356b60d6030..8513aa49614e 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -296,3 +296,42 @@ void __init udbg_init_40x_realmode(void)
 }
 
 #endif /* CONFIG_PPC_EARLY_DEBUG_40x */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_MICROWATT
+
+#define UDBG_UART_MW_ADDR	((void __iomem *)0xc0002000)
+
+static u8 udbg_uart_in_isa300_rm(unsigned int reg)
+{
+	uint64_t msr = mfmsr();
+	uint8_t  c;
+
+	mtmsr(msr & ~(MSR_EE|MSR_DR));
+	isync();
+	eieio();
+	c = __raw_rm_readb(UDBG_UART_MW_ADDR + (reg << 2));
+	mtmsr(msr);
+	isync();
+	return c;
+}
+
+static void udbg_uart_out_isa300_rm(unsigned int reg, u8 val)
+{
+	uint64_t msr = mfmsr();
+
+	mtmsr(msr & ~(MSR_EE|MSR_DR));
+	isync();
+	eieio();
+	__raw_rm_writeb(val, UDBG_UART_MW_ADDR + (reg << 2));
+	mtmsr(msr);
+	isync();
+}
+
+void __init udbg_init_debug_microwatt(void)
+{
+	udbg_uart_in = udbg_uart_in_isa300_rm;
+	udbg_uart_out = udbg_uart_out_isa300_rm;
+	udbg_use_uart();
+}
+
+#endif /* CONFIG_PPC_EARLY_DEBUG_MICROWATT */
diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig
index b52c869c0eb8..50ed0cedb5f1 100644
--- a/arch/powerpc/platforms/microwatt/Kconfig
+++ b/arch/powerpc/platforms/microwatt/Kconfig
@@ -6,6 +6,7 @@ config PPC_MICROWATT
 	select PPC_ICS_NATIVE
 	select PPC_ICP_NATIVE
 	select PPC_NATIVE
+	select PPC_UDBG_16550
 	help
           This option enables support for FPGA-based Microwatt implementations.
 
diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c
index 1c1b7791fa57..0b02603bdb74 100644
--- a/arch/powerpc/platforms/microwatt/setup.c
+++ b/arch/powerpc/platforms/microwatt/setup.c
@@ -14,6 +14,7 @@
 #include <asm/machdep.h>
 #include <asm/time.h>
 #include <asm/xics.h>
+#include <asm/udbg.h>
 
 static void __init microwatt_init_IRQ(void)
 {
@@ -35,5 +36,6 @@ define_machine(microwatt) {
 	.name			= "microwatt",
 	.probe			= microwatt_probe,
 	.init_IRQ		= microwatt_init_IRQ,
+	.progress		= udbg_progress,
 	.calibrate_decr		= generic_calibrate_decr,
 };
-- 
2.31.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox