All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bingbu Cao <bingbu.cao@linux.intel.com>
To: bingbu.cao@intel.com, linux-media@vger.kernel.org,
	sakari.ailus@linux.intel.com
Cc: jianhui.j.dai@intel.com, tfiga@chromium.org,
	Dongcheng Yan <dongcheng.yan@intel.com>,
	"Yao, Hao" <hao.yao@intel.com>
Subject: Re: [PATCH v2] media: intel/ipu6: optimize the IPU6 MMU mapping and unmapping flow
Date: Tue, 3 Sep 2024 16:28:49 +0800	[thread overview]
Message-ID: <874b3dc9-e9f2-3aaf-e994-81d296dbbbd0@linux.intel.com> (raw)
In-Reply-To: <20240816033121.3961995-1-bingbu.cao@intel.com>


Dongcheng and Hao,

Could you help test this patch?

On 8/16/24 11:31 AM, bingbu.cao@intel.com wrote:
> From: Bingbu Cao <bingbu.cao@intel.com>
> 
> ipu6_mmu_map() and ipu6_mmu_unmap() operated on a per-page basis,
> leading to frequent calls to spin_locks/unlocks and
> clflush_cache_range for each page. This will cause inefficiencies,
> especially when handling large dma-bufs with hundreds of pages.
> 
> This change enhances ipu6_mmu_map()/ipu6_mmu_unmap() with batching
> process multiple contiguous pages. This significantly reduces calls
> for spin_lock/unlock and clflush_cache_range() and improve the
> performance.
> 
> Signed-off-by: Jianhui Dai <jianhui.j.dai@intel.com>
> Signed-off-by: Bingbu Cao <bingbu.cao@intel.com>
> ---
> v2: fix one build warning found by kernel test robot
> ---
>  drivers/media/pci/intel/ipu6/ipu6-mmu.c | 283 ++++++++++--------------
>  drivers/media/pci/intel/ipu6/ipu6-mmu.h |   4 +-
>  2 files changed, 121 insertions(+), 166 deletions(-)
> ---
> 
> diff --git a/drivers/media/pci/intel/ipu6/ipu6-mmu.c b/drivers/media/pci/intel/ipu6/ipu6-mmu.c
> index c3a20507d6db..2d9c6fbd5da2 100644
> --- a/drivers/media/pci/intel/ipu6/ipu6-mmu.c
> +++ b/drivers/media/pci/intel/ipu6/ipu6-mmu.c
> @@ -252,75 +252,140 @@ static u32 *alloc_l2_pt(struct ipu6_mmu_info *mmu_info)
>  	return pt;
>  }
>  
> +static void l2_unmap(struct ipu6_mmu_info *mmu_info, unsigned long iova,
> +		     phys_addr_t dummy, size_t size)
> +{
> +	unsigned int l2_entries;
> +	unsigned int l2_idx;
> +	unsigned long flags;
> +	u32 l1_idx;
> +	u32 *l2_pt;
> +
> +	spin_lock_irqsave(&mmu_info->lock, flags);
> +	for (l1_idx = iova >> ISP_L1PT_SHIFT;
> +	     size > 0 && l1_idx < ISP_L1PT_PTES; l1_idx++) {
> +		dev_dbg(mmu_info->dev,
> +			"unmapping l2 pgtable (l1 index %u (iova 0x%8.8lx))\n",
> +			l1_idx, iova);
> +
> +		if (mmu_info->l1_pt[l1_idx] == mmu_info->dummy_l2_pteval) {
> +			dev_err(mmu_info->dev,
> +				"unmap not mapped iova 0x%8.8lx l1 index %u\n",
> +				iova, l1_idx);
> +			continue;
> +		}
> +		l2_pt = mmu_info->l2_pts[l1_idx];
> +
> +		l2_entries = 0;
> +		for (l2_idx = (iova & ISP_L2PT_MASK) >> ISP_L2PT_SHIFT;
> +		     size > 0 && l2_idx < ISP_L2PT_PTES; l2_idx++) {
> +			dev_dbg(mmu_info->dev,
> +				"unmap l2 index %u with pteval 0x%10.10llx\n",
> +				l2_idx, TBL_PHYS_ADDR(l2_pt[l2_idx]));
> +			l2_pt[l2_idx] = mmu_info->dummy_page_pteval;
> +
> +			iova += ISP_PAGE_SIZE;
> +			size -= ISP_PAGE_SIZE;
> +
> +			l2_entries++;
> +		}
> +
> +		WARN_ON_ONCE(!l2_entries);
> +		clflush_cache_range(&l2_pt[l2_idx - l2_entries],
> +				    sizeof(l2_pt[0]) * l2_entries);
> +	}
> +
> +	WARN_ON_ONCE(size);
> +	spin_unlock_irqrestore(&mmu_info->lock, flags);
> +}
> +
>  static int l2_map(struct ipu6_mmu_info *mmu_info, unsigned long iova,
>  		  phys_addr_t paddr, size_t size)
>  {
> -	u32 l1_idx = iova >> ISP_L1PT_SHIFT;
> -	u32 iova_start = iova;
> +	struct device *dev = mmu_info->dev;
> +	unsigned int l2_entries;
>  	u32 *l2_pt, *l2_virt;
>  	unsigned int l2_idx;
>  	unsigned long flags;
> +	size_t mapped = 0;
>  	dma_addr_t dma;
>  	u32 l1_entry;
> -
> -	dev_dbg(mmu_info->dev,
> -		"mapping l2 page table for l1 index %u (iova %8.8x)\n",
> -		l1_idx, (u32)iova);
> +	u32 l1_idx;
> +	int err = 0;
>  
>  	spin_lock_irqsave(&mmu_info->lock, flags);
> -	l1_entry = mmu_info->l1_pt[l1_idx];
> -	if (l1_entry == mmu_info->dummy_l2_pteval) {
> -		l2_virt = mmu_info->l2_pts[l1_idx];
> -		if (likely(!l2_virt)) {
> -			l2_virt = alloc_l2_pt(mmu_info);
> -			if (!l2_virt) {
> -				spin_unlock_irqrestore(&mmu_info->lock, flags);
> -				return -ENOMEM;
> +
> +	paddr = ALIGN(paddr, ISP_PAGE_SIZE);
> +	for (l1_idx = iova >> ISP_L1PT_SHIFT;
> +	     size > 0 && l1_idx < ISP_L1PT_PTES; l1_idx++) {
> +		dev_dbg(dev,
> +			"mapping l2 page table for l1 index %u (iova %8.8x)\n",
> +			l1_idx, (u32)iova);
> +
> +		l1_entry = mmu_info->l1_pt[l1_idx];
> +		if (l1_entry == mmu_info->dummy_l2_pteval) {
> +			l2_virt = mmu_info->l2_pts[l1_idx];
> +			if (likely(!l2_virt)) {
> +				l2_virt = alloc_l2_pt(mmu_info);
> +				if (!l2_virt) {
> +					err = -ENOMEM;
> +					goto error;
> +				}
>  			}
> -		}
>  
> -		dma = map_single(mmu_info, l2_virt);
> -		if (!dma) {
> -			dev_err(mmu_info->dev, "Failed to map l2pt page\n");
> -			free_page((unsigned long)l2_virt);
> -			spin_unlock_irqrestore(&mmu_info->lock, flags);
> -			return -EINVAL;
> -		}
> +			dma = map_single(mmu_info, l2_virt);
> +			if (!dma) {
> +				dev_err(dev, "Failed to map l2pt page\n");
> +				free_page((unsigned long)l2_virt);
> +				err = -EINVAL;
> +				goto error;
> +			}
>  
> -		l1_entry = dma >> ISP_PADDR_SHIFT;
> +			l1_entry = dma >> ISP_PADDR_SHIFT;
>  
> -		dev_dbg(mmu_info->dev, "page for l1_idx %u %p allocated\n",
> -			l1_idx, l2_virt);
> -		mmu_info->l1_pt[l1_idx] = l1_entry;
> -		mmu_info->l2_pts[l1_idx] = l2_virt;
> -		clflush_cache_range((void *)&mmu_info->l1_pt[l1_idx],
> -				    sizeof(mmu_info->l1_pt[l1_idx]));
> -	}
> +			dev_dbg(dev, "page for l1_idx %u %p allocated\n",
> +				l1_idx, l2_virt);
> +			mmu_info->l1_pt[l1_idx] = l1_entry;
> +			mmu_info->l2_pts[l1_idx] = l2_virt;
>  
> -	l2_pt = mmu_info->l2_pts[l1_idx];
> +			clflush_cache_range(&mmu_info->l1_pt[l1_idx],
> +					    sizeof(mmu_info->l1_pt[l1_idx]));
> +		}
>  
> -	dev_dbg(mmu_info->dev, "l2_pt at %p with dma 0x%x\n", l2_pt, l1_entry);
> +		l2_pt = mmu_info->l2_pts[l1_idx];
> +		l2_entries = 0;
>  
> -	paddr = ALIGN(paddr, ISP_PAGE_SIZE);
> +		for (l2_idx = (iova & ISP_L2PT_MASK) >> ISP_L2PT_SHIFT;
> +		     size > 0 && l2_idx < ISP_L2PT_PTES; l2_idx++) {
> +			l2_pt[l2_idx] = paddr >> ISP_PADDR_SHIFT;
>  
> -	l2_idx = (iova_start & ISP_L2PT_MASK) >> ISP_L2PT_SHIFT;
> +			dev_dbg(dev, "l2 index %u mapped as 0x%8.8x\n", l2_idx,
> +				l2_pt[l2_idx]);
>  
> -	dev_dbg(mmu_info->dev, "l2_idx %u, phys 0x%8.8x\n", l2_idx,
> -		l2_pt[l2_idx]);
> -	if (l2_pt[l2_idx] != mmu_info->dummy_page_pteval) {
> -		spin_unlock_irqrestore(&mmu_info->lock, flags);
> -		return -EINVAL;
> -	}
> +			iova += ISP_PAGE_SIZE;
> +			paddr += ISP_PAGE_SIZE;
> +			mapped += ISP_PAGE_SIZE;
> +			size -= ISP_PAGE_SIZE;
>  
> -	l2_pt[l2_idx] = paddr >> ISP_PADDR_SHIFT;
> +			l2_entries++;
> +		}
>  
> -	clflush_cache_range((void *)&l2_pt[l2_idx], sizeof(l2_pt[l2_idx]));
> -	spin_unlock_irqrestore(&mmu_info->lock, flags);
> +		WARN_ON_ONCE(!l2_entries);
> +		clflush_cache_range(&l2_pt[l2_idx - l2_entries],
> +				    sizeof(l2_pt[0]) * l2_entries);
> +	}
>  
> -	dev_dbg(mmu_info->dev, "l2 index %u mapped as 0x%8.8x\n", l2_idx,
> -		l2_pt[l2_idx]);
> +	spin_unlock_irqrestore(&mmu_info->lock, flags);
>  
>  	return 0;
> +
> +error:
> +	spin_unlock_irqrestore(&mmu_info->lock, flags);
> +	/* unroll mapping in case something went wrong */
> +	if (mapped)
> +		l2_unmap(mmu_info, iova - mapped, paddr - mapped, mapped);
> +
> +	return err;
>  }
>  
>  static int __ipu6_mmu_map(struct ipu6_mmu_info *mmu_info, unsigned long iova,
> @@ -336,48 +401,8 @@ static int __ipu6_mmu_map(struct ipu6_mmu_info *mmu_info, unsigned long iova,
>  	return l2_map(mmu_info, iova_start, paddr, size);
>  }
>  
> -static size_t l2_unmap(struct ipu6_mmu_info *mmu_info, unsigned long iova,
> -		       phys_addr_t dummy, size_t size)
> -{
> -	u32 l1_idx = iova >> ISP_L1PT_SHIFT;
> -	u32 iova_start = iova;
> -	unsigned int l2_idx;
> -	size_t unmapped = 0;
> -	unsigned long flags;
> -	u32 *l2_pt;
> -
> -	dev_dbg(mmu_info->dev, "unmapping l2 page table for l1 index %u (iova 0x%8.8lx)\n",
> -		l1_idx, iova);
> -
> -	spin_lock_irqsave(&mmu_info->lock, flags);
> -	if (mmu_info->l1_pt[l1_idx] == mmu_info->dummy_l2_pteval) {
> -		spin_unlock_irqrestore(&mmu_info->lock, flags);
> -		dev_err(mmu_info->dev,
> -			"unmap iova 0x%8.8lx l1 idx %u which was not mapped\n",
> -			iova, l1_idx);
> -		return 0;
> -	}
> -
> -	for (l2_idx = (iova_start & ISP_L2PT_MASK) >> ISP_L2PT_SHIFT;
> -	     (iova_start & ISP_L1PT_MASK) + (l2_idx << ISP_PAGE_SHIFT)
> -		     < iova_start + size && l2_idx < ISP_L2PT_PTES; l2_idx++) {
> -		l2_pt = mmu_info->l2_pts[l1_idx];
> -		dev_dbg(mmu_info->dev,
> -			"unmap l2 index %u with pteval 0x%10.10llx\n",
> -			l2_idx, TBL_PHYS_ADDR(l2_pt[l2_idx]));
> -		l2_pt[l2_idx] = mmu_info->dummy_page_pteval;
> -
> -		clflush_cache_range((void *)&l2_pt[l2_idx],
> -				    sizeof(l2_pt[l2_idx]));
> -		unmapped++;
> -	}
> -	spin_unlock_irqrestore(&mmu_info->lock, flags);
> -
> -	return unmapped << ISP_PAGE_SHIFT;
> -}
> -
> -static size_t __ipu6_mmu_unmap(struct ipu6_mmu_info *mmu_info,
> -			       unsigned long iova, size_t size)
> +static void __ipu6_mmu_unmap(struct ipu6_mmu_info *mmu_info,
> +			     unsigned long iova, size_t size)
>  {
>  	return l2_unmap(mmu_info, iova, 0, size);
>  }
> @@ -619,40 +644,13 @@ phys_addr_t ipu6_mmu_iova_to_phys(struct ipu6_mmu_info *mmu_info,
>  	return phy_addr;
>  }
>  
> -static size_t ipu6_mmu_pgsize(unsigned long pgsize_bitmap,
> -			      unsigned long addr_merge, size_t size)
> +void ipu6_mmu_unmap(struct ipu6_mmu_info *mmu_info, unsigned long iova,
> +		    size_t size)
>  {
> -	unsigned int pgsize_idx;
> -	size_t pgsize;
> -
> -	/* Max page size that still fits into 'size' */
> -	pgsize_idx = __fls(size);
> -
> -	if (likely(addr_merge)) {
> -		/* Max page size allowed by address */
> -		unsigned int align_pgsize_idx = __ffs(addr_merge);
> -
> -		pgsize_idx = min(pgsize_idx, align_pgsize_idx);
> -	}
> -
> -	pgsize = (1UL << (pgsize_idx + 1)) - 1;
> -	pgsize &= pgsize_bitmap;
> -
> -	WARN_ON(!pgsize);
> -
> -	/* pick the biggest page */
> -	pgsize_idx = __fls(pgsize);
> -	pgsize = 1UL << pgsize_idx;
> -
> -	return pgsize;
> -}
> -
> -size_t ipu6_mmu_unmap(struct ipu6_mmu_info *mmu_info, unsigned long iova,
> -		      size_t size)
> -{
> -	size_t unmapped_page, unmapped = 0;
>  	unsigned int min_pagesz;
>  
> +	dev_dbg(mmu_info->dev, "unmapping iova 0x%lx size 0x%zx\n", iova, size);
> +
>  	/* find out the minimum page size supported */
>  	min_pagesz = 1 << __ffs(mmu_info->pgsize_bitmap);
>  
> @@ -664,38 +662,16 @@ size_t ipu6_mmu_unmap(struct ipu6_mmu_info *mmu_info, unsigned long iova,
>  	if (!IS_ALIGNED(iova | size, min_pagesz)) {
>  		dev_err(NULL, "unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
>  			iova, size, min_pagesz);
> -		return -EINVAL;
> -	}
> -
> -	/*
> -	 * Keep iterating until we either unmap 'size' bytes (or more)
> -	 * or we hit an area that isn't mapped.
> -	 */
> -	while (unmapped < size) {
> -		size_t pgsize = ipu6_mmu_pgsize(mmu_info->pgsize_bitmap,
> -						iova, size - unmapped);
> -
> -		unmapped_page = __ipu6_mmu_unmap(mmu_info, iova, pgsize);
> -		if (!unmapped_page)
> -			break;
> -
> -		dev_dbg(mmu_info->dev, "unmapped: iova 0x%lx size 0x%zx\n",
> -			iova, unmapped_page);
> -
> -		iova += unmapped_page;
> -		unmapped += unmapped_page;
> +		return;
>  	}
>  
> -	return unmapped;
> +	return __ipu6_mmu_unmap(mmu_info, iova, size);
>  }
>  
>  int ipu6_mmu_map(struct ipu6_mmu_info *mmu_info, unsigned long iova,
>  		 phys_addr_t paddr, size_t size)
>  {
> -	unsigned long orig_iova = iova;
>  	unsigned int min_pagesz;
> -	size_t orig_size = size;
> -	int ret = 0;
>  
>  	if (mmu_info->pgsize_bitmap == 0UL)
>  		return -ENODEV;
> @@ -718,28 +694,7 @@ int ipu6_mmu_map(struct ipu6_mmu_info *mmu_info, unsigned long iova,
>  	dev_dbg(mmu_info->dev, "map: iova 0x%lx pa %pa size 0x%zx\n",
>  		iova, &paddr, size);
>  
> -	while (size) {
> -		size_t pgsize = ipu6_mmu_pgsize(mmu_info->pgsize_bitmap,
> -						iova | paddr, size);
> -
> -		dev_dbg(mmu_info->dev,
> -			"mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
> -			iova, &paddr, pgsize);
> -
> -		ret = __ipu6_mmu_map(mmu_info, iova, paddr, pgsize);
> -		if (ret)
> -			break;
> -
> -		iova += pgsize;
> -		paddr += pgsize;
> -		size -= pgsize;
> -	}
> -
> -	/* unroll mapping in case something went wrong */
> -	if (ret)
> -		ipu6_mmu_unmap(mmu_info, orig_iova, orig_size - size);
> -
> -	return ret;
> +	return __ipu6_mmu_map(mmu_info, iova, paddr, size);
>  }
>  
>  static void ipu6_mmu_destroy(struct ipu6_mmu *mmu)
> diff --git a/drivers/media/pci/intel/ipu6/ipu6-mmu.h b/drivers/media/pci/intel/ipu6/ipu6-mmu.h
> index 21cdb0f146eb..8e40b4a66d7d 100644
> --- a/drivers/media/pci/intel/ipu6/ipu6-mmu.h
> +++ b/drivers/media/pci/intel/ipu6/ipu6-mmu.h
> @@ -66,8 +66,8 @@ int ipu6_mmu_hw_init(struct ipu6_mmu *mmu);
>  void ipu6_mmu_hw_cleanup(struct ipu6_mmu *mmu);
>  int ipu6_mmu_map(struct ipu6_mmu_info *mmu_info, unsigned long iova,
>  		 phys_addr_t paddr, size_t size);
> -size_t ipu6_mmu_unmap(struct ipu6_mmu_info *mmu_info, unsigned long iova,
> -		      size_t size);
> +void ipu6_mmu_unmap(struct ipu6_mmu_info *mmu_info, unsigned long iova,
> +		    size_t size);
>  phys_addr_t ipu6_mmu_iova_to_phys(struct ipu6_mmu_info *mmu_info,
>  				  dma_addr_t iova);
>  #endif
> 

-- 
Best regards,
Bingbu Cao

  reply	other threads:[~2024-09-03  8:26 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-16  3:31 [PATCH v2] media: intel/ipu6: optimize the IPU6 MMU mapping and unmapping flow bingbu.cao
2024-09-03  8:28 ` Bingbu Cao [this message]
2024-09-09  2:31   ` Yan, Dongcheng
2024-09-12  1:20 ` Bingbu Cao
2024-10-03 11:15 ` Sakari Ailus
2024-10-08  1:40   ` Cao, Bingbu
2024-10-21  8:36 ` Sakari Ailus
2024-10-22  0:39   ` Bingbu Cao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=874b3dc9-e9f2-3aaf-e994-81d296dbbbd0@linux.intel.com \
    --to=bingbu.cao@linux.intel.com \
    --cc=bingbu.cao@intel.com \
    --cc=dongcheng.yan@intel.com \
    --cc=hao.yao@intel.com \
    --cc=jianhui.j.dai@intel.com \
    --cc=linux-media@vger.kernel.org \
    --cc=sakari.ailus@linux.intel.com \
    --cc=tfiga@chromium.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.