Re: [PATCH 1/5] block: rewrite blk_bvec_map_sg to avoid a nth_page call

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Christoph Hellwig <hch@lst.de>
To: Guenter Roeck <linux@roeck-us.net>
Cc: Christoph Hellwig <hch@lst.de>, Jens Axboe <axboe@kernel.dk>,
	Ming Lei <ming.lei@redhat.com>,
	linux-block@vger.kernel.org
Subject: Re: [PATCH 1/5] block: rewrite blk_bvec_map_sg to avoid a nth_page call
Date: Tue, 16 Apr 2019 08:33:56 +0200	[thread overview]
Message-ID: <20190416063356.GA25763@lst.de> (raw)
In-Reply-To: <20190415210731.GA32723@roeck-us.net>

On Mon, Apr 15, 2019 at 02:07:31PM -0700, Guenter Roeck wrote:
> On Mon, Apr 15, 2019 at 10:52:42PM +0200, Christoph Hellwig wrote:
> > On Mon, Apr 15, 2019 at 12:44:35PM -0700, Guenter Roeck wrote:
> > > This patch causes crashes with various boot tests. Most sparc tests crash, as
> > > well as several arm tests. Bisect results in both cases point to this patch.
> > 
> > That just means we trigger an existing bug more easily now.  I'll see
> > if I can help with the issues.
> 
> Code which previously worked reliably no longer does. I would be quite
> hesitant to call this "trigger an existing bug more easily". "Regression"
> seems to be a more appropriate term - even more so as it seems to cause
> 'init' crashes, at least on arm.

Well, we already have these sgls in the wild; it is just that they
are fairly rare.  For a related fix on a mainstream platform, see
here for example:

	https://lore.kernel.org/patchwork/patch/1050367/

Below is a rework of the sparc32 iommu code that should avoid your
reported problem.  Please send any other reports to me as well.

diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
index e8d5d73ca40d..93c2fc440cb0 100644
--- a/arch/sparc/mm/iommu.c
+++ b/arch/sparc/mm/iommu.c
@@ -175,16 +175,38 @@ static void iommu_flush_iotlb(iopte_t *iopte, unsigned int niopte)
 	}
 }
 
-static u32 iommu_get_one(struct device *dev, struct page *page, int npages)
+static u32 __sbus_iommu_map_page(struct device *dev, struct page *page, unsigned offset,
+		unsigned len, bool need_flush)
 {
 	struct iommu_struct *iommu = dev->archdata.iommu;
+	phys_addr_t paddr = page_to_phys(page) + offset, p;
+	unsigned long pfn = __phys_to_pfn(paddr);
+	unsigned long off = (unsigned long)paddr & ~PAGE_MASK;
+	unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	int ioptex;
 	iopte_t *iopte, *iopte0;
 	unsigned int busa, busa0;
 	int i;
 
+	/* XXX So what is maxphys for us and how do drivers know it? */
+	if (!len || len > 256 * 1024)
+		return DMA_MAPPING_ERROR;
+
+	/*
+	 * We expect unmapped highmem pages to be not in the cache.
+	 * XXX Is this a good assumption?
+	 * XXX What if someone else unmaps it here and races us?
+	 */
+	if (need_flush && !PageHighMem(page)) {
+		for (p = paddr & PAGE_MASK; p < paddr + len; p += PAGE_SIZE) {
+			unsigned long vaddr = (unsigned long)phys_to_virt(p);
+
+			flush_page_for_dma(vaddr);
+		}
+	}
+
 	/* page color = pfn of page */
-	ioptex = bit_map_string_get(&iommu->usemap, npages, page_to_pfn(page));
+	ioptex = bit_map_string_get(&iommu->usemap, npages, pfn);
 	if (ioptex < 0)
 		panic("iommu out");
 	busa0 = iommu->start + (ioptex << PAGE_SHIFT);
@@ -193,11 +215,11 @@ static u32 iommu_get_one(struct device *dev, struct page *page, int npages)
 	busa = busa0;
 	iopte = iopte0;
 	for (i = 0; i < npages; i++) {
-		iopte_val(*iopte) = MKIOPTE(page_to_pfn(page), IOPERM);
+		iopte_val(*iopte) = MKIOPTE(pfn, IOPERM);
 		iommu_invalidate_page(iommu->regs, busa);
 		busa += PAGE_SIZE;
 		iopte++;
-		page++;
+		pfn++;
 	}
 
 	iommu_flush_iotlb(iopte0, npages);
@@ -205,99 +227,62 @@ static u32 iommu_get_one(struct device *dev, struct page *page, int npages)
 	return busa0;
 }
 
-static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t len)
-{
-	void *vaddr = page_address(page) + offset;
-	unsigned long off = (unsigned long)vaddr & ~PAGE_MASK;
-	unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	
-	/* XXX So what is maxphys for us and how do drivers know it? */
-	if (!len || len > 256 * 1024)
-		return DMA_MAPPING_ERROR;
-	return iommu_get_one(dev, virt_to_page(vaddr), npages) + off;
-}
-
 static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev,
 		struct page *page, unsigned long offset, size_t len,
 		enum dma_data_direction dir, unsigned long attrs)
 {
 	flush_page_for_dma(0);
-	return __sbus_iommu_map_page(dev, page, offset, len);
+	return __sbus_iommu_map_page(dev, page, offset, len, false);
 }
 
 static dma_addr_t sbus_iommu_map_page_pflush(struct device *dev,
 		struct page *page, unsigned long offset, size_t len,
 		enum dma_data_direction dir, unsigned long attrs)
 {
-	void *vaddr = page_address(page) + offset;
-	unsigned long p = ((unsigned long)vaddr) & PAGE_MASK;
-
-	while (p < (unsigned long)vaddr + len) {
-		flush_page_for_dma(p);
-		p += PAGE_SIZE;
-	}
-
-	return __sbus_iommu_map_page(dev, page, offset, len);
+	return __sbus_iommu_map_page(dev, page, offset, len, true);
 }
 
-static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sgl,
-		int nents, enum dma_data_direction dir, unsigned long attrs)
+static int __sbus_iommu_map_sg(struct device *dev, struct scatterlist *sgl,
+		int nents, enum dma_data_direction dir, unsigned long attrs,
+		bool need_flush)
 {
 	struct scatterlist *sg;
-	int i, n;
-
-	flush_page_for_dma(0);
+	int i;
 
 	for_each_sg(sgl, sg, nents, i) {
-		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
-		sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset;
+		sg->dma_address = __sbus_iommu_map_page(dev, sg_page(sg),
+				sg->offset, sg->length, need_flush);
+		if (sg->dma_address == DMA_MAPPING_ERROR)
+			return 0;
 		sg->dma_length = sg->length;
 	}
 
 	return nents;
 }
 
-static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl,
+static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sgl,
 		int nents, enum dma_data_direction dir, unsigned long attrs)
 {
-	unsigned long page, oldpage = 0;
-	struct scatterlist *sg;
-	int i, j, n;
-
-	for_each_sg(sgl, sg, nents, j) {
-		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
-
-		/*
-		 * We expect unmapped highmem pages to be not in the cache.
-		 * XXX Is this a good assumption?
-		 * XXX What if someone else unmaps it here and races us?
-		 */
-		if ((page = (unsigned long) page_address(sg_page(sg))) != 0) {
-			for (i = 0; i < n; i++) {
-				if (page != oldpage) {	/* Already flushed? */
-					flush_page_for_dma(page);
-					oldpage = page;
-				}
-				page += PAGE_SIZE;
-			}
-		}
-
-		sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset;
-		sg->dma_length = sg->length;
-	}
+	flush_page_for_dma(0);
+	return __sbus_iommu_map_sg(dev, sgl, nents, dir, attrs, false);
+}
 
-	return nents;
+static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl,
+		int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+	return __sbus_iommu_map_sg(dev, sgl, nents, dir, attrs, true);
 }
 
-static void iommu_release_one(struct device *dev, u32 busa, int npages)
+static void __sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr,
+		size_t len)
 {
 	struct iommu_struct *iommu = dev->archdata.iommu;
-	int ioptex;
-	int i;
+	unsigned busa, npages, ioptex, i;
 
+	busa = dma_addr & PAGE_MASK;
 	BUG_ON(busa < iommu->start);
 	ioptex = (busa - iommu->start) >> PAGE_SHIFT;
+	npages = ((dma_addr & ~PAGE_MASK) + len + PAGE_SIZE-1) >> PAGE_SHIFT;
 	for (i = 0; i < npages; i++) {
 		iopte_val(iommu->page_table[ioptex + i]) = 0;
 		iommu_invalidate_page(iommu->regs, busa);
@@ -309,22 +294,17 @@ static void iommu_release_one(struct device *dev, u32 busa, int npages)
 static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr,
 		size_t len, enum dma_data_direction dir, unsigned long attrs)
 {
-	unsigned long off = dma_addr & ~PAGE_MASK;
-	int npages;
-
-	npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT;
-	iommu_release_one(dev, dma_addr & PAGE_MASK, npages);
+	__sbus_iommu_unmap_page(dev, dma_addr, len);
 }
 
 static void sbus_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl,
 		int nents, enum dma_data_direction dir, unsigned long attrs)
 {
 	struct scatterlist *sg;
-	int i, n;
+	int i;
 
 	for_each_sg(sgl, sg, nents, i) {
-		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
-		iommu_release_one(dev, sg->dma_address & PAGE_MASK, n);
+		__sbus_iommu_unmap_page(dev, sg->dma_address, sg->length);
 		sg->dma_address = 0x21212121;
 	}
 }

next prev parent reply	other threads:[~2019-04-16  6:34 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-08 10:46 avoid calling nth_page in the block I/O path Christoph Hellwig
2019-04-08 10:46 ` [PATCH 1/5] block: rewrite blk_bvec_map_sg to avoid a nth_page call Christoph Hellwig
2019-04-08 14:03   ` Johannes Thumshirn
2019-04-08 22:04   ` Bart Van Assche
2019-04-08 22:51   ` Ming Lei
2019-04-15 19:44   ` Guenter Roeck
2019-04-15 20:52     ` Christoph Hellwig
2019-04-15 21:07       ` Guenter Roeck
2019-04-16  6:33         ` Christoph Hellwig [this message]
2019-04-16 14:09           ` Guenter Roeck
2019-04-16 17:08           ` Guenter Roeck
2019-04-16 17:10             ` Christoph Hellwig
2019-04-16 17:51               ` Guenter Roeck
2019-04-17  5:27                 ` Christoph Hellwig
2019-04-17 13:42                   ` Guenter Roeck
2019-04-17 21:59                   ` Guenter Roeck
2019-04-19  2:27                     ` Ming Lei
2019-04-19  2:36                       ` Ming Lei
2019-04-08 10:46 ` [PATCH 2/5] block: refactor __bio_iov_bvec_add_pages Christoph Hellwig
2019-04-08 11:07   ` Johannes Thumshirn
2019-04-08 22:06   ` Bart Van Assche
2019-04-08 10:46 ` [PATCH 3/5] block: don't allow multiple bio_iov_iter_get_pages calls per bio Christoph Hellwig
2019-04-08 11:13   ` Johannes Thumshirn
2019-04-08 22:17   ` Bart Van Assche
2019-04-09 10:05     ` Christoph Hellwig
2019-04-08 10:46 ` [PATCH 4/5] block: change how we get page references in bio_iov_iter_get_pages Christoph Hellwig
2019-04-08 10:46 ` [PATCH 5/5] block: only allow contiguous page structs in a bio_vec Christoph Hellwig
2019-04-09 16:15 ` avoid calling nth_page in the block I/O path Jens Axboe
  -- strict thread matches above, loose matches on Subject: below --
2019-04-11  6:23 avoid calling nth_page in the block I/O path v2 Christoph Hellwig
2019-04-11  6:23 ` [PATCH 1/5] block: rewrite blk_bvec_map_sg to avoid a nth_page call Christoph Hellwig

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:e8d5d73ca40 dfblob:93c2fc440cb )
 OR (
bs:"Re: [PATCH 1/5] block: rewrite blk_bvec_map_sg to avoid a nth_page call" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190416063356.GA25763@lst.de \
    --to=hch@lst.de \
    --cc=axboe@kernel.dk \
    --cc=linux-block@vger.kernel.org \
    --cc=linux@roeck-us.net \
    --cc=ming.lei@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.