public inbox for linux-block@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/5] brd: discard bugfix
@ 2025-04-18  9:38 Yu Kuai
  2025-04-18  9:38 ` [PATCH 1/5] brd: fix oops if write concurrent with discard Yu Kuai
                   ` (4 more replies)
  0 siblings, 5 replies; 12+ messages in thread
From: Yu Kuai @ 2025-04-18  9:38 UTC (permalink / raw)
  To: axboe, kbusch
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun,
	johnny.chenyi

From: Yu Kuai <yukuai3@huawei.com>

The backgroud is that our customer is using brd for dm-thinpool, and
they want brd to support discard. So today I'm reviewing the commit
9ead7efc6f3f ("brd: implement discard support") and try to backport it
to downstream kernel. Those problems are found while backporting, they
are not tested yet.

Yu Kuai (5):
  brd: fix oops if write concurrent with discard
  brd: synchronize using page and free page with rcu
  brd: fix aligned_sector from brd_do_discard()
  brd: fix discard end sector
  brd: zero data for discard that is not aligned to page

 drivers/block/brd.c | 76 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 55 insertions(+), 21 deletions(-)

-- 
2.39.2


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/5] brd: fix oops if write concurrent with discard
  2025-04-18  9:38 [PATCH 0/5] brd: discard bugfix Yu Kuai
@ 2025-04-18  9:38 ` Yu Kuai
  2025-04-21  5:22   ` Christoph Hellwig
  2025-04-18  9:38 ` [PATCH 2/5] brd: synchronize using page and free page with rcu Yu Kuai
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 12+ messages in thread
From: Yu Kuai @ 2025-04-18  9:38 UTC (permalink / raw)
  To: axboe, kbusch
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun,
	johnny.chenyi

From: Yu Kuai <yukuai3@huawei.com>

User can issue write and discard concurrently, causing following BUG_ON:
cpu0:
brd_submit_bio
 brd_do_bve
  copy_to_brd_setup
   brd_insert_page
    xa_lock
    __xa_insert
    xa_unlock
				cpu1
				brd_submit_bio
				 brd_do_discard
				  xa_lock
				  page = __xa_erase
				  __free_page
				  xa_unlock
  copy_to_brd
   brd_lookup_page
    page = xa_load
    BUG_ON(!page)

Fix this problem by skipping the write, and user will get zero data
later if the page is not here.

Also fix following checkpatch warnings:
WARNING: Deprecated use of 'kmap_atomic', prefer 'kmap_local_page' instead
WARNING: Deprecated use of 'kunmap_atomic', prefer 'kunmap_local' instead

Fixes: 9ead7efc6f3f ("brd: implement discard support")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/block/brd.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 292f127cae0a..a6e4f005cb76 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -133,22 +133,22 @@ static void copy_to_brd(struct brd_device *brd, const void *src,
 
 	copy = min_t(size_t, n, PAGE_SIZE - offset);
 	page = brd_lookup_page(brd, sector);
-	BUG_ON(!page);
-
-	dst = kmap_atomic(page);
-	memcpy(dst + offset, src, copy);
-	kunmap_atomic(dst);
+	if (page) {
+		dst = kmap_local_page(page);
+		memcpy(dst + offset, src, copy);
+		kunmap_local(dst);
+	}
 
 	if (copy < n) {
 		src += copy;
 		sector += copy >> SECTOR_SHIFT;
 		copy = n - copy;
 		page = brd_lookup_page(brd, sector);
-		BUG_ON(!page);
-
-		dst = kmap_atomic(page);
-		memcpy(dst, src, copy);
-		kunmap_atomic(dst);
+		if (page) {
+			dst = kmap_local_page(page);
+			memcpy(dst, src, copy);
+			kunmap_local(dst);
+		}
 	}
 }
 
@@ -166,9 +166,9 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
 	copy = min_t(size_t, n, PAGE_SIZE - offset);
 	page = brd_lookup_page(brd, sector);
 	if (page) {
-		src = kmap_atomic(page);
+		src = kmap_local_page(page);
 		memcpy(dst, src + offset, copy);
-		kunmap_atomic(src);
+		kunmap_local(src);
 	} else
 		memset(dst, 0, copy);
 
@@ -178,9 +178,9 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
 		copy = n - copy;
 		page = brd_lookup_page(brd, sector);
 		if (page) {
-			src = kmap_atomic(page);
+			src = kmap_local_page(page);
 			memcpy(dst, src, copy);
-			kunmap_atomic(src);
+			kunmap_local(src);
 		} else
 			memset(dst, 0, copy);
 	}
@@ -208,7 +208,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
 			goto out;
 	}
 
-	mem = kmap_atomic(page);
+	mem = kmap_local_page(page);
 	if (!op_is_write(opf)) {
 		copy_from_brd(mem + off, brd, sector, len);
 		flush_dcache_page(page);
@@ -216,7 +216,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
 		flush_dcache_page(page);
 		copy_to_brd(brd, mem + off, sector, len);
 	}
-	kunmap_atomic(mem);
+	kunmap_local(mem);
 
 out:
 	return err;
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/5] brd: synchronize using page and free page with rcu
  2025-04-18  9:38 [PATCH 0/5] brd: discard bugfix Yu Kuai
  2025-04-18  9:38 ` [PATCH 1/5] brd: fix oops if write concurrent with discard Yu Kuai
@ 2025-04-18  9:38 ` Yu Kuai
  2025-04-21  5:24   ` Christoph Hellwig
  2025-04-18  9:38 ` [PATCH 3/5] brd: fix aligned_sector from brd_do_discard() Yu Kuai
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 12+ messages in thread
From: Yu Kuai @ 2025-04-18  9:38 UTC (permalink / raw)
  To: axboe, kbusch
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun,
	johnny.chenyi

From: Yu Kuai <yukuai3@huawei.com>

Currently, after fetching the page by xa_load() in IO path, there is no
protection and page can be freed concurrently by discard:

cpu0
brd_submit_bio
 brd_do_bvec
  page = brd_lookup_page
                          cpu1
                          brd_submit_bio
                           brd_do_discard
                            page = __xa_erase()
                            __free_page()
  // page UAF

Fix the problem by protecting page with rcu.

Fixes: 9ead7efc6f3f ("brd: implement discard support")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/block/brd.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index a6e4f005cb76..740ed13faaff 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -208,6 +208,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
 			goto out;
 	}
 
+	rcu_read_lock();
 	mem = kmap_local_page(page);
 	if (!op_is_write(opf)) {
 		copy_from_brd(mem + off, brd, sector, len);
@@ -217,11 +218,19 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
 		copy_to_brd(brd, mem + off, sector, len);
 	}
 	kunmap_local(mem);
+	rcu_read_unlock();
 
 out:
 	return err;
 }
 
+static void brd_free_one_page(struct rcu_head *head)
+{
+	struct page *page = container_of(head, struct page, rcu_head);
+
+	__free_page(page);
+}
+
 static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
 {
 	sector_t aligned_sector = (sector + PAGE_SECTORS) & ~PAGE_SECTORS;
@@ -232,7 +241,7 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
 	while (size >= PAGE_SIZE && aligned_sector < rd_size * 2) {
 		page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
 		if (page) {
-			__free_page(page);
+			call_rcu(&page->rcu_head, brd_free_one_page);
 			brd->brd_nr_pages--;
 		}
 		aligned_sector += PAGE_SECTORS;
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/5] brd: fix aligned_sector from brd_do_discard()
  2025-04-18  9:38 [PATCH 0/5] brd: discard bugfix Yu Kuai
  2025-04-18  9:38 ` [PATCH 1/5] brd: fix oops if write concurrent with discard Yu Kuai
  2025-04-18  9:38 ` [PATCH 2/5] brd: synchronize using page and free page with rcu Yu Kuai
@ 2025-04-18  9:38 ` Yu Kuai
  2025-04-21  5:25   ` Christoph Hellwig
  2025-04-18  9:38 ` [PATCH 4/5] brd: fix discard end sector Yu Kuai
  2025-04-18  9:38 ` [PATCH 5/5] brd: zero data for discard that is not aligned to page Yu Kuai
  4 siblings, 1 reply; 12+ messages in thread
From: Yu Kuai @ 2025-04-18  9:38 UTC (permalink / raw)
  To: axboe, kbusch
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun,
	johnny.chenyi

From: Yu Kuai <yukuai3@huawei.com>

The calculation is just wrong, fix it by round_up().

Fixes: 9ead7efc6f3f ("brd: implement discard support")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/block/brd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 740ed13faaff..21e841e09a89 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -233,7 +233,7 @@ static void brd_free_one_page(struct rcu_head *head)
 
 static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
 {
-	sector_t aligned_sector = (sector + PAGE_SECTORS) & ~PAGE_SECTORS;
+	sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
 	struct page *page;
 
 	size -= (aligned_sector - sector) * SECTOR_SIZE;
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/5] brd: fix discard end sector
  2025-04-18  9:38 [PATCH 0/5] brd: discard bugfix Yu Kuai
                   ` (2 preceding siblings ...)
  2025-04-18  9:38 ` [PATCH 3/5] brd: fix aligned_sector from brd_do_discard() Yu Kuai
@ 2025-04-18  9:38 ` Yu Kuai
  2025-04-21  5:26   ` Christoph Hellwig
  2025-04-18  9:38 ` [PATCH 5/5] brd: zero data for discard that is not aligned to page Yu Kuai
  4 siblings, 1 reply; 12+ messages in thread
From: Yu Kuai @ 2025-04-18  9:38 UTC (permalink / raw)
  To: axboe, kbusch
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun,
	johnny.chenyi

From: Yu Kuai <yukuai3@huawei.com>

brd_do_discard() just aligned start sector to page, this can only work
if the discard size if at least one page. For example:

blkdiscard /dev/ram0 -o 5120 -l 1024

In this case, size = (1024 - (8192 - 5120)), which is a huge value.

Fix the problem by round_down() the end sector.

Fixes: 9ead7efc6f3f ("brd: implement discard support")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/block/brd.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 21e841e09a89..bf1e1b2a0d28 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -234,18 +234,21 @@ static void brd_free_one_page(struct rcu_head *head)
 static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
 {
 	sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
+	sector_t aligned_end = round_down(sector + (size >> SECTOR_SHIFT),
+					  PAGE_SECTORS);
 	struct page *page;
 
-	size -= (aligned_sector - sector) * SECTOR_SIZE;
+	if (aligned_end <= aligned_sector)
+		return;
+
 	xa_lock(&brd->brd_pages);
-	while (size >= PAGE_SIZE && aligned_sector < rd_size * 2) {
+	while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) {
 		page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
 		if (page) {
 			call_rcu(&page->rcu_head, brd_free_one_page);
 			brd->brd_nr_pages--;
 		}
 		aligned_sector += PAGE_SECTORS;
-		size -= PAGE_SIZE;
 	}
 	xa_unlock(&brd->brd_pages);
 }
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 5/5] brd: zero data for discard that is not aligned to page
  2025-04-18  9:38 [PATCH 0/5] brd: discard bugfix Yu Kuai
                   ` (3 preceding siblings ...)
  2025-04-18  9:38 ` [PATCH 4/5] brd: fix discard end sector Yu Kuai
@ 2025-04-18  9:38 ` Yu Kuai
  2025-04-21  5:27   ` Christoph Hellwig
  4 siblings, 1 reply; 12+ messages in thread
From: Yu Kuai @ 2025-04-18  9:38 UTC (permalink / raw)
  To: axboe, kbusch
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun,
	johnny.chenyi

From: Yu Kuai <yukuai3@huawei.com>

Currently brd_do_discard() will just skip unaligned sectors, and in this
case user will still get old data after discard. Fix this by writing
zero data to unaligned sectors.

Fixes: 9ead7efc6f3f ("brd: implement discard support")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/block/brd.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index bf1e1b2a0d28..b5908703fb4b 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -224,6 +224,21 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
 	return err;
 }
 
+static void brd_zero_range(struct brd_device *brd, sector_t sector, u32 size)
+{
+	unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
+	struct page *page;
+	void *dst;
+
+	page = brd_lookup_page(brd, sector);
+	if (!page)
+		return;
+
+	dst = kmap_local_page(page);
+	memset(dst + offset, 0, size);
+	kunmap_local(dst);
+}
+
 static void brd_free_one_page(struct rcu_head *head)
 {
 	struct page *page = container_of(head, struct page, rcu_head);
@@ -234,10 +249,17 @@ static void brd_free_one_page(struct rcu_head *head)
 static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
 {
 	sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
-	sector_t aligned_end = round_down(sector + (size >> SECTOR_SHIFT),
-					  PAGE_SECTORS);
+	sector_t sector_end = sector + (size >> SECTOR_SHIFT);
+	sector_t aligned_end = round_down(sector_end, PAGE_SECTORS);
 	struct page *page;
 
+	if (aligned_sector > sector)
+		brd_zero_range(brd, sector,
+			       (aligned_sector - sector) << SECTOR_SHIFT);
+	if (aligned_end < sector_end)
+		brd_zero_range(brd, aligned_end,
+			       (sector_end - aligned_end) << SECTOR_SHIFT);
+
 	if (aligned_end <= aligned_sector)
 		return;
 
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/5] brd: fix oops if write concurrent with discard
  2025-04-18  9:38 ` [PATCH 1/5] brd: fix oops if write concurrent with discard Yu Kuai
@ 2025-04-21  5:22   ` Christoph Hellwig
  0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2025-04-21  5:22 UTC (permalink / raw)
  To: Yu Kuai
  Cc: axboe, kbusch, linux-block, linux-kernel, yukuai3, yi.zhang,
	yangerkun, johnny.chenyi

On Fri, Apr 18, 2025 at 05:38:22PM +0800, Yu Kuai wrote:
>  	copy = min_t(size_t, n, PAGE_SIZE - offset);
>  	page = brd_lookup_page(brd, sector);
> -	BUG_ON(!page);
> -
> -	dst = kmap_atomic(page);
> -	memcpy(dst + offset, src, copy);
> -	kunmap_atomic(dst);
> +	if (page) {
> +		dst = kmap_local_page(page);
> +		memcpy(dst + offset, src, copy);
> +		kunmap_local(dst);
> +	}

I don't see how this can fix any race, it just narrows down the
race window.  To fix the race for real, copy_to_brd_setup needs
to return a page and keep a reference to it for the caller.  The
caller then only needs to operate on a single page.

We'll also need to do something similar for brd_lookup_page to
ensure the page reference doesn't go away after the xarray lookup
but before using the page.

> Also fix following checkpatch warnings:
> WARNING: Deprecated use of 'kmap_atomic', prefer 'kmap_local_page' instead
> WARNING: Deprecated use of 'kunmap_atomic', prefer 'kunmap_local' instead

This really should be using bvec_kmap_local.  I actually have an
entire series to fix that and clean up some of the surroundings that I
need to send out.  Let me dust that off because it might help with the
above mentioned fixes as well.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/5] brd: synchronize using page and free page with rcu
  2025-04-18  9:38 ` [PATCH 2/5] brd: synchronize using page and free page with rcu Yu Kuai
@ 2025-04-21  5:24   ` Christoph Hellwig
  2025-04-21  7:08     ` Yu Kuai
  0 siblings, 1 reply; 12+ messages in thread
From: Christoph Hellwig @ 2025-04-21  5:24 UTC (permalink / raw)
  To: Yu Kuai
  Cc: axboe, kbusch, linux-block, linux-kernel, yukuai3, yi.zhang,
	yangerkun, johnny.chenyi, linux-mm

On Fri, Apr 18, 2025 at 05:38:23PM +0800, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> Currently, after fetching the page by xa_load() in IO path, there is no
> protection and page can be freed concurrently by discard:

Ah, I guess this helps with the race I mentioned in reply to the
previous patch.  Is the rcu_head in struct page available for use
by subsystems freeing the page?

> 
> cpu0
> brd_submit_bio
>  brd_do_bvec
>   page = brd_lookup_page
>                           cpu1
>                           brd_submit_bio
>                            brd_do_discard
>                             page = __xa_erase()
>                             __free_page()
>   // page UAF
> 
> Fix the problem by protecting page with rcu.
> 
> Fixes: 9ead7efc6f3f ("brd: implement discard support")
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  drivers/block/brd.c | 11 ++++++++++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/block/brd.c b/drivers/block/brd.c
> index a6e4f005cb76..740ed13faaff 100644
> --- a/drivers/block/brd.c
> +++ b/drivers/block/brd.c
> @@ -208,6 +208,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
>  			goto out;
>  	}
>  
> +	rcu_read_lock();
>  	mem = kmap_local_page(page);
>  	if (!op_is_write(opf)) {
>  		copy_from_brd(mem + off, brd, sector, len);
> @@ -217,11 +218,19 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
>  		copy_to_brd(brd, mem + off, sector, len);
>  	}
>  	kunmap_local(mem);
> +	rcu_read_unlock();
>  
>  out:
>  	return err;
>  }
>  
> +static void brd_free_one_page(struct rcu_head *head)
> +{
> +	struct page *page = container_of(head, struct page, rcu_head);
> +
> +	__free_page(page);
> +}
> +
>  static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
>  {
>  	sector_t aligned_sector = (sector + PAGE_SECTORS) & ~PAGE_SECTORS;
> @@ -232,7 +241,7 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
>  	while (size >= PAGE_SIZE && aligned_sector < rd_size * 2) {
>  		page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
>  		if (page) {
> -			__free_page(page);
> +			call_rcu(&page->rcu_head, brd_free_one_page);
>  			brd->brd_nr_pages--;
>  		}
>  		aligned_sector += PAGE_SECTORS;
> -- 
> 2.39.2
> 
> 
---end quoted text---

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/5] brd: fix aligned_sector from brd_do_discard()
  2025-04-18  9:38 ` [PATCH 3/5] brd: fix aligned_sector from brd_do_discard() Yu Kuai
@ 2025-04-21  5:25   ` Christoph Hellwig
  0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2025-04-21  5:25 UTC (permalink / raw)
  To: Yu Kuai
  Cc: axboe, kbusch, linux-block, linux-kernel, yukuai3, yi.zhang,
	yangerkun, johnny.chenyi

On Fri, Apr 18, 2025 at 05:38:24PM +0800, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> The calculation is just wrong, fix it by round_up().
> 
> Fixes: 9ead7efc6f3f ("brd: implement discard support")
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>

Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/5] brd: fix discard end sector
  2025-04-18  9:38 ` [PATCH 4/5] brd: fix discard end sector Yu Kuai
@ 2025-04-21  5:26   ` Christoph Hellwig
  0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2025-04-21  5:26 UTC (permalink / raw)
  To: Yu Kuai
  Cc: axboe, kbusch, linux-block, linux-kernel, yukuai3, yi.zhang,
	yangerkun, johnny.chenyi

On Fri, Apr 18, 2025 at 05:38:25PM +0800, Yu Kuai wrote:
> +	sector_t aligned_end = round_down(sector + (size >> SECTOR_SHIFT),
> +					  PAGE_SECTORS);

Nit: I think the version below would be a bit more readable:

	sector_t aligned_end =
		round_down(sector + (size >> SECTOR_SHIFT), PAGE_SECTORS);

otherwise look good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5/5] brd: zero data for discard that is not aligned to page
  2025-04-18  9:38 ` [PATCH 5/5] brd: zero data for discard that is not aligned to page Yu Kuai
@ 2025-04-21  5:27   ` Christoph Hellwig
  0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2025-04-21  5:27 UTC (permalink / raw)
  To: Yu Kuai
  Cc: axboe, kbusch, linux-block, linux-kernel, yukuai3, yi.zhang,
	yangerkun, johnny.chenyi

On Fri, Apr 18, 2025 at 05:38:26PM +0800, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> Currently brd_do_discard() will just skip unaligned sectors, and in this
> case user will still get old data after discard. Fix this by writing
> zero data to unaligned sectors.

Which is perfectly fine with discard semantics and that of the
underlying primitives in storage standards.  If you need guaranteed
zeroing you need to implement the write zeroes operation.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/5] brd: synchronize using page and free page with rcu
  2025-04-21  5:24   ` Christoph Hellwig
@ 2025-04-21  7:08     ` Yu Kuai
  0 siblings, 0 replies; 12+ messages in thread
From: Yu Kuai @ 2025-04-21  7:08 UTC (permalink / raw)
  To: Christoph Hellwig, Yu Kuai
  Cc: axboe, kbusch, linux-block, linux-kernel, yi.zhang, yangerkun,
	johnny.chenyi, linux-mm, yukuai (C)

Hi,

在 2025/04/21 13:24, Christoph Hellwig 写道:
> On Fri, Apr 18, 2025 at 05:38:23PM +0800, Yu Kuai wrote:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> Currently, after fetching the page by xa_load() in IO path, there is no
>> protection and page can be freed concurrently by discard:
> 
> Ah, I guess this helps with the race I mentioned in reply to the
> previous patch.  Is the rcu_head in struct page available for use
> by subsystems freeing the page?

Take a look at other union fileds int the struct page, in this case,
the page:
  - not used for pagecache or anonymous page
  - not used for page_pool
  - not used for compound page
  - not used for zone device page

So, I think it's fine to use the rcu_head.

We may want to avoid the page reference here since it's atomic and will
affect IO performance.

BTW, perhaps this patch should be the first patch in this set. :)

Thanks,
Kuai
> 
>>
>> cpu0
>> brd_submit_bio
>>   brd_do_bvec
>>    page = brd_lookup_page
>>                            cpu1
>>                            brd_submit_bio
>>                             brd_do_discard
>>                              page = __xa_erase()
>>                              __free_page()
>>    // page UAF
>>
>> Fix the problem by protecting page with rcu.
>>
>> Fixes: 9ead7efc6f3f ("brd: implement discard support")
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   drivers/block/brd.c | 11 ++++++++++-
>>   1 file changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/block/brd.c b/drivers/block/brd.c
>> index a6e4f005cb76..740ed13faaff 100644
>> --- a/drivers/block/brd.c
>> +++ b/drivers/block/brd.c
>> @@ -208,6 +208,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
>>   			goto out;
>>   	}
>>   
>> +	rcu_read_lock();
>>   	mem = kmap_local_page(page);
>>   	if (!op_is_write(opf)) {
>>   		copy_from_brd(mem + off, brd, sector, len);
>> @@ -217,11 +218,19 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
>>   		copy_to_brd(brd, mem + off, sector, len);
>>   	}
>>   	kunmap_local(mem);
>> +	rcu_read_unlock();
>>   
>>   out:
>>   	return err;
>>   }
>>   
>> +static void brd_free_one_page(struct rcu_head *head)
>> +{
>> +	struct page *page = container_of(head, struct page, rcu_head);
>> +
>> +	__free_page(page);
>> +}
>> +
>>   static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
>>   {
>>   	sector_t aligned_sector = (sector + PAGE_SECTORS) & ~PAGE_SECTORS;
>> @@ -232,7 +241,7 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
>>   	while (size >= PAGE_SIZE && aligned_sector < rd_size * 2) {
>>   		page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
>>   		if (page) {
>> -			__free_page(page);
>> +			call_rcu(&page->rcu_head, brd_free_one_page);
>>   			brd->brd_nr_pages--;
>>   		}
>>   		aligned_sector += PAGE_SECTORS;
>> -- 
>> 2.39.2
>>
>>
> ---end quoted text---
> 
> .
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2025-04-21  7:09 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-04-18  9:38 [PATCH 0/5] brd: discard bugfix Yu Kuai
2025-04-18  9:38 ` [PATCH 1/5] brd: fix oops if write concurrent with discard Yu Kuai
2025-04-21  5:22   ` Christoph Hellwig
2025-04-18  9:38 ` [PATCH 2/5] brd: synchronize using page and free page with rcu Yu Kuai
2025-04-21  5:24   ` Christoph Hellwig
2025-04-21  7:08     ` Yu Kuai
2025-04-18  9:38 ` [PATCH 3/5] brd: fix aligned_sector from brd_do_discard() Yu Kuai
2025-04-21  5:25   ` Christoph Hellwig
2025-04-18  9:38 ` [PATCH 4/5] brd: fix discard end sector Yu Kuai
2025-04-21  5:26   ` Christoph Hellwig
2025-04-18  9:38 ` [PATCH 5/5] brd: zero data for discard that is not aligned to page Yu Kuai
2025-04-21  5:27   ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox