linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] brd: use page reference to protect page lifetime
@ 2025-08-11  6:56 Yu Kuai
  2025-09-01  3:42 ` Yu Kuai
  2025-09-01 14:37 ` Jens Axboe
  0 siblings, 2 replies; 3+ messages in thread
From: Yu Kuai @ 2025-08-11  6:56 UTC (permalink / raw)
  To: hch, houtao1, axboe
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun,
	johnny.chenyi

From: Yu Kuai <yukuai3@huawei.com>

As discussed [1], holding the RCU read lock while copying data from/to a
page is too heavy; it is better to hold RCU only around the page lookup and
then grab a page reference to prevent the page from being freed by discard.

[1] https://lore.kernel.org/all/eb41cab3-5946-4fe3-a1be-843dd6fca159@kernel.dk/

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
Changes from v2:
 - move xas_reset() to error path;
 - remove the unnecessary xa_is_value() check;
Changes from v1:
 - follow filemap_get_entry(): use xas_load() + xas_reload() to fix
 concurrency problems.

 drivers/block/brd.c | 75 +++++++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 27 deletions(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 0c2eabe14af3..9778259b30d4 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -44,45 +44,74 @@ struct brd_device {
 };
 
 /*
- * Look up and return a brd's page for a given sector.
+ * Look up and return a brd's page with reference grabbed for a given sector.
  */
 static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
 {
-	return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
+	struct page *page;
+	XA_STATE(xas, &brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
+
+	rcu_read_lock();
+repeat:
+	page = xas_load(&xas);
+	if (xas_retry(&xas, page)) {
+		xas_reset(&xas);
+		goto repeat;
+	}
+
+	if (!page)
+		goto out;
+
+	if (!get_page_unless_zero(page)) {
+		xas_reset(&xas);
+		goto repeat;
+	}
+
+	if (unlikely(page != xas_reload(&xas))) {
+		put_page(page);
+		xas_reset(&xas);
+		goto repeat;
+	}
+out:
+	rcu_read_unlock();
+
+	return page;
 }
 
 /*
  * Insert a new page for a given sector, if one does not already exist.
+ * The returned page will grab reference.
  */
 static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
 		blk_opf_t opf)
-	__releases(rcu)
-	__acquires(rcu)
 {
 	gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
 	struct page *page, *ret;
 
-	rcu_read_unlock();
 	page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
-	if (!page) {
-		rcu_read_lock();
+	if (!page)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	xa_lock(&brd->brd_pages);
 	ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
 			page, gfp);
-	rcu_read_lock();
-	if (ret) {
+	if (!ret) {
+		brd->brd_nr_pages++;
+		get_page(page);
+		xa_unlock(&brd->brd_pages);
+		return page;
+	}
+
+	if (!xa_is_err(ret)) {
+		get_page(ret);
 		xa_unlock(&brd->brd_pages);
-		__free_page(page);
-		if (xa_is_err(ret))
-			return ERR_PTR(xa_err(ret));
+		put_page(page);
 		return ret;
 	}
-	brd->brd_nr_pages++;
+
 	xa_unlock(&brd->brd_pages);
-	return page;
+	put_page(page);
+	return ERR_PTR(xa_err(ret));
 }
 
 /*
@@ -95,7 +124,7 @@ static void brd_free_pages(struct brd_device *brd)
 	pgoff_t idx;
 
 	xa_for_each(&brd->brd_pages, idx, page) {
-		__free_page(page);
+		put_page(page);
 		cond_resched();
 	}
 
@@ -117,7 +146,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
 
 	bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
 
-	rcu_read_lock();
 	page = brd_lookup_page(brd, sector);
 	if (!page && op_is_write(opf)) {
 		page = brd_insert_page(brd, sector, opf);
@@ -135,13 +163,13 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
 			memset(kaddr, 0, bv.bv_len);
 	}
 	kunmap_local(kaddr);
-	rcu_read_unlock();
 
 	bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len);
+	if (page)
+		put_page(page);
 	return true;
 
 out_error:
-	rcu_read_unlock();
 	if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT))
 		bio_wouldblock_error(bio);
 	else
@@ -149,13 +177,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
 	return false;
 }
 
-static void brd_free_one_page(struct rcu_head *head)
-{
-	struct page *page = container_of(head, struct page, rcu_head);
-
-	__free_page(page);
-}
-
 static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
 {
 	sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
@@ -170,7 +191,7 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
 	while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) {
 		page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
 		if (page) {
-			call_rcu(&page->rcu_head, brd_free_one_page);
+			put_page(page);
 			brd->brd_nr_pages--;
 		}
 		aligned_sector += PAGE_SECTORS;
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v3] brd: use page reference to protect page lifetime
  2025-08-11  6:56 [PATCH v3] brd: use page reference to protect page lifetime Yu Kuai
@ 2025-09-01  3:42 ` Yu Kuai
  2025-09-01 14:37 ` Jens Axboe
  1 sibling, 0 replies; 3+ messages in thread
From: Yu Kuai @ 2025-09-01  3:42 UTC (permalink / raw)
  To: Yu Kuai, hch, houtao1, axboe
  Cc: linux-block, linux-kernel, yi.zhang, yangerkun, johnny.chenyi,
	yukuai (C)

Friendly ping ...

在 2025/08/11 14:56, Yu Kuai 写道:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> As discussed [1], hold rcu for copying data from/to page is too heavy,
> it's better to protect page with rcu around for page lookup and then
> grab a reference to prevent page to be freed by discard.
> 
> [1] https://lore.kernel.org/all/eb41cab3-5946-4fe3-a1be-843dd6fca159@kernel.dk/
> 
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
> Changes from v2:
>   - move xas_reset() to error path;
>   - remove unnecessary checking xa_is_value();
> Changes from v1:
>   - refer to filemap_get_entry(), use xas_load + xas_reload to fix
>   concurrent problems.
> 
>   drivers/block/brd.c | 75 +++++++++++++++++++++++++++++----------------
>   1 file changed, 48 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/block/brd.c b/drivers/block/brd.c
> index 0c2eabe14af3..9778259b30d4 100644
> --- a/drivers/block/brd.c
> +++ b/drivers/block/brd.c
> @@ -44,45 +44,74 @@ struct brd_device {
>   };
>   
>   /*
> - * Look up and return a brd's page for a given sector.
> + * Look up and return a brd's page with reference grabbed for a given sector.
>    */
>   static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
>   {
> -	return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
> +	struct page *page;
> +	XA_STATE(xas, &brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
> +
> +	rcu_read_lock();
> +repeat:
> +	page = xas_load(&xas);
> +	if (xas_retry(&xas, page)) {
> +		xas_reset(&xas);
> +		goto repeat;
> +	}
> +
> +	if (!page)
> +		goto out;
> +
> +	if (!get_page_unless_zero(page)) {
> +		xas_reset(&xas);
> +		goto repeat;
> +	}
> +
> +	if (unlikely(page != xas_reload(&xas))) {
> +		put_page(page);
> +		xas_reset(&xas);
> +		goto repeat;
> +	}
> +out:
> +	rcu_read_unlock();
> +
> +	return page;
>   }
>   
>   /*
>    * Insert a new page for a given sector, if one does not already exist.
> + * The returned page will grab reference.
>    */
>   static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
>   		blk_opf_t opf)
> -	__releases(rcu)
> -	__acquires(rcu)
>   {
>   	gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
>   	struct page *page, *ret;
>   
> -	rcu_read_unlock();
>   	page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
> -	if (!page) {
> -		rcu_read_lock();
> +	if (!page)
>   		return ERR_PTR(-ENOMEM);
> -	}
>   
>   	xa_lock(&brd->brd_pages);
>   	ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
>   			page, gfp);
> -	rcu_read_lock();
> -	if (ret) {
> +	if (!ret) {
> +		brd->brd_nr_pages++;
> +		get_page(page);
> +		xa_unlock(&brd->brd_pages);
> +		return page;
> +	}
> +
> +	if (!xa_is_err(ret)) {
> +		get_page(ret);
>   		xa_unlock(&brd->brd_pages);
> -		__free_page(page);
> -		if (xa_is_err(ret))
> -			return ERR_PTR(xa_err(ret));
> +		put_page(page);
>   		return ret;
>   	}
> -	brd->brd_nr_pages++;
> +
>   	xa_unlock(&brd->brd_pages);
> -	return page;
> +	put_page(page);
> +	return ERR_PTR(xa_err(ret));
>   }
>   
>   /*
> @@ -95,7 +124,7 @@ static void brd_free_pages(struct brd_device *brd)
>   	pgoff_t idx;
>   
>   	xa_for_each(&brd->brd_pages, idx, page) {
> -		__free_page(page);
> +		put_page(page);
>   		cond_resched();
>   	}
>   
> @@ -117,7 +146,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
>   
>   	bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
>   
> -	rcu_read_lock();
>   	page = brd_lookup_page(brd, sector);
>   	if (!page && op_is_write(opf)) {
>   		page = brd_insert_page(brd, sector, opf);
> @@ -135,13 +163,13 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
>   			memset(kaddr, 0, bv.bv_len);
>   	}
>   	kunmap_local(kaddr);
> -	rcu_read_unlock();
>   
>   	bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len);
> +	if (page)
> +		put_page(page);
>   	return true;
>   
>   out_error:
> -	rcu_read_unlock();
>   	if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT))
>   		bio_wouldblock_error(bio);
>   	else
> @@ -149,13 +177,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
>   	return false;
>   }
>   
> -static void brd_free_one_page(struct rcu_head *head)
> -{
> -	struct page *page = container_of(head, struct page, rcu_head);
> -
> -	__free_page(page);
> -}
> -
>   static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
>   {
>   	sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
> @@ -170,7 +191,7 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
>   	while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) {
>   		page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
>   		if (page) {
> -			call_rcu(&page->rcu_head, brd_free_one_page);
> +			put_page(page);
>   			brd->brd_nr_pages--;
>   		}
>   		aligned_sector += PAGE_SECTORS;
> 


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v3] brd: use page reference to protect page lifetime
  2025-08-11  6:56 [PATCH v3] brd: use page reference to protect page lifetime Yu Kuai
  2025-09-01  3:42 ` Yu Kuai
@ 2025-09-01 14:37 ` Jens Axboe
  1 sibling, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2025-09-01 14:37 UTC (permalink / raw)
  To: hch, houtao1, Yu Kuai
  Cc: linux-block, linux-kernel, yukuai3, yi.zhang, yangerkun,
	johnny.chenyi


On Mon, 11 Aug 2025 14:56:28 +0800, Yu Kuai wrote:
> As discussed [1], hold rcu for copying data from/to page is too heavy,
> it's better to protect page with rcu around for page lookup and then
> grab a reference to prevent page to be freed by discard.
> 
> [1] https://lore.kernel.org/all/eb41cab3-5946-4fe3-a1be-843dd6fca159@kernel.dk/
> 
> 
> [...]

Applied, thanks!

[1/1] brd: use page reference to protect page lifetime
      commit: 2a0614522885b136e9e650791c794dd49abb5b31

Best regards,
-- 
Jens Axboe




^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-09-01 14:37 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-08-11  6:56 [PATCH v3] brd: use page reference to protect page lifetime Yu Kuai
2025-09-01  3:42 ` Yu Kuai
2025-09-01 14:37 ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).