* [dm-devel] [PATCH 2/3] brd: enable discard
@ 2023-07-19 20:20 Mikulas Patocka
2023-07-20 5:45 ` Christoph Hellwig
0 siblings, 1 reply; 3+ messages in thread
From: Mikulas Patocka @ 2023-07-19 20:20 UTC (permalink / raw)
To: Jens Axboe
Cc: Christoph Hellwig, Li Nan, dm-devel, linux-block, Zdenek Kabelac
This patch implements discard in the brd driver. We use RCU to free the
page, so that if there are any concurrent readers or writes, they won't
touch the page after it is freed.
Note that we replace "BUG_ON(!page);" with "if (page) ..." in copy_to_brd
- the page can't be NULL under normal circumstances, it can only be NULL
if REQ_OP_WRITE races with REQ_OP_DISCARD. If these two bios race with
each other on the same page, the result is undefined, so we can handle
this race condition just by skipping the copying.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
drivers/block/brd.c | 85 +++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 74 insertions(+), 11 deletions(-)
Index: linux-2.6/drivers/block/brd.c
===================================================================
--- linux-2.6.orig/drivers/block/brd.c
+++ linux-2.6/drivers/block/brd.c
@@ -46,6 +46,8 @@ struct brd_device {
u64 brd_nr_pages;
};
+static bool discard;
+
/*
* Look up and return a brd's page for a given sector.
*/
@@ -100,6 +102,26 @@ static int brd_insert_page(struct brd_de
return ret;
}
+static void brd_free_page_rcu(struct rcu_head *head)
+{
+ struct page *page = container_of(head, struct page, rcu_head);
+ __free_page(page);
+}
+
+static void brd_free_page(struct brd_device *brd, sector_t sector)
+{
+ struct page *page;
+ pgoff_t idx;
+
+ idx = sector >> PAGE_SECTORS_SHIFT;
+ page = xa_erase(&brd->brd_pages, idx);
+
+ if (page) {
+ BUG_ON(page->index != idx);
+ call_rcu(&page->rcu_head, brd_free_page_rcu);
+ }
+}
+
/*
* Free all backing store pages and xarray. This must only be called when
* there are no other users of the device.
@@ -152,11 +174,11 @@ static void copy_to_brd(struct brd_devic
copy = min_t(size_t, n, PAGE_SIZE - offset);
rcu_read_lock();
page = brd_lookup_page(brd, sector);
- BUG_ON(!page);
-
- dst = kmap_atomic(page);
- memcpy(dst + offset, src, copy);
- kunmap_atomic(dst);
+ if (page) {
+ dst = kmap_atomic(page);
+ memcpy(dst + offset, src, copy);
+ kunmap_atomic(dst);
+ }
rcu_read_unlock();
if (copy < n) {
@@ -165,11 +187,11 @@ static void copy_to_brd(struct brd_devic
copy = n - copy;
rcu_read_lock();
page = brd_lookup_page(brd, sector);
- BUG_ON(!page);
-
- dst = kmap_atomic(page);
- memcpy(dst, src, copy);
- kunmap_atomic(dst);
+ if (page) {
+ dst = kmap_atomic(page);
+ memcpy(dst, src, copy);
+ kunmap_atomic(dst);
+ }
rcu_read_unlock();
}
}
@@ -248,13 +270,44 @@ out:
return err;
}
+void brd_do_discard(struct brd_device *brd, struct bio *bio)
+{
+ sector_t sector, len, front_pad;
+
+ if (unlikely(!discard)) {
+ bio->bi_status = BLK_STS_NOTSUPP;
+ return;
+ }
+
+ sector = bio->bi_iter.bi_sector;
+ len = bio_sectors(bio);
+ front_pad = -sector & (PAGE_SECTORS - 1);
+ sector += front_pad;
+ if (unlikely(len <= front_pad))
+ return;
+ len -= front_pad;
+ len = round_down(len, PAGE_SECTORS);
+ while (len) {
+ brd_free_page(brd, sector);
+ sector += PAGE_SECTORS;
+ len -= PAGE_SECTORS;
+ cond_resched();
+ }
+}
+
static void brd_submit_bio(struct bio *bio)
{
struct brd_device *brd = bio->bi_bdev->bd_disk->private_data;
- sector_t sector = bio->bi_iter.bi_sector;
+ sector_t sector;
struct bio_vec bvec;
struct bvec_iter iter;
+ if (bio_op(bio) == REQ_OP_DISCARD) {
+ brd_do_discard(brd, bio);
+ goto endio;
+ }
+
+ sector = bio->bi_iter.bi_sector;
bio_for_each_segment(bvec, bio, iter) {
unsigned int len = bvec.bv_len;
int err;
@@ -276,6 +329,7 @@ static void brd_submit_bio(struct bio *b
sector += len >> SECTOR_SHIFT;
}
+endio:
bio_endio(bio);
}
@@ -299,6 +353,10 @@ static int max_part = 1;
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
+static bool discard = false;
+module_param(discard, bool, 0444);
+MODULE_PARM_DESC(discard, "Support discard");
+
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
MODULE_ALIAS("rd");
@@ -364,6 +422,11 @@ static int brd_alloc(int i)
*/
blk_queue_physical_block_size(disk->queue, PAGE_SIZE);
+ if (discard) {
+ disk->queue->limits.discard_granularity = PAGE_SIZE;
+ blk_queue_max_discard_sectors(disk->queue, round_down(UINT_MAX, PAGE_SECTORS));
+ }
+
/* Tell the block layer that this is not a rotational device */
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue);
--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [dm-devel] [PATCH 2/3] brd: enable discard
2023-07-19 20:20 [dm-devel] [PATCH 2/3] brd: enable discard Mikulas Patocka
@ 2023-07-20 5:45 ` Christoph Hellwig
2023-07-21 13:46 ` Mikulas Patocka
0 siblings, 1 reply; 3+ messages in thread
From: Christoph Hellwig @ 2023-07-20 5:45 UTC (permalink / raw)
To: Mikulas Patocka
Cc: Jens Axboe, Christoph Hellwig, linux-block, Li Nan, dm-devel,
Zdenek Kabelac
> +static void brd_free_page_rcu(struct rcu_head *head)
> +{
> + struct page *page = container_of(head, struct page, rcu_head);
> + __free_page(page);
Nit: missing empty line here. Although I see no point in the local
variable anyay.
> +}
> +
> +static void brd_free_page(struct brd_device *brd, sector_t sector)
> +{
> + struct page *page;
> + pgoff_t idx;
> +
> + idx = sector >> PAGE_SECTORS_SHIFT;
> + page = xa_erase(&brd->brd_pages, idx);
> +
> + if (page) {
> + BUG_ON(page->index != idx);
> + call_rcu(&page->rcu_head, brd_free_page_rcu);
> + }
Doing one call_rcu per page is horribly inefficient. Please look into
batching this up.
> +static bool discard = false;
> +module_param(discard, bool, 0444);
> +MODULE_PARM_DESC(discard, "Support discard");
Doing this as a global paramter that can't even be changed at run time
does not feel very user friendly.
--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [dm-devel] [PATCH 2/3] brd: enable discard
2023-07-20 5:45 ` Christoph Hellwig
@ 2023-07-21 13:46 ` Mikulas Patocka
0 siblings, 0 replies; 3+ messages in thread
From: Mikulas Patocka @ 2023-07-21 13:46 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Jens Axboe, Li Nan, dm-devel, linux-block, Zdenek Kabelac
On Wed, 19 Jul 2023, Christoph Hellwig wrote:
> > +static void brd_free_page_rcu(struct rcu_head *head)
> > +{
> > + struct page *page = container_of(head, struct page, rcu_head);
> > + __free_page(page);
>
> Nit: missing empty line here. Although I see no point in the local
> variable anyay.
>
> > +}
> > +
> > +static void brd_free_page(struct brd_device *brd, sector_t sector)
> > +{
> > + struct page *page;
> > + pgoff_t idx;
> > +
> > + idx = sector >> PAGE_SECTORS_SHIFT;
> > + page = xa_erase(&brd->brd_pages, idx);
> > +
> > + if (page) {
> > + BUG_ON(page->index != idx);
> > + call_rcu(&page->rcu_head, brd_free_page_rcu);
> > + }
>
> Doing one call_rcu per page is horribly inefficient. Please look into
> batching this up.
>
> > +static bool discard = false;
> > +module_param(discard, bool, 0444);
> > +MODULE_PARM_DESC(discard, "Support discard");
>
> Doing this as a global paramter that can't even be changed at run time
> does not feel very user friendly.
Hi
I addressed these issues and I'll send a new version.
Mikulas
--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2023-07-21 13:47 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-07-19 20:20 [dm-devel] [PATCH 2/3] brd: enable discard Mikulas Patocka
2023-07-20 5:45 ` Christoph Hellwig
2023-07-21 13:46 ` Mikulas Patocka
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).