From: Dongsheng Yang <dongsheng.yang@linux.dev>
To: mpatocka@redhat.com, agk@redhat.com, snitzer@kernel.org,
axboe@kernel.dk, hch@lst.de, dan.j.williams@intel.com,
Jonathan.Cameron@Huawei.com
Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-cxl@vger.kernel.org, nvdimm@lists.linux.dev,
dm-devel@lists.linux.dev,
Dongsheng Yang <dongsheng.yang@linux.dev>
Subject: [PATCH v1 02/11] dm-pcache: add backing device management
Date: Tue, 24 Jun 2025 07:33:49 +0000 [thread overview]
Message-ID: <20250624073359.2041340-3-dongsheng.yang@linux.dev> (raw)
In-Reply-To: <20250624073359.2041340-1-dongsheng.yang@linux.dev>
This patch introduces *backing_dev.{c,h}*, a self-contained layer that
handles all interaction with the *backing block device* where cache
write-back and cache-miss reads are serviced. Isolating this logic
keeps the core dm-pcache code free of low-level bio plumbing.
* Device setup / teardown
- Opens the target with `dm_get_device()`, stores `bdev`, file and
size, and initialises a dedicated `bioset`.
- Gracefully releases resources via `backing_dev_stop()`.
* Request object (`struct pcache_backing_dev_req`)
- Two request flavours:
- REQ-type – cloned from an upper `struct bio` issued to
dm-pcache; trimmed and re-targeted to the backing LBA.
- KMEM-type – maps an arbitrary kernel memory buffer
into a freshly built bio.
- Private completion callback (`end_req`) propagates status to the
upper layer and handles resource recycling.
* Submission & completion path
- A lock-protected submit queue plus a worker (`req_submit_work`) lets
pcache push many requests asynchronously and, at the same time, allows
callers to submit a backing_dev_req from atomic context.
- End-io handler moves finished requests to a completion list processed
by `req_complete_work`, ensuring callbacks run in process context.
- Direct-submit option for non-atomic context.
* Flush
- `backing_dev_flush()` issues a flush to persist backing-device data.
Signed-off-by: Dongsheng Yang <dongsheng.yang@linux.dev>
---
drivers/md/dm-pcache/backing_dev.c | 292 +++++++++++++++++++++++++++++
drivers/md/dm-pcache/backing_dev.h | 88 +++++++++
2 files changed, 380 insertions(+)
create mode 100644 drivers/md/dm-pcache/backing_dev.c
create mode 100644 drivers/md/dm-pcache/backing_dev.h
diff --git a/drivers/md/dm-pcache/backing_dev.c b/drivers/md/dm-pcache/backing_dev.c
new file mode 100644
index 000000000000..590c6415319d
--- /dev/null
+++ b/drivers/md/dm-pcache/backing_dev.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/blkdev.h>
+
+#include "../dm-core.h"
+#include "pcache_internal.h"
+#include "cache_dev.h"
+#include "backing_dev.h"
+#include "cache.h"
+#include "dm_pcache.h"
+
+/*
+ * backing_dev_exit - undo backing_dev_init().
+ *
+ * Destroys the slab cache that backing-device requests are allocated from.
+ */
+static void backing_dev_exit(struct pcache_backing_dev *backing_dev)
+{
+	struct kmem_cache *req_cache = backing_dev->backing_req_cache;
+
+	kmem_cache_destroy(req_cache);
+}
+
+static void req_submit_fn(struct work_struct *work);
+static void req_complete_fn(struct work_struct *work);
+/*
+ * backing_dev_init - set up the request machinery of a backing device.
+ *
+ * Creates the slab cache for struct pcache_backing_dev_req and initialises
+ * the submit and completion queues (list, lock and work item each).
+ *
+ * Returns 0 on success, -ENOMEM if the slab cache cannot be created.
+ */
+static int backing_dev_init(struct dm_pcache *pcache)
+{
+	struct pcache_backing_dev *bdev_ctx = &pcache->backing_dev;
+
+	bdev_ctx->backing_req_cache = KMEM_CACHE(pcache_backing_dev_req, 0);
+	if (!bdev_ctx->backing_req_cache)
+		return -ENOMEM;
+
+	/* Submit side: requests waiting to be handed to the block layer. */
+	INIT_LIST_HEAD(&bdev_ctx->submit_list);
+	spin_lock_init(&bdev_ctx->submit_lock);
+	INIT_WORK(&bdev_ctx->req_submit_work, req_submit_fn);
+
+	/* Completion side: finished requests waiting for their callback. */
+	INIT_LIST_HEAD(&bdev_ctx->complete_list);
+	spin_lock_init(&bdev_ctx->complete_lock);
+	INIT_WORK(&bdev_ctx->req_complete_work, req_complete_fn);
+
+	return 0;
+}
+
+/*
+ * backing_dev_start - bring the backing device layer up.
+ *
+ * Initialises the request machinery and records the size of the backing
+ * block device as reported by bdev_nr_sectors(). backing_dev->dm_dev is
+ * dereferenced here, so it must already have been set by the caller.
+ *
+ * Returns 0 on success or the error returned by backing_dev_init().
+ */
+int backing_dev_start(struct dm_pcache *pcache)
+{
+	struct pcache_backing_dev *bdev_ctx = &pcache->backing_dev;
+	int err = backing_dev_init(pcache);
+
+	if (!err)
+		bdev_ctx->dev_size = bdev_nr_sectors(bdev_ctx->dm_dev->bdev);
+
+	return err;
+}
+
+/*
+ * backing_dev_stop - quiesce the request workers and release resources.
+ *
+ * Waits for pending runs of the submit and completion work items, asserts
+ * that both request lists are empty, and then destroys the request slab
+ * cache through backing_dev_exit().
+ *
+ * NOTE(review): a bio that completes after the second flush_work() would
+ * queue the completion work again; presumably callers guarantee that no
+ * backing I/O is in flight when this is called - confirm.
+ */
+void backing_dev_stop(struct dm_pcache *pcache)
+{
+ struct pcache_backing_dev *backing_dev = &pcache->backing_dev;
+
+ /* Drain the submit worker first, then the completion worker. */
+ flush_work(&backing_dev->req_submit_work);
+ flush_work(&backing_dev->req_complete_work);
+
+ /* There should be no inflight backing_dev_request */
+ BUG_ON(!list_empty(&backing_dev->submit_list));
+ BUG_ON(!list_empty(&backing_dev->complete_list));
+
+ backing_dev_exit(backing_dev);
+}
+
+/* pcache_backing_dev_req functions */
+/*
+ * backing_dev_req_end - finish a backing-device request and free it.
+ *
+ * Invokes the optional end_req callback with the recorded status, then
+ * releases the type-specific resources: a REQ-type request drops its
+ * reference on the upper request, a KMEM-type request frees its bvec array
+ * when that array is not the inline one. Finally the request object is
+ * returned to the slab cache. An unknown request type is a BUG().
+ */
+void backing_dev_req_end(struct pcache_backing_dev_req *backing_req)
+{
+	struct pcache_backing_dev *owner = backing_req->backing_dev;
+
+	if (backing_req->end_req)
+		backing_req->end_req(backing_req, backing_req->ret);
+
+	if (backing_req->type == BACKING_DEV_REQ_TYPE_REQ) {
+		pcache_req_put(backing_req->req.upper_req, backing_req->ret);
+	} else if (backing_req->type == BACKING_DEV_REQ_TYPE_KMEM) {
+		/* Only a separately allocated bvec array needs freeing. */
+		if (backing_req->kmem.bvecs != backing_req->kmem.inline_bvecs)
+			kfree(backing_req->kmem.bvecs);
+	} else {
+		BUG();
+	}
+
+	kmem_cache_free(owner->backing_req_cache, backing_req);
+}
+
+/*
+ * req_complete_fn - work handler that finishes completed requests.
+ *
+ * Detaches everything currently on complete_list under complete_lock and
+ * then, with the lock dropped, ends each request in list order.
+ */
+static void req_complete_fn(struct work_struct *work)
+{
+	struct pcache_backing_dev *bdev_ctx =
+		container_of(work, struct pcache_backing_dev, req_complete_work);
+	struct pcache_backing_dev_req *done;
+	LIST_HEAD(finished);
+
+	spin_lock_irq(&bdev_ctx->complete_lock);
+	list_splice_init(&bdev_ctx->complete_list, &finished);
+	spin_unlock_irq(&bdev_ctx->complete_lock);
+
+	while ((done = list_first_entry_or_null(&finished,
+				struct pcache_backing_dev_req, node)) != NULL) {
+		list_del_init(&done->node);
+		backing_dev_req_end(done);
+	}
+}
+
+/*
+ * backing_dev_bio_end - bi_end_io handler for bios sent to the backing device.
+ *
+ * Records the completion status in the request, moves the request onto
+ * complete_list and kicks req_complete_work so that the request is finished
+ * from the workqueue rather than from the bio completion context.
+ */
+static void backing_dev_bio_end(struct bio *bio)
+{
+	struct pcache_backing_dev_req *backing_req = bio->bi_private;
+	struct pcache_backing_dev *backing_dev = backing_req->backing_dev;
+	unsigned long flags;
+
+	/*
+	 * bi_status is a blk_status_t, while ->ret is an int that is passed on
+	 * to end_req() and pcache_req_put(); translate it to a negative errno
+	 * instead of storing the raw block-layer status code.
+	 */
+	backing_req->ret = blk_status_to_errno(bio->bi_status);
+
+	spin_lock_irqsave(&backing_dev->complete_lock, flags);
+	list_move_tail(&backing_req->node, &backing_dev->complete_list);
+	queue_work(BACKING_DEV_TO_PCACHE(backing_dev)->task_wq, &backing_dev->req_complete_work);
+	spin_unlock_irqrestore(&backing_dev->complete_lock, flags);
+}
+
+/*
+ * req_submit_fn - work handler that submits queued requests.
+ *
+ * Detaches everything currently on submit_list under submit_lock and then,
+ * with the lock dropped, submits each request's bio in list order.
+ */
+static void req_submit_fn(struct work_struct *work)
+{
+	struct pcache_backing_dev *bdev_ctx =
+		container_of(work, struct pcache_backing_dev, req_submit_work);
+	struct pcache_backing_dev_req *pending;
+	LIST_HEAD(to_submit);
+
+	spin_lock(&bdev_ctx->submit_lock);
+	list_splice_init(&bdev_ctx->submit_list, &to_submit);
+	spin_unlock(&bdev_ctx->submit_lock);
+
+	while ((pending = list_first_entry_or_null(&to_submit,
+				struct pcache_backing_dev_req, node)) != NULL) {
+		list_del_init(&pending->node);
+		submit_bio_noacct(&pending->bio);
+	}
+}
+
+/*
+ * backing_dev_req_submit - hand a request to the backing device.
+ * @backing_req: request built by backing_dev_req_create()
+ * @direct: true to submit the bio right here in the caller's context,
+ *          false to queue it on submit_list and let req_submit_work
+ *          submit it later
+ *
+ * NOTE(review): submit_lock is taken with plain spin_lock() here and in
+ * req_submit_fn(); if the deferred path can be reached from interrupt
+ * context an irq-safe variant would be needed - confirm against callers.
+ */
+void backing_dev_req_submit(struct pcache_backing_dev_req *backing_req, bool direct)
+{
+ struct pcache_backing_dev *backing_dev = backing_req->backing_dev;
+
+ if (direct) {
+ submit_bio_noacct(&backing_req->bio);
+ return;
+ }
+
+ /* Deferred path: enqueue and kick the submit worker under the lock. */
+ spin_lock(&backing_dev->submit_lock);
+ list_add_tail(&backing_req->node, &backing_dev->submit_list);
+ queue_work(BACKING_DEV_TO_PCACHE(backing_dev)->task_wq, &backing_dev->req_submit_work);
+ spin_unlock(&backing_dev->submit_lock);
+}
+
+/*
+ * req_type_req_create - build a REQ-type request from an upper request.
+ *
+ * Clones the upper request's bio into the bio embedded in a newly allocated
+ * request, trims the clone to the [req_off, req_off + len) byte window and
+ * points it at sector (upper_req->off + req_off) >> SECTOR_SHIFT of the
+ * backing device. req_off and len must be sector aligned (BUG_ON otherwise).
+ * A reference on the upper request is taken; backing_dev_req_end() drops it.
+ * opts->priv_data is not copied for this request type.
+ *
+ * Returns the new request, or NULL if allocation or cloning fails.
+ */
+static struct pcache_backing_dev_req *req_type_req_create(struct pcache_backing_dev *backing_dev,
+						struct pcache_backing_dev_req_opts *opts)
+{
+	struct pcache_request *upper = opts->req.upper_req;
+	u32 byte_off = opts->req.req_off;
+	u32 byte_len = opts->req.len;
+	struct pcache_backing_dev_req *new_req;
+	struct bio *bio;
+
+	new_req = kmem_cache_zalloc(backing_dev->backing_req_cache, opts->gfp_mask);
+	if (!new_req)
+		return NULL;
+
+	bio = &new_req->bio;
+	if (bio_init_clone(backing_dev->dm_dev->bdev, bio, upper->bio, opts->gfp_mask)) {
+		kmem_cache_free(backing_dev->backing_req_cache, new_req);
+		return NULL;
+	}
+
+	/* Narrow the clone to the requested window of the upper bio. */
+	BUG_ON(byte_off & SECTOR_MASK);
+	BUG_ON(byte_len & SECTOR_MASK);
+	bio_trim(bio, byte_off >> SECTOR_SHIFT, byte_len >> SECTOR_SHIFT);
+
+	/* Re-target the clone to the backing device and hook completion. */
+	bio->bi_iter.bi_sector = (upper->off + byte_off) >> SECTOR_SHIFT;
+	bio->bi_private = new_req;
+	bio->bi_end_io = backing_dev_bio_end;
+
+	new_req->type = BACKING_DEV_REQ_TYPE_REQ;
+	new_req->backing_dev = backing_dev;
+	new_req->end_req = opts->end_fn;
+	INIT_LIST_HEAD(&new_req->node);
+
+	pcache_req_get(upper);
+	new_req->req.upper_req = upper;
+	new_req->req.bio_off = byte_off;
+
+	return new_req;
+}
+
+/*
+ * bio_map - attach a kernel memory buffer to a bio.
+ *
+ * A buffer outside the vmalloc area is added with a single bio_add_page()
+ * call covering the whole range. A vmalloc buffer is flushed with
+ * flush_kernel_vmap_range() and then added one page-bounded piece at a
+ * time, looking each page up with vmalloc_to_page(). Any failure to add
+ * a page is a BUG().
+ */
+static void bio_map(struct bio *bio, void *base, size_t size)
+{
+	if (is_vmalloc_addr(base)) {
+		flush_kernel_vmap_range(base, size);
+		while (size) {
+			unsigned int in_page = offset_in_page(base);
+			unsigned int chunk = min_t(size_t, PAGE_SIZE - in_page, size);
+
+			BUG_ON(!bio_add_page(bio, vmalloc_to_page(base), chunk, in_page));
+			base += chunk;
+			size -= chunk;
+		}
+		return;
+	}
+
+	BUG_ON(!bio_add_page(bio, virt_to_page(base), size, offset_in_page(base)));
+}
+
+/*
+ * get_n_vecs - number of bio_vecs needed to map a kernel buffer.
+ * @data: start of the buffer
+ * @len: length of the buffer in bytes
+ *
+ * A buffer outside the vmalloc area is mapped by bio_map() with a single
+ * bio_add_page() call, so one vector is enough. A vmalloc buffer is mapped
+ * page by page, so it needs one vector per page it touches. That count
+ * must include the offset of @data within its first page: an unaligned
+ * buffer can span one page more than DIV_ROUND_UP(len, PAGE_SIZE), and
+ * undersizing the bvec table would trip the BUG_ON() in bio_map().
+ */
+static u32 get_n_vecs(void *data, u32 len)
+{
+	if (!is_vmalloc_addr(data))
+		return 1;
+
+	return DIV_ROUND_UP(offset_in_page(data) + len, PAGE_SIZE);
+}
+
+/*
+ * kmem_type_req_create - build a KMEM-type request around a kernel buffer.
+ *
+ * Allocates a request, picks the inline bvec array or a kmalloc'ed one
+ * depending on how many vectors get_n_vecs() asks for, initialises the
+ * embedded bio with opts->kmem.opf, maps the buffer into it and targets
+ * sector opts->kmem.backing_off >> SECTOR_SHIFT of the backing device.
+ *
+ * Returns the new request, or NULL if an allocation fails.
+ */
+static struct pcache_backing_dev_req *kmem_type_req_create(struct pcache_backing_dev *backing_dev,
+						struct pcache_backing_dev_req_opts *opts)
+{
+	u32 vec_cnt = get_n_vecs(opts->kmem.data, opts->kmem.len);
+	struct pcache_backing_dev_req *new_req;
+	struct bio_vec *vecs;
+	struct bio *bio;
+
+	new_req = kmem_cache_zalloc(backing_dev->backing_req_cache, opts->gfp_mask);
+	if (!new_req)
+		return NULL;
+
+	/* Small mappings use the array embedded in the request itself. */
+	vecs = new_req->kmem.inline_bvecs;
+	if (vec_cnt > BACKING_DEV_REQ_INLINE_BVECS) {
+		vecs = kmalloc_array(vec_cnt, sizeof(*vecs), opts->gfp_mask);
+		if (!vecs) {
+			kmem_cache_free(backing_dev->backing_req_cache, new_req);
+			return NULL;
+		}
+	}
+	new_req->kmem.bvecs = vecs;
+	new_req->type = BACKING_DEV_REQ_TYPE_KMEM;
+
+	bio = &new_req->bio;
+	bio_init(bio, backing_dev->dm_dev->bdev, vecs, vec_cnt, opts->kmem.opf);
+	bio_map(bio, opts->kmem.data, opts->kmem.len);
+
+	bio->bi_iter.bi_sector = (opts->kmem.backing_off) >> SECTOR_SHIFT;
+	bio->bi_private = new_req;
+	bio->bi_end_io = backing_dev_bio_end;
+
+	new_req->backing_dev = backing_dev;
+	new_req->end_req = opts->end_fn;
+	new_req->priv_data = opts->priv_data;
+	INIT_LIST_HEAD(&new_req->node);
+
+	return new_req;
+}
+
+/*
+ * backing_dev_req_create - build a backing-device request from @opts.
+ *
+ * Dispatches on opts->type to the REQ-type or KMEM-type constructor.
+ * Returns the new request, or NULL if the type is unknown or the
+ * constructor fails.
+ */
+struct pcache_backing_dev_req *backing_dev_req_create(struct pcache_backing_dev *backing_dev,
+						struct pcache_backing_dev_req_opts *opts)
+{
+	switch (opts->type) {
+	case BACKING_DEV_REQ_TYPE_REQ:
+		return req_type_req_create(backing_dev, opts);
+	case BACKING_DEV_REQ_TYPE_KMEM:
+		return kmem_type_req_create(backing_dev, opts);
+	default:
+		return NULL;
+	}
+}
+
+/*
+ * backing_dev_flush - issue a cache flush to the backing block device.
+ *
+ * The return value of blkdev_issue_flush() is discarded, so callers are
+ * not told whether the flush succeeded.
+ */
+void backing_dev_flush(struct pcache_backing_dev *backing_dev)
+{
+	struct block_device *bdev = backing_dev->dm_dev->bdev;
+
+	blkdev_issue_flush(bdev);
+}
diff --git a/drivers/md/dm-pcache/backing_dev.h b/drivers/md/dm-pcache/backing_dev.h
new file mode 100644
index 000000000000..8717ce456393
--- /dev/null
+++ b/drivers/md/dm-pcache/backing_dev.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _BACKING_DEV_H
+#define _BACKING_DEV_H
+
+#include <linux/device-mapper.h>
+
+#include "pcache_internal.h"
+
+struct pcache_backing_dev_req;
+/*
+ * Completion callback of a backing-device request; backing_dev_req_end()
+ * calls it with the request and the request's recorded status.
+ */
+typedef void (*backing_req_end_fn_t)(struct pcache_backing_dev_req *backing_req, int ret);
+
+/* Request flavours: cloned from an upper bio, or built around kernel memory. */
+#define BACKING_DEV_REQ_TYPE_REQ 1
+#define BACKING_DEV_REQ_TYPE_KMEM 2
+
+/* KMEM requests needing more bio_vecs than this allocate a separate array. */
+#define BACKING_DEV_REQ_INLINE_BVECS 4
+
+struct pcache_request;
+/*
+ * One I/O request against the backing device. The bio is embedded, so the
+ * request and its bio share a single slab allocation.
+ */
+struct pcache_backing_dev_req {
+ u8 type; /* BACKING_DEV_REQ_TYPE_REQ or BACKING_DEV_REQ_TYPE_KMEM */
+ struct bio bio; /* bio submitted to the backing block device */
+ struct pcache_backing_dev *backing_dev; /* device this request belongs to */
+
+ void *priv_data; /* copied from opts->priv_data for KMEM-type requests */
+ backing_req_end_fn_t end_req; /* optional; called by backing_dev_req_end() */
+
+ struct list_head node; /* link on submit_list or complete_list */
+ int ret; /* completion status, set from bio->bi_status in the end_io handler */
+
+ union {
+ /* BACKING_DEV_REQ_TYPE_REQ */
+ struct {
+ struct pcache_request *upper_req; /* referenced upper request */
+ u32 bio_off; /* byte offset into the upper bio */
+ } req;
+ /* BACKING_DEV_REQ_TYPE_KMEM */
+ struct {
+ struct bio_vec inline_bvecs[BACKING_DEV_REQ_INLINE_BVECS];
+ struct bio_vec *bvecs; /* inline_bvecs or a kmalloc'ed array */
+ } kmem;
+ };
+};
+
+/* Per-target state of the backing block device layer. */
+struct pcache_backing_dev {
+ struct pcache_cache *cache; /* not referenced in backing_dev.c */
+
+ struct dm_dev *dm_dev; /* backing device; its bdev is the bio target */
+ struct kmem_cache *backing_req_cache; /* slab for struct pcache_backing_dev_req */
+
+ /* Requests queued for deferred submission by req_submit_work. */
+ struct list_head submit_list;
+ spinlock_t submit_lock;
+ struct work_struct req_submit_work;
+
+ /* Completed requests waiting to be finished by req_complete_work. */
+ struct list_head complete_list;
+ spinlock_t complete_lock;
+ struct work_struct req_complete_work;
+
+ u64 dev_size; /* set from bdev_nr_sectors() in backing_dev_start() */
+};
+
+struct dm_pcache;
+int backing_dev_start(struct dm_pcache *pcache);
+void backing_dev_stop(struct dm_pcache *pcache);
+
+/* Parameters for backing_dev_req_create(); the union member follows @type. */
+struct pcache_backing_dev_req_opts {
+ u32 type; /* BACKING_DEV_REQ_TYPE_REQ or BACKING_DEV_REQ_TYPE_KMEM */
+ union {
+ /* BACKING_DEV_REQ_TYPE_REQ */
+ struct {
+ struct pcache_request *upper_req; /* request whose bio is cloned */
+ u32 req_off; /* byte offset into the upper bio; sector aligned */
+ u32 len; /* bytes to keep from req_off; sector aligned */
+ } req;
+ /* BACKING_DEV_REQ_TYPE_KMEM */
+ struct {
+ void *data; /* kernel buffer, vmalloc or not */
+ blk_opf_t opf; /* operation and flags passed to bio_init() */
+ u32 len; /* buffer length in bytes */
+ u64 backing_off; /* byte offset on the backing device */
+ } kmem;
+ };
+
+ gfp_t gfp_mask; /* used for every allocation made while creating the request */
+ backing_req_end_fn_t end_fn; /* becomes the request's end_req callback */
+ void *priv_data; /* stored in the request for KMEM-type requests */
+};
+
+void backing_dev_req_submit(struct pcache_backing_dev_req *backing_req, bool direct);
+void backing_dev_req_end(struct pcache_backing_dev_req *backing_req);
+struct pcache_backing_dev_req *backing_dev_req_create(struct pcache_backing_dev *backing_dev,
+ struct pcache_backing_dev_req_opts *opts);
+void backing_dev_flush(struct pcache_backing_dev *backing_dev);
+#endif /* _BACKING_DEV_H */
--
2.43.0
next prev parent reply other threads:[~2025-06-24 7:34 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-24 7:33 [PATCH v1 00/11] dm-pcache – persistent-memory cache for block devices Dongsheng Yang
2025-06-24 7:33 ` [PATCH v1 01/11] dm-pcache: add pcache_internal.h Dongsheng Yang
2025-07-01 13:43 ` Jonathan Cameron
2025-06-24 7:33 ` Dongsheng Yang [this message]
2025-07-01 13:56 ` [PATCH v1 02/11] dm-pcache: add backing device management Jonathan Cameron
2025-07-07 6:25 ` Dongsheng Yang
2025-06-24 7:33 ` [PATCH v1 03/11] dm-pcache: add cache device Dongsheng Yang
2025-07-01 14:07 ` Jonathan Cameron
2025-06-24 7:33 ` [PATCH v1 04/11] dm-pcache: add segment layer Dongsheng Yang
2025-07-01 14:46 ` Jonathan Cameron
2025-07-07 6:24 ` Dongsheng Yang
2025-06-24 7:33 ` [PATCH v1 05/11] dm-pcache: add cache_segment Dongsheng Yang
2025-07-01 14:59 ` Jonathan Cameron
2025-07-07 6:24 ` Dongsheng Yang
2025-06-24 7:33 ` [PATCH v1 06/11] dm-pcache: add cache_writeback Dongsheng Yang
2025-06-24 7:33 ` [PATCH v1 07/11] dm-pcache: add cache_gc Dongsheng Yang
2025-06-24 7:33 ` [PATCH v1 08/11] dm-pcache: add cache_key Dongsheng Yang
2025-06-24 7:33 ` [PATCH v1 09/11] dm-pcache: add cache_req Dongsheng Yang
2025-06-24 7:33 ` [PATCH v1 10/11] dm-pcache: add cache core Dongsheng Yang
2025-06-24 7:33 ` [PATCH v1 11/11] dm-pcache: initial dm-pcache target Dongsheng Yang
2025-06-30 13:30 ` [PATCH v1 00/11] dm-pcache – persistent-memory cache for block devices Mikulas Patocka
2025-06-30 13:40 ` Dongsheng Yang
2025-06-30 14:16 ` Dongsheng Yang
2025-06-30 15:45 ` Mikulas Patocka
2025-06-30 16:30 ` Dongsheng Yang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250624073359.2041340-3-dongsheng.yang@linux.dev \
--to=dongsheng.yang@linux.dev \
--cc=Jonathan.Cameron@Huawei.com \
--cc=agk@redhat.com \
--cc=axboe@kernel.dk \
--cc=dan.j.williams@intel.com \
--cc=dm-devel@lists.linux.dev \
--cc=hch@lst.de \
--cc=linux-block@vger.kernel.org \
--cc=linux-cxl@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mpatocka@redhat.com \
--cc=nvdimm@lists.linux.dev \
--cc=snitzer@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.