From: Dongsheng Yang <dongsheng.yang@linux.dev>
To: axboe@kernel.dk, hch@lst.de, dan.j.williams@intel.com,
gregory.price@memverge.com, John@groves.net,
Jonathan.Cameron@Huawei.com, bbhushan2@marvell.com,
chaitanyak@nvidia.com, rdunlap@infradead.org
Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-cxl@vger.kernel.org, linux-bcache@vger.kernel.org,
nvdimm@lists.linux.dev, Dongsheng Yang <dongsheng.yang@linux.dev>
Subject: [RFC PATCH 02/11] pcache: introduce segment abstraction
Date: Mon, 14 Apr 2025 01:44:56 +0000 [thread overview]
Message-ID: <20250414014505.20477-3-dongsheng.yang@linux.dev> (raw)
In-Reply-To: <20250414014505.20477-1-dongsheng.yang@linux.dev>
pcache: introduce segment abstraction and metadata support
This patch introduces the basic infrastructure for managing segments
in the pcache system. A "segment" is the minimum unit of allocation
and persistence on the persistent memory used as cache.
Key features introduced:
- `struct pcache_segment` and associated helpers for managing segment data.
- Metadata handling for segments via `struct pcache_segment_info`, including
type, state, data offset, and next-segment pointer.
- Support for reading and writing segment metadata with on-media consistency
using `pcache_meta_find_latest()` and `pcache_meta_find_oldest()` helpers.
- Abstractions for copying data to and from segments and bio vectors, including:
- `segment_copy_to_bio()`
- `segment_copy_from_bio()`
- Logical cursor `segment_pos_advance()` for iterating over data inside a segment.
Segment metadata is stored inline in each segment; it is versioned with a sequence
number and protected by a CRC to ensure integrity and crash safety. The segment design
also lays the foundation for segment chaining via `next_seg`, which will be used by
cache_segment and other higher-level structures.
This patch is part of the core segment layer and will be utilized by metadata
and data layers such as meta_segment and cache_segment in subsequent patches.
Signed-off-by: Dongsheng Yang <dongsheng.yang@linux.dev>
---
drivers/block/pcache/segment.c | 175 +++++++++++++++++++++++++++++++++
drivers/block/pcache/segment.h | 78 +++++++++++++++
2 files changed, 253 insertions(+)
create mode 100644 drivers/block/pcache/segment.c
create mode 100644 drivers/block/pcache/segment.h
diff --git a/drivers/block/pcache/segment.c b/drivers/block/pcache/segment.c
new file mode 100644
index 000000000000..01e43c9d9bfa
--- /dev/null
+++ b/drivers/block/pcache/segment.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/dax.h>
+
+#include "pcache_internal.h"
+#include "cache_dev.h"
+#include "cache.h"
+#include "backing_dev.h"
+#include "meta_segment.h"
+#include "segment.h"
+
+/**
+ * segment_pos_advance - move a segment position forward by @len bytes
+ * @seg_pos: position to advance; @seg_pos->off is updated in place
+ * @len: number of bytes to advance
+ *
+ * The position is only allowed to move inside the data area of
+ * @seg_pos->segment (0 .. data_size).  A request that would step past the
+ * end of the data area is rejected and leaves @seg_pos untouched.  The
+ * earlier open-coded loop clamped each step to the remaining room and
+ * therefore spun forever once that room reached zero.
+ *
+ * Return: 0 on success, -EINVAL if the move would leave the data area.
+ */
+int segment_pos_advance(struct pcache_segment_pos *seg_pos, u32 len)
+{
+	u32 data_size = seg_pos->segment->data_size;
+
+	if (!len)
+		return 0;
+
+	/* Compare against the remaining room so the u32 math cannot wrap. */
+	if (seg_pos->off > data_size || len > data_size - seg_pos->off)
+		return -EINVAL;
+
+	seg_pos->off += len;
+
+	return 0;
+}
+
+/**
+ * segment_copy_to_bio - copy bytes from a segment's data area into a bio
+ * @segment: segment to read from
+ * @data_off: byte offset into @segment->data where the copy starts
+ * @data_len: number of bytes to copy
+ * @bio: first bio to fill; further bios are reached through ->bi_next
+ * @bio_off: number of payload bytes to skip before the first byte is written
+ *
+ * The source range must lie inside the segment's data area.  Without that
+ * check a request running past the end of the segment made the per-bvec
+ * copy length drop to zero and the copy loop never terminated.
+ *
+ * Return: 0 on success, including the case where the bio chain ends before
+ * @data_len bytes were copied; -EINVAL if the source range is out of bounds.
+ */
+int segment_copy_to_bio(struct pcache_segment *segment,
+		u32 data_off, u32 data_len, struct bio *bio, u32 bio_off)
+{
+	struct bio_vec bv;
+	struct bvec_iter iter;
+	u32 to_copy;
+
+	if (!data_len)
+		return 0;
+
+	/* Subtract instead of add so the u32 comparison cannot wrap. */
+	if (data_off > segment->data_size ||
+	    data_len > segment->data_size - data_off)
+		return -EINVAL;
+
+next:
+	bio_for_each_segment(bv, bio, iter) {
+		/* Skip bvecs that lie entirely before the requested offset. */
+		if (bio_off >= bv.bv_len) {
+			bio_off -= bv.bv_len;
+			continue;
+		}
+
+		to_copy = min_t(u32, bv.bv_len - bio_off, data_len);
+
+		/*
+		 * memcpy_to_page() maps the page, copies, and flushes the
+		 * dcache *after* the write, which is the order required for
+		 * pages that may also be mapped by user space.
+		 */
+		memcpy_to_page(bv.bv_page, bv.bv_offset + bio_off,
+			       segment->data + data_off, to_copy);
+		bio_off = 0;
+
+		data_off += to_copy;
+		data_len -= to_copy;
+		if (!data_len)
+			return 0;
+	}
+
+	if (bio->bi_next) {
+		bio = bio->bi_next;
+		goto next;
+	}
+
+	return 0;
+}
+
+/**
+ * segment_copy_from_bio - copy bytes from a bio into a segment's data area
+ * @segment: segment to write to
+ * @data_off: byte offset into @segment->data where the copy lands
+ * @data_len: number of bytes to copy
+ * @bio: first bio to read; further bios are reached through ->bi_next
+ * @bio_off: number of payload bytes to skip before the first byte is read
+ *
+ * Data is stored with memcpy_flushcache().  The destination range must lie
+ * inside the segment's data area; an out-of-range request triggers a
+ * one-time warning and copies nothing.  Without that check the per-bvec
+ * copy length dropped to zero at the end of the segment and the copy loop
+ * never terminated.
+ *
+ * If the bio chain ends before @data_len bytes were consumed the function
+ * returns after copying what was available.
+ */
+void segment_copy_from_bio(struct pcache_segment *segment,
+		u32 data_off, u32 data_len, struct bio *bio, u32 bio_off)
+{
+	struct bio_vec bv;
+	struct bvec_iter iter;
+	void *src;
+	u32 to_copy;
+
+	if (!data_len)
+		return;
+
+	/* Subtract instead of add so the u32 comparison cannot wrap. */
+	if (WARN_ON_ONCE(data_off > segment->data_size ||
+			 data_len > segment->data_size - data_off))
+		return;
+
+next:
+	bio_for_each_segment(bv, bio, iter) {
+		/* Skip bvecs that lie entirely before the requested offset. */
+		if (bio_off >= bv.bv_len) {
+			bio_off -= bv.bv_len;
+			continue;
+		}
+
+		to_copy = min_t(u32, bv.bv_len - bio_off, data_len);
+
+		/*
+		 * The kernel is about to read the page: flush first so that
+		 * stores made through user-space mappings are visible.
+		 */
+		flush_dcache_page(bv.bv_page);
+
+		src = kmap_local_page(bv.bv_page);
+		memcpy_flushcache(segment->data + data_off,
+				  src + bv.bv_offset + bio_off, to_copy);
+		kunmap_local(src);
+		bio_off = 0;
+
+		data_off += to_copy;
+		data_len -= to_copy;
+		if (!data_len)
+			return;
+	}
+
+	if (bio->bi_next) {
+		bio = bio->bi_next;
+		goto next;
+	}
+}
+
+/*
+ * Set up the runtime view of one segment.
+ *
+ * @cache_dev: cache device that holds the segment
+ * @segment:   runtime segment object to fill in
+ * @options:   type, state, id and data offset to record, plus the
+ *             caller-owned segment info structure that receives them
+ *
+ * The data area starts @options->data_off bytes into the segment slot and
+ * extends to the end of the slot (PCACHE_SEG_SIZE).  Always returns 0.
+ */
+int pcache_segment_init(struct pcache_cache_dev *cache_dev, struct pcache_segment *segment,
+		struct pcache_segment_init_options *options)
+{
+	struct pcache_segment_info *info = options->seg_info;
+	u32 payload_off = options->data_off;
+
+	/* Record the caller-supplied attributes in the segment info. */
+	segment->seg_info = info;
+	info->type = options->type;
+	info->state = options->state;
+	info->seg_id = options->seg_id;
+	info->data_off = payload_off;
+
+	/* Runtime pointers: owning device and the data window of the slot. */
+	segment->cache_dev = cache_dev;
+	segment->data_size = PCACHE_SEG_SIZE - payload_off;
+	segment->data = CACHE_DEV_SEGMENT(cache_dev, options->seg_id) + payload_off;
+
+	return 0;
+}
+
+/*
+ * Persist @seg_info into the metadata area at the start of segment @seg_id.
+ *
+ * The caller's copy gets its sequence number incremented, is copied into
+ * the slot picked by pcache_meta_find_oldest(), and the CRC is then computed
+ * over the stored copy before the slot is flushed.  Note that the CRC is
+ * written to the stored copy only; @seg_info->header.crc is not updated.
+ */
+void pcache_segment_info_write(struct pcache_cache_dev *cache_dev, struct pcache_segment_info *seg_info, u32 seg_id)
+{
+	struct pcache_segment_info *slot;
+
+	/* Bump the sequence first so the copy stored below carries it. */
+	seg_info->header.seq++;
+
+	slot = CACHE_DEV_SEGMENT(cache_dev, seg_id);
+	slot = pcache_meta_find_oldest(&slot->header, PCACHE_SEG_INFO_SIZE);
+	memcpy(slot, seg_info, sizeof(*seg_info));
+
+	/* The CRC covers the whole slot as it now sits in the cache device. */
+	slot->header.crc = pcache_meta_crc(&slot->header, PCACHE_SEG_INFO_SIZE);
+	cache_dev_flush(cache_dev, slot, PCACHE_SEG_INFO_SIZE);
+}
+
+/*
+ * Look up the segment info stored at the start of segment @seg_id.
+ *
+ * Returns whatever pcache_meta_find_latest() selects from the metadata area;
+ * the result points into the cache device mapping, not into a private copy.
+ */
+struct pcache_segment_info *pcache_segment_info_read(struct pcache_cache_dev *cache_dev, u32 seg_id)
+{
+	struct pcache_segment_info *first_copy = CACHE_DEV_SEGMENT(cache_dev, seg_id);
+
+	return pcache_meta_find_latest(&first_copy->header, PCACHE_SEG_INFO_SIZE);
+}
diff --git a/drivers/block/pcache/segment.h b/drivers/block/pcache/segment.h
new file mode 100644
index 000000000000..c41cb8d5b921
--- /dev/null
+++ b/drivers/block/pcache/segment.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _PCACHE_SEGMENT_H
+#define _PCACHE_SEGMENT_H
+
+#include <linux/bio.h>
+
+#include "pcache_internal.h"
+
+/*
+ * Logging helpers that prefix every message with the id of @segment.
+ *
+ * struct pcache_segment has no seg_id member of its own; the id is kept in
+ * the attached segment info, so it is read from segment->seg_info.  The id
+ * is a u32 and is therefore printed with %u.  @segment is evaluated twice,
+ * so do not pass an expression with side effects.
+ */
+#define segment_err(segment, fmt, ...)					\
+	cache_dev_err((segment)->cache_dev, "segment%u: " fmt,		\
+		      (segment)->seg_info->seg_id, ##__VA_ARGS__)
+#define segment_info(segment, fmt, ...)					\
+	cache_dev_info((segment)->cache_dev, "segment%u: " fmt,		\
+		       (segment)->seg_info->seg_id, ##__VA_ARGS__)
+#define segment_debug(segment, fmt, ...)				\
+	cache_dev_debug((segment)->cache_dev, "segment%u: " fmt,	\
+			(segment)->seg_info->seg_id, ##__VA_ARGS__)
+
+
+#define PCACHE_SEGMENT_STATE_NONE 0 /* segment state values; presumably stored in pcache_segment_info.state - confirm */
+#define PCACHE_SEGMENT_STATE_RUNNING 1
+
+#define PCACHES_TYPE_NONE 0 /* segment type values; presumably stored in pcache_segment_info.type - confirm */
+#define PCACHES_TYPE_META 1 /* NOTE(review): "PCACHES_" prefix differs from PCACHE_SEGMENT_TYPE_DATA below - confirm intended name */
+#define PCACHE_SEGMENT_TYPE_DATA 2
+
+struct pcache_segment_info { /* per-segment metadata; copied as-is into the segment by pcache_segment_info_write() */
+ struct pcache_meta_header header; /* Metadata header for the segment; holds the seq and crc updated on every write */
+ u8 type; /* taken from pcache_segment_init_options.type */
+ u8 state; /* taken from pcache_segment_init_options.state */
+ u16 flags; /* bit field, see PCACHE_SEG_INFO_FLAGS_HAS_NEXT */
+ u32 next_seg; /* presumably the id of the next segment in a chain, valid when HAS_NEXT is set - confirm */
+ u32 seg_id; /* id of this segment */
+ u32 data_off; /* byte offset from the segment start to the data area */
+};
+
+#define PCACHE_SEG_INFO_FLAGS_HAS_NEXT (1 << 0)
+
+/* Tell whether the HAS_NEXT bit is set in @seg_info->flags. */
+static inline bool segment_info_has_next(struct pcache_segment_info *seg_info)
+{
+	return (seg_info->flags & PCACHE_SEG_INFO_FLAGS_HAS_NEXT) != 0;
+}
+
+struct pcache_segment_pos { /* cursor into one segment's data area */
+ struct pcache_segment *segment; /* Segment associated with the position */
+ u32 off; /* Byte offset into segment->data; compared against segment->data_size when advancing */
+};
+
+struct pcache_segment_init_options { /* arguments consumed by pcache_segment_init() */
+ u8 type; /* copied to seg_info->type */
+ u8 state; /* copied to seg_info->state */
+ u32 seg_id; /* copied to seg_info->seg_id; also selects the segment slot on the cache device */
+ u32 data_off; /* copied to seg_info->data_off; byte offset of the data area inside the segment */
+
+ struct pcache_segment_info *seg_info; /* caller-provided info structure that pcache_segment_init() fills in */
+};
+
+struct pcache_segment { /* runtime handle for one segment of a cache device */
+ struct pcache_cache_dev *cache_dev; /* cache device the segment belongs to */
+
+ void *data; /* start of the data area: segment start plus data_off */
+ u32 data_size; /* size of the data area: PCACHE_SEG_SIZE minus data_off */
+
+ struct pcache_segment_info *seg_info; /* segment info attached by pcache_segment_init() */
+};
+
+/* Copy between a segment's data area and the payload of a bio chain. */
+int segment_copy_to_bio(struct pcache_segment *segment,
+		u32 data_off, u32 data_len, struct bio *bio, u32 bio_off);
+void segment_copy_from_bio(struct pcache_segment *segment,
+		u32 data_off, u32 data_len, struct bio *bio, u32 bio_off);
+
+/* Move a position forward inside its segment's data area. */
+int segment_pos_advance(struct pcache_segment_pos *seg_pos, u32 len);
+
+/* Fill in a runtime segment and its segment info from @options. */
+int pcache_segment_init(struct pcache_cache_dev *cache_dev, struct pcache_segment *segment,
+		struct pcache_segment_init_options *options);
+
+/*
+ * Store / look up the segment info kept at the start of segment @seg_id.
+ * The read prototype's parameter is named seg_id to match its definition.
+ */
+void pcache_segment_info_write(struct pcache_cache_dev *cache_dev, struct pcache_segment_info *seg_info, u32 seg_id);
+struct pcache_segment_info *pcache_segment_info_read(struct pcache_cache_dev *cache_dev, u32 seg_id);
+
+#endif /* _PCACHE_SEGMENT_H */
--
2.34.1
next prev parent reply other threads:[~2025-04-14 1:45 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-14 1:44 [RFC PATCH 00/11] pcache: Persistent Memory Cache for Block Devices Dongsheng Yang
2025-04-14 1:44 ` [RFC PATCH 01/11] pcache: introduce cache_dev for managing persistent memory-based cache devices Dongsheng Yang
2025-04-14 1:44 ` Dongsheng Yang [this message]
2025-04-14 1:44 ` [RFC PATCH 03/11] pcache: introduce meta_segment abstraction Dongsheng Yang
2025-04-14 1:44 ` [RFC PATCH 04/11] pcache: introduce cache_segment abstraction Dongsheng Yang
2025-04-14 1:44 ` [RFC PATCH 05/11] pcache: introduce lifecycle management of pcache_cache Dongsheng Yang
2025-04-14 1:45 ` [RFC PATCH 06/11] pcache: gc and writeback Dongsheng Yang
2025-04-14 1:45 ` [RFC PATCH 07/11] pcache: introduce cache_key infrastructure for persistent metadata management Dongsheng Yang
2025-04-14 1:45 ` [RFC PATCH 08/11] pcache: implement request processing and cache I/O path in cache_req Dongsheng Yang
2025-04-14 1:45 ` [RFC PATCH 09/11] pcache: introduce logic block device and request handling Dongsheng Yang
2025-04-14 1:45 ` [RFC PATCH 10/11] pcache: add backing device management Dongsheng Yang
2025-04-14 1:45 ` [RFC PATCH 11/11] block: introduce pcache (persistent memory to be cache for block device) Dongsheng Yang
2025-04-15 18:00 ` [RFC PATCH 00/11] pcache: Persistent Memory Cache for Block Devices Dan Williams
2025-04-16 1:04 ` Jens Axboe
2025-04-16 6:08 ` Dongsheng Yang
2025-04-16 15:10 ` Jens Axboe
2025-04-16 21:40 ` Dongsheng Yang
2025-04-22 10:29 ` Mikulas Patocka
2025-04-22 13:23 ` Dongsheng Yang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250414014505.20477-3-dongsheng.yang@linux.dev \
--to=dongsheng.yang@linux.dev \
--cc=John@groves.net \
--cc=Jonathan.Cameron@Huawei.com \
--cc=axboe@kernel.dk \
--cc=bbhushan2@marvell.com \
--cc=chaitanyak@nvidia.com \
--cc=dan.j.williams@intel.com \
--cc=gregory.price@memverge.com \
--cc=hch@lst.de \
--cc=linux-bcache@vger.kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-cxl@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=nvdimm@lists.linux.dev \
--cc=rdunlap@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.