From: Dongsheng Yang <dongsheng.yang@linux.dev>
To: axboe@kernel.dk, hch@lst.de, dan.j.williams@intel.com,
gregory.price@memverge.com, John@groves.net,
Jonathan.Cameron@Huawei.com, bbhushan2@marvell.com,
chaitanyak@nvidia.com, rdunlap@infradead.org
Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-cxl@vger.kernel.org, linux-bcache@vger.kernel.org,
nvdimm@lists.linux.dev, Dongsheng Yang <dongsheng.yang@linux.dev>
Subject: [RFC PATCH 02/11] pcache: introduce segment abstraction
Date: Mon, 14 Apr 2025 01:44:56 +0000 [thread overview]
Message-ID: <20250414014505.20477-3-dongsheng.yang@linux.dev> (raw)
In-Reply-To: <20250414014505.20477-1-dongsheng.yang@linux.dev>
pcache: introduce segment abstraction and metadata support
This patch introduces the basic infrastructure for managing segments
in the pcache system. A "segment" is the minimum unit of allocation
and persistence on the persistent memory used as cache.
Key features introduced:
- `struct pcache_segment` and associated helpers for managing segment data.
- Metadata handling for segments via `struct pcache_segment_info`, including
type, state, data offset, and next-segment pointer.
- Support for reading and writing segment metadata with on-media consistency
using `pcache_meta_find_latest()` and `pcache_meta_find_oldest()` helpers.
- Abstractions for copying data to and from segments and bio vectors, including:
- `segment_copy_to_bio()`
- `segment_copy_from_bio()`
- Logical cursor `segment_pos_advance()` for iterating over data inside a segment.
Segment metadata is stored inline in each segment, versioned by a sequence number
and protected by a CRC to ensure integrity and crash safety. The segment design also
lays the foundation for segment chaining via `next_seg`, which will be used in
cache_segment and other higher-level structures.
This patch is part of the core segment layer and will be utilized by metadata
and data layers such as meta_segment and cache_segment in subsequent patches.
Signed-off-by: Dongsheng Yang <dongsheng.yang@linux.dev>
---
drivers/block/pcache/segment.c | 175 +++++++++++++++++++++++++++++++++
drivers/block/pcache/segment.h | 78 +++++++++++++++
2 files changed, 253 insertions(+)
create mode 100644 drivers/block/pcache/segment.c
create mode 100644 drivers/block/pcache/segment.h
diff --git a/drivers/block/pcache/segment.c b/drivers/block/pcache/segment.c
new file mode 100644
index 000000000000..01e43c9d9bfa
--- /dev/null
+++ b/drivers/block/pcache/segment.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/dax.h>
+#include <linux/errno.h>
+
+#include "pcache_internal.h"
+#include "cache_dev.h"
+#include "cache.h"
+#include "backing_dev.h"
+#include "meta_segment.h"
+#include "segment.h"
+
+/*
+ * segment_pos_advance() - move a segment position forward by @len bytes
+ * @seg_pos: position to advance; ->off is updated in place
+ * @len: number of bytes to advance
+ *
+ * The position never leaves its segment, so @len must fit between ->off and
+ * the end of the segment's data area.  A request that overshoots used to spin
+ * forever: once ->off reached data_size the clamped step became zero and
+ * @len never decreased.  Such requests are now rejected and the position is
+ * left untouched.
+ *
+ * Return: 0 on success, -EINVAL if @len does not fit in the segment.
+ */
+int segment_pos_advance(struct pcache_segment_pos *seg_pos, u32 len)
+{
+	u32 data_size = seg_pos->segment->data_size;
+
+	if (!len)
+		return 0;
+
+	/* The first test keeps the subtraction below from wrapping. */
+	if (seg_pos->off > data_size || len > data_size - seg_pos->off)
+		return -EINVAL;
+
+	seg_pos->off += len;
+
+	return 0;
+}
+
+/*
+ * segment_copy_to_bio() - copy segment data into the pages of a bio
+ * @segment: source segment
+ * @data_off: byte offset into @segment->data to start reading from
+ * @data_len: number of bytes to copy
+ * @bio: destination bio; bios linked through ->bi_next are filled in turn
+ * @bio_off: number of bytes to skip in @bio before the first copied byte
+ *
+ * Return: 0 once @data_len bytes were copied or the bio chain has no room
+ * left, -EINVAL if the copy reaches the end of the segment's data area with
+ * bytes still outstanding (this case used to loop forever).
+ */
+int segment_copy_to_bio(struct pcache_segment *segment,
+		u32 data_off, u32 data_len, struct bio *bio, u32 bio_off)
+{
+	struct bio_vec bv;
+	struct bvec_iter iter;
+	u32 seg_off = data_off;
+	u32 page_off, bv_remain, to_copy;
+	void *dst;
+
+	if (!data_len)
+		return 0;
+
+	/* Nothing is readable at or beyond the end of the data area. */
+	if (data_off >= segment->data_size)
+		return -EINVAL;
+next:
+	bio_for_each_segment(bv, bio, iter) {
+		/* Skip bvecs that lie entirely inside the bio_off prefix. */
+		if (bio_off >= bv.bv_len) {
+			bio_off -= bv.bv_len;
+			continue;
+		}
+		page_off = bv.bv_offset + bio_off;
+		bv_remain = bv.bv_len - bio_off;
+		bio_off = 0;
+
+		/* seg_off < data_size holds here, so this cannot wrap. */
+		to_copy = min(bv_remain, segment->data_size - seg_off);
+		if (to_copy > data_len)
+			to_copy = data_len;
+
+		dst = kmap_local_page(bv.bv_page);
+		memcpy(dst + page_off, segment->data + seg_off, to_copy);
+		/* Flush after the kernel write, not before it. */
+		flush_dcache_page(bv.bv_page);
+		kunmap_local(dst);
+
+		seg_off += to_copy;
+		data_len -= to_copy;
+		if (!data_len)
+			return 0;
+
+		/* Segment exhausted with bytes left: fail instead of spinning. */
+		if (seg_off == segment->data_size)
+			return -EINVAL;
+	}
+
+	if (bio->bi_next) {
+		bio = bio->bi_next;
+		goto next;
+	}
+
+	return 0;
+}
+
+/*
+ * segment_copy_from_bio() - copy the payload of a bio into a segment
+ * @segment: destination segment
+ * @data_off: byte offset into @segment->data to start writing at
+ * @data_len: number of bytes to copy
+ * @bio: source bio; bios linked through ->bi_next are consumed in turn
+ * @bio_off: number of bytes to skip in @bio before the first copied byte
+ *
+ * Data is written with memcpy_flushcache().  Copying stops when @data_len
+ * bytes were written, when the bio chain has no data left, or when the end
+ * of the segment's data area is reached; the last case used to loop forever.
+ * The function returns no status, so callers must make sure the range fits
+ * in the segment.
+ */
+void segment_copy_from_bio(struct pcache_segment *segment,
+		u32 data_off, u32 data_len, struct bio *bio, u32 bio_off)
+{
+	struct bio_vec bv;
+	struct bvec_iter iter;
+	u32 seg_off = data_off;
+	u32 page_off, bv_remain, to_copy;
+	void *src;
+
+	/* Nothing to do, or nothing writable at/after the end of the area. */
+	if (!data_len || data_off >= segment->data_size)
+		return;
+next:
+	bio_for_each_segment(bv, bio, iter) {
+		/* Skip bvecs that lie entirely inside the bio_off prefix. */
+		if (bio_off >= bv.bv_len) {
+			bio_off -= bv.bv_len;
+			continue;
+		}
+		page_off = bv.bv_offset + bio_off;
+		bv_remain = bv.bv_len - bio_off;
+		bio_off = 0;
+
+		/* seg_off < data_size holds here, so this cannot wrap. */
+		to_copy = min(bv_remain, segment->data_size - seg_off);
+		if (to_copy > data_len)
+			to_copy = data_len;
+
+		src = kmap_local_page(bv.bv_page);
+		/* Flush before the kernel reads the page, not after. */
+		flush_dcache_page(bv.bv_page);
+		memcpy_flushcache(segment->data + seg_off, src + page_off, to_copy);
+		kunmap_local(src);
+
+		seg_off += to_copy;
+		data_len -= to_copy;
+
+		/* Done, or segment exhausted: stop instead of spinning. */
+		if (!data_len || seg_off == segment->data_size)
+			return;
+	}
+
+	if (bio->bi_next) {
+		bio = bio->bi_next;
+		goto next;
+	}
+}
+
+/*
+ * Set up @segment for segment number options->seg_id of @cache_dev.
+ *
+ * type, state, seg_id and data_off are copied from @options into the
+ * caller-supplied options->seg_info, which @segment then references.
+ * segment->data is set to CACHE_DEV_SEGMENT(cache_dev, seg_id) + data_off and
+ * segment->data_size to PCACHE_SEG_SIZE - data_off.  Always returns 0.
+ */
+int pcache_segment_init(struct pcache_cache_dev *cache_dev, struct pcache_segment *segment,
+		struct pcache_segment_init_options *options)
+{
+	struct pcache_segment_info *info = options->seg_info;
+
+	/* Fill in the metadata record the segment will point at. */
+	segment->seg_info = info;
+	info->type = options->type;
+	info->state = options->state;
+	info->seg_id = options->seg_id;
+	info->data_off = options->data_off;
+
+	/* Describe the data area that follows data_off inside the segment. */
+	segment->cache_dev = cache_dev;
+	segment->data_size = PCACHE_SEG_SIZE - options->data_off;
+	segment->data = CACHE_DEV_SEGMENT(cache_dev, options->seg_id) + options->data_off;
+
+	return 0;
+}
+
+/*
+ * Persist @seg_info for segment @seg_id.
+ *
+ * Increments the sequence number in @seg_info, copies the structure into the
+ * slot returned by pcache_meta_find_oldest(), stores the CRC computed over
+ * that slot in its header and flushes PCACHE_SEG_INFO_SIZE bytes of it with
+ * cache_dev_flush().
+ */
+void pcache_segment_info_write(struct pcache_cache_dev *cache_dev, struct pcache_segment_info *seg_info, u32 seg_id)
+{
+	struct pcache_segment_info *base;
+	struct pcache_segment_info *slot;
+
+	seg_info->header.seq++;
+
+	base = CACHE_DEV_SEGMENT(cache_dev, seg_id);
+	slot = pcache_meta_find_oldest(&base->header, PCACHE_SEG_INFO_SIZE);
+
+	memcpy(slot, seg_info, sizeof(*seg_info));
+
+	/* The CRC is computed on the destination copy, after the memcpy. */
+	slot->header.crc = pcache_meta_crc(&slot->header, PCACHE_SEG_INFO_SIZE);
+	cache_dev_flush(cache_dev, slot, PCACHE_SEG_INFO_SIZE);
+}
+
+/*
+ * Return whatever pcache_meta_find_latest() selects among the metadata kept
+ * at the start of segment @seg_id, scanning in PCACHE_SEG_INFO_SIZE units.
+ */
+struct pcache_segment_info *pcache_segment_info_read(struct pcache_cache_dev *cache_dev, u32 seg_id)
+{
+	struct pcache_segment_info *base = CACHE_DEV_SEGMENT(cache_dev, seg_id);
+
+	return pcache_meta_find_latest(&base->header, PCACHE_SEG_INFO_SIZE);
+}
diff --git a/drivers/block/pcache/segment.h b/drivers/block/pcache/segment.h
new file mode 100644
index 000000000000..c41cb8d5b921
--- /dev/null
+++ b/drivers/block/pcache/segment.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _PCACHE_SEGMENT_H
+#define _PCACHE_SEGMENT_H
+
+#include <linux/bio.h>
+
+#include "pcache_internal.h"
+
+/*
+ * Logging helpers that prefix each message with the segment id.
+ *
+ * @segment is a struct pcache_segment pointer.  The id is read from its
+ * seg_info because struct pcache_segment has no seg_id member of its own,
+ * and it is printed with %u because seg_id is a u32.
+ */
+#define segment_err(segment, fmt, ...)					\
+	cache_dev_err((segment)->cache_dev, "segment%u: " fmt,		\
+		      (segment)->seg_info->seg_id, ##__VA_ARGS__)
+#define segment_info(segment, fmt, ...)					\
+	cache_dev_info((segment)->cache_dev, "segment%u: " fmt,		\
+		       (segment)->seg_info->seg_id, ##__VA_ARGS__)
+#define segment_debug(segment, fmt, ...)				\
+	cache_dev_debug((segment)->cache_dev, "segment%u: " fmt,	\
+			(segment)->seg_info->seg_id, ##__VA_ARGS__)
+
+
+#define PCACHE_SEGMENT_STATE_NONE 0 /* presumably a value for pcache_segment_info.state - confirm */
+#define PCACHE_SEGMENT_STATE_RUNNING 1
+
+#define PCACHES_TYPE_NONE 0 /* presumably a value for pcache_segment_info.type - confirm */
+#define PCACHES_TYPE_META 1 /* NOTE(review): PCACHES_ prefix differs from PCACHE_SEGMENT_TYPE_DATA below */
+#define PCACHE_SEGMENT_TYPE_DATA 2
+
+struct pcache_segment_info { /* per-segment metadata record written by pcache_segment_info_write() */
+ struct pcache_meta_header header; /* Metadata header; its seq and crc are updated on every write */
+ u8 type; /* copied from pcache_segment_init_options.type */
+ u8 state; /* copied from pcache_segment_init_options.state */
+ u16 flags; /* bit mask; see PCACHE_SEG_INFO_FLAGS_HAS_NEXT */
+ u32 next_seg; /* presumably meaningful only when HAS_NEXT is set - confirm */
+ u32 seg_id; /* segment number, as passed to CACHE_DEV_SEGMENT() */
+ u32 data_off; /* byte offset from the segment start to its data area */
+};
+
+#define PCACHE_SEG_INFO_FLAGS_HAS_NEXT (1 << 0)
+
+/* True when the HAS_NEXT bit is set in @seg_info->flags. */
+static inline bool segment_info_has_next(struct pcache_segment_info *seg_info)
+{
+	if (seg_info->flags & PCACHE_SEG_INFO_FLAGS_HAS_NEXT)
+		return true;
+
+	return false;
+}
+
+struct pcache_segment_pos { /* cursor into the data area of one segment */
+ struct pcache_segment *segment; /* Segment associated with the position */
+ u32 off; /* Byte offset into segment->data; compared against data_size by segment_pos_advance() */
+};
+
+struct pcache_segment_init_options { /* arguments consumed by pcache_segment_init() */
+ u8 type; /* stored into seg_info->type */
+ u8 state; /* stored into seg_info->state */
+ u32 seg_id; /* segment number; stored into seg_info->seg_id and used to locate the segment */
+ u32 data_off; /* byte offset of the data area within the segment */
+
+ struct pcache_segment_info *seg_info; /* caller-provided record that the segment will reference */
+};
+
+struct pcache_segment { /* in-memory handle for one segment of a cache device */
+ struct pcache_cache_dev *cache_dev; /* device the segment belongs to */
+
+ void *data; /* start of the data area: CACHE_DEV_SEGMENT(cache_dev, seg_id) + data_off */
+ u32 data_size; /* size of the data area: PCACHE_SEG_SIZE - data_off */
+
+ struct pcache_segment_info *seg_info; /* metadata record supplied through the init options */
+};
+
+/* Copy @data_len bytes starting at @data_off in @segment into @bio, skipping @bio_off bytes of it. */
+int segment_copy_to_bio(struct pcache_segment *segment,
+		u32 data_off, u32 data_len, struct bio *bio, u32 bio_off);
+/* Copy @data_len bytes from @bio (after skipping @bio_off bytes) to @data_off in @segment. */
+void segment_copy_from_bio(struct pcache_segment *segment,
+		u32 data_off, u32 data_len, struct bio *bio, u32 bio_off);
+/* Move @seg_pos forward by @len bytes inside its segment. */
+int segment_pos_advance(struct pcache_segment_pos *seg_pos, u32 len);
+/* Fill in @segment and options->seg_info from @options. */
+int pcache_segment_init(struct pcache_cache_dev *cache_dev, struct pcache_segment *segment,
+		struct pcache_segment_init_options *options);
+
+/* Write @seg_info to, or look up the latest record in, the metadata of segment @seg_id. */
+void pcache_segment_info_write(struct pcache_cache_dev *cache_dev, struct pcache_segment_info *seg_info, u32 seg_id);
+struct pcache_segment_info *pcache_segment_info_read(struct pcache_cache_dev *cache_dev, u32 seg_id);
+
+#endif /* _PCACHE_SEGMENT_H */
--
2.34.1
next prev parent reply other threads:[~2025-04-14 1:45 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-14 1:44 [RFC PATCH 00/11] pcache: Persistent Memory Cache for Block Devices Dongsheng Yang
2025-04-14 1:44 ` [RFC PATCH 01/11] pcache: introduce cache_dev for managing persistent memory-based cache devices Dongsheng Yang
2025-04-14 1:44 ` Dongsheng Yang [this message]
2025-04-14 1:44 ` [RFC PATCH 03/11] pcache: introduce meta_segment abstraction Dongsheng Yang
2025-04-14 1:44 ` [RFC PATCH 04/11] pcache: introduce cache_segment abstraction Dongsheng Yang
2025-04-14 1:44 ` [RFC PATCH 05/11] pcache: introduce lifecycle management of pcache_cache Dongsheng Yang
2025-04-14 1:45 ` [RFC PATCH 06/11] pcache: gc and writeback Dongsheng Yang
2025-04-14 1:45 ` [RFC PATCH 07/11] pcache: introduce cache_key infrastructure for persistent metadata management Dongsheng Yang
2025-04-14 1:45 ` [RFC PATCH 08/11] pcache: implement request processing and cache I/O path in cache_req Dongsheng Yang
2025-04-14 1:45 ` [RFC PATCH 09/11] pcache: introduce logic block device and request handling Dongsheng Yang
2025-04-14 1:45 ` [RFC PATCH 10/11] pcache: add backing device management Dongsheng Yang
2025-04-14 1:45 ` [RFC PATCH 11/11] block: introduce pcache (persistent memory to be cache for block device) Dongsheng Yang
2025-04-15 18:00 ` [RFC PATCH 00/11] pcache: Persistent Memory Cache for Block Devices Dan Williams
2025-04-16 1:04 ` Jens Axboe
2025-04-16 6:08 ` Dongsheng Yang
2025-04-16 15:10 ` Jens Axboe
2025-04-16 21:40 ` Dongsheng Yang
2025-04-22 10:29 ` Mikulas Patocka
2025-04-22 13:23 ` Dongsheng Yang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250414014505.20477-3-dongsheng.yang@linux.dev \
--to=dongsheng.yang@linux.dev \
--cc=John@groves.net \
--cc=Jonathan.Cameron@Huawei.com \
--cc=axboe@kernel.dk \
--cc=bbhushan2@marvell.com \
--cc=chaitanyak@nvidia.com \
--cc=dan.j.williams@intel.com \
--cc=gregory.price@memverge.com \
--cc=hch@lst.de \
--cc=linux-bcache@vger.kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-cxl@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=nvdimm@lists.linux.dev \
--cc=rdunlap@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox