All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dongsheng Yang <dongsheng.yang@linux.dev>
To: axboe@kernel.dk, hch@lst.de, dan.j.williams@intel.com,
	gregory.price@memverge.com, John@groves.net,
	Jonathan.Cameron@Huawei.com, bbhushan2@marvell.com,
	chaitanyak@nvidia.com, rdunlap@infradead.org
Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-cxl@vger.kernel.org, linux-bcache@vger.kernel.org,
	nvdimm@lists.linux.dev, Dongsheng Yang <dongsheng.yang@linux.dev>
Subject: [RFC PATCH 02/11] pcache: introduce segment abstraction
Date: Mon, 14 Apr 2025 01:44:56 +0000	[thread overview]
Message-ID: <20250414014505.20477-3-dongsheng.yang@linux.dev> (raw)
In-Reply-To: <20250414014505.20477-1-dongsheng.yang@linux.dev>

pcache: introduce segment abstraction and metadata support

This patch introduces the basic infrastructure for managing segments
in the pcache system. A "segment" is the minimum unit of allocation
and persistence on the persistent memory used as cache.

Key features introduced:

- `struct pcache_segment` and associated helpers for managing segment data.
- Metadata handling for segments via `struct pcache_segment_info`, including
  type, state, data offset, and next-segment pointer.
- Support for reading and writing segment metadata with on-media consistency
  using `pcache_meta_find_latest()` and `pcache_meta_find_oldest()` helpers.
- Abstractions for copying data to and from segments and bio vectors, including:
  - `segment_copy_to_bio()`
  - `segment_copy_from_bio()`
- A logical cursor helper, `segment_pos_advance()`, for stepping through the data inside a segment.

Segment metadata is stored inline in each segment; each copy carries a sequence
number and a CRC to ensure integrity and crash safety. The segment design also lays
the foundation for segment chaining via `next_seg`, which will be used by
cache_segment and other higher-level structures.

This patch is part of the core segment layer and will be utilized by metadata
and data layers such as meta_segment and cache_segment in subsequent patches.

Signed-off-by: Dongsheng Yang <dongsheng.yang@linux.dev>
---
 drivers/block/pcache/segment.c | 175 +++++++++++++++++++++++++++++++++
 drivers/block/pcache/segment.h |  78 +++++++++++++++
 2 files changed, 253 insertions(+)
 create mode 100644 drivers/block/pcache/segment.c
 create mode 100644 drivers/block/pcache/segment.h

diff --git a/drivers/block/pcache/segment.c b/drivers/block/pcache/segment.c
new file mode 100644
index 000000000000..01e43c9d9bfa
--- /dev/null
+++ b/drivers/block/pcache/segment.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/bug.h>
+#include <linux/dax.h>
+#include <linux/errno.h>
+
+#include "pcache_internal.h"
+#include "cache_dev.h"
+#include "cache.h"
+#include "backing_dev.h"
+#include "meta_segment.h"
+#include "segment.h"
+
+/**
+ * segment_pos_advance() - move a segment cursor forward
+ * @seg_pos: cursor to advance; @seg_pos->off is updated in place
+ * @len: number of bytes to advance by
+ *
+ * The cursor always stays inside @seg_pos->segment: nothing here switches to
+ * another segment, so @len has to fit between the current offset and the end
+ * of the segment's data area.  The previous loop-based version clamped each
+ * step to the remaining space and therefore spun forever once the end was
+ * reached with bytes still left to consume; that case is now rejected.
+ *
+ * Return: 0 on success, -EINVAL if the cursor is already out of range or
+ * @len exceeds the remaining space (the cursor is left unchanged).
+ */
+int segment_pos_advance(struct pcache_segment_pos *seg_pos, u32 len)
+{
+	u32 data_size = seg_pos->segment->data_size;
+
+	/* Written as two comparisons so the u32 subtraction cannot wrap. */
+	if (seg_pos->off > data_size || len > data_size - seg_pos->off)
+		return -EINVAL;
+
+	seg_pos->off += len;
+
+	return 0;
+}
+
+/**
+ * segment_copy_to_bio() - copy data from a segment into the pages of a bio
+ * @segment: source segment
+ * @data_off: byte offset into @segment->data to start reading from
+ * @data_len: number of bytes to copy
+ * @bio: destination bio; bios linked through ->bi_next are walked as well
+ * @bio_off: number of bytes of bio data to skip before the first byte written
+ *
+ * The source range must lie entirely inside the segment's data area.  The
+ * previous implementation looped forever when the segment ran out before
+ * @data_len was consumed; that is now rejected up front.
+ *
+ * NOTE(review): if the bio chain is shorter than @bio_off + @data_len the
+ * copy is silently truncated and 0 is still returned, as before.
+ *
+ * Return: 0 on success, -EINVAL if the source range does not fit in the
+ * segment.
+ */
+int segment_copy_to_bio(struct pcache_segment *segment,
+		u32 data_off, u32 data_len, struct bio *bio, u32 bio_off)
+{
+	struct bio_vec bv;
+	struct bvec_iter iter;
+	void *dst;
+	u32 to_copy;
+
+	if (!data_len)
+		return 0;
+
+	/* Two comparisons so the u32 subtraction cannot wrap. */
+	if (data_off > segment->data_size ||
+	    data_len > segment->data_size - data_off)
+		return -EINVAL;
+next:
+	bio_for_each_segment(bv, bio, iter) {
+		/* Still skipping the leading @bio_off bytes of bio data. */
+		if (bio_off >= bv.bv_len) {
+			bio_off -= bv.bv_len;
+			continue;
+		}
+
+		to_copy = min(bv.bv_len - bio_off, data_len);
+
+		dst = kmap_local_page(bv.bv_page);
+		memcpy(dst + bv.bv_offset + bio_off, segment->data + data_off, to_copy);
+		/* The page was written through the kernel mapping: flush afterwards. */
+		flush_dcache_page(bv.bv_page);
+		kunmap_local(dst);
+
+		/* Only the first copied bvec starts at a non-zero offset. */
+		bio_off = 0;
+		data_off += to_copy;
+		data_len -= to_copy;
+		if (!data_len)
+			return 0;
+	}
+
+	if (bio->bi_next) {
+		bio = bio->bi_next;
+		goto next;
+	}
+
+	return 0;
+}
+
+/**
+ * segment_copy_from_bio() - copy data from the pages of a bio into a segment
+ * @segment: destination segment
+ * @data_off: byte offset into @segment->data to start writing at
+ * @data_len: number of bytes to copy
+ * @bio: source bio; bios linked through ->bi_next are walked as well
+ * @bio_off: number of bytes of bio data to skip before the first byte read
+ *
+ * Data is stored with memcpy_flushcache().  The destination range must lie
+ * entirely inside the segment's data area; the previous implementation looped
+ * forever when the segment ran out before @data_len was consumed.  Because
+ * this function cannot return an error, an out-of-range request now triggers
+ * a one-time warning and nothing is copied.
+ *
+ * NOTE(review): if the bio chain is shorter than @bio_off + @data_len the
+ * copy is silently truncated, as before.
+ */
+void segment_copy_from_bio(struct pcache_segment *segment,
+		u32 data_off, u32 data_len, struct bio *bio, u32 bio_off)
+{
+	struct bio_vec bv;
+	struct bvec_iter iter;
+	void *src;
+	u32 to_copy;
+
+	if (!data_len)
+		return;
+
+	/* Two comparisons so the u32 subtraction cannot wrap. */
+	if (WARN_ON_ONCE(data_off > segment->data_size ||
+			 data_len > segment->data_size - data_off))
+		return;
+next:
+	bio_for_each_segment(bv, bio, iter) {
+		/* Still skipping the leading @bio_off bytes of bio data. */
+		if (bio_off >= bv.bv_len) {
+			bio_off -= bv.bv_len;
+			continue;
+		}
+
+		to_copy = min(bv.bv_len - bio_off, data_len);
+
+		/* About to read the page through the kernel mapping: flush first. */
+		flush_dcache_page(bv.bv_page);
+		src = kmap_local_page(bv.bv_page);
+		memcpy_flushcache(segment->data + data_off, src + bv.bv_offset + bio_off, to_copy);
+		kunmap_local(src);
+
+		/* Only the first copied bvec starts at a non-zero offset. */
+		bio_off = 0;
+		data_off += to_copy;
+		data_len -= to_copy;
+		if (!data_len)
+			return;
+	}
+
+	if (bio->bi_next) {
+		bio = bio->bi_next;
+		goto next;
+	}
+}
+
+/**
+ * pcache_segment_init() - set up an in-memory segment descriptor
+ * @cache_dev: cache device the segment belongs to
+ * @segment: descriptor to fill in
+ * @options: type, state, segment id, data offset and the seg_info record to
+ *           attach to @segment
+ *
+ * Copies type, state, seg_id and data_off from @options into
+ * @options->seg_info, attaches that record to @segment, and points
+ * @segment->data at the segment's base address plus data_off.  The data area
+ * spans the rest of the segment (PCACHE_SEG_SIZE - data_off).
+ *
+ * Return: 0 on success, -EINVAL if data_off lies beyond the end of the
+ * segment (nothing is modified in that case).
+ */
+int pcache_segment_init(struct pcache_cache_dev *cache_dev, struct pcache_segment *segment,
+		      struct pcache_segment_init_options *options)
+{
+	struct pcache_segment_info *seg_info = options->seg_info;
+
+	/* data_size is unsigned; a larger data_off would make it wrap. */
+	if (options->data_off > PCACHE_SEG_SIZE)
+		return -EINVAL;
+
+	seg_info->type = options->type;
+	seg_info->state = options->state;
+	seg_info->seg_id = options->seg_id;
+	seg_info->data_off = options->data_off;
+
+	segment->seg_info = seg_info;
+	segment->cache_dev = cache_dev;
+	segment->data_size = PCACHE_SEG_SIZE - options->data_off;
+	segment->data = CACHE_DEV_SEGMENT(cache_dev, options->seg_id) + options->data_off;
+
+	return 0;
+}
+
+/*
+ * Persist @seg_info as the newest metadata record of segment @seg_id.
+ *
+ * The sequence number in @seg_info is bumped, the record is copied into the
+ * slot chosen by pcache_meta_find_oldest() (searching from the start of the
+ * segment), the CRC is computed over PCACHE_SEG_INFO_SIZE bytes of that slot,
+ * and finally the slot is flushed through cache_dev_flush().
+ */
+void pcache_segment_info_write(struct pcache_cache_dev *cache_dev, struct pcache_segment_info *seg_info, u32 seg_id)
+{
+	struct pcache_segment_info *first_slot;
+	struct pcache_segment_info *slot;
+
+	seg_info->header.seq++;
+
+	first_slot = CACHE_DEV_SEGMENT(cache_dev, seg_id);
+	slot = pcache_meta_find_oldest(&first_slot->header, PCACHE_SEG_INFO_SIZE);
+
+	/* Copy the record first; the CRC is then taken from the stored bytes. */
+	memcpy(slot, seg_info, sizeof(*seg_info));
+	slot->header.crc = pcache_meta_crc(&slot->header, PCACHE_SEG_INFO_SIZE);
+
+	cache_dev_flush(cache_dev, slot, PCACHE_SEG_INFO_SIZE);
+}
+
+/*
+ * Look up the metadata record of segment @seg_id.
+ *
+ * Returns whatever pcache_meta_find_latest() selects among the
+ * PCACHE_SEG_INFO_SIZE-sized slots that start at the segment's base address.
+ */
+struct pcache_segment_info *pcache_segment_info_read(struct pcache_cache_dev *cache_dev, u32 seg_id)
+{
+	struct pcache_segment_info *first_slot = CACHE_DEV_SEGMENT(cache_dev, seg_id);
+
+	return pcache_meta_find_latest(&first_slot->header, PCACHE_SEG_INFO_SIZE);
+}
diff --git a/drivers/block/pcache/segment.h b/drivers/block/pcache/segment.h
new file mode 100644
index 000000000000..c41cb8d5b921
--- /dev/null
+++ b/drivers/block/pcache/segment.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _PCACHE_SEGMENT_H
+#define _PCACHE_SEGMENT_H
+
+#include <linux/bio.h>
+
+#include "pcache_internal.h"
+
+/*
+ * Logging helpers that prefix the message with the segment id.
+ *
+ * @segment is a struct pcache_segment pointer.  The id is read from the
+ * attached seg_info record because struct pcache_segment itself has no
+ * seg_id member; it is a u32, hence %u.
+ */
+#define segment_err(segment, fmt, ...)					\
+	cache_dev_err((segment)->cache_dev, "segment%u: " fmt,		\
+		 (segment)->seg_info->seg_id, ##__VA_ARGS__)
+#define segment_info(segment, fmt, ...)					\
+	cache_dev_info((segment)->cache_dev, "segment%u: " fmt,		\
+		 (segment)->seg_info->seg_id, ##__VA_ARGS__)
+#define segment_debug(segment, fmt, ...)				\
+	cache_dev_debug((segment)->cache_dev, "segment%u: " fmt,	\
+		 (segment)->seg_info->seg_id, ##__VA_ARGS__)
+
+
+/* Values for pcache_segment_info.state. */
+#define PCACHE_SEGMENT_STATE_NONE		0
+#define PCACHE_SEGMENT_STATE_RUNNING	1
+
+/*
+ * Values for pcache_segment_info.type.
+ * NOTE(review): the first two use a PCACHES_ prefix while the third uses
+ * PCACHE_SEGMENT_; confirm which spelling the users expect before renaming.
+ */
+#define PCACHES_TYPE_NONE			0
+#define PCACHES_TYPE_META			1
+#define PCACHE_SEGMENT_TYPE_DATA			2
+
+/*
+ * Per-segment metadata record.  pcache_segment_info_write() copies it into a
+ * slot at the start of the segment, so the field layout is part of what gets
+ * stored on the cache device.
+ */
+struct pcache_segment_info {
+	struct pcache_meta_header	header;	/* Metadata header for the segment */
+	u8			type;		/* Segment type, set from the init options */
+	u8			state;		/* Segment state, set from the init options */
+	u16			flags;		/* PCACHE_SEG_INFO_FLAGS_* bits */
+	u32			next_seg;	/* presumably the id of the next segment in a chain - TODO confirm */
+	u32			seg_id;		/* Id of this segment */
+	u32			data_off;	/* Offset of the data area from the segment start */
+};
+
+#define PCACHE_SEG_INFO_FLAGS_HAS_NEXT	(1 << 0)
+
+/* Tell whether PCACHE_SEG_INFO_FLAGS_HAS_NEXT is set in @seg_info->flags. */
+static inline bool segment_info_has_next(struct pcache_segment_info *seg_info)
+{
+	if (seg_info->flags & PCACHE_SEG_INFO_FLAGS_HAS_NEXT)
+		return true;
+
+	return false;
+}
+
+/*
+ * Cursor into a segment's data area.  @off is compared against the segment's
+ * data_size by segment_pos_advance().
+ */
+struct pcache_segment_pos {
+	struct pcache_segment	*segment;	/* Segment associated with the position */
+	u32			off;		/* Offset within the segment */
+};
+
+/*
+ * Arguments for pcache_segment_init().  type, state, seg_id and data_off are
+ * copied into @seg_info, which is then attached to the segment being set up.
+ */
+struct pcache_segment_init_options {
+	u8			type;		/* Value stored in seg_info->type */
+	u8			state;		/* Value stored in seg_info->state */
+	u32			seg_id;		/* Segment id; also selects the segment's base address */
+	u32			data_off;	/* Offset of the data area from the segment start */
+
+	struct pcache_segment_info	*seg_info;	/* Record that receives the fields above */
+};
+
+/* In-memory descriptor of one segment, filled in by pcache_segment_init(). */
+struct pcache_segment {
+	struct pcache_cache_dev	*cache_dev;	/* Cache device the segment belongs to */
+
+	void			*data;		/* Segment base address plus data_off */
+	u32			data_size;	/* PCACHE_SEG_SIZE minus data_off */
+
+	struct pcache_segment_info	*seg_info;	/* Metadata record taken from the init options */
+};
+
+/* Copy @data_len bytes between @segment's data area (from @data_off) and @bio (from @bio_off). */
+int segment_copy_to_bio(struct pcache_segment *segment,
+		      u32 data_off, u32 data_len, struct bio *bio, u32 bio_off);
+void segment_copy_from_bio(struct pcache_segment *segment,
+			u32 data_off, u32 data_len, struct bio *bio, u32 bio_off);
+/* Advance @seg_pos->off by @len bytes. */
+int segment_pos_advance(struct pcache_segment_pos *seg_pos, u32 len);
+/* Fill in @segment and its seg_info record from @options. */
+int pcache_segment_init(struct pcache_cache_dev *cache_dev, struct pcache_segment *segment,
+		      struct pcache_segment_init_options *options);
+
+/* Store / look up the metadata record kept at the start of segment @seg_id. */
+void pcache_segment_info_write(struct pcache_cache_dev *cache_dev, struct pcache_segment_info *seg_info, u32 seg_id);
+struct pcache_segment_info *pcache_segment_info_read(struct pcache_cache_dev *cache_dev, u32 seg_id);
+
+#endif /* _PCACHE_SEGMENT_H */
-- 
2.34.1


  parent reply	other threads:[~2025-04-14  1:45 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-14  1:44 [RFC PATCH 00/11] pcache: Persistent Memory Cache for Block Devices Dongsheng Yang
2025-04-14  1:44 ` [RFC PATCH 01/11] pcache: introduce cache_dev for managing persistent memory-based cache devices Dongsheng Yang
2025-04-14  1:44 ` Dongsheng Yang [this message]
2025-04-14  1:44 ` [RFC PATCH 03/11] pcache: introduce meta_segment abstraction Dongsheng Yang
2025-04-14  1:44 ` [RFC PATCH 04/11] pcache: introduce cache_segment abstraction Dongsheng Yang
2025-04-14  1:44 ` [RFC PATCH 05/11] pcache: introduce lifecycle management of pcache_cache Dongsheng Yang
2025-04-14  1:45 ` [RFC PATCH 06/11] pcache: gc and writeback Dongsheng Yang
2025-04-14  1:45 ` [RFC PATCH 07/11] pcache: introduce cache_key infrastructure for persistent metadata management Dongsheng Yang
2025-04-14  1:45 ` [RFC PATCH 08/11] pcache: implement request processing and cache I/O path in cache_req Dongsheng Yang
2025-04-14  1:45 ` [RFC PATCH 09/11] pcache: introduce logic block device and request handling Dongsheng Yang
2025-04-14  1:45 ` [RFC PATCH 10/11] pcache: add backing device management Dongsheng Yang
2025-04-14  1:45 ` [RFC PATCH 11/11] block: introduce pcache (persistent memory to be cache for block device) Dongsheng Yang
2025-04-15 18:00 ` [RFC PATCH 00/11] pcache: Persistent Memory Cache for Block Devices Dan Williams
2025-04-16  1:04   ` Jens Axboe
2025-04-16  6:08     ` Dongsheng Yang
2025-04-16 15:10       ` Jens Axboe
2025-04-16 21:40         ` Dongsheng Yang
2025-04-22 10:29           ` Mikulas Patocka
2025-04-22 13:23             ` Dongsheng Yang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250414014505.20477-3-dongsheng.yang@linux.dev \
    --to=dongsheng.yang@linux.dev \
    --cc=John@groves.net \
    --cc=Jonathan.Cameron@Huawei.com \
    --cc=axboe@kernel.dk \
    --cc=bbhushan2@marvell.com \
    --cc=chaitanyak@nvidia.com \
    --cc=dan.j.williams@intel.com \
    --cc=gregory.price@memverge.com \
    --cc=hch@lst.de \
    --cc=linux-bcache@vger.kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nvdimm@lists.linux.dev \
    --cc=rdunlap@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.