add a "discard cache" debug option to zloop

public inbox for linux-block@vger.kernel.org
 help / color / mirror / Atom feed

* add a "discard cache" debug option to zloop
@ 2026-03-18  5:53 Christoph Hellwig
  2026-03-18  5:53 ` [PATCH 1/2] zloop: refactor zloop_rw Christoph Hellwig
  2026-03-18  5:53 ` [PATCH 2/2] zloop: forget write cache on force removal Christoph Hellwig
  0 siblings, 2 replies; 8+ messages in thread
From: Christoph Hellwig @ 2026-03-18  5:53 UTC (permalink / raw)
  To: Damien Le Moal, Jens Axboe; +Cc: linux-block

Hi all,

this series adds a new option to zloop to lose data not committed to stable
storage using a flush operation on device removal.  The idea is to help
testing that file system code does the right thing in the face of volatile
write caches.  For conventional devices, this can be tested using
dm-log-writes, but the concepts there don't work for sequential write
required zones.  Instead this adds an option to zloop, which records the
write pointer at the last cache flush for each zone file in an xattr,
and truncates the files down to that value on removal, simulating losing
the contents of the volatile write cache.

Diffstat:
 Documentation/admin-guide/blockdev/zoned_loop.rst |    5 
 drivers/block/zloop.c                             |  337 ++++++++++++++--------
 2 files changed, 226 insertions(+), 116 deletions(-)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] zloop: refactor zloop_rw
  2026-03-18  5:53 add a "discard cache" debug option to zloop Christoph Hellwig
@ 2026-03-18  5:53 ` Christoph Hellwig
  2026-03-18  6:58   ` Damien Le Moal
  2026-03-18  5:53 ` [PATCH 2/2] zloop: forget write cache on force removal Christoph Hellwig
  1 sibling, 1 reply; 8+ messages in thread
From: Christoph Hellwig @ 2026-03-18  5:53 UTC (permalink / raw)
  To: Damien Le Moal, Jens Axboe; +Cc: linux-block

Split out two helper functions to make the function more readable and
to avoid conditional locking.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/block/zloop.c | 240 ++++++++++++++++++++++--------------------
 1 file changed, 124 insertions(+), 116 deletions(-)

diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c
index 51c043342127..8ca37ca1935a 100644
--- a/drivers/block/zloop.c
+++ b/drivers/block/zloop.c
@@ -378,125 +378,22 @@ static void zloop_rw_complete(struct kiocb *iocb, long ret)
 	zloop_put_cmd(cmd);
 }
 
-static void zloop_rw(struct zloop_cmd *cmd)
+static int zloop_do_rw(struct zloop_cmd *cmd)
 {
 	struct request *rq = blk_mq_rq_from_pdu(cmd);
+	int rw = req_op(rq) == REQ_OP_READ ? ITER_DEST : ITER_SOURCE;
+	unsigned int nr_bvec = blk_rq_nr_bvec(rq);
 	struct zloop_device *zlo = rq->q->queuedata;
-	unsigned int zone_no = rq_zone_no(rq);
-	sector_t sector = blk_rq_pos(rq);
-	sector_t nr_sectors = blk_rq_sectors(rq);
-	bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
-	bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
-	int rw = is_write ? ITER_SOURCE : ITER_DEST;
+	struct zloop_zone *zone = &zlo->zones[rq_zone_no(rq)];
 	struct req_iterator rq_iter;
-	struct zloop_zone *zone;
 	struct iov_iter iter;
-	struct bio_vec tmp;
-	unsigned long flags;
-	sector_t zone_end;
-	unsigned int nr_bvec;
-	int ret;
-
-	atomic_set(&cmd->ref, 2);
-	cmd->sector = sector;
-	cmd->nr_sectors = nr_sectors;
-	cmd->ret = 0;
-
-	if (WARN_ON_ONCE(is_append && !zlo->zone_append)) {
-		ret = -EIO;
-		goto out;
-	}
-
-	/* We should never get an I/O beyond the device capacity. */
-	if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
-		ret = -EIO;
-		goto out;
-	}
-	zone = &zlo->zones[zone_no];
-	zone_end = zone->start + zlo->zone_capacity;
-
-	/*
-	 * The block layer should never send requests that are not fully
-	 * contained within the zone.
-	 */
-	if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) {
-		ret = -EIO;
-		goto out;
-	}
-
-	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
-		mutex_lock(&zone->lock);
-		ret = zloop_update_seq_zone(zlo, zone_no);
-		mutex_unlock(&zone->lock);
-		if (ret)
-			goto out;
-	}
-
-	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
-		mutex_lock(&zone->lock);
-
-		spin_lock_irqsave(&zone->wp_lock, flags);
-
-		/*
-		 * Zone append operations always go at the current write
-		 * pointer, but regular write operations must already be
-		 * aligned to the write pointer when submitted.
-		 */
-		if (is_append) {
-			/*
-			 * If ordered zone append is in use, we already checked
-			 * and set the target sector in zloop_queue_rq().
-			 */
-			if (!zlo->ordered_zone_append) {
-				if (zone->cond == BLK_ZONE_COND_FULL ||
-				    zone->wp + nr_sectors > zone_end) {
-					spin_unlock_irqrestore(&zone->wp_lock,
-							       flags);
-					ret = -EIO;
-					goto unlock;
-				}
-				sector = zone->wp;
-			}
-			cmd->sector = sector;
-		} else if (sector != zone->wp) {
-			spin_unlock_irqrestore(&zone->wp_lock, flags);
-			pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
-			       zone_no, sector, zone->wp);
-			ret = -EIO;
-			goto unlock;
-		}
-
-		/* Implicitly open the target zone. */
-		if (zone->cond == BLK_ZONE_COND_CLOSED ||
-		    zone->cond == BLK_ZONE_COND_EMPTY)
-			zone->cond = BLK_ZONE_COND_IMP_OPEN;
-
-		/*
-		 * Advance the write pointer, unless ordered zone append is in
-		 * use. If the write fails, the write pointer position will be
-		 * corrected when the next I/O starts execution.
-		 */
-		if (!is_append || !zlo->ordered_zone_append) {
-			zone->wp += nr_sectors;
-			if (zone->wp == zone_end) {
-				zone->cond = BLK_ZONE_COND_FULL;
-				zone->wp = ULLONG_MAX;
-			}
-		}
-
-		spin_unlock_irqrestore(&zone->wp_lock, flags);
-	}
-
-	nr_bvec = blk_rq_nr_bvec(rq);
 
 	if (rq->bio != rq->biotail) {
-		struct bio_vec *bvec;
+		struct bio_vec tmp, *bvec;
 
 		cmd->bvec = kmalloc_objs(*cmd->bvec, nr_bvec, GFP_NOIO);
-		if (!cmd->bvec) {
-			ret = -EIO;
-			goto unlock;
-		}
+		if (!cmd->bvec)
+			return -EIO;
 
 		/*
 		 * The bios of the request may be started from the middle of
@@ -522,7 +419,7 @@ static void zloop_rw(struct zloop_cmd *cmd)
 		iter.iov_offset = rq->bio->bi_iter.bi_bvec_done;
 	}
 
-	cmd->iocb.ki_pos = (sector - zone->start) << SECTOR_SHIFT;
+	cmd->iocb.ki_pos = (cmd->sector - zone->start) << SECTOR_SHIFT;
 	cmd->iocb.ki_filp = zone->file;
 	cmd->iocb.ki_complete = zloop_rw_complete;
 	if (!zlo->buffered_io)
@@ -530,12 +427,123 @@ static void zloop_rw(struct zloop_cmd *cmd)
 	cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
 
 	if (rw == ITER_SOURCE)
-		ret = zone->file->f_op->write_iter(&cmd->iocb, &iter);
-	else
-		ret = zone->file->f_op->read_iter(&cmd->iocb, &iter);
-unlock:
-	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write)
+		return zone->file->f_op->write_iter(&cmd->iocb, &iter);
+	return zone->file->f_op->read_iter(&cmd->iocb, &iter);
+}
+
+static int zloop_seq_write_prep(struct zloop_cmd *cmd)
+{
+	struct request *rq = blk_mq_rq_from_pdu(cmd);
+	struct zloop_device *zlo = rq->q->queuedata;
+	unsigned int zone_no = rq_zone_no(rq);
+	sector_t nr_sectors = blk_rq_sectors(rq);
+	bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
+	struct zloop_zone *zone = &zlo->zones[zone_no];
+	sector_t zone_end = zone->start + zlo->zone_capacity;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&zone->wp_lock, flags);
+
+	/*
+	 * Zone append operations always go at the current write pointer, but
+	 * regular write operations must already be aligned to the write pointer
+	 * when submitted.
+	 */
+	if (is_append) {
+		/*
+		 * If ordered zone append is in use, we already checked and set
+		 * the target sector in zloop_queue_rq().
+		 */
+		if (!zlo->ordered_zone_append) {
+			if (zone->cond == BLK_ZONE_COND_FULL ||
+			    zone->wp + nr_sectors > zone_end) {
+				ret = -EIO;
+				goto out_unlock;
+			}
+			cmd->sector = zone->wp;
+		}
+	} else {
+		if (cmd->sector != zone->wp) {
+			pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
+			       zone_no, cmd->sector, zone->wp);
+			ret = -EIO;
+			goto out_unlock;
+		}
+	}
+
+	/* Implicitly open the target zone. */
+	if (zone->cond == BLK_ZONE_COND_CLOSED ||
+	    zone->cond == BLK_ZONE_COND_EMPTY)
+		zone->cond = BLK_ZONE_COND_IMP_OPEN;
+
+	/*
+	 * Advance the write pointer, unless ordered zone append is in use. If
+	 * the write fails, the write pointer position will be corrected when
+	 * the next I/O starts execution.
+	 */
+	if (!is_append || !zlo->ordered_zone_append) {
+		zone->wp += nr_sectors;
+		if (zone->wp == zone_end) {
+			zone->cond = BLK_ZONE_COND_FULL;
+			zone->wp = ULLONG_MAX;
+		}
+	}
+out_unlock:
+	spin_unlock_irqrestore(&zone->wp_lock, flags);
+	return ret;
+}
+
+static void zloop_rw(struct zloop_cmd *cmd)
+{
+	struct request *rq = blk_mq_rq_from_pdu(cmd);
+	struct zloop_device *zlo = rq->q->queuedata;
+	unsigned int zone_no = rq_zone_no(rq);
+	sector_t nr_sectors = blk_rq_sectors(rq);
+	bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
+	bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
+	struct zloop_zone *zone;
+	int ret = -EIO;
+
+	atomic_set(&cmd->ref, 2);
+	cmd->sector = blk_rq_pos(rq);
+	cmd->nr_sectors = nr_sectors;
+	cmd->ret = 0;
+
+	if (WARN_ON_ONCE(is_append && !zlo->zone_append))
+		goto out;
+
+	/* We should never get an I/O beyond the device capacity. */
+	if (WARN_ON_ONCE(zone_no >= zlo->nr_zones))
+		goto out;
+
+	zone = &zlo->zones[zone_no];
+
+	/*
+	 * The block layer should never send requests that are not fully
+	 * contained within the zone.
+	 */
+	if (WARN_ON_ONCE(cmd->sector + nr_sectors >
+			 zone->start + zlo->zone_size))
+		goto out;
+
+	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
+		mutex_lock(&zone->lock);
+		ret = zloop_update_seq_zone(zlo, zone_no);
 		mutex_unlock(&zone->lock);
+		if (ret)
+			goto out;
+	}
+
+	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
+		mutex_lock(&zone->lock);
+		ret = zloop_seq_write_prep(cmd);
+		if (!ret)
+			ret = zloop_do_rw(cmd);
+		mutex_unlock(&zone->lock);
+	} else {
+		ret = zloop_do_rw(cmd);
+	}
 out:
 	if (ret != -EIOCBQUEUED)
 		zloop_rw_complete(&cmd->iocb, ret);
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] zloop: refactor zloop_rw
  2026-03-18  5:53 ` [PATCH 1/2] zloop: refactor zloop_rw Christoph Hellwig
@ 2026-03-18  6:58   ` Damien Le Moal
  0 siblings, 0 replies; 8+ messages in thread
From: Damien Le Moal @ 2026-03-18  6:58 UTC (permalink / raw)
  To: Christoph Hellwig, Jens Axboe; +Cc: linux-block

On 3/18/26 2:53 PM, Christoph Hellwig wrote:
> Split out two helpers functions to make the function more readable and
> to avoid conditional locking.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Nice cleanup.

Reviewed-by: Damien Le Moal <dlemoal@kernel.org>

-- 
Damien Le Moal
Western Digital Research

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/2] zloop: forget write cache on force removal
  2026-03-18  5:53 add a "discard cache" debug option to zloop Christoph Hellwig
  2026-03-18  5:53 ` [PATCH 1/2] zloop: refactor zloop_rw Christoph Hellwig
@ 2026-03-18  5:53 ` Christoph Hellwig
  2026-03-18  7:03   ` Damien Le Moal
  1 sibling, 1 reply; 8+ messages in thread
From: Christoph Hellwig @ 2026-03-18  5:53 UTC (permalink / raw)
  To: Damien Le Moal, Jens Axboe; +Cc: linux-block

Add a new option that causes zloop to truncate the zone files to the
write pointer value recorded at the last cache flush to simulate
unclean shutdowns.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 .../admin-guide/blockdev/zoned_loop.rst       |  5 +
 drivers/block/zloop.c                         | 97 +++++++++++++++++++
 2 files changed, 102 insertions(+)

diff --git a/Documentation/admin-guide/blockdev/zoned_loop.rst b/Documentation/admin-guide/blockdev/zoned_loop.rst
index 6aa865424ac3..237ee2fccb82 100644
--- a/Documentation/admin-guide/blockdev/zoned_loop.rst
+++ b/Documentation/admin-guide/blockdev/zoned_loop.rst
@@ -104,6 +104,11 @@ ordered_zone_append   Enable zloop mitigation of zone append reordering.
                       (extents), as when enabled, this can significantly reduce
                       the number of data extents needed to for a file data
                       mapping.
+discard_write_cache   Discard all data that was not explicitly persisted using a
+                      flush operation when removed by truncating each zone file
+                      to the size recorded during the last flush operation.
+                      This simulates power fail events where uncommitted data is
+                      lost.
 ===================   =========================================================
 
 3) Deleting a Zoned Device
diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c
index 8ca37ca1935a..86a1324c27b3 100644
--- a/drivers/block/zloop.c
+++ b/drivers/block/zloop.c
@@ -17,6 +17,7 @@
 #include <linux/mutex.h>
 #include <linux/parser.h>
 #include <linux/seq_file.h>
+#include <linux/xattr.h>
 
 /*
  * Options for adding (and removing) a device.
@@ -34,6 +35,7 @@ enum {
 	ZLOOP_OPT_BUFFERED_IO		= (1 << 8),
 	ZLOOP_OPT_ZONE_APPEND		= (1 << 9),
 	ZLOOP_OPT_ORDERED_ZONE_APPEND	= (1 << 10),
+	ZLOOP_OPT_DISCARD_WRITE_CACHE	= (1 << 11),
 };
 
 static const match_table_t zloop_opt_tokens = {
@@ -48,6 +50,7 @@ static const match_table_t zloop_opt_tokens = {
 	{ ZLOOP_OPT_BUFFERED_IO,	"buffered_io"		},
 	{ ZLOOP_OPT_ZONE_APPEND,	"zone_append=%u"	},
 	{ ZLOOP_OPT_ORDERED_ZONE_APPEND, "ordered_zone_append"	},
+	{ ZLOOP_OPT_DISCARD_WRITE_CACHE, "discard_write_cache" },
 	{ ZLOOP_OPT_ERR,		NULL			}
 };
 
@@ -79,6 +82,7 @@ struct zloop_options {
 	bool			buffered_io;
 	bool			zone_append;
 	bool			ordered_zone_append;
+	bool			discard_write_cache;
 };
 
 /*
@@ -119,6 +123,7 @@ struct zloop_device {
 	bool			buffered_io;
 	bool			zone_append;
 	bool			ordered_zone_append;
+	bool			discard_write_cache;
 
 	const char		*base_dir;
 	struct file		*data_dir;
@@ -550,6 +555,41 @@ static void zloop_rw(struct zloop_cmd *cmd)
 	zloop_put_cmd(cmd);
 }
 
+static inline bool zloop_zone_is_active(struct zloop_zone *zone)
+{
+	switch (zone->cond) {
+	case BLK_ZONE_COND_EXP_OPEN:
+	case BLK_ZONE_COND_IMP_OPEN:
+	case BLK_ZONE_COND_CLOSED:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static int zloop_record_safe_wps(struct zloop_device *zlo)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < zlo->nr_zones; i++) {
+		struct zloop_zone *zone = &zlo->zones[i];
+		struct file *file = zone->file;
+
+		if (!zloop_zone_is_active(zone))
+			continue;
+		ret = vfs_setxattr(file_mnt_idmap(file), file_dentry(file),
+				"user.zloop.wp", &zone->wp, sizeof(zone->wp), 0);
+		if (ret) {
+			pr_err("%pg: failed to record write pointer (%d)\n",
+				zlo->disk->part0, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Sync the entire FS containing the zone files instead of walking all files.
  */
@@ -558,6 +598,12 @@ static int zloop_flush(struct zloop_device *zlo)
 	struct super_block *sb = file_inode(zlo->data_dir)->i_sb;
 	int ret;
 
+	if (zlo->discard_write_cache) {
+		ret = zloop_record_safe_wps(zlo);
+		if (ret)
+			return ret;
+	}
+
 	down_read(&sb->s_umount);
 	ret = sync_filesystem(sb);
 	up_read(&sb->s_umount);
@@ -1054,6 +1100,7 @@ static int zloop_ctl_add(struct zloop_options *opts)
 	zlo->zone_append = opts->zone_append;
 	if (zlo->zone_append)
 		zlo->ordered_zone_append = opts->ordered_zone_append;
+	zlo->discard_write_cache = opts->discard_write_cache;
 
 	zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE,
 				opts->nr_queues * opts->queue_depth, zlo->id);
@@ -1176,6 +1223,49 @@ static int zloop_ctl_add(struct zloop_options *opts)
 	return ret;
 }
 
+static void zloop_truncate(struct file *file, loff_t pos)
+{
+	struct mnt_idmap *idmap = file_mnt_idmap(file);
+	struct dentry *dentry = file_dentry(file);
+	struct iattr newattrs;
+
+	newattrs.ia_size = pos;
+	newattrs.ia_valid = ATTR_SIZE;
+
+	inode_lock(dentry->d_inode);
+	notify_change(idmap, dentry, &newattrs, NULL);
+	inode_unlock(dentry->d_inode);
+}
+
+static void zloop_forget_cache(struct zloop_device *zlo)
+{
+	unsigned int i;
+	int ret;
+
+	pr_info("%pg: discarding volatile write cache\n", zlo->disk->part0);
+
+	for (i = 0; i < zlo->nr_zones; i++) {
+		struct zloop_zone *zone = &zlo->zones[i];
+		struct file *file = zone->file;
+		sector_t old_wp;
+
+		if (!zloop_zone_is_active(zone))
+			continue;
+
+		ret = vfs_getxattr(file_mnt_idmap(file), file_dentry(file),
+				"user.zloop.wp", &old_wp, sizeof(old_wp));
+		if (ret == -ENODATA) {
+			old_wp = 0;
+		} else if (ret != sizeof(old_wp)) {
+			pr_err("%pg: failed to retrieve write pointer (%d)\n",
+				zlo->disk->part0, ret);
+			continue;
+		}
+		if (old_wp < zone->wp)
+			zloop_truncate(file, old_wp);
+	}
+}
+
 static int zloop_ctl_remove(struct zloop_options *opts)
 {
 	struct zloop_device *zlo;
@@ -1210,6 +1300,10 @@ static int zloop_ctl_remove(struct zloop_options *opts)
 		return ret;
 
 	del_gendisk(zlo->disk);
+
+	if (zlo->discard_write_cache)
+		zloop_forget_cache(zlo);
+
 	put_disk(zlo->disk);
 
 	pr_info("Removed device %d\n", opts->id);
@@ -1361,6 +1455,9 @@ static int zloop_parse_options(struct zloop_options *opts, const char *buf)
 		case ZLOOP_OPT_ORDERED_ZONE_APPEND:
 			opts->ordered_zone_append = true;
 			break;
+		case ZLOOP_OPT_DISCARD_WRITE_CACHE:
+			opts->discard_write_cache = true;
+			break;
 		case ZLOOP_OPT_ERR:
 		default:
 			pr_warn("unknown parameter or missing value '%s'\n", p);
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] zloop: forget write cache on force removal
  2026-03-18  5:53 ` [PATCH 2/2] zloop: forget write cache on force removal Christoph Hellwig
@ 2026-03-18  7:03   ` Damien Le Moal
  0 siblings, 0 replies; 8+ messages in thread
From: Damien Le Moal @ 2026-03-18  7:03 UTC (permalink / raw)
  To: Christoph Hellwig, Jens Axboe; +Cc: linux-block

On 3/18/26 2:53 PM, Christoph Hellwig wrote:
> Add a new options that causes zloop to truncate the zone files to the
> write pointer value recorded at the last cache flush to simulate
> unclean shutdowns.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks OK to me. One nit below.
With that corrected:

Reviewed-by: Damien Le Moal <dlemoal@kernel.org>

> ---
>  .../admin-guide/blockdev/zoned_loop.rst       |  5 +
>  drivers/block/zloop.c                         | 97 +++++++++++++++++++
>  2 files changed, 102 insertions(+)
> 
> diff --git a/Documentation/admin-guide/blockdev/zoned_loop.rst b/Documentation/admin-guide/blockdev/zoned_loop.rst
> index 6aa865424ac3..237ee2fccb82 100644
> --- a/Documentation/admin-guide/blockdev/zoned_loop.rst
> +++ b/Documentation/admin-guide/blockdev/zoned_loop.rst
> @@ -104,6 +104,11 @@ ordered_zone_append   Enable zloop mitigation of zone append reordering.
>                        (extents), as when enabled, this can significantly reduce
>                        the number of data extents needed to for a file data
>                        mapping.
> +discard_write_cache   Discard all data that was not explicitly persisted using a
> +                      flush operation when removed by truncating each zone file

			 flush operation when the device is removed by
		         truncating each zone file...

> +                      to the size recorded during the last flush operation.
> +                      This simulates power fail events where uncommitted data is
> +                      lost.
>  ===================   =========================================================


-- 
Damien Le Moal
Western Digital Research

^ permalink raw reply	[flat|nested] 8+ messages in thread

* add a "discard cache" debug option to zloop v2
@ 2026-03-19  6:02 Christoph Hellwig
  2026-03-19  6:02 ` [PATCH 1/2] zloop: refactor zloop_rw Christoph Hellwig
  0 siblings, 1 reply; 8+ messages in thread
From: Christoph Hellwig @ 2026-03-19  6:02 UTC (permalink / raw)
  To: Damien Le Moal, Jens Axboe; +Cc: linux-block

Hi all,

this series adds a new option to zloop to lose data not committed to
stable storage using a flush operation on device removal.  The idea is
to help testing that file system code does the right thing in the face of
volatile write caches.  For conventional devices, this can be tested
using dm-log-writes, but the concepts there don't work for sequential
write required zones.  Instead this adds an option to zloop, which
records the write pointer at the last cache flush for each zone file in
an xattr, and truncates the files down to that value on removal,
simulating losing the contents of the volatile write cache.

Changes since v1:
 - fix up the documentation

Diffstat:
 Documentation/admin-guide/blockdev/zoned_loop.rst |    5 
 drivers/block/zloop.c                             |  337 ++++++++++++++--------
 2 files changed, 226 insertions(+), 116 deletions(-)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] zloop: refactor zloop_rw
  2026-03-19  6:02 add a "discard cache" debug option to zloop v2 Christoph Hellwig
@ 2026-03-19  6:02 ` Christoph Hellwig
  2026-03-19 14:06   ` Martin K. Petersen
  0 siblings, 1 reply; 8+ messages in thread
From: Christoph Hellwig @ 2026-03-19  6:02 UTC (permalink / raw)
  To: Damien Le Moal, Jens Axboe; +Cc: linux-block

Split out two helper functions to make the function more readable and
to avoid conditional locking.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
---
 drivers/block/zloop.c | 240 ++++++++++++++++++++++--------------------
 1 file changed, 124 insertions(+), 116 deletions(-)

diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c
index 51c043342127..8ca37ca1935a 100644
--- a/drivers/block/zloop.c
+++ b/drivers/block/zloop.c
@@ -378,125 +378,22 @@ static void zloop_rw_complete(struct kiocb *iocb, long ret)
 	zloop_put_cmd(cmd);
 }
 
-static void zloop_rw(struct zloop_cmd *cmd)
+static int zloop_do_rw(struct zloop_cmd *cmd)
 {
 	struct request *rq = blk_mq_rq_from_pdu(cmd);
+	int rw = req_op(rq) == REQ_OP_READ ? ITER_DEST : ITER_SOURCE;
+	unsigned int nr_bvec = blk_rq_nr_bvec(rq);
 	struct zloop_device *zlo = rq->q->queuedata;
-	unsigned int zone_no = rq_zone_no(rq);
-	sector_t sector = blk_rq_pos(rq);
-	sector_t nr_sectors = blk_rq_sectors(rq);
-	bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
-	bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
-	int rw = is_write ? ITER_SOURCE : ITER_DEST;
+	struct zloop_zone *zone = &zlo->zones[rq_zone_no(rq)];
 	struct req_iterator rq_iter;
-	struct zloop_zone *zone;
 	struct iov_iter iter;
-	struct bio_vec tmp;
-	unsigned long flags;
-	sector_t zone_end;
-	unsigned int nr_bvec;
-	int ret;
-
-	atomic_set(&cmd->ref, 2);
-	cmd->sector = sector;
-	cmd->nr_sectors = nr_sectors;
-	cmd->ret = 0;
-
-	if (WARN_ON_ONCE(is_append && !zlo->zone_append)) {
-		ret = -EIO;
-		goto out;
-	}
-
-	/* We should never get an I/O beyond the device capacity. */
-	if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
-		ret = -EIO;
-		goto out;
-	}
-	zone = &zlo->zones[zone_no];
-	zone_end = zone->start + zlo->zone_capacity;
-
-	/*
-	 * The block layer should never send requests that are not fully
-	 * contained within the zone.
-	 */
-	if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) {
-		ret = -EIO;
-		goto out;
-	}
-
-	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
-		mutex_lock(&zone->lock);
-		ret = zloop_update_seq_zone(zlo, zone_no);
-		mutex_unlock(&zone->lock);
-		if (ret)
-			goto out;
-	}
-
-	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
-		mutex_lock(&zone->lock);
-
-		spin_lock_irqsave(&zone->wp_lock, flags);
-
-		/*
-		 * Zone append operations always go at the current write
-		 * pointer, but regular write operations must already be
-		 * aligned to the write pointer when submitted.
-		 */
-		if (is_append) {
-			/*
-			 * If ordered zone append is in use, we already checked
-			 * and set the target sector in zloop_queue_rq().
-			 */
-			if (!zlo->ordered_zone_append) {
-				if (zone->cond == BLK_ZONE_COND_FULL ||
-				    zone->wp + nr_sectors > zone_end) {
-					spin_unlock_irqrestore(&zone->wp_lock,
-							       flags);
-					ret = -EIO;
-					goto unlock;
-				}
-				sector = zone->wp;
-			}
-			cmd->sector = sector;
-		} else if (sector != zone->wp) {
-			spin_unlock_irqrestore(&zone->wp_lock, flags);
-			pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
-			       zone_no, sector, zone->wp);
-			ret = -EIO;
-			goto unlock;
-		}
-
-		/* Implicitly open the target zone. */
-		if (zone->cond == BLK_ZONE_COND_CLOSED ||
-		    zone->cond == BLK_ZONE_COND_EMPTY)
-			zone->cond = BLK_ZONE_COND_IMP_OPEN;
-
-		/*
-		 * Advance the write pointer, unless ordered zone append is in
-		 * use. If the write fails, the write pointer position will be
-		 * corrected when the next I/O starts execution.
-		 */
-		if (!is_append || !zlo->ordered_zone_append) {
-			zone->wp += nr_sectors;
-			if (zone->wp == zone_end) {
-				zone->cond = BLK_ZONE_COND_FULL;
-				zone->wp = ULLONG_MAX;
-			}
-		}
-
-		spin_unlock_irqrestore(&zone->wp_lock, flags);
-	}
-
-	nr_bvec = blk_rq_nr_bvec(rq);
 
 	if (rq->bio != rq->biotail) {
-		struct bio_vec *bvec;
+		struct bio_vec tmp, *bvec;
 
 		cmd->bvec = kmalloc_objs(*cmd->bvec, nr_bvec, GFP_NOIO);
-		if (!cmd->bvec) {
-			ret = -EIO;
-			goto unlock;
-		}
+		if (!cmd->bvec)
+			return -EIO;
 
 		/*
 		 * The bios of the request may be started from the middle of
@@ -522,7 +419,7 @@ static void zloop_rw(struct zloop_cmd *cmd)
 		iter.iov_offset = rq->bio->bi_iter.bi_bvec_done;
 	}
 
-	cmd->iocb.ki_pos = (sector - zone->start) << SECTOR_SHIFT;
+	cmd->iocb.ki_pos = (cmd->sector - zone->start) << SECTOR_SHIFT;
 	cmd->iocb.ki_filp = zone->file;
 	cmd->iocb.ki_complete = zloop_rw_complete;
 	if (!zlo->buffered_io)
@@ -530,12 +427,123 @@ static void zloop_rw(struct zloop_cmd *cmd)
 	cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
 
 	if (rw == ITER_SOURCE)
-		ret = zone->file->f_op->write_iter(&cmd->iocb, &iter);
-	else
-		ret = zone->file->f_op->read_iter(&cmd->iocb, &iter);
-unlock:
-	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write)
+		return zone->file->f_op->write_iter(&cmd->iocb, &iter);
+	return zone->file->f_op->read_iter(&cmd->iocb, &iter);
+}
+
+static int zloop_seq_write_prep(struct zloop_cmd *cmd)
+{
+	struct request *rq = blk_mq_rq_from_pdu(cmd);
+	struct zloop_device *zlo = rq->q->queuedata;
+	unsigned int zone_no = rq_zone_no(rq);
+	sector_t nr_sectors = blk_rq_sectors(rq);
+	bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
+	struct zloop_zone *zone = &zlo->zones[zone_no];
+	sector_t zone_end = zone->start + zlo->zone_capacity;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&zone->wp_lock, flags);
+
+	/*
+	 * Zone append operations always go at the current write pointer, but
+	 * regular write operations must already be aligned to the write pointer
+	 * when submitted.
+	 */
+	if (is_append) {
+		/*
+		 * If ordered zone append is in use, we already checked and set
+		 * the target sector in zloop_queue_rq().
+		 */
+		if (!zlo->ordered_zone_append) {
+			if (zone->cond == BLK_ZONE_COND_FULL ||
+			    zone->wp + nr_sectors > zone_end) {
+				ret = -EIO;
+				goto out_unlock;
+			}
+			cmd->sector = zone->wp;
+		}
+	} else {
+		if (cmd->sector != zone->wp) {
+			pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
+			       zone_no, cmd->sector, zone->wp);
+			ret = -EIO;
+			goto out_unlock;
+		}
+	}
+
+	/* Implicitly open the target zone. */
+	if (zone->cond == BLK_ZONE_COND_CLOSED ||
+	    zone->cond == BLK_ZONE_COND_EMPTY)
+		zone->cond = BLK_ZONE_COND_IMP_OPEN;
+
+	/*
+	 * Advance the write pointer, unless ordered zone append is in use. If
+	 * the write fails, the write pointer position will be corrected when
+	 * the next I/O starts execution.
+	 */
+	if (!is_append || !zlo->ordered_zone_append) {
+		zone->wp += nr_sectors;
+		if (zone->wp == zone_end) {
+			zone->cond = BLK_ZONE_COND_FULL;
+			zone->wp = ULLONG_MAX;
+		}
+	}
+out_unlock:
+	spin_unlock_irqrestore(&zone->wp_lock, flags);
+	return ret;
+}
+
+static void zloop_rw(struct zloop_cmd *cmd)
+{
+	struct request *rq = blk_mq_rq_from_pdu(cmd);
+	struct zloop_device *zlo = rq->q->queuedata;
+	unsigned int zone_no = rq_zone_no(rq);
+	sector_t nr_sectors = blk_rq_sectors(rq);
+	bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
+	bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
+	struct zloop_zone *zone;
+	int ret = -EIO;
+
+	atomic_set(&cmd->ref, 2);
+	cmd->sector = blk_rq_pos(rq);
+	cmd->nr_sectors = nr_sectors;
+	cmd->ret = 0;
+
+	if (WARN_ON_ONCE(is_append && !zlo->zone_append))
+		goto out;
+
+	/* We should never get an I/O beyond the device capacity. */
+	if (WARN_ON_ONCE(zone_no >= zlo->nr_zones))
+		goto out;
+
+	zone = &zlo->zones[zone_no];
+
+	/*
+	 * The block layer should never send requests that are not fully
+	 * contained within the zone.
+	 */
+	if (WARN_ON_ONCE(cmd->sector + nr_sectors >
+			 zone->start + zlo->zone_size))
+		goto out;
+
+	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
+		mutex_lock(&zone->lock);
+		ret = zloop_update_seq_zone(zlo, zone_no);
 		mutex_unlock(&zone->lock);
+		if (ret)
+			goto out;
+	}
+
+	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
+		mutex_lock(&zone->lock);
+		ret = zloop_seq_write_prep(cmd);
+		if (!ret)
+			ret = zloop_do_rw(cmd);
+		mutex_unlock(&zone->lock);
+	} else {
+		ret = zloop_do_rw(cmd);
+	}
 out:
 	if (ret != -EIOCBQUEUED)
 		zloop_rw_complete(&cmd->iocb, ret);
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] zloop: refactor zloop_rw
  2026-03-19  6:02 ` [PATCH 1/2] zloop: refactor zloop_rw Christoph Hellwig
@ 2026-03-19 14:06   ` Martin K. Petersen
  0 siblings, 0 replies; 8+ messages in thread
From: Martin K. Petersen @ 2026-03-19 14:06 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Damien Le Moal, Jens Axboe, linux-block


Christoph,

> Split out two helpers functions to make the function more readable and
> to avoid conditional locking.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen

^ permalink raw reply	[flat|nested] 8+ messages in thread

* add a "discard cache" debug option to zloop v3
@ 2026-03-23  7:11 Christoph Hellwig
  2026-03-23  7:11 ` [PATCH 1/2] zloop: refactor zloop_rw Christoph Hellwig
  0 siblings, 1 reply; 8+ messages in thread
From: Christoph Hellwig @ 2026-03-23  7:11 UTC (permalink / raw)
  To: Damien Le Moal, Jens Axboe; +Cc: linux-block

Hi all,

this series adds a new option to zloop to lose data not committed to
stable storage using a flush operation on device removal.  The idea is
to help testing that file system code does the right thing in the face of
volatile write caches.  For conventional devices, this can be tested
using dm-log-writes, but the concepts there don't work for sequential
write required zones.  Instead this adds an option to zloop, which
records the write pointer at the last cache flush for each zone file in
an xattr, and truncates the files down to that value on removal,
simulating losing the contents of the volatile write cache.

Changes since v2:
 - spelling fix

Changes since v1:
 - fix up the documentation

Diffstat:
 Documentation/admin-guide/blockdev/zoned_loop.rst |    5 
 drivers/block/zloop.c                             |  337 ++++++++++++++--------
 2 files changed, 226 insertions(+), 116 deletions(-)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] zloop: refactor zloop_rw
  2026-03-23  7:11 add a "discard cache" debug option to zloop v3 Christoph Hellwig
@ 2026-03-23  7:11 ` Christoph Hellwig
  0 siblings, 0 replies; 8+ messages in thread
From: Christoph Hellwig @ 2026-03-23  7:11 UTC (permalink / raw)
  To: Damien Le Moal, Jens Axboe
  Cc: linux-block, Bart Van Assche, Martin K. Petersen

Split out two helper functions to make the function more readable and
to avoid conditional locking.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/block/zloop.c | 240 ++++++++++++++++++++++--------------------
 1 file changed, 124 insertions(+), 116 deletions(-)

diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c
index 51c043342127..8ca37ca1935a 100644
--- a/drivers/block/zloop.c
+++ b/drivers/block/zloop.c
@@ -378,125 +378,22 @@ static void zloop_rw_complete(struct kiocb *iocb, long ret)
 	zloop_put_cmd(cmd);
 }
 
-static void zloop_rw(struct zloop_cmd *cmd)
+static int zloop_do_rw(struct zloop_cmd *cmd)
 {
 	struct request *rq = blk_mq_rq_from_pdu(cmd);
+	int rw = req_op(rq) == REQ_OP_READ ? ITER_DEST : ITER_SOURCE;
+	unsigned int nr_bvec = blk_rq_nr_bvec(rq);
 	struct zloop_device *zlo = rq->q->queuedata;
-	unsigned int zone_no = rq_zone_no(rq);
-	sector_t sector = blk_rq_pos(rq);
-	sector_t nr_sectors = blk_rq_sectors(rq);
-	bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
-	bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
-	int rw = is_write ? ITER_SOURCE : ITER_DEST;
+	struct zloop_zone *zone = &zlo->zones[rq_zone_no(rq)];
 	struct req_iterator rq_iter;
-	struct zloop_zone *zone;
 	struct iov_iter iter;
-	struct bio_vec tmp;
-	unsigned long flags;
-	sector_t zone_end;
-	unsigned int nr_bvec;
-	int ret;
-
-	atomic_set(&cmd->ref, 2);
-	cmd->sector = sector;
-	cmd->nr_sectors = nr_sectors;
-	cmd->ret = 0;
-
-	if (WARN_ON_ONCE(is_append && !zlo->zone_append)) {
-		ret = -EIO;
-		goto out;
-	}
-
-	/* We should never get an I/O beyond the device capacity. */
-	if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
-		ret = -EIO;
-		goto out;
-	}
-	zone = &zlo->zones[zone_no];
-	zone_end = zone->start + zlo->zone_capacity;
-
-	/*
-	 * The block layer should never send requests that are not fully
-	 * contained within the zone.
-	 */
-	if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) {
-		ret = -EIO;
-		goto out;
-	}
-
-	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
-		mutex_lock(&zone->lock);
-		ret = zloop_update_seq_zone(zlo, zone_no);
-		mutex_unlock(&zone->lock);
-		if (ret)
-			goto out;
-	}
-
-	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
-		mutex_lock(&zone->lock);
-
-		spin_lock_irqsave(&zone->wp_lock, flags);
-
-		/*
-		 * Zone append operations always go at the current write
-		 * pointer, but regular write operations must already be
-		 * aligned to the write pointer when submitted.
-		 */
-		if (is_append) {
-			/*
-			 * If ordered zone append is in use, we already checked
-			 * and set the target sector in zloop_queue_rq().
-			 */
-			if (!zlo->ordered_zone_append) {
-				if (zone->cond == BLK_ZONE_COND_FULL ||
-				    zone->wp + nr_sectors > zone_end) {
-					spin_unlock_irqrestore(&zone->wp_lock,
-							       flags);
-					ret = -EIO;
-					goto unlock;
-				}
-				sector = zone->wp;
-			}
-			cmd->sector = sector;
-		} else if (sector != zone->wp) {
-			spin_unlock_irqrestore(&zone->wp_lock, flags);
-			pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
-			       zone_no, sector, zone->wp);
-			ret = -EIO;
-			goto unlock;
-		}
-
-		/* Implicitly open the target zone. */
-		if (zone->cond == BLK_ZONE_COND_CLOSED ||
-		    zone->cond == BLK_ZONE_COND_EMPTY)
-			zone->cond = BLK_ZONE_COND_IMP_OPEN;
-
-		/*
-		 * Advance the write pointer, unless ordered zone append is in
-		 * use. If the write fails, the write pointer position will be
-		 * corrected when the next I/O starts execution.
-		 */
-		if (!is_append || !zlo->ordered_zone_append) {
-			zone->wp += nr_sectors;
-			if (zone->wp == zone_end) {
-				zone->cond = BLK_ZONE_COND_FULL;
-				zone->wp = ULLONG_MAX;
-			}
-		}
-
-		spin_unlock_irqrestore(&zone->wp_lock, flags);
-	}
-
-	nr_bvec = blk_rq_nr_bvec(rq);
 
 	if (rq->bio != rq->biotail) {
-		struct bio_vec *bvec;
+		struct bio_vec tmp, *bvec;
 
 		cmd->bvec = kmalloc_objs(*cmd->bvec, nr_bvec, GFP_NOIO);
-		if (!cmd->bvec) {
-			ret = -EIO;
-			goto unlock;
-		}
+		if (!cmd->bvec)
+			return -EIO;
 
 		/*
 		 * The bios of the request may be started from the middle of
@@ -522,7 +419,7 @@ static void zloop_rw(struct zloop_cmd *cmd)
 		iter.iov_offset = rq->bio->bi_iter.bi_bvec_done;
 	}
 
-	cmd->iocb.ki_pos = (sector - zone->start) << SECTOR_SHIFT;
+	cmd->iocb.ki_pos = (cmd->sector - zone->start) << SECTOR_SHIFT;
 	cmd->iocb.ki_filp = zone->file;
 	cmd->iocb.ki_complete = zloop_rw_complete;
 	if (!zlo->buffered_io)
@@ -530,12 +427,123 @@ static void zloop_rw(struct zloop_cmd *cmd)
 	cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
 
 	if (rw == ITER_SOURCE)
-		ret = zone->file->f_op->write_iter(&cmd->iocb, &iter);
-	else
-		ret = zone->file->f_op->read_iter(&cmd->iocb, &iter);
-unlock:
-	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write)
+		return zone->file->f_op->write_iter(&cmd->iocb, &iter);
+	return zone->file->f_op->read_iter(&cmd->iocb, &iter);
+}
+
+static int zloop_seq_write_prep(struct zloop_cmd *cmd)
+{
+	struct request *rq = blk_mq_rq_from_pdu(cmd);
+	struct zloop_device *zlo = rq->q->queuedata;
+	unsigned int zone_no = rq_zone_no(rq);
+	sector_t nr_sectors = blk_rq_sectors(rq);
+	bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
+	struct zloop_zone *zone = &zlo->zones[zone_no];
+	sector_t zone_end = zone->start + zlo->zone_capacity;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&zone->wp_lock, flags);
+
+	/*
+	 * Zone append operations always go at the current write pointer, but
+	 * regular write operations must already be aligned to the write pointer
+	 * when submitted.
+	 */
+	if (is_append) {
+		/*
+		 * If ordered zone append is in use, we already checked and set
+		 * the target sector in zloop_queue_rq().
+		 */
+		if (!zlo->ordered_zone_append) {
+			if (zone->cond == BLK_ZONE_COND_FULL ||
+			    zone->wp + nr_sectors > zone_end) {
+				ret = -EIO;
+				goto out_unlock;
+			}
+			cmd->sector = zone->wp;
+		}
+	} else {
+		if (cmd->sector != zone->wp) {
+			pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
+			       zone_no, cmd->sector, zone->wp);
+			ret = -EIO;
+			goto out_unlock;
+		}
+	}
+
+	/* Implicitly open the target zone. */
+	if (zone->cond == BLK_ZONE_COND_CLOSED ||
+	    zone->cond == BLK_ZONE_COND_EMPTY)
+		zone->cond = BLK_ZONE_COND_IMP_OPEN;
+
+	/*
+	 * Advance the write pointer, unless ordered zone append is in use. If
+	 * the write fails, the write pointer position will be corrected when
+	 * the next I/O starts execution.
+	 */
+	if (!is_append || !zlo->ordered_zone_append) {
+		zone->wp += nr_sectors;
+		if (zone->wp == zone_end) {
+			zone->cond = BLK_ZONE_COND_FULL;
+			zone->wp = ULLONG_MAX;
+		}
+	}
+out_unlock:
+	spin_unlock_irqrestore(&zone->wp_lock, flags);
+	return ret;
+}
+
+static void zloop_rw(struct zloop_cmd *cmd)
+{
+	struct request *rq = blk_mq_rq_from_pdu(cmd);
+	struct zloop_device *zlo = rq->q->queuedata;
+	unsigned int zone_no = rq_zone_no(rq);
+	sector_t nr_sectors = blk_rq_sectors(rq);
+	bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
+	bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
+	struct zloop_zone *zone;
+	int ret = -EIO;
+
+	atomic_set(&cmd->ref, 2);
+	cmd->sector = blk_rq_pos(rq);
+	cmd->nr_sectors = nr_sectors;
+	cmd->ret = 0;
+
+	if (WARN_ON_ONCE(is_append && !zlo->zone_append))
+		goto out;
+
+	/* We should never get an I/O beyond the device capacity. */
+	if (WARN_ON_ONCE(zone_no >= zlo->nr_zones))
+		goto out;
+
+	zone = &zlo->zones[zone_no];
+
+	/*
+	 * The block layer should never send requests that are not fully
+	 * contained within the zone.
+	 */
+	if (WARN_ON_ONCE(cmd->sector + nr_sectors >
+			 zone->start + zlo->zone_size))
+		goto out;
+
+	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
+		mutex_lock(&zone->lock);
+		ret = zloop_update_seq_zone(zlo, zone_no);
 		mutex_unlock(&zone->lock);
+		if (ret)
+			goto out;
+	}
+
+	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
+		mutex_lock(&zone->lock);
+		ret = zloop_seq_write_prep(cmd);
+		if (!ret)
+			ret = zloop_do_rw(cmd);
+		mutex_unlock(&zone->lock);
+	} else {
+		ret = zloop_do_rw(cmd);
+	}
 out:
 	if (ret != -EIOCBQUEUED)
 		zloop_rw_complete(&cmd->iocb, ret);
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2026-03-23  7:12 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2026-03-18  5:53 add a "discard cache" debug option to zloop Christoph Hellwig
2026-03-18  5:53 ` [PATCH 1/2] zloop: refactor zloop_rw Christoph Hellwig
2026-03-18  6:58   ` Damien Le Moal
2026-03-18  5:53 ` [PATCH 2/2] zloop: forget write cache on force removal Christoph Hellwig
2026-03-18  7:03   ` Damien Le Moal
  -- strict thread matches above, loose matches on Subject: below --
2026-03-19  6:02 add a "discard cache" debug option to zloop v2 Christoph Hellwig
2026-03-19  6:02 ` [PATCH 1/2] zloop: refactor zloop_rw Christoph Hellwig
2026-03-19 14:06   ` Martin K. Petersen
2026-03-23  7:11 add a "discard cache" debug option to zloop v3 Christoph Hellwig
2026-03-23  7:11 ` [PATCH 1/2] zloop: refactor zloop_rw Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox