All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] the dm-loop target
@ 2025-03-03 10:24 Mikulas Patocka
  2025-03-03 13:59 ` Christoph Hellwig
  2025-03-03 16:55 ` Bryn M. Reeves
  0 siblings, 2 replies; 49+ messages in thread
From: Mikulas Patocka @ 2025-03-03 10:24 UTC (permalink / raw)
  To: Jooyung Han; +Cc: Alasdair Kergon, Mike Snitzer, zkabelac, dm-devel

This is the dm-loop target - a replacement for the regular loop driver 
with better performance. The dm-loop target builds a map of the file in 
the constructor and it just remaps bios according to this map.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/md/Kconfig   |    9 +
 drivers/md/Makefile  |    1 
 drivers/md/dm-loop.c |  404 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 414 insertions(+)

Index: linux-2.6/drivers/md/Kconfig
===================================================================
--- linux-2.6.orig/drivers/md/Kconfig	2025-03-02 21:09:46.000000000 +0100
+++ linux-2.6/drivers/md/Kconfig	2025-03-02 21:09:46.000000000 +0100
@@ -646,6 +646,15 @@ config DM_ZONED
 
 	  If unsure, say N.
 
+config DM_LOOP
+	tristate "Loop target"
+	depends on BLK_DEV_DM
+	help
+	  This device-mapper target allows you to treat a regular file as
+	  a block device.
+
+	  If unsure, say N.
+
 config DM_AUDIT
 	bool "DM audit events"
 	depends on BLK_DEV_DM
Index: linux-2.6/drivers/md/Makefile
===================================================================
--- linux-2.6.orig/drivers/md/Makefile	2025-03-02 21:09:46.000000000 +0100
+++ linux-2.6/drivers/md/Makefile	2025-03-02 21:09:46.000000000 +0100
@@ -79,6 +79,7 @@ obj-$(CONFIG_DM_CLONE)		+= dm-clone.o
 obj-$(CONFIG_DM_LOG_WRITES)	+= dm-log-writes.o
 obj-$(CONFIG_DM_INTEGRITY)	+= dm-integrity.o
 obj-$(CONFIG_DM_ZONED)		+= dm-zoned.o
+obj-$(CONFIG_DM_LOOP)		+= dm-loop.o
 obj-$(CONFIG_DM_WRITECACHE)	+= dm-writecache.o
 obj-$(CONFIG_SECURITY_LOADPIN_VERITY)	+= dm-verity-loadpin.o
 
Index: linux-2.6/drivers/md/dm-loop.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/drivers/md/dm-loop.c	2025-03-02 21:41:36.000000000 +0100
@@ -0,0 +1,404 @@
+#include <linux/device-mapper.h>
+
+#include <linux/module.h>
+#include <linux/pagemap.h>
+
+#define DM_MSG_PREFIX "loop"
+
+struct loop_c {
+	struct file *filp;		/* backing file, opened by path in loop_ctr() */
+	char *path;			/* copy of the path argument; echoed by loop_status() */
+	loff_t offset;			/* byte offset into the file, from the second table argument */
+	struct block_device *bdev;	/* inode->i_sb->s_bdev; bios are redirected here */
+	struct inode *inode;		/* filp->f_mapping->host */
+	unsigned blkbits;		/* block-size shift, copied from inode->i_blkbits */
+	bool read_only;			/* table was loaded without FMODE_WRITE */
+	sector_t mapped_sectors;	/* ti->len rounded down to whole blocks */
+
+	sector_t nr_extents;		/* number of entries in map[] */
+	struct dm_loop_extent *map;	/* extents in ascending ->start order */
+};
+
+struct dm_loop_extent {
+	sector_t start; 		/* first sector of the extent, relative to the start of the target */
+	sector_t to;			/* matching first sector on lc->bdev; 0 marks a hole in the file */
+	sector_t len;			/* length in sectors */
+};
+
+static sector_t blk2sect(struct loop_c *lc, blkcnt_t block)
+{
+	return block << (lc->blkbits - SECTOR_SHIFT);	/* blocks of 2^blkbits bytes -> sectors */
+}
+
+static blkcnt_t sec2blk(struct loop_c *lc, sector_t sector)
+{
+	return sector >> (lc->blkbits - SECTOR_SHIFT);	/* sectors -> whole blocks, rounding down */
+}
+
+static blkcnt_t sec2blk_roundup(struct loop_c *lc, sector_t sector)
+{
+	return sec2blk(lc, sector + blk2sect(lc, 1) - 1);	/* round up: bias by one block minus one sector */
+}
+
+static struct dm_loop_extent *extent_binary_lookup(struct loop_c *lc, sector_t sector)
+{
+	ssize_t lo = 0;
+	ssize_t hi = lc->nr_extents - 1;	/* signed: -1 when the map is empty */
+
+	/* Bisect the window [lo, hi] of the extent array, which is sorted by ->start. */
+	while (lo <= hi) {
+		ssize_t mid = (lo + hi) >> 1;
+		struct dm_loop_extent *cur = &lc->map[mid];
+
+		if (sector < cur->start)
+			hi = mid - 1;
+		else if (likely(sector >= cur->start + cur->len))
+			lo = mid + 1;
+		else
+			return cur;
+	}
+
+	/* The window closed without a hit: no extent covers this sector. */
+	return NULL;
+}
+
+static int loop_map(struct dm_target *ti, struct bio *bio)
+{
+	struct loop_c *lc = ti->private;
+	sector_t sector, len;
+	struct dm_loop_extent *ex;
+
+	sector = dm_target_offset(ti, bio->bi_iter.bi_sector);	/* sector relative to the target */
+	ex = extent_binary_lookup(lc, sector);
+	if (!ex)
+		return DM_MAPIO_KILL;	/* sector is not covered by any extent */
+
+	bio_set_dev(bio, lc->bdev);
+	bio->bi_iter.bi_sector = ex->to + (sector - ex->start);	/* same distance into the extent, on bdev */
+	len = ex->len - (sector - ex->start);	/* sectors left in this extent */
+	if (len < bio_sectors(bio))
+		dm_accept_partial_bio(bio, len);	/* bio runs past the extent end: take only len sectors */
+
+	if (unlikely(!ex->to)) {	/* extent is a hole: nothing on disk to redirect to */
+		if (unlikely(!lc->read_only))
+			return DM_MAPIO_KILL;
+		zero_fill_bio(bio);
+		bio_endio(bio);
+		return DM_MAPIO_SUBMITTED;	/* completed here with zeroed data */
+	}
+
+	return DM_MAPIO_REMAPPED;
+}
+
+static void loop_status(struct dm_target *ti, status_type_t type,
+		unsigned status_flags, char *result, unsigned maxlen)
+{
+	struct loop_c *lc = ti->private;
+	size_t sz = 0;	/* output cursor consumed by the DMEMIT*() macros */
+
+	switch (type) {
+	case STATUSTYPE_TABLE:	/* echo the constructor arguments */
+		DMEMIT("%s %llu", lc->path, lc->offset);
+		break;
+	case STATUSTYPE_IMA:	/* target name/version followed by the arguments */
+		DMEMIT_TARGET_NAME_VERSION(ti->type);
+		DMEMIT(",file_name=%s,offset=%llu;", lc->path, lc->offset);
+		break;
+	case STATUSTYPE_INFO:	/* nothing to report: empty string */
+		result[0] = '\0';
+		break;
+	}
+}
+
+static int loop_iterate_devices(struct dm_target *ti,
+				iterate_devices_callout_fn fn, void *data)
+{
+	return 0;	/* fn is never called: no underlying device is reported to the caller */
+}
+
+static int extent_range(struct loop_c *lc,
+			sector_t logical_blk, sector_t last_blk,
+			sector_t *begin_blk, sector_t *nr_blks,
+			char **error)
+{
+	sector_t dist = 0, phys_blk, probe_blk = logical_blk;
+	int r;
+
+	/* Look up the physical block backing logical_blk; it starts the extent. */
+	*begin_blk = probe_blk;
+	*nr_blks = 0;
+	r = bmap(lc->inode, begin_blk);	/* in: logical block, out: physical block */
+	if (r) {
+		*error = "bmap failed";
+		return r;
+	}
+	if (!*begin_blk) {	/* block 0 from bmap is treated as a hole */
+		if (!lc->read_only) {
+			*error = "File is sparse";
+			return -ENXIO;
+		}
+	}
+
+	for (phys_blk = *begin_blk; phys_blk == *begin_blk + dist; dist += !!*begin_blk) {	/* dist stays 0 inside a hole */
+		cond_resched();
+
+		(*nr_blks)++;	/* the block just checked belongs to the extent */
+		if (++probe_blk > last_blk)
+			break;	/* reached the end of the requested range */
+
+		phys_blk = probe_blk;
+		r = bmap(lc->inode, &phys_blk);
+		if (r) {
+			*error = "bmap failed";
+			return r;
+		}
+		if (unlikely(!phys_blk)) {	/* holes are only tolerated when read_only */
+			if (!lc->read_only) {
+				*error = "File is sparse";
+				return -ENXIO;
+			}
+		}
+	}
+
+	return 0;	/* *begin_blk / *nr_blks now describe one contiguous run (or one hole) */
+}
+
+static int loop_extents(struct loop_c *lc, sector_t *nr_extents,
+			struct dm_loop_extent *map, char **error)
+{
+	int r;
+	sector_t start = 0;	/* next extent's first sector within the target */
+	sector_t nr_blks, begin_blk;
+	sector_t after_last_blk = sec2blk_roundup(lc,
+			(lc->mapped_sectors + (lc->offset >> 9)));	/* first file block past the mapped area */
+	sector_t logical_blk = sec2blk(lc, lc->offset >> 9);	/* file block where the mapped area begins */
+
+	*nr_extents = 0;
+
+	/* walk the mapped region one extent at a time */
+	while (logical_blk < after_last_blk) {
+		r = extent_range(lc, logical_blk, after_last_blk - 1,
+				 &begin_blk, &nr_blks, error);
+
+		if (unlikely(r))
+			return r;
+
+		if (map) {	/* map == NULL: count only, store nothing */
+			if (*nr_extents >= lc->nr_extents) {	/* more extents than lc->nr_extents allows */
+				*error = "The file changed while mapping it";
+				return -EBUSY;
+			}
+			map[*nr_extents].start = start;
+			map[*nr_extents].to = blk2sect(lc, begin_blk);
+			map[*nr_extents].len = blk2sect(lc, nr_blks);
+		}
+
+		(*nr_extents)++;
+		start += blk2sect(lc, nr_blks);
+		logical_blk += nr_blks;	/* continue right after the extent just found */
+	}
+
+	if (*nr_extents != lc->nr_extents) {	/* trivially equal when nr_extents points at lc->nr_extents */
+		*error = "The file changed while mapping it";
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int setup_block_map(struct loop_c *lc, struct dm_target *ti)
+{
+	int r;
+	sector_t n_file_sectors, offset_sector, nr_extents_tmp;
+
+	if (!S_ISREG(lc->inode->i_mode) || !lc->inode->i_sb || !lc->inode->i_sb->s_bdev) {	/* also rejects a superblock without s_bdev */
+		ti->error = "The file is not a regular file";
+		return -ENXIO;
+	}
+
+	lc->bdev = lc->inode->i_sb->s_bdev;
+	lc->blkbits = lc->inode->i_blkbits;
+	n_file_sectors = i_size_read(lc->inode) >> lc->blkbits << (lc->blkbits - 9);	/* file size in sectors, whole blocks only */
+
+	if (lc->offset & ((1 << lc->blkbits) - 1)) {	/* offset must be a multiple of the block size */
+		ti->error = "Unaligned offset";
+		return -EINVAL;
+	}
+	offset_sector = lc->offset >> 9;
+	if (offset_sector >= n_file_sectors) {
+		ti->error = "Offset is greater than file size";
+		return -EINVAL;
+	}
+	if (ti->len > (n_file_sectors - offset_sector)) {
+		ti->error = "Target maps area after file end";
+		return -EINVAL;
+	}
+	lc->mapped_sectors = ti->len >> (lc->blkbits - 9) << (lc->blkbits - 9);	/* target length rounded down to whole blocks */
+
+	r = loop_extents(lc, &lc->nr_extents, NULL, &ti->error);	/* pass 1: only count the extents */
+	if (r)
+		return r;
+
+	if (lc->nr_extents != (size_t)lc->nr_extents) {	/* the count must survive truncation to size_t */
+		ti->error = "Too many extents";
+		return -EOVERFLOW;
+	}
+
+	lc->map = kvcalloc(lc->nr_extents, sizeof(struct dm_loop_extent), GFP_KERNEL);
+	if (!lc->map) {
+		ti->error = "Failed to allocate extent map";
+		return -ENOMEM;
+	}
+
+	r = loop_extents(lc, &nr_extents_tmp, lc->map, &ti->error);	/* pass 2: fill the map; count must match pass 1 */
+	if (r)
+		return r;	/* lc->map is not freed on this path; loop_free() does the kvfree */
+
+	return 0;
+}
+
+static int loop_lock_inode(struct inode *inode)
+{
+	int r = -EBUSY;
+
+	inode_lock(inode);
+	if (IS_SWAPFILE(inode))	/* the flag is already set by someone else: refuse */
+		goto unlock;
+
+	/* Set S_SWAPFILE first, then drain writes; take the flag back if that fails. */
+	inode->i_flags |= S_SWAPFILE;
+	r = inode_drain_writes(inode);
+	if (r)
+		inode->i_flags &= ~S_SWAPFILE;
+
+unlock:
+	inode_unlock(inode);
+	return r;
+}
+
+static void loop_unlock_inode(struct inode *inode)
+{
+	inode_lock(inode);
+	inode->i_flags &= ~S_SWAPFILE;	/* drop the flag set by loop_lock_inode() */
+	inode_unlock(inode);
+}
+
+static void loop_free(struct loop_c *lc)
+{
+	if (!lc)
+		return;	/* nothing was allocated */
+	if (!IS_ERR_OR_NULL(lc->filp)) {	/* filp may be NULL or an ERR_PTR from a failed filp_open() */
+		loop_unlock_inode(lc->inode);	/* NOTE(review): clears S_SWAPFILE even if loop_lock_inode() never succeeded - confirm callers */
+		filp_close(lc->filp, NULL);
+	}
+	kvfree(lc->map);
+	kfree(lc->path);
+	kfree(lc);
+}
+
+static int loop_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+	struct loop_c *lc = NULL;
+	int r;
+	char dummy;	/* catches trailing characters after the offset */
+
+	/* Constructor for "<path> <byte offset>": open and lock the file, then map it. */
+	if (argc != 2) {
+		r = -EINVAL;
+		ti->error = "Invalid number of arguments";
+		goto err;
+	}
+
+	lc = kzalloc(sizeof(*lc), GFP_KERNEL);
+	if (!lc) {
+		r = -ENOMEM;
+		ti->error = "Cannot allocate loop context";
+		goto err;
+	}
+	ti->private = lc;
+
+	lc->path = kstrdup(argv[0], GFP_KERNEL);
+	if (!lc->path) {
+		r = -ENOMEM;
+		ti->error = "Cannot allocate loop path";
+		goto err;
+	}
+
+	if (sscanf(argv[1], "%lld%c", &lc->offset, &dummy) != 1) {
+		r = -EINVAL;
+		ti->error = "Invalid file offset";
+		goto err;
+	}
+
+	lc->read_only = !(dm_table_get_mode(ti->table) & FMODE_WRITE);
+	lc->filp = filp_open(lc->path, lc->read_only ? O_RDONLY : O_RDWR, 0);
+	if (IS_ERR(lc->filp)) {
+		r = PTR_ERR(lc->filp);
+		ti->error = "Could not open backing file";
+		goto err;
+	}
+	lc->inode = lc->filp->f_mapping->host;
+
+	r = loop_lock_inode(lc->inode);
+	if (r) {
+		/* Not locked by us: drop filp so loop_free() cannot clear a foreign S_SWAPFILE. */
+		filp_close(lc->filp, NULL);
+		lc->filp = NULL;
+		ti->error = "Could not lock inode";
+		goto err;
+	}
+
+	r = setup_block_map(lc, ti);
+	if (r)
+		goto err;	/* ti->error was already set by setup_block_map() */
+	return 0;
+
+err:
+	loop_free(lc);	/* copes with NULL and with a partially set-up context */
+	return r;	/* negative errno; ti->error names the cause */
+}
+
+static void loop_dtr(struct dm_target *ti)
+{
+	/* Destructor: hand the per-target context stored in ti->private to loop_free(). */
+	loop_free(ti->private);
+}
+
+static struct target_type loop_target = {
+	.name = "loop",		/* target name */
+	.version = {1, 0, 0},
+	.module = THIS_MODULE,
+	.ctr = loop_ctr,	/* parses the table line and builds the extent map */
+	.dtr = loop_dtr,	/* releases everything through loop_free() */
+	.map = loop_map,	/* redirects each bio to lc->bdev */
+	.status = loop_status,
+	.iterate_devices = loop_iterate_devices,	/* stub: always returns 0 */
+};
+
+static int __init dm_loop_init(void)
+{
+	int r = dm_register_target(&loop_target);
+
+	/*
+	 * Module load: register loop_target with device-mapper.  A failure
+	 * is logged and returned to the caller; nothing else needs undoing.
+	 */
+	if (r < 0) {
+		DMERR("register failed %d", r);
+		return r;
+	}
+
+	return 0;
+}
+
+static void __exit dm_loop_exit(void)
+{
+	dm_unregister_target(&loop_target);	/* undo the registration done in dm_loop_init() */
+}
+
+module_init(dm_loop_init);	/* dm_loop_init() registers loop_target */
+module_exit(dm_loop_exit);	/* dm_loop_exit() unregisters it again */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>");
+MODULE_DESCRIPTION("device-mapper loop target");


^ permalink raw reply	[flat|nested] 49+ messages in thread

end of thread, other threads:[~2025-03-25 12:24 UTC | newest]

Thread overview: 49+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-03 10:24 [PATCH] the dm-loop target Mikulas Patocka
2025-03-03 13:59 ` Christoph Hellwig
     [not found]   ` <CAM23VxprhJgOPfhxQf6QNWzHd6+-ZwbjSo-oMHCD2WDQiKntMg@mail.gmail.com>
2025-03-03 15:13     ` Christoph Hellwig
2025-03-03 15:22       ` Matthew Wilcox
2025-03-03 15:31         ` Christoph Hellwig
     [not found]       ` <CAM23VxprSduDDK8qvLVkUt9WWmLMPFjhqKB8X4e6gw7Wv-6R2w@mail.gmail.com>
2025-03-03 17:24         ` Christoph Hellwig
     [not found]           ` <CAM23Vxoxyrf9nwJd1Xe8uncAPiyK8yaNZNsugwX8p=qo1n6yVg@mail.gmail.com>
2025-03-04 13:52             ` Christoph Hellwig
2025-03-03 16:16   ` Mikulas Patocka
2025-03-03 17:24     ` Christoph Hellwig
2025-03-03 21:03       ` Mikulas Patocka
2025-03-04  2:13         ` Dave Chinner
2025-03-04 11:18           ` Mikulas Patocka
2025-03-04 13:50             ` Christoph Hellwig
2025-03-05  0:01             ` Dave Chinner
2025-03-07 15:21               ` Mikulas Patocka
2025-03-08  3:49                 ` Darrick J. Wong
2025-03-08 20:45                   ` Mikulas Patocka
2025-03-09  0:05                 ` Ming Lei
2025-03-10 11:18                   ` Mikulas Patocka
2025-03-11  1:27                     ` Dave Chinner
2025-03-11 10:43                       ` Ming Lei
2025-03-12  2:34                         ` Dave Chinner
2025-03-12  6:24                           ` Christoph Hellwig
2025-03-12  8:26                           ` Ming Lei
2025-03-13  1:36                             ` Ming Lei
2025-03-13 16:36                             ` Mikulas Patocka
2025-03-18  4:27                               ` Dave Chinner
2025-03-18  7:57                                 ` Christoph Hellwig
2025-03-18  9:34                                   ` Ming Lei
2025-03-20  7:08                                     ` Christoph Hellwig
2025-03-20  7:41                                       ` Ming Lei
2025-03-20 14:22                                         ` Christoph Hellwig
2025-03-20 14:36                                           ` Ming Lei
2025-03-25 10:15                                         ` Dave Chinner
2025-03-25 12:23                                           ` Ming Lei
2025-03-09  0:16                 ` Ming Lei
2025-03-10 11:20                   ` Mikulas Patocka
2025-03-04 13:49         ` Christoph Hellwig
     [not found]           ` <CAM23Vxr=fKy-0L1R5P-5h6A95acKT_d=CC1E+TAzAs8v6q9gHw@mail.gmail.com>
2025-03-04 16:04             ` Christoph Hellwig
     [not found]               ` <CAM23VxqJX46DCpCiH5qxPpDLtMVg87Ba8sx55aQ4hvt-XaHzuQ@mail.gmail.com>
2025-03-04 17:17                 ` Christoph Hellwig
2025-03-12 13:26           ` Kent Overstreet
2025-03-12 14:20             ` Christoph Hellwig
2025-03-12 16:09               ` Kent Overstreet
2025-03-13 12:44                 ` Christoph Hellwig
2025-03-13 16:21               ` Mikulas Patocka
2025-03-13 16:33                 ` Kent Overstreet
2025-03-03 16:55 ` Bryn M. Reeves
2025-03-03 17:06   ` Mikulas Patocka
2025-03-03 17:18     ` Bryn M. Reeves

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.