* [PATCH 1/6] block: cleanup blkdev_ioctl
2015-08-04 7:11 Persistent Reservation API Christoph Hellwig
@ 2015-08-04 7:11 ` Christoph Hellwig
[not found] ` <1438672271-11309-2-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2015-08-04 7:11 ` [PATCH 2/6] block: add a API for persistent reservations Christoph Hellwig
` (6 subsequent siblings)
7 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2015-08-04 7:11 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-scsi, linux-nvme, dm-devel, linux-api, linux-kernel
Split out helpers for all non-trivial ioctls to make this function simpler,
and also start passing around a pointer version of the argument, as that's
what most ioctl handlers actually need.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/ioctl.c | 227 ++++++++++++++++++++++++++++++++--------------------------
1 file changed, 127 insertions(+), 100 deletions(-)
diff --git a/block/ioctl.c b/block/ioctl.c
index 8061eba..df62b47 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -193,10 +193,20 @@ int blkdev_reread_part(struct block_device *bdev)
}
EXPORT_SYMBOL(blkdev_reread_part);
-static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
- uint64_t len, int secure)
+static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
+ unsigned long arg, unsigned long flags)
{
- unsigned long flags = 0;
+ uint64_t range[2];
+ uint64_t start, len;
+
+ if (!(mode & FMODE_WRITE))
+ return -EBADF;
+
+ if (copy_from_user(range, (void __user *)arg, sizeof(range)))
+ return -EFAULT;
+
+ start = range[0];
+ len = range[1];
if (start & 511)
return -EINVAL;
@@ -207,14 +217,24 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
if (start + len > (i_size_read(bdev->bd_inode) >> 9))
return -EINVAL;
- if (secure)
- flags |= BLKDEV_DISCARD_SECURE;
return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags);
}
-static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start,
- uint64_t len)
+static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
+ unsigned long arg)
{
+ uint64_t range[2];
+ uint64_t start, len;
+
+ if (!(mode & FMODE_WRITE))
+ return -EBADF;
+
+ if (copy_from_user(range, (void __user *)arg, sizeof(range)))
+ return -EFAULT;
+
+ start = range[0];
+ len = range[1];
+
if (start & 511)
return -EINVAL;
if (len & 511)
@@ -295,89 +315,115 @@ static inline int is_unrecognized_ioctl(int ret)
ret == -ENOIOCTLCMD;
}
-/*
- * always keep this in sync with compat_blkdev_ioctl()
- */
-int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
- unsigned long arg)
+static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
+ unsigned cmd, unsigned long arg)
{
- struct gendisk *disk = bdev->bd_disk;
- struct backing_dev_info *bdi;
- loff_t size;
- int ret, n;
- unsigned int max_sectors;
+ int ret;
- switch(cmd) {
- case BLKFLSBUF:
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
-
- ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
- if (!is_unrecognized_ioctl(ret))
- return ret;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
- fsync_bdev(bdev);
- invalidate_bdev(bdev);
- return 0;
+ ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
+ if (!is_unrecognized_ioctl(ret))
+ return ret;
- case BLKROSET:
- ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
- if (!is_unrecognized_ioctl(ret))
- return ret;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
- if (get_user(n, (int __user *)(arg)))
- return -EFAULT;
- set_device_ro(bdev, n);
- return 0;
+ fsync_bdev(bdev);
+ invalidate_bdev(bdev);
+ return 0;
+}
- case BLKDISCARD:
- case BLKSECDISCARD: {
- uint64_t range[2];
+static int blkdev_roset(struct block_device *bdev, fmode_t mode,
+ unsigned cmd, unsigned long arg)
+{
+ int ret, n;
- if (!(mode & FMODE_WRITE))
- return -EBADF;
+ ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
+ if (!is_unrecognized_ioctl(ret))
+ return ret;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (get_user(n, (int __user *)arg))
+ return -EFAULT;
+ set_device_ro(bdev, n);
+ return 0;
+}
- if (copy_from_user(range, (void __user *)arg, sizeof(range)))
- return -EFAULT;
+static int blkdev_getgeo(struct block_device *bdev,
+ struct hd_geometry __user *argp)
+{
+ struct gendisk *disk = bdev->bd_disk;
+ struct hd_geometry geo;
+ int ret;
- return blk_ioctl_discard(bdev, range[0], range[1],
- cmd == BLKSECDISCARD);
- }
- case BLKZEROOUT: {
- uint64_t range[2];
+ if (!argp)
+ return -EINVAL;
+ if (!disk->fops->getgeo)
+ return -ENOTTY;
+
+ /*
+ * We need to set the startsect first, the driver may
+ * want to override it.
+ */
+ memset(&geo, 0, sizeof(geo));
+ geo.start = get_start_sect(bdev);
+ ret = disk->fops->getgeo(bdev, &geo);
+ if (ret)
+ return ret;
+ if (copy_to_user(argp, &geo, sizeof(geo)))
+ return -EFAULT;
+ return 0;
+}
- if (!(mode & FMODE_WRITE))
- return -EBADF;
+/* set the logical block size */
+static int blkdev_bszset(struct block_device *bdev, fmode_t mode,
+ int __user *argp)
+{
+ int ret, n;
- if (copy_from_user(range, (void __user *)arg, sizeof(range)))
- return -EFAULT;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (!argp)
+ return -EINVAL;
+ if (get_user(n, argp))
+ return -EFAULT;
- return blk_ioctl_zeroout(bdev, range[0], range[1]);
+ if (!(mode & FMODE_EXCL)) {
+ bdgrab(bdev);
+ if (blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
+ return -EBUSY;
}
- case HDIO_GETGEO: {
- struct hd_geometry geo;
+ ret = set_blocksize(bdev, n);
+ if (!(mode & FMODE_EXCL))
+ blkdev_put(bdev, mode | FMODE_EXCL);
+ return ret;
+}
- if (!arg)
- return -EINVAL;
- if (!disk->fops->getgeo)
- return -ENOTTY;
-
- /*
- * We need to set the startsect first, the driver may
- * want to override it.
- */
- memset(&geo, 0, sizeof(geo));
- geo.start = get_start_sect(bdev);
- ret = disk->fops->getgeo(bdev, &geo);
- if (ret)
- return ret;
- if (copy_to_user((struct hd_geometry __user *)arg, &geo,
- sizeof(geo)))
- return -EFAULT;
- return 0;
- }
+/*
+ * always keep this in sync with compat_blkdev_ioctl()
+ */
+int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
+ unsigned long arg)
+{
+ struct backing_dev_info *bdi;
+ void __user *argp = (void __user *)arg;
+ loff_t size;
+ unsigned int max_sectors;
+
+ switch (cmd) {
+ case BLKFLSBUF:
+ return blkdev_flushbuf(bdev, mode, cmd, arg);
+ case BLKROSET:
+ return blkdev_roset(bdev, mode, cmd, arg);
+ case BLKDISCARD:
+ return blk_ioctl_discard(bdev, mode, arg, 0);
+ case BLKSECDISCARD:
+ return blk_ioctl_discard(bdev, mode, arg,
+ BLKDEV_DISCARD_SECURE);
+ case BLKZEROOUT:
+ return blk_ioctl_zeroout(bdev, mode, arg);
+ case HDIO_GETGEO:
+ return blkdev_getgeo(bdev, argp);
case BLKRAGET:
case BLKFRAGET:
if (!arg)
@@ -414,28 +460,11 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
return 0;
case BLKBSZSET:
- /* set the logical block size */
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
- if (!arg)
- return -EINVAL;
- if (get_user(n, (int __user *) arg))
- return -EFAULT;
- if (!(mode & FMODE_EXCL)) {
- bdgrab(bdev);
- if (blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
- return -EBUSY;
- }
- ret = set_blocksize(bdev, n);
- if (!(mode & FMODE_EXCL))
- blkdev_put(bdev, mode | FMODE_EXCL);
- return ret;
+ return blkdev_bszset(bdev, mode, argp);
case BLKPG:
- ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg);
- break;
+ return blkpg_ioctl(bdev, argp);
case BLKRRPART:
- ret = blkdev_reread_part(bdev);
- break;
+ return blkdev_reread_part(bdev);
case BLKGETSIZE:
size = i_size_read(bdev->bd_inode);
if ((size >> 9) > ~0UL)
@@ -447,11 +476,9 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKTRACESTOP:
case BLKTRACESETUP:
case BLKTRACETEARDOWN:
- ret = blk_trace_ioctl(bdev, cmd, (char __user *) arg);
- break;
+ return blk_trace_ioctl(bdev, cmd, argp);
default:
- ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
+ return __blkdev_driver_ioctl(bdev, mode, cmd, arg);
}
- return ret;
}
EXPORT_SYMBOL_GPL(blkdev_ioctl);
--
1.9.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 2/6] block: add a API for persistent reservations
2015-08-04 7:11 Persistent Reservation API Christoph Hellwig
2015-08-04 7:11 ` [PATCH 1/6] block: cleanup blkdev_ioctl Christoph Hellwig
@ 2015-08-04 7:11 ` Christoph Hellwig
[not found] ` <1438672271-11309-3-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2015-08-04 7:11 ` [PATCH 3/6] sd: implement the persisten reservation API Christoph Hellwig
` (5 subsequent siblings)
7 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2015-08-04 7:11 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-scsi, linux-nvme, dm-devel, linux-api, linux-kernel
This commits adds a driver API and ioctls for controling persistent
reservations genericly at the block layer. Persistent reservations
are supported by SCSI and NVMe and allow controlling who gets access
to a device in a shared storage setup.
Note that we add a pr_ops structure to struct block_device_operation
instead of adding the members directly to avoid bloating all instances
of devices that will never support persistent reservations.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
Documentation/block/pr.txt | 107 +++++++++++++++++++++++++++++++++++++++++++++
block/ioctl.c | 85 +++++++++++++++++++++++++++++++++++
include/linux/blkdev.h | 2 +
include/linux/pr.h | 18 ++++++++
include/uapi/linux/pr.h | 45 +++++++++++++++++++
5 files changed, 257 insertions(+)
create mode 100644 Documentation/block/pr.txt
create mode 100644 include/linux/pr.h
create mode 100644 include/uapi/linux/pr.h
diff --git a/Documentation/block/pr.txt b/Documentation/block/pr.txt
new file mode 100644
index 0000000..1ced450
--- /dev/null
+++ b/Documentation/block/pr.txt
@@ -0,0 +1,107 @@
+
+Block layer support for Persistent reservations
+===============================================
+
+The Linux kernel supports a user space interface for simplified
+Persistent Reservations which map to block devices that support
+these (like SCSI). Persistent Reservations allow restricting
+access to block devices to specific initiators in a shared storage
+setup.
+
+This document gives a general overview of the support ioctl commands,
+but for a more detailed reference please refer the the SCSI Primary
+Command standard, specifically the section on Reservations and the
+"PERSISTENT RESERVE IN" and "PERSISTENT RESERVE OUT" commands.
+
+All implementations are expected to ensure the reservations survive
+a power loss and cover all connections in a multi path environment.
+These behavior are optional in SPC but will be automatically applied
+by Linux.
+
+The following types of reservations are supported:
+
+ - PR_WRITE_EXCLUSIVE
+
+ Only the initiator that owns the reservation can write to the
+ device. Any initiator can read from the device.
+
+ - PR_EXCLUSIVE_ACCESS
+
+ Only the initiator that owns the reservation can access the
+ device.
+
+ - PR_WRITE_EXCLUSIVE_REG_ONLY
+
+ Only initiators with a registered key can write to the device,
+ Any initiator can read from the device.
+
+ - PR_EXCLUSIVE_ACCESS_REG_ONLY
+
+ Only initiators with a registered key can access the device.
+
+ - PR_WRITE_EXCLUSIVE_ALL_REGS
+
+ Only initiators with a registered key can write to the device,
+ Any initiator can read from the device.
+ All initiators with a registered key are considered reservation
+ holders.
+ Please reference the SPC spec on the meaning of a reservation
+ holder if you want to use this type.
+
+ - PR_EXCLUSIVE_ACCESS_ALL_REGS
+
+ Only initiators with a registered key can access the device.
+ All initiators with a registered key are considered reservation
+ holders.
+ Please reference the SPC spec on the meaning of a reservation
+ holder if you want to use this type.
+
+
+1. IOC_PR_REGISTER
+
+This ioctl command registers a new reservation if the new_key argument
+is non-null. If no existing reservation exists old_key must be zero,
+if an existing reservation should be replaced old_key must contain
+the old reservation key.
+
+If the new_key argument is 0 it unregisters the existing reservation passed
+in old_key.
+
+
+2. IOC_PR_REGISTER_IGNORE
+
+This ioctl command registers a new reservations with the key passed in
+new_key, ignoring and replacing any existing previous reservation. The
+old_key argument is ignored.
+
+
+3. IOC_PR_RESERVE
+
+This ioctl command reserves the device and thus restricts access for other
+devices based on the type argument. The key argument must be the existing
+reservation key for the device as acquired by the IOC_PR_REGISTER,
+IOC_PR_REGISTER_IGNORE, IOC_PR_PREEMPT or IOC_PR_PREEMPT_ABORT commands.
+
+
+4. IOC_PR_RELEASE
+
+This ioctl command releases the reservation specified by key and flags
+and thus removes any access restriction implied by it.
+
+
+5. IOC_PR_PREEMPT
+
+This ioctl command releases the existing reservation referred to by
+old_key and replaces it with a a new reservation of type type for the
+reservation key new_key.
+
+
+6. IOC_PR_PREEMPT_ABORT
+
+This ioctl command work like IOC_PR_PREEMPT except that it also aborts
+any outstanding command sent over a connection identified by old_key.
+
+7. IOC_PR_CLEAR
+
+This ioctl command unregisters both key and any other reservation key
+registered with the device and drops any existing reservation.
diff --git a/block/ioctl.c b/block/ioctl.c
index df62b47..34d8c77 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -7,6 +7,7 @@
#include <linux/backing-dev.h>
#include <linux/fs.h>
#include <linux/blktrace_api.h>
+#include <linux/pr.h>
#include <asm/uaccess.h>
static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg)
@@ -295,6 +296,76 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
*/
EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl);
+static int blkdev_pr_register(struct block_device *bdev,
+ struct pr_registration __user *arg, bool ignore)
+{
+ const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+ struct pr_registration reg;
+
+ if (!ops || !ops->pr_register)
+ return -EOPNOTSUPP;
+ if (copy_from_user(®, arg, sizeof(reg)))
+ return -EFAULT;
+
+ return ops->pr_register(bdev, reg.old_key, reg.new_key, ignore);
+}
+
+static int blkdev_pr_reserve(struct block_device *bdev,
+ struct pr_reservation __user *arg)
+{
+ const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+ struct pr_reservation rsv;
+
+ if (!ops || !ops->pr_reserve)
+ return -EOPNOTSUPP;
+ if (copy_from_user(&rsv, arg, sizeof(rsv)))
+ return -EFAULT;
+
+ return ops->pr_reserve(bdev, rsv.key, rsv.type);
+}
+
+static int blkdev_pr_release(struct block_device *bdev,
+ struct pr_reservation __user *arg)
+{
+ const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+ struct pr_reservation rsv;
+
+ if (!ops || !ops->pr_release)
+ return -EOPNOTSUPP;
+ if (copy_from_user(&rsv, arg, sizeof(rsv)))
+ return -EFAULT;
+
+ return ops->pr_release(bdev, rsv.key, rsv.type);
+}
+
+static int blkdev_pr_preempt(struct block_device *bdev,
+ struct pr_preempt __user *arg, bool abort)
+{
+ const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+ struct pr_preempt p;
+
+ if (!ops || !ops->pr_preempt)
+ return -EOPNOTSUPP;
+ if (copy_from_user(&p, arg, sizeof(p)))
+ return -EFAULT;
+
+ return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort);
+}
+
+static int blkdev_pr_clear(struct block_device *bdev,
+ struct pr_clear __user *arg)
+{
+ const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+ struct pr_clear c;
+
+ if (!ops || !ops->pr_clear)
+ return -EOPNOTSUPP;
+ if (copy_from_user(&c, arg, sizeof(c)))
+ return -EFAULT;
+
+ return ops->pr_clear(bdev, c.key);
+}
+
/*
* Is it an unrecognized ioctl? The correct returns are either
* ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a
@@ -477,6 +548,20 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKTRACESETUP:
case BLKTRACETEARDOWN:
return blk_trace_ioctl(bdev, cmd, argp);
+ case IOC_PR_REGISTER:
+ return blkdev_pr_register(bdev, argp, false);
+ case IOC_PR_REGISTER_IGNORE:
+ return blkdev_pr_register(bdev, argp, true);
+ case IOC_PR_RESERVE:
+ return blkdev_pr_reserve(bdev, argp);
+ case IOC_PR_RELEASE:
+ return blkdev_pr_release(bdev, argp);
+ case IOC_PR_PREEMPT:
+ return blkdev_pr_preempt(bdev, argp, false);
+ case IOC_PR_PREEMPT_ABORT:
+ return blkdev_pr_preempt(bdev, argp, true);
+ case IOC_PR_CLEAR:
+ return blkdev_pr_clear(bdev, argp);
default:
return __blkdev_driver_ioctl(bdev, mode, cmd, arg);
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 243f29e..a07eec93 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -35,6 +35,7 @@ struct sg_io_hdr;
struct bsg_job;
struct blkcg_gq;
struct blk_flush_queue;
+struct pr_ops;
#define BLKDEV_MIN_RQ 4
#define BLKDEV_MAX_RQ 128 /* Default maximum */
@@ -1568,6 +1569,7 @@ struct block_device_operations {
/* this callback is with swap_lock and sometimes page table lock held */
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
struct module *owner;
+ const struct pr_ops *pr_ops;
};
extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
diff --git a/include/linux/pr.h b/include/linux/pr.h
new file mode 100644
index 0000000..a6f0496
--- /dev/null
+++ b/include/linux/pr.h
@@ -0,0 +1,18 @@
+#ifndef LINUX_PR_H
+#define LINUX_PR_H
+
+#include <uapi/linux/pr.h>
+
+struct pr_ops {
+ int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key,
+ bool ignore);
+ int (*pr_reserve)(struct block_device *bdev, u64 key,
+ enum pr_type type);
+ int (*pr_release)(struct block_device *bdev, u64 key,
+ enum pr_type type);
+ int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key,
+ enum pr_type type, bool abort);
+ int (*pr_clear)(struct block_device *bdev, u64 key);
+};
+
+#endif /* LINUX_PR_H */
diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h
new file mode 100644
index 0000000..6014d14
--- /dev/null
+++ b/include/uapi/linux/pr.h
@@ -0,0 +1,45 @@
+#ifndef _UAPI_PR_H
+#define _UAPI_PR_H
+
+enum pr_type {
+ PR_WRITE_EXCLUSIVE = 1,
+ PR_EXCLUSIVE_ACCESS = 2,
+ PR_WRITE_EXCLUSIVE_REG_ONLY = 3,
+ PR_EXCLUSIVE_ACCESS_REG_ONLY = 4,
+ PR_WRITE_EXCLUSIVE_ALL_REGS = 5,
+ PR_EXCLUSIVE_ACCESS_ALL_REGS = 6,
+};
+
+struct pr_reservation {
+ __u64 key;
+ __u32 type;
+ __u32 __pad;
+};
+
+struct pr_registration {
+ __u64 old_key;
+ __u64 new_key;
+};
+
+struct pr_preempt {
+ __u64 old_key;
+ __u64 new_key;
+ __u32 type;
+ __u32 __pad;
+};
+
+struct pr_clear {
+ __u64 key;
+};
+
+#define IOC_PR_REGISTER _IOW('p', 200, struct pr_registration)
+#define IOC_PR_REGISTER_IGNORE _IOW('p', 201, struct pr_registration)
+
+#define IOC_PR_RESERVE _IOW('p', 202, struct pr_reservation)
+#define IOC_PR_RELEASE _IOW('p', 203, struct pr_reservation)
+
+#define IOC_PR_PREEMPT _IOW('p', 204, struct pr_preempt)
+#define IOC_PR_PREEMPT_ABORT _IOW('p', 205, struct pr_preempt)
+#define IOC_PR_CLEAR _IOW('p', 206, struct pr_clear)
+
+#endif /* _UAPI_PR_H */
--
1.9.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 3/6] sd: implement the persisten reservation API
2015-08-04 7:11 Persistent Reservation API Christoph Hellwig
2015-08-04 7:11 ` [PATCH 1/6] block: cleanup blkdev_ioctl Christoph Hellwig
2015-08-04 7:11 ` [PATCH 2/6] block: add a API for persistent reservations Christoph Hellwig
@ 2015-08-04 7:11 ` Christoph Hellwig
[not found] ` <1438672271-11309-4-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2015-08-04 7:11 ` [PATCH 4/6] dm: refactor ioctl handling Christoph Hellwig
` (4 subsequent siblings)
7 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2015-08-04 7:11 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-scsi, linux-nvme, dm-devel, linux-api, linux-kernel
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/scsi/sd.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 90 insertions(+)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 160e44e..e9bc210 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -51,6 +51,7 @@
#include <linux/async.h>
#include <linux/slab.h>
#include <linux/pm_runtime.h>
+#include <linux/pr.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
@@ -1535,6 +1536,94 @@ static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode,
}
#endif
+static char sd_pr_type(enum pr_type type)
+{
+ switch (type) {
+ case PR_WRITE_EXCLUSIVE:
+ return 0x01;
+ case PR_EXCLUSIVE_ACCESS:
+ return 0x03;
+ case PR_WRITE_EXCLUSIVE_REG_ONLY:
+ return 0x05;
+ case PR_EXCLUSIVE_ACCESS_REG_ONLY:
+ return 0x06;
+ case PR_WRITE_EXCLUSIVE_ALL_REGS:
+ return 0x07;
+ case PR_EXCLUSIVE_ACCESS_ALL_REGS:
+ return 0x08;
+ default:
+ return 0;
+ }
+};
+
+static int sd_pr_command(struct block_device *bdev, u8 sa,
+ u64 key, u64 sa_key, u8 type, u8 flags)
+{
+ struct scsi_device *sdev = scsi_disk(bdev->bd_disk)->device;
+ struct scsi_sense_hdr sshdr;
+ int result;
+ u8 cmd[16] = { 0, };
+ u8 data[24] = { 0, };
+
+ cmd[0] = PERSISTENT_RESERVE_OUT;
+ cmd[1] = sa;
+ cmd[2] = type;
+ put_unaligned_be32(sizeof(data), &cmd[5]);
+
+ put_unaligned_be64(key, &data[0]);
+ put_unaligned_be64(sa_key, &data[8]);
+ data[20] = flags;
+
+ result = scsi_execute_req(sdev, cmd, DMA_TO_DEVICE, &data, sizeof(data),
+ &sshdr, SD_TIMEOUT, SD_MAX_RETRIES, NULL);
+
+ if ((driver_byte(result) & DRIVER_SENSE) &&
+ (scsi_sense_valid(&sshdr))) {
+ sdev_printk(KERN_INFO, sdev, "PR commad failed: %d\n", result);
+ scsi_print_sense_hdr(sdev, NULL, &sshdr);
+ }
+
+ return result;
+}
+
+static int sd_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
+ bool ignore)
+{
+ return sd_pr_command(bdev, ignore ? 0x06 : 0x00, old_key, new_key, 0,
+ (1 << 0) /* APTPL */ |
+ (1 << 2) /* ALL_TG_PT */);
+}
+
+static int sd_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type)
+{
+ return sd_pr_command(bdev, 0x01, key, 0, sd_pr_type(type), 0);
+}
+
+static int sd_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
+{
+ return sd_pr_command(bdev, 0x02, key, 0, sd_pr_type(type), 0);
+}
+
+static int sd_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
+ enum pr_type type, bool abort)
+{
+ return sd_pr_command(bdev, abort ? 0x05 : 0x04, old_key, new_key,
+ sd_pr_type(type), 0);
+}
+
+static int sd_pr_clear(struct block_device *bdev, u64 key)
+{
+ return sd_pr_command(bdev, 0x03, key, 0, 0, 0);
+}
+
+static const struct pr_ops sd_pr_ops = {
+ .pr_register = sd_pr_register,
+ .pr_reserve = sd_pr_reserve,
+ .pr_release = sd_pr_release,
+ .pr_preempt = sd_pr_preempt,
+ .pr_clear = sd_pr_clear,
+};
+
static const struct block_device_operations sd_fops = {
.owner = THIS_MODULE,
.open = sd_open,
@@ -1547,6 +1636,7 @@ static const struct block_device_operations sd_fops = {
.check_events = sd_check_events,
.revalidate_disk = sd_revalidate_disk,
.unlock_native_capacity = sd_unlock_native_capacity,
+ .pr_ops = &sd_pr_ops,
};
/**
--
1.9.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 4/6] dm: refactor ioctl handling
2015-08-04 7:11 Persistent Reservation API Christoph Hellwig
` (2 preceding siblings ...)
2015-08-04 7:11 ` [PATCH 3/6] sd: implement the persisten reservation API Christoph Hellwig
@ 2015-08-04 7:11 ` Christoph Hellwig
[not found] ` <1438672271-11309-5-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2015-08-04 7:11 ` [PATCH 5/6] dm: split out a helper to find the ioctl target Christoph Hellwig
` (3 subsequent siblings)
7 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2015-08-04 7:11 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-scsi, linux-nvme, dm-devel, linux-api, linux-kernel
This moves the call to blkdev_ioctl and the argument checking to core code,
and only leaves a callout to find the block device to operate on it the
targets. This will simplifies the code and will allow us to pass through
ioctl-like command using other methods in the next patch.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/dm-flakey.c | 14 +++++++-------
drivers/md/dm-linear.c | 12 ++++++------
drivers/md/dm-log-writes.c | 11 +++++------
drivers/md/dm-mpath.c | 30 +++++++++++-------------------
drivers/md/dm-switch.c | 19 ++++++++-----------
drivers/md/dm-verity.c | 13 ++++++-------
drivers/md/dm.c | 12 +++++++++++-
include/linux/device-mapper.h | 4 ++--
8 files changed, 56 insertions(+), 59 deletions(-)
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index b257e46..ec4ebc8 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -371,20 +371,20 @@ static void flakey_status(struct dm_target *ti, status_type_t type,
}
}
-static int flakey_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg)
+static int flakey_ioctl(struct dm_target *ti, struct block_device **bdev,
+ fmode_t *mode)
{
struct flakey_c *fc = ti->private;
- struct dm_dev *dev = fc->dev;
- int r = 0;
+
+ *bdev = fc->dev->bdev;
/*
* Only pass ioctls through if the device sizes match exactly.
*/
if (fc->start ||
- ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT)
- r = scsi_verify_blk_ioctl(NULL, cmd);
-
- return r ? : __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg);
+ ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT)
+ return 1;
+ return 0;
}
static int flakey_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 53e848c..d897707 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -113,21 +113,21 @@ static void linear_status(struct dm_target *ti, status_type_t type,
}
}
-static int linear_ioctl(struct dm_target *ti, unsigned int cmd,
- unsigned long arg)
+static int linear_ioctl(struct dm_target *ti, struct block_device **bdev,
+ fmode_t *mode)
{
struct linear_c *lc = (struct linear_c *) ti->private;
struct dm_dev *dev = lc->dev;
- int r = 0;
+
+ *bdev = dev->bdev;
/*
* Only pass ioctls through if the device sizes match exactly.
*/
if (lc->start ||
ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT)
- r = scsi_verify_blk_ioctl(NULL, cmd);
-
- return r ? : __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg);
+ return 1;
+ return 0;
}
static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index ad1b049..811fc42 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -712,20 +712,19 @@ static void log_writes_status(struct dm_target *ti, status_type_t type,
}
}
-static int log_writes_ioctl(struct dm_target *ti, unsigned int cmd,
- unsigned long arg)
+static int log_writes_ioctl(struct dm_target *ti, struct block_device **bdev,
+ fmode_t *mode)
{
struct log_writes_c *lc = ti->private;
struct dm_dev *dev = lc->dev;
- int r = 0;
+ *bdev = dev->bdev;
/*
* Only pass ioctls through if the device sizes match exactly.
*/
if (ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT)
- r = scsi_verify_blk_ioctl(NULL, cmd);
-
- return r ? : __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg);
+ return 1;
+ return 0;
}
static int log_writes_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index eff7bdd..8670597 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1548,18 +1548,14 @@ out:
return r;
}
-static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
- unsigned long arg)
+static int multipath_ioctl(struct dm_target *ti,
+ struct block_device **bdev, fmode_t *mode)
{
struct multipath *m = ti->private;
struct pgpath *pgpath;
- struct block_device *bdev;
- fmode_t mode;
unsigned long flags;
int r;
- bdev = NULL;
- mode = 0;
r = 0;
spin_lock_irqsave(&m->lock, flags);
@@ -1570,26 +1566,17 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
pgpath = m->current_pgpath;
if (pgpath) {
- bdev = pgpath->path.dev->bdev;
- mode = pgpath->path.dev->mode;
+ *bdev = pgpath->path.dev->bdev;
+ *mode = pgpath->path.dev->mode;
}
if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path))
r = -ENOTCONN;
- else if (!bdev)
+ else if (!*bdev)
r = -EIO;
spin_unlock_irqrestore(&m->lock, flags);
- /*
- * Only pass ioctls through if the device sizes match exactly.
- */
- if (!bdev || ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT) {
- int err = scsi_verify_blk_ioctl(NULL, cmd);
- if (err)
- r = err;
- }
-
if (r == -ENOTCONN && !fatal_signal_pending(current)) {
spin_lock_irqsave(&m->lock, flags);
if (!m->current_pg) {
@@ -1602,7 +1589,12 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
dm_table_run_md_queue_async(m->ti->table);
}
- return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
+ /*
+ * Only pass ioctls through if the device sizes match exactly.
+ */
+ if (!r && ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT)
+ return 1;
+ return r;
}
static int multipath_iterate_devices(struct dm_target *ti,
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index 50fca46..37d27e9 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -511,27 +511,24 @@ static void switch_status(struct dm_target *ti, status_type_t type,
*
* Passthrough all ioctls to the path for sector 0
*/
-static int switch_ioctl(struct dm_target *ti, unsigned cmd,
- unsigned long arg)
+static int switch_ioctl(struct dm_target *ti,
+ struct block_device **bdev, fmode_t *mode)
{
struct switch_ctx *sctx = ti->private;
- struct block_device *bdev;
- fmode_t mode;
unsigned path_nr;
- int r = 0;
path_nr = switch_get_path_nr(sctx, 0);
- bdev = sctx->path_list[path_nr].dmdev->bdev;
- mode = sctx->path_list[path_nr].dmdev->mode;
+ *bdev = sctx->path_list[path_nr].dmdev->bdev;
+ *mode = sctx->path_list[path_nr].dmdev->mode;
/*
* Only pass ioctls through if the device sizes match exactly.
*/
- if (ti->len + sctx->path_list[path_nr].start != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT)
- r = scsi_verify_blk_ioctl(NULL, cmd);
-
- return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
+ if (ti->len + sctx->path_list[path_nr].start !=
+ i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT)
+ return 1;
+ return 0;
}
static int switch_iterate_devices(struct dm_target *ti,
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index bb9c6a0..bd20c41 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -634,18 +634,17 @@ static void verity_status(struct dm_target *ti, status_type_t type,
}
}
-static int verity_ioctl(struct dm_target *ti, unsigned cmd,
- unsigned long arg)
+static int verity_ioctl(struct dm_target *ti, struct block_device **bdev,
+ fmode_t *mode)
{
struct dm_verity *v = ti->private;
- int r = 0;
+
+ *bdev = v->data_dev->bdev;
if (v->data_start ||
ti->len != i_size_read(v->data_dev->bdev->bd_inode) >> SECTOR_SHIFT)
- r = scsi_verify_blk_ioctl(NULL, cmd);
-
- return r ? : __blkdev_driver_ioctl(v->data_dev->bdev, v->data_dev->mode,
- cmd, arg);
+ return 1;
+ return 0;
}
static int verity_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f331d88..c68eb91 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -584,7 +584,17 @@ retry:
goto out;
}
- r = tgt->type->ioctl(tgt, cmd, arg);
+ r = tgt->type->ioctl(tgt, &bdev, &mode);
+ if (r < 0)
+ goto out;
+
+ if (r > 0) {
+ r = scsi_verify_blk_ioctl(NULL, cmd);
+ if (r)
+ goto out;
+ }
+
+ r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
out:
dm_put_live_table(md, srcu_idx);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 51cc1de..9b73138 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -79,8 +79,8 @@ typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type,
typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv);
-typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd,
- unsigned long arg);
+typedef int (*dm_ioctl_fn) (struct dm_target *ti,
+ struct block_device **bdev, fmode_t *mode);
typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm,
struct bio_vec *biovec, int max_size);
--
1.9.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 5/6] dm: split out a helper to find the ioctl target
2015-08-04 7:11 Persistent Reservation API Christoph Hellwig
` (3 preceding siblings ...)
2015-08-04 7:11 ` [PATCH 4/6] dm: refactor ioctl handling Christoph Hellwig
@ 2015-08-04 7:11 ` Christoph Hellwig
[not found] ` <1438672271-11309-6-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2015-08-04 7:11 ` [PATCH 6/6] dm: add support for passing through persistent reservations Christoph Hellwig
` (2 subsequent siblings)
7 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2015-08-04 7:11 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-scsi, linux-nvme, dm-devel, linux-api, linux-kernel
We want to reuse this code for the persistent reservation handling,
so move it into a helper.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/dm.c | 50 ++++++++++++++++++++++++++++++++------------------
1 file changed, 32 insertions(+), 18 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c68eb91..8dfc760 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -556,18 +556,16 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return dm_get_geometry(md, geo);
}
-static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
+static int dm_get_ioctl_table(struct mapped_device *md,
+ struct dm_target **tgt, struct block_device **bdev,
+ fmode_t *mode, int *srcu_idx)
{
- struct mapped_device *md = bdev->bd_disk->private_data;
- int srcu_idx;
struct dm_table *map;
- struct dm_target *tgt;
- int r = -ENOTTY;
+ int r;
retry:
- map = dm_get_live_table(md, &srcu_idx);
-
+ r = -ENOTTY;
+ map = dm_get_live_table(md, srcu_idx);
if (!map || !dm_table_get_size(map))
goto out;
@@ -575,8 +573,9 @@ retry:
if (dm_table_get_num_targets(map) != 1)
goto out;
- tgt = dm_table_get_target(map, 0);
- if (!tgt->type->ioctl)
+ *tgt = dm_table_get_target(map, 0);
+
+ if (!(*tgt)->type->ioctl)
goto out;
if (dm_suspended_md(md)) {
@@ -584,10 +583,32 @@ retry:
goto out;
}
- r = tgt->type->ioctl(tgt, &bdev, &mode);
+ r = (*tgt)->type->ioctl(*tgt, bdev, mode);
if (r < 0)
goto out;
+ return r;
+
+out:
+ dm_put_live_table(md, *srcu_idx);
+ if (r == -ENOTCONN) {
+ msleep(10);
+ goto retry;
+ }
+ return r;
+}
+
+static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ struct mapped_device *md = bdev->bd_disk->private_data;
+ struct dm_target *tgt;
+ int srcu_idx, r;
+
+ r = dm_get_ioctl_table(md, &tgt, &bdev, &mode, &srcu_idx);
+ if (r < 0)
+ return r;
+
if (r > 0) {
r = scsi_verify_blk_ioctl(NULL, cmd);
if (r)
@@ -595,15 +616,8 @@ retry:
}
r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
-
out:
dm_put_live_table(md, srcu_idx);
-
- if (r == -ENOTCONN) {
- msleep(10);
- goto retry;
- }
-
return r;
}
--
1.9.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 6/6] dm: add support for passing through persistent reservations
2015-08-04 7:11 Persistent Reservation API Christoph Hellwig
` (4 preceding siblings ...)
2015-08-04 7:11 ` [PATCH 5/6] dm: split out a helper to find the ioctl target Christoph Hellwig
@ 2015-08-04 7:11 ` Christoph Hellwig
2015-08-04 17:53 ` Persistent Reservation API Mike Snitzer
2015-08-04 18:04 ` Keith Busch
7 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2015-08-04 7:11 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-scsi, linux-nvme, dm-devel, linux-api, linux-kernel
This adds support to pass through persistent reservation requests
similar to the existing ioctl handling, and with the same limitations,
e.g. devices may only have a single target attached.
This is mostly intended for multipathing.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/dm.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 122 insertions(+)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8dfc760..3388fa6 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -24,6 +24,7 @@
#include <linux/ktime.h>
#include <linux/elevator.h> /* for rq_end_sector() */
#include <linux/blk-mq.h>
+#include <linux/pr.h>
#include <trace/events/block.h>
@@ -3670,11 +3671,132 @@ void dm_free_md_mempools(struct dm_md_mempools *pools)
kfree(pools);
}
+static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
+ bool ignore)
+{
+ struct mapped_device *md = bdev->bd_disk->private_data;
+ const struct pr_ops *ops;
+ struct dm_target *tgt;
+ fmode_t mode;
+ int srcu_idx, r;
+
+ r = dm_get_ioctl_table(md, &tgt, &bdev, &mode, &srcu_idx);
+ if (r < 0)
+ return r;
+
+ ops = bdev->bd_disk->fops->pr_ops;
+ if (ops && ops->pr_register)
+ r = ops->pr_register(bdev, old_key, new_key, ignore);
+ else
+ r = -EOPNOTSUPP;
+
+ dm_put_live_table(md, srcu_idx);
+ return r;
+}
+
+static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type)
+{
+ struct mapped_device *md = bdev->bd_disk->private_data;
+ const struct pr_ops *ops;
+ struct dm_target *tgt;
+ fmode_t mode;
+ int srcu_idx, r;
+
+ r = dm_get_ioctl_table(md, &tgt, &bdev, &mode, &srcu_idx);
+ if (r < 0)
+ return r;
+
+ ops = bdev->bd_disk->fops->pr_ops;
+ if (ops && ops->pr_reserve)
+ r = ops->pr_reserve(bdev, key, type);
+ else
+ r = -EOPNOTSUPP;
+
+ dm_put_live_table(md, srcu_idx);
+ return r;
+}
+
+static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
+{
+ struct mapped_device *md = bdev->bd_disk->private_data;
+ const struct pr_ops *ops;
+ struct dm_target *tgt;
+ fmode_t mode;
+ int srcu_idx, r;
+
+ r = dm_get_ioctl_table(md, &tgt, &bdev, &mode, &srcu_idx);
+ if (r < 0)
+ return r;
+
+ ops = bdev->bd_disk->fops->pr_ops;
+ if (ops && ops->pr_release)
+ r = ops->pr_release(bdev, key, type);
+ else
+ r = -EOPNOTSUPP;
+
+ dm_put_live_table(md, srcu_idx);
+ return r;
+}
+
+static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
+ enum pr_type type, bool abort)
+{
+ struct mapped_device *md = bdev->bd_disk->private_data;
+ const struct pr_ops *ops;
+ struct dm_target *tgt;
+ fmode_t mode;
+ int srcu_idx, r;
+
+ r = dm_get_ioctl_table(md, &tgt, &bdev, &mode, &srcu_idx);
+ if (r < 0)
+ return r;
+
+ ops = bdev->bd_disk->fops->pr_ops;
+ if (ops && ops->pr_preempt)
+ r = ops->pr_preempt(bdev, old_key, new_key, type, abort);
+ else
+ r = -EOPNOTSUPP;
+
+ dm_put_live_table(md, srcu_idx);
+ return r;
+}
+
+static int dm_pr_clear(struct block_device *bdev, u64 key)
+{
+ struct mapped_device *md = bdev->bd_disk->private_data;
+ const struct pr_ops *ops;
+ struct dm_target *tgt;
+ fmode_t mode;
+ int srcu_idx, r;
+
+ r = dm_get_ioctl_table(md, &tgt, &bdev, &mode, &srcu_idx);
+ if (r < 0)
+ return r;
+
+ ops = bdev->bd_disk->fops->pr_ops;
+ if (ops && ops->pr_clear)
+ r = ops->pr_clear(bdev, key);
+ else
+ r = -EOPNOTSUPP;
+
+ dm_put_live_table(md, srcu_idx);
+ return r;
+}
+
+static const struct pr_ops dm_pr_ops = {
+ .pr_register = dm_pr_register,
+ .pr_reserve = dm_pr_reserve,
+ .pr_release = dm_pr_release,
+ .pr_preempt = dm_pr_preempt,
+ .pr_clear = dm_pr_clear,
+};
+
static const struct block_device_operations dm_blk_dops = {
.open = dm_blk_open,
.release = dm_blk_close,
.ioctl = dm_blk_ioctl,
.getgeo = dm_blk_getgeo,
+ .pr_ops = &dm_pr_ops,
.owner = THIS_MODULE
};
--
1.9.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: Persistent Reservation API
2015-08-04 7:11 Persistent Reservation API Christoph Hellwig
` (5 preceding siblings ...)
2015-08-04 7:11 ` [PATCH 6/6] dm: add support for passing through persistent reservations Christoph Hellwig
@ 2015-08-04 17:53 ` Mike Snitzer
2015-08-06 13:44 ` Christoph Hellwig
2015-08-04 18:04 ` Keith Busch
7 siblings, 1 reply; 19+ messages in thread
From: Mike Snitzer @ 2015-08-04 17:53 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Jens Axboe, linux-api, dm-devel, linux-nvme, linux-scsi,
linux-kernel
On Tue, Aug 04 2015 at 3:11am -0400,
Christoph Hellwig <hch@lst.de> wrote:
> This series adds support for a simplified persistent reservation API
> to the block layer. The intent is that both in-kernel and userspace
> consumers can use the API instead of having to hand craft SCSI or NVMe
> command through the various pass through interfaces. It also adds
> DM support as getting reservations through dm-multipath is a major
> pain with the current scheme.
The DM changes need to go through linux-dm.git. Once the block and SCSI
bits land I'll rebase accordingly. That cross-maintainer logisitics
aside, I'll reply with feedback on the DM patches.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: Persistent Reservation API
2015-08-04 17:53 ` Persistent Reservation API Mike Snitzer
@ 2015-08-06 13:44 ` Christoph Hellwig
0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2015-08-06 13:44 UTC (permalink / raw)
To: Mike Snitzer
Cc: Christoph Hellwig, Jens Axboe, linux-api, dm-devel, linux-nvme,
linux-scsi, linux-kernel
On Tue, Aug 04, 2015 at 01:53:49PM -0400, Mike Snitzer wrote:
> The DM changes need to go through linux-dm.git. Once the block and SCSI
> bits land I'll rebase accordingly. That cross-maintainer logisitics
> aside, I'll reply with feedback on the DM patches.
That sounds fine to me, thanks!
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: Persistent Reservation API
2015-08-04 7:11 Persistent Reservation API Christoph Hellwig
` (6 preceding siblings ...)
2015-08-04 17:53 ` Persistent Reservation API Mike Snitzer
@ 2015-08-04 18:04 ` Keith Busch
2015-08-06 13:45 ` Christoph Hellwig
7 siblings, 1 reply; 19+ messages in thread
From: Keith Busch @ 2015-08-04 18:04 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Jens Axboe, linux-api, dm-devel, linux-nvme, linux-scsi,
linux-kernel
On Tue, 4 Aug 2015, Christoph Hellwig wrote:
> NVMe support currently isn't included as I don't have a multihost
> NVMe setup to test on, but if I can find a volunteer to test it I'm
> happy to write the code for it.
Looks pretty good so far. I'd be happy to give try it out with NVMe
subsystems.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: Persistent Reservation API
2015-08-04 18:04 ` Keith Busch
@ 2015-08-06 13:45 ` Christoph Hellwig
0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2015-08-06 13:45 UTC (permalink / raw)
To: Keith Busch
Cc: Christoph Hellwig, Jens Axboe, linux-api, dm-devel, linux-nvme,
linux-scsi, linux-kernel
On Tue, Aug 04, 2015 at 06:04:04PM +0000, Keith Busch wrote:
> On Tue, 4 Aug 2015, Christoph Hellwig wrote:
>> NVMe support currently isn't included as I don't have a multihost
>> NVMe setup to test on, but if I can find a volunteer to test it I'm
>> happy to write the code for it.
>
> Looks pretty good so far. I'd be happy to give try it out with NVMe
> subsystems.
Thanks, I'll prepare a version once I get a few free minutes.
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 4/6] dm: refactor ioctl handling
2015-10-15 12:10 Persistent Reservation API V4 Christoph Hellwig
@ 2015-10-15 12:10 ` Christoph Hellwig
0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2015-10-15 12:10 UTC (permalink / raw)
To: axboe
Cc: keith.busch, linux-scsi, linux-nvme, dm-devel, linux-api,
linux-kernel
This moves the call to blkdev_ioctl and the argument checking to core code,
and only leaves a callout to find the block device to operate on it the
targets. This will simplifies the code and will allow us to pass through
ioctl-like command using other methods in the next patch.
Also split out a helper around calling the prepare_ioctl method that will
be reused for persistent reservation handling.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/md/dm-flakey.c | 16 +++++++-------
drivers/md/dm-linear.c | 14 ++++++------
drivers/md/dm-log-writes.c | 13 ++++++-----
drivers/md/dm-mpath.c | 32 +++++++++++----------------
drivers/md/dm-switch.c | 21 ++++++++----------
drivers/md/dm-verity.c | 15 ++++++-------
drivers/md/dm.c | 50 ++++++++++++++++++++++++++++++++-----------
include/linux/device-mapper.h | 6 +++---
8 files changed, 89 insertions(+), 78 deletions(-)
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 645e8b4..09e2afc 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -373,20 +373,20 @@ static void flakey_status(struct dm_target *ti, status_type_t type,
}
}
-static int flakey_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg)
+static int flakey_prepare_ioctl(struct dm_target *ti,
+ struct block_device **bdev, fmode_t *mode)
{
struct flakey_c *fc = ti->private;
- struct dm_dev *dev = fc->dev;
- int r = 0;
+
+ *bdev = fc->dev->bdev;
/*
* Only pass ioctls through if the device sizes match exactly.
*/
if (fc->start ||
- ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT)
- r = scsi_verify_blk_ioctl(NULL, cmd);
-
- return r ? : __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg);
+ ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT)
+ return 1;
+ return 0;
}
static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data)
@@ -405,7 +405,7 @@ static struct target_type flakey_target = {
.map = flakey_map,
.end_io = flakey_end_io,
.status = flakey_status,
- .ioctl = flakey_ioctl,
+ .prepare_ioctl = flakey_prepare_ioctl,
.iterate_devices = flakey_iterate_devices,
};
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 436f5c9..de52864 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -116,21 +116,21 @@ static void linear_status(struct dm_target *ti, status_type_t type,
}
}
-static int linear_ioctl(struct dm_target *ti, unsigned int cmd,
- unsigned long arg)
+static int linear_prepare_ioctl(struct dm_target *ti,
+ struct block_device **bdev, fmode_t *mode)
{
struct linear_c *lc = (struct linear_c *) ti->private;
struct dm_dev *dev = lc->dev;
- int r = 0;
+
+ *bdev = dev->bdev;
/*
* Only pass ioctls through if the device sizes match exactly.
*/
if (lc->start ||
ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT)
- r = scsi_verify_blk_ioctl(NULL, cmd);
-
- return r ? : __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg);
+ return 1;
+ return 0;
}
static int linear_iterate_devices(struct dm_target *ti,
@@ -149,7 +149,7 @@ static struct target_type linear_target = {
.dtr = linear_dtr,
.map = linear_map,
.status = linear_status,
- .ioctl = linear_ioctl,
+ .prepare_ioctl = linear_prepare_ioctl,
.iterate_devices = linear_iterate_devices,
};
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index b2912db..624589d 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -714,20 +714,19 @@ static void log_writes_status(struct dm_target *ti, status_type_t type,
}
}
-static int log_writes_ioctl(struct dm_target *ti, unsigned int cmd,
- unsigned long arg)
+static int log_writes_prepare_ioctl(struct dm_target *ti,
+ struct block_device **bdev, fmode_t *mode)
{
struct log_writes_c *lc = ti->private;
struct dm_dev *dev = lc->dev;
- int r = 0;
+ *bdev = dev->bdev;
/*
* Only pass ioctls through if the device sizes match exactly.
*/
if (ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT)
- r = scsi_verify_blk_ioctl(NULL, cmd);
-
- return r ? : __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg);
+ return 1;
+ return 0;
}
static int log_writes_iterate_devices(struct dm_target *ti,
@@ -782,7 +781,7 @@ static struct target_type log_writes_target = {
.map = log_writes_map,
.end_io = normal_end_io,
.status = log_writes_status,
- .ioctl = log_writes_ioctl,
+ .prepare_ioctl = log_writes_prepare_ioctl,
.message = log_writes_message,
.iterate_devices = log_writes_iterate_devices,
.io_hints = log_writes_io_hints,
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 5a67671..77066a1 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1533,18 +1533,14 @@ out:
return r;
}
-static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
- unsigned long arg)
+static int multipath_prepare_ioctl(struct dm_target *ti,
+ struct block_device **bdev, fmode_t *mode)
{
struct multipath *m = ti->private;
struct pgpath *pgpath;
- struct block_device *bdev;
- fmode_t mode;
unsigned long flags;
int r;
- bdev = NULL;
- mode = 0;
r = 0;
spin_lock_irqsave(&m->lock, flags);
@@ -1555,26 +1551,17 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
pgpath = m->current_pgpath;
if (pgpath) {
- bdev = pgpath->path.dev->bdev;
- mode = pgpath->path.dev->mode;
+ *bdev = pgpath->path.dev->bdev;
+ *mode = pgpath->path.dev->mode;
}
if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path))
r = -ENOTCONN;
- else if (!bdev)
+ else if (!*bdev)
r = -EIO;
spin_unlock_irqrestore(&m->lock, flags);
- /*
- * Only pass ioctls through if the device sizes match exactly.
- */
- if (!bdev || ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT) {
- int err = scsi_verify_blk_ioctl(NULL, cmd);
- if (err)
- r = err;
- }
-
if (r == -ENOTCONN && !fatal_signal_pending(current)) {
spin_lock_irqsave(&m->lock, flags);
if (!m->current_pg) {
@@ -1587,7 +1574,12 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
dm_table_run_md_queue_async(m->ti->table);
}
- return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
+ /*
+ * Only pass ioctls through if the device sizes match exactly.
+ */
+ if (!r && ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT)
+ return 1;
+ return r;
}
static int multipath_iterate_devices(struct dm_target *ti,
@@ -1703,7 +1695,7 @@ static struct target_type multipath_target = {
.resume = multipath_resume,
.status = multipath_status,
.message = multipath_message,
- .ioctl = multipath_ioctl,
+ .prepare_ioctl = multipath_prepare_ioctl,
.iterate_devices = multipath_iterate_devices,
.busy = multipath_busy,
};
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index 50fca46..b128575 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -511,27 +511,24 @@ static void switch_status(struct dm_target *ti, status_type_t type,
*
* Passthrough all ioctls to the path for sector 0
*/
-static int switch_ioctl(struct dm_target *ti, unsigned cmd,
- unsigned long arg)
+static int switch_prepare_ioctl(struct dm_target *ti,
+ struct block_device **bdev, fmode_t *mode)
{
struct switch_ctx *sctx = ti->private;
- struct block_device *bdev;
- fmode_t mode;
unsigned path_nr;
- int r = 0;
path_nr = switch_get_path_nr(sctx, 0);
- bdev = sctx->path_list[path_nr].dmdev->bdev;
- mode = sctx->path_list[path_nr].dmdev->mode;
+ *bdev = sctx->path_list[path_nr].dmdev->bdev;
+ *mode = sctx->path_list[path_nr].dmdev->mode;
/*
* Only pass ioctls through if the device sizes match exactly.
*/
- if (ti->len + sctx->path_list[path_nr].start != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT)
- r = scsi_verify_blk_ioctl(NULL, cmd);
-
- return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
+ if (ti->len + sctx->path_list[path_nr].start !=
+ i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT)
+ return 1;
+ return 0;
}
static int switch_iterate_devices(struct dm_target *ti,
@@ -560,7 +557,7 @@ static struct target_type switch_target = {
.map = switch_map,
.message = switch_message,
.status = switch_status,
- .ioctl = switch_ioctl,
+ .prepare_ioctl = switch_prepare_ioctl,
.iterate_devices = switch_iterate_devices,
};
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index edc624b..ccf4188 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -631,18 +631,17 @@ static void verity_status(struct dm_target *ti, status_type_t type,
}
}
-static int verity_ioctl(struct dm_target *ti, unsigned cmd,
- unsigned long arg)
+static int verity_prepare_ioctl(struct dm_target *ti,
+ struct block_device **bdev, fmode_t *mode)
{
struct dm_verity *v = ti->private;
- int r = 0;
+
+ *bdev = v->data_dev->bdev;
if (v->data_start ||
ti->len != i_size_read(v->data_dev->bdev->bd_inode) >> SECTOR_SHIFT)
- r = scsi_verify_blk_ioctl(NULL, cmd);
-
- return r ? : __blkdev_driver_ioctl(v->data_dev->bdev, v->data_dev->mode,
- cmd, arg);
+ return 1;
+ return 0;
}
static int verity_iterate_devices(struct dm_target *ti,
@@ -965,7 +964,7 @@ static struct target_type verity_target = {
.dtr = verity_dtr,
.map = verity_map,
.status = verity_status,
- .ioctl = verity_ioctl,
+ .prepare_ioctl = verity_prepare_ioctl,
.iterate_devices = verity_iterate_devices,
.io_hints = verity_io_hints,
};
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6264781..017c712 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -555,18 +555,16 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return dm_get_geometry(md, geo);
}
-static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
+static int dm_get_live_table_for_ioctl(struct mapped_device *md,
+ struct dm_target **tgt, struct block_device **bdev,
+ fmode_t *mode, int *srcu_idx)
{
- struct mapped_device *md = bdev->bd_disk->private_data;
- int srcu_idx;
struct dm_table *map;
- struct dm_target *tgt;
- int r = -ENOTTY;
+ int r;
retry:
- map = dm_get_live_table(md, &srcu_idx);
-
+ r = -ENOTTY;
+ map = dm_get_live_table(md, srcu_idx);
if (!map || !dm_table_get_size(map))
goto out;
@@ -574,8 +572,9 @@ retry:
if (dm_table_get_num_targets(map) != 1)
goto out;
- tgt = dm_table_get_target(map, 0);
- if (!tgt->type->ioctl)
+ *tgt = dm_table_get_target(map, 0);
+
+ if (!(*tgt)->type->prepare_ioctl)
goto out;
if (dm_suspended_md(md)) {
@@ -583,16 +582,41 @@ retry:
goto out;
}
- r = tgt->type->ioctl(tgt, cmd, arg);
+ r = (*tgt)->type->prepare_ioctl(*tgt, bdev, mode);
+ if (r < 0)
+ goto out;
-out:
- dm_put_live_table(md, srcu_idx);
+ return r;
+out:
+ dm_put_live_table(md, *srcu_idx);
if (r == -ENOTCONN) {
msleep(10);
goto retry;
}
+ return r;
+}
+
+static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ struct mapped_device *md = bdev->bd_disk->private_data;
+ struct dm_target *tgt;
+ int srcu_idx, r;
+
+ r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
+ if (r < 0)
+ return r;
+
+ if (r > 0) {
+ r = scsi_verify_blk_ioctl(NULL, cmd);
+ if (r)
+ goto out;
+ }
+ r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
+out:
+ dm_put_live_table(md, srcu_idx);
return r;
}
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 76d23fa..ec1c61c 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -79,8 +79,8 @@ typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type,
typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv);
-typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd,
- unsigned long arg);
+typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti,
+ struct block_device **bdev, fmode_t *mode);
/*
* These iteration functions are typically used to check (and combine)
@@ -156,7 +156,7 @@ struct target_type {
dm_resume_fn resume;
dm_status_fn status;
dm_message_fn message;
- dm_ioctl_fn ioctl;
+ dm_prepare_ioctl_fn prepare_ioctl;
dm_busy_fn busy;
dm_iterate_devices_fn iterate_devices;
dm_io_hints_fn io_hints;
--
1.9.1
^ permalink raw reply related [flat|nested] 19+ messages in thread