* [PATCH V2] nvme: enable char device per namespace
@ 2020-12-08 13:29 javier
2020-12-08 14:21 ` Christoph Hellwig
0 siblings, 1 reply; 4+ messages in thread
From: javier @ 2020-12-08 13:29 UTC (permalink / raw)
To: linux-nvme
Cc: linux-block, hch, kbusch, sagi, minwoo.im.dev,
Javier González
From: Javier González <javier.gonz@samsung.com>
Create a char device per NVMe namespace. This char device is always
initialized, independently of whether the features implemented by the
device are supported by the kernel. User-space can therefore always
issue IOCTLs to the NVMe driver using this char device.
The char device is presented as /dev/nvmeXnYc. This naming aligns with
nvme-cli filters, so the char device should be usable without tool
changes. It also follows a new name convention to avoid confusion with
the existing hidden device (i.e., nvmeXcYnZ).
Changes since V1:
- Remove patches 1-3 which are already picked up by Christoph
- Change the char device and sysfs entries to nvmeXnYc / c signals
char device
- Address Minwoo's comments on inline functions and style
Signed-off-by: Javier González <javier.gonz@samsung.com>
---
drivers/nvme/host/core.c | 141 +++++++++++++++++++++++++++++++++++----
drivers/nvme/host/nvme.h | 8 +++
2 files changed, 135 insertions(+), 14 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 99f91efe3824..e446aaba5ccd 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -86,7 +86,9 @@ static DEFINE_MUTEX(nvme_subsystems_lock);
static DEFINE_IDA(nvme_instance_ida);
static dev_t nvme_ctrl_base_chr_devt;
+static dev_t nvme_ns_base_chr_devt;
static struct class *nvme_class;
+static struct class *nvme_ns_class;
static struct class *nvme_subsys_class;
static void nvme_put_subsystem(struct nvme_subsystem *subsys);
@@ -538,6 +540,7 @@ static void nvme_free_ns(struct kref *kref)
if (ns->ndev)
nvme_nvm_unregister(ns);
+ cdev_device_del(&ns->cdev, &ns->cdev_device);
put_disk(ns->disk);
nvme_put_ns_head(ns->head);
nvme_put_ctrl(ns->ctrl);
@@ -1738,15 +1741,15 @@ static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
return ret;
}
-static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
+static int __nvme_ns_ioctl(struct gendisk *disk, unsigned int cmd,
+ unsigned long arg)
{
struct nvme_ns_head *head = NULL;
void __user *argp = (void __user *)arg;
struct nvme_ns *ns;
int srcu_idx, ret;
- ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
+ ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
if (unlikely(!ns))
return -EWOULDBLOCK;
@@ -1783,6 +1786,18 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
return ret;
}
+static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ return __nvme_ns_ioctl(bdev->bd_disk, cmd, arg);
+}
+
+static long nvme_cdev_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ return __nvme_ns_ioctl((struct gendisk *)file->private_data, cmd, arg);
+}
+
#ifdef CONFIG_COMPAT
struct nvme_user_io32 {
__u8 opcode;
@@ -1824,10 +1839,8 @@ static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
#define nvme_compat_ioctl NULL
#endif /* CONFIG_COMPAT */
-static int nvme_open(struct block_device *bdev, fmode_t mode)
+static int __nvme_open(struct nvme_ns *ns)
{
- struct nvme_ns *ns = bdev->bd_disk->private_data;
-
#ifdef CONFIG_NVME_MULTIPATH
/* should never be called due to GENHD_FL_HIDDEN */
if (WARN_ON_ONCE(ns->head->disk))
@@ -1846,12 +1859,24 @@ static int nvme_open(struct block_device *bdev, fmode_t mode)
return -ENXIO;
}
+static void __nvme_release(struct nvme_ns *ns)
+{
+ module_put(ns->ctrl->ops->module);
+ nvme_put_ns(ns);
+}
+
+static int nvme_open(struct block_device *bdev, fmode_t mode)
+{
+ struct nvme_ns *ns = bdev->bd_disk->private_data;
+
+ return __nvme_open(ns);
+}
+
static void nvme_release(struct gendisk *disk, fmode_t mode)
{
struct nvme_ns *ns = disk->private_data;
- module_put(ns->ctrl->ops->module);
- nvme_put_ns(ns);
+ __nvme_release(ns);
}
static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -1863,6 +1888,26 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return 0;
}
+static int nvme_cdev_open(struct inode *inode, struct file *file)
+{
+ struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev);
+ int ret;
+
+ ret = __nvme_open(ns);
+ if (!ret)
+ file->private_data = ns->disk;
+
+ return ret;
+}
+
+static int nvme_cdev_release(struct inode *inode, struct file *file)
+{
+ struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev);
+
+ __nvme_release(ns);
+ return 0;
+}
+
#ifdef CONFIG_BLK_DEV_INTEGRITY
static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
u32 max_integrity_segments)
@@ -2346,6 +2391,14 @@ static const struct block_device_operations nvme_bdev_ops = {
.pr_ops = &nvme_pr_ops,
};
+static const struct file_operations nvme_cdev_fops = {
+ .owner = THIS_MODULE,
+ .open = nvme_cdev_open,
+ .release = nvme_cdev_release,
+ .unlocked_ioctl = nvme_cdev_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+};
+
#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode)
{
@@ -3343,6 +3396,9 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
{
struct gendisk *disk = dev_to_disk(dev);
+ if (dev->class == nvme_ns_class)
+ return nvme_get_ns_from_cdev(dev)->head;
+
if (disk->fops == &nvme_bdev_ops)
return nvme_get_ns_from_dev(dev)->head;
else
@@ -3474,6 +3530,11 @@ const struct attribute_group *nvme_ns_id_attr_groups[] = {
NULL,
};
+const struct attribute_group *nvme_ns_char_id_attr_groups[] = {
+ &nvme_ns_id_attr_group,
+ NULL,
+};
+
#define nvme_show_str_function(field) \
static ssize_t field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
@@ -3866,6 +3927,35 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
}
EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);
+static int nvme_alloc_chardev_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
+{
+ char cdisk_name[DISK_NAME_LEN];
+ int ret;
+
+ device_initialize(&ns->cdev_device);
+ ns->cdev_device.devt = MKDEV(MAJOR(nvme_ns_base_chr_devt),
+ ns->head->instance);
+ ns->cdev_device.class = nvme_ns_class;
+ ns->cdev_device.parent = ctrl->device;
+ ns->cdev_device.groups = nvme_ns_char_id_attr_groups;
+ dev_set_drvdata(&ns->cdev_device, ns);
+
+ sprintf(cdisk_name, "nvme%dn%dc", ctrl->instance, ns->head->instance);
+
+ ret = dev_set_name(&ns->cdev_device, "%s", cdisk_name);
+ if (ret)
+ return ret;
+
+ cdev_init(&ns->cdev, &nvme_cdev_fops);
+ ns->cdev.owner = ctrl->ops->module;
+
+ ret = cdev_device_add(&ns->cdev, &ns->cdev_device);
+ if (ret)
+ kfree_const(ns->cdev_device.kobj.name);
+
+ return ret;
+}
+
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
struct nvme_ns_ids *ids)
{
@@ -3912,8 +4002,12 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
ns->disk = disk;
+ /* When the device does not support any of the features required by the
+ * kernel (or viceversa), hide the block device. We can still rely on
+ * the namespace char device for submitting IOCTLs
+ */
if (nvme_update_ns_info(ns, id))
- goto out_put_disk;
+ disk->flags |= GENHD_FL_HIDDEN;
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
if (nvme_nvm_register(ns, disk_name, node)) {
@@ -3929,9 +4023,12 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
nvme_get_ctrl(ctrl);
device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups);
-
nvme_mpath_add_disk(ns, id);
nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
+
+ if (nvme_alloc_chardev_ns(ctrl, ns))
+ goto out_put_disk;
+
kfree(id);
return;
@@ -4733,23 +4830,38 @@ static int __init nvme_core_init(void)
if (result < 0)
goto destroy_delete_wq;
+ result = alloc_chrdev_region(&nvme_ns_base_chr_devt, 0,
+ NVME_MINORS, "nvmec");
+ if (result < 0)
+ goto unregister_dev_chrdev;
+
nvme_class = class_create(THIS_MODULE, "nvme");
if (IS_ERR(nvme_class)) {
result = PTR_ERR(nvme_class);
- goto unregister_chrdev;
+ goto unregister_ns_chrdev;
}
nvme_class->dev_uevent = nvme_class_uevent;
+ nvme_ns_class = class_create(THIS_MODULE, "nvme-ns");
+ if (IS_ERR(nvme_ns_class)) {
+ result = PTR_ERR(nvme_ns_class);
+ goto destroy_dev_class;
+ }
+
nvme_subsys_class = class_create(THIS_MODULE, "nvme-subsystem");
if (IS_ERR(nvme_subsys_class)) {
result = PTR_ERR(nvme_subsys_class);
- goto destroy_class;
+ goto destroy_ns_class;
}
return 0;
-destroy_class:
+destroy_ns_class:
+ class_destroy(nvme_ns_class);
+destroy_dev_class:
class_destroy(nvme_class);
-unregister_chrdev:
+unregister_ns_chrdev:
+ unregister_chrdev_region(nvme_ns_base_chr_devt, NVME_MINORS);
+unregister_dev_chrdev:
unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
destroy_delete_wq:
destroy_workqueue(nvme_delete_wq);
@@ -4765,6 +4877,7 @@ static void __exit nvme_core_exit(void)
{
class_destroy(nvme_subsys_class);
class_destroy(nvme_class);
+ unregister_chrdev_region(nvme_ns_base_chr_devt, NVME_MINORS);
unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
destroy_workqueue(nvme_delete_wq);
destroy_workqueue(nvme_reset_wq);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index bfcedfa4b057..1dd99f207aee 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -439,6 +439,9 @@ struct nvme_ns {
struct kref kref;
struct nvme_ns_head *head;
+ struct device cdev_device; /* char device */
+ struct cdev cdev;
+
int lba_shift;
u16 ms;
u16 sgs;
@@ -818,6 +821,11 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
return dev_to_disk(dev)->private_data;
}
+static inline struct nvme_ns *nvme_get_ns_from_cdev(struct device *dev)
+{
+ return dev_get_drvdata(dev);
+}
+
#ifdef CONFIG_NVME_HWMON
int nvme_hwmon_init(struct nvme_ctrl *ctrl);
#else
--
2.17.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH V2] nvme: enable char device per namespace
2020-12-08 13:29 [PATCH V2] nvme: enable char device per namespace javier
@ 2020-12-08 14:21 ` Christoph Hellwig
2020-12-08 19:03 ` Keith Busch
2020-12-09 9:16 ` Javier González
0 siblings, 2 replies; 4+ messages in thread
From: Christoph Hellwig @ 2020-12-08 14:21 UTC (permalink / raw)
To: javier
Cc: linux-nvme, linux-block, hch, kbusch, sagi, minwoo.im.dev,
Javier González
A bunch of nitpicks (mostly naming as usual, sorry..):
> +static int __nvme_ns_ioctl(struct gendisk *disk, unsigned int cmd,
> + unsigned long arg)
> {
What about nvme_disk_ioctl instead as that is what it operates on?
> +static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
> + unsigned int cmd, unsigned long arg)
> +{
> + return __nvme_ns_ioctl(bdev->bd_disk, cmd, arg);
> +}
> +
> +static long nvme_cdev_ioctl(struct file *file, unsigned int cmd,
> + unsigned long arg)
> +{
> + return __nvme_ns_ioctl((struct gendisk *)file->private_data, cmd, arg);
> +}
No need for the cast.
Also can we keep all the char device methods together close to the
struct file_operations declaration? I just prefer to keep the code
a little grouped.
> -static int nvme_open(struct block_device *bdev, fmode_t mode)
> +static int __nvme_open(struct nvme_ns *ns)
> {
> - struct nvme_ns *ns = bdev->bd_disk->private_data;
> -
> #ifdef CONFIG_NVME_MULTIPATH
> /* should never be called due to GENHD_FL_HIDDEN */
> if (WARN_ON_ONCE(ns->head->disk))
> @@ -1846,12 +1859,24 @@ static int nvme_open(struct block_device *bdev, fmode_t mode)
> return -ENXIO;
> }
>
> +static void __nvme_release(struct nvme_ns *ns)
> +{
> + module_put(ns->ctrl->ops->module);
> + nvme_put_ns(ns);
> +}
nvme_ns_open and nvme_ns_release?
> +
> +static int nvme_open(struct block_device *bdev, fmode_t mode)
> +{
> + struct nvme_ns *ns = bdev->bd_disk->private_data;
> +
> + return __nvme_open(ns);
> +}
> +
> static void nvme_release(struct gendisk *disk, fmode_t mode)
> {
> struct nvme_ns *ns = disk->private_data;
>
> - module_put(ns->ctrl->ops->module);
> - nvme_put_ns(ns);
> + __nvme_release(ns);
No need for the local ns variable in both cases.
> +static int nvme_cdev_open(struct inode *inode, struct file *file)
> +{
> + struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev);
> + int ret;
> +
> + ret = __nvme_open(ns);
> + if (!ret)
> + file->private_data = ns->disk;
> +
> + return ret;
Do we need the ->private_data assignment at all? I think the ioctl
handler could just grab it directly from i_cdev.
> + sprintf(cdisk_name, "nvme%dn%dc", ctrl->instance, ns->head->instance);
And the most important naming decision is this. I have two issues with
naming still:
- we already use the c for controller in the hidden disk naming. Although
that is in a different position, I think this is not super intuitive.
- this is missing multipath support entirely, so once we want to add
multipath support we'll run into issues. So maybe use something
based off the hidden node naming? E.g.:
sprintf(disk_name, "nvme-generic-%dc%dn%d", ctrl->subsys->instance,
ctrl->instance, ns->head->instance);
> + /* When the device does not support any of the features required by the
> + * kernel (or viceversa), hide the block device. We can still rely on
> + * the namespace char device for submitting IOCTLs
> + */
Normal kernel comment style is the opening
/*
on its own line.
> if (nvme_update_ns_info(ns, id))
> - goto out_put_disk;
> + disk->flags |= GENHD_FL_HIDDEN;
I don't think we can do this based on all the error returns. I think
we'll have to move the flags manipulation into nvme_update_ns_info to
also cover the revalidate case.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH V2] nvme: enable char device per namespace
2020-12-08 14:21 ` Christoph Hellwig
@ 2020-12-08 19:03 ` Keith Busch
2020-12-09 9:16 ` Javier González
1 sibling, 0 replies; 4+ messages in thread
From: Keith Busch @ 2020-12-08 19:03 UTC (permalink / raw)
To: Christoph Hellwig
Cc: javier, linux-nvme, linux-block, sagi, minwoo.im.dev,
Javier González
On Tue, Dec 08, 2020 at 03:21:51PM +0100, Christoph Hellwig wrote:
> > + sprintf(cdisk_name, "nvme%dn%dc", ctrl->instance, ns->head->instance);
>
> And the most important naming decision is this. I have two issues with
> naming still:
>
> - we already use the c for controller in the hidden disk naming. Although
> that is in a different position, I think this is not super intuitive.
> - this is missing multipath support entirely, so once we want to add
> multipath support we'll run into issues. So maybe use something
> based off the hidden node naming? E.g.:
>
> sprintf(disk_name, "nvme-generic-%dc%dn%d", ctrl->subsys->instance,
> ctrl->instance, ns->head->instance);
+1 for this naming suggestion.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: nvme: enable char device per namespace
2020-12-08 14:21 ` Christoph Hellwig
2020-12-08 19:03 ` Keith Busch
@ 2020-12-09 9:16 ` Javier González
1 sibling, 0 replies; 4+ messages in thread
From: Javier González @ 2020-12-09 9:16 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-nvme, linux-block, kbusch, sagi, minwoo.im.dev
On 08.12.2020 15:21, Christoph Hellwig wrote:
>A bunch of nitpicks (mostly naming as usual, sorry..):
No worries. Thanks for taking the time.
>
>> +static int __nvme_ns_ioctl(struct gendisk *disk, unsigned int cmd,
>> + unsigned long arg)
>> {
>
>What about nvme_disk_ioctl instead as that is what it operates on?
Sure.
>
>> +static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
>> + unsigned int cmd, unsigned long arg)
>> +{
>> + return __nvme_ns_ioctl(bdev->bd_disk, cmd, arg);
>> +}
>> +
>> +static long nvme_cdev_ioctl(struct file *file, unsigned int cmd,
>> + unsigned long arg)
>> +{
>> + return __nvme_ns_ioctl((struct gendisk *)file->private_data, cmd, arg);
>> +}
>
>No need for the cast.
>
>Also can we keep all the char device methods together close to the
>struct file_operations declaration? I just prefer to keep the code
>a little grouped.
Perfect.
>
>> -static int nvme_open(struct block_device *bdev, fmode_t mode)
>> +static int __nvme_open(struct nvme_ns *ns)
>> {
>> - struct nvme_ns *ns = bdev->bd_disk->private_data;
>> -
>> #ifdef CONFIG_NVME_MULTIPATH
>> /* should never be called due to GENHD_FL_HIDDEN */
>> if (WARN_ON_ONCE(ns->head->disk))
>> @@ -1846,12 +1859,24 @@ static int nvme_open(struct block_device *bdev, fmode_t mode)
>> return -ENXIO;
>> }
>>
>> +static void __nvme_release(struct nvme_ns *ns)
>> +{
>> + module_put(ns->ctrl->ops->module);
>> + nvme_put_ns(ns);
>> +}
>
>nvme_ns_open and nvme_ns_release?
ok.
>
>> +
>> +static int nvme_open(struct block_device *bdev, fmode_t mode)
>> +{
>> + struct nvme_ns *ns = bdev->bd_disk->private_data;
>> +
>> + return __nvme_open(ns);
>> +}
>> +
>> static void nvme_release(struct gendisk *disk, fmode_t mode)
>> {
>> struct nvme_ns *ns = disk->private_data;
>>
>> - module_put(ns->ctrl->ops->module);
>> - nvme_put_ns(ns);
>> + __nvme_release(ns);
>
>No need for the local ns variable in both cases.
ok.
>
>> +static int nvme_cdev_open(struct inode *inode, struct file *file)
>> +{
>> + struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev);
>> + int ret;
>> +
>> + ret = __nvme_open(ns);
>> + if (!ret)
>> + file->private_data = ns->disk;
>> +
>> + return ret;
>
>Do we need the ->private_data assignment at all? I think the ioctl
>handler could just grab it directly from i_cdev.
Mmmm. Good point. I'll try that.
>
>> + sprintf(cdisk_name, "nvme%dn%dc", ctrl->instance, ns->head->instance);
>
>And the most important naming decision is this. I have two issues with
>naming still:
>
> - we already use the c for controller in the hidden disk naming. Although
> that is in a different position, I think this is not super intuitive.
> - this is missing multipath support entirely, so once we want to add
> multipath support we'll run into issues. So maybe use something
> based off the hidden node naming? E.g.:
>
> sprintf(disk_name, "nvme-generic-%dc%dn%d", ctrl->subsys->instance,
> ctrl->instance, ns->head->instance);
Perfect. Sounds like a good compromise to still keep the original hidden
disk. Keith is happy too, so we have a plan.
>> + /* When the device does not support any of the features required by the
>> + * kernel (or viceversa), hide the block device. We can still rely on
>> + * the namespace char device for submitting IOCTLs
>> + */
>
>Normal kernel comment style is the opening
>
> /*
>
>on its own line.
OK.
>
>> if (nvme_update_ns_info(ns, id))
>> - goto out_put_disk;
>> + disk->flags |= GENHD_FL_HIDDEN;
>
>I don't think we can do this based on all the error returns. I think
>we'll have to move the flags manipulation into nvme_update_ns_info to
>also cover the revalidate case.
Ok.
I am working on the multipath part. I'll send a V3 with all these
comments and then a follow-up patch with multipath.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2020-12-09 9:17 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2020-12-08 13:29 [PATCH V2] nvme: enable char device per namespace javier
2020-12-08 14:21 ` Christoph Hellwig
2020-12-08 19:03 ` Keith Busch
2020-12-09 9:16 ` Javier González
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox