linux-doc.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [Patch v4 2/3] Drivers: hv: add Azure Blob driver
       [not found] <1626751866-15765-1-git-send-email-longli@linuxonhyperv.com>
@ 2021-07-20  3:31 ` longli
  2021-07-20  5:26   ` Greg Kroah-Hartman
                     ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: longli @ 2021-07-20  3:31 UTC (permalink / raw)
  To: linux-fs, linux-block, linux-kernel, linux-hyperv
  Cc: Long Li, Jonathan Corbet, K. Y. Srinivasan, Haiyang Zhang,
	Stephen Hemminger, Wei Liu, Dexuan Cui, Greg Kroah-Hartman,
	Bjorn Andersson, Hans de Goede, Dan Williams, Maximilian Luz,
	Mike Rapoport, Ben Widawsky, Jiri Slaby, Andra Paraschiv,
	Siddharth Gupta, Hannes Reinecke, linux-doc

From: Long Li <longli@microsoft.com>

Azure Blob provides scalable, secure and shared storage services for Azure.

This driver adds support for accelerated access to Azure Blob storage. As an
alternative to REST APIs, it provides a fast data path that uses host native
network stack and direct data link for storage server access.

Cc: Jonathan Corbet <corbet@lwn.net>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Maximilian Luz <luzmaximilian@gmail.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Ben Widawsky <ben.widawsky@intel.com>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: Andra Paraschiv <andraprs@amazon.com>
Cc: Siddharth Gupta <sidgup@codeaurora.org>
Cc: Hannes Reinecke <hare@suse.de>
Cc: linux-doc@vger.kernel.org
Signed-off-by: Long Li <longli@microsoft.com>
---
 .../userspace-api/ioctl/ioctl-number.rst      |   2 +
 drivers/hv/Kconfig                            |  11 +
 drivers/hv/Makefile                           |   1 +
 drivers/hv/channel_mgmt.c                     |   7 +
 drivers/hv/hv_azure_blob.c                    | 628 ++++++++++++++++++
 include/linux/hyperv.h                        |   9 +
 include/uapi/misc/hv_azure_blob.h             |  34 +
 7 files changed, 692 insertions(+)
 create mode 100644 drivers/hv/hv_azure_blob.c
 create mode 100644 include/uapi/misc/hv_azure_blob.h

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 9bfc2b510c64..1ee8c0c7bd2e 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -180,6 +180,8 @@ Code  Seq#    Include File                                           Comments
 'R'   01     linux/rfkill.h                                          conflict!
 'R'   C0-DF  net/bluetooth/rfcomm.h
 'R'   E0     uapi/linux/fsl_mc.h
+'R'   F0-FF  uapi/misc/hv_azure_blob.h                               Microsoft Azure Blob driver
+                                                                     <mailto:longli@microsoft.com>
 'S'   all    linux/cdrom.h                                           conflict!
 'S'   80-81  scsi/scsi_ioctl.h                                       conflict!
 'S'   82-FF  scsi/scsi.h                                             conflict!
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 66c794d92391..53bebd0ad812 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -27,4 +27,15 @@ config HYPERV_BALLOON
 	help
 	  Select this option to enable Hyper-V Balloon driver.
 
+config HYPERV_AZURE_BLOB
+	tristate "Microsoft Azure Blob driver"
+	depends on HYPERV && X86_64
+	help
+	  Select this option to enable Microsoft Azure Blob driver.
+
+	  This driver implements a fast datapath over Hyper-V to support
+	  accelerated access to Microsoft Azure Blob services.
+	  To compile this driver as a module, choose M here. The module will be
+	  called azure_blob.
+
 endmenu
diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index 94daf8240c95..272644532245 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -2,6 +2,7 @@
 obj-$(CONFIG_HYPERV)		+= hv_vmbus.o
 obj-$(CONFIG_HYPERV_UTILS)	+= hv_utils.o
 obj-$(CONFIG_HYPERV_BALLOON)	+= hv_balloon.o
+obj-$(CONFIG_HYPERV_AZURE_BLOB)	+= hv_azure_blob.o
 
 CFLAGS_hv_trace.o = -I$(src)
 CFLAGS_hv_balloon.o = -I$(src)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 705e95d7a040..3095611045b5 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -154,6 +154,13 @@ const struct vmbus_device vmbus_devs[] = {
 	  .allowed_in_isolated = false,
 	},
 
+	/* Azure Blob */
+	{ .dev_type = HV_AZURE_BLOB,
+	  HV_AZURE_BLOB_GUID,
+	  .perf_device = false,
+	  .allowed_in_isolated = false,
+	},
+
 	/* Unknown GUID */
 	{ .dev_type = HV_UNKNOWN,
 	  .perf_device = false,
diff --git a/drivers/hv/hv_azure_blob.c b/drivers/hv/hv_azure_blob.c
new file mode 100644
index 000000000000..04bec92aa058
--- /dev/null
+++ b/drivers/hv/hv_azure_blob.c
@@ -0,0 +1,628 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2021 Microsoft Corporation. */
+
+#include <uapi/misc/hv_azure_blob.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/cred.h>
+#include <linux/debugfs.h>
+#include <linux/pagemap.h>
+#include <linux/hyperv.h>
+#include <linux/miscdevice.h>
+#include <linux/uio.h>
+
+struct az_blob_device {
+	struct hv_device *device;
+
+	/* Opened files maintained by this device */
+	struct list_head file_list;
+	/* Lock for protecting file_list */
+	spinlock_t file_lock;
+
+	/* The refcount for this device */
+	refcount_t count;
+
+	/* Pending requests to VSP */
+	atomic_t pending;
+	wait_queue_head_t waiting_to_drain;
+
+	bool removing;
+};
+
+/* VSP messages */
+enum az_blob_vsp_request_type {
+	AZ_BLOB_DRIVER_REQUEST_FIRST     = 0x100,
+	AZ_BLOB_DRIVER_USER_REQUEST      = 0x100,
+	AZ_BLOB_DRIVER_REGISTER_BUFFER   = 0x101,
+	AZ_BLOB_DRIVER_DEREGISTER_BUFFER = 0x102,
+};
+
+/* VSC->VSP request */
+struct az_blob_vsp_request {
+	u32 version;
+	u32 timeout_ms;
+	u32 data_buffer_offset;
+	u32 data_buffer_length;
+	u32 data_buffer_valid;
+	u32 operation_type;
+	u32 request_buffer_offset;
+	u32 request_buffer_length;
+	u32 response_buffer_offset;
+	u32 response_buffer_length;
+	guid_t transaction_id;
+} __packed;
+
+/* VSP->VSC response */
+struct az_blob_vsp_response {
+	u32 length;
+	u32 error;
+	u32 response_len;
+} __packed;
+
+struct az_blob_vsp_request_ctx {
+	struct list_head list;
+	struct completion wait_vsp;
+	struct az_blob_request_sync *request;
+};
+
+struct az_blob_file_ctx {
+	struct list_head list;
+
+	/* List of pending requests to VSP */
+	struct list_head vsp_pending_requests;
+	/* Lock for protecting vsp_pending_requests */
+	spinlock_t vsp_pending_lock;
+	wait_queue_head_t wait_vsp_pending;
+
+	pid_t pid;
+
+	struct az_blob_device *dev;
+};
+
+/* The maximum number of pages we can pass to VSP in a single packet */
+#define AZ_BLOB_MAX_PAGES 8192
+
+static struct az_blob_device *az_blob_dev;
+#define AZ_DEV (&az_blob_dev->device->device)
+
+/* Ring buffer size in bytes */
+#define AZ_BLOB_RING_SIZE (128 * 1024)
+
+/* System wide device queue depth */
+#define AZ_BLOB_QUEUE_DEPTH 1024
+
+/* The VSP protocol version this driver understands */
+#define VSP_PROTOCOL_VERSION_V1 0
+
+static const struct hv_vmbus_device_id id_table[] = {
+	{ HV_AZURE_BLOB_GUID,
+	  .driver_data = 0
+	},
+	{ },
+};
+
+static void az_blob_on_channel_callback(void *context)
+{
+	struct vmbus_channel *channel = (struct vmbus_channel *)context;
+	const struct vmpacket_descriptor *desc;
+
+	foreach_vmbus_pkt(desc, channel) {
+		struct az_blob_vsp_request_ctx *request_ctx;
+		struct az_blob_vsp_response *response;
+		u64 cmd_rqst = vmbus_request_addr(&channel->requestor,
+						  desc->trans_id);
+		if (cmd_rqst == VMBUS_RQST_ERROR) {
+			dev_err(AZ_DEV, "incorrect transaction id %llu\n",
+				desc->trans_id);
+			continue;
+		}
+		request_ctx = (struct az_blob_vsp_request_ctx *)cmd_rqst;
+		response = hv_pkt_data(desc);
+
+		dev_dbg(AZ_DEV, "got response for request %pUb status %u "
+			    "response_len %u\n",
+			    &request_ctx->request->guid, response->error,
+			    response->response_len);
+		request_ctx->request->response.status = response->error;
+		request_ctx->request->response.response_len =
+			response->response_len;
+		complete(&request_ctx->wait_vsp);
+	}
+}
+
+static int az_blob_fop_open(struct inode *inode, struct file *file)
+{
+	struct az_blob_device *dev = az_blob_dev;
+	struct az_blob_file_ctx *file_ctx;
+	unsigned long flags;
+
+	file_ctx = kzalloc(sizeof(*file_ctx), GFP_KERNEL);
+	if (!file_ctx)
+		return -ENOMEM;
+
+	file_ctx->pid = task_tgid_vnr(current);
+	INIT_LIST_HEAD(&file_ctx->vsp_pending_requests);
+	init_waitqueue_head(&file_ctx->wait_vsp_pending);
+	spin_lock_init(&file_ctx->vsp_pending_lock);
+	file_ctx->dev = dev;
+	refcount_inc(&dev->count);
+
+	spin_lock_irqsave(&dev->file_lock, flags);
+	list_add_tail(&file_ctx->list, &dev->file_list);
+	spin_unlock_irqrestore(&dev->file_lock, flags);
+
+	file->private_data = file_ctx;
+	return 0;
+}
+
+static int az_blob_fop_release(struct inode *inode, struct file *file)
+{
+	struct az_blob_file_ctx *file_ctx = file->private_data;
+	struct az_blob_device *dev = file_ctx->dev;
+	unsigned long flags;
+
+	wait_event(file_ctx->wait_vsp_pending,
+		   list_empty(&file_ctx->vsp_pending_requests));
+
+	spin_lock_irqsave(&dev->file_lock, flags);
+	list_del(&file_ctx->list);
+	spin_unlock_irqrestore(&dev->file_lock, flags);
+
+	kfree(file_ctx);
+	if (refcount_dec_and_test(&dev->count))
+		kfree(dev);
+
+	return 0;
+}
+
+static inline bool az_blob_safe_file_access(struct file *file)
+{
+	return file->f_cred == current_cred() && !uaccess_kernel();
+}
+
+/* Pin the user buffer pages into memory for passing to VSP */
+static int get_buffer_pages(int rw, void __user *buffer, u32 buffer_len,
+			    struct page ***ppages, size_t *start,
+			    size_t *num_pages)
+{
+	struct iovec iov;
+	struct iov_iter iter;
+	int ret;
+	ssize_t result;
+	struct page **pages;
+
+	ret = import_single_range(rw, buffer, buffer_len, &iov, &iter);
+	if (ret) {
+		dev_dbg(AZ_DEV, "request buffer access error %d\n", ret);
+		return ret;
+	}
+
+	result = iov_iter_get_pages_alloc(&iter, &pages, buffer_len, start);
+	if (result < 0) {
+		dev_dbg(AZ_DEV, "failed to pin user pages result=%ld\n", result);
+		return result;
+	}
+
+	*num_pages = (result + *start + PAGE_SIZE - 1) / PAGE_SIZE;
+	if (result != buffer_len) {
+		dev_dbg(AZ_DEV, "can't pin user pages requested %d got %ld\n",
+			buffer_len, result);
+		for (i = 0; i < *num_pages; i++)
+			put_page(pages[i]);
+		kvfree(pages);
+		return -EFAULT;
+	}
+
+	*ppages = pages;
+	return 0;
+}
+
+static void fill_in_page_buffer(u64 *pfn_array, int *index,
+				struct page **pages, unsigned long num_pages)
+{
+	int i, page_idx = *index;
+
+	for (i = 0; i < num_pages; i++)
+		pfn_array[page_idx++] = page_to_pfn(pages[i]);
+	*index = page_idx;
+}
+
+static void free_buffer_pages(size_t num_pages, struct page **pages)
+{
+	unsigned long i;
+
+	for (i = 0; i < num_pages; i++)
+		if (pages && pages[i])
+			put_page(pages[i]);
+	kvfree(pages);
+}
+
+static long az_blob_ioctl_user_request(struct file *filp, unsigned long arg)
+{
+	struct az_blob_file_ctx *file_ctx = filp->private_data;
+	struct az_blob_device *dev = file_ctx->dev;
+	struct az_blob_request_sync __user *request_user =
+		(struct az_blob_request_sync __user *)arg;
+	struct az_blob_request_sync request;
+	struct az_blob_vsp_request_ctx request_ctx;
+	unsigned long flags;
+	int ret;
+	size_t request_start, request_num_pages = 0;
+	size_t response_start, response_num_pages = 0;
+	size_t data_start, data_num_pages = 0, total_num_pages;
+	struct page **request_pages = NULL, **response_pages = NULL;
+	struct page **data_pages = NULL;
+	struct vmbus_packet_mpb_array *desc;
+	u64 *pfn_array;
+	int desc_size;
+	int page_idx;
+	struct az_blob_vsp_request *vsp_request;
+
+	/* Fast fail if device is being removed */
+	if (dev->removing)
+		return -ENODEV;
+
+	if (!az_blob_safe_file_access(filp)) {
+		dev_dbg(AZ_DEV, "process %d(%s) changed security contexts after"
+			    " opening file descriptor\n",
+			    task_tgid_vnr(current), current->comm);
+		return -EACCES;
+	}
+
+	if (copy_from_user(&request, request_user, sizeof(request))) {
+		dev_dbg(AZ_DEV, "don't have permission to user provided buffer\n");
+		return -EFAULT;
+	}
+
+	dev_dbg(AZ_DEV, "az_blob ioctl request guid %pUb timeout %u request_len %u"
+		    " response_len %u data_len %u request_buffer %llx "
+		    "response_buffer %llx data_buffer %llx\n",
+		    &request.guid, request.timeout, request.request_len,
+		    request.response_len, request.data_len, request.request_buffer,
+		    request.response_buffer, request.data_buffer);
+
+	if (!request.request_len || !request.response_len)
+		return -EINVAL;
+
+	if (request.data_len && request.data_len < request.data_valid)
+		return -EINVAL;
+
+	if (request.data_len > PAGE_SIZE * AZ_BLOB_MAX_PAGES ||
+	    request.request_len > PAGE_SIZE * AZ_BLOB_MAX_PAGES ||
+	    request.response_len > PAGE_SIZE * AZ_BLOB_MAX_PAGES)
+		return -EINVAL;
+
+	init_completion(&request_ctx.wait_vsp);
+	request_ctx.request = &request;
+
+	ret = get_buffer_pages(READ, (void __user *)request.request_buffer,
+			       request.request_len, &request_pages,
+			       &request_start, &request_num_pages);
+	if (ret)
+		goto get_user_page_failed;
+
+	ret = get_buffer_pages(READ | WRITE,
+			       (void __user *)request.response_buffer,
+			       request.response_len, &response_pages,
+			       &response_start, &response_num_pages);
+	if (ret)
+		goto get_user_page_failed;
+
+	if (request.data_len) {
+		ret = get_buffer_pages(READ | WRITE,
+				       (void __user *)request.data_buffer,
+				       request.data_len, &data_pages,
+				       &data_start, &data_num_pages);
+		if (ret)
+			goto get_user_page_failed;
+	}
+
+	total_num_pages = request_num_pages + response_num_pages +
+				data_num_pages;
+	if (total_num_pages > AZ_BLOB_MAX_PAGES) {
+		dev_dbg(AZ_DEV, "number of DMA pages %lu buffer exceeding %u\n",
+			total_num_pages, AZ_BLOB_MAX_PAGES);
+		ret = -EINVAL;
+		goto get_user_page_failed;
+	}
+
+	/* Construct a VMBUS packet and send it over to VSP */
+	desc_size = struct_size(desc, range.pfn_array, total_num_pages);
+	desc = kzalloc(desc_size, GFP_KERNEL);
+	vsp_request = kzalloc(sizeof(*vsp_request), GFP_KERNEL);
+	if (!desc || !vsp_request) {
+		kfree(desc);
+		kfree(vsp_request);
+		ret = -ENOMEM;
+		goto get_user_page_failed;
+	}
+
+	desc->range.offset = 0;
+	desc->range.len = total_num_pages * PAGE_SIZE;
+	pfn_array = desc->range.pfn_array;
+	page_idx = 0;
+
+	if (request.data_len) {
+		fill_in_page_buffer(pfn_array, &page_idx, data_pages,
+				    data_num_pages);
+		vsp_request->data_buffer_offset = data_start;
+		vsp_request->data_buffer_length = request.data_len;
+		vsp_request->data_buffer_valid = request.data_valid;
+	}
+
+	fill_in_page_buffer(pfn_array, &page_idx, request_pages,
+			    request_num_pages);
+	vsp_request->request_buffer_offset = request_start +
+						data_num_pages * PAGE_SIZE;
+	vsp_request->request_buffer_length = request.request_len;
+
+	fill_in_page_buffer(pfn_array, &page_idx, response_pages,
+			    response_num_pages);
+	vsp_request->response_buffer_offset = response_start +
+		(data_num_pages + request_num_pages) * PAGE_SIZE;
+	vsp_request->response_buffer_length = request.response_len;
+
+	vsp_request->version = VSP_PROTOCOL_VERSION_V1;
+	vsp_request->timeout_ms = request.timeout;
+	vsp_request->operation_type = AZ_BLOB_DRIVER_USER_REQUEST;
+	guid_copy(&vsp_request->transaction_id, &request.guid);
+
+	spin_lock_irqsave(&file_ctx->vsp_pending_lock, flags);
+	list_add_tail(&request_ctx.list, &file_ctx->vsp_pending_requests);
+	spin_unlock_irqrestore(&file_ctx->vsp_pending_lock, flags);
+
+	atomic_inc(&dev->pending);
+
+	/* Check if device is being removed */
+	if (dev->removing) {
+		ret = -ENODEV;
+		goto vmbus_send_failed;
+	}
+
+	ret = vmbus_sendpacket_mpb_desc(dev->device->channel, desc, desc_size,
+					vsp_request, sizeof(*vsp_request),
+					(u64)&request_ctx);
+
+	kfree(desc);
+	kfree(vsp_request);
+	if (ret)
+		goto vmbus_send_failed;
+
+	wait_for_completion(&request_ctx.wait_vsp);
+
+	/*
+	 * At this point, the response is already written to request
+	 * by VMBUS completion handler, copy them to user-mode buffers
+	 * and return to user-mode
+	 */
+	if (copy_to_user(&request_user->response, &request.response,
+			 sizeof(request.response)))
+		ret = -EFAULT;
+
+vmbus_send_failed:
+	if (atomic_dec_and_test(&dev->pending) && dev->removing)
+		wake_up(&dev->waiting_to_drain);
+
+	spin_lock_irqsave(&file_ctx->vsp_pending_lock, flags);
+	list_del(&request_ctx.list);
+	if (list_empty(&file_ctx->vsp_pending_requests))
+		wake_up(&file_ctx->wait_vsp_pending);
+	spin_unlock_irqrestore(&file_ctx->vsp_pending_lock, flags);
+
+get_user_page_failed:
+	free_buffer_pages(request_num_pages, request_pages);
+	free_buffer_pages(response_num_pages, response_pages);
+	free_buffer_pages(data_num_pages, data_pages);
+
+	return ret;
+}
+
+static long az_blob_fop_ioctl(struct file *filp, unsigned int cmd,
+			      unsigned long arg)
+{
+	switch (cmd) {
+	case IOCTL_AZ_BLOB_DRIVER_USER_REQUEST:
+		return az_blob_ioctl_user_request(filp, arg);
+
+	default:
+		dev_dbg(AZ_DEV, "unrecognized IOCTL code %u\n", cmd);
+	}
+
+	return -EINVAL;
+}
+
+static const struct file_operations az_blob_client_fops = {
+	.owner		= THIS_MODULE,
+	.open		= az_blob_fop_open,
+	.unlocked_ioctl = az_blob_fop_ioctl,
+	.release	= az_blob_fop_release,
+};
+
+static struct miscdevice az_blob_misc_device = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "azure_blob",
+	.fops	= &az_blob_client_fops,
+};
+
+static int az_blob_show_pending_requests(struct seq_file *m, void *v)
+{
+	unsigned long flags, flags2;
+	struct az_blob_vsp_request_ctx *request_ctx;
+	struct az_blob_file_ctx *file_ctx;
+	struct az_blob_device *dev = az_blob_dev;
+
+	seq_puts(m, "List of pending requests\n");
+	seq_puts(m, "UUID request_len response_len data_len "
+		"request_buffer response_buffer data_buffer\n");
+	spin_lock_irqsave(&dev->file_lock, flags);
+	list_for_each_entry(file_ctx, &dev->file_list, list) {
+		spin_lock_irqsave(&file_ctx->vsp_pending_lock, flags2);
+		seq_printf(m, "file context for pid %u\n", file_ctx->pid);
+		list_for_each_entry(request_ctx,
+				    &file_ctx->vsp_pending_requests, list) {
+			seq_printf(m, "%pUb ", &request_ctx->request->guid);
+			seq_printf(m, "%u ", request_ctx->request->request_len);
+			seq_printf(m, "%u ",
+				   request_ctx->request->response_len);
+			seq_printf(m, "%u ", request_ctx->request->data_len);
+			seq_printf(m, "%llx ",
+				   request_ctx->request->request_buffer);
+			seq_printf(m, "%llx ",
+				   request_ctx->request->response_buffer);
+			seq_printf(m, "%llx\n",
+				   request_ctx->request->data_buffer);
+		}
+		spin_unlock_irqrestore(&file_ctx->vsp_pending_lock, flags2);
+	}
+	spin_unlock_irqrestore(&dev->file_lock, flags);
+
+	return 0;
+}
+
+static int az_blob_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, az_blob_show_pending_requests, NULL);
+}
+
+static const struct file_operations az_blob_debugfs_fops = {
+	.open		= az_blob_debugfs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release
+};
+
+static void az_blob_remove_device(void)
+{
+	struct dentry *debugfs_root = debugfs_lookup("az_blob", NULL);
+
+	debugfs_remove_recursive(debugfs_root);
+	misc_deregister(&az_blob_misc_device);
+}
+
+static int az_blob_create_device(struct az_blob_device *dev)
+{
+	int ret;
+	struct dentry *debugfs_root;
+
+	ret = misc_register(&az_blob_misc_device);
+	if (ret)
+		return ret;
+
+	debugfs_root = debugfs_create_dir("az_blob", NULL);
+	debugfs_create_file("pending_requests", 0400, debugfs_root, NULL,
+			    &az_blob_debugfs_fops);
+
+	return 0;
+}
+
+static int az_blob_connect_to_vsp(struct hv_device *device,
+				  struct az_blob_device *dev, u32 ring_size)
+{
+	int ret;
+
+	dev->device = device;
+	device->channel->rqstor_size = AZ_BLOB_QUEUE_DEPTH;
+
+	ret = vmbus_open(device->channel, ring_size, ring_size, NULL, 0,
+			 az_blob_on_channel_callback, device->channel);
+
+	if (ret)
+		return ret;
+
+	hv_set_drvdata(device, dev);
+
+	return ret;
+}
+
+static void az_blob_remove_vmbus(struct hv_device *device)
+{
+	hv_set_drvdata(device, NULL);
+	vmbus_close(device->channel);
+}
+
+static int az_blob_probe(struct hv_device *device,
+			 const struct hv_vmbus_device_id *dev_id)
+{
+	int ret;
+	struct az_blob_device *dev;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	spin_lock_init(&dev->file_lock);
+	INIT_LIST_HEAD(&dev->file_list);
+	atomic_set(&dev->pending, 0);
+	init_waitqueue_head(&dev->waiting_to_drain);
+
+	ret = az_blob_connect_to_vsp(device, dev, AZ_BLOB_RING_SIZE);
+	if (ret)
+		goto fail;
+
+	refcount_set(&dev->count, 1);
+	az_blob_dev = dev;
+
+	// create user-mode client library facing device
+	ret = az_blob_create_device(dev);
+	if (ret) {
+		dev_err(AZ_DEV, "failed to create device ret=%d\n", ret);
+		az_blob_remove_vmbus(device);
+		goto fail;
+	}
+
+	dev_info(AZ_DEV, "successfully probed device\n");
+	return 0;
+
+fail:
+	kfree(dev);
+	return ret;
+}
+
+static int az_blob_remove(struct hv_device *device)
+{
+	struct az_blob_device *dev = hv_get_drvdata(device);
+
+	dev->removing = true;
+	az_blob_remove_device();
+
+	/*
+	 * We won't get any new requests from user-mode. There may be
+	 * pending requests already sent over VMBUS and we may get more
+	 * responses from VSP. Wait until no VSC/VSP traffic is possible.
+	 */
+	wait_event(dev->waiting_to_drain,
+		   atomic_read(&dev->pending) == 0);
+
+	az_blob_remove_vmbus(device);
+
+	if (refcount_dec_and_test(&dev->count))
+		kfree(dev);
+
+	return 0;
+}
+
+static struct hv_driver az_blob_drv = {
+	.name		= KBUILD_MODNAME,
+	.id_table	= id_table,
+	.probe		= az_blob_probe,
+	.remove		= az_blob_remove,
+	.driver		= {
+		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
+	},
+};
+
+static int __init az_blob_drv_init(void)
+{
+	return vmbus_driver_register(&az_blob_drv);
+}
+
+static void __exit az_blob_drv_exit(void)
+{
+	vmbus_driver_unregister(&az_blob_drv);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Microsoft Azure Blob driver");
+module_init(az_blob_drv_init);
+module_exit(az_blob_drv_exit);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index d1e59dbef1dd..ac3136284ace 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -772,6 +772,7 @@ enum vmbus_device_type {
 	HV_FCOPY,
 	HV_BACKUP,
 	HV_DM,
+	HV_AZURE_BLOB,
 	HV_UNKNOWN,
 };
 
@@ -1349,6 +1350,14 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size);
 	.guid = GUID_INIT(0xba6163d9, 0x04a1, 0x4d29, 0xb6, 0x05, \
 			  0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f)
 
+/*
+ * Azure Blob GUID
+ * {0590b792-db64-45cc-81db-b8d70c577c9e}
+ */
+#define HV_AZURE_BLOB_GUID \
+	.guid = GUID_INIT(0x0590b792, 0xdb64, 0x45cc, 0x81, 0xdb, \
+			  0xb8, 0xd7, 0x0c, 0x57, 0x7c, 0x9e)
+
 /*
  * Shutdown GUID
  * {0e0b6031-5213-4934-818b-38d90ced39db}
diff --git a/include/uapi/misc/hv_azure_blob.h b/include/uapi/misc/hv_azure_blob.h
new file mode 100644
index 000000000000..ebb141b2762a
--- /dev/null
+++ b/include/uapi/misc/hv_azure_blob.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/* Copyright (c) 2021 Microsoft Corporation. */
+
+#ifndef _AZ_BLOB_H
+#define _AZ_BLOB_H
+
+#include <linux/kernel.h>
+#include <linux/uuid.h>
+
+/* user-mode sync request sent through ioctl */
+struct az_blob_request_sync_response {
+	__u32 status;
+	__u32 response_len;
+};
+
+struct az_blob_request_sync {
+	guid_t guid;
+	__u32 timeout;
+	__u32 request_len;
+	__u32 response_len;
+	__u32 data_len;
+	__u32 data_valid;
+	__aligned_u64 request_buffer;
+	__aligned_u64 response_buffer;
+	__aligned_u64 data_buffer;
+	struct az_blob_request_sync_response response;
+};
+
+#define AZ_BLOB_MAGIC_NUMBER	'R'
+#define IOCTL_AZ_BLOB_DRIVER_USER_REQUEST \
+		_IOWR(AZ_BLOB_MAGIC_NUMBER, 0xf0, \
+			struct az_blob_request_sync)
+
+#endif /* define _AZ_BLOB_H */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [Patch v4 2/3] Drivers: hv: add Azure Blob driver
  2021-07-20  3:31 ` [Patch v4 2/3] Drivers: hv: add Azure Blob driver longli
@ 2021-07-20  5:26   ` Greg Kroah-Hartman
  2021-07-20  5:30   ` Greg Kroah-Hartman
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Greg Kroah-Hartman @ 2021-07-20  5:26 UTC (permalink / raw)
  To: longli
  Cc: linux-fs, linux-block, linux-kernel, linux-hyperv, Long Li,
	Jonathan Corbet, K. Y. Srinivasan, Haiyang Zhang,
	Stephen Hemminger, Wei Liu, Dexuan Cui, Bjorn Andersson,
	Hans de Goede, Dan Williams, Maximilian Luz, Mike Rapoport,
	Ben Widawsky, Jiri Slaby, Andra Paraschiv, Siddharth Gupta,
	Hannes Reinecke, linux-doc

On Mon, Jul 19, 2021 at 08:31:05PM -0700, longli@linuxonhyperv.com wrote:
> From: Long Li <longli@microsoft.com>
> 
> Azure Blob provides scalable, secure and shared storage services for Azure.
> 
> This driver adds support for accelerated access to Azure Blob storage. As an
> alternative to REST APIs, it provides a fast data path that uses host native
> network stack and direct data link for storage server access.

Thank you for fixing up the license info, but you did not include any
information here about how to use this, where the userspace tools are,
nor why you are going around the existing kernel block layer and proof
that going around the block layer is a good idea.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Patch v4 2/3] Drivers: hv: add Azure Blob driver
  2021-07-20  3:31 ` [Patch v4 2/3] Drivers: hv: add Azure Blob driver longli
  2021-07-20  5:26   ` Greg Kroah-Hartman
@ 2021-07-20  5:30   ` Greg Kroah-Hartman
  2021-07-20  7:34   ` Greg Kroah-Hartman
  2021-07-20 11:10   ` Jiri Slaby
  3 siblings, 0 replies; 10+ messages in thread
From: Greg Kroah-Hartman @ 2021-07-20  5:30 UTC (permalink / raw)
  To: longli
  Cc: linux-fs, linux-block, linux-kernel, linux-hyperv, Long Li,
	Jonathan Corbet, K. Y. Srinivasan, Haiyang Zhang,
	Stephen Hemminger, Wei Liu, Dexuan Cui, Bjorn Andersson,
	Hans de Goede, Dan Williams, Maximilian Luz, Mike Rapoport,
	Ben Widawsky, Jiri Slaby, Andra Paraschiv, Siddharth Gupta,
	Hannes Reinecke, linux-doc

On Mon, Jul 19, 2021 at 08:31:05PM -0700, longli@linuxonhyperv.com wrote:
> From: Long Li <longli@microsoft.com>
> 
> Azure Blob provides scalable, secure and shared storage services for Azure.
> 
> This driver adds support for accelerated access to Azure Blob storage. As an
> alternative to REST APIs, it provides a fast data path that uses host native
> network stack and direct data link for storage server access.
> 
> Cc: Jonathan Corbet <corbet@lwn.net>
> Cc: "K. Y. Srinivasan" <kys@microsoft.com>
> Cc: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: Wei Liu <wei.liu@kernel.org>
> Cc: Dexuan Cui <decui@microsoft.com>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
> Cc: Hans de Goede <hdegoede@redhat.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Maximilian Luz <luzmaximilian@gmail.com>
> Cc: Mike Rapoport <rppt@kernel.org>
> Cc: Ben Widawsky <ben.widawsky@intel.com>
> Cc: Jiri Slaby <jirislaby@kernel.org>
> Cc: Andra Paraschiv <andraprs@amazon.com>
> Cc: Siddharth Gupta <sidgup@codeaurora.org>
> Cc: Hannes Reinecke <hare@suse.de>
> Cc: linux-doc@vger.kernel.org
> Signed-off-by: Long Li <longli@microsoft.com>
> ---
>  .../userspace-api/ioctl/ioctl-number.rst      |   2 +
>  drivers/hv/Kconfig                            |  11 +
>  drivers/hv/Makefile                           |   1 +
>  drivers/hv/channel_mgmt.c                     |   7 +
>  drivers/hv/hv_azure_blob.c                    | 628 ++++++++++++++++++
>  include/linux/hyperv.h                        |   9 +
>  include/uapi/misc/hv_azure_blob.h             |  34 +
>  7 files changed, 692 insertions(+)
>  create mode 100644 drivers/hv/hv_azure_blob.c
>  create mode 100644 include/uapi/misc/hv_azure_blob.h
> 

Hi,

This is the friendly patch-bot of Greg Kroah-Hartman.  You have sent him
a patch that has triggered this response.  He used to manually respond
to these common problems, but in order to save his sanity (he kept
writing the same thing over and over, yet to different people), I was
created.  Hopefully you will not take offence and will fix the problem
in your patch and resubmit it so that it can be accepted into the Linux
kernel tree.

You are receiving this message because of the following common error(s)
as indicated below:

- This looks like a new version of a previously submitted patch, but you
  did not list below the --- line any changes from the previous version.
  Please read the section entitled "The canonical patch format" in the
  kernel file, Documentation/SubmittingPatches for what needs to be done
  here to properly describe this.

If you wish to discuss this problem further, or you have questions about
how to resolve this issue, please feel free to respond to this email and
Greg will reply once he has dug out from the pending patches received
from other developers.

thanks,

greg k-h's patch email bot

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Patch v4 2/3] Drivers: hv: add Azure Blob driver
  2021-07-20  3:31 ` [Patch v4 2/3] Drivers: hv: add Azure Blob driver longli
  2021-07-20  5:26   ` Greg Kroah-Hartman
  2021-07-20  5:30   ` Greg Kroah-Hartman
@ 2021-07-20  7:34   ` Greg Kroah-Hartman
  2021-07-20 19:57     ` Long Li
  2021-07-20 11:10   ` Jiri Slaby
  3 siblings, 1 reply; 10+ messages in thread
From: Greg Kroah-Hartman @ 2021-07-20  7:34 UTC (permalink / raw)
  To: longli
  Cc: linux-fs, linux-block, linux-kernel, linux-hyperv, Long Li,
	Jonathan Corbet, K. Y. Srinivasan, Haiyang Zhang,
	Stephen Hemminger, Wei Liu, Dexuan Cui, Bjorn Andersson,
	Hans de Goede, Dan Williams, Maximilian Luz, Mike Rapoport,
	Ben Widawsky, Jiri Slaby, Andra Paraschiv, Siddharth Gupta,
	Hannes Reinecke, linux-doc

On Mon, Jul 19, 2021 at 08:31:05PM -0700, longli@linuxonhyperv.com wrote:
> +struct az_blob_device {
> +	struct hv_device *device;
> +
> +	/* Opened files maintained by this device */
> +	struct list_head file_list;
> +	/* Lock for protecting file_list */
> +	spinlock_t file_lock;
> +
> +	/* The refcount for this device */
> +	refcount_t count;

Just use a kref please if you really need this.  Are you sure you do?
You already have 2 other reference counted objects being used here, why
make it 3?

> +	/* Pending requests to VSP */
> +	atomic_t pending;

Why does this need to be atomic?


> +	wait_queue_head_t waiting_to_drain;
> +
> +	bool removing;

Are you sure this actually works properly?  Why is it needed vs. any
other misc device?


> +/* VSC->VSP request */
> +struct az_blob_vsp_request {
> +	u32 version;
> +	u32 timeout_ms;
> +	u32 data_buffer_offset;
> +	u32 data_buffer_length;
> +	u32 data_buffer_valid;
> +	u32 operation_type;
> +	u32 request_buffer_offset;
> +	u32 request_buffer_length;
> +	u32 response_buffer_offset;
> +	u32 response_buffer_length;
> +	guid_t transaction_id;
> +} __packed;

Why packed?  If this is going across the wire somewhere, you need to
specify the endian-ness of these values, right?  If this is not going
across the wire, no need for it to be packed.

> +
> +/* VSP->VSC response */
> +struct az_blob_vsp_response {
> +	u32 length;
> +	u32 error;
> +	u32 response_len;
> +} __packed;

Same here.

> +
> +struct az_blob_vsp_request_ctx {
> +	struct list_head list;
> +	struct completion wait_vsp;
> +	struct az_blob_request_sync *request;
> +};
> +
> +struct az_blob_file_ctx {
> +	struct list_head list;
> +
> +	/* List of pending requests to VSP */
> +	struct list_head vsp_pending_requests;
> +	/* Lock for protecting vsp_pending_requests */
> +	spinlock_t vsp_pending_lock;
> +	wait_queue_head_t wait_vsp_pending;
> +
> +	pid_t pid;

Why do you need a pid?  What namespace is this pid in?

> +static int az_blob_probe(struct hv_device *device,
> +			 const struct hv_vmbus_device_id *dev_id)
> +{
> +	int ret;
> +	struct az_blob_device *dev;
> +
> +	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
> +	if (!dev)
> +		return -ENOMEM;
> +
> +	spin_lock_init(&dev->file_lock);
> +	INIT_LIST_HEAD(&dev->file_list);
> +	atomic_set(&dev->pending, 0);
> +	init_waitqueue_head(&dev->waiting_to_drain);
> +
> +	ret = az_blob_connect_to_vsp(device, dev, AZ_BLOB_RING_SIZE);
> +	if (ret)
> +		goto fail;
> +
> +	refcount_set(&dev->count, 1);
> +	az_blob_dev = dev;
> +
> +	// create user-mode client library facing device
> +	ret = az_blob_create_device(dev);
> +	if (ret) {
> +		dev_err(AZ_DEV, "failed to create device ret=%d\n", ret);
> +		az_blob_remove_vmbus(device);
> +		goto fail;
> +	}
> +
> +	dev_info(AZ_DEV, "successfully probed device\n");

When drivers are working properly, they should be quiet.

And what is with the AZ_DEV macro mess?

And can you handle more than one device in the system at one time?  I
think your debugfs logic will get really confused.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Patch v4 2/3] Drivers: hv: add Azure Blob driver
  2021-07-20  3:31 ` [Patch v4 2/3] Drivers: hv: add Azure Blob driver longli
                     ` (2 preceding siblings ...)
  2021-07-20  7:34   ` Greg Kroah-Hartman
@ 2021-07-20 11:10   ` Jiri Slaby
  2021-07-20 22:12     ` Long Li
  3 siblings, 1 reply; 10+ messages in thread
From: Jiri Slaby @ 2021-07-20 11:10 UTC (permalink / raw)
  To: longli, linux-fs, linux-block, linux-kernel, linux-hyperv
  Cc: Long Li, Jonathan Corbet, K. Y. Srinivasan, Haiyang Zhang,
	Stephen Hemminger, Wei Liu, Dexuan Cui, Greg Kroah-Hartman,
	Bjorn Andersson, Hans de Goede, Dan Williams, Maximilian Luz,
	Mike Rapoport, Ben Widawsky, Andra Paraschiv, Siddharth Gupta,
	Hannes Reinecke, linux-doc

On 20. 07. 21, 5:31, longli@linuxonhyperv.com wrote:
> --- /dev/null
> +++ b/include/uapi/misc/hv_azure_blob.h
> @@ -0,0 +1,34 @@
> +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
> +/* Copyright (c) 2021 Microsoft Corporation. */
> +
> +#ifndef _AZ_BLOB_H
> +#define _AZ_BLOB_H
> +
> +#include <linux/kernel.h>
> +#include <linux/uuid.h>

Quoting from 
https://lore.kernel.org/linux-doc/MWHPR21MB159375586D810EC5DCB66AF0D7039@MWHPR21MB1593.namprd21.prod.outlook.com/:
=====
Seems like a #include of asm/ioctl.h (or something similar)
is needed so that _IOWR is defined.  Also, a #include
is needed for __u32, __aligned_u64, guid_t, etc.
=====

Why was no include added?

> +
> +/* user-mode sync request sent through ioctl */
> +struct az_blob_request_sync_response {
> +	__u32 status;
> +	__u32 response_len;
> +};
> +
> +struct az_blob_request_sync {
> +	guid_t guid;
> +	__u32 timeout;
> +	__u32 request_len;
> +	__u32 response_len;
> +	__u32 data_len;
> +	__u32 data_valid;
> +	__aligned_u64 request_buffer;
> +	__aligned_u64 response_buffer;
> +	__aligned_u64 data_buffer;
> +	struct az_blob_request_sync_response response;
> +};
> +
> +#define AZ_BLOB_MAGIC_NUMBER	'R'
> +#define IOCTL_AZ_BLOB_DRIVER_USER_REQUEST \
> +		_IOWR(AZ_BLOB_MAGIC_NUMBER, 0xf0, \
> +			struct az_blob_request_sync)
> +
> +#endif /* define _AZ_BLOB_H */
> 

thanks,
-- 
js
suse labs

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [Patch v4 2/3] Drivers: hv: add Azure Blob driver
  2021-07-20  7:34   ` Greg Kroah-Hartman
@ 2021-07-20 19:57     ` Long Li
  2021-07-21  5:08       ` Greg Kroah-Hartman
  0 siblings, 1 reply; 10+ messages in thread
From: Long Li @ 2021-07-20 19:57 UTC (permalink / raw)
  To: Greg Kroah-Hartman, longli@linuxonhyperv.com
  Cc: linux-fs@vger.kernel.org, linux-block@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-hyperv@vger.kernel.org,
	Jonathan Corbet, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	Wei Liu, Dexuan Cui, Bjorn Andersson, Hans de Goede,
	Williams, Dan J, Maximilian Luz, Mike Rapoport, Ben Widawsky,
	Jiri Slaby, Andra Paraschiv, Siddharth Gupta, Hannes Reinecke,
	linux-doc@vger.kernel.org

> Subject: Re: [Patch v4 2/3] Drivers: hv: add Azure Blob driver
> 
> On Mon, Jul 19, 2021 at 08:31:05PM -0700, longli@linuxonhyperv.com wrote:
> > +struct az_blob_device {
> > +	struct hv_device *device;
> > +
> > +	/* Opened files maintained by this device */
> > +	struct list_head file_list;
> > +	/* Lock for protecting file_list */
> > +	spinlock_t file_lock;
> > +
> > +	/* The refcount for this device */
> > +	refcount_t count;
> 
> Just use a kref please if you really need this.  Are you sure you do?
> You already have 2 other reference counted objects being used here, why make
> it 3?

The "count" is to keep track how many user-mode instances and vmbus instance
are opened on this device. Being a VMBUS device, this device can be removed 
at any time (host servicing etc). We must remove the device when this happens
even if the device is still opened by some user-mode program. The "count" will
guarantee the lifecycle of the device object after all user-mode has released the device.

I looked at using "file_list" (it's used for tracking opened files by user-mode) for this purpose, 
but I found out I still need to manage the device count at the vmbus side. 

> 
> > +	/* Pending requests to VSP */
> > +	atomic_t pending;
> 
> Why does this need to be atomic?

"pending' is per-device maintained that could change when multiple-user access
the device at the same time.

> 
> 
> > +	wait_queue_head_t waiting_to_drain;
> > +
> > +	bool removing;
> 
> Are you sure this actually works properly?  Why is it needed vs. any other misc
> device?

When removing this device from vmbus, we need to guarantee there is no possible packets to
vmbus. This is a requirement before calling vmbus_close(). Other drivers of vmbus follows
the same procedure.

The reason why this driver needs this is that the device removal can happen in the middle of
az_blob_ioctl_user_request(), which can send packet over vmbus.

> 
> 
> > +/* VSC->VSP request */
> > +struct az_blob_vsp_request {
> > +	u32 version;
> > +	u32 timeout_ms;
> > +	u32 data_buffer_offset;
> > +	u32 data_buffer_length;
> > +	u32 data_buffer_valid;
> > +	u32 operation_type;
> > +	u32 request_buffer_offset;
> > +	u32 request_buffer_length;
> > +	u32 response_buffer_offset;
> > +	u32 response_buffer_length;
> > +	guid_t transaction_id;
> > +} __packed;
> 
> Why packed?  If this is going across the wire somewhere, you need to specify
> the endian-ness of these values, right?  If this is not going across the wire, no
> need for it to be packed.

Those data go through the wire.

All data structures specified in the Hyper-V and guest VM use Little Endian byte
ordering.  All HV core drivers have a dependence on X86, that guarantees this
ordering.

> 
> > +
> > +/* VSP->VSC response */
> > +struct az_blob_vsp_response {
> > +	u32 length;
> > +	u32 error;
> > +	u32 response_len;
> > +} __packed;
> 
> Same here.
> 
> > +
> > +struct az_blob_vsp_request_ctx {
> > +	struct list_head list;
> > +	struct completion wait_vsp;
> > +	struct az_blob_request_sync *request; };
> > +
> > +struct az_blob_file_ctx {
> > +	struct list_head list;
> > +
> > +	/* List of pending requests to VSP */
> > +	struct list_head vsp_pending_requests;
> > +	/* Lock for protecting vsp_pending_requests */
> > +	spinlock_t vsp_pending_lock;
> > +	wait_queue_head_t wait_vsp_pending;
> > +
> > +	pid_t pid;
> 
> Why do you need a pid?  What namespace is this pid in?

It's a request from user library team for production troubleshooting
purposes. It's exposed as informal in debugfs.

> 
> > +static int az_blob_probe(struct hv_device *device,
> > +			 const struct hv_vmbus_device_id *dev_id) {
> > +	int ret;
> > +	struct az_blob_device *dev;
> > +
> > +	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
> > +	if (!dev)
> > +		return -ENOMEM;
> > +
> > +	spin_lock_init(&dev->file_lock);
> > +	INIT_LIST_HEAD(&dev->file_list);
> > +	atomic_set(&dev->pending, 0);
> > +	init_waitqueue_head(&dev->waiting_to_drain);
> > +
> > +	ret = az_blob_connect_to_vsp(device, dev, AZ_BLOB_RING_SIZE);
> > +	if (ret)
> > +		goto fail;
> > +
> > +	refcount_set(&dev->count, 1);
> > +	az_blob_dev = dev;
> > +
> > +	// create user-mode client library facing device
> > +	ret = az_blob_create_device(dev);
> > +	if (ret) {
> > +		dev_err(AZ_DEV, "failed to create device ret=%d\n", ret);
> > +		az_blob_remove_vmbus(device);
> > +		goto fail;
> > +	}
> > +
> > +	dev_info(AZ_DEV, "successfully probed device\n");
> 
> When drivers are working properly, they should be quiet.

The reason is that in production environment when dealing with custom support
cases, there is no good way to check if the channel is opened on the device. Having
this message will greatly clear confusions on possible mis-configurations.

> 
> And what is with the AZ_DEV macro mess?

It's not required, it's just for saving code length. I can put "&az_blob_dev->device->device"
in every dev_err(), but it makes the code look a lot longer.

> 
> And can you handle more than one device in the system at one time?  I think
> your debugfs logic will get really confused.

There can be one device object active in the system at any given time. The debugfs grabs
the current active device object. If the device is being removed, removed or added, 
the current active device object is updated accordingly.

> 
> thanks,
> 
> greg k-h

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [Patch v4 2/3] Drivers: hv: add Azure Blob driver
  2021-07-20 11:10   ` Jiri Slaby
@ 2021-07-20 22:12     ` Long Li
  2021-07-21  4:57       ` Jiri Slaby
  0 siblings, 1 reply; 10+ messages in thread
From: Long Li @ 2021-07-20 22:12 UTC (permalink / raw)
  To: Jiri Slaby, longli@linuxonhyperv.com, linux-fs@vger.kernel.org,
	linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-hyperv@vger.kernel.org
  Cc: Jonathan Corbet, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	Wei Liu, Dexuan Cui, Greg Kroah-Hartman, Bjorn Andersson,
	Hans de Goede, Williams, Dan J, Maximilian Luz, Mike Rapoport,
	Ben Widawsky, Andra Paraschiv, Siddharth Gupta, Hannes Reinecke,
	linux-doc@vger.kernel.org

> Subject: Re: [Patch v4 2/3] Drivers: hv: add Azure Blob driver
> 
> On 20. 07. 21, 5:31, longli@linuxonhyperv.com wrote:
> > --- /dev/null
> > +++ b/include/uapi/misc/hv_azure_blob.h
> > @@ -0,0 +1,34 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
> > +/* Copyright (c) 2021 Microsoft Corporation. */
> > +
> > +#ifndef _AZ_BLOB_H
> > +#define _AZ_BLOB_H
> > +
> > +#include <linux/kernel.h>
> > +#include <linux/uuid.h>
> 
> Quoting from
> https://nam06.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.
> kernel.org%2Flinux-
> doc%2FMWHPR21MB159375586D810EC5DCB66AF0D7039%40MWHPR21MB1
> 593.namprd21.prod.outlook.com%2F&amp;data=04%7C01%7Clongli%40micr
> osoft.com%7C7fdf2d6ed15d4d4122a308d94b6eeed0%7C72f988bf86f141af91
> ab2d7cd011db47%7C1%7C0%7C637623762292949381%7CUnknown%7CTWFp
> bGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVC
> I6Mn0%3D%7C3000&amp;sdata=kv0ZkU1QL6TxlJJZEQEsT7aqLFL9lmP2SStz8k
> U5sIs%3D&amp;reserved=0:
> =====
> Seems like a #include of asm/ioctl.h (or something similar) is needed so that
> _IOWR is defined.  Also, a #include is needed for __u32, __aligned_u64,
> guid_t, etc.
> =====

The user-space code includes "sys/ioctl.h" for calling into ioctl(). "sys/ioctl.h"
includes <linux/ioctl.h>, so it has no problem finding _IOWR.

guid_t is defined in <uapi/linux/uuid.h>, included from <linux/uuid.h> (in this file)
__u32 and __aligned_u64 are defined in <uapi/linux/types.>, which is included from <linux/kernel.h> (in this file)




> 
> Why was no include added?
> 
> > +
> > +/* user-mode sync request sent through ioctl */ struct
> > +az_blob_request_sync_response {
> > +	__u32 status;
> > +	__u32 response_len;
> > +};
> > +
> > +struct az_blob_request_sync {
> > +	guid_t guid;
> > +	__u32 timeout;
> > +	__u32 request_len;
> > +	__u32 response_len;
> > +	__u32 data_len;
> > +	__u32 data_valid;
> > +	__aligned_u64 request_buffer;
> > +	__aligned_u64 response_buffer;
> > +	__aligned_u64 data_buffer;
> > +	struct az_blob_request_sync_response response; };
> > +
> > +#define AZ_BLOB_MAGIC_NUMBER	'R'
> > +#define IOCTL_AZ_BLOB_DRIVER_USER_REQUEST \
> > +		_IOWR(AZ_BLOB_MAGIC_NUMBER, 0xf0, \
> > +			struct az_blob_request_sync)
> > +
> > +#endif /* define _AZ_BLOB_H */
> >
> 
> thanks,
> --
> js
> suse labs

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Patch v4 2/3] Drivers: hv: add Azure Blob driver
  2021-07-20 22:12     ` Long Li
@ 2021-07-21  4:57       ` Jiri Slaby
  2021-07-21 16:07         ` Long Li
  0 siblings, 1 reply; 10+ messages in thread
From: Jiri Slaby @ 2021-07-21  4:57 UTC (permalink / raw)
  To: Long Li, longli@linuxonhyperv.com, linux-fs@vger.kernel.org,
	linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-hyperv@vger.kernel.org
  Cc: Jonathan Corbet, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	Wei Liu, Dexuan Cui, Greg Kroah-Hartman, Bjorn Andersson,
	Hans de Goede, Williams, Dan J, Maximilian Luz, Mike Rapoport,
	Ben Widawsky, Andra Paraschiv, Siddharth Gupta, Hannes Reinecke,
	linux-doc@vger.kernel.org

On 21. 07. 21, 0:12, Long Li wrote:
>> Subject: Re: [Patch v4 2/3] Drivers: hv: add Azure Blob driver
>>
>> On 20. 07. 21, 5:31, longli@linuxonhyperv.com wrote:
>>> --- /dev/null
>>> +++ b/include/uapi/misc/hv_azure_blob.h
>>> @@ -0,0 +1,34 @@
>>> +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
>>> +/* Copyright (c) 2021 Microsoft Corporation. */
>>> +
>>> +#ifndef _AZ_BLOB_H
>>> +#define _AZ_BLOB_H
>>> +
>>> +#include <linux/kernel.h>
>>> +#include <linux/uuid.h>
>>
>> Quoting from
>> https://nam06.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.
>> kernel.org%2Flinux-
>> doc%2FMWHPR21MB159375586D810EC5DCB66AF0D7039%40MWHPR21MB1
>> 593.namprd21.prod.outlook.com%2F&amp;data=04%7C01%7Clongli%40micr
>> osoft.com%7C7fdf2d6ed15d4d4122a308d94b6eeed0%7C72f988bf86f141af91
>> ab2d7cd011db47%7C1%7C0%7C637623762292949381%7CUnknown%7CTWFp
>> bGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVC
>> I6Mn0%3D%7C3000&amp;sdata=kv0ZkU1QL6TxlJJZEQEsT7aqLFL9lmP2SStz8k
>> U5sIs%3D&amp;reserved=0:
>> =====
>> Seems like a #include of asm/ioctl.h (or something similar) is needed so that
>> _IOWR is defined.  Also, a #include is needed for __u32, __aligned_u64,
>> guid_t, etc.
>> =====
> 
> The user-space code includes "sys/ioctl.h" for calling into ioctl(). "sys/ioctl.h"
> includes <linux/ioctl.h>, so it has no problem finding _IOWR.
> 
> guid_t is defined in <uapi/linux/uuid.h>, included from <linux/uuid.h> (in this file)
> __u32 and __aligned_u64 are defined in <uapi/linux/types.>, which is included from <linux/kernel.h> (in this file)

No, please don't rely on implicit include chains. Nor that userspace 
solves the includes for you.

thanks,
-- 
js
suse labs

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Patch v4 2/3] Drivers: hv: add Azure Blob driver
  2021-07-20 19:57     ` Long Li
@ 2021-07-21  5:08       ` Greg Kroah-Hartman
  0 siblings, 0 replies; 10+ messages in thread
From: Greg Kroah-Hartman @ 2021-07-21  5:08 UTC (permalink / raw)
  To: Long Li
  Cc: longli@linuxonhyperv.com, linux-fs@vger.kernel.org,
	linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-hyperv@vger.kernel.org, Jonathan Corbet, KY Srinivasan,
	Haiyang Zhang, Stephen Hemminger, Wei Liu, Dexuan Cui,
	Bjorn Andersson, Hans de Goede, Williams, Dan J, Maximilian Luz,
	Mike Rapoport, Ben Widawsky, Jiri Slaby, Andra Paraschiv,
	Siddharth Gupta, Hannes Reinecke, linux-doc@vger.kernel.org

On Tue, Jul 20, 2021 at 07:57:56PM +0000, Long Li wrote:
> > Subject: Re: [Patch v4 2/3] Drivers: hv: add Azure Blob driver
> > 
> > On Mon, Jul 19, 2021 at 08:31:05PM -0700, longli@linuxonhyperv.com wrote:
> > > +struct az_blob_device {
> > > +	struct hv_device *device;
> > > +
> > > +	/* Opened files maintained by this device */
> > > +	struct list_head file_list;
> > > +	/* Lock for protecting file_list */
> > > +	spinlock_t file_lock;
> > > +
> > > +	/* The refcount for this device */
> > > +	refcount_t count;
> > 
> > Just use a kref please if you really need this.  Are you sure you do?
> > You already have 2 other reference counted objects being used here, why make
> > it 3?
> 
> The "count" is to keep track how many user-mode instances and vmbus instance
> are opened on this device.

That will not work at all, sorry.  Please NEVER try to count "open"
calls to a driver, as the driver will never be told how many userspace
programs are accessing it at any time, nor should it even ever care.

Use the existing apis, there is no need to attempt to count this as you
will always get it wrong (hint, what happens when processes share file
descriptors...)

> Being a VMBUS device, this device can be removed 
> at any time (host servicing etc). We must remove the device when this happens
> even if the device is still opened by some user-mode program. The "count" will
> guarantee the lifecycle of the device object after all user-mode has released the device.

No it will not, just use the existing apis and all will be fine.

> I looked at using "file_list" (it's used for tracking opened files by user-mode) for this purpose, 
> but I found out I still need to manage the device count at the vmbus side. 

Again, you should not need this.  As proof, no other driver needs
this...

> > > +	/* Pending requests to VSP */
> > > +	atomic_t pending;
> > 
> > Why does this need to be atomic?
> 
> "pending' is per-device maintained that could change when multiple-user access
> the device at the same time.

How do you know this, and why does it matter?

> > > +	wait_queue_head_t waiting_to_drain;
> > > +
> > > +	bool removing;
> > 
> > Are you sure this actually works properly?  Why is it needed vs. any other misc
> > device?
> 
> When removing this device from vmbus, we need to guarantee there is no possible packets to
> vmbus.

Why?  Shouldn't the vmbus api handle this for you automatically?  Why is
this driver unique here?

> This is a requirement before calling vmbus_close(). Other drivers of vmbus follows
> the same procedure.

Ah.  Why not fix this in the vmbus core?  That sounds like extra logic
that everyone would have to duplicate for no good reason.

> The reason why this driver needs this is that the device removal can happen in the middle of
> az_blob_ioctl_user_request(), which can send packet over vmbus.

Sure, but the bus should handle that for you.

> > > +/* VSC->VSP request */
> > > +struct az_blob_vsp_request {
> > > +	u32 version;
> > > +	u32 timeout_ms;
> > > +	u32 data_buffer_offset;
> > > +	u32 data_buffer_length;
> > > +	u32 data_buffer_valid;
> > > +	u32 operation_type;
> > > +	u32 request_buffer_offset;
> > > +	u32 request_buffer_length;
> > > +	u32 response_buffer_offset;
> > > +	u32 response_buffer_length;
> > > +	guid_t transaction_id;
> > > +} __packed;
> > 
> > Why packed?  If this is going across the wire somewhere, you need to specify
> > the endian-ness of these values, right?  If this is not going across the wire, no
> > need for it to be packed.
> 
> Those data go through the wire.
> 
> All data structures specified in the Hyper-V and guest VM use Little Endian byte
> ordering.  All HV core drivers have a dependence on X86, that guarantees this
> ordering.

Then specify little endian please.

> > > +	struct completion wait_vsp;
> > > +	struct az_blob_request_sync *request; };
> > > +
> > > +struct az_blob_file_ctx {
> > > +	struct list_head list;
> > > +
> > > +	/* List of pending requests to VSP */
> > > +	struct list_head vsp_pending_requests;
> > > +	/* Lock for protecting vsp_pending_requests */
> > > +	spinlock_t vsp_pending_lock;
> > > +	wait_queue_head_t wait_vsp_pending;
> > > +
> > > +	pid_t pid;
> > 
> > Why do you need a pid?  What namespace is this pid in?
> 
> It's a request from user library team for production troubleshooting
> purposes. It's exposed as informal in debugfs.

Then it will be wrong.  Put all of your "debugging" stuff into one place
so it is obvious what it is for, and so that people can ignore it.

> > > +static int az_blob_probe(struct hv_device *device,
> > > +			 const struct hv_vmbus_device_id *dev_id) {
> > > +	int ret;
> > > +	struct az_blob_device *dev;
> > > +
> > > +	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
> > > +	if (!dev)
> > > +		return -ENOMEM;
> > > +
> > > +	spin_lock_init(&dev->file_lock);
> > > +	INIT_LIST_HEAD(&dev->file_list);
> > > +	atomic_set(&dev->pending, 0);
> > > +	init_waitqueue_head(&dev->waiting_to_drain);
> > > +
> > > +	ret = az_blob_connect_to_vsp(device, dev, AZ_BLOB_RING_SIZE);
> > > +	if (ret)
> > > +		goto fail;
> > > +
> > > +	refcount_set(&dev->count, 1);
> > > +	az_blob_dev = dev;
> > > +
> > > +	// create user-mode client library facing device
> > > +	ret = az_blob_create_device(dev);
> > > +	if (ret) {
> > > +		dev_err(AZ_DEV, "failed to create device ret=%d\n", ret);
> > > +		az_blob_remove_vmbus(device);
> > > +		goto fail;
> > > +	}
> > > +
> > > +	dev_info(AZ_DEV, "successfully probed device\n");
> > 
> > When drivers are working properly, they should be quiet.
> 
> The reason is that in production environment when dealing with custom support
> cases, there is no good way to check if the channel is opened on the device. Having
> this message will greatly clear confusions on possible mis-configurations.

Again, no, the driver should be quiet.  If you REALLY need this type of
thing, the bus should be the one doing that type of notification for all
devices on the bus.  Do not make this a per-driver choice.

> > And what is with the AZ_DEV macro mess?
> 
> It's not required, it's just for saving code length. I can put "&az_blob_dev->device->device"
> in every dev_err(), but it makes the code look a lot longer.

Then perhaps your structures are not correct?  Please spell it out so
that we can see that your implementation needs fixing.

> > And can you handle more than one device in the system at one time?  I think
> > your debugfs logic will get really confused.
> 
> There can be one device object active in the system at any given time. The debugfs grabs
> the current active device object. If the device is being removed, removed or added, 
> the current active device object is updated accordingly.

If I remember, your debugfs initialization seems to not use the device
name, but rather a "driver" name, which implies that this is not going
to work well with multiple devices.  But I could be wrong.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [Patch v4 2/3] Drivers: hv: add Azure Blob driver
  2021-07-21  4:57       ` Jiri Slaby
@ 2021-07-21 16:07         ` Long Li
  0 siblings, 0 replies; 10+ messages in thread
From: Long Li @ 2021-07-21 16:07 UTC (permalink / raw)
  To: Jiri Slaby, longli@linuxonhyperv.com, linux-fs@vger.kernel.org,
	linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-hyperv@vger.kernel.org
  Cc: Jonathan Corbet, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	Wei Liu, Dexuan Cui, Greg Kroah-Hartman, Bjorn Andersson,
	Hans de Goede, Williams, Dan J, Maximilian Luz, Mike Rapoport,
	Ben Widawsky, Andra Paraschiv, Siddharth Gupta, Hannes Reinecke,
	linux-doc@vger.kernel.org

> Subject: Re: [Patch v4 2/3] Drivers: hv: add Azure Blob driver
> 
> On 21. 07. 21, 0:12, Long Li wrote:
> >> Subject: Re: [Patch v4 2/3] Drivers: hv: add Azure Blob driver
> >>
> >> On 20. 07. 21, 5:31, longli@linuxonhyperv.com wrote:
> >>> --- /dev/null
> >>> +++ b/include/uapi/misc/hv_azure_blob.h
> >>> @@ -0,0 +1,34 @@
> >>> +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
> >>> +/* Copyright (c) 2021 Microsoft Corporation. */
> >>> +
> >>> +#ifndef _AZ_BLOB_H
> >>> +#define _AZ_BLOB_H
> >>> +
> >>> +#include <linux/kernel.h>
> >>> +#include <linux/uuid.h>
> >>
> >> Quoting from
> >>
> https://nam06.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.
> >> kernel.org%2Flinux-
> >>
> doc%2FMWHPR21MB159375586D810EC5DCB66AF0D7039%40MWHPR21M
> B1
> >>
> 593.namprd21.prod.outlook.com%2F&amp;data=04%7C01%7Clongli%40micr
> >>
> osoft.com%7C7fdf2d6ed15d4d4122a308d94b6eeed0%7C72f988bf86f141af
> 91
> >>
> ab2d7cd011db47%7C1%7C0%7C637623762292949381%7CUnknown%7CTW
> Fp
> >>
> bGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVC
> >>
> I6Mn0%3D%7C3000&amp;sdata=kv0ZkU1QL6TxlJJZEQEsT7aqLFL9lmP2SStz8k
> >> U5sIs%3D&amp;reserved=0:
> >> =====
> >> Seems like a #include of asm/ioctl.h (or something similar) is needed
> >> so that _IOWR is defined.  Also, a #include is needed for __u32,
> >> __aligned_u64, guid_t, etc.
> >> =====
> >
> > The user-space code includes "sys/ioctl.h" for calling into ioctl(). "sys/ioctl.h"
> > includes <linux/ioctl.h>, so it has no problem finding _IOWR.
> >
> > guid_t is defined in <uapi/linux/uuid.h>, included from <linux/uuid.h>
> > (in this file)
> > __u32 and __aligned_u64 are defined in <uapi/linux/types.>, which is
> > included from <linux/kernel.h> (in this file)
> 
> No, please don't rely on implicit include chains. Nor that userspace solves the
> includes for you.

Will fix this.

> 
> thanks,
> --
> js
> suse labs

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-07-21 16:07 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <1626751866-15765-1-git-send-email-longli@linuxonhyperv.com>
2021-07-20  3:31 ` [Patch v4 2/3] Drivers: hv: add Azure Blob driver longli
2021-07-20  5:26   ` Greg Kroah-Hartman
2021-07-20  5:30   ` Greg Kroah-Hartman
2021-07-20  7:34   ` Greg Kroah-Hartman
2021-07-20 19:57     ` Long Li
2021-07-21  5:08       ` Greg Kroah-Hartman
2021-07-20 11:10   ` Jiri Slaby
2021-07-20 22:12     ` Long Li
2021-07-21  4:57       ` Jiri Slaby
2021-07-21 16:07         ` Long Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).