Linux-HyperV List
 help / color / mirror / Atom feed
* [Patch v2 3/5] RDMA/mana_ib : Add error eq and notification from SoC
From: sharmaajay @ 2023-07-26  3:56 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Dexuan Cui, Wei Liu,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: linux-rdma, linux-hyperv, netdev, linux-kernel, Ajay Sharma
In-Reply-To: <1690343820-20188-1-git-send-email-sharmaajay@linuxonhyperv.com>

From: Ajay Sharma <sharmaajay@microsoft.com>

Add the error EQ that is needed for adapter creation
and is later used for notifications from the
management software.

Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
---
 drivers/infiniband/hw/mana/device.c           |  13 +-
 drivers/infiniband/hw/mana/main.c             |  44 ++++++
 drivers/infiniband/hw/mana/mana_ib.h          |   3 +
 .../net/ethernet/microsoft/mana/gdma_main.c   | 146 ++++++++++--------
 drivers/net/ethernet/microsoft/mana/mana_en.c |   3 +
 include/net/mana/gdma.h                       |  13 +-
 6 files changed, 152 insertions(+), 70 deletions(-)

diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index ea4c8c8fc10d..3ab4e69705df 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -68,7 +68,7 @@ static int mana_ib_probe(struct auxiliary_device *adev,
 	ibdev_dbg(&mib_dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
 		  mdev->dev_id.as_uint32, mib_dev->ib_dev.phys_port_cnt);
 
-	mib_dev->gdma_dev = mdev;
+	mib_dev->gc = mdev->gdma_context;
 	mib_dev->ib_dev.node_type = RDMA_NODE_IB_CA;
 
 	/*
@@ -85,15 +85,23 @@ static int mana_ib_probe(struct auxiliary_device *adev,
 		goto free_ib_device;
 	}
 
+	ret = mana_ib_create_error_eq(mib_dev);
+	if (ret) {
+		ibdev_err(&mib_dev->ib_dev, "Failed to allocate err eq");
+		goto deregister_device;
+	}
+
 	ret = ib_register_device(&mib_dev->ib_dev, "mana_%d",
 				 mdev->gdma_context->dev);
 	if (ret)
-		goto deregister_device;
+		goto free_error_eq;
 
 	dev_set_drvdata(&adev->dev, mib_dev);
 
 	return 0;
 
+free_error_eq:
+	mana_gd_destroy_queue(mib_dev->gc, mib_dev->fatal_err_eq);
 deregister_device:
 	mana_gd_deregister_device(&mib_dev->gc->mana_ib);
 free_ib_device:
@@ -105,6 +113,7 @@ static void mana_ib_remove(struct auxiliary_device *adev)
 {
 	struct mana_ib_dev *mib_dev = dev_get_drvdata(&adev->dev);
 
+	mana_gd_destroy_queue(mib_dev->gc, mib_dev->fatal_err_eq);
 	mana_gd_deregister_device(&mib_dev->gc->mana_ib);
 	ib_unregister_device(&mib_dev->ib_dev);
 	ib_dealloc_device(&mib_dev->ib_dev);
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 2c4e3c496644..2ea24ba3065f 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -504,3 +504,47 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
 void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
 {
 }
+
+void mana_ib_soc_event_handler(void *ctx, struct gdma_queue *queue,
+				struct gdma_event *event)
+{
+	struct mana_ib_dev *mib_dev = (struct mana_ib_dev *)ctx;
+
+	switch (event->type) {
+	case GDMA_EQE_SOC_EVENT_NOTIFICATION:
+		ibdev_info(&mib_dev->ib_dev, "Received SOC Notification");
+		break;
+	case GDMA_EQE_SOC_EVENT_TEST:
+		ibdev_info(&mib_dev->ib_dev, "Received SoC Test");
+		break;
+	default:
+		ibdev_err(&mib_dev->ib_dev, "Received unsolicited evt %d",
+			event->type);
+	}
+}
+
+int mana_ib_create_error_eq(struct mana_ib_dev *mib_dev)
+{
+	struct gdma_queue_spec spec = {};
+	int err;
+
+	spec.type = GDMA_EQ;
+	spec.monitor_avl_buf = false;
+	spec.queue_size = EQ_SIZE;
+	spec.eq.callback = mana_ib_soc_event_handler;
+	spec.eq.context = mib_dev;
+	spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
+	spec.eq.msix_allocated = true;
+	spec.eq.msix_index = 0;
+	spec.doorbell = mib_dev->gc->mana_ib.doorbell;
+	spec.pdid = mib_dev->gc->mana_ib.pdid;
+
+	err = mana_gd_create_mana_eq(&mib_dev->gc->mana_ib, &spec,
+				&mib_dev->fatal_err_eq);
+	if (err)
+		return err;
+
+	mib_dev->fatal_err_eq->eq.disable_needed = true;
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 3a2ba6b96f15..4383777354d3 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -31,6 +31,7 @@ struct mana_ib_dev {
 	struct ib_device ib_dev;
 	struct gdma_dev *gdma_dev;
 	struct gdma_context *gc;
+	struct gdma_queue *fatal_err_eq;
 };
 
 struct mana_ib_wq {
@@ -161,4 +162,6 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
 
 void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
 
+int mana_ib_create_error_eq(struct mana_ib_dev *mib_dev);
+
 #endif
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 9fa7a2d6c2b2..84faf4efcb75 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -185,7 +185,8 @@ void mana_gd_free_memory(struct gdma_mem_info *gmi)
 }
 
 static int mana_gd_create_hw_eq(struct gdma_context *gc,
-				struct gdma_queue *queue)
+				struct gdma_queue *queue,
+				u32 doorbell, u32 pdid)
 {
 	struct gdma_create_queue_resp resp = {};
 	struct gdma_create_queue_req req = {};
@@ -199,8 +200,8 @@ static int mana_gd_create_hw_eq(struct gdma_context *gc,
 
 	req.hdr.dev_id = queue->gdma_dev->dev_id;
 	req.type = queue->type;
-	req.pdid = queue->gdma_dev->pdid;
-	req.doolbell_id = queue->gdma_dev->doorbell;
+	req.pdid = pdid;
+	req.doolbell_id = doorbell;
 	req.gdma_region = queue->mem_info.dma_region_handle;
 	req.queue_size = queue->queue_size;
 	req.log2_throttle_limit = queue->eq.log2_throttle_limit;
@@ -371,53 +372,51 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
 	}
 }
 
-static void mana_gd_process_eq_events(void *arg)
+static void mana_gd_process_eq_events(struct list_head *eq_list)
 {
 	u32 owner_bits, new_bits, old_bits;
 	union gdma_eqe_info eqe_info;
 	struct gdma_eqe *eq_eqe_ptr;
-	struct gdma_queue *eq = arg;
+	struct gdma_queue *eq;
 	struct gdma_context *gc;
 	struct gdma_eqe *eqe;
 	u32 head, num_eqe;
 	int i;
 
-	gc = eq->gdma_dev->gdma_context;
-
-	num_eqe = eq->queue_size / GDMA_EQE_SIZE;
-	eq_eqe_ptr = eq->queue_mem_ptr;
-
-	/* Process up to 5 EQEs at a time, and update the HW head. */
-	for (i = 0; i < 5; i++) {
-		eqe = &eq_eqe_ptr[eq->head % num_eqe];
-		eqe_info.as_uint32 = eqe->eqe_info;
-		owner_bits = eqe_info.owner_bits;
-
-		old_bits = (eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK;
-		/* No more entries */
-		if (owner_bits == old_bits)
-			break;
-
-		new_bits = (eq->head / num_eqe) & GDMA_EQE_OWNER_MASK;
-		if (owner_bits != new_bits) {
-			dev_err(gc->dev, "EQ %d: overflow detected\n", eq->id);
-			break;
+	list_for_each_entry_rcu(eq, eq_list, entry) {
+		gc = eq->gdma_dev->gdma_context;
+
+		num_eqe = eq->queue_size / GDMA_EQE_SIZE;
+		eq_eqe_ptr = eq->queue_mem_ptr;
+		/* Process up to 5 EQEs at a time, and update the HW head. */
+		for (i = 0; i < 5; i++) {
+			eqe = &eq_eqe_ptr[eq->head % num_eqe];
+			eqe_info.as_uint32 = eqe->eqe_info;
+			owner_bits = eqe_info.owner_bits;
+
+			old_bits = (eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK;
+			/* No more entries */
+			if (owner_bits == old_bits)
+				break;
+
+			new_bits = (eq->head / num_eqe) & GDMA_EQE_OWNER_MASK;
+			if (owner_bits != new_bits) {
+				dev_err(gc->dev, "EQ %d: overflow detected\n",
+					eq->id);
+				break;
+			}
+			/* Per GDMA spec, rmb is necessary after checking owner_bits, before
+			 * reading eqe.
+			 */
+			rmb();
+			mana_gd_process_eqe(eq);
+			eq->head++;
 		}
 
-		/* Per GDMA spec, rmb is necessary after checking owner_bits, before
-		 * reading eqe.
-		 */
-		rmb();
-
-		mana_gd_process_eqe(eq);
-
-		eq->head++;
+		head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS);
+		mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type,
+				      eq->id, head, SET_ARM_BIT);
 	}
-
-	head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS);
-
-	mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id,
-			      head, SET_ARM_BIT);
 }
 
 static int mana_gd_register_irq(struct gdma_queue *queue,
@@ -435,44 +434,47 @@ static int mana_gd_register_irq(struct gdma_queue *queue,
 	gc = gd->gdma_context;
 	r = &gc->msix_resource;
 	dev = gc->dev;
+	msi_index = spec->eq.msix_index;
 
 	spin_lock_irqsave(&r->lock, flags);
 
-	msi_index = find_first_zero_bit(r->map, r->size);
-	if (msi_index >= r->size || msi_index >= gc->num_msix_usable) {
-		err = -ENOSPC;
-	} else {
-		bitmap_set(r->map, msi_index, 1);
-		queue->eq.msix_index = msi_index;
-	}
-
-	spin_unlock_irqrestore(&r->lock, flags);
+	if (!spec->eq.msix_allocated) {
+		msi_index = find_first_zero_bit(r->map, r->size);
 
-	if (err) {
-		dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u, nMSI:%u",
-			err, msi_index, r->size, gc->num_msix_usable);
+			if (msi_index >= r->size ||
+			    msi_index >= gc->num_msix_usable)
+				err = -ENOSPC;
+			else
+				bitmap_set(r->map, msi_index, 1);
 
-		return err;
+		if (err) {
+			dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u, nMSI:%u",
+				err, msi_index, r->size, gc->num_msix_usable);
+				goto out;
+		}
 	}
 
+	queue->eq.msix_index = msi_index;
 	gic = &gc->irq_contexts[msi_index];
 
-	WARN_ON(gic->handler || gic->arg);
-
-	gic->arg = queue;
+	list_add_rcu(&queue->entry, &gic->eq_list);
 
 	gic->handler = mana_gd_process_eq_events;
 
-	return 0;
+out:
+	spin_unlock_irqrestore(&r->lock, flags);
+	return err;
 }
 
-static void mana_gd_deregiser_irq(struct gdma_queue *queue)
+static void mana_gd_deregister_irq(struct gdma_queue *queue)
 {
 	struct gdma_dev *gd = queue->gdma_dev;
 	struct gdma_irq_context *gic;
 	struct gdma_context *gc;
 	struct gdma_resource *r;
 	unsigned int msix_index;
+	struct list_head *p, *n;
+	struct gdma_queue *eq;
 	unsigned long flags;
 
 	gc = gd->gdma_context;
@@ -483,14 +485,23 @@ static void mana_gd_deregiser_irq(struct gdma_queue *queue)
 	if (WARN_ON(msix_index >= gc->num_msix_usable))
 		return;
 
+	spin_lock_irqsave(&r->lock, flags);
+
 	gic = &gc->irq_contexts[msix_index];
-	gic->handler = NULL;
-	gic->arg = NULL;
+	list_for_each_safe(p, n, &gic->eq_list) {
+		eq = list_entry(p, struct gdma_queue, entry);
+		if (queue == eq) {
+			list_del(&eq->entry);
+			break;
+		}
+	}
 
-	spin_lock_irqsave(&r->lock, flags);
-	bitmap_clear(r->map, msix_index, 1);
-	spin_unlock_irqrestore(&r->lock, flags);
+	if (list_empty(&gic->eq_list)) {
+		gic->handler = NULL;
+		bitmap_clear(r->map, msix_index, 1);
+	}
 
+	spin_unlock_irqrestore(&r->lock, flags);
 	queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
 }
 
@@ -553,7 +564,7 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets,
 			dev_warn(gc->dev, "Failed to flush EQ: %d\n", err);
 	}
 
-	mana_gd_deregiser_irq(queue);
+	mana_gd_deregister_irq(queue);
 
 	if (queue->eq.disable_needed)
 		mana_gd_disable_queue(queue);
@@ -568,7 +579,7 @@ static int mana_gd_create_eq(struct gdma_dev *gd,
 	u32 log2_num_entries;
 	int err;
 
-	queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
+	queue->eq.msix_index = spec->eq.msix_index;
 
 	log2_num_entries = ilog2(queue->queue_size / GDMA_EQE_SIZE);
 
@@ -590,7 +601,8 @@ static int mana_gd_create_eq(struct gdma_dev *gd,
 	queue->eq.log2_throttle_limit = spec->eq.log2_throttle_limit ?: 1;
 
 	if (create_hwq) {
-		err = mana_gd_create_hw_eq(gc, queue);
+		err = mana_gd_create_hw_eq(gc, queue,
+					   spec->doorbell, spec->pdid);
 		if (err)
 			goto out;
 
@@ -800,6 +812,7 @@ int mana_gd_create_mana_eq(struct gdma_dev *gd,
 	kfree(queue);
 	return err;
 }
+EXPORT_SYMBOL(mana_gd_create_mana_eq);
 
 int mana_gd_create_mana_wq_cq(struct gdma_dev *gd,
 			      const struct gdma_queue_spec *spec,
@@ -876,6 +889,7 @@ void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue)
 	mana_gd_free_memory(gmi);
 	kfree(queue);
 }
+EXPORT_SYMBOL(mana_gd_destroy_queue);
 
 int mana_gd_verify_vf_version(struct pci_dev *pdev)
 {
@@ -1193,7 +1207,7 @@ static irqreturn_t mana_gd_intr(int irq, void *arg)
 	struct gdma_irq_context *gic = arg;
 
 	if (gic->handler)
-		gic->handler(gic->arg);
+		gic->handler(&gic->eq_list);
 
 	return IRQ_HANDLED;
 }
@@ -1246,7 +1260,7 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev)
 	for (i = 0; i < nvec; i++) {
 		gic = &gc->irq_contexts[i];
 		gic->handler = NULL;
-		gic->arg = NULL;
+		INIT_LIST_HEAD(&gic->eq_list);
 
 		if (!i)
 			snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_hwc@pci:%s",
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index a499e460594b..d2ba7de8b512 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1167,6 +1167,9 @@ static int mana_create_eq(struct mana_context *ac)
 	spec.eq.callback = NULL;
 	spec.eq.context = ac->eqs;
 	spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
+	spec.eq.msix_allocated = false;
+	spec.doorbell = gd->doorbell;
+	spec.pdid = gd->pdid;
 
 	for (i = 0; i < gc->max_num_queues; i++) {
 		err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq);
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index e2b212dd722b..aee8e8fa1ea6 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -57,6 +57,10 @@ enum gdma_eqe_type {
 	GDMA_EQE_HWC_INIT_EQ_ID_DB	= 129,
 	GDMA_EQE_HWC_INIT_DATA		= 130,
 	GDMA_EQE_HWC_INIT_DONE		= 131,
+
+	/* IB NiC  Events start at 176*/
+	GDMA_EQE_SOC_EVENT_NOTIFICATION = 176,
+	GDMA_EQE_SOC_EVENT_TEST,
 };
 
 enum {
@@ -291,6 +295,7 @@ struct gdma_queue {
 
 	u32 head;
 	u32 tail;
+	struct list_head entry;
 
 	/* Extra fields specific to EQ/CQ. */
 	union {
@@ -318,6 +323,8 @@ struct gdma_queue_spec {
 	enum gdma_queue_type type;
 	bool monitor_avl_buf;
 	unsigned int queue_size;
+	u32 doorbell;
+	u32 pdid;
 
 	/* Extra fields specific to EQ/CQ. */
 	union {
@@ -326,6 +333,8 @@ struct gdma_queue_spec {
 			void *context;
 
 			unsigned long log2_throttle_limit;
+			bool msix_allocated;
+			unsigned int msix_index;
 		} eq;
 
 		struct {
@@ -341,8 +350,8 @@ struct gdma_queue_spec {
 #define MANA_IRQ_NAME_SZ 32
 
 struct gdma_irq_context {
-	void (*handler)(void *arg);
-	void *arg;
+	void (*handler)(struct list_head *arg);
+	struct list_head eq_list;
 	char name[MANA_IRQ_NAME_SZ];
 };
 
-- 
2.25.1


^ permalink raw reply related

* [Patch v2 4/5] RDMA/mana_ib : Create Adapter - each vf bound to adapter object
From: sharmaajay @ 2023-07-26  3:56 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Dexuan Cui, Wei Liu,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: linux-rdma, linux-hyperv, netdev, linux-kernel, Ajay Sharma
In-Reply-To: <1690343820-20188-1-git-send-email-sharmaajay@linuxonhyperv.com>

From: Ajay Sharma <sharmaajay@microsoft.com>

Create an adapter object to have a nice container
for VF resources.

Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
---
 drivers/infiniband/hw/mana/device.c  | 11 +++++-
 drivers/infiniband/hw/mana/main.c    | 50 ++++++++++++++++++++++++++++
 drivers/infiniband/hw/mana/mana_ib.h | 30 +++++++++++++++++
 3 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index 3ab4e69705df..4077e440657a 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -91,15 +91,23 @@ static int mana_ib_probe(struct auxiliary_device *adev,
 		goto deregister_device;
 	}
 
+	ret = mana_ib_create_adapter(mib_dev);
+	if (ret) {
+		ibdev_err(&mib_dev->ib_dev, "Failed to create adapter");
+		goto free_error_eq;
+	}
+
 	ret = ib_register_device(&mib_dev->ib_dev, "mana_%d",
 				 mdev->gdma_context->dev);
 	if (ret)
-		goto free_error_eq;
+		goto destroy_adapter;
 
 	dev_set_drvdata(&adev->dev, mib_dev);
 
 	return 0;
 
+destroy_adapter:
+	mana_ib_destroy_adapter(mib_dev);
 free_error_eq:
 	mana_gd_destroy_queue(mib_dev->gc, mib_dev->fatal_err_eq);
 deregister_device:
@@ -114,6 +122,7 @@ static void mana_ib_remove(struct auxiliary_device *adev)
 	struct mana_ib_dev *mib_dev = dev_get_drvdata(&adev->dev);
 
 	mana_gd_destroy_queue(mib_dev->gc, mib_dev->fatal_err_eq);
+	mana_ib_destroy_adapter(mib_dev);
 	mana_gd_deregister_device(&mib_dev->gc->mana_ib);
 	ib_unregister_device(&mib_dev->ib_dev);
 	ib_dealloc_device(&mib_dev->ib_dev);
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 2ea24ba3065f..aab1cc096824 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -505,6 +505,56 @@ void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
 {
 }
 
+int mana_ib_destroy_adapter(struct mana_ib_dev *mib_dev)
+{
+	struct mana_ib_destroy_adapter_resp resp = {};
+	struct mana_ib_destroy_adapter_req req = {};
+	struct gdma_context *gc;
+	int err;
+
+	gc = mib_dev->gc;
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_ADAPTER, sizeof(req),
+			     sizeof(resp));
+	req.adapter = mib_dev->adapter_handle;
+	req.hdr.dev_id = gc->mana_ib.dev_id;
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+
+	if (err) {
+		ibdev_err(&mib_dev->ib_dev, "Failed to destroy adapter err %d", err);
+		return err;
+	}
+
+	return 0;
+}
+
+int mana_ib_create_adapter(struct mana_ib_dev *mib_dev)
+{
+	struct mana_ib_create_adapter_resp resp = {};
+	struct mana_ib_create_adapter_req req = {};
+	struct gdma_context *gc;
+	int err;
+
+	gc = mib_dev->gc;
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_ADAPTER, sizeof(req),
+			     sizeof(resp));
+	req.notify_eq_id = mib_dev->fatal_err_eq->id;
+	req.hdr.dev_id = gc->mana_ib.dev_id;
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+
+	if (err) {
+		ibdev_err(&mib_dev->ib_dev, "Failed to create adapter err %d", err);
+		return err;
+	}
+
+	mib_dev->adapter_handle = resp.adapter;
+
+	return 0;
+}
+
 void mana_ib_soc_event_handler(void *ctx, struct gdma_queue *queue,
 				struct gdma_event *event)
 {
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 4383777354d3..8a652bccd978 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -32,6 +32,7 @@ struct mana_ib_dev {
 	struct gdma_dev *gdma_dev;
 	struct gdma_context *gc;
 	struct gdma_queue *fatal_err_eq;
+	mana_handle_t adapter_handle;
 };
 
 struct mana_ib_wq {
@@ -94,6 +95,31 @@ struct mana_ib_rwq_ind_table {
 	struct ib_rwq_ind_table ib_ind_table;
 };
 
+enum mana_ib_command_code {
+	MANA_IB_CREATE_ADAPTER  = 0x30002,
+	MANA_IB_DESTROY_ADAPTER = 0x30003,
+};
+
+struct mana_ib_create_adapter_req {
+	struct gdma_req_hdr hdr;
+	u32 notify_eq_id;
+	u32 reserved;
+}; /*HW Data */
+
+struct mana_ib_create_adapter_resp {
+	struct gdma_resp_hdr hdr;
+	mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_ib_destroy_adapter_req {
+	struct gdma_req_hdr hdr;
+	mana_handle_t adapter;
+}; /*HW Data */
+
+struct mana_ib_destroy_adapter_resp {
+	struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
 int mana_ib_gd_create_dma_region(struct mana_ib_dev *mib_dev,
 				 struct ib_umem *umem,
 				 mana_handle_t *gdma_region);
@@ -164,4 +190,8 @@ void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
 
 int mana_ib_create_error_eq(struct mana_ib_dev *mib_dev);
 
+int mana_ib_create_adapter(struct mana_ib_dev *mib_dev);
+
+int mana_ib_destroy_adapter(struct mana_ib_dev *mib_dev);
+
 #endif
-- 
2.25.1


^ permalink raw reply related

* [Patch v2 5/5] RDMA/mana_ib : Query adapter capabilities
From: sharmaajay @ 2023-07-26  3:57 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Dexuan Cui, Wei Liu,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: linux-rdma, linux-hyperv, netdev, linux-kernel, Ajay Sharma
In-Reply-To: <1690343820-20188-1-git-send-email-sharmaajay@linuxonhyperv.com>

From: Ajay Sharma <sharmaajay@microsoft.com>

Query the adapter capabilities so that they can be
exposed to other clients and the VF.

Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
---
 drivers/infiniband/hw/mana/device.c  |  4 +++
 drivers/infiniband/hw/mana/main.c    | 43 ++++++++++++++++++----
 drivers/infiniband/hw/mana/mana_ib.h | 53 +++++++++++++++++++++++++++-
 3 files changed, 92 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index 4077e440657a..e15da43c73a0 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -97,6 +97,10 @@ static int mana_ib_probe(struct auxiliary_device *adev,
 		goto free_error_eq;
 	}
 
+	ret = mana_ib_query_adapter_caps(mib_dev);
+	if (ret)
+		ibdev_dbg(&mib_dev->ib_dev, "Failed to get caps, use defaults");
+
 	ret = ib_register_device(&mib_dev->ib_dev, "mana_%d",
 				 mdev->gdma_context->dev);
 	if (ret)
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index aab1cc096824..d6cfda1d079f 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -469,21 +469,26 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
 int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
 			 struct ib_udata *uhw)
 {
+	struct mana_ib_dev *mib_dev = container_of(ibdev,
+			struct mana_ib_dev, ib_dev);
+
 	props->max_qp = MANA_MAX_NUM_QUEUES;
 	props->max_qp_wr = MAX_SEND_BUFFERS_PER_QUEUE;
-
-	/*
-	 * max_cqe could be potentially much bigger.
-	 * As this version of driver only support RAW QP, set it to the same
-	 * value as max_qp_wr
-	 */
 	props->max_cqe = MAX_SEND_BUFFERS_PER_QUEUE;
-
 	props->max_mr_size = MANA_IB_MAX_MR_SIZE;
 	props->max_mr = MANA_IB_MAX_MR;
 	props->max_send_sge = MAX_TX_WQE_SGL_ENTRIES;
 	props->max_recv_sge = MAX_RX_WQE_SGL_ENTRIES;
 
+	if (mib_dev->adapter_handle) {
+		props->max_qp = mib_dev->adapter_caps.max_qp_count;
+		props->max_qp_wr = mib_dev->adapter_caps.max_requester_sq_size;
+		props->max_cqe = mib_dev->adapter_caps.max_requester_sq_size;
+		props->max_mr = mib_dev->adapter_caps.max_mr_count;
+		props->max_send_sge = mib_dev->adapter_caps.max_send_wqe_size;
+		props->max_recv_sge = mib_dev->adapter_caps.max_recv_wqe_size;
+	}
+
 	return 0;
 }
 
@@ -598,3 +603,27 @@ int mana_ib_create_error_eq(struct mana_ib_dev *mib_dev)
 
 	return 0;
 }
+
+int mana_ib_query_adapter_caps(struct mana_ib_dev *mib_dev)
+{
+	struct mana_ib_query_adapter_caps_resp resp = {};
+	struct mana_ib_query_adapter_caps_req req = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req),
+			     sizeof(resp));
+	req.hdr.resp.msg_version = MANA_IB__GET_ADAPTER_CAP_RESPONSE_V3;
+	req.hdr.dev_id = mib_dev->gc->mana_ib.dev_id;
+
+	err = mana_gd_send_request(mib_dev->gc, sizeof(req), &req,
+				   sizeof(resp), &resp);
+
+	if (err) {
+		ibdev_err(&mib_dev->ib_dev, "Failed to query adapter caps err %d", err);
+		return err;
+	}
+
+	memcpy(&mib_dev->adapter_caps, &resp.max_sq_id,
+			sizeof(mib_dev->adapter_caps));
+	return 0;
+}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 8a652bccd978..1044358230d3 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -20,19 +20,41 @@
 
 /* MANA doesn't have any limit for MR size */
 #define MANA_IB_MAX_MR_SIZE	U64_MAX
-
+#define MANA_IB__GET_ADAPTER_CAP_RESPONSE_V3 3
 /*
  * The hardware limit of number of MRs is greater than maximum number of MRs
  * that can possibly represent in 24 bits
  */
 #define MANA_IB_MAX_MR		0xFFFFFFu
 
+struct mana_ib_adapter_caps {
+	u32 max_sq_id;
+	u32 max_rq_id;
+	u32 max_cq_id;
+	u32 max_qp_count;
+	u32 max_cq_count;
+	u32 max_mr_count;
+	u32 max_pd_count;
+	u32 max_inbound_read_limit;
+	u32 max_outbound_read_limit;
+	u32 mw_count;
+	u32 max_srq_count;
+	u32 max_requester_sq_size;
+	u32 max_responder_sq_size;
+	u32 max_requester_rq_size;
+	u32 max_responder_rq_size;
+	u32 max_send_wqe_size;
+	u32 max_recv_wqe_size;
+	u32 max_inline_data_size;
+};
+
 struct mana_ib_dev {
 	struct ib_device ib_dev;
 	struct gdma_dev *gdma_dev;
 	struct gdma_context *gc;
 	struct gdma_queue *fatal_err_eq;
 	mana_handle_t adapter_handle;
+	struct mana_ib_adapter_caps adapter_caps;
 };
 
 struct mana_ib_wq {
@@ -96,6 +118,7 @@ struct mana_ib_rwq_ind_table {
 };
 
 enum mana_ib_command_code {
+	MANA_IB_GET_ADAPTER_CAP = 0x30001,
 	MANA_IB_CREATE_ADAPTER  = 0x30002,
 	MANA_IB_DESTROY_ADAPTER = 0x30003,
 };
@@ -120,6 +143,32 @@ struct mana_ib_destroy_adapter_resp {
 	struct gdma_resp_hdr hdr;
 }; /* HW Data */
 
+struct mana_ib_query_adapter_caps_req {
+	struct gdma_req_hdr hdr;
+}; /*HW Data */
+
+struct mana_ib_query_adapter_caps_resp {
+	struct gdma_resp_hdr hdr;
+	u32 max_sq_id;
+	u32 max_rq_id;
+	u32 max_cq_id;
+	u32 max_qp_count;
+	u32 max_cq_count;
+	u32 max_mr_count;
+	u32 max_pd_count;
+	u32 max_inbound_read_limit;
+	u32 max_outbound_read_limit;
+	u32 mw_count;
+	u32 max_srq_count;
+	u32 max_requester_sq_size;
+	u32 max_responder_sq_size;
+	u32 max_requester_rq_size;
+	u32 max_responder_rq_size;
+	u32 max_send_wqe_size;
+	u32 max_recv_wqe_size;
+	u32 max_inline_data_size;
+}; /* HW Data */
+
 int mana_ib_gd_create_dma_region(struct mana_ib_dev *mib_dev,
 				 struct ib_umem *umem,
 				 mana_handle_t *gdma_region);
@@ -194,4 +243,6 @@ int mana_ib_create_adapter(struct mana_ib_dev *mib_dev);
 
 int mana_ib_destroy_adapter(struct mana_ib_dev *mib_dev);
 
+int mana_ib_query_adapter_caps(struct mana_ib_dev *mib_dev);
+
 #endif
-- 
2.25.1


^ permalink raw reply related

* Re: [PATCH V3 7/9] x86/hyperv: Initialize cpu and memory for SEV-SNP enlightened guest
From: Jinank Jain @ 2023-07-26  4:26 UTC (permalink / raw)
  To: Tianyu Lan, kys, haiyangz, wei.liu, decui, tglx, mingo, bp,
	dave.hansen, x86, hpa, daniel.lezcano, arnd, michael.h.kelley
  Cc: Tianyu Lan, linux-arch, linux-hyperv, linux-kernel, vkuznets,
	Michael Kelley
In-Reply-To: <20230718032304.136888-8-ltykernel@gmail.com>

Hi Tianyu,

On 7/18/2023 8:53 AM, Tianyu Lan wrote:
> From: Tianyu Lan <tiala@microsoft.com>
>
> Hyper-V enlightened guest doesn't have boot loader support.
> Boot Linux kernel directly from hypervisor with data (kernel
> image, initrd and parameter page) and memory for boot up that
> is initialized via AMD SEV PSP protocol (Please reference
> Section 4.5 Launching a Guest of [1]).
>
> Kernel needs to read processor and memory info from EN_SEV_
> SNP_PROCESSOR/MEM_INFO_ADDR address which are populated by
> Hyper-V. The data is prepared by hypervisor via SNP_
> LAUNCH_UPDATE with page type SNP_PAGE_TYPE_UNMEASURED and
> Initialize smp cpu related ops, validate system memory and
> add them into e820 table.
>
> [1]: https://www.amd.com/system/files/TechDocs/56860.pdf
> Reviewed-by: Michael Kelley <mikelley@microsoft.com>
> Signed-off-by: Tianyu Lan <tiala@microsoft.com>
> ---
> Change since v2:
> 	* Update change log.
> ---
>   arch/x86/hyperv/ivm.c           | 93 +++++++++++++++++++++++++++++++++
>   arch/x86/include/asm/mshyperv.h | 17 ++++++
>   arch/x86/kernel/cpu/mshyperv.c  |  3 ++
>   3 files changed, 113 insertions(+)
>
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index b2b5cb19fac9..ede47c8264e0 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -18,6 +18,11 @@
>   #include <asm/mshyperv.h>
>   #include <asm/hypervisor.h>
>   #include <asm/mtrr.h>
> +#include <asm/coco.h>
> +#include <asm/io_apic.h>
> +#include <asm/sev.h>
> +#include <asm/realmode.h>
> +#include <asm/e820/api.h>
>   
>   #ifdef CONFIG_AMD_MEM_ENCRYPT
>   
> @@ -58,6 +63,8 @@ union hv_ghcb {
>   
>   static u16 hv_ghcb_version __ro_after_init;
>   
> +static u32 processor_count;
> +
>   u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
>   {
>   	union hv_ghcb *hv_ghcb;
> @@ -357,6 +364,92 @@ static bool hv_is_private_mmio(u64 addr)
>   	return false;
>   }
>   
> +static __init void hv_snp_get_smp_config(unsigned int early)
> +{
> +	/*
> +	 * The "early" parameter can be true only if old-style AMD
> +	 * Opteron NUMA detection is enabled, which should never be
> +	 * the case for an SEV-SNP guest.  See CONFIG_AMD_NUMA.
> +	 * For safety, just do nothing if "early" is true.
> +	 */
> +	if (early)
> +		return;
> +
> +	/*
> +	 * There is no firmware and ACPI MADT table support in
> +	 * in the Hyper-V SEV-SNP enlightened guest. Set smp
> +	 * related config variable here.
> +	 */
> +	while (num_processors < processor_count) {
> +		early_per_cpu(x86_cpu_to_apicid, num_processors) = num_processors;
> +		early_per_cpu(x86_bios_cpu_apicid, num_processors) = num_processors;
> +		physid_set(num_processors, phys_cpu_present_map);
> +		set_cpu_possible(num_processors, true);
> +		set_cpu_present(num_processors, true);
> +		num_processors++;
> +	}
> +}
> +
> +__init void hv_sev_init_mem_and_cpu(void)
> +{
> +	struct memory_map_entry *entry;
> +	struct e820_entry *e820_entry;
> +	u64 e820_end;
> +	u64 ram_end;
> +	u64 page;
> +
> +	/*
> +	 * Hyper-V enlightened snp guest boots kernel
> +	 * directly without bootloader. So roms, bios
> +	 * regions and reserve resources are not available.
> +	 * Set these callback to NULL.
> +	 */
> +	x86_platform.legacy.rtc			= 0;
> +	x86_platform.legacy.reserve_bios_regions = 0;
> +	x86_platform.set_wallclock		= set_rtc_noop;
> +	x86_platform.get_wallclock		= get_rtc_noop;
> +	x86_init.resources.probe_roms		= x86_init_noop;
> +	x86_init.resources.reserve_resources	= x86_init_noop;
> +	x86_init.mpparse.find_smp_config	= x86_init_noop;
> +	x86_init.mpparse.get_smp_config		= hv_snp_get_smp_config;
> +
> +	/*
> +	 * Hyper-V SEV-SNP enlightened guest doesn't support ioapic
> +	 * and legacy APIC page read/write. Switch to hv apic here.
> +	 */
> +	disable_ioapic_support();

Where are we switching to hv_apic? Maybe I am missing something here?

Also, in my experiments I have seen that if we don't enable the I/O APIC, the
legacy serial console does not seem to work for SEV-SNP guests.

> +
> +	/* Get processor and mem info. */
> +	processor_count = *(u32 *)__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR);
> +	entry = (struct memory_map_entry *)__va(EN_SEV_SNP_MEM_INFO_ADDR);
> +
> +	/*
> +	 * There is no bootloader/EFI firmware in the SEV SNP guest.
> +	 * E820 table in the memory just describes memory for kernel,
> +	 * ACPI table, cmdline, boot params and ramdisk. The dynamic
> +	 * data(e.g, vcpu number and the rest memory layout) needs to
> +	 * be read from EN_SEV_SNP_PROCESSOR_INFO_ADDR.
> +	 */
> +	for (; entry->numpages != 0; entry++) {
> +		e820_entry = &e820_table->entries[
> +				e820_table->nr_entries - 1];
> +		e820_end = e820_entry->addr + e820_entry->size;
> +		ram_end = (entry->starting_gpn +
> +			   entry->numpages) * PAGE_SIZE;
> +
> +		if (e820_end < entry->starting_gpn * PAGE_SIZE)
> +			e820_end = entry->starting_gpn * PAGE_SIZE;
> +
> +		if (e820_end < ram_end) {
> +			pr_info("Hyper-V: add e820 entry [mem %#018Lx-%#018Lx]\n", e820_end, ram_end - 1);
> +			e820__range_add(e820_end, ram_end - e820_end,
> +					E820_TYPE_RAM);
> +			for (page = e820_end; page < ram_end; page += PAGE_SIZE)
> +				pvalidate((unsigned long)__va(page), RMP_PG_SIZE_4K, true);
> +		}
> +	}
> +}
> +
>   void __init hv_vtom_init(void)
>   {
>   	/*
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 025eda129d99..e57df590846a 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -50,6 +50,21 @@ extern bool hv_isolation_type_en_snp(void);
>   
>   extern union hv_ghcb * __percpu *hv_ghcb_pg;
>   
> +/*
> + * Hyper-V puts processor and memory layout info
> + * to this address in SEV-SNP enlightened guest.
> + */
> +#define EN_SEV_SNP_PROCESSOR_INFO_ADDR  0x802000
> +#define EN_SEV_SNP_MEM_INFO_ADDR	0x802018
> +
> +struct memory_map_entry {
> +	u64 starting_gpn;
> +	u64 numpages;
> +	u16 type;
> +	u16 flags;
> +	u32 reserved;
> +};
> +
>   int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
>   int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
>   int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
> @@ -234,12 +249,14 @@ void hv_ghcb_msr_read(u64 msr, u64 *value);
>   bool hv_ghcb_negotiate_protocol(void);
>   void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason);
>   void hv_vtom_init(void);
> +void hv_sev_init_mem_and_cpu(void);
>   #else
>   static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
>   static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
>   static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
>   static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
>   static inline void hv_vtom_init(void) {}
> +static inline void hv_sev_init_mem_and_cpu(void) {}
>   #endif
>   
>   extern bool hv_isolation_type_snp(void);
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 5398fb2f4d39..d3bb921ee7fe 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -529,6 +529,9 @@ static void __init ms_hyperv_init_platform(void)
>   	if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
>   		mark_tsc_unstable("running on Hyper-V");
>   
> +	if (hv_isolation_type_en_snp())
> +		hv_sev_init_mem_and_cpu();
> +
>   	hardlockup_detector_disable();
>   }
>   

Regards,

Jinank


^ permalink raw reply

* Re: [PATCH V3,net-next] net: mana: Add page pool for RX buffers
From: Jesper Dangaard Brouer @ 2023-07-26  9:22 UTC (permalink / raw)
  To: Haiyang Zhang, Jesper Dangaard Brouer,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: brouer, Dexuan Cui, KY Srinivasan, Paul Rosswurm, olaf@aepfle.de,
	vkuznets@redhat.com, davem@davemloft.net, wei.liu@kernel.org,
	edumazet@google.com, kuba@kernel.org, pabeni@redhat.com,
	leon@kernel.org, Long Li, ssengar@linux.microsoft.com,
	linux-rdma@vger.kernel.org, daniel@iogearbox.net,
	john.fastabend@gmail.com, bpf@vger.kernel.org, ast@kernel.org,
	Ajay Sharma, hawk@kernel.org, tglx@linutronix.de,
	shradhagupta@linux.microsoft.com, linux-kernel@vger.kernel.org,
	Ilias Apalodimas
In-Reply-To: <PH7PR21MB3116F5612AA8303512EEBA4CCA03A@PH7PR21MB3116.namprd21.prod.outlook.com>



On 25/07/2023 21.02, Haiyang Zhang wrote:
> 
>> -----Original Message-----
>> From: Jesper Dangaard Brouer <jbrouer@redhat.com>
>> Sent: Tuesday, July 25, 2023 2:01 PM
>>>>
>>>> Our driver is using NUMA 0 by default, so I implicitly assign NUMA node id
>>>> to zero during pool init.
>>>>
>>>> And, if the IRQ/CPU affinity is changed, the page_pool_nid_changed()
>>>> will update the nid for the pool. Does this sound good?
>>>>
>>>
>>> Also, since our driver is getting the default node from here:
>>> 	gc->numa_node = dev_to_node(&pdev->dev);
>>> I will update this patch to set the default node as above, instead of implicitly
>>> assigning it to 0.
>>>
>>
>> In that case, I agree that it make sense to use dev_to_node(&pdev->dev),
>> like:
>> 	pprm.nid = dev_to_node(&pdev->dev);
>>
>> Driver must have a reason for assigning gc->numa_node for this hardware,
>> which is okay. That is why page_pool API allows driver to control this.
>>
>> But then I don't think you should call page_pool_nid_changed() like
>>
>> 	page_pool_nid_changed(rxq->page_pool, numa_mem_id());
>>
>> Because then you will (at first packet processing event) revert the
>> dev_to_node() setting to use numa_mem_id() of processing/running CPU.
>> (In effect this will be the same as setting NUMA_NO_NODE).
>>
>> I know, mlx5 do call page_pool_nid_changed(), but they showed benchmark
>> numbers that this was preferred action, even-when sysadm had
>> "misconfigured" the default smp_affinity RX-processing to happen on a
>> remote NUMA node.  AFAIK mlx5 keeps the descriptor rings on the
>> originally configured NUMA node that corresponds to the NIC PCIe slot.
> 
> In mana_gd_setup_irqs(), we set the default IRQ/CPU affinity to gc->numa_node
> too, so it won't revert the nid initial setting.
> 
> Currently, the Azure hypervisor always indicates numa 0 as default. (In
> the future, it will start to provide the accurate default dev node.) When a
> user manually changes the IRQ/CPU affinity for perf tuning, we want to
> allow page_pool_nid_changed() to update the pool. Is this OK?
> 

If I were you, I would wait with the page_pool_nid_changed()
"optimization" and do a benchmark to see if this actually has a
benefit.  (You can do this in another patch).  (In an Azure hypervisor
environment it might not be the right choice).

This reminds me, do you have any benchmark data on the improvement this
patch (using page_pool) gave?

--Jesper


^ permalink raw reply

* Re: [Patch v2 3/5] RDMA/mana_ib : Add error eq and notification from SoC
From: Simon Horman @ 2023-07-26 12:25 UTC (permalink / raw)
  To: sharmaajay
  Cc: Jason Gunthorpe, Leon Romanovsky, Dexuan Cui, Wei Liu,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	linux-rdma, linux-hyperv, netdev, linux-kernel, Ajay Sharma
In-Reply-To: <1690343820-20188-4-git-send-email-sharmaajay@linuxonhyperv.com>

On Tue, Jul 25, 2023 at 08:56:58PM -0700, sharmaajay@linuxonhyperv.com wrote:

...

> diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
> index 2c4e3c496644..2ea24ba3065f 100644
> --- a/drivers/infiniband/hw/mana/main.c
> +++ b/drivers/infiniband/hw/mana/main.c
> @@ -504,3 +504,47 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
>  void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
>  {
>  }
> +
> +void mana_ib_soc_event_handler(void *ctx, struct gdma_queue *queue,
> +				struct gdma_event *event)

Hi Ajay,

I wonder if this function should be static.
It seems to only be used in this file.

> diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c

...

> @@ -435,44 +434,47 @@ static int mana_gd_register_irq(struct gdma_queue *queue,
>  	gc = gd->gdma_context;
>  	r = &gc->msix_resource;
>  	dev = gc->dev;
> +	msi_index = spec->eq.msix_index;
>  
>  	spin_lock_irqsave(&r->lock, flags);
>  
> -	msi_index = find_first_zero_bit(r->map, r->size);
> -	if (msi_index >= r->size || msi_index >= gc->num_msix_usable) {
> -		err = -ENOSPC;
> -	} else {
> -		bitmap_set(r->map, msi_index, 1);
> -		queue->eq.msix_index = msi_index;
> -	}
> -
> -	spin_unlock_irqrestore(&r->lock, flags);
> +	if (!spec->eq.msix_allocated) {
> +		msi_index = find_first_zero_bit(r->map, r->size);
>  
> -	if (err) {
> -		dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u, nMSI:%u",
> -			err, msi_index, r->size, gc->num_msix_usable);
> +			if (msi_index >= r->size ||
> +			    msi_index >= gc->num_msix_usable)
> +				err = -ENOSPC;
> +			else
> +				bitmap_set(r->map, msi_index, 1);

It looks like the indentation of the lines above is off.
There seems to be one tab too many.

>  
> -		return err;
> +		if (err) {
> +			dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u, nMSI:%u",
> +				err, msi_index, r->size, gc->num_msix_usable);
> +				goto out;
> +		}
>  	}

...

^ permalink raw reply

* Re: [Patch v2 4/5] RDMA/mana_ib : Create Adapter - each vf bound to adapter object
From: Simon Horman @ 2023-07-26 12:26 UTC (permalink / raw)
  To: sharmaajay
  Cc: Jason Gunthorpe, Leon Romanovsky, Dexuan Cui, Wei Liu,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	linux-rdma, linux-hyperv, netdev, linux-kernel, Ajay Sharma
In-Reply-To: <1690343820-20188-5-git-send-email-sharmaajay@linuxonhyperv.com>

On Tue, Jul 25, 2023 at 08:56:59PM -0700, sharmaajay@linuxonhyperv.com wrote:
> From: Ajay Sharma <sharmaajay@microsoft.com>
> 
> Create adapte object to have nice container

nit: adapte -> adapter

...

^ permalink raw reply

* Re: [Patch v2 5/5] RDMA/mana_ib : Query adapter capabilities
From: Simon Horman @ 2023-07-26 12:31 UTC (permalink / raw)
  To: sharmaajay
  Cc: Jason Gunthorpe, Leon Romanovsky, Dexuan Cui, Wei Liu,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	linux-rdma, linux-hyperv, netdev, linux-kernel, Ajay Sharma
In-Reply-To: <1690343820-20188-6-git-send-email-sharmaajay@linuxonhyperv.com>

On Tue, Jul 25, 2023 at 08:57:00PM -0700, sharmaajay@linuxonhyperv.com wrote:

...

> diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c

...

> +int mana_ib_query_adapter_caps(struct mana_ib_dev *mib_dev)
> +{
> +	struct mana_ib_query_adapter_caps_resp resp = {};
> +	struct mana_ib_query_adapter_caps_req req = {};
> +	int err;
> +
> +	mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req),
> +			     sizeof(resp));
> +	req.hdr.resp.msg_version = MANA_IB__GET_ADAPTER_CAP_RESPONSE_V3;
> +	req.hdr.dev_id = mib_dev->gc->mana_ib.dev_id;
> +
> +	err = mana_gd_send_request(mib_dev->gc, sizeof(req), &req,
> +				   sizeof(resp), &resp);
> +
> +	if (err) {
> +		ibdev_err(&mib_dev->ib_dev, "Failed to query adapter caps err %d", err);
> +		return err;
> +	}
> +
> +	memcpy(&mib_dev->adapter_caps, &resp.max_sq_id,
> +			sizeof(mib_dev->adapter_caps));

Hi Ajay,

The indentation of the line above is off.

	memcpy(&mib_dev->adapter_caps, &resp.max_sq_id,
	       sizeof(mib_dev->adapter_caps));

But, perhaps more importantly, an x86_64 allmodconfig W=1 build with gcc-12
yields:

 In file included from ./include/linux/string.h:254,
                  from ./include/linux/bitmap.h:11,
                  from ./include/linux/ethtool.h:16,
                  from ./include/rdma/ib_verbs.h:15,
                  from drivers/infiniband/hw/mana/mana_ib.h:9,
                  from drivers/infiniband/hw/mana/main.c:6:
 In function 'fortify_memcpy_chk',
     inlined from 'mana_ib_query_adapter_caps' at drivers/infiniband/hw/mana/main.c:626:2:
 ./include/linux/fortify-string.h:592:25: warning: call to '__read_overflow2_field' declared with attribute warning: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Wattribute-warning]
   592 |                         __read_overflow2_field(q_size_field, size);
       |

> +	return 0;
> +}

...

^ permalink raw reply

* [PATCH V2] x86/hyperv: Rename hv_isolation_type_snp/en_snp() to isol_type_snp_paravisor/enlightened()
From: Tianyu Lan @ 2023-07-26 12:49 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, tglx, mingo, bp, dave.hansen, x86,
	hpa, arnd, michael.h.kelley
  Cc: Tianyu Lan, linux-arch, linux-hyperv, linux-kernel, vkuznets

From: Tianyu Lan <tiala@microsoft.com>

Rename hv_isolation_type_snp() and hv_isolation_type_en_snp()
to make them more intuitive.

Suggested-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Tianyu Lan <tiala@microsoft.com>
---
This patch is based on the patchset "x86/hyperv: Add AMD sev-snp
enlightened guest support on hyperv" https://lore.kernel.org/lkml/
20230718032304.136888-3-ltykernel@gmail.com/T/.

Change since v1:
       Add "hv_" prefix to isol_type_snp_paravisor/enlightened()
---
 arch/x86/hyperv/hv_init.c       |  6 +++---
 arch/x86/hyperv/ivm.c           | 17 +++++++++--------
 arch/x86/include/asm/mshyperv.h |  8 ++++----
 arch/x86/kernel/cpu/mshyperv.c  | 12 ++++++------
 drivers/hv/connection.c         |  2 +-
 drivers/hv/hv.c                 | 16 ++++++++--------
 drivers/hv/hv_common.c          | 10 +++++-----
 include/asm-generic/mshyperv.h  |  4 ++--
 8 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index b004370d3b01..3df948c69cff 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -52,7 +52,7 @@ static int hyperv_init_ghcb(void)
 	void *ghcb_va;
 	void **ghcb_base;
 
-	if (!hv_isolation_type_snp())
+	if (!hv_isol_type_snp_paravisor())
 		return 0;
 
 	if (!hv_ghcb_pg)
@@ -116,7 +116,7 @@ static int hv_cpu_init(unsigned int cpu)
 			 * is blocked to run in Confidential VM. So only decrypt assist
 			 * page in non-root partition here.
 			 */
-			if (*hvp && hv_isolation_type_en_snp()) {
+			if (*hvp && hv_isol_type_snp_enlightened()) {
 				WARN_ON_ONCE(set_memory_decrypted((unsigned long)(*hvp), 1));
 				memset(*hvp, 0, PAGE_SIZE);
 			}
@@ -453,7 +453,7 @@ void __init hyperv_init(void)
 		goto common_free;
 	}
 
-	if (hv_isolation_type_snp()) {
+	if (hv_isol_type_snp_paravisor()) {
 		/* Negotiate GHCB Version. */
 		if (!hv_ghcb_negotiate_protocol())
 			hv_ghcb_terminate(SEV_TERM_SET_GEN,
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 2eda4e69849d..2548d904e45a 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -591,24 +591,25 @@ bool hv_is_isolation_supported(void)
 	return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
 }
 
-DEFINE_STATIC_KEY_FALSE(isolation_type_snp);
+DEFINE_STATIC_KEY_FALSE(isol_type_snp_paravisor);
 
 /*
- * hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based
+ * hv_isol_type_snp_paravisor - Check system runs in the AMD SEV-SNP based
  * isolation VM.
  */
-bool hv_isolation_type_snp(void)
+bool hv_isol_type_snp_paravisor(void)
 {
-	return static_branch_unlikely(&isolation_type_snp);
+	return static_branch_unlikely(&isol_type_snp_paravisor);
 }
 
-DEFINE_STATIC_KEY_FALSE(isolation_type_en_snp);
+DEFINE_STATIC_KEY_FALSE(isol_type_snp_enlightened);
+
 /*
- * hv_isolation_type_en_snp - Check system runs in the AMD SEV-SNP based
+ * hv_isol_type_snp_enlightened - Check system runs in the AMD SEV-SNP based
  * isolation enlightened VM.
  */
-bool hv_isolation_type_en_snp(void)
+bool hv_isol_type_snp_enlightened(void)
 {
-	return static_branch_unlikely(&isolation_type_en_snp);
+	return static_branch_unlikely(&isol_type_snp_enlightened);
 }
 
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index c5a3c29fad01..e543a5a1b007 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -25,8 +25,8 @@
 
 union hv_ghcb;
 
-DECLARE_STATIC_KEY_FALSE(isolation_type_snp);
-DECLARE_STATIC_KEY_FALSE(isolation_type_en_snp);
+DECLARE_STATIC_KEY_FALSE(isol_type_snp_paravisor);
+DECLARE_STATIC_KEY_FALSE(isol_type_snp_enlightened);
 
 typedef int (*hyperv_fill_flush_list_func)(
 		struct hv_guest_mapping_flush_list *flush,
@@ -46,7 +46,7 @@ extern void *hv_hypercall_pg;
 
 extern u64 hv_current_partition_id;
 
-extern bool hv_isolation_type_en_snp(void);
+extern bool hv_isol_type_snp_enlightened(void);
 
 extern union hv_ghcb * __percpu *hv_ghcb_pg;
 
@@ -268,7 +268,7 @@ static inline void hv_sev_init_mem_and_cpu(void) {}
 static int hv_snp_boot_ap(int cpu, unsigned long start_ip) {}
 #endif
 
-extern bool hv_isolation_type_snp(void);
+extern bool hv_isol_type_snp_paravisor(void);
 
 static inline bool hv_is_synic_reg(unsigned int reg)
 {
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 6ff0b60d30f9..3c61b4b6a5e3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -66,7 +66,7 @@ u64 hv_get_non_nested_register(unsigned int reg)
 {
 	u64 value;
 
-	if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
+	if (hv_is_synic_reg(reg) && hv_isol_type_snp_paravisor())
 		hv_ghcb_msr_read(reg, &value);
 	else
 		rdmsrl(reg, value);
@@ -76,7 +76,7 @@ EXPORT_SYMBOL_GPL(hv_get_non_nested_register);
 
 void hv_set_non_nested_register(unsigned int reg, u64 value)
 {
-	if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
+	if (hv_is_synic_reg(reg) && hv_isol_type_snp_paravisor()) {
 		hv_ghcb_msr_write(reg, value);
 
 		/* Write proxy bit via wrmsl instruction */
@@ -300,7 +300,7 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
 	 *  Override wakeup_secondary_cpu_64 callback for SEV-SNP
 	 *  enlightened guest.
 	 */
-	if (hv_isolation_type_en_snp())
+	if (hv_isol_type_snp_enlightened())
 		apic->wakeup_secondary_cpu_64 = hv_snp_boot_ap;
 
 	if (!hv_root_partition)
@@ -421,9 +421,9 @@ static void __init ms_hyperv_init_platform(void)
 
 
 		if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
-			static_branch_enable(&isolation_type_en_snp);
+			static_branch_enable(&isol_type_snp_enlightened);
 		} else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
-			static_branch_enable(&isolation_type_snp);
+			static_branch_enable(&isol_type_snp_paravisor);
 		}
 	}
 
@@ -545,7 +545,7 @@ static void __init ms_hyperv_init_platform(void)
 	if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
 		mark_tsc_unstable("running on Hyper-V");
 
-	if (hv_isolation_type_en_snp())
+	if (hv_isol_type_snp_enlightened())
 		hv_sev_init_mem_and_cpu();
 
 	hardlockup_detector_disable();
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 02b54f85dc60..f86570f3bc1e 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -484,7 +484,7 @@ void vmbus_set_event(struct vmbus_channel *channel)
 
 	++channel->sig_events;
 
-	if (hv_isolation_type_snp())
+	if (hv_isol_type_snp_paravisor())
 		hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
 				NULL, sizeof(channel->sig_event));
 	else
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index ec6e35a0d9bf..3a6e5ecd03d8 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -64,7 +64,7 @@ int hv_post_message(union hv_connection_id connection_id,
 	aligned_msg->payload_size = payload_size;
 	memcpy((void *)aligned_msg->payload, payload, payload_size);
 
-	if (hv_isolation_type_snp())
+	if (hv_isol_type_snp_paravisor())
 		status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
 				(void *)aligned_msg, NULL,
 				sizeof(*aligned_msg));
@@ -109,7 +109,7 @@ int hv_synic_alloc(void)
 		 * Synic message and event pages are allocated by paravisor.
 		 * Skip these pages allocation here.
 		 */
-		if (!hv_isolation_type_snp() && !hv_root_partition) {
+		if (!hv_isol_type_snp_paravisor() && !hv_root_partition) {
 			hv_cpu->synic_message_page =
 				(void *)get_zeroed_page(GFP_ATOMIC);
 			if (hv_cpu->synic_message_page == NULL) {
@@ -125,7 +125,7 @@ int hv_synic_alloc(void)
 			}
 		}
 
-		if (hv_isolation_type_en_snp()) {
+		if (hv_isol_type_snp_enlightened()) {
 			ret = set_memory_decrypted((unsigned long)
 				hv_cpu->synic_message_page, 1);
 			if (ret) {
@@ -174,7 +174,7 @@ void hv_synic_free(void)
 			= per_cpu_ptr(hv_context.cpu_context, cpu);
 
 		/* It's better to leak the page if the encryption fails. */
-		if (hv_isolation_type_en_snp()) {
+		if (hv_isol_type_snp_enlightened()) {
 			if (hv_cpu->synic_message_page) {
 				ret = set_memory_encrypted((unsigned long)
 					hv_cpu->synic_message_page, 1);
@@ -221,7 +221,7 @@ void hv_synic_enable_regs(unsigned int cpu)
 	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
 	simp.simp_enabled = 1;
 
-	if (hv_isolation_type_snp() || hv_root_partition) {
+	if (hv_isol_type_snp_paravisor() || hv_root_partition) {
 		/* Mask out vTOM bit. ioremap_cache() maps decrypted */
 		u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
 				~ms_hyperv.shared_gpa_boundary;
@@ -240,7 +240,7 @@ void hv_synic_enable_regs(unsigned int cpu)
 	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
 	siefp.siefp_enabled = 1;
 
-	if (hv_isolation_type_snp() || hv_root_partition) {
+	if (hv_isol_type_snp_paravisor() || hv_root_partition) {
 		/* Mask out vTOM bit. ioremap_cache() maps decrypted */
 		u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
 				~ms_hyperv.shared_gpa_boundary;
@@ -323,7 +323,7 @@ void hv_synic_disable_regs(unsigned int cpu)
 	 * addresses.
 	 */
 	simp.simp_enabled = 0;
-	if (hv_isolation_type_snp() || hv_root_partition) {
+	if (hv_isol_type_snp_paravisor() || hv_root_partition) {
 		iounmap(hv_cpu->synic_message_page);
 		hv_cpu->synic_message_page = NULL;
 	} else {
@@ -335,7 +335,7 @@ void hv_synic_disable_regs(unsigned int cpu)
 	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
 	siefp.siefp_enabled = 0;
 
-	if (hv_isolation_type_snp() || hv_root_partition) {
+	if (hv_isol_type_snp_paravisor() || hv_root_partition) {
 		iounmap(hv_cpu->synic_event_page);
 		hv_cpu->synic_event_page = NULL;
 	} else {
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 2d43ba2bc925..e205f85709ad 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -381,7 +381,7 @@ int hv_common_cpu_init(unsigned int cpu)
 			*outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
 		}
 
-		if (hv_isolation_type_en_snp()) {
+		if (hv_isol_type_snp_enlightened()) {
 			ret = set_memory_decrypted((unsigned long)*inputarg, pgcount);
 			if (ret) {
 				kfree(*inputarg);
@@ -509,17 +509,17 @@ bool __weak hv_is_isolation_supported(void)
 }
 EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
 
-bool __weak hv_isolation_type_snp(void)
+bool __weak hv_isol_type_snp_paravisor(void)
 {
 	return false;
 }
-EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
+EXPORT_SYMBOL_GPL(hv_isol_type_snp_paravisor);
 
-bool __weak hv_isolation_type_en_snp(void)
+bool __weak hv_isol_type_snp_enlightened(void)
 {
 	return false;
 }
-EXPORT_SYMBOL_GPL(hv_isolation_type_en_snp);
+EXPORT_SYMBOL_GPL(hv_isol_type_snp_enlightened);
 
 void __weak hv_setup_vmbus_handler(void (*handler)(void))
 {
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index f73a044ecaa7..b8f2b48b640f 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -64,7 +64,7 @@ extern void * __percpu *hyperv_pcpu_output_arg;
 
 extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
 extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
-extern bool hv_isolation_type_snp(void);
+extern bool hv_isol_type_snp_paravisor(void);
 
 /* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */
 static inline int hv_result(u64 status)
@@ -279,7 +279,7 @@ bool hv_is_hyperv_initialized(void);
 bool hv_is_hibernation_supported(void);
 enum hv_isolation_type hv_get_isolation_type(void);
 bool hv_is_isolation_supported(void);
-bool hv_isolation_type_snp(void);
+bool hv_isol_type_snp_paravisor(void);
 u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);
 void hyperv_cleanup(void);
 bool hv_query_ext_cap(u64 cap_query);
-- 
2.25.1


^ permalink raw reply related

* [PATCH V6 net] net: mana: Fix MANA VF unload when hardware is
From: Souradeep Chakrabarti @ 2023-07-26 13:15 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, davem, edumazet, kuba, pabeni,
	longli, sharmaajay, leon, cai.huoqing, ssengar, vkuznets, tglx,
	linux-hyperv, netdev, linux-kernel, linux-rdma
  Cc: schakrabarti, Souradeep Chakrabarti, stable

When unloading the MANA driver, mana_dealloc_queues() waits for the MANA
hardware to complete any inflight packets and set the pending send count
to zero. But if the hardware has failed, mana_dealloc_queues()
could wait forever.

Fix this by adding a timeout to the wait. Set the timeout to 120 seconds,
which is a somewhat arbitrary value that is more than long enough for
functional hardware to complete any sends.

Cc: stable@vger.kernel.org
Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")

Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
---
V5 -> V6:
* Added pcie_flr to reset the pci after timeout.
* Fixed the position of changelog.
* Removed unused variable like cq.

V4 -> V5:
* Added fixes tag
* Changed the usleep_range from static to incremental value.
* Initialized timeout in the beginning.

V3 -> V4:
* Removed the unnecessary braces from mana_dealloc_queues().

V2 -> V3:
* Removed the unnecessary braces from mana_dealloc_queues().

V1 -> V2:
* Added net branch
* Removed the typecasting to (struct mana_context*) of void pointer
* Repositioned timeout variable in mana_dealloc_queues()
* Repositioned vf_unload_timeout in mana_context struct, to utilise the
 6 bytes hole
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 38 +++++++++++++++++--
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index a499e460594b..ea039e2d4c4b 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -8,6 +8,7 @@
 #include <linux/ethtool.h>
 #include <linux/filter.h>
 #include <linux/mm.h>
+#include <linux/pci.h>
 
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
@@ -2345,9 +2346,12 @@ int mana_attach(struct net_device *ndev)
 static int mana_dealloc_queues(struct net_device *ndev)
 {
 	struct mana_port_context *apc = netdev_priv(ndev);
+	unsigned long timeout = jiffies + 120 * HZ;
 	struct gdma_dev *gd = apc->ac->gdma_dev;
 	struct mana_txq *txq;
+	struct sk_buff *skb;
 	int i, err;
+	u32 tsleep;
 
 	if (apc->port_is_up)
 		return -EINVAL;
@@ -2363,15 +2367,41 @@ static int mana_dealloc_queues(struct net_device *ndev)
 	 * to false, but it doesn't matter since mana_start_xmit() drops any
 	 * new packets due to apc->port_is_up being false.
 	 *
-	 * Drain all the in-flight TX packets
+	 * Drain all the in-flight TX packets.
+	 * A timeout of 120 seconds for all the queues is used.
+	 * This will break the while loop when h/w is not responding.
+	 * This value of 120 has been decided here considering max
+	 * number of queues.
 	 */
+
 	for (i = 0; i < apc->num_queues; i++) {
 		txq = &apc->tx_qp[i].txq;
-
-		while (atomic_read(&txq->pending_sends) > 0)
-			usleep_range(1000, 2000);
+		tsleep = 1000;
+		while (atomic_read(&txq->pending_sends) > 0 &&
+		       time_before(jiffies, timeout)) {
+			usleep_range(tsleep, tsleep + 1000);
+			tsleep <<= 1;
+		}
+		if (atomic_read(&txq->pending_sends)) {
+			err  = pcie_flr(to_pci_dev(gd->gdma_context->dev));
+			if (err) {
+				netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n",
+					   err, atomic_read(&txq->pending_sends),
+					   txq->gdma_txq_id);
+			}
+			break;
+		}
 	}
 
+	for (i = 0; i < apc->num_queues; i++) {
+		txq = &apc->tx_qp[i].txq;
+		while (atomic_read(&txq->pending_sends)) {
+			skb = skb_dequeue(&txq->pending_skbs);
+			mana_unmap_skb(skb, apc);
+			dev_consume_skb_any(skb);
+			atomic_sub(1, &txq->pending_sends);
+		}
+	}
 	/* We're 100% sure the queues can no longer be woken up, because
 	 * we're sure now mana_poll_tx_cq() can't be running.
 	 */
-- 
2.34.1


^ permalink raw reply related

* Re: [PATCH V3 5/9] x86/hyperv: Use vmmcall to implement Hyper-V hypercall in sev-snp enlightened guest
From: Tianyu Lan @ 2023-07-26 13:47 UTC (permalink / raw)
  To: Michael Kelley (LINUX), KY Srinivasan, Haiyang Zhang,
	wei.liu@kernel.org, Dexuan Cui, tglx@linutronix.de,
	mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
	x86@kernel.org, hpa@zytor.com, daniel.lezcano@linaro.org,
	arnd@arndb.de
  Cc: Tianyu Lan, linux-arch@vger.kernel.org,
	linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org,
	vkuznets@redhat.com
In-Reply-To: <BYAPR21MB16882FAEDEFAED59208ED9E0D700A@BYAPR21MB1688.namprd21.prod.outlook.com>

On 7/26/2023 11:44 AM, Michael Kelley (LINUX) wrote:
>> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
>> index 2fa38e9f6207..025eda129d99 100644
>> --- a/arch/x86/include/asm/mshyperv.h
>> +++ b/arch/x86/include/asm/mshyperv.h
>> @@ -64,12 +64,12 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
>>   	if (!hv_hypercall_pg)
>>   		return U64_MAX;
>>
>> -	__asm__ __volatile__("mov %4, %%r8\n"
>> -			     CALL_NOSPEC
>> +	__asm__ __volatile__("mov %[output], %%r8\n"
>> +			     ALTERNATIVE("vmmcall", CALL_NOSPEC, X86_FEATURE_SEV_ES)
> Since this code is for SEV-SNP, what's the thinking behind using
> X86_FEATURE_SEV_ES in the ALTERNATIVE statements?   Don't you need
> to use X86_FEATURE_SEV_SNP (which is being added in another patch set that
> Boris Petkov pointed out).

Hi Michael:
	Thanks for your review. The patch mentioned by Boris has not been
merged yet, so we still use X86_FEATURE_SEV_ES here. We may replace the
feature flag with X86_FEATURE_SEV_SNP after it's upstreamed.

> 
> Also, does this patch depend on Peter Zijlstra's patch to support nested
> ALTERNATIVE statements?  If so, that needs to be called out, probably in
> the cover letter.  Peter's patch doesn't yet appear in linux-next.
> 

It may work without Peterz's patch. Please see 
https://lkml.org/lkml/2023/6/27/520.
Peterz's patch optimizes the ALTERNATIVE_n implementation with nested
expressions.

^ permalink raw reply

* Re: [PATCH V3 7/9] x86/hyperv: Initialize cpu and memory for SEV-SNP enlightened guest
From: Tianyu Lan @ 2023-07-26 14:15 UTC (permalink / raw)
  To: Jinank Jain, kys, haiyangz, wei.liu, decui, tglx, mingo, bp,
	dave.hansen, x86, hpa, daniel.lezcano, arnd, michael.h.kelley
  Cc: Tianyu Lan, linux-arch, linux-hyperv, linux-kernel, vkuznets,
	Michael Kelley
In-Reply-To: <4d0715a5-70a8-9667-ccf0-de9bc933bb04@linux.microsoft.com>

On 7/26/2023 12:26 PM, Jinank Jain wrote:
>> +    /*
>> +     * Hyper-V SEV-SNP enlightened guest doesn't support ioapic
>> +     * and legacy APIC page read/write. Switch to hv apic here.
>> +     */
>> +    disable_ioapic_support();
> 
> Where are we switching hv_apic? May I am missing something here?
>

Nice catch! It's a fossil comment from when there was no x2apic support.
Will fix it in the next version.


^ permalink raw reply

* RE: [PATCH V3 5/9] x86/hyperv: Use vmmcall to implement Hyper-V hypercall in sev-snp enlightened guest
From: Michael Kelley (LINUX) @ 2023-07-26 14:29 UTC (permalink / raw)
  To: Tianyu Lan, KY Srinivasan, Haiyang Zhang, wei.liu@kernel.org,
	Dexuan Cui, tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	dave.hansen@linux.intel.com, x86@kernel.org, hpa@zytor.com,
	daniel.lezcano@linaro.org, arnd@arndb.de
  Cc: Tianyu Lan, linux-arch@vger.kernel.org,
	linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org,
	vkuznets@redhat.com
In-Reply-To: <89c9f27c-f539-ef75-dc67-bdb0a8480c4b@gmail.com>

From: Tianyu Lan <ltykernel@gmail.com> Sent: Wednesday, July 26, 2023 6:47 AM
> 
> On 7/26/2023 11:44 AM, Michael Kelley (LINUX) wrote:
> >> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> >> index 2fa38e9f6207..025eda129d99 100644
> >> --- a/arch/x86/include/asm/mshyperv.h
> >> +++ b/arch/x86/include/asm/mshyperv.h
> >> @@ -64,12 +64,12 @@ static inline u64 hv_do_hypercall(u64 control, void *input,
> void *output)
> >>   	if (!hv_hypercall_pg)
> >>   		return U64_MAX;
> >>
> >> -	__asm__ __volatile__("mov %4, %%r8\n"
> >> -			     CALL_NOSPEC
> >> +	__asm__ __volatile__("mov %[output], %%r8\n"
> >> +			     ALTERNATIVE("vmmcall", CALL_NOSPEC, X86_FEATURE_SEV_ES)
> > Since this code is for SEV-SNP, what's the thinking behind using
> > X86_FEATURE_SEV_ES in the ALTERNATIVE statements?   Don't you need
> > to use X86_FEATURE_SEV_SNP (which is being added in another patch set that
> > Boris Petkov pointed out).
> 
> Hi Michael:
> 	Thanks for your review. The patch mentioned by Boris has not been
> merged and so still use X86_FEATURE_SEV_ES here. We may replace the
> feature flag with X86_FEATURE_SEV_SNP after it's upstreamed.
> 

Just so I'm clear, is it true that in an SEV-SNP VM, the CPUID flags for
SEV-ES *and* SEV-SNP are set?  That would seem to be necessary for
your approach to work.

I wonder if it would be better to take the patch from Brijesh Singh
that adds X86_FEATURE_SEV_SNP and add it to your patch set (with
Brijesh's agreement, of course).  That patch is small and straightforward.

> >
> > Also, does this patch depend on Peter Zijlstra's patch to support nested
> > ALTERNATIVE statements?  If so, that needs to be called out, probably in
> > the cover letter.  Peter's patch doesn't yet appear in linux-next.
> >
> 
> It may work without Peterz's patch. Please see
> https://lkml.org/lkml/2023/6/27/520
> Peterz's patch optimizes ALTERNATIVE_n implementation with nested
> expression.

OK, good.

Michael

^ permalink raw reply

* Re: [Patch v2 3/5] RDMA/mana_ib : Add error eq and notification from SoC
From: kernel test robot @ 2023-07-26 15:11 UTC (permalink / raw)
  To: sharmaajay, Jason Gunthorpe, Leon Romanovsky, Dexuan Cui, Wei Liu,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: oe-kbuild-all, netdev, linux-rdma, linux-hyperv, linux-kernel,
	Ajay Sharma
In-Reply-To: <1690343820-20188-4-git-send-email-sharmaajay@linuxonhyperv.com>

Hi,

kernel test robot noticed the following build warnings:

[auto build test WARNING on rdma/for-next]
[also build test WARNING on linus/master v6.5-rc3 next-20230726]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/sharmaajay-linuxonhyperv-com/RDMA-mana-ib-Rename-all-mana_ib_dev-type-variables-to-mib_dev/20230726-115925
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
patch link:    https://lore.kernel.org/r/1690343820-20188-4-git-send-email-sharmaajay%40linuxonhyperv.com
patch subject: [Patch v2 3/5] RDMA/mana_ib : Add error eq and notification from SoC
config: x86_64-allyesconfig (https://download.01.org/0day-ci/archive/20230726/202307262214.QoyNnN8T-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce: (https://download.01.org/0day-ci/archive/20230726/202307262214.QoyNnN8T-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202307262214.QoyNnN8T-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/infiniband/hw/mana/main.c:508:6: warning: no previous prototype for 'mana_ib_soc_event_handler' [-Wmissing-prototypes]
     508 | void mana_ib_soc_event_handler(void *ctx, struct gdma_queue *queue,
         |      ^~~~~~~~~~~~~~~~~~~~~~~~~


vim +/mana_ib_soc_event_handler +508 drivers/infiniband/hw/mana/main.c

   507	
 > 508	void mana_ib_soc_event_handler(void *ctx, struct gdma_queue *queue,
   509					struct gdma_event *event)
   510	{
   511		struct mana_ib_dev *mib_dev = (struct mana_ib_dev *)ctx;
   512	
   513		switch (event->type) {
   514		case GDMA_EQE_SOC_EVENT_NOTIFICATION:
   515			ibdev_info(&mib_dev->ib_dev, "Received SOC Notification");
   516			break;
   517		case GDMA_EQE_SOC_EVENT_TEST:
   518			ibdev_info(&mib_dev->ib_dev, "Received SoC Test");
   519			break;
   520		default:
   521			ibdev_err(&mib_dev->ib_dev, "Received unsolicited evt %d",
   522				event->type);
   523		}
   524	}
   525	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply

* RE: [PATCH V3,net-next] net: mana: Add page pool for RX buffers
From: Haiyang Zhang @ 2023-07-26 15:51 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org
  Cc: brouer@redhat.com, Dexuan Cui, KY Srinivasan, Paul Rosswurm,
	olaf@aepfle.de, vkuznets@redhat.com, davem@davemloft.net,
	wei.liu@kernel.org, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, leon@kernel.org, Long Li,
	ssengar@linux.microsoft.com, linux-rdma@vger.kernel.org,
	daniel@iogearbox.net, john.fastabend@gmail.com,
	bpf@vger.kernel.org, ast@kernel.org, Ajay Sharma, hawk@kernel.org,
	tglx@linutronix.de, shradhagupta@linux.microsoft.com,
	linux-kernel@vger.kernel.org, Ilias Apalodimas
In-Reply-To: <6396223c-6008-0e1b-e6ed-79c04c87a5e0@redhat.com>



> -----Original Message-----
> From: Jesper Dangaard Brouer <jbrouer@redhat.com>
> Sent: Wednesday, July 26, 2023 5:23 AM
> >
> > In mana_gd_setup_irqs(), we set the default IRQ/CPU affinity to
> > gc->numa_node too, so it won't revert the nid initial setting.
> >
> > Currently, the Azure hypervisor always indicates numa 0 as default. (In
> > the future, it will start to provide the accurate default dev node.) When a
> > user manually changes the IRQ/CPU affinity for perf tuning, we want to
> > allow page_pool_nid_changed() to update the pool. Is this OK?
> >
> 
> If I were you, I would wait with the page_pool_nid_changed()
> "optimization" and do a benchmark to see if this actually has a
> benefit.  (You can do this in another patch).  (In an Azure hypervisor
> environment it might not be the right choice).
Ok, I will submit a patch without the page_pool_nid_changed() optimization 
for now, and will do more testing on this.

> This reminds me, do you have any benchmark data on the improvement this
> patch (using page_pool) gave?
With iperf and 128 threads test, this patch improved the throughput by 12-15%, 
and decreased the IRQ associated CPU's usage from 99-100% to 10-50%.

Thanks,
- Haiyang


^ permalink raw reply

* Re: [PATCH RFC net-next v5 07/14] virtio/vsock: add common datagram send path
From: Bobby Eshleman @ 2023-07-26 17:08 UTC (permalink / raw)
  To: Arseniy Krasnov
  Cc: Bobby Eshleman, Stefan Hajnoczi, Stefano Garzarella,
	Michael S. Tsirkin, Jason Wang, Xuan Zhuo, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, K. Y. Srinivasan,
	Haiyang Zhang, Wei Liu, Dexuan Cui, Bryan Tan, Vishnu Dasa,
	VMware PV-Drivers Reviewers, Dan Carpenter, Simon Horman, kvm,
	virtualization, netdev, linux-kernel, linux-hyperv, bpf
In-Reply-To: <051e4091-556c-4592-4a72-4dacf0015da8@gmail.com>

On Sat, Jul 22, 2023 at 11:16:05AM +0300, Arseniy Krasnov wrote:
> 
> 
> On 19.07.2023 03:50, Bobby Eshleman wrote:
> > This commit implements the common function
> > virtio_transport_dgram_enqueue for enqueueing datagrams. It does not add
> > usage in either vhost or virtio yet.
> > 
> > Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> > ---
> >  net/vmw_vsock/virtio_transport_common.c | 76 ++++++++++++++++++++++++++++++++-
> >  1 file changed, 75 insertions(+), 1 deletion(-)
> > 
> > diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> > index ffcbdd77feaa..3bfaff758433 100644
> > --- a/net/vmw_vsock/virtio_transport_common.c
> > +++ b/net/vmw_vsock/virtio_transport_common.c
> > @@ -819,7 +819,81 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
> >  			       struct msghdr *msg,
> >  			       size_t dgram_len)
> >  {
> > -	return -EOPNOTSUPP;
> > +	/* Here we are only using the info struct to retain style uniformity
> > +	 * and to ease future refactoring and merging.
> > +	 */
> > +	struct virtio_vsock_pkt_info info_stack = {
> > +		.op = VIRTIO_VSOCK_OP_RW,
> > +		.msg = msg,
> > +		.vsk = vsk,
> > +		.type = VIRTIO_VSOCK_TYPE_DGRAM,
> > +	};
> > +	const struct virtio_transport *t_ops;
> > +	struct virtio_vsock_pkt_info *info;
> > +	struct sock *sk = sk_vsock(vsk);
> > +	struct virtio_vsock_hdr *hdr;
> > +	u32 src_cid, src_port;
> > +	struct sk_buff *skb;
> > +	void *payload;
> > +	int noblock;
> > +	int err;
> > +
> > +	info = &info_stack;
> 
> I think 'info' assignment could be moved below, to the place where it is used
> first time.
> 
> > +
> > +	if (dgram_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> > +		return -EMSGSIZE;
> > +
> > +	t_ops = virtio_transport_get_ops(vsk);
> > +	if (unlikely(!t_ops))
> > +		return -EFAULT;
> > +
> > +	/* Unlike some of our other sending functions, this function is not
> > +	 * intended for use without a msghdr.
> > +	 */
> > +	if (WARN_ONCE(!msg, "vsock dgram bug: no msghdr found for dgram enqueue\n"))
> > +		return -EFAULT;
> 
> > Sorry, but is that possible? I thought 'msg' is always provided by the general socket layer (e.g. before
> > af_vsock.c code) and can't be NULL for DGRAM. Please correct me if I'm wrong.
> 
> Also I see, that in af_vsock.c , 'vsock_dgram_sendmsg()' dereferences 'msg' for checking MSG_OOB without any
> checks (before calling transport callback - this function in case of virtio). So I think if we want to keep
> this type of check - such check must be placed in af_vsock.c or somewhere before first dereference of this pointer.
> 

There is some talk about dgram sockets adding additional message types
in the future that help with congestion control. Those messages won't
come from the socket layer, so msghdr will be null. Since there is no
other function for sending datagrams, it seemed likely that this
function would be reworked for that purpose. I felt that adding this
check was a direct way to make it explicit that this function is
currently designed only for the socket-layer caller.

Perhaps a comment would suffice?

> > +
> > +	noblock = msg->msg_flags & MSG_DONTWAIT;
> > +
> > +	/* Use sock_alloc_send_skb to throttle by sk_sndbuf. This helps avoid
> > +	 * triggering the OOM.
> > +	 */
> > +	skb = sock_alloc_send_skb(sk, dgram_len + VIRTIO_VSOCK_SKB_HEADROOM,
> > +				  noblock, &err);
> > +	if (!skb)
> > +		return err;
> > +
> > +	skb_reserve(skb, VIRTIO_VSOCK_SKB_HEADROOM);
> > +
> > +	src_cid = t_ops->transport.get_local_cid();
> > +	src_port = vsk->local_addr.svm_port;
> > +
> > +	hdr = virtio_vsock_hdr(skb);
> > +	hdr->type	= cpu_to_le16(info->type);
> > +	hdr->op		= cpu_to_le16(info->op);
> > +	hdr->src_cid	= cpu_to_le64(src_cid);
> > +	hdr->dst_cid	= cpu_to_le64(remote_addr->svm_cid);
> > +	hdr->src_port	= cpu_to_le32(src_port);
> > +	hdr->dst_port	= cpu_to_le32(remote_addr->svm_port);
> > +	hdr->flags	= cpu_to_le32(info->flags);
> > +	hdr->len	= cpu_to_le32(dgram_len);
> > +
> > +	skb_set_owner_w(skb, sk);
> > +
> > +	payload = skb_put(skb, dgram_len);
> > +	err = memcpy_from_msg(payload, msg, dgram_len);
> > +	if (err)
> > +		return err;
> 
> > Do we need to free the allocated skb here?
> 

Yep, thanks.

> > +
> > +	trace_virtio_transport_alloc_pkt(src_cid, src_port,
> > +					 remote_addr->svm_cid,
> > +					 remote_addr->svm_port,
> > +					 dgram_len,
> > +					 info->type,
> > +					 info->op,
> > +					 0);
> > +
> > +	return t_ops->send_pkt(skb);
> >  }
> >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
> >  
> > 
> 
> Thanks, Arseniy

Thanks for the review!

Best,
Bobby

^ permalink raw reply

* Re: [PATCH RFC net-next v5 11/14] vhost/vsock: implement datagram support
From: Bobby Eshleman @ 2023-07-26 17:55 UTC (permalink / raw)
  To: Arseniy Krasnov
  Cc: Bobby Eshleman, Stefan Hajnoczi, Stefano Garzarella,
	Michael S. Tsirkin, Jason Wang, Xuan Zhuo, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, K. Y. Srinivasan,
	Haiyang Zhang, Wei Liu, Dexuan Cui, Bryan Tan, Vishnu Dasa,
	VMware PV-Drivers Reviewers, Dan Carpenter, Simon Horman, kvm,
	virtualization, netdev, linux-kernel, linux-hyperv, bpf
In-Reply-To: <b15d237e-31b5-40ae-83fc-e71649febd2b@gmail.com>

On Sat, Jul 22, 2023 at 11:42:38AM +0300, Arseniy Krasnov wrote:
> 
> 
> On 19.07.2023 03:50, Bobby Eshleman wrote:
> > This commit implements datagram support for vhost/vsock by teaching
> > vhost to use the common virtio transport datagram functions.
> > 
> > If the virtio RX buffer is too small, then the transmission is
> > abandoned, the packet dropped, and EHOSTUNREACH is added to the socket's
> > error queue.
> > 
> > Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> > ---
> >  drivers/vhost/vsock.c    | 62 +++++++++++++++++++++++++++++++++++++++++++++---
> >  net/vmw_vsock/af_vsock.c |  5 +++-
> >  2 files changed, 63 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> > index d5d6a3c3f273..da14260c6654 100644
> > --- a/drivers/vhost/vsock.c
> > +++ b/drivers/vhost/vsock.c
> > @@ -8,6 +8,7 @@
> >   */
> >  #include <linux/miscdevice.h>
> >  #include <linux/atomic.h>
> > +#include <linux/errqueue.h>
> >  #include <linux/module.h>
> >  #include <linux/mutex.h>
> >  #include <linux/vmalloc.h>
> > @@ -32,7 +33,8 @@
> >  enum {
> >  	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
> >  			       (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
> > -			       (1ULL << VIRTIO_VSOCK_F_SEQPACKET)
> > +			       (1ULL << VIRTIO_VSOCK_F_SEQPACKET) |
> > +			       (1ULL << VIRTIO_VSOCK_F_DGRAM)
> >  };
> >  
> >  enum {
> > @@ -56,6 +58,7 @@ struct vhost_vsock {
> >  	atomic_t queued_replies;
> >  
> >  	u32 guest_cid;
> > +	bool dgram_allow;
> >  	bool seqpacket_allow;
> >  };
> >  
> > @@ -86,6 +89,32 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
> >  	return NULL;
> >  }
> >  
> > +/* Claims ownership of the skb, do not free the skb after calling! */
> > +static void
> > +vhost_transport_error(struct sk_buff *skb, int err)
> > +{
> > +	struct sock_exterr_skb *serr;
> > +	struct sock *sk = skb->sk;
> > +	struct sk_buff *clone;
> > +
> > +	serr = SKB_EXT_ERR(skb);
> > +	memset(serr, 0, sizeof(*serr));
> > +	serr->ee.ee_errno = err;
> > +	serr->ee.ee_origin = SO_EE_ORIGIN_NONE;
> > +
> > +	clone = skb_clone(skb, GFP_KERNEL);
> 
> > Maybe for the skb which is the error carrier we can use 'sock_omalloc()', not 'skb_clone()'? TCP uses skbs
> > allocated by this function as carriers of the error structure. I guess 'skb_clone()' also clones the data of the original,
> > but I think that there is no need for the data, as we insert it into the error queue of the socket.
> 
> What do You think?

IIUC skb_clone() is often used in this scenario so that the user can
retrieve the error-causing packet from the error queue.  Is there some
reason we shouldn't do this?

I'm seeing that the serr bits need to occur on the clone here, not the
original. I didn't realize the SKB_EXT_ERR() is a skb->cb cast. I'm not
actually sure how this passes the test case since ->cb isn't cloned.

> 
> > +	if (!clone)
> > +		return;
> 
> > What will happen here 'if (!clone)'? Will the skb leak, as it was removed from the queue?
> 

Ah yes, true.

> > +
> > +	if (sock_queue_err_skb(sk, clone))
> > +		kfree_skb(clone);
> > +
> > +	sk->sk_err = err;
> > +	sk_error_report(sk);
> > +
> > +	kfree_skb(skb);
> > +}
> > +
> >  static void
> >  vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
> >  			    struct vhost_virtqueue *vq)
> > @@ -160,9 +189,15 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
> >  		hdr = virtio_vsock_hdr(skb);
> >  
> >  		/* If the packet is greater than the space available in the
> > -		 * buffer, we split it using multiple buffers.
> > +		 * buffer, we split it using multiple buffers for connectible
> > +		 * sockets and drop the packet for datagram sockets.
> >  		 */
> >  		if (payload_len > iov_len - sizeof(*hdr)) {
> > +			if (le16_to_cpu(hdr->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
> > +				vhost_transport_error(skb, EHOSTUNREACH);
> > +				continue;
> > +			}
> > +
> >  			payload_len = iov_len - sizeof(*hdr);
> >  
> >  			/* As we are copying pieces of large packet's buffer to
> > @@ -394,6 +429,7 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
> >  	return val < vq->num;
> >  }
> >  
> > +static bool vhost_transport_dgram_allow(u32 cid, u32 port);
> >  static bool vhost_transport_seqpacket_allow(u32 remote_cid);
> >  
> >  static struct virtio_transport vhost_transport = {
> > @@ -410,7 +446,8 @@ static struct virtio_transport vhost_transport = {
> >  		.cancel_pkt               = vhost_transport_cancel_pkt,
> >  
> >  		.dgram_enqueue            = virtio_transport_dgram_enqueue,
> > -		.dgram_allow              = virtio_transport_dgram_allow,
> > +		.dgram_allow              = vhost_transport_dgram_allow,
> > +		.dgram_addr_init          = virtio_transport_dgram_addr_init,
> >  
> >  		.stream_enqueue           = virtio_transport_stream_enqueue,
> >  		.stream_dequeue           = virtio_transport_stream_dequeue,
> > @@ -443,6 +480,22 @@ static struct virtio_transport vhost_transport = {
> >  	.send_pkt = vhost_transport_send_pkt,
> >  };
> >  
> > +static bool vhost_transport_dgram_allow(u32 cid, u32 port)
> > +{
> > +	struct vhost_vsock *vsock;
> > +	bool dgram_allow = false;
> > +
> > +	rcu_read_lock();
> > +	vsock = vhost_vsock_get(cid);
> > +
> > +	if (vsock)
> > +		dgram_allow = vsock->dgram_allow;
> > +
> > +	rcu_read_unlock();
> > +
> > +	return dgram_allow;
> > +}
> > +
> >  static bool vhost_transport_seqpacket_allow(u32 remote_cid)
> >  {
> >  	struct vhost_vsock *vsock;
> > @@ -799,6 +852,9 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
> >  	if (features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET))
> >  		vsock->seqpacket_allow = true;
> >  
> > +	if (features & (1ULL << VIRTIO_VSOCK_F_DGRAM))
> > +		vsock->dgram_allow = true;
> > +
> >  	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
> >  		vq = &vsock->vqs[i];
> >  		mutex_lock(&vq->mutex);
> > diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> > index e73f3b2c52f1..449ed63ac2b0 100644
> > --- a/net/vmw_vsock/af_vsock.c
> > +++ b/net/vmw_vsock/af_vsock.c
> > @@ -1427,9 +1427,12 @@ int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
> >  		return prot->recvmsg(sk, msg, len, flags, NULL);
> >  #endif
> >  
> > -	if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
> > +	if (unlikely(flags & MSG_OOB))
> >  		return -EOPNOTSUPP;
> >  
> > +	if (unlikely(flags & MSG_ERRQUEUE))
> > +		return sock_recv_errqueue(sk, msg, len, SOL_VSOCK, 0);
> > +
> 
> > Sorry, but I get a build error here, because SOL_VSOCK is undefined. I think it should be added to
> include/linux/socket.h and to uapi files also for future use in userspace.
> 

Strange, I built each patch individually without issue. My base is
netdev/main with your SOL_VSOCK patch applied. I will look today and see
if I'm missing something.

> Also Stefano Garzarella <sgarzare@redhat.com> suggested adding a define, something like VSOCK_RECVERR,
> in the same way as IP_RECVERR, and use it as last parameter of 'sock_recv_errqueue()'.
> 

Got it, thanks.

> >  	transport = vsk->transport;
> >  
> >  	/* Retrieve the head sk_buff from the socket's receive queue. */
> > 
> 
> Thanks, Arseniy

Thanks,
Bobby

^ permalink raw reply

* Re: [PATCH RFC net-next v5 13/14] virtio/vsock: implement datagram support
From: Bobby Eshleman @ 2023-07-26 17:58 UTC (permalink / raw)
  To: Arseniy Krasnov
  Cc: Bobby Eshleman, Stefan Hajnoczi, Stefano Garzarella,
	Michael S. Tsirkin, Jason Wang, Xuan Zhuo, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, K. Y. Srinivasan,
	Haiyang Zhang, Wei Liu, Dexuan Cui, Bryan Tan, Vishnu Dasa,
	VMware PV-Drivers Reviewers, Dan Carpenter, Simon Horman, kvm,
	virtualization, netdev, linux-kernel, linux-hyperv, bpf
In-Reply-To: <adeed3a8-68fe-bdb7-e4a1-48044dbe5436@gmail.com>

On Sat, Jul 22, 2023 at 11:45:29AM +0300, Arseniy Krasnov wrote:
> 
> 
> On 19.07.2023 03:50, Bobby Eshleman wrote:
> > This commit implements datagram support for virtio/vsock by teaching
> > virtio to use the general virtio transport ->dgram_addr_init() function
> > and implementing a new version of ->dgram_allow().
> > 
> > Additionally, it drops virtio_transport_dgram_allow() as an exported
> > symbol because it is no longer used in other transports.
> > 
> > Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> > ---
> >  include/linux/virtio_vsock.h            |  1 -
> >  net/vmw_vsock/virtio_transport.c        | 24 +++++++++++++++++++++++-
> >  net/vmw_vsock/virtio_transport_common.c |  6 ------
> >  3 files changed, 23 insertions(+), 8 deletions(-)
> > 
> > diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> > index b3856b8a42b3..d0a4f08b12c1 100644
> > --- a/include/linux/virtio_vsock.h
> > +++ b/include/linux/virtio_vsock.h
> > @@ -211,7 +211,6 @@ void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val);
> >  u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk);
> >  bool virtio_transport_stream_is_active(struct vsock_sock *vsk);
> >  bool virtio_transport_stream_allow(u32 cid, u32 port);
> > -bool virtio_transport_dgram_allow(u32 cid, u32 port);
> >  void virtio_transport_dgram_addr_init(struct sk_buff *skb,
> >  				      struct sockaddr_vm *addr);
> >  
> > diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> > index ac2126c7dac5..713718861bd4 100644
> > --- a/net/vmw_vsock/virtio_transport.c
> > +++ b/net/vmw_vsock/virtio_transport.c
> > @@ -63,6 +63,7 @@ struct virtio_vsock {
> >  
> >  	u32 guest_cid;
> >  	bool seqpacket_allow;
> > +	bool dgram_allow;
> >  };
> >  
> >  static u32 virtio_transport_get_local_cid(void)
> > @@ -413,6 +414,7 @@ static void virtio_vsock_rx_done(struct virtqueue *vq)
> >  	queue_work(virtio_vsock_workqueue, &vsock->rx_work);
> >  }
> >  
> > +static bool virtio_transport_dgram_allow(u32 cid, u32 port);
> 
> > Maybe add the body here, without the prototype? Same for loopback and vhost.
> 

Sounds okay with me, but this seems to go against the pattern
established by seqpacket. Any reason why?

> >  static bool virtio_transport_seqpacket_allow(u32 remote_cid);
> >  
> >  static struct virtio_transport virtio_transport = {
> > @@ -430,6 +432,7 @@ static struct virtio_transport virtio_transport = {
> >  
> >  		.dgram_enqueue            = virtio_transport_dgram_enqueue,
> >  		.dgram_allow              = virtio_transport_dgram_allow,
> > +		.dgram_addr_init          = virtio_transport_dgram_addr_init,
> >  
> >  		.stream_dequeue           = virtio_transport_stream_dequeue,
> >  		.stream_enqueue           = virtio_transport_stream_enqueue,
> > @@ -462,6 +465,21 @@ static struct virtio_transport virtio_transport = {
> >  	.send_pkt = virtio_transport_send_pkt,
> >  };
> >  
> > +static bool virtio_transport_dgram_allow(u32 cid, u32 port)
> > +{
> > +	struct virtio_vsock *vsock;
> > +	bool dgram_allow;
> > +
> > +	dgram_allow = false;
> > +	rcu_read_lock();
> > +	vsock = rcu_dereference(the_virtio_vsock);
> > +	if (vsock)
> > +		dgram_allow = vsock->dgram_allow;
> > +	rcu_read_unlock();
> > +
> > +	return dgram_allow;
> > +}
> > +
> >  static bool virtio_transport_seqpacket_allow(u32 remote_cid)
> >  {
> >  	struct virtio_vsock *vsock;
> > @@ -655,6 +673,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
> >  	if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
> >  		vsock->seqpacket_allow = true;
> >  
> > +	if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_DGRAM))
> > +		vsock->dgram_allow = true;
> > +
> >  	vdev->priv = vsock;
> >  
> >  	ret = virtio_vsock_vqs_init(vsock);
> > @@ -747,7 +768,8 @@ static struct virtio_device_id id_table[] = {
> >  };
> >  
> >  static unsigned int features[] = {
> > -	VIRTIO_VSOCK_F_SEQPACKET
> > +	VIRTIO_VSOCK_F_SEQPACKET,
> > +	VIRTIO_VSOCK_F_DGRAM
> >  };
> >  
> >  static struct virtio_driver virtio_vsock_driver = {
> > diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> > index 96118e258097..77898f5325cd 100644
> > --- a/net/vmw_vsock/virtio_transport_common.c
> > +++ b/net/vmw_vsock/virtio_transport_common.c
> > @@ -783,12 +783,6 @@ bool virtio_transport_stream_allow(u32 cid, u32 port)
> >  }
> >  EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> >  
> > -bool virtio_transport_dgram_allow(u32 cid, u32 port)
> > -{
> > -	return false;
> > -}
> > -EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
> > -
> >  int virtio_transport_connect(struct vsock_sock *vsk)
> >  {
> >  	struct virtio_vsock_pkt_info info = {
> > 
> 
> Thanks, Arseniy

Thanks,
Bobby

^ permalink raw reply

* Re: [PATCH RFC net-next v5 00/14] virtio/vsock: support datagrams
From: Bobby Eshleman @ 2023-07-26 18:02 UTC (permalink / raw)
  To: Bobby Eshleman
  Cc: Stefan Hajnoczi, Stefano Garzarella, Michael S. Tsirkin,
	Jason Wang, Xuan Zhuo, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, K. Y. Srinivasan, Haiyang Zhang,
	Wei Liu, Dexuan Cui, Bryan Tan, Vishnu Dasa,
	VMware PV-Drivers Reviewers, Dan Carpenter, Simon Horman,
	Krasnov Arseniy, kvm, virtualization, netdev, linux-kernel,
	linux-hyperv, bpf, Jiang Wang
In-Reply-To: <20230413-b4-vsock-dgram-v5-0-581bd37fdb26@bytedance.com>

On Wed, Jul 19, 2023 at 12:50:04AM +0000, Bobby Eshleman wrote:
> Hey all!
> 
> This series introduces support for datagrams to virtio/vsock.
> 
> It is a spin-off (and smaller version) of this series from the summer:
>   https://lore.kernel.org/all/cover.1660362668.git.bobby.eshleman@bytedance.com/
> 
> Please note that this is an RFC and should not be merged until
> associated changes are made to the virtio specification, which will
> follow after discussion from this series.
> 
> Another aside, the v4 of the series has only been mildly tested with a
> run of tools/testing/vsock/vsock_test. Some code likely needs cleaning
> up, but I'm hoping to get some of the design choices agreed upon before
> spending too much time making it pretty.

Stale from v4 cover, sorry.

> 
> This series first supports datagrams in a basic form for virtio, and
> then optimizes the sendpath for all datagram transports.
> 
> The result is a very fast datagram communication protocol that
> outperforms even UDP on multi-queue virtio-net w/ vhost on a variety
> of multi-threaded workload samples.
> 
> For those that are curious, some summary data comparing UDP and VSOCK
> DGRAM (N=5):
> 
> 	vCPUS: 16
> 	virtio-net queues: 16
> 	payload size: 4KB
> 	Setup: bare metal + vm (non-nested)
> 
> 	UDP: 287.59 MB/s
> 	VSOCK DGRAM: 509.2 MB/s

Also stale. After dropping the lockless sendpath patch and deferring it
to later, this data does not apply to the series anymore.

> 
> Some notes about the implementation...
> 
> This datagram implementation forces datagrams to self-throttle according
> to the threshold set by sk_sndbuf. It behaves similar to the credits
> used by streams in its effect on throughput and memory consumption, but
> it is not influenced by the receiving socket as credits are.
> 
> The device drops packets silently.
> 
> As discussed previously, this series introduces datagrams and defers
> fairness to future work. See discussion in v2 for more context around
> datagrams, fairness, and this implementation.
> 
> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> ---
> Changes in v5:
> - teach vhost to drop dgram when a datagram exceeds the receive buffer
>   - now uses MSG_ERRQUEUE and depends on Arseniy's zerocopy patch:
> 	"vsock: read from socket's error queue"
> - replace multiple ->dgram_* callbacks with single ->dgram_addr_init()
>   callback
> - refactor virtio dgram skb allocator to reduce conflicts w/ zerocopy series
> - add _fallback/_FALLBACK suffix to dgram transport variables/macros
> - add WARN_ONCE() for table_size / VSOCK_HASH issue
> - add static to vsock_find_bound_socket_common
> - dedupe code in vsock_dgram_sendmsg() using module_got var
> - drop concurrent sendmsg() for dgram and defer to future series
> - Add more tests
>   - test EHOSTUNREACH in errqueue
>   - test stream + dgram address collision
> - improve clarity of dgram msg bounds test code
> - Link to v4: https://lore.kernel.org/r/20230413-b4-vsock-dgram-v4-0-0cebbb2ae899@bytedance.com
> 
> Changes in v4:
> - style changes
>   - vsock: use sk_vsock(vsk) in vsock_dgram_recvmsg instead of
>     &sk->vsk
>   - vsock: fix xmas tree declaration
>   - vsock: fix spacing issues
>   - virtio/vsock: virtio_transport_recv_dgram returns void because err
>     unused
> - sparse analysis warnings/errors
>   - virtio/vsock: fix uninitialized skerr on destroy
>   - virtio/vsock: fix uninitialized err var on goto out
>   - vsock: fix declarations that need static
>   - vsock: fix __rcu annotation order
> - bugs
>   - vsock: fix null ptr in remote_info code
>   - vsock/dgram: make transport_dgram a fallback instead of first
>     priority
>   - vsock: remove redundant rcu read lock acquire in getname()
> - tests
>   - add more tests (message bounds and more)
>   - add vsock_dgram_bind() helper
>   - add vsock_dgram_connect() helper
> 
> Changes in v3:
> - Support multi-transport dgram, changing logic in connect/bind
>   to support VMCI case
> - Support per-pkt transport lookup for sendto() case
> - Fix dgram_allow() implementation
> - Fix dgram feature bit number (now it is 3)
> - Fix binding so dgram and connectible (cid,port) spaces are
>   non-overlapping
> - RCU protect transport ptr so connect() calls never leave
>   a lockless read of the transport and remote_addr are always
>   in sync
> - Link to v2: https://lore.kernel.org/r/20230413-b4-vsock-dgram-v2-0-079cc7cee62e@bytedance.com
> 
> ---
> Bobby Eshleman (13):
>       af_vsock: generalize vsock_dgram_recvmsg() to all transports
>       af_vsock: refactor transport lookup code
>       af_vsock: support multi-transport datagrams
>       af_vsock: generalize bind table functions
>       af_vsock: use a separate dgram bind table
>       virtio/vsock: add VIRTIO_VSOCK_TYPE_DGRAM
>       virtio/vsock: add common datagram send path
>       af_vsock: add vsock_find_bound_dgram_socket()
>       virtio/vsock: add common datagram recv path
>       virtio/vsock: add VIRTIO_VSOCK_F_DGRAM feature bit
>       vhost/vsock: implement datagram support
>       vsock/loopback: implement datagram support
>       virtio/vsock: implement datagram support
> 
> Jiang Wang (1):
>       test/vsock: add vsock dgram tests
> 
>  drivers/vhost/vsock.c                   |  64 ++-
>  include/linux/virtio_vsock.h            |  10 +-
>  include/net/af_vsock.h                  |  14 +-
>  include/uapi/linux/virtio_vsock.h       |   2 +
>  net/vmw_vsock/af_vsock.c                | 281 ++++++++++---
>  net/vmw_vsock/hyperv_transport.c        |  13 -
>  net/vmw_vsock/virtio_transport.c        |  26 +-
>  net/vmw_vsock/virtio_transport_common.c | 190 +++++++--
>  net/vmw_vsock/vmci_transport.c          |  60 +--
>  net/vmw_vsock/vsock_loopback.c          |  10 +-
>  tools/testing/vsock/util.c              | 141 ++++++-
>  tools/testing/vsock/util.h              |   6 +
>  tools/testing/vsock/vsock_test.c        | 680 ++++++++++++++++++++++++++++++++
>  13 files changed, 1320 insertions(+), 177 deletions(-)
> ---
> base-commit: 37cadc266ebdc7e3531111c2b3304fa01b2131e8
> change-id: 20230413-b4-vsock-dgram-3b6eba6a64e5
> 
> Best regards,
> -- 
> Bobby Eshleman <bobby.eshleman@bytedance.com>
> 

^ permalink raw reply

* Re: [PATCH RFC net-next v5 01/14] af_vsock: generalize vsock_dgram_recvmsg() to all transports
From: Bobby Eshleman @ 2023-07-26 18:21 UTC (permalink / raw)
  To: Arseniy Krasnov
  Cc: Bobby Eshleman, Stefan Hajnoczi, Stefano Garzarella,
	Michael S. Tsirkin, Jason Wang, Xuan Zhuo, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, K. Y. Srinivasan,
	Haiyang Zhang, Wei Liu, Dexuan Cui, Bryan Tan, Vishnu Dasa,
	VMware PV-Drivers Reviewers, Dan Carpenter, Simon Horman, kvm,
	virtualization, netdev, linux-kernel, linux-hyperv, bpf
In-Reply-To: <27a430f8-18e9-7cc2-c773-dde8ae824bfc@gmail.com>

On Mon, Jul 24, 2023 at 09:11:44PM +0300, Arseniy Krasnov wrote:
> 
> 
> On 19.07.2023 03:50, Bobby Eshleman wrote:
> > This commit drops the transport->dgram_dequeue callback and makes
> > vsock_dgram_recvmsg() generic to all transports.
> > 
> > To make this possible, two transport-level changes are introduced:
> > - implementation of the ->dgram_addr_init() callback to initialize
> >   the sockaddr_vm structure with data from incoming socket buffers.
> > - transport implementations set the skb->data pointer to the beginning
> >   of the payload prior to adding the skb to the socket's receive queue.
> >   That is, they must use skb_pull() before enqueuing. This is an
> >   agreement between the transport and the socket layer that skb->data
> >   always points to the beginning of the payload (and not, for example,
> >   the packet header).
> > 
> > Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> > ---
> >  drivers/vhost/vsock.c                   |  1 -
> >  include/linux/virtio_vsock.h            |  5 ---
> >  include/net/af_vsock.h                  |  3 +-
> >  net/vmw_vsock/af_vsock.c                | 40 ++++++++++++++++++++++-
> >  net/vmw_vsock/hyperv_transport.c        |  7 ----
> >  net/vmw_vsock/virtio_transport.c        |  1 -
> >  net/vmw_vsock/virtio_transport_common.c |  9 -----
> >  net/vmw_vsock/vmci_transport.c          | 58 ++++++---------------------------
> >  net/vmw_vsock/vsock_loopback.c          |  1 -
> >  9 files changed, 50 insertions(+), 75 deletions(-)
> > 
> > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> > index 6578db78f0ae..ae8891598a48 100644
> > --- a/drivers/vhost/vsock.c
> > +++ b/drivers/vhost/vsock.c
> > @@ -410,7 +410,6 @@ static struct virtio_transport vhost_transport = {
> >  		.cancel_pkt               = vhost_transport_cancel_pkt,
> >  
> >  		.dgram_enqueue            = virtio_transport_dgram_enqueue,
> > -		.dgram_dequeue            = virtio_transport_dgram_dequeue,
> >  		.dgram_bind               = virtio_transport_dgram_bind,
> >  		.dgram_allow              = virtio_transport_dgram_allow,
> >  
> > diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> > index c58453699ee9..18cbe8d37fca 100644
> > --- a/include/linux/virtio_vsock.h
> > +++ b/include/linux/virtio_vsock.h
> > @@ -167,11 +167,6 @@ virtio_transport_stream_dequeue(struct vsock_sock *vsk,
> >  				size_t len,
> >  				int type);
> >  int
> > -virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
> > -			       struct msghdr *msg,
> > -			       size_t len, int flags);
> > -
> > -int
> >  virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
> >  				   struct msghdr *msg,
> >  				   size_t len);
> > diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> > index 0e7504a42925..305d57502e89 100644
> > --- a/include/net/af_vsock.h
> > +++ b/include/net/af_vsock.h
> > @@ -120,11 +120,10 @@ struct vsock_transport {
> >  
> >  	/* DGRAM. */
> >  	int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *);
> > -	int (*dgram_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
> > -			     size_t len, int flags);
> >  	int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *,
> >  			     struct msghdr *, size_t len);
> >  	bool (*dgram_allow)(u32 cid, u32 port);
> > +	void (*dgram_addr_init)(struct sk_buff *skb, struct sockaddr_vm *addr);
> >  
> >  	/* STREAM. */
> >  	/* TODO: stream_bind() */
> > diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> > index deb72a8c44a7..ad71e084bf2f 100644
> > --- a/net/vmw_vsock/af_vsock.c
> > +++ b/net/vmw_vsock/af_vsock.c
> > @@ -1272,11 +1272,15 @@ static int vsock_dgram_connect(struct socket *sock,
> >  int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
> >  			size_t len, int flags)
> >  {
> > +	const struct vsock_transport *transport;
> >  #ifdef CONFIG_BPF_SYSCALL
> >  	const struct proto *prot;
> >  #endif
> >  	struct vsock_sock *vsk;
> > +	struct sk_buff *skb;
> > +	size_t payload_len;
> >  	struct sock *sk;
> > +	int err;
> >  
> >  	sk = sock->sk;
> >  	vsk = vsock_sk(sk);
> > @@ -1287,7 +1291,41 @@ int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
> >  		return prot->recvmsg(sk, msg, len, flags, NULL);
> >  #endif
> >  
> > -	return vsk->transport->dgram_dequeue(vsk, msg, len, flags);
> > +	if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
> > +		return -EOPNOTSUPP;
> > +
> > +	transport = vsk->transport;
> > +
> > +	/* Retrieve the head sk_buff from the socket's receive queue. */
> > +	err = 0;
> > +	skb = skb_recv_datagram(sk_vsock(vsk), flags, &err);
> > +	if (!skb)
> > +		return err;
> > +
> > +	payload_len = skb->len;
> > +
> > +	if (payload_len > len) {
> > +		payload_len = len;
> > +		msg->msg_flags |= MSG_TRUNC;
> > +	}
> > +
> > +	/* Place the datagram payload in the user's iovec. */
> > +	err = skb_copy_datagram_msg(skb, 0, msg, payload_len);
> > +	if (err)
> > +		goto out;
> > +
> > +	if (msg->msg_name) {
> > +		/* Provide the address of the sender. */
> > +		DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
> > +
> > +		transport->dgram_addr_init(skb, vm_addr);
> 
> Do we need to check that dgram_addr_init != NULL? I see that not all transports have this
> callback set in this patch.
> 

How about adding the check somewhere outside of the hotpath, such as
when the transport is assigned?

> > +		msg->msg_namelen = sizeof(*vm_addr);
> > +	}
> > +	err = payload_len;
> > +
> > +out:
> > +	skb_free_datagram(&vsk->sk, skb);
> > +	return err;
> >  }
> >  EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg);
> >  
> > diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
> > index 7cb1a9d2cdb4..7f1ea434656d 100644
> > --- a/net/vmw_vsock/hyperv_transport.c
> > +++ b/net/vmw_vsock/hyperv_transport.c
> > @@ -556,12 +556,6 @@ static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr)
> >  	return -EOPNOTSUPP;
> >  }
> >  
> > -static int hvs_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
> > -			     size_t len, int flags)
> > -{
> > -	return -EOPNOTSUPP;
> > -}
> > -
> >  static int hvs_dgram_enqueue(struct vsock_sock *vsk,
> >  			     struct sockaddr_vm *remote, struct msghdr *msg,
> >  			     size_t dgram_len)
> > @@ -833,7 +827,6 @@ static struct vsock_transport hvs_transport = {
> >  	.shutdown                 = hvs_shutdown,
> >  
> >  	.dgram_bind               = hvs_dgram_bind,
> > -	.dgram_dequeue            = hvs_dgram_dequeue,
> >  	.dgram_enqueue            = hvs_dgram_enqueue,
> >  	.dgram_allow              = hvs_dgram_allow,
> >  
> > diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> > index e95df847176b..66edffdbf303 100644
> > --- a/net/vmw_vsock/virtio_transport.c
> > +++ b/net/vmw_vsock/virtio_transport.c
> > @@ -429,7 +429,6 @@ static struct virtio_transport virtio_transport = {
> >  		.cancel_pkt               = virtio_transport_cancel_pkt,
> >  
> >  		.dgram_bind               = virtio_transport_dgram_bind,
> > -		.dgram_dequeue            = virtio_transport_dgram_dequeue,
> >  		.dgram_enqueue            = virtio_transport_dgram_enqueue,
> >  		.dgram_allow              = virtio_transport_dgram_allow,
> >  
> > diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> > index b769fc258931..01ea1402ad40 100644
> > --- a/net/vmw_vsock/virtio_transport_common.c
> > +++ b/net/vmw_vsock/virtio_transport_common.c
> > @@ -583,15 +583,6 @@ virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
> >  }
> >  EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_enqueue);
> >  
> > -int
> > -virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
> > -			       struct msghdr *msg,
> > -			       size_t len, int flags)
> > -{
> > -	return -EOPNOTSUPP;
> > -}
> > -EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
> > -
> >  s64 virtio_transport_stream_has_data(struct vsock_sock *vsk)
> >  {
> >  	struct virtio_vsock_sock *vvs = vsk->trans;
> > diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
> > index b370070194fa..0bbbdb222245 100644
> > --- a/net/vmw_vsock/vmci_transport.c
> > +++ b/net/vmw_vsock/vmci_transport.c
> > @@ -641,6 +641,7 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
> >  	sock_hold(sk);
> >  	skb_put(skb, size);
> >  	memcpy(skb->data, dg, size);
> > +	skb_pull(skb, VMCI_DG_HEADERSIZE);
> >  	sk_receive_skb(sk, skb, 0);
> >  
> >  	return VMCI_SUCCESS;
> > @@ -1731,57 +1732,18 @@ static int vmci_transport_dgram_enqueue(
> >  	return err - sizeof(*dg);
> >  }
> >  
> > -static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk,
> > -					struct msghdr *msg, size_t len,
> > -					int flags)
> > +static void vmci_transport_dgram_addr_init(struct sk_buff *skb,
> > +					   struct sockaddr_vm *addr)
> >  {
> > -	int err;
> >  	struct vmci_datagram *dg;
> > -	size_t payload_len;
> > -	struct sk_buff *skb;
> > -
> > -	if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
> > -		return -EOPNOTSUPP;
> > -
> > -	/* Retrieve the head sk_buff from the socket's receive queue. */
> > -	err = 0;
> > -	skb = skb_recv_datagram(&vsk->sk, flags, &err);
> > -	if (!skb)
> > -		return err;
> > -
> > -	dg = (struct vmci_datagram *)skb->data;
> > -	if (!dg)
> > -		/* err is 0, meaning we read zero bytes. */
> > -		goto out;
> > -
> > -	payload_len = dg->payload_size;
> > -	/* Ensure the sk_buff matches the payload size claimed in the packet. */
> > -	if (payload_len != skb->len - sizeof(*dg)) {
> > -		err = -EINVAL;
> > -		goto out;
> > -	}
> > -
> > -	if (payload_len > len) {
> > -		payload_len = len;
> > -		msg->msg_flags |= MSG_TRUNC;
> > -	}
> > +	unsigned int cid, port;
> >  
> > -	/* Place the datagram payload in the user's iovec. */
> > -	err = skb_copy_datagram_msg(skb, sizeof(*dg), msg, payload_len);
> > -	if (err)
> > -		goto out;
> > -
> > -	if (msg->msg_name) {
> > -		/* Provide the address of the sender. */
> > -		DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
> > -		vsock_addr_init(vm_addr, dg->src.context, dg->src.resource);
> > -		msg->msg_namelen = sizeof(*vm_addr);
> > -	}
> > -	err = payload_len;
> > +	WARN_ONCE(skb->head == skb->data, "vmci vsock bug: bad dgram skb");
> >  
> > -out:
> > -	skb_free_datagram(&vsk->sk, skb);
> > -	return err;
> > +	dg = (struct vmci_datagram *)skb->head;
> > +	cid = dg->src.context;
> > +	port = dg->src.resource;
> > +	vsock_addr_init(addr, cid, port);
> 
> I think we
> 
> 1) can shorten this to:
> 
> vsock_addr_init(addr, dg->src.context, dg->src.resource);
> 
> 2) without the previous point, cid and port should be u32, as the VMCI structure has u32 fields
>    'context' and 'resource', and 'vsock_addr_init()' also takes u32 for both arguments.
> 
> Thanks, Arseniy

Sounds good, thanks.

> 
> >  }
> >  
> >  static bool vmci_transport_dgram_allow(u32 cid, u32 port)
> > @@ -2040,9 +2002,9 @@ static struct vsock_transport vmci_transport = {
> >  	.release = vmci_transport_release,
> >  	.connect = vmci_transport_connect,
> >  	.dgram_bind = vmci_transport_dgram_bind,
> > -	.dgram_dequeue = vmci_transport_dgram_dequeue,
> >  	.dgram_enqueue = vmci_transport_dgram_enqueue,
> >  	.dgram_allow = vmci_transport_dgram_allow,
> > +	.dgram_addr_init = vmci_transport_dgram_addr_init,
> >  	.stream_dequeue = vmci_transport_stream_dequeue,
> >  	.stream_enqueue = vmci_transport_stream_enqueue,
> >  	.stream_has_data = vmci_transport_stream_has_data,
> > diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
> > index 5c6360df1f31..2a59dd177c74 100644
> > --- a/net/vmw_vsock/vsock_loopback.c
> > +++ b/net/vmw_vsock/vsock_loopback.c
> > @@ -62,7 +62,6 @@ static struct virtio_transport loopback_transport = {
> >  		.cancel_pkt               = vsock_loopback_cancel_pkt,
> >  
> >  		.dgram_bind               = virtio_transport_dgram_bind,
> > -		.dgram_dequeue            = virtio_transport_dgram_dequeue,
> >  		.dgram_enqueue            = virtio_transport_dgram_enqueue,
> >  		.dgram_allow              = virtio_transport_dgram_allow,
> >  
> > 

Thanks,
Bobby

^ permalink raw reply

* Re: [PATCH RFC net-next v5 10/14] virtio/vsock: add VIRTIO_VSOCK_F_DGRAM feature bit
From: Michael S. Tsirkin @ 2023-07-26 18:38 UTC (permalink / raw)
  To: Bobby Eshleman
  Cc: Stefan Hajnoczi, Stefano Garzarella, Jason Wang, Xuan Zhuo,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	K. Y. Srinivasan, Haiyang Zhang, Wei Liu, Dexuan Cui, Bryan Tan,
	Vishnu Dasa, VMware PV-Drivers Reviewers, Dan Carpenter,
	Simon Horman, Krasnov Arseniy, kvm, virtualization, netdev,
	linux-kernel, linux-hyperv, bpf, Jiang Wang
In-Reply-To: <20230413-b4-vsock-dgram-v5-10-581bd37fdb26@bytedance.com>

On Wed, Jul 19, 2023 at 12:50:14AM +0000, Bobby Eshleman wrote:
> This commit adds a feature bit for virtio vsock to support datagrams.
> 
> Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> ---
>  include/uapi/linux/virtio_vsock.h | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> index 331be28b1d30..27b4b2b8bf13 100644
> --- a/include/uapi/linux/virtio_vsock.h
> +++ b/include/uapi/linux/virtio_vsock.h
> @@ -40,6 +40,7 @@
>  
>  /* The feature bitmap for virtio vsock */
>  #define VIRTIO_VSOCK_F_SEQPACKET	1	/* SOCK_SEQPACKET supported */
> +#define VIRTIO_VSOCK_F_DGRAM		3	/* SOCK_DGRAM supported */
>  
>  struct virtio_vsock_config {
>  	__le64 guest_cid;

Please do not add an interface without first getting it accepted in the
virtio spec.

> -- 
> 2.30.2


^ permalink raw reply

* Re: [PATCH RFC net-next v5 11/14] vhost/vsock: implement datagram support
From: Michael S. Tsirkin @ 2023-07-26 18:40 UTC (permalink / raw)
  To: Bobby Eshleman
  Cc: Stefan Hajnoczi, Stefano Garzarella, Jason Wang, Xuan Zhuo,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	K. Y. Srinivasan, Haiyang Zhang, Wei Liu, Dexuan Cui, Bryan Tan,
	Vishnu Dasa, VMware PV-Drivers Reviewers, Dan Carpenter,
	Simon Horman, Krasnov Arseniy, kvm, virtualization, netdev,
	linux-kernel, linux-hyperv, bpf
In-Reply-To: <20230413-b4-vsock-dgram-v5-11-581bd37fdb26@bytedance.com>

On Wed, Jul 19, 2023 at 12:50:15AM +0000, Bobby Eshleman wrote:
> This commit implements datagram support for vhost/vsock by teaching
> vhost to use the common virtio transport datagram functions.
> 
> If the virtio RX buffer is too small, then the transmission is
> abandoned, the packet dropped, and EHOSTUNREACH is added to the socket's
> error queue.
> 
> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>

EHOSTUNREACH?


> ---
>  drivers/vhost/vsock.c    | 62 +++++++++++++++++++++++++++++++++++++++++++++---
>  net/vmw_vsock/af_vsock.c |  5 +++-
>  2 files changed, 63 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index d5d6a3c3f273..da14260c6654 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -8,6 +8,7 @@
>   */
>  #include <linux/miscdevice.h>
>  #include <linux/atomic.h>
> +#include <linux/errqueue.h>
>  #include <linux/module.h>
>  #include <linux/mutex.h>
>  #include <linux/vmalloc.h>
> @@ -32,7 +33,8 @@
>  enum {
>  	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
>  			       (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
> -			       (1ULL << VIRTIO_VSOCK_F_SEQPACKET)
> +			       (1ULL << VIRTIO_VSOCK_F_SEQPACKET) |
> +			       (1ULL << VIRTIO_VSOCK_F_DGRAM)
>  };
>  
>  enum {
> @@ -56,6 +58,7 @@ struct vhost_vsock {
>  	atomic_t queued_replies;
>  
>  	u32 guest_cid;
> +	bool dgram_allow;
>  	bool seqpacket_allow;
>  };
>  
> @@ -86,6 +89,32 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>  	return NULL;
>  }
>  
> +/* Claims ownership of the skb, do not free the skb after calling! */
> +static void
> +vhost_transport_error(struct sk_buff *skb, int err)
> +{
> +	struct sock_exterr_skb *serr;
> +	struct sock *sk = skb->sk;
> +	struct sk_buff *clone;
> +
> +	serr = SKB_EXT_ERR(skb);
> +	memset(serr, 0, sizeof(*serr));
> +	serr->ee.ee_errno = err;
> +	serr->ee.ee_origin = SO_EE_ORIGIN_NONE;
> +
> +	clone = skb_clone(skb, GFP_KERNEL);
> +	if (!clone)
> +		return;
> +
> +	if (sock_queue_err_skb(sk, clone))
> +		kfree_skb(clone);
> +
> +	sk->sk_err = err;
> +	sk_error_report(sk);
> +
> +	kfree_skb(skb);
> +}
> +
>  static void
>  vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
>  			    struct vhost_virtqueue *vq)
> @@ -160,9 +189,15 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
>  		hdr = virtio_vsock_hdr(skb);
>  
>  		/* If the packet is greater than the space available in the
> -		 * buffer, we split it using multiple buffers.
> +		 * buffer, we split it using multiple buffers for connectible
> +		 * sockets and drop the packet for datagram sockets.
>  		 */

Won't this break things like the recently proposed zerocopy?
I think split-up has to be supported for all types.


>  		if (payload_len > iov_len - sizeof(*hdr)) {
> +			if (le16_to_cpu(hdr->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
> +				vhost_transport_error(skb, EHOSTUNREACH);
> +				continue;
> +			}
> +
>  			payload_len = iov_len - sizeof(*hdr);
>  
>  			/* As we are copying pieces of large packet's buffer to
> @@ -394,6 +429,7 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
>  	return val < vq->num;
>  }
>  
> +static bool vhost_transport_dgram_allow(u32 cid, u32 port);
>  static bool vhost_transport_seqpacket_allow(u32 remote_cid);
>  
>  static struct virtio_transport vhost_transport = {
> @@ -410,7 +446,8 @@ static struct virtio_transport vhost_transport = {
>  		.cancel_pkt               = vhost_transport_cancel_pkt,
>  
>  		.dgram_enqueue            = virtio_transport_dgram_enqueue,
> -		.dgram_allow              = virtio_transport_dgram_allow,
> +		.dgram_allow              = vhost_transport_dgram_allow,
> +		.dgram_addr_init          = virtio_transport_dgram_addr_init,
>  
>  		.stream_enqueue           = virtio_transport_stream_enqueue,
>  		.stream_dequeue           = virtio_transport_stream_dequeue,
> @@ -443,6 +480,22 @@ static struct virtio_transport vhost_transport = {
>  	.send_pkt = vhost_transport_send_pkt,
>  };
>  
> +static bool vhost_transport_dgram_allow(u32 cid, u32 port)
> +{
> +	struct vhost_vsock *vsock;
> +	bool dgram_allow = false;
> +
> +	rcu_read_lock();
> +	vsock = vhost_vsock_get(cid);
> +
> +	if (vsock)
> +		dgram_allow = vsock->dgram_allow;
> +
> +	rcu_read_unlock();
> +
> +	return dgram_allow;
> +}
> +
>  static bool vhost_transport_seqpacket_allow(u32 remote_cid)
>  {
>  	struct vhost_vsock *vsock;
> @@ -799,6 +852,9 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
>  	if (features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET))
>  		vsock->seqpacket_allow = true;
>  
> +	if (features & (1ULL << VIRTIO_VSOCK_F_DGRAM))
> +		vsock->dgram_allow = true;
> +
>  	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
>  		vq = &vsock->vqs[i];
>  		mutex_lock(&vq->mutex);
> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> index e73f3b2c52f1..449ed63ac2b0 100644
> --- a/net/vmw_vsock/af_vsock.c
> +++ b/net/vmw_vsock/af_vsock.c
> @@ -1427,9 +1427,12 @@ int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
>  		return prot->recvmsg(sk, msg, len, flags, NULL);
>  #endif
>  
> -	if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
> +	if (unlikely(flags & MSG_OOB))
>  		return -EOPNOTSUPP;
>  
> +	if (unlikely(flags & MSG_ERRQUEUE))
> +		return sock_recv_errqueue(sk, msg, len, SOL_VSOCK, 0);
> +
>  	transport = vsk->transport;
>  
>  	/* Retrieve the head sk_buff from the socket's receive queue. */
> 
> -- 
> 2.30.2


^ permalink raw reply

* [Patch v3 0/4] RDMA/mana_ib Read Capabilities
From: sharmaajay @ 2023-07-26 20:08 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Dexuan Cui, Wei Liu,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: linux-rdma, linux-hyperv, netdev, linux-kernel, Ajay Sharma

From: Ajay Sharma <sharmaajay@microsoft.com>

This patch series introduces some cleanup changes and
resource control changes. The mana and mana_ib devices
are used in common places, so consistent naming is
introduced. An adapter object is added as a container that
provides a common point of release for resources, and the
management software is queried to prevent resource overflow.
The series also introduces an async channel for management to
notify the clients in case of errors/info.

Ajay Sharma (4):
  RDMA/mana_ib : Rename all mana_ib_dev type variables to mib_dev
  RDMA/mana_ib : Register Mana IB  device with Management SW
  RDMA/mana_ib : Create adapter and Add error eq
  RDMA/mana_ib : Query adapter capabilities

 drivers/infiniband/hw/mana/cq.c               |  12 +-
 drivers/infiniband/hw/mana/device.c           |  72 +++--
 drivers/infiniband/hw/mana/main.c             | 282 +++++++++++++-----
 drivers/infiniband/hw/mana/mana_ib.h          |  96 +++++-
 drivers/infiniband/hw/mana/mr.c               |  42 ++-
 drivers/infiniband/hw/mana/qp.c               |  82 ++---
 drivers/infiniband/hw/mana/wq.c               |  21 +-
 .../net/ethernet/microsoft/mana/gdma_main.c   | 151 ++++++----
 drivers/net/ethernet/microsoft/mana/mana_en.c |   3 +
 include/net/mana/gdma.h                       |  16 +-
 10 files changed, 529 insertions(+), 248 deletions(-)

-- 
2.25.1


^ permalink raw reply

* [Patch v3 2/4] RDMA/mana_ib : Register Mana IB  device with Management SW
From: sharmaajay @ 2023-07-26 20:08 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Dexuan Cui, Wei Liu,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: linux-rdma, linux-hyperv, netdev, linux-kernel, Ajay Sharma
In-Reply-To: <1690402104-29518-1-git-send-email-sharmaajay@linuxonhyperv.com>

From: Ajay Sharma <sharmaajay@microsoft.com>

Each of the MANA InfiniBand devices must be registered
with the management software to request services/resources.
Register the MANA IB device with the management software,
which later helps obtain an adapter handle.

Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
---
 drivers/infiniband/hw/mana/device.c           | 20 +++++--
 drivers/infiniband/hw/mana/main.c             | 58 ++++++-------------
 drivers/infiniband/hw/mana/mana_ib.h          |  1 +
 drivers/infiniband/hw/mana/mr.c               | 17 ++----
 drivers/infiniband/hw/mana/qp.c               | 10 ++--
 .../net/ethernet/microsoft/mana/gdma_main.c   |  5 ++
 include/net/mana/gdma.h                       |  3 +
 7 files changed, 55 insertions(+), 59 deletions(-)

diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index 083f27246ba8..ea4c8c8fc10d 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -78,22 +78,34 @@ static int mana_ib_probe(struct auxiliary_device *adev,
 	mib_dev->ib_dev.num_comp_vectors = 1;
 	mib_dev->ib_dev.dev.parent = mdev->gdma_context->dev;
 
-	ret = ib_register_device(&mib_dev->ib_dev, "mana_%d",
-				 mdev->gdma_context->dev);
+	ret = mana_gd_register_device(&mib_dev->gc->mana_ib);
 	if (ret) {
-		ib_dealloc_device(&mib_dev->ib_dev);
-		return ret;
+		ibdev_err(&mib_dev->ib_dev, "Failed to register device, ret %d",
+			  ret);
+		goto free_ib_device;
 	}
 
+	ret = ib_register_device(&mib_dev->ib_dev, "mana_%d",
+				 mdev->gdma_context->dev);
+	if (ret)
+		goto deregister_device;
+
 	dev_set_drvdata(&adev->dev, mib_dev);
 
 	return 0;
+
+deregister_device:
+	mana_gd_deregister_device(&mib_dev->gc->mana_ib);
+free_ib_device:
+	ib_dealloc_device(&mib_dev->ib_dev);
+	return ret;
 }
 
 static void mana_ib_remove(struct auxiliary_device *adev)
 {
 	struct mana_ib_dev *mib_dev = dev_get_drvdata(&adev->dev);
 
+	mana_gd_deregister_device(&mib_dev->gc->mana_ib);
 	ib_unregister_device(&mib_dev->ib_dev);
 	ib_dealloc_device(&mib_dev->ib_dev);
 }
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 189e774cdab6..2c4e3c496644 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -8,7 +8,7 @@
 void mana_ib_uncfg_vport(struct mana_ib_dev *mib_dev, struct mana_ib_pd *pd,
 			 u32 port)
 {
-	struct gdma_dev *gd = mib_dev->gdma_dev;
+	struct gdma_dev *gd = &mib_dev->gc->mana;
 	struct mana_port_context *mpc;
 	struct net_device *ndev;
 	struct mana_context *mc;
@@ -32,7 +32,7 @@ int mana_ib_cfg_vport(struct mana_ib_dev *mib_dev, u32 port,
 		      struct mana_ib_pd *pd,
 		      u32 doorbell_id)
 {
-	struct gdma_dev *mdev = mib_dev->gdma_dev;
+	struct gdma_dev *mdev = &mib_dev->gc->mana;
 	struct mana_port_context *mpc;
 	struct mana_context *mc;
 	struct net_device *ndev;
@@ -81,17 +81,16 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	struct gdma_create_pd_req req = {};
 	enum gdma_pd_flags flags = 0;
 	struct mana_ib_dev *mib_dev;
-	struct gdma_dev *mdev;
+
 	int err;
 
 	mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
-	mdev = mib_dev->gdma_dev;
 
 	mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
 			     sizeof(resp));
 
 	req.flags = flags;
-	err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req,
+	err = mana_gd_send_request(mib_dev->gc, sizeof(req), &req,
 				   sizeof(resp), &resp);
 
 	if (err || resp.hdr.status) {
@@ -121,17 +120,15 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	struct gdma_destory_pd_resp resp = {};
 	struct gdma_destroy_pd_req req = {};
 	struct mana_ib_dev *mib_dev;
-	struct gdma_dev *mdev;
 	int err;
 
 	mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
-	mdev = mib_dev->gdma_dev;
 
 	mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req),
 			     sizeof(resp));
 
 	req.pd_handle = pd->pd_handle;
-	err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req,
+	err = mana_gd_send_request(mib_dev->gc, sizeof(req), &req,
 				   sizeof(resp), &resp);
 
 	if (err || resp.hdr.status) {
@@ -207,17 +204,13 @@ int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
 		container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
 	struct ib_device *ibdev = ibcontext->device;
 	struct mana_ib_dev *mib_dev;
-	struct gdma_context *gc;
-	struct gdma_dev *dev;
 	int doorbell_page;
 	int ret;
 
 	mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
-	dev = mib_dev->gdma_dev;
-	gc = dev->gdma_context;
 
 	/* Allocate a doorbell page index */
-	ret = mana_gd_allocate_doorbell_page(gc, &doorbell_page);
+	ret = mana_gd_allocate_doorbell_page(mib_dev->gc, &doorbell_page);
 	if (ret) {
 		ibdev_dbg(ibdev, "Failed to allocate doorbell page %d\n", ret);
 		return ret;
@@ -236,20 +229,17 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 		container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
 	struct ib_device *ibdev = ibcontext->device;
 	struct mana_ib_dev *mib_dev;
-	struct gdma_context *gc;
 	int ret;
 
 	mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
-	gc = mib_dev->gdma_dev->gdma_context;
 
-	ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell);
+	ret = mana_gd_destroy_doorbell_page(mib_dev->gc, mana_ucontext->doorbell);
 	if (ret)
 		ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
 }
 
 static int
 mana_ib_gd_first_dma_region(struct mana_ib_dev *mib_dev,
-			    struct gdma_context *gc,
 			    struct gdma_create_dma_region_req *create_req,
 			    size_t num_pages, mana_handle_t *gdma_region,
 			    u32 expected_status)
@@ -262,7 +252,7 @@ mana_ib_gd_first_dma_region(struct mana_ib_dev *mib_dev,
 		struct_size(create_req, page_addr_list, num_pages);
 	create_req->page_addr_list_len = num_pages;
 
-	err = mana_gd_send_request(gc, create_req_msg_size, create_req,
+	err = mana_gd_send_request(mib_dev->gc, create_req_msg_size, create_req,
 				   sizeof(create_resp), &create_resp);
 	if (err || create_resp.hdr.status != expected_status) {
 		ibdev_dbg(&mib_dev->ib_dev,
@@ -282,7 +272,7 @@ mana_ib_gd_first_dma_region(struct mana_ib_dev *mib_dev,
 }
 
 static int
-mana_ib_gd_add_dma_region(struct mana_ib_dev *mib_dev, struct gdma_context *gc,
+mana_ib_gd_add_dma_region(struct mana_ib_dev *mib_dev,
 			  struct gdma_dma_region_add_pages_req *add_req,
 			  unsigned int num_pages, u32 expected_status)
 {
@@ -295,7 +285,7 @@ mana_ib_gd_add_dma_region(struct mana_ib_dev *mib_dev, struct gdma_context *gc,
 			     add_req_msg_size, sizeof(add_resp));
 	add_req->page_addr_list_len = num_pages;
 
-	err = mana_gd_send_request(gc, add_req_msg_size, add_req,
+	err = mana_gd_send_request(mib_dev->gc, add_req_msg_size, add_req,
 				   sizeof(add_resp), &add_resp);
 	if (err || add_resp.hdr.status != expected_status) {
 		ibdev_dbg(&mib_dev->ib_dev,
@@ -323,18 +313,14 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *mib_dev,
 	struct ib_block_iter biter;
 	size_t max_pgs_add_cmd = 0;
 	size_t max_pgs_create_cmd;
-	struct gdma_context *gc;
 	size_t num_pages_total;
-	struct gdma_dev *mdev;
 	unsigned long page_sz;
 	unsigned int tail = 0;
 	u64 *page_addr_list;
 	void *request_buf;
 	int err;
 
-	mdev = mib_dev->gdma_dev;
-	gc = mdev->gdma_context;
-	hwc = gc->hwc.driver_data;
+	hwc = mib_dev->gc->hwc.driver_data;
 
 	/* Hardware requires dma region to align to chosen page size */
 	page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, 0);
@@ -388,7 +374,7 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *mib_dev,
 
 		if (!num_pages_processed) {
 			/* First create message */
-			err = mana_ib_gd_first_dma_region(mib_dev, gc, create_req,
+			err = mana_ib_gd_first_dma_region(mib_dev, create_req,
 							  tail, gdma_region,
 							  expected_status);
 			if (err)
@@ -403,7 +389,7 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *mib_dev,
 			page_addr_list = add_req->page_addr_list;
 		} else {
 			/* Subsequent create messages */
-			err = mana_ib_gd_add_dma_region(mib_dev, gc, add_req, tail,
+			err = mana_ib_gd_add_dma_region(mib_dev, add_req, tail,
 							expected_status);
 			if (err)
 				break;
@@ -429,13 +415,9 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *mib_dev,
 
 int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *mib_dev, u64 gdma_region)
 {
-	struct gdma_dev *mdev = mib_dev->gdma_dev;
-	struct gdma_context *gc;
-
-	gc = mdev->gdma_context;
 	ibdev_dbg(&mib_dev->ib_dev, "destroy dma region 0x%llx\n", gdma_region);
 
-	return mana_gd_destroy_dma_region(gc, gdma_region);
+	return mana_gd_destroy_dma_region(mib_dev->gc, gdma_region);
 }
 
 int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@ -444,13 +426,11 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
 		container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
 	struct ib_device *ibdev = ibcontext->device;
 	struct mana_ib_dev *mib_dev;
-	struct gdma_context *gc;
 	phys_addr_t pfn;
 	pgprot_t prot;
 	int ret;
 
 	mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
-	gc = mib_dev->gdma_dev->gdma_context;
 
 	if (vma->vm_pgoff != 0) {
 		ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma->vm_pgoff);
@@ -458,18 +438,18 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
 	}
 
 	/* Map to the page indexed by ucontext->doorbell */
-	pfn = (gc->phys_db_page_base +
-	       gc->db_page_size * mana_ucontext->doorbell) >>
+	pfn = (mib_dev->gc->phys_db_page_base +
+	       mib_dev->gc->db_page_size * mana_ucontext->doorbell) >>
 	      PAGE_SHIFT;
 	prot = pgprot_writecombine(vma->vm_page_prot);
 
-	ret = rdma_user_mmap_io(ibcontext, vma, pfn, gc->db_page_size, prot,
-				NULL);
+	ret = rdma_user_mmap_io(ibcontext, vma, pfn, mib_dev->gc->db_page_size,
+				prot, NULL);
 	if (ret)
 		ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret);
 	else
 		ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %u, ret %d\n",
-			  pfn, gc->db_page_size, ret);
+			  pfn, mib_dev->gc->db_page_size, ret);
 
 	return ret;
 }
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index ee4efd0af278..3a2ba6b96f15 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -30,6 +30,7 @@
 struct mana_ib_dev {
 	struct ib_device ib_dev;
 	struct gdma_dev *gdma_dev;
+	struct gdma_context *gc;
 };
 
 struct mana_ib_wq {
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
index f6a53906204d..3106d1bce837 100644
--- a/drivers/infiniband/hw/mana/mr.c
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -29,13 +29,10 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *mib_dev,
 				struct mana_ib_mr *mr,
 				struct gdma_create_mr_params *mr_params)
 {
-	struct gdma_dev *mdev = mib_dev->gdma_dev;
 	struct gdma_create_mr_response resp = {};
 	struct gdma_create_mr_request req = {};
-	struct gdma_context *gc;
 	int err;
 
-	gc = mdev->gdma_context;
 
 	mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_MR, sizeof(req),
 			     sizeof(resp));
@@ -56,7 +53,8 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *mib_dev,
 		return -EINVAL;
 	}
 
-	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	err = mana_gd_send_request(mib_dev->gc, sizeof(req), &req,
+				   sizeof(resp), &resp);
 
 	if (err || resp.hdr.status) {
 		ibdev_dbg(&mib_dev->ib_dev, "Failed to create mr %d, %u", err,
@@ -77,22 +75,19 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *mib_dev,
 static int mana_ib_gd_destroy_mr(struct mana_ib_dev *mib_dev, u64 mr_handle)
 {
 	struct gdma_destroy_mr_response resp = {};
-	struct gdma_dev *mdev = mib_dev->gdma_dev;
 	struct gdma_destroy_mr_request req = {};
-	struct gdma_context *gc;
 	int err;
 
-	gc = mdev->gdma_context;
-
 	mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_MR, sizeof(req),
 			     sizeof(resp));
 
 	req.mr_handle = mr_handle;
 
-	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	err = mana_gd_send_request(mib_dev->gc, sizeof(req), &req,
+				   sizeof(resp), &resp);
 	if (err || resp.hdr.status) {
-		dev_err(gc->dev, "Failed to destroy MR: %d, 0x%x\n", err,
-			resp.hdr.status);
+		dev_err(mib_dev->gc->dev, "Failed to destroy MR: %d, 0x%x\n",
+			err, resp.hdr.status);
 		if (!err)
 			err = -EPROTO;
 		return err;
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 2e3a57123ed7..874cfd794825 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -21,7 +21,7 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *mib_dev,
 	u32 req_buf_size;
 	int i, err;
 
-	mdev = mib_dev->gdma_dev;
+	mdev = &mib_dev->gc->mana;
 	gc = mdev->gdma_context;
 
 	req_buf_size =
@@ -102,7 +102,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 	struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
 	struct mana_ib_create_qp_rss_resp resp = {};
 	struct mana_ib_create_qp_rss ucmd = {};
-	struct gdma_dev *gd = mib_dev->gdma_dev;
+	struct gdma_dev *gd = &mib_dev->gc->mana;
 	mana_handle_t *mana_ind_table;
 	struct mana_port_context *mpc;
 	struct mana_context *mc;
@@ -267,7 +267,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 		rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
 					  ibucontext);
 	struct mana_ib_create_qp_resp resp = {};
-	struct gdma_dev *gd = mib_dev->gdma_dev;
+	struct gdma_dev *gd = &mib_dev->gc->mana;
 	struct mana_ib_create_qp ucmd = {};
 	struct mana_obj_spec wq_spec = {};
 	struct mana_obj_spec cq_spec = {};
@@ -437,7 +437,7 @@ static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp,
 {
 	struct mana_ib_dev *mib_dev =
 		container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
-	struct gdma_dev *gd = mib_dev->gdma_dev;
+	struct gdma_dev *gd = &mib_dev->gc->mana;
 	struct mana_port_context *mpc;
 	struct mana_context *mc;
 	struct net_device *ndev;
@@ -464,7 +464,7 @@ static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata)
 {
 	struct mana_ib_dev *mib_dev =
 		container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
-	struct gdma_dev *gd = mib_dev->gdma_dev;
+	struct gdma_dev *gd = &mib_dev->gc->mana;
 	struct ib_pd *ibpd = qp->ibqp.pd;
 	struct mana_port_context *mpc;
 	struct mana_context *mc;
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 8f3f78b68592..9fa7a2d6c2b2 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -139,6 +139,9 @@ static int mana_gd_detect_devices(struct pci_dev *pdev)
 		if (dev_type == GDMA_DEVICE_MANA) {
 			gc->mana.gdma_context = gc;
 			gc->mana.dev_id = dev;
+		} else if (dev_type == GDMA_DEVICE_MANA_IB) {
+			gc->mana_ib.dev_id = dev;
+			gc->mana_ib.gdma_context = gc;
 		}
 	}
 
@@ -940,6 +943,7 @@ int mana_gd_register_device(struct gdma_dev *gd)
 
 	return 0;
 }
+EXPORT_SYMBOL(mana_gd_register_device);
 
 int mana_gd_deregister_device(struct gdma_dev *gd)
 {
@@ -970,6 +974,7 @@ int mana_gd_deregister_device(struct gdma_dev *gd)
 
 	return err;
 }
+EXPORT_SYMBOL(mana_gd_deregister_device);
 
 u32 mana_gd_wq_avail_space(struct gdma_queue *wq)
 {
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 96c120160f15..e2b212dd722b 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -63,6 +63,7 @@ enum {
 	GDMA_DEVICE_NONE	= 0,
 	GDMA_DEVICE_HWC		= 1,
 	GDMA_DEVICE_MANA	= 2,
+	GDMA_DEVICE_MANA_IB	= 3,
 };
 
 struct gdma_resource {
@@ -384,6 +385,8 @@ struct gdma_context {
 
 	/* Azure network adapter */
 	struct gdma_dev		mana;
+	/* rdma device */
+	struct gdma_dev		mana_ib;
 };
 
 #define MAX_NUM_GDMA_DEVICES	4
-- 
2.25.1


^ permalink raw reply related

* [Patch v3 1/4] RDMA/mana_ib : Rename all mana_ib_dev type variables to mib_dev
From: sharmaajay @ 2023-07-26 20:08 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky, Dexuan Cui, Wei Liu,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: linux-rdma, linux-hyperv, netdev, linux-kernel, Ajay Sharma
In-Reply-To: <1690402104-29518-1-git-send-email-sharmaajay@linuxonhyperv.com>

From: Ajay Sharma <sharmaajay@microsoft.com>

This patch does not introduce any functional changes. It
establishes a naming convention that keeps the two device
types distinguishable, especially when both are used in the
same function. Rename all mana_ib_dev type variables to
mib_dev to cleanly separate eth dev and ibdev variables.

Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
---
 drivers/infiniband/hw/mana/cq.c      | 12 ++--
 drivers/infiniband/hw/mana/device.c  | 34 +++++------
 drivers/infiniband/hw/mana/main.c    | 87 ++++++++++++++--------------
 drivers/infiniband/hw/mana/mana_ib.h |  9 +--
 drivers/infiniband/hw/mana/mr.c      | 29 +++++-----
 drivers/infiniband/hw/mana/qp.c      | 82 +++++++++++++-------------
 drivers/infiniband/hw/mana/wq.c      | 21 +++----
 7 files changed, 140 insertions(+), 134 deletions(-)

diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index d141cab8a1e6..1aed4e6360ba 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -11,10 +11,10 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
 	struct ib_device *ibdev = ibcq->device;
 	struct mana_ib_create_cq ucmd = {};
-	struct mana_ib_dev *mdev;
+	struct mana_ib_dev *mib_dev;
 	int err;
 
-	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
 
 	if (udata->inlen < sizeof(ucmd))
 		return -EINVAL;
@@ -41,7 +41,7 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		return err;
 	}
 
-	err = mana_ib_gd_create_dma_region(mdev, cq->umem, &cq->gdma_region);
+	err = mana_ib_gd_create_dma_region(mib_dev, cq->umem, &cq->gdma_region);
 	if (err) {
 		ibdev_dbg(ibdev,
 			  "Failed to create dma region for create cq, %d\n",
@@ -68,11 +68,11 @@ int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 {
 	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
 	struct ib_device *ibdev = ibcq->device;
-	struct mana_ib_dev *mdev;
+	struct mana_ib_dev *mib_dev;
 
-	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
 
-	mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
+	mana_ib_gd_destroy_dma_region(mib_dev, cq->gdma_region);
 	ib_umem_release(cq->umem);
 
 	return 0;
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index d4541b8707e4..083f27246ba8 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -51,51 +51,51 @@ static int mana_ib_probe(struct auxiliary_device *adev,
 {
 	struct mana_adev *madev = container_of(adev, struct mana_adev, adev);
 	struct gdma_dev *mdev = madev->mdev;
+	struct mana_ib_dev *mib_dev;
 	struct mana_context *mc;
-	struct mana_ib_dev *dev;
 	int ret;
 
 	mc = mdev->driver_data;
 
-	dev = ib_alloc_device(mana_ib_dev, ib_dev);
-	if (!dev)
+	mib_dev = ib_alloc_device(mana_ib_dev, ib_dev);
+	if (!mib_dev)
 		return -ENOMEM;
 
-	ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops);
+	ib_set_device_ops(&mib_dev->ib_dev, &mana_ib_dev_ops);
 
-	dev->ib_dev.phys_port_cnt = mc->num_ports;
+	mib_dev->ib_dev.phys_port_cnt = mc->num_ports;
 
-	ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
-		  mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
+	ibdev_dbg(&mib_dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
+		  mdev->dev_id.as_uint32, mib_dev->ib_dev.phys_port_cnt);
 
-	dev->gdma_dev = mdev;
-	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
+	mib_dev->gdma_dev = mdev;
+	mib_dev->ib_dev.node_type = RDMA_NODE_IB_CA;
 
 	/*
 	 * num_comp_vectors needs to set to the max MSIX index
 	 * when interrupts and event queues are implemented
 	 */
-	dev->ib_dev.num_comp_vectors = 1;
-	dev->ib_dev.dev.parent = mdev->gdma_context->dev;
+	mib_dev->ib_dev.num_comp_vectors = 1;
+	mib_dev->ib_dev.dev.parent = mdev->gdma_context->dev;
 
-	ret = ib_register_device(&dev->ib_dev, "mana_%d",
+	ret = ib_register_device(&mib_dev->ib_dev, "mana_%d",
 				 mdev->gdma_context->dev);
 	if (ret) {
-		ib_dealloc_device(&dev->ib_dev);
+		ib_dealloc_device(&mib_dev->ib_dev);
 		return ret;
 	}
 
-	dev_set_drvdata(&adev->dev, dev);
+	dev_set_drvdata(&adev->dev, mib_dev);
 
 	return 0;
 }
 
 static void mana_ib_remove(struct auxiliary_device *adev)
 {
-	struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
+	struct mana_ib_dev *mib_dev = dev_get_drvdata(&adev->dev);
 
-	ib_unregister_device(&dev->ib_dev);
-	ib_dealloc_device(&dev->ib_dev);
+	ib_unregister_device(&mib_dev->ib_dev);
+	ib_dealloc_device(&mib_dev->ib_dev);
 }
 
 static const struct auxiliary_device_id mana_id_table[] = {
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 7be4c3adb4e2..189e774cdab6 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -5,10 +5,10 @@
 
 #include "mana_ib.h"
 
-void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
+void mana_ib_uncfg_vport(struct mana_ib_dev *mib_dev, struct mana_ib_pd *pd,
 			 u32 port)
 {
-	struct gdma_dev *gd = dev->gdma_dev;
+	struct gdma_dev *gd = mib_dev->gdma_dev;
 	struct mana_port_context *mpc;
 	struct net_device *ndev;
 	struct mana_context *mc;
@@ -28,10 +28,11 @@ void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
 	mutex_unlock(&pd->vport_mutex);
 }
 
-int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd,
+int mana_ib_cfg_vport(struct mana_ib_dev *mib_dev, u32 port,
+		      struct mana_ib_pd *pd,
 		      u32 doorbell_id)
 {
-	struct gdma_dev *mdev = dev->gdma_dev;
+	struct gdma_dev *mdev = mib_dev->gdma_dev;
 	struct mana_port_context *mpc;
 	struct mana_context *mc;
 	struct net_device *ndev;
@@ -45,7 +46,7 @@ int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd,
 
 	pd->vport_use_count++;
 	if (pd->vport_use_count > 1) {
-		ibdev_dbg(&dev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Skip as this PD is already configured vport\n");
 		mutex_unlock(&pd->vport_mutex);
 		return 0;
@@ -56,7 +57,8 @@ int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd,
 		pd->vport_use_count--;
 		mutex_unlock(&pd->vport_mutex);
 
-		ibdev_dbg(&dev->ib_dev, "Failed to configure vPort %d\n", err);
+		ibdev_dbg(&mib_dev->ib_dev, "Failed to configure vPort %d\n",
+			  err);
 		return err;
 	}
 
@@ -65,7 +67,7 @@ int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd,
 	pd->tx_shortform_allowed = mpc->tx_shortform_allowed;
 	pd->tx_vp_offset = mpc->tx_vp_offset;
 
-	ibdev_dbg(&dev->ib_dev, "vport handle %llx pdid %x doorbell_id %x\n",
+	ibdev_dbg(&mib_dev->ib_dev, "vport handle %llx pdid %x doorbell_id %x\n",
 		  mpc->port_handle, pd->pdn, doorbell_id);
 
 	return 0;
@@ -78,12 +80,12 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	struct gdma_create_pd_resp resp = {};
 	struct gdma_create_pd_req req = {};
 	enum gdma_pd_flags flags = 0;
-	struct mana_ib_dev *dev;
+	struct mana_ib_dev *mib_dev;
 	struct gdma_dev *mdev;
 	int err;
 
-	dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
-	mdev = dev->gdma_dev;
+	mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	mdev = mib_dev->gdma_dev;
 
 	mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
 			     sizeof(resp));
@@ -93,7 +95,7 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 				   sizeof(resp), &resp);
 
 	if (err || resp.hdr.status) {
-		ibdev_dbg(&dev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Failed to get pd_id err %d status %u\n", err,
 			  resp.hdr.status);
 		if (!err)
@@ -104,7 +106,7 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 
 	pd->pd_handle = resp.pd_handle;
 	pd->pdn = resp.pd_id;
-	ibdev_dbg(&dev->ib_dev, "pd_handle 0x%llx pd_id %d\n",
+	ibdev_dbg(&mib_dev->ib_dev, "pd_handle 0x%llx pd_id %d\n",
 		  pd->pd_handle, pd->pdn);
 
 	mutex_init(&pd->vport_mutex);
@@ -118,12 +120,12 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	struct ib_device *ibdev = ibpd->device;
 	struct gdma_destory_pd_resp resp = {};
 	struct gdma_destroy_pd_req req = {};
-	struct mana_ib_dev *dev;
+	struct mana_ib_dev *mib_dev;
 	struct gdma_dev *mdev;
 	int err;
 
-	dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
-	mdev = dev->gdma_dev;
+	mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	mdev = mib_dev->gdma_dev;
 
 	mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req),
 			     sizeof(resp));
@@ -133,7 +135,7 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 				   sizeof(resp), &resp);
 
 	if (err || resp.hdr.status) {
-		ibdev_dbg(&dev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Failed to destroy pd_handle 0x%llx err %d status %u",
 			  pd->pd_handle, err, resp.hdr.status);
 		if (!err)
@@ -204,14 +206,14 @@ int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
 	struct mana_ib_ucontext *ucontext =
 		container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
 	struct ib_device *ibdev = ibcontext->device;
-	struct mana_ib_dev *mdev;
+	struct mana_ib_dev *mib_dev;
 	struct gdma_context *gc;
 	struct gdma_dev *dev;
 	int doorbell_page;
 	int ret;
 
-	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
-	dev = mdev->gdma_dev;
+	mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	dev = mib_dev->gdma_dev;
 	gc = dev->gdma_context;
 
 	/* Allocate a doorbell page index */
@@ -233,12 +235,12 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	struct mana_ib_ucontext *mana_ucontext =
 		container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
 	struct ib_device *ibdev = ibcontext->device;
-	struct mana_ib_dev *mdev;
+	struct mana_ib_dev *mib_dev;
 	struct gdma_context *gc;
 	int ret;
 
-	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
-	gc = mdev->gdma_dev->gdma_context;
+	mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	gc = mib_dev->gdma_dev->gdma_context;
 
 	ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell);
 	if (ret)
@@ -246,7 +248,7 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 }
 
 static int
-mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
+mana_ib_gd_first_dma_region(struct mana_ib_dev *mib_dev,
 			    struct gdma_context *gc,
 			    struct gdma_create_dma_region_req *create_req,
 			    size_t num_pages, mana_handle_t *gdma_region,
@@ -263,7 +265,7 @@ mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
 	err = mana_gd_send_request(gc, create_req_msg_size, create_req,
 				   sizeof(create_resp), &create_resp);
 	if (err || create_resp.hdr.status != expected_status) {
-		ibdev_dbg(&dev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Failed to create DMA region: %d, 0x%x\n",
 			  err, create_resp.hdr.status);
 		if (!err)
@@ -273,14 +275,14 @@ mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
 	}
 
 	*gdma_region = create_resp.dma_region_handle;
-	ibdev_dbg(&dev->ib_dev, "Created DMA region handle 0x%llx\n",
+	ibdev_dbg(&mib_dev->ib_dev, "Created DMA region handle 0x%llx\n",
 		  *gdma_region);
 
 	return 0;
 }
 
 static int
-mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct gdma_context *gc,
+mana_ib_gd_add_dma_region(struct mana_ib_dev *mib_dev, struct gdma_context *gc,
 			  struct gdma_dma_region_add_pages_req *add_req,
 			  unsigned int num_pages, u32 expected_status)
 {
@@ -296,7 +298,7 @@ mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct gdma_context *gc,
 	err = mana_gd_send_request(gc, add_req_msg_size, add_req,
 				   sizeof(add_resp), &add_resp);
 	if (err || add_resp.hdr.status != expected_status) {
-		ibdev_dbg(&dev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Failed to create DMA region: %d, 0x%x\n",
 			  err, add_resp.hdr.status);
 
@@ -309,7 +311,8 @@ mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct gdma_context *gc,
 	return 0;
 }
 
-int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+int mana_ib_gd_create_dma_region(struct mana_ib_dev *mib_dev,
+				 struct ib_umem *umem,
 				 mana_handle_t *gdma_region)
 {
 	struct gdma_dma_region_add_pages_req *add_req = NULL;
@@ -329,14 +332,14 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
 	void *request_buf;
 	int err;
 
-	mdev = dev->gdma_dev;
+	mdev = mib_dev->gdma_dev;
 	gc = mdev->gdma_context;
 	hwc = gc->hwc.driver_data;
 
 	/* Hardware requires dma region to align to chosen page size */
 	page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, 0);
 	if (!page_sz) {
-		ibdev_dbg(&dev->ib_dev, "failed to find page size.\n");
+		ibdev_dbg(&mib_dev->ib_dev, "failed to find page size.\n");
 		return -ENOMEM;
 	}
 	num_pages_total = ib_umem_num_dma_blocks(umem, page_sz);
@@ -362,13 +365,13 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
 	create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT;
 	create_req->page_count = num_pages_total;
 
-	ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total %lu\n",
+	ibdev_dbg(&mib_dev->ib_dev, "size_dma_region %lu num_pages_total %lu\n",
 		  umem->length, num_pages_total);
 
-	ibdev_dbg(&dev->ib_dev, "page_sz %lu offset_in_page %u\n",
+	ibdev_dbg(&mib_dev->ib_dev, "page_sz %lu offset_in_page %u\n",
 		  page_sz, create_req->offset_in_page);
 
-	ibdev_dbg(&dev->ib_dev, "num_pages_to_handle %lu, gdma_page_type %u",
+	ibdev_dbg(&mib_dev->ib_dev, "num_pages_to_handle %lu, gdma_page_type %u",
 		  num_pages_to_handle, create_req->gdma_page_type);
 
 	page_addr_list = create_req->page_addr_list;
@@ -385,7 +388,7 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
 
 		if (!num_pages_processed) {
 			/* First create message */
-			err = mana_ib_gd_first_dma_region(dev, gc, create_req,
+			err = mana_ib_gd_first_dma_region(mib_dev, gc, create_req,
 							  tail, gdma_region,
 							  expected_status);
 			if (err)
@@ -400,7 +403,7 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
 			page_addr_list = add_req->page_addr_list;
 		} else {
 			/* Subsequent create messages */
-			err = mana_ib_gd_add_dma_region(dev, gc, add_req, tail,
+			err = mana_ib_gd_add_dma_region(mib_dev, gc, add_req, tail,
 							expected_status);
 			if (err)
 				break;
@@ -417,20 +420,20 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
 	}
 
 	if (err)
-		mana_ib_gd_destroy_dma_region(dev, *gdma_region);
+		mana_ib_gd_destroy_dma_region(mib_dev, *gdma_region);
 
 out:
 	kfree(request_buf);
 	return err;
 }
 
-int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, u64 gdma_region)
+int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *mib_dev, u64 gdma_region)
 {
-	struct gdma_dev *mdev = dev->gdma_dev;
+	struct gdma_dev *mdev = mib_dev->gdma_dev;
 	struct gdma_context *gc;
 
 	gc = mdev->gdma_context;
-	ibdev_dbg(&dev->ib_dev, "destroy dma region 0x%llx\n", gdma_region);
+	ibdev_dbg(&mib_dev->ib_dev, "destroy dma region 0x%llx\n", gdma_region);
 
 	return mana_gd_destroy_dma_region(gc, gdma_region);
 }
@@ -440,14 +443,14 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
 	struct mana_ib_ucontext *mana_ucontext =
 		container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
 	struct ib_device *ibdev = ibcontext->device;
-	struct mana_ib_dev *mdev;
+	struct mana_ib_dev *mib_dev;
 	struct gdma_context *gc;
 	phys_addr_t pfn;
 	pgprot_t prot;
 	int ret;
 
-	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
-	gc = mdev->gdma_dev->gdma_context;
+	mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	gc = mib_dev->gdma_dev->gdma_context;
 
 	if (vma->vm_pgoff != 0) {
 		ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma->vm_pgoff);
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 502cc8672eef..ee4efd0af278 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -92,10 +92,11 @@ struct mana_ib_rwq_ind_table {
 	struct ib_rwq_ind_table ib_ind_table;
 };
 
-int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+int mana_ib_gd_create_dma_region(struct mana_ib_dev *mib_dev,
+				 struct ib_umem *umem,
 				 mana_handle_t *gdma_region);
 
-int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
+int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *mib_dev,
 				  mana_handle_t gdma_region);
 
 struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
@@ -129,9 +130,9 @@ int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
 int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
 
-int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port_id,
+int mana_ib_cfg_vport(struct mana_ib_dev *mib_dev, u32 port_id,
 		      struct mana_ib_pd *pd, u32 doorbell_id);
-void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
+void mana_ib_uncfg_vport(struct mana_ib_dev *mib_dev, struct mana_ib_pd *pd,
 			 u32 port);
 
 int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
index 351207c60eb6..f6a53906204d 100644
--- a/drivers/infiniband/hw/mana/mr.c
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -25,12 +25,13 @@ mana_ib_verbs_to_gdma_access_flags(int access_flags)
 	return flags;
 }
 
-static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
+static int mana_ib_gd_create_mr(struct mana_ib_dev *mib_dev,
+				struct mana_ib_mr *mr,
 				struct gdma_create_mr_params *mr_params)
 {
+	struct gdma_dev *mdev = mib_dev->gdma_dev;
 	struct gdma_create_mr_response resp = {};
 	struct gdma_create_mr_request req = {};
-	struct gdma_dev *mdev = dev->gdma_dev;
 	struct gdma_context *gc;
 	int err;
 
@@ -49,7 +50,7 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
 		break;
 
 	default:
-		ibdev_dbg(&dev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "invalid param (GDMA_MR_TYPE) passed, type %d\n",
 			  req.mr_type);
 		return -EINVAL;
@@ -58,7 +59,7 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
 	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
 
 	if (err || resp.hdr.status) {
-		ibdev_dbg(&dev->ib_dev, "Failed to create mr %d, %u", err,
+		ibdev_dbg(&mib_dev->ib_dev, "Failed to create mr %d, %u", err,
 			  resp.hdr.status);
 		if (!err)
 			err = -EPROTO;
@@ -73,11 +74,11 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
 	return 0;
 }
 
-static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, u64 mr_handle)
+static int mana_ib_gd_destroy_mr(struct mana_ib_dev *mib_dev, u64 mr_handle)
 {
 	struct gdma_destroy_mr_response resp = {};
+	struct gdma_dev *mdev = mib_dev->gdma_dev;
 	struct gdma_destroy_mr_request req = {};
-	struct gdma_dev *mdev = dev->gdma_dev;
 	struct gdma_context *gc;
 	int err;
 
@@ -107,12 +108,12 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
 	struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
 	struct gdma_create_mr_params mr_params = {};
 	struct ib_device *ibdev = ibpd->device;
-	struct mana_ib_dev *dev;
+	struct mana_ib_dev *mib_dev;
 	struct mana_ib_mr *mr;
 	u64 dma_region_handle;
 	int err;
 
-	dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
 
 	ibdev_dbg(ibdev,
 		  "start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x",
@@ -133,7 +134,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
 		goto err_free;
 	}
 
-	err = mana_ib_gd_create_dma_region(dev, mr->umem, &dma_region_handle);
+	err = mana_ib_gd_create_dma_region(mib_dev, mr->umem, &dma_region_handle);
 	if (err) {
 		ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
 			  err);
@@ -151,7 +152,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
 	mr_params.gva.access_flags =
 		mana_ib_verbs_to_gdma_access_flags(access_flags);
 
-	err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+	err = mana_ib_gd_create_mr(mib_dev, mr, &mr_params);
 	if (err)
 		goto err_dma_region;
 
@@ -164,7 +165,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
 	return &mr->ibmr;
 
 err_dma_region:
-	mana_gd_destroy_dma_region(dev->gdma_dev->gdma_context,
+	mana_gd_destroy_dma_region(mib_dev->gdma_dev->gdma_context,
 				   dma_region_handle);
 
 err_umem:
@@ -179,12 +180,12 @@ int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 {
 	struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr);
 	struct ib_device *ibdev = ibmr->device;
-	struct mana_ib_dev *dev;
+	struct mana_ib_dev *mib_dev;
 	int err;
 
-	dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
 
-	err = mana_ib_gd_destroy_mr(dev, mr->mr_handle);
+	err = mana_ib_gd_destroy_mr(mib_dev, mr->mr_handle);
 	if (err)
 		return err;
 
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 4b3b5b274e84..2e3a57123ed7 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -5,7 +5,7 @@
 
 #include "mana_ib.h"
 
-static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
+static int mana_ib_cfg_vport_steering(struct mana_ib_dev *mib_dev,
 				      struct net_device *ndev,
 				      mana_handle_t default_rxobj,
 				      mana_handle_t ind_table[],
@@ -21,7 +21,7 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
 	u32 req_buf_size;
 	int i, err;
 
-	mdev = dev->gdma_dev;
+	mdev = mib_dev->gdma_dev;
 	gc = mdev->gdma_context;
 
 	req_buf_size =
@@ -55,10 +55,10 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
 	 * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb
 	 * ind_table to MANA_INDIRECT_TABLE_SIZE if required
 	 */
-	ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
+	ibdev_dbg(&mib_dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
 	for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
 		req_indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
-		ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
+		ibdev_dbg(&mib_dev->ib_dev, "index %u handle 0x%llx\n", i,
 			  req_indir_tab[i]);
 	}
 
@@ -68,7 +68,7 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
 	else
 		netdev_rss_key_fill(req->hashkey, MANA_HASH_KEY_SIZE);
 
-	ibdev_dbg(&dev->ib_dev, "vport handle %llu default_rxobj 0x%llx\n",
+	ibdev_dbg(&mib_dev->ib_dev, "vport handle %llu default_rxobj 0x%llx\n",
 		  req->vport, default_rxobj);
 
 	err = mana_gd_send_request(gc, req_buf_size, req, sizeof(resp), &resp);
@@ -97,12 +97,12 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 				 struct ib_udata *udata)
 {
 	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
-	struct mana_ib_dev *mdev =
+	struct mana_ib_dev *mib_dev =
 		container_of(pd->device, struct mana_ib_dev, ib_dev);
 	struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
 	struct mana_ib_create_qp_rss_resp resp = {};
 	struct mana_ib_create_qp_rss ucmd = {};
-	struct gdma_dev *gd = mdev->gdma_dev;
+	struct gdma_dev *gd = mib_dev->gdma_dev;
 	mana_handle_t *mana_ind_table;
 	struct mana_port_context *mpc;
 	struct mana_context *mc;
@@ -123,21 +123,21 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 
 	ret = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
 	if (ret) {
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Failed copy from udata for create rss-qp, err %d\n",
 			  ret);
 		return ret;
 	}
 
 	if (attr->cap.max_recv_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Requested max_recv_wr %d exceeding limit\n",
 			  attr->cap.max_recv_wr);
 		return -EINVAL;
 	}
 
 	if (attr->cap.max_recv_sge > MAX_RX_WQE_SGL_ENTRIES) {
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Requested max_recv_sge %d exceeding limit\n",
 			  attr->cap.max_recv_sge);
 		return -EINVAL;
@@ -145,14 +145,14 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 
 	ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size;
 	if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) {
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Indirect table size %d exceeding limit\n",
 			  ind_tbl_size);
 		return -EINVAL;
 	}
 
 	if (ucmd.rx_hash_function != MANA_IB_RX_HASH_FUNC_TOEPLITZ) {
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "RX Hash function is not supported, %d\n",
 			  ucmd.rx_hash_function);
 		return -EINVAL;
@@ -161,14 +161,14 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 	/* IB ports start with 1, MANA start with 0 */
 	port = ucmd.port;
 	if (port < 1 || port > mc->num_ports) {
-		ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n",
+		ibdev_dbg(&mib_dev->ib_dev, "Invalid port %u in creating qp\n",
 			  port);
 		return -EINVAL;
 	}
 	ndev = mc->ports[port - 1];
 	mpc = netdev_priv(ndev);
 
-	ibdev_dbg(&mdev->ib_dev, "rx_hash_function %d port %d\n",
+	ibdev_dbg(&mib_dev->ib_dev, "rx_hash_function %d port %d\n",
 		  ucmd.rx_hash_function, port);
 
 	mana_ind_table = kcalloc(ind_tbl_size, sizeof(mana_handle_t),
@@ -210,7 +210,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 		wq->id = wq_spec.queue_index;
 		cq->id = cq_spec.queue_index;
 
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "ret %d rx_object 0x%llx wq id %llu cq id %llu\n",
 			  ret, wq->rx_object, wq->id, cq->id);
 
@@ -221,7 +221,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 	}
 	resp.num_entries = i;
 
-	ret = mana_ib_cfg_vport_steering(mdev, ndev, wq->rx_object,
+	ret = mana_ib_cfg_vport_steering(mib_dev, ndev, wq->rx_object,
 					 mana_ind_table,
 					 ind_tbl->log_ind_tbl_size,
 					 ucmd.rx_hash_key_len,
@@ -231,7 +231,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 
 	ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
 	if (ret) {
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Failed to copy to udata create rss-qp, %d\n",
 			  ret);
 		goto fail;
@@ -259,7 +259,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 {
 	struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
 	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
-	struct mana_ib_dev *mdev =
+	struct mana_ib_dev *mib_dev =
 		container_of(ibpd->device, struct mana_ib_dev, ib_dev);
 	struct mana_ib_cq *send_cq =
 		container_of(attr->send_cq, struct mana_ib_cq, ibcq);
@@ -267,7 +267,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 		rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
 					  ibucontext);
 	struct mana_ib_create_qp_resp resp = {};
-	struct gdma_dev *gd = mdev->gdma_dev;
+	struct gdma_dev *gd = mib_dev->gdma_dev;
 	struct mana_ib_create_qp ucmd = {};
 	struct mana_obj_spec wq_spec = {};
 	struct mana_obj_spec cq_spec = {};
@@ -285,7 +285,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 
 	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
 	if (err) {
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Failed to copy from udata create qp-raw, %d\n", err);
 		return err;
 	}
@@ -296,14 +296,14 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 		return -EINVAL;
 
 	if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Requested max_send_wr %d exceeding limit\n",
 			  attr->cap.max_send_wr);
 		return -EINVAL;
 	}
 
 	if (attr->cap.max_send_sge > MAX_TX_WQE_SGL_ENTRIES) {
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Requested max_send_sge %d exceeding limit\n",
 			  attr->cap.max_send_sge);
 		return -EINVAL;
@@ -311,38 +311,38 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 
 	ndev = mc->ports[port - 1];
 	mpc = netdev_priv(ndev);
-	ibdev_dbg(&mdev->ib_dev, "port %u ndev %p mpc %p\n", port, ndev, mpc);
+	ibdev_dbg(&mib_dev->ib_dev, "port %u ndev %p mpc %p\n", port, ndev, mpc);
 
-	err = mana_ib_cfg_vport(mdev, port - 1, pd, mana_ucontext->doorbell);
+	err = mana_ib_cfg_vport(mib_dev, port - 1, pd, mana_ucontext->doorbell);
 	if (err)
 		return -ENODEV;
 
 	qp->port = port;
 
-	ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n",
+	ibdev_dbg(&mib_dev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n",
 		  ucmd.sq_buf_addr, ucmd.port);
 
 	umem = ib_umem_get(ibpd->device, ucmd.sq_buf_addr, ucmd.sq_buf_size,
 			   IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(umem)) {
 		err = PTR_ERR(umem);
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Failed to get umem for create qp-raw, err %d\n",
 			  err);
 		goto err_free_vport;
 	}
 	qp->sq_umem = umem;
 
-	err = mana_ib_gd_create_dma_region(mdev, qp->sq_umem,
+	err = mana_ib_gd_create_dma_region(mib_dev, qp->sq_umem,
 					   &qp->sq_gdma_region);
 	if (err) {
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Failed to create dma region for create qp-raw, %d\n",
 			  err);
 		goto err_release_umem;
 	}
 
-	ibdev_dbg(&mdev->ib_dev,
+	ibdev_dbg(&mib_dev->ib_dev,
 		  "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
 		  err, qp->sq_gdma_region);
 
@@ -358,7 +358,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 	err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec,
 				 &cq_spec, &qp->tx_object);
 	if (err) {
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Failed to create wq for create raw-qp, err %d\n",
 			  err);
 		goto err_destroy_dma_region;
@@ -371,7 +371,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 	qp->sq_id = wq_spec.queue_index;
 	send_cq->id = cq_spec.queue_index;
 
-	ibdev_dbg(&mdev->ib_dev,
+	ibdev_dbg(&mib_dev->ib_dev,
 		  "ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err,
 		  qp->tx_object, qp->sq_id, send_cq->id);
 
@@ -381,7 +381,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 
 	err = ib_copy_to_udata(udata, &resp, sizeof(resp));
 	if (err) {
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Failed copy udata for create qp-raw, %d\n",
 			  err);
 		goto err_destroy_wq_obj;
@@ -393,13 +393,13 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 	mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
 
 err_destroy_dma_region:
-	mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region);
+	mana_ib_gd_destroy_dma_region(mib_dev, qp->sq_gdma_region);
 
 err_release_umem:
 	ib_umem_release(umem);
 
 err_free_vport:
-	mana_ib_uncfg_vport(mdev, pd, port - 1);
+	mana_ib_uncfg_vport(mib_dev, pd, port - 1);
 
 	return err;
 }
@@ -435,9 +435,9 @@ static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp,
 				  struct ib_rwq_ind_table *ind_tbl,
 				  struct ib_udata *udata)
 {
-	struct mana_ib_dev *mdev =
+	struct mana_ib_dev *mib_dev =
 		container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
-	struct gdma_dev *gd = mdev->gdma_dev;
+	struct gdma_dev *gd = mib_dev->gdma_dev;
 	struct mana_port_context *mpc;
 	struct mana_context *mc;
 	struct net_device *ndev;
@@ -452,7 +452,7 @@ static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp,
 	for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) {
 		ibwq = ind_tbl->ind_tbl[i];
 		wq = container_of(ibwq, struct mana_ib_wq, ibwq);
-		ibdev_dbg(&mdev->ib_dev, "destroying wq->rx_object %llu\n",
+		ibdev_dbg(&mib_dev->ib_dev, "destroying wq->rx_object %llu\n",
 			  wq->rx_object);
 		mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
 	}
@@ -462,9 +462,9 @@ static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp,
 
 static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata)
 {
-	struct mana_ib_dev *mdev =
+	struct mana_ib_dev *mib_dev =
 		container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
-	struct gdma_dev *gd = mdev->gdma_dev;
+	struct gdma_dev *gd = mib_dev->gdma_dev;
 	struct ib_pd *ibpd = qp->ibqp.pd;
 	struct mana_port_context *mpc;
 	struct mana_context *mc;
@@ -479,11 +479,11 @@ static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata)
 	mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
 
 	if (qp->sq_umem) {
-		mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region);
+		mana_ib_gd_destroy_dma_region(mib_dev, qp->sq_gdma_region);
 		ib_umem_release(qp->sq_umem);
 	}
 
-	mana_ib_uncfg_vport(mdev, pd, qp->port - 1);
+	mana_ib_uncfg_vport(mib_dev, pd, qp->port - 1);
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/mana/wq.c b/drivers/infiniband/hw/mana/wq.c
index 372d361510e0..56bc2b8b6690 100644
--- a/drivers/infiniband/hw/mana/wq.c
+++ b/drivers/infiniband/hw/mana/wq.c
@@ -9,7 +9,7 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
 				struct ib_wq_init_attr *init_attr,
 				struct ib_udata *udata)
 {
-	struct mana_ib_dev *mdev =
+	struct mana_ib_dev *mib_dev =
 		container_of(pd->device, struct mana_ib_dev, ib_dev);
 	struct mana_ib_create_wq ucmd = {};
 	struct mana_ib_wq *wq;
@@ -21,7 +21,7 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
 
 	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
 	if (err) {
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Failed to copy from udata for create wq, %d\n", err);
 		return ERR_PTR(err);
 	}
@@ -30,13 +30,14 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
 	if (!wq)
 		return ERR_PTR(-ENOMEM);
 
-	ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n", ucmd.wq_buf_addr);
+	ibdev_dbg(&mib_dev->ib_dev, "ucmd wq_buf_addr 0x%llx\n",
+		  ucmd.wq_buf_addr);
 
 	umem = ib_umem_get(pd->device, ucmd.wq_buf_addr, ucmd.wq_buf_size,
 			   IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(umem)) {
 		err = PTR_ERR(umem);
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Failed to get umem for create wq, err %d\n", err);
 		goto err_free_wq;
 	}
@@ -46,15 +47,15 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
 	wq->wq_buf_size = ucmd.wq_buf_size;
 	wq->rx_object = INVALID_MANA_HANDLE;
 
-	err = mana_ib_gd_create_dma_region(mdev, wq->umem, &wq->gdma_region);
+	err = mana_ib_gd_create_dma_region(mib_dev, wq->umem, &wq->gdma_region);
 	if (err) {
-		ibdev_dbg(&mdev->ib_dev,
+		ibdev_dbg(&mib_dev->ib_dev,
 			  "Failed to create dma region for create wq, %d\n",
 			  err);
 		goto err_release_umem;
 	}
 
-	ibdev_dbg(&mdev->ib_dev,
+	ibdev_dbg(&mib_dev->ib_dev,
 		  "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
 		  err, wq->gdma_region);
 
@@ -82,11 +83,11 @@ int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
 {
 	struct mana_ib_wq *wq = container_of(ibwq, struct mana_ib_wq, ibwq);
 	struct ib_device *ib_dev = ibwq->device;
-	struct mana_ib_dev *mdev;
+	struct mana_ib_dev *mib_dev;
 
-	mdev = container_of(ib_dev, struct mana_ib_dev, ib_dev);
+	mib_dev = container_of(ib_dev, struct mana_ib_dev, ib_dev);
 
-	mana_ib_gd_destroy_dma_region(mdev, wq->gdma_region);
+	mana_ib_gd_destroy_dma_region(mib_dev, wq->gdma_region);
 	ib_umem_release(wq->umem);
 
 	kfree(wq);
-- 
2.25.1


^ permalink raw reply related


This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.