virtualization.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next v3 0/4] virtio_net: enable the irq for ctrlq
@ 2024-06-06  6:14 Heng Qi
  2024-06-06  6:14 ` [PATCH net-next v3 1/4] virtio_net: passing control_buf explicitly Heng Qi
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Heng Qi @ 2024-06-06  6:14 UTC (permalink / raw)
  To: netdev, virtualization
  Cc: Jason Wang, Michael S. Tsirkin, Xuan Zhuo, Eugenio Pérez,
	Eric Dumazet, David S. Miller, Jakub Kicinski, Paolo Abeni

Ctrlq in polling mode may cause the virtual machine to hang and
occupy additional CPU resources. Enabling the irq for ctrlq
alleviates this problem and allows commands to be requested
concurrently.

Changelog
=========
v2->v3:
  - Use the completion for dim cmds.

v1->v2:
  - Refactor patch 1 and rephrase the commit log.

Heng Qi (4):
  virtio_net: passing control_buf explicitly
  virtio_net: enable irq for the control vq
  virtio_net: change the command token to completion
  virtio_net: improve dim command request efficiency

 drivers/net/virtio_net.c | 274 +++++++++++++++++++++++++++++++++------
 1 file changed, 232 insertions(+), 42 deletions(-)

-- 
2.32.0.3.g01195cf9f


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH net-next v3 1/4] virtio_net: passing control_buf explicitly
  2024-06-06  6:14 [PATCH net-next v3 0/4] virtio_net: enable the irq for ctrlq Heng Qi
@ 2024-06-06  6:14 ` Heng Qi
  2024-06-06  6:14 ` [PATCH net-next v3 2/4] virtio_net: enable irq for the control vq Heng Qi
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 11+ messages in thread
From: Heng Qi @ 2024-06-06  6:14 UTC (permalink / raw)
  To: netdev, virtualization
  Cc: Jason Wang, Michael S. Tsirkin, Xuan Zhuo, Eugenio Pérez,
	Eric Dumazet, David S. Miller, Jakub Kicinski, Paolo Abeni

In a later patch, the driver may send requests concurrently, in
which case each command will have its own control buffer, so we
refactor virtnet_send_command_reply() to pass the control buffer
explicitly as a parameter.

Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
---
 drivers/net/virtio_net.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4a802c0ea2cb..0f872936d6ed 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2680,7 +2680,9 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
  * supported by the hypervisor, as indicated by feature bits, should
  * never fail unless improperly formatted.
  */
-static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd,
+static bool virtnet_send_command_reply(struct virtnet_info *vi,
+				       u8 class, u8 cmd,
+				       struct control_buf *ctrl,
 				       struct scatterlist *out,
 				       struct scatterlist *in)
 {
@@ -2692,18 +2694,18 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
 	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
 
 	mutex_lock(&vi->cvq_lock);
-	vi->ctrl->status = ~0;
-	vi->ctrl->hdr.class = class;
-	vi->ctrl->hdr.cmd = cmd;
+	ctrl->status = ~0;
+	ctrl->hdr.class = class;
+	ctrl->hdr.cmd = cmd;
 	/* Add header */
-	sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
+	sg_init_one(&hdr, &ctrl->hdr, sizeof(ctrl->hdr));
 	sgs[out_num++] = &hdr;
 
 	if (out)
 		sgs[out_num++] = out;
 
 	/* Add return status. */
-	sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
+	sg_init_one(&stat, &ctrl->status, sizeof(ctrl->status));
 	sgs[out_num + in_num++] = &stat;
 
 	if (in)
@@ -2732,13 +2734,13 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
 
 unlock:
 	mutex_unlock(&vi->cvq_lock);
-	return vi->ctrl->status == VIRTIO_NET_OK;
+	return ctrl->status == VIRTIO_NET_OK;
 }
 
 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 				 struct scatterlist *out)
 {
-	return virtnet_send_command_reply(vi, class, cmd, out, NULL);
+	return virtnet_send_command_reply(vi, class, cmd, vi->ctrl, out, NULL);
 }
 
 static int virtnet_set_mac_address(struct net_device *dev, void *p)
@@ -4018,7 +4020,7 @@ static int __virtnet_get_hw_stats(struct virtnet_info *vi,
 
 	ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS,
 					VIRTIO_NET_CTRL_STATS_GET,
-					&sgs_out, &sgs_in);
+					vi->ctrl, &sgs_out, &sgs_in);
 
 	if (!ok)
 		return ok;
@@ -5882,7 +5884,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 
 		if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS,
 						VIRTIO_NET_CTRL_STATS_QUERY,
-						NULL, &sg)) {
+						vi->ctrl, NULL, &sg)) {
 			pr_debug("virtio_net: fail to get stats capability\n");
 			rtnl_unlock();
 			err = -EINVAL;
-- 
2.32.0.3.g01195cf9f


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH net-next v3 2/4] virtio_net: enable irq for the control vq
  2024-06-06  6:14 [PATCH net-next v3 0/4] virtio_net: enable the irq for ctrlq Heng Qi
  2024-06-06  6:14 ` [PATCH net-next v3 1/4] virtio_net: passing control_buf explicitly Heng Qi
@ 2024-06-06  6:14 ` Heng Qi
  2024-06-06  6:14 ` [PATCH net-next v3 3/4] virtio_net: change the command token to completion Heng Qi
  2024-06-06  6:14 ` [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency Heng Qi
  3 siblings, 0 replies; 11+ messages in thread
From: Heng Qi @ 2024-06-06  6:14 UTC (permalink / raw)
  To: netdev, virtualization
  Cc: Jason Wang, Michael S. Tsirkin, Xuan Zhuo, Eugenio Pérez,
	Eric Dumazet, David S. Miller, Jakub Kicinski, Paolo Abeni

If the device does not respond to a request for a long time,
then control vq polling elevates CPU utilization, a problem that
is exacerbated by additional command requests.

Enabling control vq's irq is advantageous for the guest, and
this still doesn't support concurrent requests.

Suggested-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
---
 drivers/net/virtio_net.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 0f872936d6ed..823a9dca51c1 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -372,6 +372,8 @@ struct virtio_net_ctrl_rss {
 struct control_buf {
 	struct virtio_net_ctrl_hdr hdr;
 	virtio_net_ctrl_ack status;
+	/* Wait for the device to complete the cvq request. */
+	struct completion completion;
 };
 
 struct virtnet_info {
@@ -664,6 +666,13 @@ static bool virtqueue_napi_complete(struct napi_struct *napi,
 	return false;
 }
 
+static void virtnet_cvq_done(struct virtqueue *cvq)
+{
+	struct virtnet_info *vi = cvq->vdev->priv;
+
+	complete(&vi->ctrl->completion);
+}
+
 static void skb_xmit_done(struct virtqueue *vq)
 {
 	struct virtnet_info *vi = vq->vdev->priv;
@@ -2723,14 +2732,8 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
 	if (unlikely(!virtqueue_kick(vi->cvq)))
 		goto unlock;
 
-	/* Spin for a response, the kick causes an ioport write, trapping
-	 * into the hypervisor, so the request should be handled immediately.
-	 */
-	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
-	       !virtqueue_is_broken(vi->cvq)) {
-		cond_resched();
-		cpu_relax();
-	}
+	wait_for_completion(&vi->ctrl->completion);
+	virtqueue_get_buf(vi->cvq, &tmp);
 
 unlock:
 	mutex_unlock(&vi->cvq_lock);
@@ -5314,7 +5317,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 
 	/* Parameters for control virtqueue, if any */
 	if (vi->has_cvq) {
-		callbacks[total_vqs - 1] = NULL;
+		callbacks[total_vqs - 1] = virtnet_cvq_done;
 		names[total_vqs - 1] = "control";
 	}
 
@@ -5834,6 +5837,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (vi->has_rss || vi->has_rss_hash_report)
 		virtnet_init_default_rss(vi);
 
+	init_completion(&vi->ctrl->completion);
 	enable_rx_mode_work(vi);
 
 	/* serialize netdev register + virtio_device_ready() with ndo_open() */
-- 
2.32.0.3.g01195cf9f


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH net-next v3 3/4] virtio_net: change the command token to completion
  2024-06-06  6:14 [PATCH net-next v3 0/4] virtio_net: enable the irq for ctrlq Heng Qi
  2024-06-06  6:14 ` [PATCH net-next v3 1/4] virtio_net: passing control_buf explicitly Heng Qi
  2024-06-06  6:14 ` [PATCH net-next v3 2/4] virtio_net: enable irq for the control vq Heng Qi
@ 2024-06-06  6:14 ` Heng Qi
  2024-06-06  6:14 ` [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency Heng Qi
  3 siblings, 0 replies; 11+ messages in thread
From: Heng Qi @ 2024-06-06  6:14 UTC (permalink / raw)
  To: netdev, virtualization
  Cc: Jason Wang, Michael S. Tsirkin, Xuan Zhuo, Eugenio Pérez,
	Eric Dumazet, David S. Miller, Jakub Kicinski, Paolo Abeni

Previously, control vq only allowed a single request to be sent,
so using virtnet_info as a global token was fine. To support
concurrent requests, the driver needs to use a command-level token
to distinguish between requests that have been sent.

Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
---
 drivers/net/virtio_net.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 823a9dca51c1..e59e12bb7601 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2721,7 +2721,8 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
 		sgs[out_num + in_num++] = in;
 
 	BUG_ON(out_num + in_num > ARRAY_SIZE(sgs));
-	ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC);
+	ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num,
+				&ctrl->completion, GFP_ATOMIC);
 	if (ret < 0) {
 		dev_warn(&vi->vdev->dev,
 			 "Failed to add sgs for command vq: %d\n.", ret);
-- 
2.32.0.3.g01195cf9f


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency
  2024-06-06  6:14 [PATCH net-next v3 0/4] virtio_net: enable the irq for ctrlq Heng Qi
                   ` (2 preceding siblings ...)
  2024-06-06  6:14 ` [PATCH net-next v3 3/4] virtio_net: change the command token to completion Heng Qi
@ 2024-06-06  6:14 ` Heng Qi
  2024-06-06 10:25   ` kernel test robot
                     ` (2 more replies)
  3 siblings, 3 replies; 11+ messages in thread
From: Heng Qi @ 2024-06-06  6:14 UTC (permalink / raw)
  To: netdev, virtualization
  Cc: Jason Wang, Michael S. Tsirkin, Xuan Zhuo, Eugenio Pérez,
	Eric Dumazet, David S. Miller, Jakub Kicinski, Paolo Abeni

Currently, control vq handles commands synchronously,
leading to increased delays for dim commands during multi-queue
VM configuration and directly impacting dim performance.

To address this, we are shifting to asynchronous processing of
ctrlq's dim commands.

Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
---
 drivers/net/virtio_net.c | 233 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 208 insertions(+), 25 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e59e12bb7601..0338528993ab 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -376,6 +376,13 @@ struct control_buf {
 	struct completion completion;
 };
 
+struct virtnet_coal_node {
+	struct control_buf ctrl;
+	struct virtio_net_ctrl_coal_vq coal_vqs;
+	bool is_coal_wait;
+	struct list_head list;
+};
+
 struct virtnet_info {
 	struct virtio_device *vdev;
 	struct virtqueue *cvq;
@@ -420,6 +427,9 @@ struct virtnet_info {
 	/* Lock to protect the control VQ */
 	struct mutex cvq_lock;
 
+	/* Work struct for acquisition of cvq processing results. */
+	struct work_struct get_cvq;
+
 	/* Host can handle any s/g split between our header and packet data */
 	bool any_header_sg;
 
@@ -464,6 +474,14 @@ struct virtnet_info {
 	struct virtnet_interrupt_coalesce intr_coal_tx;
 	struct virtnet_interrupt_coalesce intr_coal_rx;
 
+	/* Free nodes used for concurrent delivery */
+	struct mutex coal_free_lock;
+	struct list_head coal_free_list;
+
+	/* Filled when there are no free nodes or cvq buffers */
+	struct mutex coal_wait_lock;
+	struct list_head coal_wait_list;
+
 	unsigned long guest_offloads;
 	unsigned long guest_offloads_capable;
 
@@ -670,7 +688,7 @@ static void virtnet_cvq_done(struct virtqueue *cvq)
 {
 	struct virtnet_info *vi = cvq->vdev->priv;
 
-	complete(&vi->ctrl->completion);
+	schedule_work(&vi->get_cvq);
 }
 
 static void skb_xmit_done(struct virtqueue *vq)
@@ -2696,7 +2714,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
 				       struct scatterlist *in)
 {
 	struct scatterlist *sgs[5], hdr, stat;
-	u32 out_num = 0, tmp, in_num = 0;
+	u32 out_num = 0, in_num = 0;
 	int ret;
 
 	/* Caller should know better */
@@ -2730,14 +2748,14 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
 		return false;
 	}
 
-	if (unlikely(!virtqueue_kick(vi->cvq)))
-		goto unlock;
+	if (unlikely(!virtqueue_kick(vi->cvq))) {
+		mutex_unlock(&vi->cvq_lock);
+		return false;
+	}
+	mutex_unlock(&vi->cvq_lock);
 
-	wait_for_completion(&vi->ctrl->completion);
-	virtqueue_get_buf(vi->cvq, &tmp);
+	wait_for_completion(&ctrl->completion);
 
-unlock:
-	mutex_unlock(&vi->cvq_lock);
 	return ctrl->status == VIRTIO_NET_OK;
 }
 
@@ -2747,6 +2765,86 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 	return virtnet_send_command_reply(vi, class, cmd, vi->ctrl, out, NULL);
 }
 
+static void virtnet_process_dim_cmd(struct virtnet_info *vi,
+				    struct virtnet_coal_node *node)
+{
+	u16 qnum = le16_to_cpu(node->coal_vqs.vqn) / 2;
+
+	mutex_lock(&vi->rq[qnum].dim_lock);
+	vi->rq[qnum].intr_coal.max_usecs =
+		le32_to_cpu(node->coal_vqs.coal.max_usecs);
+	vi->rq[qnum].intr_coal.max_packets =
+		le32_to_cpu(node->coal_vqs.coal.max_packets);
+	vi->rq[qnum].dim.state = DIM_START_MEASURE;
+	mutex_unlock(&vi->rq[qnum].dim_lock);
+
+	if (node->is_coal_wait) {
+		mutex_lock(&vi->coal_wait_lock);
+		list_del(&node->list);
+		mutex_unlock(&vi->coal_wait_lock);
+		kfree(node);
+	} else {
+		mutex_lock(&vi->coal_free_lock);
+		list_add(&node->list, &vi->coal_free_list);
+		mutex_unlock(&vi->coal_free_lock);
+	}
+}
+
+static int virtnet_add_dim_command(struct virtnet_info *vi,
+				   struct virtnet_coal_node *coal_node)
+{
+	struct scatterlist sg;
+	int ret;
+
+	sg_init_one(&sg, &coal_node->coal_vqs, sizeof(coal_node->coal_vqs));
+	ret = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_NOTF_COAL,
+					 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET,
+					 &coal_node->ctrl, &sg, NULL);
+	if (!ret) {
+		dev_warn(&vi->dev->dev,
+			 "Failed to change coalescing params.\n");
+		return ret;
+	}
+
+	virtnet_process_dim_cmd(vi, coal_node);
+
+	return 0;
+}
+
+static void virtnet_get_cvq_work(struct work_struct *work)
+{
+	struct virtnet_info *vi =
+		container_of(work, struct virtnet_info, get_cvq);
+	struct virtnet_coal_node *wait_coal;
+	bool valid = false;
+	unsigned int tmp;
+	void *res;
+
+	mutex_lock(&vi->cvq_lock);
+	while ((res = virtqueue_get_buf(vi->cvq, &tmp)) != NULL) {
+		complete((struct completion *)res);
+		valid = true;
+	}
+	mutex_unlock(&vi->cvq_lock);
+
+	if (!valid)
+		return;
+
+	while (true) {
+		wait_coal = NULL;
+		mutex_lock(&vi->coal_wait_lock);
+		if (!list_empty(&vi->coal_wait_list))
+			wait_coal = list_first_entry(&vi->coal_wait_list,
+						     struct virtnet_coal_node,
+						     list);
+		mutex_unlock(&vi->coal_wait_lock);
+		if (wait_coal)
+			if (virtnet_add_dim_command(vi, wait_coal))
+				break;
+		else
+			break;
+	}
+}
 static int virtnet_set_mac_address(struct net_device *dev, void *p)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
@@ -4398,35 +4496,73 @@ static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
 	return 0;
 }
 
+static void virtnet_put_wait_coal(struct virtnet_info *vi,
+				  struct receive_queue *rq,
+				  struct dim_cq_moder moder)
+{
+	struct virtnet_coal_node *wait_node;
+
+	wait_node = kzalloc(sizeof(*wait_node), GFP_KERNEL);
+	if (!wait_node) {
+		rq->dim.state = DIM_START_MEASURE;
+		return;
+	}
+
+	wait_node->is_coal_wait = true;
+	wait_node->coal_vqs.vqn = cpu_to_le16(rxq2vq(rq - vi->rq));
+	wait_node->coal_vqs.coal.max_usecs = cpu_to_le32(moder.usec);
+	wait_node->coal_vqs.coal.max_packets = cpu_to_le32(moder.pkts);
+	mutex_lock(&vi->coal_wait_lock);
+	list_add_tail(&wait_node->list, &vi->coal_wait_list);
+	mutex_unlock(&vi->coal_wait_lock);
+}
+
 static void virtnet_rx_dim_work(struct work_struct *work)
 {
 	struct dim *dim = container_of(work, struct dim, work);
 	struct receive_queue *rq = container_of(dim,
 			struct receive_queue, dim);
 	struct virtnet_info *vi = rq->vq->vdev->priv;
-	struct net_device *dev = vi->dev;
+	struct virtnet_coal_node *avail_coal;
 	struct dim_cq_moder update_moder;
-	int qnum, err;
 
-	qnum = rq - vi->rq;
+	update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
 
 	mutex_lock(&rq->dim_lock);
-	if (!rq->dim_enabled)
-		goto out;
-
-	update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
-	if (update_moder.usec != rq->intr_coal.max_usecs ||
-	    update_moder.pkts != rq->intr_coal.max_packets) {
-		err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum,
-						       update_moder.usec,
-						       update_moder.pkts);
-		if (err)
-			pr_debug("%s: Failed to send dim parameters on rxq%d\n",
-				 dev->name, qnum);
-		dim->state = DIM_START_MEASURE;
+	if (!rq->dim_enabled ||
+	    (update_moder.usec == rq->intr_coal.max_usecs &&
+	     update_moder.pkts == rq->intr_coal.max_packets)) {
+		rq->dim.state = DIM_START_MEASURE;
+		mutex_unlock(&rq->dim_lock);
+		return;
 	}
-out:
 	mutex_unlock(&rq->dim_lock);
+
+	mutex_lock(&vi->cvq_lock);
+	if (vi->cvq->num_free < 3) {
+		virtnet_put_wait_coal(vi, rq, update_moder);
+		mutex_unlock(&vi->cvq_lock);
+		return;
+	}
+	mutex_unlock(&vi->cvq_lock);
+
+	mutex_lock(&vi->coal_free_lock);
+	if (list_empty(&vi->coal_free_list)) {
+		virtnet_put_wait_coal(vi, rq, update_moder);
+		mutex_unlock(&vi->coal_free_lock);
+		return;
+	}
+
+	avail_coal = list_first_entry(&vi->coal_free_list,
+				      struct virtnet_coal_node, list);
+	avail_coal->coal_vqs.vqn = cpu_to_le16(rxq2vq(rq - vi->rq));
+	avail_coal->coal_vqs.coal.max_usecs = cpu_to_le32(update_moder.usec);
+	avail_coal->coal_vqs.coal.max_packets = cpu_to_le32(update_moder.pkts);
+
+	list_del(&avail_coal->list);
+	mutex_unlock(&vi->coal_free_lock);
+
+	virtnet_add_dim_command(vi, avail_coal);
 }
 
 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
@@ -4839,6 +4975,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
 	flush_work(&vi->config_work);
 	disable_rx_mode_work(vi);
 	flush_work(&vi->rx_mode_work);
+	flush_work(&vi->get_cvq);
 
 	netif_tx_lock_bh(vi->dev);
 	netif_device_detach(vi->dev);
@@ -5612,6 +5749,45 @@ static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
 	.xmo_rx_hash			= virtnet_xdp_rx_hash,
 };
 
+static void virtnet_del_coal_free_list(struct virtnet_info *vi)
+{
+	struct virtnet_coal_node *coal_node, *tmp;
+
+	list_for_each_entry_safe(coal_node, tmp,  &vi->coal_free_list, list) {
+		list_del(&coal_node->list);
+		kfree(coal_node);
+	}
+}
+
+static int virtnet_init_coal_list(struct virtnet_info *vi)
+{
+	struct virtnet_coal_node *coal_node;
+	int batch_dim_nums;
+	int i;
+
+	INIT_LIST_HEAD(&vi->coal_free_list);
+	mutex_init(&vi->coal_free_lock);
+
+	INIT_LIST_HEAD(&vi->coal_wait_list);
+	mutex_init(&vi->coal_wait_lock);
+
+	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
+		return 0;
+
+	batch_dim_nums = min((unsigned int)vi->max_queue_pairs,
+			     virtqueue_get_vring_size(vi->cvq) / 3);
+	for (i = 0; i < batch_dim_nums; i++) {
+		coal_node = kzalloc(sizeof(*coal_node), GFP_KERNEL);
+		if (!coal_node) {
+			virtnet_del_coal_free_list(vi);
+			return -ENOMEM;
+		}
+		list_add(&coal_node->list, &vi->coal_free_list);
+	}
+
+	return 0;
+}
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
 	int i, err = -ENOMEM;
@@ -5797,6 +5973,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (err)
 		goto free;
 
+	if (virtnet_init_coal_list(vi))
+		goto free;
+
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
 		vi->intr_coal_rx.max_usecs = 0;
 		vi->intr_coal_tx.max_usecs = 0;
@@ -5838,6 +6017,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (vi->has_rss || vi->has_rss_hash_report)
 		virtnet_init_default_rss(vi);
 
+	INIT_WORK(&vi->get_cvq, virtnet_get_cvq_work);
 	init_completion(&vi->ctrl->completion);
 	enable_rx_mode_work(vi);
 
@@ -5967,11 +6147,14 @@ static void virtnet_remove(struct virtio_device *vdev)
 	flush_work(&vi->config_work);
 	disable_rx_mode_work(vi);
 	flush_work(&vi->rx_mode_work);
+	flush_work(&vi->get_cvq);
 
 	unregister_netdev(vi->dev);
 
 	net_failover_destroy(vi->failover);
 
+	virtnet_del_coal_free_list(vi);
+
 	remove_vq_common(vi);
 
 	free_netdev(vi->dev);
-- 
2.32.0.3.g01195cf9f


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency
  2024-06-06  6:14 ` [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency Heng Qi
@ 2024-06-06 10:25   ` kernel test robot
  2024-06-06 20:34   ` kernel test robot
  2024-06-17  4:05   ` Jason Wang
  2 siblings, 0 replies; 11+ messages in thread
From: kernel test robot @ 2024-06-06 10:25 UTC (permalink / raw)
  To: Heng Qi, netdev, virtualization
  Cc: llvm, oe-kbuild-all, Jason Wang, Michael S. Tsirkin, Xuan Zhuo,
	Eugenio Pérez, Eric Dumazet, Jakub Kicinski, Paolo Abeni

Hi Heng,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Heng-Qi/virtio_net-passing-control_buf-explicitly/20240606-141748
base:   net-next/main
patch link:    https://lore.kernel.org/r/20240606061446.127802-5-hengqi%40linux.alibaba.com
patch subject: [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency
config: riscv-defconfig (https://download.01.org/0day-ci/archive/20240606/202406061803.R48FH9c8-lkp@intel.com/config)
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project d7d2d4f53fc79b4b58e8d8d08151b577c3699d4a)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240606/202406061803.R48FH9c8-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406061803.R48FH9c8-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from drivers/net/virtio_net.c:7:
   In file included from include/linux/netdevice.h:38:
   In file included from include/net/net_namespace.h:43:
   In file included from include/linux/skbuff.h:17:
   In file included from include/linux/bvec.h:10:
   In file included from include/linux/highmem.h:8:
   In file included from include/linux/cacheflush.h:5:
   In file included from arch/riscv/include/asm/cacheflush.h:9:
   In file included from include/linux/mm.h:2253:
   include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
>> drivers/net/virtio_net.c:2844:3: warning: add explicit braces to avoid dangling else [-Wdangling-else]
    2844 |                 else
         |                 ^
   2 warnings generated.


vim +2844 drivers/net/virtio_net.c

  2813	
  2814	static void virtnet_get_cvq_work(struct work_struct *work)
  2815	{
  2816		struct virtnet_info *vi =
  2817			container_of(work, struct virtnet_info, get_cvq);
  2818		struct virtnet_coal_node *wait_coal;
  2819		bool valid = false;
  2820		unsigned int tmp;
  2821		void *res;
  2822	
  2823		mutex_lock(&vi->cvq_lock);
  2824		while ((res = virtqueue_get_buf(vi->cvq, &tmp)) != NULL) {
  2825			complete((struct completion *)res);
  2826			valid = true;
  2827		}
  2828		mutex_unlock(&vi->cvq_lock);
  2829	
  2830		if (!valid)
  2831			return;
  2832	
  2833		while (true) {
  2834			wait_coal = NULL;
  2835			mutex_lock(&vi->coal_wait_lock);
  2836			if (!list_empty(&vi->coal_wait_list))
  2837				wait_coal = list_first_entry(&vi->coal_wait_list,
  2838							     struct virtnet_coal_node,
  2839							     list);
  2840			mutex_unlock(&vi->coal_wait_lock);
  2841			if (wait_coal)
  2842				if (virtnet_add_dim_command(vi, wait_coal))
  2843					break;
> 2844			else
  2845				break;
  2846		}
  2847	}
  2848	static int virtnet_set_mac_address(struct net_device *dev, void *p)
  2849	{
  2850		struct virtnet_info *vi = netdev_priv(dev);
  2851		struct virtio_device *vdev = vi->vdev;
  2852		int ret;
  2853		struct sockaddr *addr;
  2854		struct scatterlist sg;
  2855	
  2856		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
  2857			return -EOPNOTSUPP;
  2858	
  2859		addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
  2860		if (!addr)
  2861			return -ENOMEM;
  2862	
  2863		ret = eth_prepare_mac_addr_change(dev, addr);
  2864		if (ret)
  2865			goto out;
  2866	
  2867		if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
  2868			sg_init_one(&sg, addr->sa_data, dev->addr_len);
  2869			if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
  2870						  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
  2871				dev_warn(&vdev->dev,
  2872					 "Failed to set mac address by vq command.\n");
  2873				ret = -EINVAL;
  2874				goto out;
  2875			}
  2876		} else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
  2877			   !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
  2878			unsigned int i;
  2879	
  2880			/* Naturally, this has an atomicity problem. */
  2881			for (i = 0; i < dev->addr_len; i++)
  2882				virtio_cwrite8(vdev,
  2883					       offsetof(struct virtio_net_config, mac) +
  2884					       i, addr->sa_data[i]);
  2885		}
  2886	
  2887		eth_commit_mac_addr_change(dev, p);
  2888		ret = 0;
  2889	
  2890	out:
  2891		kfree(addr);
  2892		return ret;
  2893	}
  2894	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency
  2024-06-06  6:14 ` [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency Heng Qi
  2024-06-06 10:25   ` kernel test robot
@ 2024-06-06 20:34   ` kernel test robot
  2024-06-17  4:05   ` Jason Wang
  2 siblings, 0 replies; 11+ messages in thread
From: kernel test robot @ 2024-06-06 20:34 UTC (permalink / raw)
  To: Heng Qi, netdev, virtualization
  Cc: oe-kbuild-all, Jason Wang, Michael S. Tsirkin, Xuan Zhuo,
	Eugenio Pérez, Eric Dumazet, Jakub Kicinski, Paolo Abeni

Hi Heng,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Heng-Qi/virtio_net-passing-control_buf-explicitly/20240606-141748
base:   net-next/main
patch link:    https://lore.kernel.org/r/20240606061446.127802-5-hengqi%40linux.alibaba.com
patch subject: [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency
config: sh-allmodconfig (https://download.01.org/0day-ci/archive/20240607/202406070616.vNSLJTt2-lkp@intel.com/config)
compiler: sh4-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240607/202406070616.vNSLJTt2-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406070616.vNSLJTt2-lkp@intel.com/

All warnings (new ones prefixed by >>):

   drivers/net/virtio_net.c: In function 'virtnet_get_cvq_work':
>> drivers/net/virtio_net.c:2841:20: warning: suggest explicit braces to avoid ambiguous 'else' [-Wdangling-else]
    2841 |                 if (wait_coal)
         |                    ^


vim +/else +2841 drivers/net/virtio_net.c

  2813	
  2814	static void virtnet_get_cvq_work(struct work_struct *work)
  2815	{
  2816		struct virtnet_info *vi =
  2817			container_of(work, struct virtnet_info, get_cvq);
  2818		struct virtnet_coal_node *wait_coal;
  2819		bool valid = false;
  2820		unsigned int tmp;
  2821		void *res;
  2822	
  2823		mutex_lock(&vi->cvq_lock);
  2824		while ((res = virtqueue_get_buf(vi->cvq, &tmp)) != NULL) {
  2825			complete((struct completion *)res);
  2826			valid = true;
  2827		}
  2828		mutex_unlock(&vi->cvq_lock);
  2829	
  2830		if (!valid)
  2831			return;
  2832	
  2833		while (true) {
  2834			wait_coal = NULL;
  2835			mutex_lock(&vi->coal_wait_lock);
  2836			if (!list_empty(&vi->coal_wait_list))
  2837				wait_coal = list_first_entry(&vi->coal_wait_list,
  2838							     struct virtnet_coal_node,
  2839							     list);
  2840			mutex_unlock(&vi->coal_wait_lock);
> 2841			if (wait_coal)
  2842				if (virtnet_add_dim_command(vi, wait_coal))
  2843					break;
  2844			else
  2845				break;
  2846		}
  2847	}
  2848	static int virtnet_set_mac_address(struct net_device *dev, void *p)
  2849	{
  2850		struct virtnet_info *vi = netdev_priv(dev);
  2851		struct virtio_device *vdev = vi->vdev;
  2852		int ret;
  2853		struct sockaddr *addr;
  2854		struct scatterlist sg;
  2855	
  2856		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
  2857			return -EOPNOTSUPP;
  2858	
  2859		addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
  2860		if (!addr)
  2861			return -ENOMEM;
  2862	
  2863		ret = eth_prepare_mac_addr_change(dev, addr);
  2864		if (ret)
  2865			goto out;
  2866	
  2867		if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
  2868			sg_init_one(&sg, addr->sa_data, dev->addr_len);
  2869			if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
  2870						  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
  2871				dev_warn(&vdev->dev,
  2872					 "Failed to set mac address by vq command.\n");
  2873				ret = -EINVAL;
  2874				goto out;
  2875			}
  2876		} else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
  2877			   !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
  2878			unsigned int i;
  2879	
  2880			/* Naturally, this has an atomicity problem. */
  2881			for (i = 0; i < dev->addr_len; i++)
  2882				virtio_cwrite8(vdev,
  2883					       offsetof(struct virtio_net_config, mac) +
  2884					       i, addr->sa_data[i]);
  2885		}
  2886	
  2887		eth_commit_mac_addr_change(dev, p);
  2888		ret = 0;
  2889	
  2890	out:
  2891		kfree(addr);
  2892		return ret;
  2893	}
  2894	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency
  2024-06-06  6:14 ` [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency Heng Qi
  2024-06-06 10:25   ` kernel test robot
  2024-06-06 20:34   ` kernel test robot
@ 2024-06-17  4:05   ` Jason Wang
  2024-06-17  7:27     ` Heng Qi
  2 siblings, 1 reply; 11+ messages in thread
From: Jason Wang @ 2024-06-17  4:05 UTC (permalink / raw)
  To: Heng Qi
  Cc: netdev, virtualization, Michael S. Tsirkin, Xuan Zhuo,
	Eugenio Pérez, Eric Dumazet, David S. Miller, Jakub Kicinski,
	Paolo Abeni

On Thu, Jun 6, 2024 at 2:15 PM Heng Qi <hengqi@linux.alibaba.com> wrote:
>
> Currently, control vq handles commands synchronously,
> leading to increased delays for dim commands during multi-queue
> VM configuration and directly impacting dim performance.
>
> To address this, we are shifting to asynchronous processing of
> ctrlq's dim commands.
>
> Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
> ---
>  drivers/net/virtio_net.c | 233 ++++++++++++++++++++++++++++++++++-----
>  1 file changed, 208 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index e59e12bb7601..0338528993ab 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -376,6 +376,13 @@ struct control_buf {
>         struct completion completion;
>  };
>
> +struct virtnet_coal_node {
> +       struct control_buf ctrl;
> +       struct virtio_net_ctrl_coal_vq coal_vqs;
> +       bool is_coal_wait;
> +       struct list_head list;
> +};
> +
>  struct virtnet_info {
>         struct virtio_device *vdev;
>         struct virtqueue *cvq;
> @@ -420,6 +427,9 @@ struct virtnet_info {
>         /* Lock to protect the control VQ */
>         struct mutex cvq_lock;
>
> +       /* Work struct for acquisition of cvq processing results. */
> +       struct work_struct get_cvq;
> +
>         /* Host can handle any s/g split between our header and packet data */
>         bool any_header_sg;
>
> @@ -464,6 +474,14 @@ struct virtnet_info {
>         struct virtnet_interrupt_coalesce intr_coal_tx;
>         struct virtnet_interrupt_coalesce intr_coal_rx;
>
> +       /* Free nodes used for concurrent delivery */
> +       struct mutex coal_free_lock;
> +       struct list_head coal_free_list;
> +
> +       /* Filled when there are no free nodes or cvq buffers */
> +       struct mutex coal_wait_lock;
> +       struct list_head coal_wait_list;
> +
>         unsigned long guest_offloads;
>         unsigned long guest_offloads_capable;
>
> @@ -670,7 +688,7 @@ static void virtnet_cvq_done(struct virtqueue *cvq)
>  {
>         struct virtnet_info *vi = cvq->vdev->priv;
>
> -       complete(&vi->ctrl->completion);
> +       schedule_work(&vi->get_cvq);
>  }
>
>  static void skb_xmit_done(struct virtqueue *vq)
> @@ -2696,7 +2714,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
>                                        struct scatterlist *in)
>  {
>         struct scatterlist *sgs[5], hdr, stat;
> -       u32 out_num = 0, tmp, in_num = 0;
> +       u32 out_num = 0, in_num = 0;
>         int ret;
>
>         /* Caller should know better */
> @@ -2730,14 +2748,14 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
>                 return false;
>         }
>
> -       if (unlikely(!virtqueue_kick(vi->cvq)))
> -               goto unlock;
> +       if (unlikely(!virtqueue_kick(vi->cvq))) {
> +               mutex_unlock(&vi->cvq_lock);
> +               return false;
> +       }
> +       mutex_unlock(&vi->cvq_lock);
>
> -       wait_for_completion(&vi->ctrl->completion);
> -       virtqueue_get_buf(vi->cvq, &tmp);
> +       wait_for_completion(&ctrl->completion);
>
> -unlock:
> -       mutex_unlock(&vi->cvq_lock);
>         return ctrl->status == VIRTIO_NET_OK;
>  }
>
> @@ -2747,6 +2765,86 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>         return virtnet_send_command_reply(vi, class, cmd, vi->ctrl, out, NULL);
>  }
>
> +static void virtnet_process_dim_cmd(struct virtnet_info *vi,
> +                                   struct virtnet_coal_node *node)
> +{
> +       u16 qnum = le16_to_cpu(node->coal_vqs.vqn) / 2;
> +
> +       mutex_lock(&vi->rq[qnum].dim_lock);
> +       vi->rq[qnum].intr_coal.max_usecs =
> +               le32_to_cpu(node->coal_vqs.coal.max_usecs);
> +       vi->rq[qnum].intr_coal.max_packets =
> +               le32_to_cpu(node->coal_vqs.coal.max_packets);
> +       vi->rq[qnum].dim.state = DIM_START_MEASURE;
> +       mutex_unlock(&vi->rq[qnum].dim_lock);
> +
> +       if (node->is_coal_wait) {
> +               mutex_lock(&vi->coal_wait_lock);
> +               list_del(&node->list);
> +               mutex_unlock(&vi->coal_wait_lock);
> +               kfree(node);
> +       } else {
> +               mutex_lock(&vi->coal_free_lock);
> +               list_add(&node->list, &vi->coal_free_list);
> +               mutex_unlock(&vi->coal_free_lock);
> +       }
> +}
> +
> +static int virtnet_add_dim_command(struct virtnet_info *vi,
> +                                  struct virtnet_coal_node *coal_node)
> +{
> +       struct scatterlist sg;
> +       int ret;
> +
> +       sg_init_one(&sg, &coal_node->coal_vqs, sizeof(coal_node->coal_vqs));
> +       ret = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> +                                        VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET,
> +                                        &coal_node->ctrl, &sg, NULL);
> +       if (!ret) {
> +               dev_warn(&vi->dev->dev,
> +                        "Failed to change coalescing params.\n");
> +               return ret;
> +       }
> +
> +       virtnet_process_dim_cmd(vi, coal_node);
> +
> +       return 0;
> +}
> +
> +static void virtnet_get_cvq_work(struct work_struct *work)
> +{
> +       struct virtnet_info *vi =
> +               container_of(work, struct virtnet_info, get_cvq);
> +       struct virtnet_coal_node *wait_coal;
> +       bool valid = false;
> +       unsigned int tmp;
> +       void *res;
> +
> +       mutex_lock(&vi->cvq_lock);
> +       while ((res = virtqueue_get_buf(vi->cvq, &tmp)) != NULL) {
> +               complete((struct completion *)res);
> +               valid = true;
> +       }
> +       mutex_unlock(&vi->cvq_lock);

How could we synchronize with the device in this case?

E.g what happens if the device finishes another buf here?

> +
> +       if (!valid)
> +               return;
> +
> +       while (true) {
> +               wait_coal = NULL;
> +               mutex_lock(&vi->coal_wait_lock);
> +               if (!list_empty(&vi->coal_wait_list))
> +                       wait_coal = list_first_entry(&vi->coal_wait_list,
> +                                                    struct virtnet_coal_node,
> +                                                    list);
> +               mutex_unlock(&vi->coal_wait_lock);
> +               if (wait_coal)
> +                       if (virtnet_add_dim_command(vi, wait_coal))
> +                               break;
> +               else
> +                       break;
> +       }

This is still an ad-hoc optimization for dim in the general path here.

Could we have a fn callback so for non dim it's just a completion and
for dim it would be a schedule_work()?

> +}
>  static int virtnet_set_mac_address(struct net_device *dev, void *p)
>  {
>         struct virtnet_info *vi = netdev_priv(dev);
> @@ -4398,35 +4496,73 @@ static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
>         return 0;
>  }
>
> +static void virtnet_put_wait_coal(struct virtnet_info *vi,
> +                                 struct receive_queue *rq,
> +                                 struct dim_cq_moder moder)
> +{
> +       struct virtnet_coal_node *wait_node;
> +
> +       wait_node = kzalloc(sizeof(*wait_node), GFP_KERNEL);
> +       if (!wait_node) {
> +               rq->dim.state = DIM_START_MEASURE;
> +               return;
> +       }
> +
> +       wait_node->is_coal_wait = true;
> +       wait_node->coal_vqs.vqn = cpu_to_le16(rxq2vq(rq - vi->rq));
> +       wait_node->coal_vqs.coal.max_usecs = cpu_to_le32(moder.usec);
> +       wait_node->coal_vqs.coal.max_packets = cpu_to_le32(moder.pkts);
> +       mutex_lock(&vi->coal_wait_lock);
> +       list_add_tail(&wait_node->list, &vi->coal_wait_list);
> +       mutex_unlock(&vi->coal_wait_lock);
> +}
> +
>  static void virtnet_rx_dim_work(struct work_struct *work)
>  {
>         struct dim *dim = container_of(work, struct dim, work);
>         struct receive_queue *rq = container_of(dim,
>                         struct receive_queue, dim);
>         struct virtnet_info *vi = rq->vq->vdev->priv;
> -       struct net_device *dev = vi->dev;
> +       struct virtnet_coal_node *avail_coal;
>         struct dim_cq_moder update_moder;
> -       int qnum, err;
>
> -       qnum = rq - vi->rq;
> +       update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
>
>         mutex_lock(&rq->dim_lock);
> -       if (!rq->dim_enabled)
> -               goto out;
> -
> -       update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
> -       if (update_moder.usec != rq->intr_coal.max_usecs ||
> -           update_moder.pkts != rq->intr_coal.max_packets) {
> -               err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum,
> -                                                      update_moder.usec,
> -                                                      update_moder.pkts);
> -               if (err)
> -                       pr_debug("%s: Failed to send dim parameters on rxq%d\n",
> -                                dev->name, qnum);
> -               dim->state = DIM_START_MEASURE;
> +       if (!rq->dim_enabled ||
> +           (update_moder.usec == rq->intr_coal.max_usecs &&
> +            update_moder.pkts == rq->intr_coal.max_packets)) {
> +               rq->dim.state = DIM_START_MEASURE;
> +               mutex_unlock(&rq->dim_lock);
> +               return;
>         }
> -out:
>         mutex_unlock(&rq->dim_lock);
> +
> +       mutex_lock(&vi->cvq_lock);
> +       if (vi->cvq->num_free < 3) {
> +               virtnet_put_wait_coal(vi, rq, update_moder);
> +               mutex_unlock(&vi->cvq_lock);
> +               return;
> +       }

Could we simply sleep instead of using a list here?

> +       mutex_unlock(&vi->cvq_lock);
> +
> +       mutex_lock(&vi->coal_free_lock);
> +       if (list_empty(&vi->coal_free_list)) {
> +               virtnet_put_wait_coal(vi, rq, update_moder);
> +               mutex_unlock(&vi->coal_free_lock);
> +               return;
> +       }
> +
> +       avail_coal = list_first_entry(&vi->coal_free_list,
> +                                     struct virtnet_coal_node, list);
> +       avail_coal->coal_vqs.vqn = cpu_to_le16(rxq2vq(rq - vi->rq));
> +       avail_coal->coal_vqs.coal.max_usecs = cpu_to_le32(update_moder.usec);
> +       avail_coal->coal_vqs.coal.max_packets = cpu_to_le32(update_moder.pkts);
> +
> +       list_del(&avail_coal->list);
> +       mutex_unlock(&vi->coal_free_lock);
> +
> +       virtnet_add_dim_command(vi, avail_coal);
>  }
>
>  static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
> @@ -4839,6 +4975,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
>         flush_work(&vi->config_work);
>         disable_rx_mode_work(vi);
>         flush_work(&vi->rx_mode_work);
> +       flush_work(&vi->get_cvq);
>
>         netif_tx_lock_bh(vi->dev);
>         netif_device_detach(vi->dev);
> @@ -5612,6 +5749,45 @@ static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
>         .xmo_rx_hash                    = virtnet_xdp_rx_hash,
>  };
>
> +static void virtnet_del_coal_free_list(struct virtnet_info *vi)
> +{
> +       struct virtnet_coal_node *coal_node, *tmp;
> +
> +       list_for_each_entry_safe(coal_node, tmp,  &vi->coal_free_list, list) {
> +               list_del(&coal_node->list);
> +               kfree(coal_node);
> +       }
> +}
> +
> +static int virtnet_init_coal_list(struct virtnet_info *vi)
> +{
> +       struct virtnet_coal_node *coal_node;
> +       int batch_dim_nums;
> +       int i;
> +
> +       INIT_LIST_HEAD(&vi->coal_free_list);
> +       mutex_init(&vi->coal_free_lock);
> +
> +       INIT_LIST_HEAD(&vi->coal_wait_list);
> +       mutex_init(&vi->coal_wait_lock);
> +
> +       if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
> +               return 0;
> +
> +       batch_dim_nums = min((unsigned int)vi->max_queue_pairs,
> +                            virtqueue_get_vring_size(vi->cvq) / 3);
> +       for (i = 0; i < batch_dim_nums; i++) {
> +               coal_node = kzalloc(sizeof(*coal_node), GFP_KERNEL);
> +               if (!coal_node) {
> +                       virtnet_del_coal_free_list(vi);
> +                       return -ENOMEM;
> +               }
> +               list_add(&coal_node->list, &vi->coal_free_list);
> +       }
> +
> +       return 0;
> +}
> +
>  static int virtnet_probe(struct virtio_device *vdev)
>  {
>         int i, err = -ENOMEM;
> @@ -5797,6 +5973,9 @@ static int virtnet_probe(struct virtio_device *vdev)
>         if (err)
>                 goto free;
>
> +       if (virtnet_init_coal_list(vi))
> +               goto free;
> +
>         if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
>                 vi->intr_coal_rx.max_usecs = 0;
>                 vi->intr_coal_tx.max_usecs = 0;
> @@ -5838,6 +6017,7 @@ static int virtnet_probe(struct virtio_device *vdev)
>         if (vi->has_rss || vi->has_rss_hash_report)
>                 virtnet_init_default_rss(vi);
>
> +       INIT_WORK(&vi->get_cvq, virtnet_get_cvq_work);
>         init_completion(&vi->ctrl->completion);
>         enable_rx_mode_work(vi);
>
> @@ -5967,11 +6147,14 @@ static void virtnet_remove(struct virtio_device *vdev)
>         flush_work(&vi->config_work);
>         disable_rx_mode_work(vi);
>         flush_work(&vi->rx_mode_work);
> +       flush_work(&vi->get_cvq);

Do we need to prevent cvq work from being scheduled here?

Thanks

>
>         unregister_netdev(vi->dev);
>
>         net_failover_destroy(vi->failover);
>
> +       virtnet_del_coal_free_list(vi);
> +
>         remove_vq_common(vi);
>
>         free_netdev(vi->dev);
> --
> 2.32.0.3.g01195cf9f
>


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency
  2024-06-17  4:05   ` Jason Wang
@ 2024-06-17  7:27     ` Heng Qi
  2024-06-18  1:29       ` Jason Wang
  0 siblings, 1 reply; 11+ messages in thread
From: Heng Qi @ 2024-06-17  7:27 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, virtualization, Michael S. Tsirkin, Xuan Zhuo,
	Eugenio Pérez, Eric Dumazet, David S. Miller, Jakub Kicinski,
	Paolo Abeni

On Mon, 17 Jun 2024 12:05:30 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Thu, Jun 6, 2024 at 2:15 PM Heng Qi <hengqi@linux.alibaba.com> wrote:
> >
> > Currently, control vq handles commands synchronously,
> > leading to increased delays for dim commands during multi-queue
> > VM configuration and directly impacting dim performance.
> >
> > To address this, we are shifting to asynchronous processing of
> > ctrlq's dim commands.
> >
> > Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
> > ---
> >  drivers/net/virtio_net.c | 233 ++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 208 insertions(+), 25 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index e59e12bb7601..0338528993ab 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -376,6 +376,13 @@ struct control_buf {
> >         struct completion completion;
> >  };
> >
> > +struct virtnet_coal_node {
> > +       struct control_buf ctrl;
> > +       struct virtio_net_ctrl_coal_vq coal_vqs;
> > +       bool is_coal_wait;
> > +       struct list_head list;
> > +};
> > +
> >  struct virtnet_info {
> >         struct virtio_device *vdev;
> >         struct virtqueue *cvq;
> > @@ -420,6 +427,9 @@ struct virtnet_info {
> >         /* Lock to protect the control VQ */
> >         struct mutex cvq_lock;
> >
> > +       /* Work struct for acquisition of cvq processing results. */
> > +       struct work_struct get_cvq;
> > +
> >         /* Host can handle any s/g split between our header and packet data */
> >         bool any_header_sg;
> >
> > @@ -464,6 +474,14 @@ struct virtnet_info {
> >         struct virtnet_interrupt_coalesce intr_coal_tx;
> >         struct virtnet_interrupt_coalesce intr_coal_rx;
> >
> > +       /* Free nodes used for concurrent delivery */
> > +       struct mutex coal_free_lock;
> > +       struct list_head coal_free_list;
> > +
> > +       /* Filled when there are no free nodes or cvq buffers */
> > +       struct mutex coal_wait_lock;
> > +       struct list_head coal_wait_list;
> > +
> >         unsigned long guest_offloads;
> >         unsigned long guest_offloads_capable;
> >
> > @@ -670,7 +688,7 @@ static void virtnet_cvq_done(struct virtqueue *cvq)
> >  {
> >         struct virtnet_info *vi = cvq->vdev->priv;
> >
> > -       complete(&vi->ctrl->completion);
> > +       schedule_work(&vi->get_cvq);
> >  }
> >
> >  static void skb_xmit_done(struct virtqueue *vq)
> > @@ -2696,7 +2714,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
> >                                        struct scatterlist *in)
> >  {
> >         struct scatterlist *sgs[5], hdr, stat;
> > -       u32 out_num = 0, tmp, in_num = 0;
> > +       u32 out_num = 0, in_num = 0;
> >         int ret;
> >
> >         /* Caller should know better */
> > @@ -2730,14 +2748,14 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
> >                 return false;
> >         }
> >
> > -       if (unlikely(!virtqueue_kick(vi->cvq)))
> > -               goto unlock;
> > +       if (unlikely(!virtqueue_kick(vi->cvq))) {
> > +               mutex_unlock(&vi->cvq_lock);
> > +               return false;
> > +       }
> > +       mutex_unlock(&vi->cvq_lock);
> >
> > -       wait_for_completion(&vi->ctrl->completion);
> > -       virtqueue_get_buf(vi->cvq, &tmp);
> > +       wait_for_completion(&ctrl->completion);
> >
> > -unlock:
> > -       mutex_unlock(&vi->cvq_lock);
> >         return ctrl->status == VIRTIO_NET_OK;
> >  }
> >
> > @@ -2747,6 +2765,86 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> >         return virtnet_send_command_reply(vi, class, cmd, vi->ctrl, out, NULL);
> >  }
> >
> > +static void virtnet_process_dim_cmd(struct virtnet_info *vi,
> > +                                   struct virtnet_coal_node *node)
> > +{
> > +       u16 qnum = le16_to_cpu(node->coal_vqs.vqn) / 2;
> > +
> > +       mutex_lock(&vi->rq[qnum].dim_lock);
> > +       vi->rq[qnum].intr_coal.max_usecs =
> > +               le32_to_cpu(node->coal_vqs.coal.max_usecs);
> > +       vi->rq[qnum].intr_coal.max_packets =
> > +               le32_to_cpu(node->coal_vqs.coal.max_packets);
> > +       vi->rq[qnum].dim.state = DIM_START_MEASURE;
> > +       mutex_unlock(&vi->rq[qnum].dim_lock);
> > +
> > +       if (node->is_coal_wait) {
> > +               mutex_lock(&vi->coal_wait_lock);
> > +               list_del(&node->list);
> > +               mutex_unlock(&vi->coal_wait_lock);
> > +               kfree(node);
> > +       } else {
> > +               mutex_lock(&vi->coal_free_lock);
> > +               list_add(&node->list, &vi->coal_free_list);
> > +               mutex_unlock(&vi->coal_free_lock);
> > +       }
> > +}
> > +
> > +static int virtnet_add_dim_command(struct virtnet_info *vi,
> > +                                  struct virtnet_coal_node *coal_node)
> > +{
> > +       struct scatterlist sg;
> > +       int ret;
> > +
> > +       sg_init_one(&sg, &coal_node->coal_vqs, sizeof(coal_node->coal_vqs));
> > +       ret = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> > +                                        VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET,
> > +                                        &coal_node->ctrl, &sg, NULL);
> > +       if (!ret) {
> > +               dev_warn(&vi->dev->dev,
> > +                        "Failed to change coalescing params.\n");
> > +               return ret;
> > +       }
> > +
> > +       virtnet_process_dim_cmd(vi, coal_node);
> > +
> > +       return 0;
> > +}
> > +
> > +static void virtnet_get_cvq_work(struct work_struct *work)
> > +{
> > +       struct virtnet_info *vi =
> > +               container_of(work, struct virtnet_info, get_cvq);
> > +       struct virtnet_coal_node *wait_coal;
> > +       bool valid = false;
> > +       unsigned int tmp;
> > +       void *res;
> > +
> > +       mutex_lock(&vi->cvq_lock);
> > +       while ((res = virtqueue_get_buf(vi->cvq, &tmp)) != NULL) {
> > +               complete((struct completion *)res);
> > +               valid = true;
> > +       }
> > +       mutex_unlock(&vi->cvq_lock);
> 
> How could we synchronize with the device in this case?
> 
> E.g what happens if the device finishes another buf here?

That's a good question. I think we can solve it using the following snippet?

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e59e12bb7601..5dc3e1244016 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c

@@ -420,6 +427,12 @@ struct virtnet_info {
        /* Lock to protect the control VQ */
        struct mutex cvq_lock;

+       /* Atomic to confirm whether the cvq work is scheduled. */
+       atomic_t scheduled;
+
+       /* Work struct for acquisition of cvq processing results. */
+       struct work_struct get_cvq;
+


@@ -670,7 +691,9 @@ static void virtnet_cvq_done(struct virtqueue *cvq)
 {
        struct virtnet_info *vi = cvq->vdev->priv;

-       complete(&vi->ctrl->completion);
+       virtqueue_disable_cb(cvq);
+       if (!atomic_xchg(&vi->scheduled, 1))
+               schedule_work(&vi->get_cvq);
 }


+static void virtnet_get_cvq_work(struct work_struct *work)
+{
+       struct virtnet_info *vi =
+               container_of(work, struct virtnet_info, get_cvq);
+       struct virtnet_coal_node *wait_coal;
+       bool valid = false;
+       unsigned int tmp;
+       void *res;
+
+       mutex_lock(&vi->cvq_lock);
+       while ((res = virtqueue_get_buf(vi->cvq, &tmp)) != NULL) {
+               complete((struct completion *)res);
+               valid = true;
+       }
+       mutex_unlock(&vi->cvq_lock);
+
+       atomic_set(&vi->scheduled, 0);
+       virtqueue_enable_cb_prepare(vi->cvq);
+}

> 
> > +
> > +       if (!valid)
> > +               return;
> > +
> > +       while (true) {
> > +               wait_coal = NULL;
> > +               mutex_lock(&vi->coal_wait_lock);
> > +               if (!list_empty(&vi->coal_wait_list))
> > +                       wait_coal = list_first_entry(&vi->coal_wait_list,
> > +                                                    struct virtnet_coal_node,
> > +                                                    list);
> > +               mutex_unlock(&vi->coal_wait_lock);
> > +               if (wait_coal)
> > +                       if (virtnet_add_dim_command(vi, wait_coal))
> > +                               break;
> > +               else
> > +                       break;
> > +       }
> 
> This is still an ad-hoc optimization for dim in the general path here.
> 
> Could we have a fn callback so for non dim it's just a completion and
> for dim it would be a schedule_work()?
> 

OK, I will try this.

And how about this :

+static void virtnet_cvq_work_sched(struct virtqueue *cvq)
+{
+       struct virtnet_info *vi = cvq->vdev->priv;
+
+       virtqueue_disable_cb(cvq);
+       if (!atomic_xchg(&vi->scheduled, 1))
+               schedule_work(&vi->get_cvq);
+}
+
 static void virtnet_cvq_done(struct virtqueue *cvq)
 {
        struct virtnet_info *vi = cvq->vdev->priv;
+       unsigned int tmp;

+       virtqueue_get_buf(vi->cvq, &tmp);
        complete(&vi->ctrl->completion);
 }

@@ -5318,7 +5472,11 @@ static int virtnet_find_vqs(struct virtnet_info *vi)

        /* Parameters for control virtqueue, if any */
        if (vi->has_cvq) {
-               callbacks[total_vqs - 1] = virtnet_cvq_done;
+               if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
+                       callbacks[total_vqs - 1] = virtnet_cvq_work_sched;
+               else
+                       callbacks[total_vqs - 1] = virtnet_cvq_done;
+
                names[total_vqs - 1] = "control";
        }

> > +}
> >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> >  {
> >         struct virtnet_info *vi = netdev_priv(dev);
> > @@ -4398,35 +4496,73 @@ static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
> >         return 0;
> >  }
> >
> > +static void virtnet_put_wait_coal(struct virtnet_info *vi,
> > +                                 struct receive_queue *rq,
> > +                                 struct dim_cq_moder moder)
> > +{
> > +       struct virtnet_coal_node *wait_node;
> > +
> > +       wait_node = kzalloc(sizeof(*wait_node), GFP_KERNEL);
> > +       if (!wait_node) {
> > +               rq->dim.state = DIM_START_MEASURE;
> > +               return;
> > +       }
> > +
> > +       wait_node->is_coal_wait = true;
> > +       wait_node->coal_vqs.vqn = cpu_to_le16(rxq2vq(rq - vi->rq));
> > +       wait_node->coal_vqs.coal.max_usecs = cpu_to_le32(moder.usec);
> > +       wait_node->coal_vqs.coal.max_packets = cpu_to_le32(moder.pkts);
> > +       mutex_lock(&vi->coal_wait_lock);
> > +       list_add_tail(&wait_node->list, &vi->coal_wait_list);
> > +       mutex_unlock(&vi->coal_wait_lock);
> > +}
> > +
> >  static void virtnet_rx_dim_work(struct work_struct *work)
> >  {
> >         struct dim *dim = container_of(work, struct dim, work);
> >         struct receive_queue *rq = container_of(dim,
> >                         struct receive_queue, dim);
> >         struct virtnet_info *vi = rq->vq->vdev->priv;
> > -       struct net_device *dev = vi->dev;
> > +       struct virtnet_coal_node *avail_coal;
> >         struct dim_cq_moder update_moder;
> > -       int qnum, err;
> >
> > -       qnum = rq - vi->rq;
> > +       update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
> >
> >         mutex_lock(&rq->dim_lock);
> > -       if (!rq->dim_enabled)
> > -               goto out;
> > -
> > -       update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
> > -       if (update_moder.usec != rq->intr_coal.max_usecs ||
> > -           update_moder.pkts != rq->intr_coal.max_packets) {
> > -               err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum,
> > -                                                      update_moder.usec,
> > -                                                      update_moder.pkts);
> > -               if (err)
> > -                       pr_debug("%s: Failed to send dim parameters on rxq%d\n",
> > -                                dev->name, qnum);
> > -               dim->state = DIM_START_MEASURE;
> > +       if (!rq->dim_enabled ||
> > +           (update_moder.usec == rq->intr_coal.max_usecs &&
> > +            update_moder.pkts == rq->intr_coal.max_packets)) {
> > +               rq->dim.state = DIM_START_MEASURE;
> > +               mutex_unlock(&rq->dim_lock);
> > +               return;
> >         }
> > -out:
> >         mutex_unlock(&rq->dim_lock);
> > +
> > +       mutex_lock(&vi->cvq_lock);
> > +       if (vi->cvq->num_free < 3) {
> > +               virtnet_put_wait_coal(vi, rq, update_moder);
> > +               mutex_unlock(&vi->cvq_lock);
> > +               return;
> > +       }
> 
> Could we simply sleep instead of using a list here?

Do you mean using a semaphore, or a waitqueue?

> 
> > +       mutex_unlock(&vi->cvq_lock);
> > +
> > +       mutex_lock(&vi->coal_free_lock);
> > +       if (list_empty(&vi->coal_free_list)) {
> > +               virtnet_put_wait_coal(vi, rq, update_moder);
> > +               mutex_unlock(&vi->coal_free_lock);
> > +               return;
> > +       }
> > +
> > +       avail_coal = list_first_entry(&vi->coal_free_list,
> > +                                     struct virtnet_coal_node, list);
> > +       avail_coal->coal_vqs.vqn = cpu_to_le16(rxq2vq(rq - vi->rq));
> > +       avail_coal->coal_vqs.coal.max_usecs = cpu_to_le32(update_moder.usec);
> > +       avail_coal->coal_vqs.coal.max_packets = cpu_to_le32(update_moder.pkts);
> > +
> > +       list_del(&avail_coal->list);
> > +       mutex_unlock(&vi->coal_free_lock);
> > +
> > +       virtnet_add_dim_command(vi, avail_coal);
> >  }
> >
> >  static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
> > @@ -4839,6 +4975,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
> >         flush_work(&vi->config_work);
> >         disable_rx_mode_work(vi);
> >         flush_work(&vi->rx_mode_work);
> > +       flush_work(&vi->get_cvq);
> >
> >         netif_tx_lock_bh(vi->dev);
> >         netif_device_detach(vi->dev);
> > @@ -5612,6 +5749,45 @@ static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
> >         .xmo_rx_hash                    = virtnet_xdp_rx_hash,
> >  };
> >
> > +static void virtnet_del_coal_free_list(struct virtnet_info *vi)
> > +{
> > +       struct virtnet_coal_node *coal_node, *tmp;
> > +
> > +       list_for_each_entry_safe(coal_node, tmp,  &vi->coal_free_list, list) {
> > +               list_del(&coal_node->list);
> > +               kfree(coal_node);
> > +       }
> > +}
> > +
> > +static int virtnet_init_coal_list(struct virtnet_info *vi)
> > +{
> > +       struct virtnet_coal_node *coal_node;
> > +       int batch_dim_nums;
> > +       int i;
> > +
> > +       INIT_LIST_HEAD(&vi->coal_free_list);
> > +       mutex_init(&vi->coal_free_lock);
> > +
> > +       INIT_LIST_HEAD(&vi->coal_wait_list);
> > +       mutex_init(&vi->coal_wait_lock);
> > +
> > +       if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
> > +               return 0;
> > +
> > +       batch_dim_nums = min((unsigned int)vi->max_queue_pairs,
> > +                            virtqueue_get_vring_size(vi->cvq) / 3);
> > +       for (i = 0; i < batch_dim_nums; i++) {
> > +               coal_node = kzalloc(sizeof(*coal_node), GFP_KERNEL);
> > +               if (!coal_node) {
> > +                       virtnet_del_coal_free_list(vi);
> > +                       return -ENOMEM;
> > +               }
> > +               list_add(&coal_node->list, &vi->coal_free_list);
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> >  static int virtnet_probe(struct virtio_device *vdev)
> >  {
> >         int i, err = -ENOMEM;
> > @@ -5797,6 +5973,9 @@ static int virtnet_probe(struct virtio_device *vdev)
> >         if (err)
> >                 goto free;
> >
> > +       if (virtnet_init_coal_list(vi))
> > +               goto free;
> > +
> >         if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
> >                 vi->intr_coal_rx.max_usecs = 0;
> >                 vi->intr_coal_tx.max_usecs = 0;
> > @@ -5838,6 +6017,7 @@ static int virtnet_probe(struct virtio_device *vdev)
> >         if (vi->has_rss || vi->has_rss_hash_report)
> >                 virtnet_init_default_rss(vi);
> >
> > +       INIT_WORK(&vi->get_cvq, virtnet_get_cvq_work);
> >         init_completion(&vi->ctrl->completion);
> >         enable_rx_mode_work(vi);
> >
> > @@ -5967,11 +6147,14 @@ static void virtnet_remove(struct virtio_device *vdev)
> >         flush_work(&vi->config_work);
> >         disable_rx_mode_work(vi);
> >         flush_work(&vi->rx_mode_work);
> > +       flush_work(&vi->get_cvq);
> 
> Do we need to prevent cvq work from being scheduled here?

You are right, I'll fix in the next version.

Thanks!

> 
> Thanks
> 
> >
> >         unregister_netdev(vi->dev);
> >
> >         net_failover_destroy(vi->failover);
> >
> > +       virtnet_del_coal_free_list(vi);
> > +
> >         remove_vq_common(vi);
> >
> >         free_netdev(vi->dev);
> > --
> > 2.32.0.3.g01195cf9f
> >
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency
  2024-06-17  7:27     ` Heng Qi
@ 2024-06-18  1:29       ` Jason Wang
  2024-06-18 14:24         ` Heng Qi
  0 siblings, 1 reply; 11+ messages in thread
From: Jason Wang @ 2024-06-18  1:29 UTC (permalink / raw)
  To: Heng Qi
  Cc: netdev, virtualization, Michael S. Tsirkin, Xuan Zhuo,
	Eugenio Pérez, Eric Dumazet, David S. Miller, Jakub Kicinski,
	Paolo Abeni

On Mon, Jun 17, 2024 at 4:08 PM Heng Qi <hengqi@linux.alibaba.com> wrote:
>
> On Mon, 17 Jun 2024 12:05:30 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Thu, Jun 6, 2024 at 2:15 PM Heng Qi <hengqi@linux.alibaba.com> wrote:
> > >
> > > Currently, control vq handles commands synchronously,
> > > leading to increased delays for dim commands during multi-queue
> > > VM configuration and directly impacting dim performance.
> > >
> > > To address this, we are shifting to asynchronous processing of
> > > ctrlq's dim commands.
> > >
> > > Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
> > > ---
> > >  drivers/net/virtio_net.c | 233 ++++++++++++++++++++++++++++++++++-----
> > >  1 file changed, 208 insertions(+), 25 deletions(-)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index e59e12bb7601..0338528993ab 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -376,6 +376,13 @@ struct control_buf {
> > >         struct completion completion;
> > >  };
> > >
> > > +struct virtnet_coal_node {
> > > +       struct control_buf ctrl;
> > > +       struct virtio_net_ctrl_coal_vq coal_vqs;
> > > +       bool is_coal_wait;
> > > +       struct list_head list;
> > > +};
> > > +
> > >  struct virtnet_info {
> > >         struct virtio_device *vdev;
> > >         struct virtqueue *cvq;
> > > @@ -420,6 +427,9 @@ struct virtnet_info {
> > >         /* Lock to protect the control VQ */
> > >         struct mutex cvq_lock;
> > >
> > > +       /* Work struct for acquisition of cvq processing results. */
> > > +       struct work_struct get_cvq;
> > > +
> > >         /* Host can handle any s/g split between our header and packet data */
> > >         bool any_header_sg;
> > >
> > > @@ -464,6 +474,14 @@ struct virtnet_info {
> > >         struct virtnet_interrupt_coalesce intr_coal_tx;
> > >         struct virtnet_interrupt_coalesce intr_coal_rx;
> > >
> > > +       /* Free nodes used for concurrent delivery */
> > > +       struct mutex coal_free_lock;
> > > +       struct list_head coal_free_list;
> > > +
> > > +       /* Filled when there are no free nodes or cvq buffers */
> > > +       struct mutex coal_wait_lock;
> > > +       struct list_head coal_wait_list;
> > > +
> > >         unsigned long guest_offloads;
> > >         unsigned long guest_offloads_capable;
> > >
> > > @@ -670,7 +688,7 @@ static void virtnet_cvq_done(struct virtqueue *cvq)
> > >  {
> > >         struct virtnet_info *vi = cvq->vdev->priv;
> > >
> > > -       complete(&vi->ctrl->completion);
> > > +       schedule_work(&vi->get_cvq);
> > >  }
> > >
> > >  static void skb_xmit_done(struct virtqueue *vq)
> > > @@ -2696,7 +2714,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
> > >                                        struct scatterlist *in)
> > >  {
> > >         struct scatterlist *sgs[5], hdr, stat;
> > > -       u32 out_num = 0, tmp, in_num = 0;
> > > +       u32 out_num = 0, in_num = 0;
> > >         int ret;
> > >
> > >         /* Caller should know better */
> > > @@ -2730,14 +2748,14 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
> > >                 return false;
> > >         }
> > >
> > > -       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > -               goto unlock;
> > > +       if (unlikely(!virtqueue_kick(vi->cvq))) {
> > > +               mutex_unlock(&vi->cvq_lock);
> > > +               return false;
> > > +       }
> > > +       mutex_unlock(&vi->cvq_lock);
> > >
> > > -       wait_for_completion(&vi->ctrl->completion);
> > > -       virtqueue_get_buf(vi->cvq, &tmp);
> > > +       wait_for_completion(&ctrl->completion);
> > >
> > > -unlock:
> > > -       mutex_unlock(&vi->cvq_lock);
> > >         return ctrl->status == VIRTIO_NET_OK;
> > >  }
> > >
> > > @@ -2747,6 +2765,86 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > >         return virtnet_send_command_reply(vi, class, cmd, vi->ctrl, out, NULL);
> > >  }
> > >
> > > +static void virtnet_process_dim_cmd(struct virtnet_info *vi,
> > > +                                   struct virtnet_coal_node *node)
> > > +{
> > > +       u16 qnum = le16_to_cpu(node->coal_vqs.vqn) / 2;
> > > +
> > > +       mutex_lock(&vi->rq[qnum].dim_lock);
> > > +       vi->rq[qnum].intr_coal.max_usecs =
> > > +               le32_to_cpu(node->coal_vqs.coal.max_usecs);
> > > +       vi->rq[qnum].intr_coal.max_packets =
> > > +               le32_to_cpu(node->coal_vqs.coal.max_packets);
> > > +       vi->rq[qnum].dim.state = DIM_START_MEASURE;
> > > +       mutex_unlock(&vi->rq[qnum].dim_lock);
> > > +
> > > +       if (node->is_coal_wait) {
> > > +               mutex_lock(&vi->coal_wait_lock);
> > > +               list_del(&node->list);
> > > +               mutex_unlock(&vi->coal_wait_lock);
> > > +               kfree(node);
> > > +       } else {
> > > +               mutex_lock(&vi->coal_free_lock);
> > > +               list_add(&node->list, &vi->coal_free_list);
> > > +               mutex_unlock(&vi->coal_free_lock);
> > > +       }
> > > +}
> > > +
> > > +static int virtnet_add_dim_command(struct virtnet_info *vi,
> > > +                                  struct virtnet_coal_node *coal_node)
> > > +{
> > > +       struct scatterlist sg;
> > > +       int ret;
> > > +
> > > +       sg_init_one(&sg, &coal_node->coal_vqs, sizeof(coal_node->coal_vqs));
> > > +       ret = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> > > +                                        VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET,
> > > +                                        &coal_node->ctrl, &sg, NULL);
> > > +       if (!ret) {
> > > +               dev_warn(&vi->dev->dev,
> > > +                        "Failed to change coalescing params.\n");
> > > +               return ret;
> > > +       }
> > > +
> > > +       virtnet_process_dim_cmd(vi, coal_node);
> > > +
> > > +       return 0;
> > > +}
> > > +
> > > +static void virtnet_get_cvq_work(struct work_struct *work)
> > > +{
> > > +       struct virtnet_info *vi =
> > > +               container_of(work, struct virtnet_info, get_cvq);
> > > +       struct virtnet_coal_node *wait_coal;
> > > +       bool valid = false;
> > > +       unsigned int tmp;
> > > +       void *res;
> > > +
> > > +       mutex_lock(&vi->cvq_lock);
> > > +       while ((res = virtqueue_get_buf(vi->cvq, &tmp)) != NULL) {
> > > +               complete((struct completion *)res);
> > > +               valid = true;
> > > +       }
> > > +       mutex_unlock(&vi->cvq_lock);
> >
> > How could we synchronize with the device in this case?
> >
> > E.g. what happens if the device finishes another buf here?
>
> That's a good question. I think we can solve it using the following snippet?
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index e59e12bb7601..5dc3e1244016 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
>
> @@ -420,6 +427,12 @@ struct virtnet_info {
>         /* Lock to protect the control VQ */
>         struct mutex cvq_lock;
>
> +       /* Atomic to confirm whether the cvq work is scheduled. */
> +       atomic_t scheduled;
> +
> +       /* Work struct for acquisition of cvq processing results. */
> +       struct work_struct get_cvq;
> +
>
>
> @@ -670,7 +691,9 @@ static void virtnet_cvq_done(struct virtqueue *cvq)
>  {
>         struct virtnet_info *vi = cvq->vdev->priv;
>
> -       complete(&vi->ctrl->completion);
> +       virtqueue_disable_cb(cvq);
> +       if (!atomic_xchg(&vi->scheduled, 1))
> +               schedule_work(&vi->get_cvq);

I think workqueue subsystem should already handle things like this.

>  }
>
>
> +static void virtnet_get_cvq_work(struct work_struct *work)
> +{
> +       struct virtnet_info *vi =
> +               container_of(work, struct virtnet_info, get_cvq);
> +       struct virtnet_coal_node *wait_coal;
> +       bool valid = false;
> +       unsigned int tmp;
> +       void *res;
> +
> +       mutex_lock(&vi->cvq_lock);
> +       while ((res = virtqueue_get_buf(vi->cvq, &tmp)) != NULL) {
> +               complete((struct completion *)res);
> +               valid = true;
> +       }
> +       mutex_unlock(&vi->cvq_lock);
> +
> +       atomic_set(&vi->scheduled, 0);
> +       virtqueue_enable_cb_prepare(vi->cvq);

We have a bunch of examples in the current code. Generally it should
be something like:

again:
    disable_cb()
    while(get_buf());
    if (enable_cb())
        disable_cb()
        goto again;

> +}
>
> >
> > > +
> > > +       if (!valid)
> > > +               return;
> > > +
> > > +       while (true) {
> > > +               wait_coal = NULL;
> > > +               mutex_lock(&vi->coal_wait_lock);
> > > +               if (!list_empty(&vi->coal_wait_list))
> > > +                       wait_coal = list_first_entry(&vi->coal_wait_list,
> > > +                                                    struct virtnet_coal_node,
> > > +                                                    list);
> > > +               mutex_unlock(&vi->coal_wait_lock);
> > > +               if (wait_coal)
> > > +                       if (virtnet_add_dim_command(vi, wait_coal))
> > > +                               break;
> > > +               else
> > > +                       break;
> > > +       }
> >
> > This is still an ad-hoc optimization for dim in the general path here.
> >
> > Could we have a fn callback so for non dim it's just a completion and
> > for dim it would be a schedule_work()?
> >
>
> OK, I will try this.
>
> And how about this :
>
> +static void virtnet_cvq_work_sched(struct virtqueue *cvq)
> +{
> +       struct virtnet_info *vi = cvq->vdev->priv;
> +
> +       virtqueue_disable_cb(cvq);
> +       if (!atomic_xchg(&vi->scheduled, 1))
> +               schedule_work(&vi->get_cvq);
> +}
> +
>  static void virtnet_cvq_done(struct virtqueue *cvq)
>  {
>         struct virtnet_info *vi = cvq->vdev->priv;
> +       unsigned int tmp;
>
> +       virtqueue_get_buf(vi->cvq, &tmp);
>         complete(&vi->ctrl->completion);
>  }
>
> @@ -5318,7 +5472,11 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
>
>         /* Parameters for control virtqueue, if any */
>         if (vi->has_cvq) {
> -               callbacks[total_vqs - 1] = virtnet_cvq_done;
> +               if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
> +                       callbacks[total_vqs - 1] = virtnet_cvq_work_sched;
> +               else
> +                       callbacks[total_vqs - 1] = virtnet_cvq_done;
> +
>                 names[total_vqs - 1] = "control";
>         }

Just to clarify, I meant a callback function per control_buf. (I've
avoid touching virtqueue callback layers)


>
> > > +}
> > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > >  {
> > >         struct virtnet_info *vi = netdev_priv(dev);
> > > @@ -4398,35 +4496,73 @@ static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
> > >         return 0;
> > >  }
> > >
> > > +static void virtnet_put_wait_coal(struct virtnet_info *vi,
> > > +                                 struct receive_queue *rq,
> > > +                                 struct dim_cq_moder moder)
> > > +{
> > > +       struct virtnet_coal_node *wait_node;
> > > +
> > > +       wait_node = kzalloc(sizeof(*wait_node), GFP_KERNEL);
> > > +       if (!wait_node) {
> > > +               rq->dim.state = DIM_START_MEASURE;
> > > +               return;
> > > +       }
> > > +
> > > +       wait_node->is_coal_wait = true;
> > > +       wait_node->coal_vqs.vqn = cpu_to_le16(rxq2vq(rq - vi->rq));
> > > +       wait_node->coal_vqs.coal.max_usecs = cpu_to_le32(moder.usec);
> > > +       wait_node->coal_vqs.coal.max_packets = cpu_to_le32(moder.pkts);
> > > +       mutex_lock(&vi->coal_wait_lock);
> > > +       list_add_tail(&wait_node->list, &vi->coal_wait_list);
> > > +       mutex_unlock(&vi->coal_wait_lock);
> > > +}
> > > +
> > >  static void virtnet_rx_dim_work(struct work_struct *work)
> > >  {
> > >         struct dim *dim = container_of(work, struct dim, work);
> > >         struct receive_queue *rq = container_of(dim,
> > >                         struct receive_queue, dim);
> > >         struct virtnet_info *vi = rq->vq->vdev->priv;
> > > -       struct net_device *dev = vi->dev;
> > > +       struct virtnet_coal_node *avail_coal;
> > >         struct dim_cq_moder update_moder;
> > > -       int qnum, err;
> > >
> > > -       qnum = rq - vi->rq;
> > > +       update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
> > >
> > >         mutex_lock(&rq->dim_lock);
> > > -       if (!rq->dim_enabled)
> > > -               goto out;
> > > -
> > > -       update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
> > > -       if (update_moder.usec != rq->intr_coal.max_usecs ||
> > > -           update_moder.pkts != rq->intr_coal.max_packets) {
> > > -               err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum,
> > > -                                                      update_moder.usec,
> > > -                                                      update_moder.pkts);
> > > -               if (err)
> > > -                       pr_debug("%s: Failed to send dim parameters on rxq%d\n",
> > > -                                dev->name, qnum);
> > > -               dim->state = DIM_START_MEASURE;
> > > +       if (!rq->dim_enabled ||
> > > +           (update_moder.usec == rq->intr_coal.max_usecs &&
> > > +            update_moder.pkts == rq->intr_coal.max_packets)) {
> > > +               rq->dim.state = DIM_START_MEASURE;
> > > +               mutex_unlock(&rq->dim_lock);
> > > +               return;
> > >         }
> > > -out:
> > >         mutex_unlock(&rq->dim_lock);
> > > +
> > > +       mutex_lock(&vi->cvq_lock);
> > > +       if (vi->cvq->num_free < 3) {
> > > +               virtnet_put_wait_coal(vi, rq, update_moder);
> > > +               mutex_unlock(&vi->cvq_lock);
> > > +               return;
> > > +       }
> >
> > Could we simply sleep instead of using a list here?
>
> Do you mean using a semaphore, or a waitqueue?

I meant sleep and wait for more space.

Thanks


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency
  2024-06-18  1:29       ` Jason Wang
@ 2024-06-18 14:24         ` Heng Qi
  0 siblings, 0 replies; 11+ messages in thread
From: Heng Qi @ 2024-06-18 14:24 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, virtualization, Michael S. Tsirkin, Xuan Zhuo,
	Eugenio Pérez, Eric Dumazet, David S. Miller, Jakub Kicinski,
	Paolo Abeni

On Tue, 18 Jun 2024 09:29:48 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Mon, Jun 17, 2024 at 4:08 PM Heng Qi <hengqi@linux.alibaba.com> wrote:
> >
> > On Mon, 17 Jun 2024 12:05:30 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Thu, Jun 6, 2024 at 2:15 PM Heng Qi <hengqi@linux.alibaba.com> wrote:
> > > >
> > > > Currently, control vq handles commands synchronously,
> > > > leading to increased delays for dim commands during multi-queue
> > > > VM configuration and directly impacting dim performance.
> > > >
> > > > To address this, we are shifting to asynchronous processing of
> > > > ctrlq's dim commands.
> > > >
> > > > Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
> > > > ---
> > > >  drivers/net/virtio_net.c | 233 ++++++++++++++++++++++++++++++++++-----
> > > >  1 file changed, 208 insertions(+), 25 deletions(-)
> > > >

Hi Jason,

I will incorporate your feedback and update the next version.

Thanks

> > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > index e59e12bb7601..0338528993ab 100644
> > > > --- a/drivers/net/virtio_net.c
> > > > +++ b/drivers/net/virtio_net.c
> > > > @@ -376,6 +376,13 @@ struct control_buf {
> > > >         struct completion completion;
> > > >  };
> > > >
> > > > +struct virtnet_coal_node {
> > > > +       struct control_buf ctrl;
> > > > +       struct virtio_net_ctrl_coal_vq coal_vqs;
> > > > +       bool is_coal_wait;
> > > > +       struct list_head list;
> > > > +};
> > > > +
> > > >  struct virtnet_info {
> > > >         struct virtio_device *vdev;
> > > >         struct virtqueue *cvq;
> > > > @@ -420,6 +427,9 @@ struct virtnet_info {
> > > >         /* Lock to protect the control VQ */
> > > >         struct mutex cvq_lock;
> > > >
> > > > +       /* Work struct for acquisition of cvq processing results. */
> > > > +       struct work_struct get_cvq;
> > > > +
> > > >         /* Host can handle any s/g split between our header and packet data */
> > > >         bool any_header_sg;
> > > >
> > > > @@ -464,6 +474,14 @@ struct virtnet_info {
> > > >         struct virtnet_interrupt_coalesce intr_coal_tx;
> > > >         struct virtnet_interrupt_coalesce intr_coal_rx;
> > > >
> > > > +       /* Free nodes used for concurrent delivery */
> > > > +       struct mutex coal_free_lock;
> > > > +       struct list_head coal_free_list;
> > > > +
> > > > +       /* Filled when there are no free nodes or cvq buffers */
> > > > +       struct mutex coal_wait_lock;
> > > > +       struct list_head coal_wait_list;
> > > > +
> > > >         unsigned long guest_offloads;
> > > >         unsigned long guest_offloads_capable;
> > > >
> > > > @@ -670,7 +688,7 @@ static void virtnet_cvq_done(struct virtqueue *cvq)
> > > >  {
> > > >         struct virtnet_info *vi = cvq->vdev->priv;
> > > >
> > > > -       complete(&vi->ctrl->completion);
> > > > +       schedule_work(&vi->get_cvq);
> > > >  }
> > > >
> > > >  static void skb_xmit_done(struct virtqueue *vq)
> > > > @@ -2696,7 +2714,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
> > > >                                        struct scatterlist *in)
> > > >  {
> > > >         struct scatterlist *sgs[5], hdr, stat;
> > > > -       u32 out_num = 0, tmp, in_num = 0;
> > > > +       u32 out_num = 0, in_num = 0;
> > > >         int ret;
> > > >
> > > >         /* Caller should know better */
> > > > @@ -2730,14 +2748,14 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
> > > >                 return false;
> > > >         }
> > > >
> > > > -       if (unlikely(!virtqueue_kick(vi->cvq)))
> > > > -               goto unlock;
> > > > +       if (unlikely(!virtqueue_kick(vi->cvq))) {
> > > > +               mutex_unlock(&vi->cvq_lock);
> > > > +               return false;
> > > > +       }
> > > > +       mutex_unlock(&vi->cvq_lock);
> > > >
> > > > -       wait_for_completion(&vi->ctrl->completion);
> > > > -       virtqueue_get_buf(vi->cvq, &tmp);
> > > > +       wait_for_completion(&ctrl->completion);
> > > >
> > > > -unlock:
> > > > -       mutex_unlock(&vi->cvq_lock);
> > > >         return ctrl->status == VIRTIO_NET_OK;
> > > >  }
> > > >
> > > > @@ -2747,6 +2765,86 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > >         return virtnet_send_command_reply(vi, class, cmd, vi->ctrl, out, NULL);
> > > >  }
> > > >
> > > > +static void virtnet_process_dim_cmd(struct virtnet_info *vi,
> > > > +                                   struct virtnet_coal_node *node)
> > > > +{
> > > > +       u16 qnum = le16_to_cpu(node->coal_vqs.vqn) / 2;
> > > > +
> > > > +       mutex_lock(&vi->rq[qnum].dim_lock);
> > > > +       vi->rq[qnum].intr_coal.max_usecs =
> > > > +               le32_to_cpu(node->coal_vqs.coal.max_usecs);
> > > > +       vi->rq[qnum].intr_coal.max_packets =
> > > > +               le32_to_cpu(node->coal_vqs.coal.max_packets);
> > > > +       vi->rq[qnum].dim.state = DIM_START_MEASURE;
> > > > +       mutex_unlock(&vi->rq[qnum].dim_lock);
> > > > +
> > > > +       if (node->is_coal_wait) {
> > > > +               mutex_lock(&vi->coal_wait_lock);
> > > > +               list_del(&node->list);
> > > > +               mutex_unlock(&vi->coal_wait_lock);
> > > > +               kfree(node);
> > > > +       } else {
> > > > +               mutex_lock(&vi->coal_free_lock);
> > > > +               list_add(&node->list, &vi->coal_free_list);
> > > > +               mutex_unlock(&vi->coal_free_lock);
> > > > +       }
> > > > +}
> > > > +
> > > > +static int virtnet_add_dim_command(struct virtnet_info *vi,
> > > > +                                  struct virtnet_coal_node *coal_node)
> > > > +{
> > > > +       struct scatterlist sg;
> > > > +       int ret;
> > > > +
> > > > +       sg_init_one(&sg, &coal_node->coal_vqs, sizeof(coal_node->coal_vqs));
> > > > +       ret = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> > > > +                                        VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET,
> > > > +                                        &coal_node->ctrl, &sg, NULL);
> > > > +       if (!ret) {
> > > > +               dev_warn(&vi->dev->dev,
> > > > +                        "Failed to change coalescing params.\n");
> > > > +               return ret;
> > > > +       }
> > > > +
> > > > +       virtnet_process_dim_cmd(vi, coal_node);
> > > > +
> > > > +       return 0;
> > > > +}
> > > > +
> > > > +static void virtnet_get_cvq_work(struct work_struct *work)
> > > > +{
> > > > +       struct virtnet_info *vi =
> > > > +               container_of(work, struct virtnet_info, get_cvq);
> > > > +       struct virtnet_coal_node *wait_coal;
> > > > +       bool valid = false;
> > > > +       unsigned int tmp;
> > > > +       void *res;
> > > > +
> > > > +       mutex_lock(&vi->cvq_lock);
> > > > +       while ((res = virtqueue_get_buf(vi->cvq, &tmp)) != NULL) {
> > > > +               complete((struct completion *)res);
> > > > +               valid = true;
> > > > +       }
> > > > +       mutex_unlock(&vi->cvq_lock);
> > >
> > > How could we synchronize with the device in this case?
> > >
> > > E.g what happens if the device finishes another buf here?
> >
> > That's a good question. I think we can solve it using the following snippet?
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index e59e12bb7601..5dc3e1244016 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> >
> > @@ -420,6 +427,12 @@ struct virtnet_info {
> >         /* Lock to protect the control VQ */
> >         struct mutex cvq_lock;
> >
> > +       /* Atomic to confirm whether the cvq work is scheduled. */
> > +       atomic_t scheduled;
> > +
> > +       /* Work struct for acquisition of cvq processing results. */
> > +       struct work_struct get_cvq;
> > +
> >
> >
> > @@ -670,7 +691,9 @@ static void virtnet_cvq_done(struct virtqueue *cvq)
> >  {
> >         struct virtnet_info *vi = cvq->vdev->priv;
> >
> > -       complete(&vi->ctrl->completion);
> > +       virtqueue_disable_cb(cvq);
> > +       if (!atomic_xchg(&vi->scheduled, 1))
> > +               schedule_work(&vi->get_cvq);
> 
> I think workqueue subsystem should already handle things like this.
> 
> >  }
> >
> >
> > +static void virtnet_get_cvq_work(struct work_struct *work)
> > +{
> > +       struct virtnet_info *vi =
> > +               container_of(work, struct virtnet_info, get_cvq);
> > +       struct virtnet_coal_node *wait_coal;
> > +       bool valid = false;
> > +       unsigned int tmp;
> > +       void *res;
> > +
> > +       mutex_lock(&vi->cvq_lock);
> > +       while ((res = virtqueue_get_buf(vi->cvq, &tmp)) != NULL) {
> > +               complete((struct completion *)res);
> > +               valid = true;
> > +       }
> > +       mutex_unlock(&vi->cvq_lock);
> > +
> > +       atomic_set(&vi->scheduled, 0);
> > +       virtqueue_enable_cb_prepare(vi->cvq);
> 
> We have a bunch of examples in the current code. Generally it should
> be something like:
> 
> again:
>     disable_cb()
>     while(get_buf());
>     if (enable_cb())
>         disable_cb()
>         goto again;
> 
> > +}
> >
> > >
> > > > +
> > > > +       if (!valid)
> > > > +               return;
> > > > +
> > > > +       while (true) {
> > > > +               wait_coal = NULL;
> > > > +               mutex_lock(&vi->coal_wait_lock);
> > > > +               if (!list_empty(&vi->coal_wait_list))
> > > > +                       wait_coal = list_first_entry(&vi->coal_wait_list,
> > > > +                                                    struct virtnet_coal_node,
> > > > +                                                    list);
> > > > +               mutex_unlock(&vi->coal_wait_lock);
> > > > +               if (wait_coal)
> > > > +                       if (virtnet_add_dim_command(vi, wait_coal))
> > > > +                               break;
> > > > +               else
> > > > +                       break;
> > > > +       }
> > >
> > > This is still an ad-hoc optimization for dim in the general path here.
> > >
> > > Could we have a fn callback so for non dim it's just a completion and
> > > for dim it would be a schedule_work()?
> > >
> >
> > OK, I will try this.
> >
> > And how about this :
> >
> > +static void virtnet_cvq_work_sched(struct virtqueue *cvq)
> > +{
> > +       struct virtnet_info *vi = cvq->vdev->priv;
> > +
> > +       virtqueue_disable_cb(cvq);
> > +       if (!atomic_xchg(&vi->scheduled, 1))
> > +               schedule_work(&vi->get_cvq);
> > +}
> > +
> >  static void virtnet_cvq_done(struct virtqueue *cvq)
> >  {
> >         struct virtnet_info *vi = cvq->vdev->priv;
> > +       unsigned int tmp;
> >
> > +       virtqueue_get_buf(vi->cvq, &tmp);
> >         complete(&vi->ctrl->completion);
> >  }
> >
> > @@ -5318,7 +5472,11 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> >
> >         /* Parameters for control virtqueue, if any */
> >         if (vi->has_cvq) {
> > -               callbacks[total_vqs - 1] = virtnet_cvq_done;
> > +               if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
> > +                       callbacks[total_vqs - 1] = virtnet_cvq_work_sched;
> > +               else
> > +                       callbacks[total_vqs - 1] = virtnet_cvq_done;
> > +
> >                 names[total_vqs - 1] = "control";
> >         }
> 
> Just to clarify, I meant a callback function per control_buf. (I've
> avoid touching virtqueue callback layers)
> 
> 
> >
> > > > +}
> > > >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> > > >  {
> > > >         struct virtnet_info *vi = netdev_priv(dev);
> > > > @@ -4398,35 +4496,73 @@ static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
> > > >         return 0;
> > > >  }
> > > >
> > > > +static void virtnet_put_wait_coal(struct virtnet_info *vi,
> > > > +                                 struct receive_queue *rq,
> > > > +                                 struct dim_cq_moder moder)
> > > > +{
> > > > +       struct virtnet_coal_node *wait_node;
> > > > +
> > > > +       wait_node = kzalloc(sizeof(*wait_node), GFP_KERNEL);
> > > > +       if (!wait_node) {
> > > > +               rq->dim.state = DIM_START_MEASURE;
> > > > +               return;
> > > > +       }
> > > > +
> > > > +       wait_node->is_coal_wait = true;
> > > > +       wait_node->coal_vqs.vqn = cpu_to_le16(rxq2vq(rq - vi->rq));
> > > > +       wait_node->coal_vqs.coal.max_usecs = cpu_to_le32(moder.usec);
> > > > +       wait_node->coal_vqs.coal.max_packets = cpu_to_le32(moder.pkts);
> > > > +       mutex_lock(&vi->coal_wait_lock);
> > > > +       list_add_tail(&wait_node->list, &vi->coal_wait_list);
> > > > +       mutex_unlock(&vi->coal_wait_lock);
> > > > +}
> > > > +
> > > >  static void virtnet_rx_dim_work(struct work_struct *work)
> > > >  {
> > > >         struct dim *dim = container_of(work, struct dim, work);
> > > >         struct receive_queue *rq = container_of(dim,
> > > >                         struct receive_queue, dim);
> > > >         struct virtnet_info *vi = rq->vq->vdev->priv;
> > > > -       struct net_device *dev = vi->dev;
> > > > +       struct virtnet_coal_node *avail_coal;
> > > >         struct dim_cq_moder update_moder;
> > > > -       int qnum, err;
> > > >
> > > > -       qnum = rq - vi->rq;
> > > > +       update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
> > > >
> > > >         mutex_lock(&rq->dim_lock);
> > > > -       if (!rq->dim_enabled)
> > > > -               goto out;
> > > > -
> > > > -       update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
> > > > -       if (update_moder.usec != rq->intr_coal.max_usecs ||
> > > > -           update_moder.pkts != rq->intr_coal.max_packets) {
> > > > -               err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum,
> > > > -                                                      update_moder.usec,
> > > > -                                                      update_moder.pkts);
> > > > -               if (err)
> > > > -                       pr_debug("%s: Failed to send dim parameters on rxq%d\n",
> > > > -                                dev->name, qnum);
> > > > -               dim->state = DIM_START_MEASURE;
> > > > +       if (!rq->dim_enabled ||
> > > > +           (update_moder.usec == rq->intr_coal.max_usecs &&
> > > > +            update_moder.pkts == rq->intr_coal.max_packets)) {
> > > > +               rq->dim.state = DIM_START_MEASURE;
> > > > +               mutex_unlock(&rq->dim_lock);
> > > > +               return;
> > > >         }
> > > > -out:
> > > >         mutex_unlock(&rq->dim_lock);
> > > > +
> > > > +       mutex_lock(&vi->cvq_lock);
> > > > +       if (vi->cvq->num_free < 3) {
> > > > +               virtnet_put_wait_coal(vi, rq, update_moder);
> > > > +               mutex_unlock(&vi->cvq_lock);
> > > > +               return;
> > > > +       }
> > >
> > > Could we simply sleep instead of using a list here?
> >
> > Do you mean using a semaphore, or a waitqueue?
> 
> I meant sleep and wait for more space.
> 
> Thanks
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2024-06-18 14:27 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-06-06  6:14 [PATCH net-next v3 0/4] virtio_net: enable the irq for ctrlq Heng Qi
2024-06-06  6:14 ` [PATCH net-next v3 1/4] virtio_net: passing control_buf explicitly Heng Qi
2024-06-06  6:14 ` [PATCH net-next v3 2/4] virtio_net: enable irq for the control vq Heng Qi
2024-06-06  6:14 ` [PATCH net-next v3 3/4] virtio_net: change the command token to completion Heng Qi
2024-06-06  6:14 ` [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency Heng Qi
2024-06-06 10:25   ` kernel test robot
2024-06-06 20:34   ` kernel test robot
2024-06-17  4:05   ` Jason Wang
2024-06-17  7:27     ` Heng Qi
2024-06-18  1:29       ` Jason Wang
2024-06-18 14:24         ` Heng Qi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).