Re: [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Heng Qi <hengqi@linux.alibaba.com>
To: Jason Wang <jasowang@redhat.com>
Cc: netdev@vger.kernel.org, virtualization@lists.linux.dev,
	"Michael S. Tsirkin" <mst@redhat.com>,
	"Xuan Zhuo" <xuanzhuo@linux.alibaba.com>,
	"Eugenio Pérez" <eperezma@redhat.com>,
	"Eric Dumazet" <edumazet@google.com>,
	"David S. Miller" <davem@davemloft.net>,
	"Jakub Kicinski" <kuba@kernel.org>,
	"Paolo Abeni" <pabeni@redhat.com>
Subject: Re: [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency
Date: Mon, 17 Jun 2024 15:27:48 +0800	[thread overview]
Message-ID: <1718609268.7814527-9-hengqi@linux.alibaba.com> (raw)
In-Reply-To: <CACGkMEuFJ=xeeBt9GiCLj8AeJg-u-JG4F9_+8vBoH4dhZ-z=3Q@mail.gmail.com>

On Mon, 17 Jun 2024 12:05:30 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Thu, Jun 6, 2024 at 2:15 PM Heng Qi <hengqi@linux.alibaba.com> wrote:
> >
> > Currently, control vq handles commands synchronously,
> > leading to increased delays for dim commands during multi-queue
> > VM configuration and directly impacting dim performance.
> >
> > To address this, we are shifting to asynchronous processing of
> > ctrlq's dim commands.
> >
> > Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
> > ---
> >  drivers/net/virtio_net.c | 233 ++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 208 insertions(+), 25 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index e59e12bb7601..0338528993ab 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -376,6 +376,13 @@ struct control_buf {
> >         struct completion completion;
> >  };
> >
> > +struct virtnet_coal_node {
> > +       struct control_buf ctrl;
> > +       struct virtio_net_ctrl_coal_vq coal_vqs;
> > +       bool is_coal_wait;
> > +       struct list_head list;
> > +};
> > +
> >  struct virtnet_info {
> >         struct virtio_device *vdev;
> >         struct virtqueue *cvq;
> > @@ -420,6 +427,9 @@ struct virtnet_info {
> >         /* Lock to protect the control VQ */
> >         struct mutex cvq_lock;
> >
> > +       /* Work struct for acquisition of cvq processing results. */
> > +       struct work_struct get_cvq;
> > +
> >         /* Host can handle any s/g split between our header and packet data */
> >         bool any_header_sg;
> >
> > @@ -464,6 +474,14 @@ struct virtnet_info {
> >         struct virtnet_interrupt_coalesce intr_coal_tx;
> >         struct virtnet_interrupt_coalesce intr_coal_rx;
> >
> > +       /* Free nodes used for concurrent delivery */
> > +       struct mutex coal_free_lock;
> > +       struct list_head coal_free_list;
> > +
> > +       /* Filled when there are no free nodes or cvq buffers */
> > +       struct mutex coal_wait_lock;
> > +       struct list_head coal_wait_list;
> > +
> >         unsigned long guest_offloads;
> >         unsigned long guest_offloads_capable;
> >
> > @@ -670,7 +688,7 @@ static void virtnet_cvq_done(struct virtqueue *cvq)
> >  {
> >         struct virtnet_info *vi = cvq->vdev->priv;
> >
> > -       complete(&vi->ctrl->completion);
> > +       schedule_work(&vi->get_cvq);
> >  }
> >
> >  static void skb_xmit_done(struct virtqueue *vq)
> > @@ -2696,7 +2714,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
> >                                        struct scatterlist *in)
> >  {
> >         struct scatterlist *sgs[5], hdr, stat;
> > -       u32 out_num = 0, tmp, in_num = 0;
> > +       u32 out_num = 0, in_num = 0;
> >         int ret;
> >
> >         /* Caller should know better */
> > @@ -2730,14 +2748,14 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi,
> >                 return false;
> >         }
> >
> > -       if (unlikely(!virtqueue_kick(vi->cvq)))
> > -               goto unlock;
> > +       if (unlikely(!virtqueue_kick(vi->cvq))) {
> > +               mutex_unlock(&vi->cvq_lock);
> > +               return false;
> > +       }
> > +       mutex_unlock(&vi->cvq_lock);
> >
> > -       wait_for_completion(&vi->ctrl->completion);
> > -       virtqueue_get_buf(vi->cvq, &tmp);
> > +       wait_for_completion(&ctrl->completion);
> >
> > -unlock:
> > -       mutex_unlock(&vi->cvq_lock);
> >         return ctrl->status == VIRTIO_NET_OK;
> >  }
> >
> > @@ -2747,6 +2765,86 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> >         return virtnet_send_command_reply(vi, class, cmd, vi->ctrl, out, NULL);
> >  }
> >
> > +static void virtnet_process_dim_cmd(struct virtnet_info *vi,
> > +                                   struct virtnet_coal_node *node)
> > +{
> > +       u16 qnum = le16_to_cpu(node->coal_vqs.vqn) / 2;
> > +
> > +       mutex_lock(&vi->rq[qnum].dim_lock);
> > +       vi->rq[qnum].intr_coal.max_usecs =
> > +               le32_to_cpu(node->coal_vqs.coal.max_usecs);
> > +       vi->rq[qnum].intr_coal.max_packets =
> > +               le32_to_cpu(node->coal_vqs.coal.max_packets);
> > +       vi->rq[qnum].dim.state = DIM_START_MEASURE;
> > +       mutex_unlock(&vi->rq[qnum].dim_lock);
> > +
> > +       if (node->is_coal_wait) {
> > +               mutex_lock(&vi->coal_wait_lock);
> > +               list_del(&node->list);
> > +               mutex_unlock(&vi->coal_wait_lock);
> > +               kfree(node);
> > +       } else {
> > +               mutex_lock(&vi->coal_free_lock);
> > +               list_add(&node->list, &vi->coal_free_list);
> > +               mutex_unlock(&vi->coal_free_lock);
> > +       }
> > +}
> > +
> > +static int virtnet_add_dim_command(struct virtnet_info *vi,
> > +                                  struct virtnet_coal_node *coal_node)
> > +{
> > +       struct scatterlist sg;
> > +       int ret;
> > +
> > +       sg_init_one(&sg, &coal_node->coal_vqs, sizeof(coal_node->coal_vqs));
> > +       ret = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> > +                                        VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET,
> > +                                        &coal_node->ctrl, &sg, NULL);
> > +       if (!ret) {
> > +               dev_warn(&vi->dev->dev,
> > +                        "Failed to change coalescing params.\n");
> > +               return ret;
> > +       }
> > +
> > +       virtnet_process_dim_cmd(vi, coal_node);
> > +
> > +       return 0;
> > +}
> > +
> > +static void virtnet_get_cvq_work(struct work_struct *work)
> > +{
> > +       struct virtnet_info *vi =
> > +               container_of(work, struct virtnet_info, get_cvq);
> > +       struct virtnet_coal_node *wait_coal;
> > +       bool valid = false;
> > +       unsigned int tmp;
> > +       void *res;
> > +
> > +       mutex_lock(&vi->cvq_lock);
> > +       while ((res = virtqueue_get_buf(vi->cvq, &tmp)) != NULL) {
> > +               complete((struct completion *)res);
> > +               valid = true;
> > +       }
> > +       mutex_unlock(&vi->cvq_lock);
> 
> How could we synchronize with the device in this case?
> 
> E.g what happens if the device finishes another buf here?

That's a good question. I think we can solve it with the following snippet:

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e59e12bb7601..5dc3e1244016 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c

@@ -420,6 +427,12 @@ struct virtnet_info {
        /* Lock to protect the control VQ */
        struct mutex cvq_lock;

+       /* Atomic to confirm whether the cvq work is scheduled. */
+       atomic_t scheduled;
+
+       /* Work struct for acquisition of cvq processing results. */
+       struct work_struct get_cvq;
+


@@ -670,7 +691,9 @@ static void virtnet_cvq_done(struct virtqueue *cvq)
 {
        struct virtnet_info *vi = cvq->vdev->priv;

-       complete(&vi->ctrl->completion);
+       virtqueue_disable_cb(cvq);
+       if (!atomic_xchg(&vi->scheduled, 1))
+               schedule_work(&vi->get_cvq);
 }


+static void virtnet_get_cvq_work(struct work_struct *work)
+{
+       struct virtnet_info *vi =
+               container_of(work, struct virtnet_info, get_cvq);
+       struct virtnet_coal_node *wait_coal;
+       bool valid = false;
+       unsigned int tmp;
+       void *res;
+
+       mutex_lock(&vi->cvq_lock);
+       while ((res = virtqueue_get_buf(vi->cvq, &tmp)) != NULL) {
+               complete((struct completion *)res);
+               valid = true;
+       }
+       mutex_unlock(&vi->cvq_lock);
+
+       atomic_set(&vi->scheduled, 0);
+       virtqueue_enable_cb_prepare(vi->cvq);
+}

> 
> > +
> > +       if (!valid)
> > +               return;
> > +
> > +       while (true) {
> > +               wait_coal = NULL;
> > +               mutex_lock(&vi->coal_wait_lock);
> > +               if (!list_empty(&vi->coal_wait_list))
> > +                       wait_coal = list_first_entry(&vi->coal_wait_list,
> > +                                                    struct virtnet_coal_node,
> > +                                                    list);
> > +               mutex_unlock(&vi->coal_wait_lock);
> > +               if (wait_coal)
> > +                       if (virtnet_add_dim_command(vi, wait_coal))
> > +                               break;
> > +               else
> > +                       break;
> > +       }
> 
> This is still an ad-hoc optimization for dim in the general path here.
> 
> Could we have a fn callback so for non dim it's just a completion and
> for dim it would be a schedule_work()?
> 

OK, I will try this.

And how about this:

+static void virtnet_cvq_work_sched(struct virtqueue *cvq)
+{
+       struct virtnet_info *vi = cvq->vdev->priv;
+
+       virtqueue_disable_cb(cvq);
+       if (!atomic_xchg(&vi->scheduled, 1))
+               schedule_work(&vi->get_cvq);
+}
+
 static void virtnet_cvq_done(struct virtqueue *cvq)
 {
        struct virtnet_info *vi = cvq->vdev->priv;
+       unsigned int tmp;

+       virtqueue_get_buf(vi->cvq, &tmp);
        complete(&vi->ctrl->completion);
 }

@@ -5318,7 +5472,11 @@ static int virtnet_find_vqs(struct virtnet_info *vi)

        /* Parameters for control virtqueue, if any */
        if (vi->has_cvq) {
-               callbacks[total_vqs - 1] = virtnet_cvq_done;
+               if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
+                       callbacks[total_vqs - 1] = virtnet_cvq_work_sched;
+               else
+                       callbacks[total_vqs - 1] = virtnet_cvq_done;
+
                names[total_vqs - 1] = "control";
        }

> > +}
> >  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> >  {
> >         struct virtnet_info *vi = netdev_priv(dev);
> > @@ -4398,35 +4496,73 @@ static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
> >         return 0;
> >  }
> >
> > +static void virtnet_put_wait_coal(struct virtnet_info *vi,
> > +                                 struct receive_queue *rq,
> > +                                 struct dim_cq_moder moder)
> > +{
> > +       struct virtnet_coal_node *wait_node;
> > +
> > +       wait_node = kzalloc(sizeof(*wait_node), GFP_KERNEL);
> > +       if (!wait_node) {
> > +               rq->dim.state = DIM_START_MEASURE;
> > +               return;
> > +       }
> > +
> > +       wait_node->is_coal_wait = true;
> > +       wait_node->coal_vqs.vqn = cpu_to_le16(rxq2vq(rq - vi->rq));
> > +       wait_node->coal_vqs.coal.max_usecs = cpu_to_le32(moder.usec);
> > +       wait_node->coal_vqs.coal.max_packets = cpu_to_le32(moder.pkts);
> > +       mutex_lock(&vi->coal_wait_lock);
> > +       list_add_tail(&wait_node->list, &vi->coal_wait_list);
> > +       mutex_unlock(&vi->coal_wait_lock);
> > +}
> > +
> >  static void virtnet_rx_dim_work(struct work_struct *work)
> >  {
> >         struct dim *dim = container_of(work, struct dim, work);
> >         struct receive_queue *rq = container_of(dim,
> >                         struct receive_queue, dim);
> >         struct virtnet_info *vi = rq->vq->vdev->priv;
> > -       struct net_device *dev = vi->dev;
> > +       struct virtnet_coal_node *avail_coal;
> >         struct dim_cq_moder update_moder;
> > -       int qnum, err;
> >
> > -       qnum = rq - vi->rq;
> > +       update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
> >
> >         mutex_lock(&rq->dim_lock);
> > -       if (!rq->dim_enabled)
> > -               goto out;
> > -
> > -       update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
> > -       if (update_moder.usec != rq->intr_coal.max_usecs ||
> > -           update_moder.pkts != rq->intr_coal.max_packets) {
> > -               err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum,
> > -                                                      update_moder.usec,
> > -                                                      update_moder.pkts);
> > -               if (err)
> > -                       pr_debug("%s: Failed to send dim parameters on rxq%d\n",
> > -                                dev->name, qnum);
> > -               dim->state = DIM_START_MEASURE;
> > +       if (!rq->dim_enabled ||
> > +           (update_moder.usec == rq->intr_coal.max_usecs &&
> > +            update_moder.pkts == rq->intr_coal.max_packets)) {
> > +               rq->dim.state = DIM_START_MEASURE;
> > +               mutex_unlock(&rq->dim_lock);
> > +               return;
> >         }
> > -out:
> >         mutex_unlock(&rq->dim_lock);
> > +
> > +       mutex_lock(&vi->cvq_lock);
> > +       if (vi->cvq->num_free < 3) {
> > +               virtnet_put_wait_coal(vi, rq, update_moder);
> > +               mutex_unlock(&vi->cvq_lock);
> > +               return;
> > +       }
> 
> Could we simply sleep instead of using a list here?

Do you mean using a semaphore, or a waitqueue?

> 
> > +       mutex_unlock(&vi->cvq_lock);
> > +
> > +       mutex_lock(&vi->coal_free_lock);
> > +       if (list_empty(&vi->coal_free_list)) {
> > +               virtnet_put_wait_coal(vi, rq, update_moder);
> > +               mutex_unlock(&vi->coal_free_lock);
> > +               return;
> > +       }
> > +
> > +       avail_coal = list_first_entry(&vi->coal_free_list,
> > +                                     struct virtnet_coal_node, list);
> > +       avail_coal->coal_vqs.vqn = cpu_to_le16(rxq2vq(rq - vi->rq));
> > +       avail_coal->coal_vqs.coal.max_usecs = cpu_to_le32(update_moder.usec);
> > +       avail_coal->coal_vqs.coal.max_packets = cpu_to_le32(update_moder.pkts);
> > +
> > +       list_del(&avail_coal->list);
> > +       mutex_unlock(&vi->coal_free_lock);
> > +
> > +       virtnet_add_dim_command(vi, avail_coal);
> >  }
> >
> >  static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
> > @@ -4839,6 +4975,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
> >         flush_work(&vi->config_work);
> >         disable_rx_mode_work(vi);
> >         flush_work(&vi->rx_mode_work);
> > +       flush_work(&vi->get_cvq);
> >
> >         netif_tx_lock_bh(vi->dev);
> >         netif_device_detach(vi->dev);
> > @@ -5612,6 +5749,45 @@ static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
> >         .xmo_rx_hash                    = virtnet_xdp_rx_hash,
> >  };
> >
> > +static void virtnet_del_coal_free_list(struct virtnet_info *vi)
> > +{
> > +       struct virtnet_coal_node *coal_node, *tmp;
> > +
> > +       list_for_each_entry_safe(coal_node, tmp,  &vi->coal_free_list, list) {
> > +               list_del(&coal_node->list);
> > +               kfree(coal_node);
> > +       }
> > +}
> > +
> > +static int virtnet_init_coal_list(struct virtnet_info *vi)
> > +{
> > +       struct virtnet_coal_node *coal_node;
> > +       int batch_dim_nums;
> > +       int i;
> > +
> > +       INIT_LIST_HEAD(&vi->coal_free_list);
> > +       mutex_init(&vi->coal_free_lock);
> > +
> > +       INIT_LIST_HEAD(&vi->coal_wait_list);
> > +       mutex_init(&vi->coal_wait_lock);
> > +
> > +       if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
> > +               return 0;
> > +
> > +       batch_dim_nums = min((unsigned int)vi->max_queue_pairs,
> > +                            virtqueue_get_vring_size(vi->cvq) / 3);
> > +       for (i = 0; i < batch_dim_nums; i++) {
> > +               coal_node = kzalloc(sizeof(*coal_node), GFP_KERNEL);
> > +               if (!coal_node) {
> > +                       virtnet_del_coal_free_list(vi);
> > +                       return -ENOMEM;
> > +               }
> > +               list_add(&coal_node->list, &vi->coal_free_list);
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> >  static int virtnet_probe(struct virtio_device *vdev)
> >  {
> >         int i, err = -ENOMEM;
> > @@ -5797,6 +5973,9 @@ static int virtnet_probe(struct virtio_device *vdev)
> >         if (err)
> >                 goto free;
> >
> > +       if (virtnet_init_coal_list(vi))
> > +               goto free;
> > +
> >         if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
> >                 vi->intr_coal_rx.max_usecs = 0;
> >                 vi->intr_coal_tx.max_usecs = 0;
> > @@ -5838,6 +6017,7 @@ static int virtnet_probe(struct virtio_device *vdev)
> >         if (vi->has_rss || vi->has_rss_hash_report)
> >                 virtnet_init_default_rss(vi);
> >
> > +       INIT_WORK(&vi->get_cvq, virtnet_get_cvq_work);
> >         init_completion(&vi->ctrl->completion);
> >         enable_rx_mode_work(vi);
> >
> > @@ -5967,11 +6147,14 @@ static void virtnet_remove(struct virtio_device *vdev)
> >         flush_work(&vi->config_work);
> >         disable_rx_mode_work(vi);
> >         flush_work(&vi->rx_mode_work);
> > +       flush_work(&vi->get_cvq);
> 
> Do we need to prevent cvq work from being scheduled here?

You are right, I'll fix it in the next version.

Thanks!

> 
> Thanks
> 
> >
> >         unregister_netdev(vi->dev);
> >
> >         net_failover_destroy(vi->failover);
> >
> > +       virtnet_del_coal_free_list(vi);
> > +
> >         remove_vq_common(vi);
> >
> >         free_netdev(vi->dev);
> > --
> > 2.32.0.3.g01195cf9f
> >
>

next prev parent reply	other threads:[~2024-06-17  8:08 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-06  6:14 [PATCH net-next v3 0/4] virtio_net: enable the irq for ctrlq Heng Qi
2024-06-06  6:14 ` [PATCH net-next v3 1/4] virtio_net: passing control_buf explicitly Heng Qi
2024-06-06  6:14 ` [PATCH net-next v3 2/4] virtio_net: enable irq for the control vq Heng Qi
2024-06-06  6:14 ` [PATCH net-next v3 3/4] virtio_net: change the command token to completion Heng Qi
2024-06-06  6:14 ` [PATCH net-next v3 4/4] virtio_net: improve dim command request efficiency Heng Qi
2024-06-06 10:25   ` kernel test robot
2024-06-06 20:34   ` kernel test robot
2024-06-17  4:05   ` Jason Wang
2024-06-17  7:27     ` Heng Qi [this message]
2024-06-18  1:29       ` Jason Wang
2024-06-18 14:24         ` Heng Qi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1718609268.7814527-9-hengqi@linux.alibaba.com \
    --to=hengqi@linux.alibaba.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=eperezma@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=kuba@kernel.org \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=virtualization@lists.linux.dev \
    --cc=xuanzhuo@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.