netdev.vger.kernel.org archive mirror
* [PATCH net-next v4 0/2] virtio-net: don't busy poll for cvq command
@ 2023-07-20  8:38 Jason Wang
  2023-07-20  8:38 ` [PATCH net-next v4 1/2] virtio-net: convert rx mode setting to use workqueue Jason Wang
  2023-07-20  8:38 ` [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop Jason Wang
  0 siblings, 2 replies; 42+ messages in thread
From: Jason Wang @ 2023-07-20  8:38 UTC (permalink / raw)
  To: mst, jasowang, xuanzhuo
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, alvaro.karsz, maxime.coquelin

Hi all:

The code used to busy poll for the cvq command, which turns out to have
several side effects:

1) infinite polling for buggy devices
2) bad interaction with the scheduler

So this series uses cond_resched() in the waiting loop. Before doing
this, we first need to make sure the cvq command is not executed in an
atomic context, so we first convert rx mode handling to a workqueue.
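As a userspace analogy (not the kernel code itself), the difference this
series makes to the waiting loop can be sketched like this; the names
`device_done` and `device_broken` are hypothetical stand-ins for
virtqueue_get_buf()/virtqueue_is_broken(), and sched_yield() plays the
role of cond_resched():

```c
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical flags standing in for the cvq state checks. */
static atomic_bool device_done;
static atomic_bool device_broken;

static bool wait_for_command(void)
{
	/* Instead of spinning with only cpu_relax(), yield the CPU on
	 * each iteration so other tasks can run while the device is
	 * slow to answer.
	 */
	while (!atomic_load(&device_done) && !atomic_load(&device_broken))
		sched_yield();	/* plays the role of cond_resched() */

	return atomic_load(&device_done);
}
```

This still polls forever on a dead device; it only stops the loop from
monopolizing the CPU while doing so.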

Note that this doesn't try to solve the case where a malicious device
may block the networking stack (RTNL) or break the freezer; that
requires more thought on how to gracefully exit and resend the
commands.

Please review.

Changes since V3:

- Tweak the comments
- Tweak the changelog to explain why the rx mode setting is lost after
  resuming.
- No functional changes

Changes since V2:

- Don't use interrupt but cond_resched()

Changes since V1:

- use RTNL to synchronize rx mode worker
- use completion for simplicity
- don't try to harden CVQ command

Changes since RFC:

- switch to use BAD_RING in virtio_break_device()
- check virtqueue_is_broken() after being woken up
- use more_used() instead of virtqueue_get_buf() to allow the caller
  to get buffers afterwards
- break the virtio-net device on timeout
- get the buffer manually since the virtio core checks more_used()
  instead

Jason Wang (2):
  virtio-net: convert rx mode setting to use workqueue
  virtio-net: add cond_resched() to the command waiting loop

 drivers/net/virtio_net.c | 59 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 55 insertions(+), 4 deletions(-)

-- 
2.39.3


^ permalink raw reply	[flat|nested] 42+ messages in thread

* [PATCH net-next v4 1/2] virtio-net: convert rx mode setting to use workqueue
  2023-07-20  8:38 [PATCH net-next v4 0/2] virtio-net: don't busy poll for cvq command Jason Wang
@ 2023-07-20  8:38 ` Jason Wang
  2023-07-20 20:25   ` Shannon Nelson
  2023-07-20  8:38 ` [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop Jason Wang
  1 sibling, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-07-20  8:38 UTC (permalink / raw)
  To: mst, jasowang, xuanzhuo
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, alvaro.karsz, maxime.coquelin

This patch converts rx mode setting to be done in a workqueue. This is
a must to allow sleeping while waiting for the cvq command to respond,
since the current code is executed under the addr spin lock.

Note that we need to disable and flush the workqueue during freeze,
which means the rx mode setting is lost after resuming. This is not a
bug in this patch, as we never try to restore the rx mode setting
during resume.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c | 55 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 52 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index d67b36fdba0d..9f3b1d6ac33d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -274,6 +274,12 @@ struct virtnet_info {
 	/* Work struct for config space updates */
 	struct work_struct config_work;
 
+	/* Work struct for setting rx mode */
+	struct work_struct rx_mode_work;
+
+	/* OK to queue work setting RX mode? */
+	bool rx_mode_work_enabled;
+
 	/* Does the affinity hint is set for virtqueues? */
 	bool affinity_hint_set;
 
@@ -397,6 +403,20 @@ static void disable_delayed_refill(struct virtnet_info *vi)
 	spin_unlock_bh(&vi->refill_lock);
 }
 
+static void enable_rx_mode_work(struct virtnet_info *vi)
+{
+	rtnl_lock();
+	vi->rx_mode_work_enabled = true;
+	rtnl_unlock();
+}
+
+static void disable_rx_mode_work(struct virtnet_info *vi)
+{
+	rtnl_lock();
+	vi->rx_mode_work_enabled = false;
+	rtnl_unlock();
+}
+
 static void virtqueue_napi_schedule(struct napi_struct *napi,
 				    struct virtqueue *vq)
 {
@@ -2448,9 +2468,11 @@ static int virtnet_close(struct net_device *dev)
 	return 0;
 }
 
-static void virtnet_set_rx_mode(struct net_device *dev)
+static void virtnet_rx_mode_work(struct work_struct *work)
 {
-	struct virtnet_info *vi = netdev_priv(dev);
+	struct virtnet_info *vi =
+		container_of(work, struct virtnet_info, rx_mode_work);
+	struct net_device *dev = vi->dev;
 	struct scatterlist sg[2];
 	struct virtio_net_ctrl_mac *mac_data;
 	struct netdev_hw_addr *ha;
@@ -2463,6 +2485,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
 		return;
 
+	rtnl_lock();
+
 	vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
 	vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
 
@@ -2480,14 +2504,19 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
 			 vi->ctrl->allmulti ? "en" : "dis");
 
+	netif_addr_lock_bh(dev);
+
 	uc_count = netdev_uc_count(dev);
 	mc_count = netdev_mc_count(dev);
 	/* MAC filter - use one buffer for both lists */
 	buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
 		      (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
 	mac_data = buf;
-	if (!buf)
+	if (!buf) {
+		netif_addr_unlock_bh(dev);
+		rtnl_unlock();
 		return;
+	}
 
 	sg_init_table(sg, 2);
 
@@ -2508,6 +2537,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	netdev_for_each_mc_addr(ha, dev)
 		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
 
+	netif_addr_unlock_bh(dev);
+
 	sg_set_buf(&sg[1], mac_data,
 		   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
 
@@ -2515,9 +2546,19 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 				  VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
 		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
 
+	rtnl_unlock();
+
 	kfree(buf);
 }
 
+static void virtnet_set_rx_mode(struct net_device *dev)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+
+	if (vi->rx_mode_work_enabled)
+		schedule_work(&vi->rx_mode_work);
+}
+
 static int virtnet_vlan_rx_add_vid(struct net_device *dev,
 				   __be16 proto, u16 vid)
 {
@@ -3286,6 +3327,8 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
 
 	/* Make sure no work handler is accessing the device */
 	flush_work(&vi->config_work);
+	disable_rx_mode_work(vi);
+	flush_work(&vi->rx_mode_work);
 
 	netif_tx_lock_bh(vi->dev);
 	netif_device_detach(vi->dev);
@@ -3308,6 +3351,7 @@ static int virtnet_restore_up(struct virtio_device *vdev)
 	virtio_device_ready(vdev);
 
 	enable_delayed_refill(vi);
+	enable_rx_mode_work(vi);
 
 	if (netif_running(vi->dev)) {
 		err = virtnet_open(vi->dev);
@@ -4121,6 +4165,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	vdev->priv = vi;
 
 	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
+	INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work);
 	spin_lock_init(&vi->refill_lock);
 
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
@@ -4229,6 +4274,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (vi->has_rss || vi->has_rss_hash_report)
 		virtnet_init_default_rss(vi);
 
+	enable_rx_mode_work(vi);
+
 	/* serialize netdev register + virtio_device_ready() with ndo_open() */
 	rtnl_lock();
 
@@ -4326,6 +4373,8 @@ static void virtnet_remove(struct virtio_device *vdev)
 
 	/* Make sure no work handler is accessing the device. */
 	flush_work(&vi->config_work);
+	disable_rx_mode_work(vi);
+	flush_work(&vi->rx_mode_work);
 
 	unregister_netdev(vi->dev);
 
-- 
2.39.3


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-20  8:38 [PATCH net-next v4 0/2] virtio-net: don't busy poll for cvq command Jason Wang
  2023-07-20  8:38 ` [PATCH net-next v4 1/2] virtio-net: convert rx mode setting to use workqueue Jason Wang
@ 2023-07-20  8:38 ` Jason Wang
  2023-07-20 15:31   ` Shannon Nelson
  2023-07-20 20:26   ` Shannon Nelson
  1 sibling, 2 replies; 42+ messages in thread
From: Jason Wang @ 2023-07-20  8:38 UTC (permalink / raw)
  To: mst, jasowang, xuanzhuo
  Cc: davem, edumazet, kuba, pabeni, virtualization, netdev,
	linux-kernel, alvaro.karsz, maxime.coquelin

Add cond_resched() to the command waiting loop for better cooperation
with the scheduler. This gives the CPU a chance to run other tasks
(e.g. a workqueue) instead of busy looping when preemption is not
allowed on a device whose CVQ might be slow.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9f3b1d6ac33d..e7533f29b219 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 	 * into the hypervisor, so the request should be handled immediately.
 	 */
 	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
-	       !virtqueue_is_broken(vi->cvq))
+	       !virtqueue_is_broken(vi->cvq)) {
+		cond_resched();
 		cpu_relax();
+	}
 
 	return vi->ctrl->status == VIRTIO_NET_OK;
 }
-- 
2.39.3


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-20  8:38 ` [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop Jason Wang
@ 2023-07-20 15:31   ` Shannon Nelson
  2023-07-20 20:58     ` Michael S. Tsirkin
  2023-07-20 20:26   ` Shannon Nelson
  1 sibling, 1 reply; 42+ messages in thread
From: Shannon Nelson @ 2023-07-20 15:31 UTC (permalink / raw)
  To: Jason Wang, mst, xuanzhuo
  Cc: netdev, linux-kernel, virtualization, edumazet, maxime.coquelin,
	kuba, pabeni, davem

On 7/20/23 1:38 AM, Jason Wang wrote:
> 
> Add cond_resched() to the command waiting loop for better cooperation
> with the scheduler. This gives the CPU a chance to run other tasks
> (e.g. a workqueue) instead of busy looping when preemption is not
> allowed on a device whose CVQ might be slow.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>   drivers/net/virtio_net.c | 4 +++-
>   1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 9f3b1d6ac33d..e7533f29b219 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>           * into the hypervisor, so the request should be handled immediately.
>           */
>          while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> -              !virtqueue_is_broken(vi->cvq))
> +              !virtqueue_is_broken(vi->cvq)) {
> +               cond_resched();
>                  cpu_relax();
> +       }

The cover letter suggests that this addresses the infinite poll for 
buggy devices, but I don't see how that is resolved here.  This should 
make it a little nicer to the system, but it still is going to poll 
forever on a device that has gone catatonic.  Is there a reason that I'm 
missing that we don't have a polling limit here?

sln

> 
>          return vi->ctrl->status == VIRTIO_NET_OK;
>   }
> --
> 2.39.3
> 
> _______________________________________________
> Virtualization mailing list
> Virtualization@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 1/2] virtio-net: convert rx mode setting to use workqueue
  2023-07-20  8:38 ` [PATCH net-next v4 1/2] virtio-net: convert rx mode setting to use workqueue Jason Wang
@ 2023-07-20 20:25   ` Shannon Nelson
  0 siblings, 0 replies; 42+ messages in thread
From: Shannon Nelson @ 2023-07-20 20:25 UTC (permalink / raw)
  To: Jason Wang, mst, xuanzhuo
  Cc: netdev, linux-kernel, virtualization, edumazet, maxime.coquelin,
	kuba, pabeni, davem

On 7/20/23 1:38 AM, Jason Wang wrote:
> 
> This patch converts rx mode setting to be done in a workqueue. This is
> a must to allow sleeping while waiting for the cvq command to respond,
> since the current code is executed under the addr spin lock.
> 
> Note that we need to disable and flush the workqueue during freeze,
> which means the rx mode setting is lost after resuming. This is not a
> bug in this patch, as we never try to restore the rx mode setting
> during resume.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>

> ---
>   drivers/net/virtio_net.c | 55 +++++++++++++++++++++++++++++++++++++---
>   1 file changed, 52 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index d67b36fdba0d..9f3b1d6ac33d 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -274,6 +274,12 @@ struct virtnet_info {
>          /* Work struct for config space updates */
>          struct work_struct config_work;
> 
> +       /* Work struct for setting rx mode */
> +       struct work_struct rx_mode_work;
> +
> +       /* OK to queue work setting RX mode? */
> +       bool rx_mode_work_enabled;
> +
>          /* Does the affinity hint is set for virtqueues? */
>          bool affinity_hint_set;
> 
> @@ -397,6 +403,20 @@ static void disable_delayed_refill(struct virtnet_info *vi)
>          spin_unlock_bh(&vi->refill_lock);
>   }
> 
> +static void enable_rx_mode_work(struct virtnet_info *vi)
> +{
> +       rtnl_lock();
> +       vi->rx_mode_work_enabled = true;
> +       rtnl_unlock();
> +}
> +
> +static void disable_rx_mode_work(struct virtnet_info *vi)
> +{
> +       rtnl_lock();
> +       vi->rx_mode_work_enabled = false;
> +       rtnl_unlock();
> +}
> +
>   static void virtqueue_napi_schedule(struct napi_struct *napi,
>                                      struct virtqueue *vq)
>   {
> @@ -2448,9 +2468,11 @@ static int virtnet_close(struct net_device *dev)
>          return 0;
>   }
> 
> -static void virtnet_set_rx_mode(struct net_device *dev)
> +static void virtnet_rx_mode_work(struct work_struct *work)
>   {
> -       struct virtnet_info *vi = netdev_priv(dev);
> +       struct virtnet_info *vi =
> +               container_of(work, struct virtnet_info, rx_mode_work);
> +       struct net_device *dev = vi->dev;
>          struct scatterlist sg[2];
>          struct virtio_net_ctrl_mac *mac_data;
>          struct netdev_hw_addr *ha;
> @@ -2463,6 +2485,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
>          if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
>                  return;
> 
> +       rtnl_lock();
> +
>          vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
>          vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
> 
> @@ -2480,14 +2504,19 @@ static void virtnet_set_rx_mode(struct net_device *dev)
>                  dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
>                           vi->ctrl->allmulti ? "en" : "dis");
> 
> +       netif_addr_lock_bh(dev);
> +
>          uc_count = netdev_uc_count(dev);
>          mc_count = netdev_mc_count(dev);
>          /* MAC filter - use one buffer for both lists */
>          buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
>                        (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
>          mac_data = buf;
> -       if (!buf)
> +       if (!buf) {
> +               netif_addr_unlock_bh(dev);
> +               rtnl_unlock();
>                  return;
> +       }
> 
>          sg_init_table(sg, 2);
> 
> @@ -2508,6 +2537,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
>          netdev_for_each_mc_addr(ha, dev)
>                  memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
> 
> +       netif_addr_unlock_bh(dev);
> +
>          sg_set_buf(&sg[1], mac_data,
>                     sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
> 
> @@ -2515,9 +2546,19 @@ static void virtnet_set_rx_mode(struct net_device *dev)
>                                    VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
>                  dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
> 
> +       rtnl_unlock();
> +
>          kfree(buf);
>   }
> 
> +static void virtnet_set_rx_mode(struct net_device *dev)
> +{
> +       struct virtnet_info *vi = netdev_priv(dev);
> +
> +       if (vi->rx_mode_work_enabled)
> +               schedule_work(&vi->rx_mode_work);
> +}
> +
>   static int virtnet_vlan_rx_add_vid(struct net_device *dev,
>                                     __be16 proto, u16 vid)
>   {
> @@ -3286,6 +3327,8 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
> 
>          /* Make sure no work handler is accessing the device */
>          flush_work(&vi->config_work);
> +       disable_rx_mode_work(vi);
> +       flush_work(&vi->rx_mode_work);
> 
>          netif_tx_lock_bh(vi->dev);
>          netif_device_detach(vi->dev);
> @@ -3308,6 +3351,7 @@ static int virtnet_restore_up(struct virtio_device *vdev)
>          virtio_device_ready(vdev);
> 
>          enable_delayed_refill(vi);
> +       enable_rx_mode_work(vi);
> 
>          if (netif_running(vi->dev)) {
>                  err = virtnet_open(vi->dev);
> @@ -4121,6 +4165,7 @@ static int virtnet_probe(struct virtio_device *vdev)
>          vdev->priv = vi;
> 
>          INIT_WORK(&vi->config_work, virtnet_config_changed_work);
> +       INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work);
>          spin_lock_init(&vi->refill_lock);
> 
>          if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
> @@ -4229,6 +4274,8 @@ static int virtnet_probe(struct virtio_device *vdev)
>          if (vi->has_rss || vi->has_rss_hash_report)
>                  virtnet_init_default_rss(vi);
> 
> +       enable_rx_mode_work(vi);
> +
>          /* serialize netdev register + virtio_device_ready() with ndo_open() */
>          rtnl_lock();
> 
> @@ -4326,6 +4373,8 @@ static void virtnet_remove(struct virtio_device *vdev)
> 
>          /* Make sure no work handler is accessing the device. */
>          flush_work(&vi->config_work);
> +       disable_rx_mode_work(vi);
> +       flush_work(&vi->rx_mode_work);
> 
>          unregister_netdev(vi->dev);
> 
> --
> 2.39.3
> 

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-20  8:38 ` [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop Jason Wang
  2023-07-20 15:31   ` Shannon Nelson
@ 2023-07-20 20:26   ` Shannon Nelson
  2023-07-20 21:02     ` Michael S. Tsirkin
  1 sibling, 1 reply; 42+ messages in thread
From: Shannon Nelson @ 2023-07-20 20:26 UTC (permalink / raw)
  To: Jason Wang, mst, xuanzhuo
  Cc: netdev, linux-kernel, virtualization, edumazet, maxime.coquelin,
	kuba, pabeni, davem

On 7/20/23 1:38 AM, Jason Wang wrote:
> 
> Add cond_resched() to the command waiting loop for better cooperation
> with the scheduler. This gives the CPU a chance to run other tasks
> (e.g. a workqueue) instead of busy looping when preemption is not
> allowed on a device whose CVQ might be slow.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>

This still leaves hung processes, but at least it doesn't pin the CPU 
any more.  Thanks.

Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>


> ---
>   drivers/net/virtio_net.c | 4 +++-
>   1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 9f3b1d6ac33d..e7533f29b219 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>           * into the hypervisor, so the request should be handled immediately.
>           */
>          while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> -              !virtqueue_is_broken(vi->cvq))
> +              !virtqueue_is_broken(vi->cvq)) {
> +               cond_resched();
>                  cpu_relax();
> +       }
> 
>          return vi->ctrl->status == VIRTIO_NET_OK;
>   }
> --
> 2.39.3
> 

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-20 15:31   ` Shannon Nelson
@ 2023-07-20 20:58     ` Michael S. Tsirkin
  0 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-07-20 20:58 UTC (permalink / raw)
  To: Shannon Nelson
  Cc: Jason Wang, xuanzhuo, netdev, linux-kernel, virtualization,
	edumazet, maxime.coquelin, kuba, pabeni, davem

On Thu, Jul 20, 2023 at 08:31:13AM -0700, Shannon Nelson wrote:
> On 7/20/23 1:38 AM, Jason Wang wrote:
> > 
> > Add cond_resched() to the command waiting loop for better cooperation
> > with the scheduler. This gives the CPU a chance to run other tasks
> > (e.g. a workqueue) instead of busy looping when preemption is not
> > allowed on a device whose CVQ might be slow.
> > 
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> >   drivers/net/virtio_net.c | 4 +++-
> >   1 file changed, 3 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 9f3b1d6ac33d..e7533f29b219 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> >           * into the hypervisor, so the request should be handled immediately.
> >           */
> >          while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > -              !virtqueue_is_broken(vi->cvq))
> > +              !virtqueue_is_broken(vi->cvq)) {
> > +               cond_resched();
> >                  cpu_relax();
> > +       }
> 
> The cover letter suggests that this addresses the infinite poll for buggy
> devices, but I don't see how that is resolved here.  This should make it a
> little nicer to the system, but it still is going to poll forever on a
> device that has gone catatonic.  Is there a reason that I'm missing that we
> don't have a polling limit here?
> 
> sln

We don't know what the limit would be. But given it's a workqueue
now, why does it still have to poll as opposed to blocking?


> > 
> >          return vi->ctrl->status == VIRTIO_NET_OK;
> >   }
> > --
> > 2.39.3
> > 


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-20 20:26   ` Shannon Nelson
@ 2023-07-20 21:02     ` Michael S. Tsirkin
  2023-07-21 14:37       ` Maxime Coquelin
  0 siblings, 1 reply; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-07-20 21:02 UTC (permalink / raw)
  To: Shannon Nelson
  Cc: Jason Wang, xuanzhuo, netdev, linux-kernel, virtualization,
	edumazet, maxime.coquelin, kuba, pabeni, davem

On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> On 7/20/23 1:38 AM, Jason Wang wrote:
> > 
> > Add cond_resched() to the command waiting loop for better cooperation
> > with the scheduler. This gives the CPU a chance to run other tasks
> > (e.g. a workqueue) instead of busy looping when preemption is not
> > allowed on a device whose CVQ might be slow.
> > 
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> 
> This still leaves hung processes, but at least it doesn't pin the CPU any
> more.  Thanks.
> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> 

I'd like to see a full solution
1- block until interrupt
2- still handle surprise removal correctly by waking in that case
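For illustration only, the shape of such a full solution can be sketched
as a userspace pthread analogy: the waiter sleeps on a condition instead
of polling, and both the normal completion (interrupt) path and the
surprise-removal path signal the same condition so the waiter always
wakes. All names here are hypothetical; this is not the virtio API:

```c
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static bool cmd_done, dev_broken;

/* Completion path: would run from the cvq interrupt callback. */
static void cvq_interrupt(void)
{
	pthread_mutex_lock(&lock);
	cmd_done = true;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
}

/* Surprise-removal path: wakes the waiter instead of leaving it stuck. */
static void surprise_removal(void)
{
	pthread_mutex_lock(&lock);
	dev_broken = true;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
}

static bool send_command_blocking(void)
{
	bool ok;

	pthread_mutex_lock(&lock);
	while (!cmd_done && !dev_broken)
		pthread_cond_wait(&cond, &lock);	/* sleeps, no busy loop */
	ok = cmd_done;
	pthread_mutex_unlock(&lock);

	return ok;
}
```

In kernel terms the condition variable would roughly map to a completion
signaled from the vq callback and from the break-device path.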



> > ---
> >   drivers/net/virtio_net.c | 4 +++-
> >   1 file changed, 3 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 9f3b1d6ac33d..e7533f29b219 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> >           * into the hypervisor, so the request should be handled immediately.
> >           */
> >          while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > -              !virtqueue_is_broken(vi->cvq))
> > +              !virtqueue_is_broken(vi->cvq)) {
> > +               cond_resched();
> >                  cpu_relax();
> > +       }
> > 
> >          return vi->ctrl->status == VIRTIO_NET_OK;
> >   }
> > --
> > 2.39.3
> > 


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-20 21:02     ` Michael S. Tsirkin
@ 2023-07-21 14:37       ` Maxime Coquelin
  2023-07-21 14:45         ` Michael S. Tsirkin
  0 siblings, 1 reply; 42+ messages in thread
From: Maxime Coquelin @ 2023-07-21 14:37 UTC (permalink / raw)
  To: Michael S. Tsirkin, Shannon Nelson
  Cc: Jason Wang, xuanzhuo, netdev, linux-kernel, virtualization,
	edumazet, kuba, pabeni, davem



On 7/20/23 23:02, Michael S. Tsirkin wrote:
> On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
>> On 7/20/23 1:38 AM, Jason Wang wrote:
>>>
>>> Add cond_resched() to the command waiting loop for better cooperation
>>> with the scheduler. This gives the CPU a chance to run other tasks
>>> (e.g. a workqueue) instead of busy looping when preemption is not
>>> allowed on a device whose CVQ might be slow.
>>>
>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>
>> This still leaves hung processes, but at least it doesn't pin the CPU any
>> more.  Thanks.
>> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
>>
> 
> I'd like to see a full solution
> 1- block until interrupt

Would it make sense to also have a timeout?
And when the timeout expires, set the FAILED bit in the device status?

> 2- still handle surprise removal correctly by waking in that case
> 
> 
> 
>>> ---
>>>    drivers/net/virtio_net.c | 4 +++-
>>>    1 file changed, 3 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>> index 9f3b1d6ac33d..e7533f29b219 100644
>>> --- a/drivers/net/virtio_net.c
>>> +++ b/drivers/net/virtio_net.c
>>> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>>>            * into the hypervisor, so the request should be handled immediately.
>>>            */
>>>           while (!virtqueue_get_buf(vi->cvq, &tmp) &&
>>> -              !virtqueue_is_broken(vi->cvq))
>>> +              !virtqueue_is_broken(vi->cvq)) {
>>> +               cond_resched();
>>>                   cpu_relax();
>>> +       }
>>>
>>>           return vi->ctrl->status == VIRTIO_NET_OK;
>>>    }
>>> --
>>> 2.39.3
>>>
> 


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-21 14:37       ` Maxime Coquelin
@ 2023-07-21 14:45         ` Michael S. Tsirkin
  2023-07-21 14:58           ` Maxime Coquelin
  0 siblings, 1 reply; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-07-21 14:45 UTC (permalink / raw)
  To: Maxime Coquelin
  Cc: Shannon Nelson, Jason Wang, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> 
> 
> On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > > On 7/20/23 1:38 AM, Jason Wang wrote:
> > > > 
> > > > Add cond_resched() to the command waiting loop for better cooperation
> > > > with the scheduler. This gives the CPU a chance to run other tasks
> > > > (e.g. a workqueue) instead of busy looping when preemption is not
> > > > allowed on a device whose CVQ might be slow.
> > > > 
> > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > 
> > > This still leaves hung processes, but at least it doesn't pin the CPU any
> > > more.  Thanks.
> > > Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > > 
> > 
> > I'd like to see a full solution
> > 1- block until interrupt
> 
> Would it make sense to also have a timeout?
> And when timeout expires, set FAILED bit in device status?

The virtio spec does not set any limits on the timing of vq
processing.

> > 2- still handle surprise removal correctly by waking in that case
> > 
> > 
> > 
> > > > ---
> > > >    drivers/net/virtio_net.c | 4 +++-
> > > >    1 file changed, 3 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > index 9f3b1d6ac33d..e7533f29b219 100644
> > > > --- a/drivers/net/virtio_net.c
> > > > +++ b/drivers/net/virtio_net.c
> > > > @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > >            * into the hypervisor, so the request should be handled immediately.
> > > >            */
> > > >           while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > -              !virtqueue_is_broken(vi->cvq))
> > > > +              !virtqueue_is_broken(vi->cvq)) {
> > > > +               cond_resched();
> > > >                   cpu_relax();
> > > > +       }
> > > > 
> > > >           return vi->ctrl->status == VIRTIO_NET_OK;
> > > >    }
> > > > --
> > > > 2.39.3
> > > > 
> > 


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-21 14:45         ` Michael S. Tsirkin
@ 2023-07-21 14:58           ` Maxime Coquelin
  2023-07-21 15:10             ` Michael S. Tsirkin
  0 siblings, 1 reply; 42+ messages in thread
From: Maxime Coquelin @ 2023-07-21 14:58 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Shannon Nelson, Jason Wang, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem



On 7/21/23 16:45, Michael S. Tsirkin wrote:
> On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
>>
>>
>> On 7/20/23 23:02, Michael S. Tsirkin wrote:
>>> On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
>>>> On 7/20/23 1:38 AM, Jason Wang wrote:
>>>>>
>>>>> Add cond_resched() to the command waiting loop for better cooperation
>>>>> with the scheduler. This gives the CPU a chance to run other tasks
>>>>> (e.g. a workqueue) instead of busy looping when preemption is not
>>>>> allowed on a device whose CVQ might be slow.
>>>>>
>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>>
>>>> This still leaves hung processes, but at least it doesn't pin the CPU any
>>>> more.  Thanks.
>>>> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
>>>>
>>>
>>> I'd like to see a full solution
>>> 1- block until interrupt
>>
>> Would it make sense to also have a timeout?
>> And when timeout expires, set FAILED bit in device status?
> 
> virtio spec does not set any limits on the timing of vq
> processing.

Indeed, but I thought the driver could decide it is too long for it.

The issue is that we keep waiting with the rtnl lock held, which can
quickly make the system unusable.

>>> 2- still handle surprise removal correctly by waking in that case
>>>
>>>
>>>
>>>>> ---
>>>>>     drivers/net/virtio_net.c | 4 +++-
>>>>>     1 file changed, 3 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>>>> index 9f3b1d6ac33d..e7533f29b219 100644
>>>>> --- a/drivers/net/virtio_net.c
>>>>> +++ b/drivers/net/virtio_net.c
>>>>> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>>>>>             * into the hypervisor, so the request should be handled immediately.
>>>>>             */
>>>>>            while (!virtqueue_get_buf(vi->cvq, &tmp) &&
>>>>> -              !virtqueue_is_broken(vi->cvq))
>>>>> +              !virtqueue_is_broken(vi->cvq)) {
>>>>> +               cond_resched();
>>>>>                    cpu_relax();
>>>>> +       }
>>>>>
>>>>>            return vi->ctrl->status == VIRTIO_NET_OK;
>>>>>     }
>>>>> --
>>>>> 2.39.3
>>>>>
>>>
> 



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-21 14:58           ` Maxime Coquelin
@ 2023-07-21 15:10             ` Michael S. Tsirkin
  2023-07-21 20:18               ` Maxime Coquelin
  0 siblings, 1 reply; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-07-21 15:10 UTC (permalink / raw)
  To: Maxime Coquelin
  Cc: Shannon Nelson, Jason Wang, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> 
> 
> On 7/21/23 16:45, Michael S. Tsirkin wrote:
> > On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> > > 
> > > 
> > > On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > > > On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > > > > On 7/20/23 1:38 AM, Jason Wang wrote:
> > > > > > 
> > > > > > Adding cond_resched() to the command waiting loop for a better
> > > > > > co-operation with the scheduler. This allows to give CPU a breath to
> > > > > > run other task(workqueue) instead of busy looping when preemption is
> > > > > > not allowed on a device whose CVQ might be slow.
> > > > > > 
> > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > 
> > > > > This still leaves hung processes, but at least it doesn't pin the CPU any
> > > > > more.  Thanks.
> > > > > Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > > > > 
> > > > 
> > > > I'd like to see a full solution
> > > > 1- block until interrupt
> > > 
> > > Would it make sense to also have a timeout?
> > > And when timeout expires, set FAILED bit in device status?
> > 
> > virtio spec does not set any limits on the timing of vq
> > processing.
> 
> Indeed, but I thought the driver could decide it is too long for it.
> 
> The issue is we keep waiting with rtnl locked, it can quickly make the
> system unusable.

If this is a problem, we should find a way not to keep rtnl
locked indefinitely.

> > > > 2- still handle surprise removal correctly by waking in that case
> > > > 
> > > > 
> > > > 
> > > > > > ---
> > > > > >     drivers/net/virtio_net.c | 4 +++-
> > > > > >     1 file changed, 3 insertions(+), 1 deletion(-)
> > > > > > 
> > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > index 9f3b1d6ac33d..e7533f29b219 100644
> > > > > > --- a/drivers/net/virtio_net.c
> > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > >             * into the hypervisor, so the request should be handled immediately.
> > > > > >             */
> > > > > >            while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > -              !virtqueue_is_broken(vi->cvq))
> > > > > > +              !virtqueue_is_broken(vi->cvq)) {
> > > > > > +               cond_resched();
> > > > > >                    cpu_relax();
> > > > > > +       }
> > > > > > 
> > > > > >            return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > >     }
> > > > > > --
> > > > > > 2.39.3
> > > > > > 
> > > > 
> > 



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-21 15:10             ` Michael S. Tsirkin
@ 2023-07-21 20:18               ` Maxime Coquelin
  2023-07-24  6:46                 ` Michael S. Tsirkin
  2023-07-24  6:52                 ` Jason Wang
  0 siblings, 2 replies; 42+ messages in thread
From: Maxime Coquelin @ 2023-07-21 20:18 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Shannon Nelson, Jason Wang, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem



On 7/21/23 17:10, Michael S. Tsirkin wrote:
> On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
>>
>>
>> On 7/21/23 16:45, Michael S. Tsirkin wrote:
>>> On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
>>>>
>>>>
>>>> On 7/20/23 23:02, Michael S. Tsirkin wrote:
>>>>> On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
>>>>>> On 7/20/23 1:38 AM, Jason Wang wrote:
>>>>>>>
>>>>>>> Adding cond_resched() to the command waiting loop for a better
>>>>>>> co-operation with the scheduler. This allows to give CPU a breath to
>>>>>>> run other task(workqueue) instead of busy looping when preemption is
>>>>>>> not allowed on a device whose CVQ might be slow.
>>>>>>>
>>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>>>>
>>>>>> This still leaves hung processes, but at least it doesn't pin the CPU any
>>>>>> more.  Thanks.
>>>>>> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
>>>>>>
>>>>>
>>>>> I'd like to see a full solution
>>>>> 1- block until interrupt
>>>>
>>>> Would it make sense to also have a timeout?
>>>> And when timeout expires, set FAILED bit in device status?
>>>
>>> virtio spec does not set any limits on the timing of vq
>>> processing.
>>
>> Indeed, but I thought the driver could decide it is too long for it.
>>
>> The issue is we keep waiting with rtnl locked, it can quickly make the
>> system unusable.
> 
> if this is a problem we should find a way not to keep rtnl
> locked indefinitely.

From the tests I have done, I think it is. With OVS, a reconfiguration
is performed when the VDUSE device is added, and when an MLX5 device is
in the same bridge, it ends up doing an ioctl() that tries to take the
rtnl lock. In this configuration, it is not possible to kill OVS because
it is stuck trying to acquire the rtnl lock for mlx5, which is held by
virtio-net.

> 
>>>>> 2- still handle surprise removal correctly by waking in that case
>>>>>
>>>>>
>>>>>
>>>>>>> ---
>>>>>>>      drivers/net/virtio_net.c | 4 +++-
>>>>>>>      1 file changed, 3 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>>>>>> index 9f3b1d6ac33d..e7533f29b219 100644
>>>>>>> --- a/drivers/net/virtio_net.c
>>>>>>> +++ b/drivers/net/virtio_net.c
>>>>>>> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>>>>>>>              * into the hypervisor, so the request should be handled immediately.
>>>>>>>              */
>>>>>>>             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
>>>>>>> -              !virtqueue_is_broken(vi->cvq))
>>>>>>> +              !virtqueue_is_broken(vi->cvq)) {
>>>>>>> +               cond_resched();
>>>>>>>                     cpu_relax();
>>>>>>> +       }
>>>>>>>
>>>>>>>             return vi->ctrl->status == VIRTIO_NET_OK;
>>>>>>>      }
>>>>>>> --
>>>>>>> 2.39.3
>>>>>>>
>>>>>
>>>
> 



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-21 20:18               ` Maxime Coquelin
@ 2023-07-24  6:46                 ` Michael S. Tsirkin
  2023-07-24  6:52                   ` Jason Wang
  2023-10-05 19:35                   ` Feng Liu
  2023-07-24  6:52                 ` Jason Wang
  1 sibling, 2 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-07-24  6:46 UTC (permalink / raw)
  To: Maxime Coquelin
  Cc: Shannon Nelson, Jason Wang, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Fri, Jul 21, 2023 at 10:18:03PM +0200, Maxime Coquelin wrote:
> 
> 
> On 7/21/23 17:10, Michael S. Tsirkin wrote:
> > On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> > > 
> > > 
> > > On 7/21/23 16:45, Michael S. Tsirkin wrote:
> > > > On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> > > > > 
> > > > > 
> > > > > On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > > > > > On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > > > > > > On 7/20/23 1:38 AM, Jason Wang wrote:
> > > > > > > > 
> > > > > > > > Adding cond_resched() to the command waiting loop for a better
> > > > > > > > co-operation with the scheduler. This allows to give CPU a breath to
> > > > > > > > run other task(workqueue) instead of busy looping when preemption is
> > > > > > > > not allowed on a device whose CVQ might be slow.
> > > > > > > > 
> > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > 
> > > > > > > This still leaves hung processes, but at least it doesn't pin the CPU any
> > > > > > > more.  Thanks.
> > > > > > > Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > > > > > > 
> > > > > > 
> > > > > > I'd like to see a full solution
> > > > > > 1- block until interrupt
> > > > > 
> > > > > Would it make sense to also have a timeout?
> > > > > And when timeout expires, set FAILED bit in device status?
> > > > 
> > > > virtio spec does not set any limits on the timing of vq
> > > > processing.
> > > 
> > > Indeed, but I thought the driver could decide it is too long for it.
> > > 
> > > The issue is we keep waiting with rtnl locked, it can quickly make the
> > > system unusable.
> > 
> > if this is a problem we should find a way not to keep rtnl
> > locked indefinitely.
> 
> From the tests I have done, I think it is. With OVS, a reconfiguration is
> performed when the VDUSE device is added, and when a MLX5 device is
> in the same bridge, it ends up doing an ioctl() that tries to take the
> rtnl lock. In this configuration, it is not possible to kill OVS because
> it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> net.

So for sure, we can queue up the work and process it later.
The somewhat tricky part is limiting the memory consumption.


> > 
> > > > > > 2- still handle surprise removal correctly by waking in that case
> > > > > > 
> > > > > > 
> > > > > > 
> > > > > > > > ---
> > > > > > > >      drivers/net/virtio_net.c | 4 +++-
> > > > > > > >      1 file changed, 3 insertions(+), 1 deletion(-)
> > > > > > > > 
> > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > index 9f3b1d6ac33d..e7533f29b219 100644
> > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > > >              * into the hypervisor, so the request should be handled immediately.
> > > > > > > >              */
> > > > > > > >             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > > -              !virtqueue_is_broken(vi->cvq))
> > > > > > > > +              !virtqueue_is_broken(vi->cvq)) {
> > > > > > > > +               cond_resched();
> > > > > > > >                     cpu_relax();
> > > > > > > > +       }
> > > > > > > > 
> > > > > > > >             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > >      }
> > > > > > > > --
> > > > > > > > 2.39.3
> > > > > > > > 
> > > > > > 
> > > > 
> > 



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-21 20:18               ` Maxime Coquelin
  2023-07-24  6:46                 ` Michael S. Tsirkin
@ 2023-07-24  6:52                 ` Jason Wang
  2023-07-24  7:17                   ` Michael S. Tsirkin
  1 sibling, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-07-24  6:52 UTC (permalink / raw)
  To: Maxime Coquelin
  Cc: Michael S. Tsirkin, Shannon Nelson, xuanzhuo, netdev,
	linux-kernel, virtualization, edumazet, kuba, pabeni, davem

On Sat, Jul 22, 2023 at 4:18 AM Maxime Coquelin
<maxime.coquelin@redhat.com> wrote:
>
>
>
> On 7/21/23 17:10, Michael S. Tsirkin wrote:
> > On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> >>
> >>
> >> On 7/21/23 16:45, Michael S. Tsirkin wrote:
> >>> On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> >>>>
> >>>>
> >>>> On 7/20/23 23:02, Michael S. Tsirkin wrote:
> >>>>> On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> >>>>>> On 7/20/23 1:38 AM, Jason Wang wrote:
> >>>>>>>
> >>>>>>> Adding cond_resched() to the command waiting loop for a better
> >>>>>>> co-operation with the scheduler. This allows to give CPU a breath to
> >>>>>>> run other task(workqueue) instead of busy looping when preemption is
> >>>>>>> not allowed on a device whose CVQ might be slow.
> >>>>>>>
> >>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> >>>>>>
> >>>>>> This still leaves hung processes, but at least it doesn't pin the CPU any
> >>>>>> more.  Thanks.
> >>>>>> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> >>>>>>
> >>>>>
> >>>>> I'd like to see a full solution
> >>>>> 1- block until interrupt

I remember that in previous versions you worried about the extra MSI
vector (maybe I was wrong).

> >>>>
> >>>> Would it make sense to also have a timeout?
> >>>> And when timeout expires, set FAILED bit in device status?
> >>>
> >>> virtio spec does not set any limits on the timing of vq
> >>> processing.
> >>
> >> Indeed, but I thought the driver could decide it is too long for it.
> >>
> >> The issue is we keep waiting with rtnl locked, it can quickly make the
> >> system unusable.
> >
> > if this is a problem we should find a way not to keep rtnl
> > locked indefinitely.

Any ideas on this direction? Simply dropping rtnl during the busy loop
will result in a lot of races. This seems to require non-trivial
changes in the networking core.

>
>  From the tests I have done, I think it is. With OVS, a reconfiguration
> is performed when the VDUSE device is added, and when a MLX5 device is
> in the same bridge, it ends up doing an ioctl() that tries to take the
> rtnl lock. In this configuration, it is not possible to kill OVS because
> it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> net.

Yeah, basically, any RTNL user would be blocked forever.

And the infinite loop has other side effects, like preventing the
freezer from working.

To summarize, there are three issues:

1) busy polling
2) breaking the freezer
3) holding RTNL during the loop

Solving 3) may also help with 2), e.g. some pm routines (wireguard, or
even virtnet_restore() itself) may try to hold the lock.
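Patch 1 of the series moves in this direction for rx mode; a minimal model (illustrative names only, not the driver's code) of why deferring the cvq command to a workqueue helps with the atomic-context side of the problem: callers never wait, and bursts of requests coalesce into a single command sent from process context:

```c
#include <stdbool.h>

/* Sketch of the deferred rx-mode pattern: the (atomic) request path
 * only records the latest desired flags and marks work pending; the
 * command itself runs later in process context, where sleeping or
 * cond_resched() is allowed. All names here are made up. */
struct rx_mode_work {
	bool promisc;
	bool allmulti;
	bool pending;	/* stands in for schedule_work() */
	int cmds_sent;	/* for the sketch only */
};

/* Called from the (atomic) ndo_set_rx_mode-like path. */
static void rx_mode_request(struct rx_mode_work *w, bool promisc,
			    bool allmulti)
{
	w->promisc = promisc;
	w->allmulti = allmulti;
	w->pending = true;
}

/* Runs in process context (the workqueue). */
static void rx_mode_work_fn(struct rx_mode_work *w)
{
	if (!w->pending)
		return;
	w->pending = false;
	/* the PROMISC/ALLMULTI cvq commands would be sent here and may
	 * now sleep safely */
	w->cmds_sent++;
}
```

This only keeps atomic callers out of the loop; issues 1) and 3) still need the waiting side fixed separately.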

>
> >
> >>>>> 2- still handle surprise removal correctly by waking in that case

This is basically what version 1 did?

https://lore.kernel.org/lkml/6026e801-6fda-fee9-a69b-d06a80368621@redhat.com/t/

Thanks

> >>>>>
> >>>>>
> >>>>>
> >>>>>>> ---
> >>>>>>>      drivers/net/virtio_net.c | 4 +++-
> >>>>>>>      1 file changed, 3 insertions(+), 1 deletion(-)
> >>>>>>>
> >>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> >>>>>>> index 9f3b1d6ac33d..e7533f29b219 100644
> >>>>>>> --- a/drivers/net/virtio_net.c
> >>>>>>> +++ b/drivers/net/virtio_net.c
> >>>>>>> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> >>>>>>>              * into the hypervisor, so the request should be handled immediately.
> >>>>>>>              */
> >>>>>>>             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> >>>>>>> -              !virtqueue_is_broken(vi->cvq))
> >>>>>>> +              !virtqueue_is_broken(vi->cvq)) {
> >>>>>>> +               cond_resched();
> >>>>>>>                     cpu_relax();
> >>>>>>> +       }
> >>>>>>>
> >>>>>>>             return vi->ctrl->status == VIRTIO_NET_OK;
> >>>>>>>      }
> >>>>>>> --
> >>>>>>> 2.39.3
> >>>>>>>
> >>>>>
> >>>
> >
>



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-24  6:46                 ` Michael S. Tsirkin
@ 2023-07-24  6:52                   ` Jason Wang
  2023-07-24  7:18                     ` Michael S. Tsirkin
  2023-10-05 19:35                   ` Feng Liu
  1 sibling, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-07-24  6:52 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Mon, Jul 24, 2023 at 2:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Fri, Jul 21, 2023 at 10:18:03PM +0200, Maxime Coquelin wrote:
> >
> >
> > On 7/21/23 17:10, Michael S. Tsirkin wrote:
> > > On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> > > >
> > > >
> > > > On 7/21/23 16:45, Michael S. Tsirkin wrote:
> > > > > On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> > > > > >
> > > > > >
> > > > > > On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > > > > > > On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > > > > > > > On 7/20/23 1:38 AM, Jason Wang wrote:
> > > > > > > > >
> > > > > > > > > Adding cond_resched() to the command waiting loop for a better
> > > > > > > > > co-operation with the scheduler. This allows to give CPU a breath to
> > > > > > > > > run other task(workqueue) instead of busy looping when preemption is
> > > > > > > > > not allowed on a device whose CVQ might be slow.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > >
> > > > > > > > This still leaves hung processes, but at least it doesn't pin the CPU any
> > > > > > > > more.  Thanks.
> > > > > > > > Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > > > > > > >
> > > > > > >
> > > > > > > I'd like to see a full solution
> > > > > > > 1- block until interrupt
> > > > > >
> > > > > > Would it make sense to also have a timeout?
> > > > > > And when timeout expires, set FAILED bit in device status?
> > > > >
> > > > > virtio spec does not set any limits on the timing of vq
> > > > > processing.
> > > >
> > > > Indeed, but I thought the driver could decide it is too long for it.
> > > >
> > > > The issue is we keep waiting with rtnl locked, it can quickly make the
> > > > system unusable.
> > >
> > > if this is a problem we should find a way not to keep rtnl
> > > locked indefinitely.
> >
> > From the tests I have done, I think it is. With OVS, a reconfiguration is
> > performed when the VDUSE device is added, and when a MLX5 device is
> > in the same bridge, it ends up doing an ioctl() that tries to take the
> > rtnl lock. In this configuration, it is not possible to kill OVS because
> > it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> > net.
>
> So for sure, we can queue up the work and process it later.
> The somewhat tricky part is limiting the memory consumption.

And it needs to synchronize with rtnl somehow, e.g. for device
unregistering, which seems not easy.

Thanks

>
>
> > >
> > > > > > > 2- still handle surprise removal correctly by waking in that case
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > > > ---
> > > > > > > > >      drivers/net/virtio_net.c | 4 +++-
> > > > > > > > >      1 file changed, 3 insertions(+), 1 deletion(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > > index 9f3b1d6ac33d..e7533f29b219 100644
> > > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > > @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > > > >              * into the hypervisor, so the request should be handled immediately.
> > > > > > > > >              */
> > > > > > > > >             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > > > -              !virtqueue_is_broken(vi->cvq))
> > > > > > > > > +              !virtqueue_is_broken(vi->cvq)) {
> > > > > > > > > +               cond_resched();
> > > > > > > > >                     cpu_relax();
> > > > > > > > > +       }
> > > > > > > > >
> > > > > > > > >             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > >      }
> > > > > > > > > --
> > > > > > > > > 2.39.3
> > > > > > > > >
> > > > > > >
> > > > >
> > >
>



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-24  6:52                 ` Jason Wang
@ 2023-07-24  7:17                   ` Michael S. Tsirkin
  2023-07-25  3:07                     ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-07-24  7:17 UTC (permalink / raw)
  To: Jason Wang
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Mon, Jul 24, 2023 at 02:52:05PM +0800, Jason Wang wrote:
> On Sat, Jul 22, 2023 at 4:18 AM Maxime Coquelin
> <maxime.coquelin@redhat.com> wrote:
> >
> >
> >
> > On 7/21/23 17:10, Michael S. Tsirkin wrote:
> > > On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> > >>
> > >>
> > >> On 7/21/23 16:45, Michael S. Tsirkin wrote:
> > >>> On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> > >>>>
> > >>>>
> > >>>> On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > >>>>> On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > >>>>>> On 7/20/23 1:38 AM, Jason Wang wrote:
> > >>>>>>>
> > >>>>>>> Adding cond_resched() to the command waiting loop for a better
> > >>>>>>> co-operation with the scheduler. This allows to give CPU a breath to
> > >>>>>>> run other task(workqueue) instead of busy looping when preemption is
> > >>>>>>> not allowed on a device whose CVQ might be slow.
> > >>>>>>>
> > >>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> > >>>>>>
> > >>>>>> This still leaves hung processes, but at least it doesn't pin the CPU any
> > >>>>>> more.  Thanks.
> > >>>>>> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > >>>>>>
> > >>>>>
> > >>>>> I'd like to see a full solution
> > >>>>> 1- block until interrupt
> 
> I remember in previous versions, you worried about the extra MSI
> vector. (Maybe I was wrong).
> 
> > >>>>
> > >>>> Would it make sense to also have a timeout?
> > >>>> And when timeout expires, set FAILED bit in device status?
> > >>>
> > >>> virtio spec does not set any limits on the timing of vq
> > >>> processing.
> > >>
> > >> Indeed, but I thought the driver could decide it is too long for it.
> > >>
> > >> The issue is we keep waiting with rtnl locked, it can quickly make the
> > >> system unusable.
> > >
> > > if this is a problem we should find a way not to keep rtnl
> > > locked indefinitely.
> 
> Any ideas on this direction? Simply dropping rtnl during the busy loop
> will result in a lot of races. This seems to require non-trivial
> changes in the networking core.
> 
> >
> >  From the tests I have done, I think it is. With OVS, a reconfiguration
> > is performed when the VDUSE device is added, and when a MLX5 device is
> > in the same bridge, it ends up doing an ioctl() that tries to take the
> > rtnl lock. In this configuration, it is not possible to kill OVS because
> > it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> > net.
> 
> Yeah, basically, any RTNL users would be blocked forever.
> 
> And the infinite loop has other side effects like it blocks the freezer to work.
> 
> To summarize, there are three issues
> 
> 1) busy polling
> 2) breaks freezer
> 3) hold RTNL during the loop
> 
> Solving 3 may help somehow for 2 e.g some pm routine e.g wireguard or
> even virtnet_restore() itself may try to hold the lock.

Yep. So my current feeling is that the only real fix is to actually
queue up the work in software. It's mostly trivial to limit
memory consumption; vids are the only case where it would make
sense to have more than one command of a given type
outstanding. Have a tree or a bitmap with vids to add/remove?
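A sketch of the bitmap idea (hypothetical names, not a posted patch): instead of queueing one work item per VLAN add/del request, record only the desired vid membership in a fixed 4096-bit map, so the deferred state stays bounded no matter how many requests pile up while the cvq is slow; the worker later diffs desired vs. applied state and issues at most one command per vid:

```c
#include <stdbool.h>
#include <stdint.h>

/* VLAN ids are 0..4095, so desired membership fits in 64 x 64 bits. */
#define VID_MAX 4096
#define VID_WORDS (VID_MAX / 64)

struct vid_state {
	uint64_t desired[VID_WORDS];	/* what the stack asked for */
	uint64_t applied[VID_WORDS];	/* what the device has acked */
};

/* Request path: just flip the desired bit; memory use is constant. */
static void vid_request(struct vid_state *s, unsigned int vid, bool add)
{
	if (add)
		s->desired[vid / 64] |= 1ULL << (vid % 64);
	else
		s->desired[vid / 64] &= ~(1ULL << (vid % 64));
}

/* Worker: returns the number of cvq commands that would be sent. */
static int vid_sync(struct vid_state *s)
{
	int cmds = 0;

	for (unsigned int w = 0; w < VID_WORDS; w++) {
		uint64_t diff = s->desired[w] ^ s->applied[w];

		while (diff) {
			unsigned int bit = __builtin_ctzll(diff);

			(void)bit; /* a VLAN_ADD/DEL command for vid
				    * w * 64 + bit would go here */
			diff &= diff - 1;
			cmds++;
		}
		s->applied[w] = s->desired[w];
	}
	return cmds;
}
```

Repeated add/remove requests for the same vid before the worker runs collapse into the final desired bit, which is what bounds the queued work.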



> >
> > >
> > >>>>> 2- still handle surprise removal correctly by waking in that case
> 
> This is basically what version 1 did?
> 
> https://lore.kernel.org/lkml/6026e801-6fda-fee9-a69b-d06a80368621@redhat.com/t/
> 
> Thanks

Yes - except the timeout part.


> > >>>>>
> > >>>>>
> > >>>>>
> > >>>>>>> ---
> > >>>>>>>      drivers/net/virtio_net.c | 4 +++-
> > >>>>>>>      1 file changed, 3 insertions(+), 1 deletion(-)
> > >>>>>>>
> > >>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > >>>>>>> index 9f3b1d6ac33d..e7533f29b219 100644
> > >>>>>>> --- a/drivers/net/virtio_net.c
> > >>>>>>> +++ b/drivers/net/virtio_net.c
> > >>>>>>> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > >>>>>>>              * into the hypervisor, so the request should be handled immediately.
> > >>>>>>>              */
> > >>>>>>>             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > >>>>>>> -              !virtqueue_is_broken(vi->cvq))
> > >>>>>>> +              !virtqueue_is_broken(vi->cvq)) {
> > >>>>>>> +               cond_resched();
> > >>>>>>>                     cpu_relax();
> > >>>>>>> +       }
> > >>>>>>>
> > >>>>>>>             return vi->ctrl->status == VIRTIO_NET_OK;
> > >>>>>>>      }
> > >>>>>>> --
> > >>>>>>> 2.39.3
> > >>>>>>>
> > >>>>>
> > >>>
> > >
> >



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-24  6:52                   ` Jason Wang
@ 2023-07-24  7:18                     ` Michael S. Tsirkin
  2023-07-25  3:03                       ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-07-24  7:18 UTC (permalink / raw)
  To: Jason Wang
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Mon, Jul 24, 2023 at 02:52:49PM +0800, Jason Wang wrote:
> On Mon, Jul 24, 2023 at 2:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Fri, Jul 21, 2023 at 10:18:03PM +0200, Maxime Coquelin wrote:
> > >
> > >
> > > On 7/21/23 17:10, Michael S. Tsirkin wrote:
> > > > On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> > > > >
> > > > >
> > > > > On 7/21/23 16:45, Michael S. Tsirkin wrote:
> > > > > > On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> > > > > > >
> > > > > > >
> > > > > > > On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > > > > > > > On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > > > > > > > > On 7/20/23 1:38 AM, Jason Wang wrote:
> > > > > > > > > >
> > > > > > > > > > Adding cond_resched() to the command waiting loop for a better
> > > > > > > > > > co-operation with the scheduler. This allows to give CPU a breath to
> > > > > > > > > > run other task(workqueue) instead of busy looping when preemption is
> > > > > > > > > > not allowed on a device whose CVQ might be slow.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > >
> > > > > > > > > This still leaves hung processes, but at least it doesn't pin the CPU any
> > > > > > > > > more.  Thanks.
> > > > > > > > > Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > > > > > > > >
> > > > > > > >
> > > > > > > > I'd like to see a full solution
> > > > > > > > 1- block until interrupt
> > > > > > >
> > > > > > > Would it make sense to also have a timeout?
> > > > > > > And when timeout expires, set FAILED bit in device status?
> > > > > >
> > > > > > virtio spec does not set any limits on the timing of vq
> > > > > > processing.
> > > > >
> > > > > Indeed, but I thought the driver could decide it is too long for it.
> > > > >
> > > > > The issue is we keep waiting with rtnl locked, it can quickly make the
> > > > > system unusable.
> > > >
> > > > if this is a problem we should find a way not to keep rtnl
> > > > locked indefinitely.
> > >
> > > From the tests I have done, I think it is. With OVS, a reconfiguration is
> > > performed when the VDUSE device is added, and when a MLX5 device is
> > > in the same bridge, it ends up doing an ioctl() that tries to take the
> > > rtnl lock. In this configuration, it is not possible to kill OVS because
> > > it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> > > net.
> >
> > So for sure, we can queue up the work and process it later.
> > The somewhat tricky part is limiting the memory consumption.
> 
> And it needs to sync with rtnl somehow, e.g device unregistering which
> seems not easy.
> 
> Thanks

Since when does device unregister need to send cvq commands?

> >
> >
> > > >
> > > > > > > > 2- still handle surprise removal correctly by waking in that case
> > > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > > > ---
> > > > > > > > > >      drivers/net/virtio_net.c | 4 +++-
> > > > > > > > > >      1 file changed, 3 insertions(+), 1 deletion(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > > > index 9f3b1d6ac33d..e7533f29b219 100644
> > > > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > > > @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > > > > >              * into the hypervisor, so the request should be handled immediately.
> > > > > > > > > >              */
> > > > > > > > > >             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > > > > -              !virtqueue_is_broken(vi->cvq))
> > > > > > > > > > +              !virtqueue_is_broken(vi->cvq)) {
> > > > > > > > > > +               cond_resched();
> > > > > > > > > >                     cpu_relax();
> > > > > > > > > > +       }
> > > > > > > > > >
> > > > > > > > > >             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > > >      }
> > > > > > > > > > --
> > > > > > > > > > 2.39.3
> > > > > > > > > >
> > > > > > > > > > _______________________________________________
> > > > > > > > > > Virtualization mailing list
> > > > > > > > > > Virtualization@lists.linux-foundation.org
> > > > > > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > > >
> > > > > >
> > > >
> >


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-24  7:18                     ` Michael S. Tsirkin
@ 2023-07-25  3:03                       ` Jason Wang
  2024-02-22 19:21                         ` Michael S. Tsirkin
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-07-25  3:03 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Mon, Jul 24, 2023 at 3:18 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Jul 24, 2023 at 02:52:49PM +0800, Jason Wang wrote:
> > On Mon, Jul 24, 2023 at 2:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Fri, Jul 21, 2023 at 10:18:03PM +0200, Maxime Coquelin wrote:
> > > >
> > > >
> > > > On 7/21/23 17:10, Michael S. Tsirkin wrote:
> > > > > On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> > > > > >
> > > > > >
> > > > > > On 7/21/23 16:45, Michael S. Tsirkin wrote:
> > > > > > > On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> > > > > > > >
> > > > > > > >
> > > > > > > > On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > > > > > > > > On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > > > > > > > > > On 7/20/23 1:38 AM, Jason Wang wrote:
> > > > > > > > > > >
> > > > > > > > > > > Adding cond_resched() to the command waiting loop for a better
> > > > > > > > > > > co-operation with the scheduler. This allows to give CPU a breath to
> > > > > > > > > > > run other task(workqueue) instead of busy looping when preemption is
> > > > > > > > > > > not allowed on a device whose CVQ might be slow.
> > > > > > > > > > >
> > > > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > > >
> > > > > > > > > > This still leaves hung processes, but at least it doesn't pin the CPU any
> > > > > > > > > > more.  Thanks.
> > > > > > > > > > Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > I'd like to see a full solution
> > > > > > > > > 1- block until interrupt
> > > > > > > >
> > > > > > > > Would it make sense to also have a timeout?
> > > > > > > > And when timeout expires, set FAILED bit in device status?
> > > > > > >
> > > > > > > virtio spec does not set any limits on the timing of vq
> > > > > > > processing.
> > > > > >
> > > > > > Indeed, but I thought the driver could decide it is too long for it.
> > > > > >
> > > > > > The issue is we keep waiting with rtnl locked, it can quickly make the
> > > > > > system unusable.
> > > > >
> > > > > if this is a problem we should find a way not to keep rtnl
> > > > > locked indefinitely.
> > > >
> > > > From the tests I have done, I think it is. With OVS, a reconfiguration is
> > > > performed when the VDUSE device is added, and when a MLX5 device is
> > > > in the same bridge, it ends up doing an ioctl() that tries to take the
> > > > rtnl lock. In this configuration, it is not possible to kill OVS because
> > > > it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> > > > net.
> > >
> > > So for sure, we can queue up the work and process it later.
> > > The somewhat tricky part is limiting the memory consumption.
> >
> > And it needs to sync with rtnl somehow, e.g device unregistering which
> > seems not easy.
> >
> > Thanks
>
> since when does device unregister need to send cvq commands?

It doesn't do this now. But if we don't process the work under rtnl,
we need to synchronize with device unregistering.

Thanks

>
> > >
> > >
> > > > >
> > > > > > > > > 2- still handle surprise removal correctly by waking in that case
> > > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > > ---
> > > > > > > > > > >      drivers/net/virtio_net.c | 4 +++-
> > > > > > > > > > >      1 file changed, 3 insertions(+), 1 deletion(-)
> > > > > > > > > > >
> > > > > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > > > > index 9f3b1d6ac33d..e7533f29b219 100644
> > > > > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > > > > @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > > > > > >              * into the hypervisor, so the request should be handled immediately.
> > > > > > > > > > >              */
> > > > > > > > > > >             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > > > > > -              !virtqueue_is_broken(vi->cvq))
> > > > > > > > > > > +              !virtqueue_is_broken(vi->cvq)) {
> > > > > > > > > > > +               cond_resched();
> > > > > > > > > > >                     cpu_relax();
> > > > > > > > > > > +       }
> > > > > > > > > > >
> > > > > > > > > > >             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > > > >      }
> > > > > > > > > > > --
> > > > > > > > > > > 2.39.3
> > > > > > > > > > >
> > > > > > > > > > > _______________________________________________
> > > > > > > > > > > Virtualization mailing list
> > > > > > > > > > > Virtualization@lists.linux-foundation.org
> > > > > > > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > > > >
> > > > > > >
> > > > >
> > >
>



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-24  7:17                   ` Michael S. Tsirkin
@ 2023-07-25  3:07                     ` Jason Wang
  2023-07-25  7:36                       ` Michael S. Tsirkin
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-07-25  3:07 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Mon, Jul 24, 2023 at 3:17 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Jul 24, 2023 at 02:52:05PM +0800, Jason Wang wrote:
> > On Sat, Jul 22, 2023 at 4:18 AM Maxime Coquelin
> > <maxime.coquelin@redhat.com> wrote:
> > >
> > >
> > >
> > > On 7/21/23 17:10, Michael S. Tsirkin wrote:
> > > > On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> > > >>
> > > >>
> > > >> On 7/21/23 16:45, Michael S. Tsirkin wrote:
> > > >>> On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> > > >>>>
> > > >>>>
> > > >>>> On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > > >>>>> On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > > >>>>>> On 7/20/23 1:38 AM, Jason Wang wrote:
> > > >>>>>>>
> > > >>>>>>> Adding cond_resched() to the command waiting loop for a better
> > > >>>>>>> co-operation with the scheduler. This allows to give CPU a breath to
> > > >>>>>>> run other task(workqueue) instead of busy looping when preemption is
> > > >>>>>>> not allowed on a device whose CVQ might be slow.
> > > >>>>>>>
> > > >>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > >>>>>>
> > > >>>>>> This still leaves hung processes, but at least it doesn't pin the CPU any
> > > >>>>>> more.  Thanks.
> > > >>>>>> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > > >>>>>>
> > > >>>>>
> > > >>>>> I'd like to see a full solution
> > > >>>>> 1- block until interrupt
> >
> > I remember in previous versions, you worried about the extra MSI
> > vector. (Maybe I was wrong).
> >
> > > >>>>
> > > >>>> Would it make sense to also have a timeout?
> > > >>>> And when timeout expires, set FAILED bit in device status?
> > > >>>
> > > >>> virtio spec does not set any limits on the timing of vq
> > > >>> processing.
> > > >>
> > > >> Indeed, but I thought the driver could decide it is too long for it.
> > > >>
> > > >> The issue is we keep waiting with rtnl locked, it can quickly make the
> > > >> system unusable.
> > > >
> > > > if this is a problem we should find a way not to keep rtnl
> > > > locked indefinitely.
> >
> > Any ideas on this direction? Simply dropping rtnl during the busy loop
> > will result in a lot of races. This seems to require non-trivial
> > changes in the networking core.
> >
> > >
> > >  From the tests I have done, I think it is. With OVS, a reconfiguration
> > > is performed when the VDUSE device is added, and when a MLX5 device is
> > > in the same bridge, it ends up doing an ioctl() that tries to take the
> > > rtnl lock. In this configuration, it is not possible to kill OVS because
> > > it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> > > net.
> >
> > Yeah, basically, any RTNL users would be blocked forever.
> >
> > And the infinite loop has other side effects like it blocks the freezer to work.
> >
> > To summarize, there are three issues
> >
> > 1) busy polling
> > 2) breaks freezer
> > 3) hold RTNL during the loop
> >
> > Solving 3 may help somehow for 2 e.g some pm routine e.g wireguard or
> > even virtnet_restore() itself may try to hold the lock.
>
> Yep. So my feeling currently is, the only real fix is to actually
> queue up the work in software.

Do you mean something like:

rtnl_lock();
queue up the work
rtnl_unlock();
return success;

?

> It's mostly trivial to limit
> memory consumption, vid's is the
> only one where it would make sense to have more than
> 1 command of a given type outstanding.

And rx mode too, so this implies we would fail any command if the
previous work is not finished.

> have a tree
> or a bitmap with vids to add/remove?

Probably.

Thanks

>
>
>
> > >
> > > >
> > > >>>>> 2- still handle surprise removal correctly by waking in that case
> >
> > This is basically what version 1 did?
> >
> > https://lore.kernel.org/lkml/6026e801-6fda-fee9-a69b-d06a80368621@redhat.com/t/
> >
> > Thanks
>
> Yes - except the timeout part.
>
>
> > > >>>>>
> > > >>>>>
> > > >>>>>
> > > >>>>>>> ---
> > > >>>>>>>      drivers/net/virtio_net.c | 4 +++-
> > > >>>>>>>      1 file changed, 3 insertions(+), 1 deletion(-)
> > > >>>>>>>
> > > >>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > >>>>>>> index 9f3b1d6ac33d..e7533f29b219 100644
> > > >>>>>>> --- a/drivers/net/virtio_net.c
> > > >>>>>>> +++ b/drivers/net/virtio_net.c
> > > >>>>>>> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > >>>>>>>              * into the hypervisor, so the request should be handled immediately.
> > > >>>>>>>              */
> > > >>>>>>>             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > >>>>>>> -              !virtqueue_is_broken(vi->cvq))
> > > >>>>>>> +              !virtqueue_is_broken(vi->cvq)) {
> > > >>>>>>> +               cond_resched();
> > > >>>>>>>                     cpu_relax();
> > > >>>>>>> +       }
> > > >>>>>>>
> > > >>>>>>>             return vi->ctrl->status == VIRTIO_NET_OK;
> > > >>>>>>>      }
> > > >>>>>>> --
> > > >>>>>>> 2.39.3
> > > >>>>>>>
> > > >>>>>>> _______________________________________________
> > > >>>>>>> Virtualization mailing list
> > > >>>>>>> Virtualization@lists.linux-foundation.org
> > > >>>>>>> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > >>>>>
> > > >>>
> > > >
> > >
>



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-25  3:07                     ` Jason Wang
@ 2023-07-25  7:36                       ` Michael S. Tsirkin
  2023-07-26  1:55                         ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-07-25  7:36 UTC (permalink / raw)
  To: Jason Wang
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Tue, Jul 25, 2023 at 11:07:40AM +0800, Jason Wang wrote:
> On Mon, Jul 24, 2023 at 3:17 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Mon, Jul 24, 2023 at 02:52:05PM +0800, Jason Wang wrote:
> > > On Sat, Jul 22, 2023 at 4:18 AM Maxime Coquelin
> > > <maxime.coquelin@redhat.com> wrote:
> > > >
> > > >
> > > >
> > > > On 7/21/23 17:10, Michael S. Tsirkin wrote:
> > > > > On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> > > > >>
> > > > >>
> > > > >> On 7/21/23 16:45, Michael S. Tsirkin wrote:
> > > > >>> On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> > > > >>>>
> > > > >>>>
> > > > >>>> On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > > > >>>>> On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > > > >>>>>> On 7/20/23 1:38 AM, Jason Wang wrote:
> > > > >>>>>>>
> > > > >>>>>>> Adding cond_resched() to the command waiting loop for a better
> > > > >>>>>>> co-operation with the scheduler. This allows to give CPU a breath to
> > > > >>>>>>> run other task(workqueue) instead of busy looping when preemption is
> > > > >>>>>>> not allowed on a device whose CVQ might be slow.
> > > > >>>>>>>
> > > > >>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > >>>>>>
> > > > >>>>>> This still leaves hung processes, but at least it doesn't pin the CPU any
> > > > >>>>>> more.  Thanks.
> > > > >>>>>> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > > > >>>>>>
> > > > >>>>>
> > > > >>>>> I'd like to see a full solution
> > > > >>>>> 1- block until interrupt
> > >
> > > I remember in previous versions, you worried about the extra MSI
> > > vector. (Maybe I was wrong).
> > >
> > > > >>>>
> > > > >>>> Would it make sense to also have a timeout?
> > > > >>>> And when timeout expires, set FAILED bit in device status?
> > > > >>>
> > > > >>> virtio spec does not set any limits on the timing of vq
> > > > >>> processing.
> > > > >>
> > > > >> Indeed, but I thought the driver could decide it is too long for it.
> > > > >>
> > > > >> The issue is we keep waiting with rtnl locked, it can quickly make the
> > > > >> system unusable.
> > > > >
> > > > > if this is a problem we should find a way not to keep rtnl
> > > > > locked indefinitely.
> > >
> > > Any ideas on this direction? Simply dropping rtnl during the busy loop
> > > will result in a lot of races. This seems to require non-trivial
> > > changes in the networking core.
> > >
> > > >
> > > >  From the tests I have done, I think it is. With OVS, a reconfiguration
> > > > is performed when the VDUSE device is added, and when a MLX5 device is
> > > > in the same bridge, it ends up doing an ioctl() that tries to take the
> > > > rtnl lock. In this configuration, it is not possible to kill OVS because
> > > > it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> > > > net.
> > >
> > > Yeah, basically, any RTNL users would be blocked forever.
> > >
> > > And the infinite loop has other side effects like it blocks the freezer to work.
> > >
> > > To summarize, there are three issues
> > >
> > > 1) busy polling
> > > 2) breaks freezer
> > > 3) hold RTNL during the loop
> > >
> > > Solving 3 may help somehow for 2 e.g some pm routine e.g wireguard or
> > > even virtnet_restore() itself may try to hold the lock.
> >
> > Yep. So my feeling currently is, the only real fix is to actually
> > queue up the work in software.
> 
> Do you mean something like:
> 
> rtnl_lock();
> queue up the work
> rtnl_unlock();
> return success;
> 
> ?

yes


> > It's mostly trivial to limit
> > memory consumption, vid's is the
> > only one where it would make sense to have more than
> > 1 command of a given type outstanding.
> 
> And rx mode so this implies we will fail any command if the previous
> work is not finished.

don't fail it, store it.

> > have a tree
> > or a bitmap with vids to add/remove?
> 
> Probably.
> 
> Thanks
> 
> >
> >
> >
> > > >
> > > > >
> > > > >>>>> 2- still handle surprise removal correctly by waking in that case
> > >
> > > This is basically what version 1 did?
> > >
> > > https://lore.kernel.org/lkml/6026e801-6fda-fee9-a69b-d06a80368621@redhat.com/t/
> > >
> > > Thanks
> >
> > Yes - except the timeout part.
> >
> >
> > > > >>>>>
> > > > >>>>>
> > > > >>>>>
> > > > >>>>>>> ---
> > > > >>>>>>>      drivers/net/virtio_net.c | 4 +++-
> > > > >>>>>>>      1 file changed, 3 insertions(+), 1 deletion(-)
> > > > >>>>>>>
> > > > >>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > >>>>>>> index 9f3b1d6ac33d..e7533f29b219 100644
> > > > >>>>>>> --- a/drivers/net/virtio_net.c
> > > > >>>>>>> +++ b/drivers/net/virtio_net.c
> > > > >>>>>>> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > >>>>>>>              * into the hypervisor, so the request should be handled immediately.
> > > > >>>>>>>              */
> > > > >>>>>>>             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > >>>>>>> -              !virtqueue_is_broken(vi->cvq))
> > > > >>>>>>> +              !virtqueue_is_broken(vi->cvq)) {
> > > > >>>>>>> +               cond_resched();
> > > > >>>>>>>                     cpu_relax();
> > > > >>>>>>> +       }
> > > > >>>>>>>
> > > > >>>>>>>             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > >>>>>>>      }
> > > > >>>>>>> --
> > > > >>>>>>> 2.39.3
> > > > >>>>>>>
> > > > >>>>>>> _______________________________________________
> > > > >>>>>>> Virtualization mailing list
> > > > >>>>>>> Virtualization@lists.linux-foundation.org
> > > > >>>>>>> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > >>>>>
> > > > >>>
> > > > >
> > > >
> >



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-25  7:36                       ` Michael S. Tsirkin
@ 2023-07-26  1:55                         ` Jason Wang
  2023-07-26 11:37                           ` Michael S. Tsirkin
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-07-26  1:55 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Tue, Jul 25, 2023 at 3:36 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Tue, Jul 25, 2023 at 11:07:40AM +0800, Jason Wang wrote:
> > On Mon, Jul 24, 2023 at 3:17 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Mon, Jul 24, 2023 at 02:52:05PM +0800, Jason Wang wrote:
> > > > On Sat, Jul 22, 2023 at 4:18 AM Maxime Coquelin
> > > > <maxime.coquelin@redhat.com> wrote:
> > > > >
> > > > >
> > > > >
> > > > > On 7/21/23 17:10, Michael S. Tsirkin wrote:
> > > > > > On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> > > > > >>
> > > > > >>
> > > > > >> On 7/21/23 16:45, Michael S. Tsirkin wrote:
> > > > > >>> On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> > > > > >>>>
> > > > > >>>>
> > > > > >>>> On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > > > > >>>>> On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > > > > >>>>>> On 7/20/23 1:38 AM, Jason Wang wrote:
> > > > > >>>>>>>
> > > > > >>>>>>> Adding cond_resched() to the command waiting loop for a better
> > > > > >>>>>>> co-operation with the scheduler. This allows to give CPU a breath to
> > > > > >>>>>>> run other task(workqueue) instead of busy looping when preemption is
> > > > > >>>>>>> not allowed on a device whose CVQ might be slow.
> > > > > >>>>>>>
> > > > > >>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > >>>>>>
> > > > > >>>>>> This still leaves hung processes, but at least it doesn't pin the CPU any
> > > > > >>>>>> more.  Thanks.
> > > > > >>>>>> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > > > > >>>>>>
> > > > > >>>>>
> > > > > >>>>> I'd like to see a full solution
> > > > > >>>>> 1- block until interrupt
> > > >
> > > > I remember in previous versions, you worried about the extra MSI
> > > > vector. (Maybe I was wrong).
> > > >
> > > > > >>>>
> > > > > >>>> Would it make sense to also have a timeout?
> > > > > >>>> And when timeout expires, set FAILED bit in device status?
> > > > > >>>
> > > > > >>> virtio spec does not set any limits on the timing of vq
> > > > > >>> processing.
> > > > > >>
> > > > > >> Indeed, but I thought the driver could decide it is too long for it.
> > > > > >>
> > > > > >> The issue is we keep waiting with rtnl locked, it can quickly make the
> > > > > >> system unusable.
> > > > > >
> > > > > > if this is a problem we should find a way not to keep rtnl
> > > > > > locked indefinitely.
> > > >
> > > > Any ideas on this direction? Simply dropping rtnl during the busy loop
> > > > will result in a lot of races. This seems to require non-trivial
> > > > changes in the networking core.
> > > >
> > > > >
> > > > >  From the tests I have done, I think it is. With OVS, a reconfiguration
> > > > > is performed when the VDUSE device is added, and when a MLX5 device is
> > > > > in the same bridge, it ends up doing an ioctl() that tries to take the
> > > > > rtnl lock. In this configuration, it is not possible to kill OVS because
> > > > > it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> > > > > net.
> > > >
> > > > Yeah, basically, any RTNL users would be blocked forever.
> > > >
> > > > And the infinite loop has other side effects like it blocks the freezer to work.
> > > >
> > > > To summarize, there are three issues
> > > >
> > > > 1) busy polling
> > > > 2) breaks freezer
> > > > 3) hold RTNL during the loop
> > > >
> > > > Solving 3 may help somehow for 2 e.g some pm routine e.g wireguard or
> > > > even virtnet_restore() itself may try to hold the lock.
> > >
> > > Yep. So my feeling currently is, the only real fix is to actually
> > > queue up the work in software.
> >
> > Do you mean something like:
> >
> > rtnl_lock();
> > queue up the work
> > rtnl_unlock();
> > return success;
> >
> > ?
>
> yes

We will lose the error reporting; is that a real problem or not?

>
>
> > > It's mostly trivial to limit
> > > memory consumption, vid's is the
> > > only one where it would make sense to have more than
> > > 1 command of a given type outstanding.
> >
> > And rx mode so this implies we will fail any command if the previous
> > work is not finished.
>
> don't fail it, store it.

Ok.

Thanks
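A sketch of what "store it" could look like for the two problematic command types, following the tree-or-bitmap idea discussed in this thread. This is one possible reading, not the driver's actual code: `pending_cvq` and the helper names are invented, and a real implementation would diff this desired state against device state from the worker. The point is that memory stays bounded no matter how many requests pile up before the worker runs.

```c
/* Hypothetical bounded storage for deferred cvq commands: keep only
 * the latest rx-mode state and a bitmap of desired VLAN ids, so a
 * burst of requests coalesces instead of growing a queue. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define VLAN_N_VID 4096

struct pending_cvq {
	bool rx_mode_dirty;              /* one slot: later requests overwrite */
	bool promisc, allmulti;          /* latest requested rx mode */
	bool vids_dirty;
	uint64_t vids[VLAN_N_VID / 64];  /* desired VLAN membership */
};

static void request_rx_mode(struct pending_cvq *p, bool promisc, bool allmulti)
{
	p->promisc = promisc;    /* overwrite rather than queue: only the */
	p->allmulti = allmulti;  /* final state needs to reach the device */
	p->rx_mode_dirty = true;
}

static void request_vid(struct pending_cvq *p, uint16_t vid, bool add)
{
	if (add)
		p->vids[vid / 64] |= UINT64_C(1) << (vid % 64);
	else
		p->vids[vid / 64] &= ~(UINT64_C(1) << (vid % 64));
	p->vids_dirty = true;    /* worker later diffs this against device state */
}

static bool vid_requested(const struct pending_cvq *p, uint16_t vid)
{
	return p->vids[vid / 64] >> (vid % 64) & 1;
}
```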

>
> > > have a tree
> > > or a bitmap with vids to add/remove?
> >
> > Probably.
> >
> > Thanks
> >
> > >
> > >
> > >
> > > > >
> > > > > >
> > > > > >>>>> 2- still handle surprise removal correctly by waking in that case
> > > >
> > > > This is basically what version 1 did?
> > > >
> > > > https://lore.kernel.org/lkml/6026e801-6fda-fee9-a69b-d06a80368621@redhat.com/t/
> > > >
> > > > Thanks
> > >
> > > Yes - except the timeout part.
> > >
> > >
> > > > > >>>>>
> > > > > >>>>>
> > > > > >>>>>
> > > > > >>>>>>> ---
> > > > > >>>>>>>      drivers/net/virtio_net.c | 4 +++-
> > > > > >>>>>>>      1 file changed, 3 insertions(+), 1 deletion(-)
> > > > > >>>>>>>
> > > > > >>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > >>>>>>> index 9f3b1d6ac33d..e7533f29b219 100644
> > > > > >>>>>>> --- a/drivers/net/virtio_net.c
> > > > > >>>>>>> +++ b/drivers/net/virtio_net.c
> > > > > >>>>>>> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > >>>>>>>              * into the hypervisor, so the request should be handled immediately.
> > > > > >>>>>>>              */
> > > > > >>>>>>>             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > >>>>>>> -              !virtqueue_is_broken(vi->cvq))
> > > > > >>>>>>> +              !virtqueue_is_broken(vi->cvq)) {
> > > > > >>>>>>> +               cond_resched();
> > > > > >>>>>>>                     cpu_relax();
> > > > > >>>>>>> +       }
> > > > > >>>>>>>
> > > > > >>>>>>>             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > >>>>>>>      }
> > > > > >>>>>>> --
> > > > > >>>>>>> 2.39.3
> > > > > >>>>>>>
> > > > > >>>>>>> _______________________________________________
> > > > > >>>>>>> Virtualization mailing list
> > > > > >>>>>>> Virtualization@lists.linux-foundation.org
> > > > > >>>>>>> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > >>>>>
> > > > > >>>
> > > > > >
> > > > >
> > >
>



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-26  1:55                         ` Jason Wang
@ 2023-07-26 11:37                           ` Michael S. Tsirkin
  2023-07-27  6:03                             ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-07-26 11:37 UTC (permalink / raw)
  To: Jason Wang
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Wed, Jul 26, 2023 at 09:55:37AM +0800, Jason Wang wrote:
> On Tue, Jul 25, 2023 at 3:36 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Tue, Jul 25, 2023 at 11:07:40AM +0800, Jason Wang wrote:
> > > On Mon, Jul 24, 2023 at 3:17 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Mon, Jul 24, 2023 at 02:52:05PM +0800, Jason Wang wrote:
> > > > > On Sat, Jul 22, 2023 at 4:18 AM Maxime Coquelin
> > > > > <maxime.coquelin@redhat.com> wrote:
> > > > > >
> > > > > >
> > > > > >
> > > > > > On 7/21/23 17:10, Michael S. Tsirkin wrote:
> > > > > > > On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> > > > > > >>
> > > > > > >>
> > > > > > >> On 7/21/23 16:45, Michael S. Tsirkin wrote:
> > > > > > >>> On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> > > > > > >>>>
> > > > > > >>>>
> > > > > > >>>> On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > > > > > >>>>> On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > > > > > >>>>>> On 7/20/23 1:38 AM, Jason Wang wrote:
> > > > > > >>>>>>>
> > > > > > >>>>>>> Adding cond_resched() to the command waiting loop for a better
> > > > > > >>>>>>> co-operation with the scheduler. This allows to give CPU a breath to
> > > > > > >>>>>>> run other task(workqueue) instead of busy looping when preemption is
> > > > > > >>>>>>> not allowed on a device whose CVQ might be slow.
> > > > > > >>>>>>>
> > > > > > >>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > >>>>>>
> > > > > > >>>>>> This still leaves hung processes, but at least it doesn't pin the CPU any
> > > > > > >>>>>> more.  Thanks.
> > > > > > >>>>>> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > > > > > >>>>>>
> > > > > > >>>>>
> > > > > > >>>>> I'd like to see a full solution
> > > > > > >>>>> 1- block until interrupt
> > > > >
> > > > > I remember in previous versions, you worried about the extra MSI
> > > > > vector. (Maybe I was wrong).
> > > > >
> > > > > > >>>>
> > > > > > >>>> Would it make sense to also have a timeout?
> > > > > > >>>> And when timeout expires, set FAILED bit in device status?
> > > > > > >>>
> > > > > > >>> virtio spec does not set any limits on the timing of vq
> > > > > > >>> processing.
> > > > > > >>
> > > > > > >> Indeed, but I thought the driver could decide it is too long for it.
> > > > > > >>
> > > > > > >> The issue is we keep waiting with rtnl locked, it can quickly make the
> > > > > > >> system unusable.
> > > > > > >
> > > > > > > if this is a problem we should find a way not to keep rtnl
> > > > > > > locked indefinitely.
> > > > >
> > > > > Any ideas on this direction? Simply dropping rtnl during the busy loop
> > > > > will result in a lot of races. This seems to require non-trivial
> > > > > changes in the networking core.
> > > > >
> > > > > >
> > > > > >  From the tests I have done, I think it is. With OVS, a reconfiguration
> > > > > > is performed when the VDUSE device is added, and when a MLX5 device is
> > > > > > in the same bridge, it ends up doing an ioctl() that tries to take the
> > > > > > rtnl lock. In this configuration, it is not possible to kill OVS because
> > > > > > it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> > > > > > net.
> > > > >
> > > > > Yeah, basically, any RTNL users would be blocked forever.
> > > > >
> > > > > And the infinite loop has other side effects like it blocks the freezer to work.
> > > > >
> > > > > To summarize, there are three issues
> > > > >
> > > > > 1) busy polling
> > > > > 2) breaks freezer
> > > > > 3) hold RTNL during the loop
> > > > >
> > > > > Solving 3 may help somehow for 2 e.g some pm routine e.g wireguard or
> > > > > even virtnet_restore() itself may try to hold the lock.
> > > >
> > > > Yep. So my feeling currently is, the only real fix is to actually
> > > > queue up the work in software.
> > >
> > > Do you mean something like:
> > >
> > > rtnl_lock();
> > > queue up the work
> > > rtnl_unlock();
> > > return success;
> > >
> > > ?
> >
> > yes
> 
> We will lose the error reporting, is it a real problem or not?

Fundamental, isn't it? Maybe we want a per-device flag for async commands,
and VDUSE will set it while hardware virtio won't.
This way we only lose error reporting for VDUSE.
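One way to read this suggestion, as a sketch (all names here are invented for illustration; the dispatch would of course live inside the driver's command path): transports whose control queue may stall indefinitely opt into fire-and-forget commands, while hardware virtio stays synchronous and keeps its error reporting.

```c
/* Hypothetical per-device flag choosing sync vs queued cvq commands.
 * "async_cmds" would be set by the transport, e.g. true for VDUSE. */
#include <assert.h>
#include <stdbool.h>

struct vdev {
	bool async_cmds;
};

enum { PATH_NONE, PATH_SYNC, PATH_ASYNC };
static int last_path = PATH_NONE;   /* for demonstration only */

static bool send_cmd_sync(struct vdev *d)
{
	(void)d;
	last_path = PATH_SYNC;   /* real code: kick, wait, read status */
	return true;             /* device's actual status is returned */
}

static bool queue_cmd_async(struct vdev *d)
{
	(void)d;
	last_path = PATH_ASYNC;  /* real code: store/coalesce for a worker;
				  * the device's status is not reported */
	return true;
}

/* Command path picks the sync or async variant per device. */
static bool send_command(struct vdev *d)
{
	return d->async_cmds ? queue_cmd_async(d) : send_cmd_sync(d);
}
```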

> >
> >
> > > > It's mostly trivial to limit
> > > > memory consumption, vid's is the
> > > > only one where it would make sense to have more than
> > > > 1 command of a given type outstanding.
> > >
> > > And rx mode so this implies we will fail any command if the previous
> > > work is not finished.
> >
> > don't fail it, store it.
> 
> Ok.
> 
> Thanks
> 
> >
> > > > have a tree
> > > > or a bitmap with vids to add/remove?
> > >
> > > Probably.
> > >
> > > Thanks
> > >
> > > >
> > > >
> > > >
> > > > > >
> > > > > > >
> > > > > > >>>>> 2- still handle surprise removal correctly by waking in that case
> > > > >
> > > > > This is basically what version 1 did?
> > > > >
> > > > > https://lore.kernel.org/lkml/6026e801-6fda-fee9-a69b-d06a80368621@redhat.com/t/
> > > > >
> > > > > Thanks
> > > >
> > > > Yes - except the timeout part.
> > > >
> > > >
> > > > > > >>>>>
> > > > > > >>>>>
> > > > > > >>>>>
> > > > > > >>>>>>> ---
> > > > > > >>>>>>>      drivers/net/virtio_net.c | 4 +++-
> > > > > > >>>>>>>      1 file changed, 3 insertions(+), 1 deletion(-)
> > > > > > >>>>>>>
> > > > > > >>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > >>>>>>> index 9f3b1d6ac33d..e7533f29b219 100644
> > > > > > >>>>>>> --- a/drivers/net/virtio_net.c
> > > > > > >>>>>>> +++ b/drivers/net/virtio_net.c
> > > > > > >>>>>>> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > >>>>>>>              * into the hypervisor, so the request should be handled immediately.
> > > > > > >>>>>>>              */
> > > > > > >>>>>>>             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > >>>>>>> -              !virtqueue_is_broken(vi->cvq))
> > > > > > >>>>>>> +              !virtqueue_is_broken(vi->cvq)) {
> > > > > > >>>>>>> +               cond_resched();
> > > > > > >>>>>>>                     cpu_relax();
> > > > > > >>>>>>> +       }
> > > > > > >>>>>>>
> > > > > > >>>>>>>             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > >>>>>>>      }
> > > > > > >>>>>>> --
> > > > > > >>>>>>> 2.39.3
> > > > > > >>>>>>>
> > > > > > >>>>>>> _______________________________________________
> > > > > > >>>>>>> Virtualization mailing list
> > > > > > >>>>>>> Virtualization@lists.linux-foundation.org
> > > > > > >>>>>>> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > >>>>>
> > > > > > >>>
> > > > > > >
> > > > > >
> > > >
> >


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-26 11:37                           ` Michael S. Tsirkin
@ 2023-07-27  6:03                             ` Jason Wang
  2023-07-27  6:10                               ` Michael S. Tsirkin
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-07-27  6:03 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Wed, Jul 26, 2023 at 7:38 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Jul 26, 2023 at 09:55:37AM +0800, Jason Wang wrote:
> > On Tue, Jul 25, 2023 at 3:36 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Tue, Jul 25, 2023 at 11:07:40AM +0800, Jason Wang wrote:
> > > > On Mon, Jul 24, 2023 at 3:17 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Mon, Jul 24, 2023 at 02:52:05PM +0800, Jason Wang wrote:
> > > > > > On Sat, Jul 22, 2023 at 4:18 AM Maxime Coquelin
> > > > > > <maxime.coquelin@redhat.com> wrote:
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > On 7/21/23 17:10, Michael S. Tsirkin wrote:
> > > > > > > > On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> > > > > > > >>
> > > > > > > >>
> > > > > > > >> On 7/21/23 16:45, Michael S. Tsirkin wrote:
> > > > > > > >>> On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> > > > > > > >>>>
> > > > > > > >>>>
> > > > > > > >>>> On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > > > > > > >>>>> On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > > > > > > >>>>>> On 7/20/23 1:38 AM, Jason Wang wrote:
> > > > > > > >>>>>>>
> > > > > > > >>>>>>> Adding cond_resched() to the command waiting loop for a better
> > > > > > > >>>>>>> co-operation with the scheduler. This allows to give CPU a breath to
> > > > > > > >>>>>>> run other task(workqueue) instead of busy looping when preemption is
> > > > > > > >>>>>>> not allowed on a device whose CVQ might be slow.
> > > > > > > >>>>>>>
> > > > > > > >>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > >>>>>>
> > > > > > > >>>>>> This still leaves hung processes, but at least it doesn't pin the CPU any
> > > > > > > >>>>>> more.  Thanks.
> > > > > > > >>>>>> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > > > > > > >>>>>>
> > > > > > > >>>>>
> > > > > > > >>>>> I'd like to see a full solution
> > > > > > > >>>>> 1- block until interrupt
> > > > > >
> > > > > > I remember in previous versions, you worried about the extra MSI
> > > > > > vector. (Maybe I was wrong).
> > > > > >
> > > > > > > >>>>
> > > > > > > >>>> Would it make sense to also have a timeout?
> > > > > > > >>>> And when timeout expires, set FAILED bit in device status?
> > > > > > > >>>
> > > > > > > >>> virtio spec does not set any limits on the timing of vq
> > > > > > > >>> processing.
> > > > > > > >>
> > > > > > > >> Indeed, but I thought the driver could decide it is too long for it.
> > > > > > > >>
> > > > > > > >> The issue is we keep waiting with rtnl locked, it can quickly make the
> > > > > > > >> system unusable.
> > > > > > > >
> > > > > > > > if this is a problem we should find a way not to keep rtnl
> > > > > > > > locked indefinitely.
> > > > > >
> > > > > > Any ideas on this direction? Simply dropping rtnl during the busy loop
> > > > > > will result in a lot of races. This seems to require non-trivial
> > > > > > changes in the networking core.
> > > > > >
> > > > > > >
> > > > > > >  From the tests I have done, I think it is. With OVS, a reconfiguration
> > > > > > > is performed when the VDUSE device is added, and when a MLX5 device is
> > > > > > > in the same bridge, it ends up doing an ioctl() that tries to take the
> > > > > > > rtnl lock. In this configuration, it is not possible to kill OVS because
> > > > > > > it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> > > > > > > net.
> > > > > >
> > > > > > Yeah, basically, any RTNL users would be blocked forever.
> > > > > >
> > > > > > And the infinite loop has other side effects like it blocks the freezer to work.
> > > > > >
> > > > > > To summarize, there are three issues
> > > > > >
> > > > > > 1) busy polling
> > > > > > 2) breaks freezer
> > > > > > 3) hold RTNL during the loop
> > > > > >
> > > > > > Solving 3 may help somehow for 2 e.g some pm routine e.g wireguard or
> > > > > > even virtnet_restore() itself may try to hold the lock.
> > > > >
> > > > > Yep. So my feeling currently is, the only real fix is to actually
> > > > > queue up the work in software.
> > > >
> > > > Do you mean something like:
> > > >
> > > > rtnl_lock();
> > > > queue up the work
> > > > rtnl_unlock();
> > > > return success;
> > > >
> > > > ?
> > >
> > > yes
> >
> > We will lose the error reporting, is it a real problem or not?
>
> Fundamental isn't it? Maybe we want a per-device flag for a asynch commands,
> and vduse will set it while hardware virtio won't.
> this way we only lose error reporting for vduse.

This problem is not VDUSE specific; DPUs/vDPA may suffer from it as
well. This might require more thought.

Thanks
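[The "queue up the work / don't fail it, store it" scheme discussed above can be modeled in a few lines. This is only a toy user-space sketch with invented names; the real driver would use a struct work_struct and virtnet_send_command() under RTNL, as patch 1 of this series does for rx mode.]

```c
/* Toy model of queueing the cvq command in software: the caller stores
 * the latest requested rx mode and returns immediately, so RTNL is
 * released quickly; a worker later pushes the most recent state to the
 * (possibly slow) device.  All names here are made up for illustration.
 */
#include <stdbool.h>

struct rx_mode_req {
	bool promisc;
	bool allmulti;
	bool pending;	/* a request was stored but not yet applied */
};

static struct rx_mode_req pending_req;
static struct rx_mode_req device_state;	/* what the device last saw */

/* Caller side: never blocks on the device.  A newer request simply
 * overwrites an older one ("don't fail it, store it"), which also
 * bounds memory consumption.  Note the trade-off discussed in this
 * thread: any device error is no longer visible to the caller. */
static bool submit_rx_mode(bool promisc, bool allmulti)
{
	pending_req.promisc = promisc;
	pending_req.allmulti = allmulti;
	pending_req.pending = true;
	return true;
}

/* Worker side: applies only the most recent stored state. */
static void rx_mode_work(void)
{
	if (!pending_req.pending)
		return;
	device_state.promisc = pending_req.promisc;
	device_state.allmulti = pending_req.allmulti;
	pending_req.pending = false;
}
```

[Intermediate requests are coalesced: only the last submitted state reaches the device when the worker runs.]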

>
> > >
> > >
> > > > > It's mostly trivial to limit
> > > > > memory consumption, vid's is the
> > > > > only one where it would make sense to have more than
> > > > > 1 command of a given type outstanding.
> > > >
> > > > And rx mode so this implies we will fail any command if the previous
> > > > work is not finished.
> > >
> > > don't fail it, store it.
> >
> > Ok.
> >
> > Thanks
> >
> > >
> > > > > have a tree
> > > > > or a bitmap with vids to add/remove?
> > > >
> > > > Probably.
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > >
> > > > >
> > > > > > >
> > > > > > > >
> > > > > > > >>>>> 2- still handle surprise removal correctly by waking in that case
> > > > > >
> > > > > > This is basically what version 1 did?
> > > > > >
> > > > > > https://lore.kernel.org/lkml/6026e801-6fda-fee9-a69b-d06a80368621@redhat.com/t/
> > > > > >
> > > > > > Thanks
> > > > >
> > > > > Yes - except the timeout part.
> > > > >
> > > > >
> > > > > > > >>>>>
> > > > > > > >>>>>
> > > > > > > >>>>>
> > > > > > > >>>>>>> ---
> > > > > > > >>>>>>>      drivers/net/virtio_net.c | 4 +++-
> > > > > > > >>>>>>>      1 file changed, 3 insertions(+), 1 deletion(-)
> > > > > > > >>>>>>>
> > > > > > > >>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > >>>>>>> index 9f3b1d6ac33d..e7533f29b219 100644
> > > > > > > >>>>>>> --- a/drivers/net/virtio_net.c
> > > > > > > >>>>>>> +++ b/drivers/net/virtio_net.c
> > > > > > > >>>>>>> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > > >>>>>>>              * into the hypervisor, so the request should be handled immediately.
> > > > > > > >>>>>>>              */
> > > > > > > >>>>>>>             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > >>>>>>> -              !virtqueue_is_broken(vi->cvq))
> > > > > > > >>>>>>> +              !virtqueue_is_broken(vi->cvq)) {
> > > > > > > >>>>>>> +               cond_resched();
> > > > > > > >>>>>>>                     cpu_relax();
> > > > > > > >>>>>>> +       }
> > > > > > > >>>>>>>
> > > > > > > >>>>>>>             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > >>>>>>>      }
> > > > > > > >>>>>>> --
> > > > > > > >>>>>>> 2.39.3
> > > > > > > >>>>>>>
> > > > > > > >>>>>>> _______________________________________________
> > > > > > > >>>>>>> Virtualization mailing list
> > > > > > > >>>>>>> Virtualization@lists.linux-foundation.org
> > > > > > > >>>>>>> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > > >>>>>
> > > > > > > >>>
> > > > > > > >
> > > > > > >
> > > > >
> > >
>



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-27  6:03                             ` Jason Wang
@ 2023-07-27  6:10                               ` Michael S. Tsirkin
  2023-07-27  8:59                                 ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-07-27  6:10 UTC (permalink / raw)
  To: Jason Wang
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Thu, Jul 27, 2023 at 02:03:59PM +0800, Jason Wang wrote:
> On Wed, Jul 26, 2023 at 7:38 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Wed, Jul 26, 2023 at 09:55:37AM +0800, Jason Wang wrote:
> > > On Tue, Jul 25, 2023 at 3:36 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Tue, Jul 25, 2023 at 11:07:40AM +0800, Jason Wang wrote:
> > > > > On Mon, Jul 24, 2023 at 3:17 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > >
> > > > > > On Mon, Jul 24, 2023 at 02:52:05PM +0800, Jason Wang wrote:
> > > > > > > On Sat, Jul 22, 2023 at 4:18 AM Maxime Coquelin
> > > > > > > <maxime.coquelin@redhat.com> wrote:
> > > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > On 7/21/23 17:10, Michael S. Tsirkin wrote:
> > > > > > > > > On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> > > > > > > > >>
> > > > > > > > >>
> > > > > > > > >> On 7/21/23 16:45, Michael S. Tsirkin wrote:
> > > > > > > > >>> On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> > > > > > > > >>>>
> > > > > > > > >>>>
> > > > > > > > >>>> On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > > > > > > > >>>>> On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > > > > > > > >>>>>> On 7/20/23 1:38 AM, Jason Wang wrote:
> > > > > > > > >>>>>>>
> > > > > > > > >>>>>>> Adding cond_resched() to the command waiting loop for a better
> > > > > > > > >>>>>>> co-operation with the scheduler. This allows to give CPU a breath to
> > > > > > > > >>>>>>> run other task(workqueue) instead of busy looping when preemption is
> > > > > > > > >>>>>>> not allowed on a device whose CVQ might be slow.
> > > > > > > > >>>>>>>
> > > > > > > > >>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > >>>>>>
> > > > > > > > >>>>>> This still leaves hung processes, but at least it doesn't pin the CPU any
> > > > > > > > >>>>>> more.  Thanks.
> > > > > > > > >>>>>> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > > > > > > > >>>>>>
> > > > > > > > >>>>>
> > > > > > > > >>>>> I'd like to see a full solution
> > > > > > > > >>>>> 1- block until interrupt
> > > > > > >
> > > > > > > I remember in previous versions, you worried about the extra MSI
> > > > > > > vector. (Maybe I was wrong).
> > > > > > >
> > > > > > > > >>>>
> > > > > > > > >>>> Would it make sense to also have a timeout?
> > > > > > > > >>>> And when timeout expires, set FAILED bit in device status?
> > > > > > > > >>>
> > > > > > > > >>> virtio spec does not set any limits on the timing of vq
> > > > > > > > >>> processing.
> > > > > > > > >>
> > > > > > > > >> Indeed, but I thought the driver could decide it is too long for it.
> > > > > > > > >>
> > > > > > > > >> The issue is we keep waiting with rtnl locked, it can quickly make the
> > > > > > > > >> system unusable.
> > > > > > > > >
> > > > > > > > > if this is a problem we should find a way not to keep rtnl
> > > > > > > > > locked indefinitely.
> > > > > > >
> > > > > > > Any ideas on this direction? Simply dropping rtnl during the busy loop
> > > > > > > will result in a lot of races. This seems to require non-trivial
> > > > > > > changes in the networking core.
> > > > > > >
> > > > > > > >
> > > > > > > >  From the tests I have done, I think it is. With OVS, a reconfiguration
> > > > > > > > is performed when the VDUSE device is added, and when a MLX5 device is
> > > > > > > > in the same bridge, it ends up doing an ioctl() that tries to take the
> > > > > > > > rtnl lock. In this configuration, it is not possible to kill OVS because
> > > > > > > > it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> > > > > > > > net.
> > > > > > >
> > > > > > > Yeah, basically, any RTNL users would be blocked forever.
> > > > > > >
> > > > > > > And the infinite loop has other side effects like it blocks the freezer to work.
> > > > > > >
> > > > > > > To summarize, there are three issues
> > > > > > >
> > > > > > > 1) busy polling
> > > > > > > 2) breaks freezer
> > > > > > > 3) hold RTNL during the loop
> > > > > > >
> > > > > > > Solving 3 may help somehow for 2 e.g some pm routine e.g wireguard or
> > > > > > > even virtnet_restore() itself may try to hold the lock.
> > > > > >
> > > > > > Yep. So my feeling currently is, the only real fix is to actually
> > > > > > queue up the work in software.
> > > > >
> > > > > Do you mean something like:
> > > > >
> > > > > rtnl_lock();
> > > > > queue up the work
> > > > > rtnl_unlock();
> > > > > return success;
> > > > >
> > > > > ?
> > > >
> > > > yes
> > >
> > > We will lose the error reporting, is it a real problem or not?
> >
> > Fundamental isn't it? Maybe we want a per-device flag for a asynch commands,
> > and vduse will set it while hardware virtio won't.
> > this way we only lose error reporting for vduse.
> 
> This problem is not VDUSE specific; DPUs/vDPA may suffer from it as
> well. This might require more thought.
> 
> Thanks

They really shouldn't - any NIC that takes forever to
program will create issues in the networking stack.
But if they do they can always set this flag too.

> >
> > > >
> > > >
> > > > > > It's mostly trivial to limit
> > > > > > memory consumption, vid's is the
> > > > > > only one where it would make sense to have more than
> > > > > > 1 command of a given type outstanding.
> > > > >
> > > > > And rx mode so this implies we will fail any command if the previous
> > > > > work is not finished.
> > > >
> > > > don't fail it, store it.
> > >
> > > Ok.
> > >
> > > Thanks
> > >
> > > >
> > > > > > have a tree
> > > > > > or a bitmap with vids to add/remove?
> > > > >
> > > > > Probably.
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > >>>>> 2- still handle surprise removal correctly by waking in that case
> > > > > > >
> > > > > > > This is basically what version 1 did?
> > > > > > >
> > > > > > > https://lore.kernel.org/lkml/6026e801-6fda-fee9-a69b-d06a80368621@redhat.com/t/
> > > > > > >
> > > > > > > Thanks
> > > > > >
> > > > > > Yes - except the timeout part.
> > > > > >
> > > > > >
> > > > > > > > >>>>>
> > > > > > > > >>>>>
> > > > > > > > >>>>>
> > > > > > > > >>>>>>> ---
> > > > > > > > >>>>>>>      drivers/net/virtio_net.c | 4 +++-
> > > > > > > > >>>>>>>      1 file changed, 3 insertions(+), 1 deletion(-)
> > > > > > > > >>>>>>>
> > > > > > > > >>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > >>>>>>> index 9f3b1d6ac33d..e7533f29b219 100644
> > > > > > > > >>>>>>> --- a/drivers/net/virtio_net.c
> > > > > > > > >>>>>>> +++ b/drivers/net/virtio_net.c
> > > > > > > > >>>>>>> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > > > >>>>>>>              * into the hypervisor, so the request should be handled immediately.
> > > > > > > > >>>>>>>              */
> > > > > > > > >>>>>>>             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > > >>>>>>> -              !virtqueue_is_broken(vi->cvq))
> > > > > > > > >>>>>>> +              !virtqueue_is_broken(vi->cvq)) {
> > > > > > > > >>>>>>> +               cond_resched();
> > > > > > > > >>>>>>>                     cpu_relax();
> > > > > > > > >>>>>>> +       }
> > > > > > > > >>>>>>>
> > > > > > > > >>>>>>>             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > >>>>>>>      }
> > > > > > > > >>>>>>> --
> > > > > > > > >>>>>>> 2.39.3
> > > > > > > > >>>>>>>
> > > > > > > > >>>>>>> _______________________________________________
> > > > > > > > >>>>>>> Virtualization mailing list
> > > > > > > > >>>>>>> Virtualization@lists.linux-foundation.org
> > > > > > > > >>>>>>> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > > > >>>>>
> > > > > > > > >>>
> > > > > > > > >
> > > > > > > >
> > > > > >
> > > >
> >



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-27  6:10                               ` Michael S. Tsirkin
@ 2023-07-27  8:59                                 ` Jason Wang
  2023-07-27  9:46                                   ` Michael S. Tsirkin
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-07-27  8:59 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Thu, Jul 27, 2023 at 2:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Thu, Jul 27, 2023 at 02:03:59PM +0800, Jason Wang wrote:
> > On Wed, Jul 26, 2023 at 7:38 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Wed, Jul 26, 2023 at 09:55:37AM +0800, Jason Wang wrote:
> > > > On Tue, Jul 25, 2023 at 3:36 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Tue, Jul 25, 2023 at 11:07:40AM +0800, Jason Wang wrote:
> > > > > > On Mon, Jul 24, 2023 at 3:17 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > >
> > > > > > > On Mon, Jul 24, 2023 at 02:52:05PM +0800, Jason Wang wrote:
> > > > > > > > On Sat, Jul 22, 2023 at 4:18 AM Maxime Coquelin
> > > > > > > > <maxime.coquelin@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > On 7/21/23 17:10, Michael S. Tsirkin wrote:
> > > > > > > > > > On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> > > > > > > > > >>
> > > > > > > > > >>
> > > > > > > > > >> On 7/21/23 16:45, Michael S. Tsirkin wrote:
> > > > > > > > > >>> On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> > > > > > > > > >>>>
> > > > > > > > > >>>>
> > > > > > > > > >>>> On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > > > > > > > > >>>>> On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > > > > > > > > >>>>>> On 7/20/23 1:38 AM, Jason Wang wrote:
> > > > > > > > > >>>>>>>
> > > > > > > > > >>>>>>> Adding cond_resched() to the command waiting loop for a better
> > > > > > > > > >>>>>>> co-operation with the scheduler. This allows to give CPU a breath to
> > > > > > > > > >>>>>>> run other task(workqueue) instead of busy looping when preemption is
> > > > > > > > > >>>>>>> not allowed on a device whose CVQ might be slow.
> > > > > > > > > >>>>>>>
> > > > > > > > > >>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > > >>>>>>
> > > > > > > > > >>>>>> This still leaves hung processes, but at least it doesn't pin the CPU any
> > > > > > > > > >>>>>> more.  Thanks.
> > > > > > > > > >>>>>> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > > > > > > > > >>>>>>
> > > > > > > > > >>>>>
> > > > > > > > > >>>>> I'd like to see a full solution
> > > > > > > > > >>>>> 1- block until interrupt
> > > > > > > >
> > > > > > > > I remember in previous versions, you worried about the extra MSI
> > > > > > > > vector. (Maybe I was wrong).
> > > > > > > >
> > > > > > > > > >>>>
> > > > > > > > > >>>> Would it make sense to also have a timeout?
> > > > > > > > > >>>> And when timeout expires, set FAILED bit in device status?
> > > > > > > > > >>>
> > > > > > > > > >>> virtio spec does not set any limits on the timing of vq
> > > > > > > > > >>> processing.
> > > > > > > > > >>
> > > > > > > > > >> Indeed, but I thought the driver could decide it is too long for it.
> > > > > > > > > >>
> > > > > > > > > >> The issue is we keep waiting with rtnl locked, it can quickly make the
> > > > > > > > > >> system unusable.
> > > > > > > > > >
> > > > > > > > > > if this is a problem we should find a way not to keep rtnl
> > > > > > > > > > locked indefinitely.
> > > > > > > >
> > > > > > > > Any ideas on this direction? Simply dropping rtnl during the busy loop
> > > > > > > > will result in a lot of races. This seems to require non-trivial
> > > > > > > > changes in the networking core.
> > > > > > > >
> > > > > > > > >
> > > > > > > > >  From the tests I have done, I think it is. With OVS, a reconfiguration
> > > > > > > > > is performed when the VDUSE device is added, and when a MLX5 device is
> > > > > > > > > in the same bridge, it ends up doing an ioctl() that tries to take the
> > > > > > > > > rtnl lock. In this configuration, it is not possible to kill OVS because
> > > > > > > > > it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> > > > > > > > > net.
> > > > > > > >
> > > > > > > > Yeah, basically, any RTNL users would be blocked forever.
> > > > > > > >
> > > > > > > > And the infinite loop has other side effects like it blocks the freezer to work.
> > > > > > > >
> > > > > > > > To summarize, there are three issues
> > > > > > > >
> > > > > > > > 1) busy polling
> > > > > > > > 2) breaks freezer
> > > > > > > > 3) hold RTNL during the loop
> > > > > > > >
> > > > > > > > Solving 3 may help somehow for 2 e.g some pm routine e.g wireguard or
> > > > > > > > even virtnet_restore() itself may try to hold the lock.
> > > > > > >
> > > > > > > Yep. So my feeling currently is, the only real fix is to actually
> > > > > > > queue up the work in software.
> > > > > >
> > > > > > Do you mean something like:
> > > > > >
> > > > > > rtnl_lock();
> > > > > > queue up the work
> > > > > > rtnl_unlock();
> > > > > > return success;
> > > > > >
> > > > > > ?
> > > > >
> > > > > yes
> > > >
> > > > We will lose the error reporting, is it a real problem or not?
> > >
> > > Fundamental isn't it? Maybe we want a per-device flag for a asynch commands,
> > > and vduse will set it while hardware virtio won't.
> > > this way we only lose error reporting for vduse.
> >
> > This problem is not VDUSE specific; DPUs/vDPA may suffer from it as
> > well. This might require more thought.
> >
> > Thanks
>
> They really shouldn't - any NIC that takes forever to
> program will create issues in the networking stack.

Unfortunately, it's not rare as the device/cvq could be implemented
via firmware or software.

> But if they do they can always set this flag too.

This may have false negatives and may confuse the management.

Maybe we can extend the networking core to allow some device specific
configurations to be done with device specific lock without rtnl. For
example, split the set_channels to

pre_set_channels
set_channels
post_set_channels

The device specific part could be done in pre and post without a rtnl lock?
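[Roughly, the split could have the shape below. None of these hooks exist in today's ethtool_ops; this is just a user-space sketch of the proposed idea, with the locking reduced to comments. The point is that the slow device work in pre_set_channels() can still fail synchronously, so error reporting is preserved without holding rtnl across it.]

```c
/* Hypothetical three-step set_channels: slow device-specific work runs
 * in pre/post under a per-device lock, and only the part the networking
 * core must see atomically runs under rtnl. */
#include <stdbool.h>

struct fake_dev {
	int channels;
	bool prepared;
};

struct channel_ops {
	int (*pre_set_channels)(struct fake_dev *d, int n);  /* no rtnl */
	int (*set_channels)(struct fake_dev *d, int n);      /* rtnl held */
	void (*post_set_channels)(struct fake_dev *d);       /* no rtnl */
};

static int generic_set_channels(struct fake_dev *d,
				const struct channel_ops *ops, int n)
{
	int err = ops->pre_set_channels(d, n); /* device lock, may block */
	if (err)
		return err;	/* synchronous error reporting survives */
	/* rtnl_lock() would go here */
	err = ops->set_channels(d, n);
	/* rtnl_unlock() */
	ops->post_set_channels(d);
	return err;
}

/* A fake driver whose "device" rejects more than 8 queues, standing in
 * for a cvq command failure. */
static int fake_pre(struct fake_dev *d, int n)
{
	if (n > 8)
		return -1;
	d->prepared = true;
	return 0;
}

static int fake_set(struct fake_dev *d, int n)
{
	d->channels = n;
	return 0;
}

static void fake_post(struct fake_dev *d)
{
	d->prepared = false;
}

static const struct channel_ops fake_ops = {
	.pre_set_channels = fake_pre,
	.set_channels = fake_set,
	.post_set_channels = fake_post,
};
```

[A failing pre step returns the error to the caller and leaves the core-visible state untouched.]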

Thanks



>
> > >
> > > > >
> > > > >
> > > > > > > It's mostly trivial to limit
> > > > > > > memory consumption, vid's is the
> > > > > > > only one where it would make sense to have more than
> > > > > > > 1 command of a given type outstanding.
> > > > > >
> > > > > > And rx mode so this implies we will fail any command if the previous
> > > > > > work is not finished.
> > > > >
> > > > > don't fail it, store it.
> > > >
> > > > Ok.
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > > > > have a tree
> > > > > > > or a bitmap with vids to add/remove?
> > > > > >
> > > > > > Probably.
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > >>>>> 2- still handle surprise removal correctly by waking in that case
> > > > > > > >
> > > > > > > > This is basically what version 1 did?
> > > > > > > >
> > > > > > > > https://lore.kernel.org/lkml/6026e801-6fda-fee9-a69b-d06a80368621@redhat.com/t/
> > > > > > > >
> > > > > > > > Thanks
> > > > > > >
> > > > > > > Yes - except the timeout part.
> > > > > > >
> > > > > > >
> > > > > > > > > >>>>>
> > > > > > > > > >>>>>
> > > > > > > > > >>>>>
> > > > > > > > > >>>>>>> ---
> > > > > > > > > >>>>>>>      drivers/net/virtio_net.c | 4 +++-
> > > > > > > > > >>>>>>>      1 file changed, 3 insertions(+), 1 deletion(-)
> > > > > > > > > >>>>>>>
> > > > > > > > > >>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > > >>>>>>> index 9f3b1d6ac33d..e7533f29b219 100644
> > > > > > > > > >>>>>>> --- a/drivers/net/virtio_net.c
> > > > > > > > > >>>>>>> +++ b/drivers/net/virtio_net.c
> > > > > > > > > >>>>>>> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > > > > >>>>>>>              * into the hypervisor, so the request should be handled immediately.
> > > > > > > > > >>>>>>>              */
> > > > > > > > > >>>>>>>             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > > > >>>>>>> -              !virtqueue_is_broken(vi->cvq))
> > > > > > > > > >>>>>>> +              !virtqueue_is_broken(vi->cvq)) {
> > > > > > > > > >>>>>>> +               cond_resched();
> > > > > > > > > >>>>>>>                     cpu_relax();
> > > > > > > > > >>>>>>> +       }
> > > > > > > > > >>>>>>>
> > > > > > > > > >>>>>>>             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > > >>>>>>>      }
> > > > > > > > > >>>>>>> --
> > > > > > > > > >>>>>>> 2.39.3
> > > > > > > > > >>>>>>>
> > > > > > > > > >>>>>>> _______________________________________________
> > > > > > > > > >>>>>>> Virtualization mailing list
> > > > > > > > > >>>>>>> Virtualization@lists.linux-foundation.org
> > > > > > > > > >>>>>>> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > > > > >>>>>
> > > > > > > > > >>>
> > > > > > > > > >
> > > > > > > > >
> > > > > > >
> > > > >
> > >
>



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-27  8:59                                 ` Jason Wang
@ 2023-07-27  9:46                                   ` Michael S. Tsirkin
  2023-07-31  6:30                                     ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-07-27  9:46 UTC (permalink / raw)
  To: Jason Wang
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Thu, Jul 27, 2023 at 04:59:33PM +0800, Jason Wang wrote:
> > They really shouldn't - any NIC that takes forever to
> > program will create issues in the networking stack.
> 
> Unfortunately, it's not rare as the device/cvq could be implemented
> via firmware or software.

Currently that means one either has sane firmware with a scheduler that
can meet deadlines, or loses the ability to report errors back.

> > But if they do they can always set this flag too.
> 
> This may have false negatives and may confuse the management.
> 
> Maybe we can extend the networking core to allow some device specific
> configurations to be done with device specific lock without rtnl. For
> example, split the set_channels to
> 
> pre_set_channels
> set_channels
> post_set_channels
> 
> The device specific part could be done in pre and post without a rtnl lock?
> 
> Thanks


Would the benefit be that errors can be reported to userspace then?
Then maybe.  I think you will have to show how this works for at least
one card besides virtio.


-- 
MST



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-27  9:46                                   ` Michael S. Tsirkin
@ 2023-07-31  6:30                                     ` Jason Wang
  2023-08-08  2:30                                       ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-07-31  6:30 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Thu, Jul 27, 2023 at 5:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Thu, Jul 27, 2023 at 04:59:33PM +0800, Jason Wang wrote:
> > > They really shouldn't - any NIC that takes forever to
> > > program will create issues in the networking stack.
> >
> > Unfortunately, it's not rare as the device/cvq could be implemented
> > via firmware or software.
>
> Currently that means one either has sane firmware with a scheduler that
> can meet deadlines, or loses the ability to report errors back.
>
> > > But if they do they can always set this flag too.
> >
> > This may have false negatives and may confuse the management.
> >
> > Maybe we can extend the networking core to allow some device specific
> > configurations to be done with device specific lock without rtnl. For
> > example, split the set_channels to
> >
> > pre_set_channels
> > set_channels
> > post_set_channels
> >
> > The device specific part could be done in pre and post without a rtnl lock?
> >
> > Thanks
>
>
> Would the benefit be that errors can be reported to userspace then?
> Then maybe.  I think you will have to show how this works for at least
> one card besides virtio.

Even for virtio, this does not seem easy, as e.g. the
virtnet_send_command() and netif_set_real_num_tx_queues() calls need to
appear atomic to the networking core.

I wonder if we can reconsider using a timeout here and choose a sane
value as a starting point.
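[The timeout variant could look like the sketch below. The vq is mocked so the loop can be exercised outside the kernel; in the real driver the budget would be jiffies-based and the failure path would use BAD_RING()/virtio_break_device(), as the RFC version of this series did.]

```c
/* Poll the vq as today, but give up after a budget and mark the device
 * broken so later commands fail fast instead of spinning forever. */
#include <stdbool.h>

struct mock_vq {
	int completes_after;	/* polls until the device answers */
	bool broken;
};

static bool mock_get_buf(struct mock_vq *vq)
{
	if (vq->completes_after > 0) {
		vq->completes_after--;
		return false;
	}
	return true;
}

/* Returns true if the command completed, false on timeout. */
static bool wait_for_cmd(struct mock_vq *vq, int timeout_polls)
{
	int polls = 0;

	while (!mock_get_buf(vq) && !vq->broken) {
		if (++polls >= timeout_polls) {
			vq->broken = true; /* virtio_break_device() stand-in */
			return false;
		}
		/* cond_resched(); cpu_relax(); in the real loop */
	}
	return !vq->broken;
}
```

[A well-behaved device completes within the budget; a stuck one trips the timeout and is marked broken, which is exactly the policy question raised earlier: the spec sets no deadline, so the value would have to be a driver-side choice.]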

Thanks

>
>
> --
> MST
>



* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-31  6:30                                     ` Jason Wang
@ 2023-08-08  2:30                                       ` Jason Wang
  2023-08-10 19:41                                         ` Michael S. Tsirkin
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-08  2:30 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Mon, Jul 31, 2023 at 2:30 PM Jason Wang <jasowang@redhat.com> wrote:
>
> On Thu, Jul 27, 2023 at 5:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Thu, Jul 27, 2023 at 04:59:33PM +0800, Jason Wang wrote:
> > > > They really shouldn't - any NIC that takes forever to
> > > > program will create issues in the networking stack.
> > >
> > > Unfortunately, it's not rare as the device/cvq could be implemented
> > > via firmware or software.
> >
> > Currently that means one either has sane firmware with a scheduler that
> > can meet deadlines, or loses the ability to report errors back.
> >
> > > > But if they do they can always set this flag too.
> > >
> > > This may have false negatives and may confuse the management.
> > >
> > > Maybe we can extend the networking core to allow some device specific
> > > configurations to be done with device specific lock without rtnl. For
> > > example, split the set_channels to
> > >
> > > pre_set_channels
> > > set_channels
> > > post_set_channels
> > >
> > > The device specific part could be done in pre and post without a rtnl lock?
> > >
> > > Thanks
> >
> >
> > Would the benefit be that errors can be reported to userspace then?
> > Then maybe.  I think you will have to show how this works for at least
> > one card besides virtio.
>
> Even for virtio, this does not seem easy, as e.g. the
> virtnet_send_command() and netif_set_real_num_tx_queues() calls need to
> appear atomic to the networking core.
>
> I wonder if we can re-consider the way of a timeout here and choose a
> sane value as a start.

Michael, any more input on this?

Thanks

>
> Thanks
>
> >
> >
> > --
> > MST
> >


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-08-08  2:30                                       ` Jason Wang
@ 2023-08-10 19:41                                         ` Michael S. Tsirkin
  2023-08-11  2:23                                           ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-08-10 19:41 UTC (permalink / raw)
  To: Jason Wang
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Tue, Aug 08, 2023 at 10:30:56AM +0800, Jason Wang wrote:
> On Mon, Jul 31, 2023 at 2:30 PM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Thu, Jul 27, 2023 at 5:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Thu, Jul 27, 2023 at 04:59:33PM +0800, Jason Wang wrote:
> > > > > They really shouldn't - any NIC that takes forever to
> > > > > program will create issues in the networking stack.
> > > >
> > > > Unfortunately, it's not rare as the device/cvq could be implemented
> > > > via firmware or software.
> > >
> > > Currently that mean one either has sane firmware with a scheduler that
> > > can meet deadlines, or loses ability to report errors back.
> > >
> > > > > But if they do they can always set this flag too.
> > > >
> > > > This may have false negatives and may confuse the management.
> > > >
> > > > Maybe we can extend the networking core to allow some device specific
> > > > configurations to be done with device specific lock without rtnl. For
> > > > example, split the set_channels to
> > > >
> > > > pre_set_channels
> > > > set_channels
> > > > post_set_channels
> > > >
> > > > The device specific part could be done in pre and post without a rtnl lock?
> > > >
> > > > Thanks
> > >
> > >
> > > Would the benefit be that errors can be reported to userspace then?
> > > Then maybe.  I think you will have to show how this works for at least
> > > one card besides virtio.
> >
> > Even for virtio, this seems not easy, as e.g the
> > virtnet_send_command() and netif_set_real_num_tx_queues() need to
> > appear to be atomic to the networking core.
> >
> > I wonder if we can re-consider the way of a timeout here and choose a
> > sane value as a start.
> 
> Michael, any more input on this?
> 
> Thanks

I think this is just mission creep. We are trying to fix
vduse - let's do that for starters.

Recovering from firmware timeouts is far from trivial, and
assuming that a device which timed out will no longer
access memory is just as likely to cause memory corruption,
with worse results than an infinite spin.

I propose we fix this for vduse and assume hardware/firmware
is well behaved. Or maybe firmware that is not well behaved
will set the flag, losing the ability to report errors.



> >
> > Thanks
> >
> > >
> > >
> > > --
> > > MST
> > >


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-08-10 19:41                                         ` Michael S. Tsirkin
@ 2023-08-11  2:23                                           ` Jason Wang
  2023-08-11  5:42                                             ` Michael S. Tsirkin
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-11  2:23 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Fri, Aug 11, 2023 at 3:41 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Tue, Aug 08, 2023 at 10:30:56AM +0800, Jason Wang wrote:
> > On Mon, Jul 31, 2023 at 2:30 PM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Thu, Jul 27, 2023 at 5:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Thu, Jul 27, 2023 at 04:59:33PM +0800, Jason Wang wrote:
> > > > > > They really shouldn't - any NIC that takes forever to
> > > > > > program will create issues in the networking stack.
> > > > >
> > > > > Unfortunately, it's not rare as the device/cvq could be implemented
> > > > > via firmware or software.
> > > >
> > > > Currently that mean one either has sane firmware with a scheduler that
> > > > can meet deadlines, or loses ability to report errors back.
> > > >
> > > > > > But if they do they can always set this flag too.
> > > > >
> > > > > This may have false negatives and may confuse the management.
> > > > >
> > > > > Maybe we can extend the networking core to allow some device specific
> > > > > configurations to be done with device specific lock without rtnl. For
> > > > > example, split the set_channels to
> > > > >
> > > > > pre_set_channels
> > > > > set_channels
> > > > > post_set_channels
> > > > >
> > > > > The device specific part could be done in pre and post without a rtnl lock?
> > > > >
> > > > > Thanks
> > > >
> > > >
> > > > Would the benefit be that errors can be reported to userspace then?
> > > > Then maybe.  I think you will have to show how this works for at least
> > > > one card besides virtio.
> > >
> > > Even for virtio, this seems not easy, as e.g the
> > > virtnet_send_command() and netif_set_real_num_tx_queues() need to
> > > appear to be atomic to the networking core.
> > >
> > > I wonder if we can re-consider the way of a timeout here and choose a
> > > sane value as a start.
> >
> > Michael, any more input on this?
> >
> > Thanks
>
> I think this is just mission creep. We are trying to fix
> vduse - let's do that for starters.
>
> Recovering from firmware timeouts is far from trivial and
> just assuming that just because it timed out it will not
> access memory is just as likely to cause memory corruption
> with worse results than an infinite spin.

Yes, this might require support not only in the driver but also in the device.

>
> I propose we fix this for vduse and assume hardware/firmware
> is well behaved.

One major case is re-connection; in that case it might take
far longer than the kernel virtio-net driver expects.

So we could have a timeout in VDUSE and trap the CVQ, so that
VDUSE can return and fail early?

> Or maybe not well behaved firmware will
> set the flag losing error reporting ability.

This might be hard, since it would mean that not only the set but
also the get is unreliable.

Thanks

>
>
>
> > >
> > > Thanks
> > >
> > > >
> > > >
> > > > --
> > > > MST
> > > >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-08-11  2:23                                           ` Jason Wang
@ 2023-08-11  5:42                                             ` Michael S. Tsirkin
  2023-08-11  9:18                                               ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-08-11  5:42 UTC (permalink / raw)
  To: Jason Wang
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Fri, Aug 11, 2023 at 10:23:15AM +0800, Jason Wang wrote:
> On Fri, Aug 11, 2023 at 3:41 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Tue, Aug 08, 2023 at 10:30:56AM +0800, Jason Wang wrote:
> > > On Mon, Jul 31, 2023 at 2:30 PM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > On Thu, Jul 27, 2023 at 5:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Thu, Jul 27, 2023 at 04:59:33PM +0800, Jason Wang wrote:
> > > > > > > They really shouldn't - any NIC that takes forever to
> > > > > > > program will create issues in the networking stack.
> > > > > >
> > > > > > Unfortunately, it's not rare as the device/cvq could be implemented
> > > > > > via firmware or software.
> > > > >
> > > > > Currently that mean one either has sane firmware with a scheduler that
> > > > > can meet deadlines, or loses ability to report errors back.
> > > > >
> > > > > > > But if they do they can always set this flag too.
> > > > > >
> > > > > > This may have false negatives and may confuse the management.
> > > > > >
> > > > > > Maybe we can extend the networking core to allow some device specific
> > > > > > configurations to be done with device specific lock without rtnl. For
> > > > > > example, split the set_channels to
> > > > > >
> > > > > > pre_set_channels
> > > > > > set_channels
> > > > > > post_set_channels
> > > > > >
> > > > > > The device specific part could be done in pre and post without a rtnl lock?
> > > > > >
> > > > > > Thanks
> > > > >
> > > > >
> > > > > Would the benefit be that errors can be reported to userspace then?
> > > > > Then maybe.  I think you will have to show how this works for at least
> > > > > one card besides virtio.
> > > >
> > > > Even for virtio, this seems not easy, as e.g the
> > > > virtnet_send_command() and netif_set_real_num_tx_queues() need to
> > > > appear to be atomic to the networking core.
> > > >
> > > > I wonder if we can re-consider the way of a timeout here and choose a
> > > > sane value as a start.
> > >
> > > Michael, any more input on this?
> > >
> > > Thanks
> >
> > I think this is just mission creep. We are trying to fix
> > vduse - let's do that for starters.
> >
> > Recovering from firmware timeouts is far from trivial and
> > just assuming that just because it timed out it will not
> > access memory is just as likely to cause memory corruption
> > with worse results than an infinite spin.
> 
> Yes, this might require support not only in the driver
> 
> >
> > I propose we fix this for vduse and assume hardware/firmware
> > is well behaved.
> 
> One major case is the re-connection, in that case it might take
> whatever longer that the kernel virito-net driver expects.
> So we can have a timeout in VDUSE and trap CVQ then VDUSE can return
> and fail early?

Ugh, more mission creep; not at all my point. vduse should cache
values in the driver until someone manages to change the
net core to be more friendly to userspace devices.

> 
> > Or maybe not well behaved firmware will
> > set the flag losing error reporting ability.
> 
> This might be hard since it means not only the set but also the get is
> unreliable.
> 
> Thanks

/me shrugs



> >
> >
> >
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > >
> > > > > --
> > > > > MST
> > > > >
> >


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-08-11  5:42                                             ` Michael S. Tsirkin
@ 2023-08-11  9:18                                               ` Jason Wang
  2023-08-11  9:21                                                 ` Michael S. Tsirkin
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-11  9:18 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Fri, Aug 11, 2023 at 1:42 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Fri, Aug 11, 2023 at 10:23:15AM +0800, Jason Wang wrote:
> > On Fri, Aug 11, 2023 at 3:41 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Tue, Aug 08, 2023 at 10:30:56AM +0800, Jason Wang wrote:
> > > > On Mon, Jul 31, 2023 at 2:30 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > >
> > > > > On Thu, Jul 27, 2023 at 5:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > >
> > > > > > On Thu, Jul 27, 2023 at 04:59:33PM +0800, Jason Wang wrote:
> > > > > > > > They really shouldn't - any NIC that takes forever to
> > > > > > > > program will create issues in the networking stack.
> > > > > > >
> > > > > > > Unfortunately, it's not rare as the device/cvq could be implemented
> > > > > > > via firmware or software.
> > > > > >
> > > > > > Currently that mean one either has sane firmware with a scheduler that
> > > > > > can meet deadlines, or loses ability to report errors back.
> > > > > >
> > > > > > > > But if they do they can always set this flag too.
> > > > > > >
> > > > > > > This may have false negatives and may confuse the management.
> > > > > > >
> > > > > > > Maybe we can extend the networking core to allow some device specific
> > > > > > > configurations to be done with device specific lock without rtnl. For
> > > > > > > example, split the set_channels to
> > > > > > >
> > > > > > > pre_set_channels
> > > > > > > set_channels
> > > > > > > post_set_channels
> > > > > > >
> > > > > > > The device specific part could be done in pre and post without a rtnl lock?
> > > > > > >
> > > > > > > Thanks
> > > > > >
> > > > > >
> > > > > > Would the benefit be that errors can be reported to userspace then?
> > > > > > Then maybe.  I think you will have to show how this works for at least
> > > > > > one card besides virtio.
> > > > >
> > > > > Even for virtio, this seems not easy, as e.g the
> > > > > virtnet_send_command() and netif_set_real_num_tx_queues() need to
> > > > > appear to be atomic to the networking core.
> > > > >
> > > > > I wonder if we can re-consider the way of a timeout here and choose a
> > > > > sane value as a start.
> > > >
> > > > Michael, any more input on this?
> > > >
> > > > Thanks
> > >
> > > I think this is just mission creep. We are trying to fix
> > > vduse - let's do that for starters.
> > >
> > > Recovering from firmware timeouts is far from trivial and
> > > just assuming that just because it timed out it will not
> > > access memory is just as likely to cause memory corruption
> > > with worse results than an infinite spin.
> >
> > Yes, this might require support not only in the driver
> >
> > >
> > > I propose we fix this for vduse and assume hardware/firmware
> > > is well behaved.
> >
> > One major case is the re-connection, in that case it might take
> > whatever longer that the kernel virito-net driver expects.
> > So we can have a timeout in VDUSE and trap CVQ then VDUSE can return
> > and fail early?
>
> Ugh more mission creep. not at all my point. vduse should cache
> values in the driver,

What do you mean by values here? The cvq command?

Thanks

> until someone manages to change
> net core to be more friendly to userspace devices.
>
> >
> > > Or maybe not well behaved firmware will
> > > set the flag losing error reporting ability.
> >
> > This might be hard since it means not only the set but also the get is
> > unreliable.
> >
> > Thanks
>
> /me shrugs
>
>
>
> > >
> > >
> > >
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > >
> > > > > > --
> > > > > > MST
> > > > > >
> > >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-08-11  9:18                                               ` Jason Wang
@ 2023-08-11  9:21                                                 ` Michael S. Tsirkin
  2023-08-11  9:43                                                   ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-08-11  9:21 UTC (permalink / raw)
  To: Jason Wang
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Fri, Aug 11, 2023 at 05:18:51PM +0800, Jason Wang wrote:
> On Fri, Aug 11, 2023 at 1:42 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Fri, Aug 11, 2023 at 10:23:15AM +0800, Jason Wang wrote:
> > > On Fri, Aug 11, 2023 at 3:41 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Tue, Aug 08, 2023 at 10:30:56AM +0800, Jason Wang wrote:
> > > > > On Mon, Jul 31, 2023 at 2:30 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > >
> > > > > > On Thu, Jul 27, 2023 at 5:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > >
> > > > > > > On Thu, Jul 27, 2023 at 04:59:33PM +0800, Jason Wang wrote:
> > > > > > > > > They really shouldn't - any NIC that takes forever to
> > > > > > > > > program will create issues in the networking stack.
> > > > > > > >
> > > > > > > > Unfortunately, it's not rare as the device/cvq could be implemented
> > > > > > > > via firmware or software.
> > > > > > >
> > > > > > > Currently that mean one either has sane firmware with a scheduler that
> > > > > > > can meet deadlines, or loses ability to report errors back.
> > > > > > >
> > > > > > > > > But if they do they can always set this flag too.
> > > > > > > >
> > > > > > > > This may have false negatives and may confuse the management.
> > > > > > > >
> > > > > > > > Maybe we can extend the networking core to allow some device specific
> > > > > > > > configurations to be done with device specific lock without rtnl. For
> > > > > > > > example, split the set_channels to
> > > > > > > >
> > > > > > > > pre_set_channels
> > > > > > > > set_channels
> > > > > > > > post_set_channels
> > > > > > > >
> > > > > > > > The device specific part could be done in pre and post without a rtnl lock?
> > > > > > > >
> > > > > > > > Thanks
> > > > > > >
> > > > > > >
> > > > > > > Would the benefit be that errors can be reported to userspace then?
> > > > > > > Then maybe.  I think you will have to show how this works for at least
> > > > > > > one card besides virtio.
> > > > > >
> > > > > > Even for virtio, this seems not easy, as e.g the
> > > > > > virtnet_send_command() and netif_set_real_num_tx_queues() need to
> > > > > > appear to be atomic to the networking core.
> > > > > >
> > > > > > I wonder if we can re-consider the way of a timeout here and choose a
> > > > > > sane value as a start.
> > > > >
> > > > > Michael, any more input on this?
> > > > >
> > > > > Thanks
> > > >
> > > > I think this is just mission creep. We are trying to fix
> > > > vduse - let's do that for starters.
> > > >
> > > > Recovering from firmware timeouts is far from trivial and
> > > > just assuming that just because it timed out it will not
> > > > access memory is just as likely to cause memory corruption
> > > > with worse results than an infinite spin.
> > >
> > > Yes, this might require support not only in the driver
> > >
> > > >
> > > > I propose we fix this for vduse and assume hardware/firmware
> > > > is well behaved.
> > >
> > > One major case is the re-connection, in that case it might take
> > > whatever longer that the kernel virito-net driver expects.
> > > So we can have a timeout in VDUSE and trap CVQ then VDUSE can return
> > > and fail early?
> >
> > Ugh more mission creep. not at all my point. vduse should cache
> > values in the driver,
> 
> What do you mean by values here? The cvq command?
> 
> Thanks

The card status generally.

> > until someone manages to change
> > net core to be more friendly to userspace devices.
> >
> > >
> > > > Or maybe not well behaved firmware will
> > > > set the flag losing error reporting ability.
> > >
> > > This might be hard since it means not only the set but also the get is
> > > unreliable.
> > >
> > > Thanks
> >
> > /me shrugs
> >
> >
> >
> > > >
> > > >
> > > >
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > > > >
> > > > > > >
> > > > > > > --
> > > > > > > MST
> > > > > > >
> > > >
> >


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-08-11  9:21                                                 ` Michael S. Tsirkin
@ 2023-08-11  9:43                                                   ` Jason Wang
  2023-08-11  9:51                                                     ` Michael S. Tsirkin
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-11  9:43 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Fri, Aug 11, 2023 at 5:21 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Fri, Aug 11, 2023 at 05:18:51PM +0800, Jason Wang wrote:
> > On Fri, Aug 11, 2023 at 1:42 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Fri, Aug 11, 2023 at 10:23:15AM +0800, Jason Wang wrote:
> > > > On Fri, Aug 11, 2023 at 3:41 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Tue, Aug 08, 2023 at 10:30:56AM +0800, Jason Wang wrote:
> > > > > > On Mon, Jul 31, 2023 at 2:30 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > >
> > > > > > > On Thu, Jul 27, 2023 at 5:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Thu, Jul 27, 2023 at 04:59:33PM +0800, Jason Wang wrote:
> > > > > > > > > > They really shouldn't - any NIC that takes forever to
> > > > > > > > > > program will create issues in the networking stack.
> > > > > > > > >
> > > > > > > > > Unfortunately, it's not rare as the device/cvq could be implemented
> > > > > > > > > via firmware or software.
> > > > > > > >
> > > > > > > > Currently that mean one either has sane firmware with a scheduler that
> > > > > > > > can meet deadlines, or loses ability to report errors back.
> > > > > > > >
> > > > > > > > > > But if they do they can always set this flag too.
> > > > > > > > >
> > > > > > > > > This may have false negatives and may confuse the management.
> > > > > > > > >
> > > > > > > > > Maybe we can extend the networking core to allow some device specific
> > > > > > > > > configurations to be done with device specific lock without rtnl. For
> > > > > > > > > example, split the set_channels to
> > > > > > > > >
> > > > > > > > > pre_set_channels
> > > > > > > > > set_channels
> > > > > > > > > post_set_channels
> > > > > > > > >
> > > > > > > > > The device specific part could be done in pre and post without a rtnl lock?
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > >
> > > > > > > >
> > > > > > > > Would the benefit be that errors can be reported to userspace then?
> > > > > > > > Then maybe.  I think you will have to show how this works for at least
> > > > > > > > one card besides virtio.
> > > > > > >
> > > > > > > Even for virtio, this seems not easy, as e.g the
> > > > > > > virtnet_send_command() and netif_set_real_num_tx_queues() need to
> > > > > > > appear to be atomic to the networking core.
> > > > > > >
> > > > > > > I wonder if we can re-consider the way of a timeout here and choose a
> > > > > > > sane value as a start.
> > > > > >
> > > > > > Michael, any more input on this?
> > > > > >
> > > > > > Thanks
> > > > >
> > > > > I think this is just mission creep. We are trying to fix
> > > > > vduse - let's do that for starters.
> > > > >
> > > > > Recovering from firmware timeouts is far from trivial and
> > > > > just assuming that just because it timed out it will not
> > > > > access memory is just as likely to cause memory corruption
> > > > > with worse results than an infinite spin.
> > > >
> > > > Yes, this might require support not only in the driver
> > > >
> > > > >
> > > > > I propose we fix this for vduse and assume hardware/firmware
> > > > > is well behaved.
> > > >
> > > > One major case is the re-connection, in that case it might take
> > > > whatever longer that the kernel virito-net driver expects.
> > > > So we can have a timeout in VDUSE and trap CVQ then VDUSE can return
> > > > and fail early?
> > >
> > > Ugh more mission creep. not at all my point. vduse should cache
> > > values in the driver,
> >
> > What do you mean by values here? The cvq command?
> >
> > Thanks
>
> The card status generally.

Just to make sure I understand: the CVQ needs to be processed by
userspace now. How could we cache the status?

Thanks

>
> > > until someone manages to change
> > > net core to be more friendly to userspace devices.
> > >
> > > >
> > > > > Or maybe not well behaved firmware will
> > > > > set the flag losing error reporting ability.
> > > >
> > > > This might be hard since it means not only the set but also the get is
> > > > unreliable.
> > > >
> > > > Thanks
> > >
> > > /me shrugs
> > >
> > >
> > >
> > > > >
> > > > >
> > > > >
> > > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > --
> > > > > > > > MST
> > > > > > > >
> > > > >
> > >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-08-11  9:43                                                   ` Jason Wang
@ 2023-08-11  9:51                                                     ` Michael S. Tsirkin
  2023-08-11  9:54                                                       ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-08-11  9:51 UTC (permalink / raw)
  To: Jason Wang
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Fri, Aug 11, 2023 at 05:43:25PM +0800, Jason Wang wrote:
> On Fri, Aug 11, 2023 at 5:21 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Fri, Aug 11, 2023 at 05:18:51PM +0800, Jason Wang wrote:
> > > On Fri, Aug 11, 2023 at 1:42 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Fri, Aug 11, 2023 at 10:23:15AM +0800, Jason Wang wrote:
> > > > > On Fri, Aug 11, 2023 at 3:41 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > >
> > > > > > On Tue, Aug 08, 2023 at 10:30:56AM +0800, Jason Wang wrote:
> > > > > > > On Mon, Jul 31, 2023 at 2:30 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Thu, Jul 27, 2023 at 5:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > On Thu, Jul 27, 2023 at 04:59:33PM +0800, Jason Wang wrote:
> > > > > > > > > > > They really shouldn't - any NIC that takes forever to
> > > > > > > > > > > program will create issues in the networking stack.
> > > > > > > > > >
> > > > > > > > > > Unfortunately, it's not rare as the device/cvq could be implemented
> > > > > > > > > > via firmware or software.
> > > > > > > > >
> > > > > > > > > Currently that mean one either has sane firmware with a scheduler that
> > > > > > > > > can meet deadlines, or loses ability to report errors back.
> > > > > > > > >
> > > > > > > > > > > But if they do they can always set this flag too.
> > > > > > > > > >
> > > > > > > > > > This may have false negatives and may confuse the management.
> > > > > > > > > >
> > > > > > > > > > Maybe we can extend the networking core to allow some device specific
> > > > > > > > > > configurations to be done with device specific lock without rtnl. For
> > > > > > > > > > example, split the set_channels to
> > > > > > > > > >
> > > > > > > > > > pre_set_channels
> > > > > > > > > > set_channels
> > > > > > > > > > post_set_channels
> > > > > > > > > >
> > > > > > > > > > The device specific part could be done in pre and post without a rtnl lock?
> > > > > > > > > >
> > > > > > > > > > Thanks
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > Would the benefit be that errors can be reported to userspace then?
> > > > > > > > > Then maybe.  I think you will have to show how this works for at least
> > > > > > > > > one card besides virtio.
> > > > > > > >
> > > > > > > > Even for virtio, this seems not easy, as e.g the
> > > > > > > > virtnet_send_command() and netif_set_real_num_tx_queues() need to
> > > > > > > > appear to be atomic to the networking core.
> > > > > > > >
> > > > > > > > I wonder if we can re-consider the way of a timeout here and choose a
> > > > > > > > sane value as a start.
> > > > > > >
> > > > > > > Michael, any more input on this?
> > > > > > >
> > > > > > > Thanks
> > > > > >
> > > > > > I think this is just mission creep. We are trying to fix
> > > > > > vduse - let's do that for starters.
> > > > > >
> > > > > > Recovering from firmware timeouts is far from trivial and
> > > > > > just assuming that just because it timed out it will not
> > > > > > access memory is just as likely to cause memory corruption
> > > > > > with worse results than an infinite spin.
> > > > >
> > > > > Yes, this might require support not only in the driver
> > > > >
> > > > > >
> > > > > > I propose we fix this for vduse and assume hardware/firmware
> > > > > > is well behaved.
> > > > >
> > > > > One major case is the re-connection, in that case it might take
> > > > > whatever longer that the kernel virito-net driver expects.
> > > > > So we can have a timeout in VDUSE and trap CVQ then VDUSE can return
> > > > > and fail early?
> > > >
> > > > Ugh more mission creep. not at all my point. vduse should cache
> > > > values in the driver,
> > >
> > > What do you mean by values here? The cvq command?
> > >
> > > Thanks
> >
> > The card status generally.
> 
> Just to make sure I understand here. The CVQ needs to be processed by
> the userspace now. How could we cache the status?
> 
> Thanks

vduse will have to process it in the kernel.

> >
> > > > until someone manages to change
> > > > net core to be more friendly to userspace devices.
> > > >
> > > > >
> > > > > > Or maybe not well behaved firmware will
> > > > > > set the flag losing error reporting ability.
> > > > >
> > > > > This might be hard since it means not only the set but also the get is
> > > > > unreliable.
> > > > >
> > > > > Thanks
> > > >
> > > > /me shrugs
> > > >
> > > >
> > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > > > >
> > > > > > > > Thanks
> > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > --
> > > > > > > > > MST
> > > > > > > > >
> > > > > >
> > > >
> >


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-08-11  9:51                                                     ` Michael S. Tsirkin
@ 2023-08-11  9:54                                                       ` Jason Wang
  2023-08-11 10:12                                                         ` Michael S. Tsirkin
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Wang @ 2023-08-11  9:54 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Fri, Aug 11, 2023 at 5:51 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Fri, Aug 11, 2023 at 05:43:25PM +0800, Jason Wang wrote:
> > On Fri, Aug 11, 2023 at 5:21 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Fri, Aug 11, 2023 at 05:18:51PM +0800, Jason Wang wrote:
> > > > On Fri, Aug 11, 2023 at 1:42 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Fri, Aug 11, 2023 at 10:23:15AM +0800, Jason Wang wrote:
> > > > > > On Fri, Aug 11, 2023 at 3:41 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > >
> > > > > > > On Tue, Aug 08, 2023 at 10:30:56AM +0800, Jason Wang wrote:
> > > > > > > > On Mon, Jul 31, 2023 at 2:30 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > On Thu, Jul 27, 2023 at 5:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > > >
> > > > > > > > > > On Thu, Jul 27, 2023 at 04:59:33PM +0800, Jason Wang wrote:
> > > > > > > > > > > > They really shouldn't - any NIC that takes forever to
> > > > > > > > > > > > program will create issues in the networking stack.
> > > > > > > > > > >
> > > > > > > > > > > Unfortunately, it's not rare as the device/cvq could be implemented
> > > > > > > > > > > via firmware or software.
> > > > > > > > > >
> > > > > > > > > > Currently that mean one either has sane firmware with a scheduler that
> > > > > > > > > > can meet deadlines, or loses ability to report errors back.
> > > > > > > > > >
> > > > > > > > > > > > But if they do they can always set this flag too.
> > > > > > > > > > >
> > > > > > > > > > > This may have false negatives and may confuse the management.
> > > > > > > > > > >
> > > > > > > > > > > Maybe we can extend the networking core to allow some device specific
> > > > > > > > > > > configurations to be done with device specific lock without rtnl. For
> > > > > > > > > > > example, split the set_channels to
> > > > > > > > > > >
> > > > > > > > > > > pre_set_channels
> > > > > > > > > > > set_channels
> > > > > > > > > > > post_set_channels
> > > > > > > > > > >
> > > > > > > > > > > The device specific part could be done in pre and post without a rtnl lock?
> > > > > > > > > > >
> > > > > > > > > > > Thanks
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Would the benefit be that errors can be reported to userspace then?
> > > > > > > > > > Then maybe.  I think you will have to show how this works for at least
> > > > > > > > > > one card besides virtio.
> > > > > > > > >
> > > > > > > > > Even for virtio, this seems not easy, as e.g the
> > > > > > > > > virtnet_send_command() and netif_set_real_num_tx_queues() need to
> > > > > > > > > appear to be atomic to the networking core.
> > > > > > > > >
> > > > > > > > > I wonder if we can re-consider the way of a timeout here and choose a
> > > > > > > > > sane value as a start.
> > > > > > > >
> > > > > > > > Michael, any more input on this?
> > > > > > > >
> > > > > > > > Thanks
> > > > > > >
> > > > > > > I think this is just mission creep. We are trying to fix
> > > > > > > vduse - let's do that for starters.
> > > > > > >
> > > > > > > Recovering from firmware timeouts is far from trivial and
> > > > > > > just assuming that just because it timed out it will not
> > > > > > > access memory is just as likely to cause memory corruption
> > > > > > > with worse results than an infinite spin.
> > > > > >
> > > > > > Yes, this might require support not only in the driver
> > > > > >
> > > > > > >
> > > > > > > I propose we fix this for vduse and assume hardware/firmware
> > > > > > > is well behaved.
> > > > > >
> > > > > > One major case is the re-connection, in that case it might take
> > > > > > whatever longer than the kernel virtio-net driver expects.
> > > > > > So we can have a timeout in VDUSE and trap CVQ then VDUSE can return
> > > > > > and fail early?
> > > > >
> > > > > Ugh more mission creep. not at all my point. vduse should cache
> > > > > values in the driver,
> > > >
> > > > What do you mean by values here? The cvq command?
> > > >
> > > > Thanks
> > >
> > > The card status generally.
> >
> > Just to make sure I understand here. The CVQ needs to be processed by
> > the userspace now. How could we cache the status?
> >
> > Thanks
>
> vduse will have to process it in kernel.

Right, that's my understanding (trap CVQ).

Thanks

>
> > >
> > > > > until someone manages to change
> > > > > net core to be more friendly to userspace devices.
> > > > >
> > > > > >
> > > > > > > Or maybe not well behaved firmware will
> > > > > > > set the flag losing error reporting ability.
> > > > > >
> > > > > > This might be hard since it means not only the set but also the get is
> > > > > > unreliable.
> > > > > >
> > > > > > Thanks
> > > > >
> > > > > /me shrugs
> > > > >
> > > > >
> > > > >
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > --
> > > > > > > > > > MST
> > > > > > > > > >
> > > > > > >
> > > > >
> > >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-08-11  9:54                                                       ` Jason Wang
@ 2023-08-11 10:12                                                         ` Michael S. Tsirkin
  0 siblings, 0 replies; 42+ messages in thread
From: Michael S. Tsirkin @ 2023-08-11 10:12 UTC (permalink / raw)
  To: Jason Wang
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Fri, Aug 11, 2023 at 05:54:15PM +0800, Jason Wang wrote:
> On Fri, Aug 11, 2023 at 5:51 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Fri, Aug 11, 2023 at 05:43:25PM +0800, Jason Wang wrote:
> > > On Fri, Aug 11, 2023 at 5:21 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Fri, Aug 11, 2023 at 05:18:51PM +0800, Jason Wang wrote:
> > > > > On Fri, Aug 11, 2023 at 1:42 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > >
> > > > > > On Fri, Aug 11, 2023 at 10:23:15AM +0800, Jason Wang wrote:
> > > > > > > On Fri, Aug 11, 2023 at 3:41 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Tue, Aug 08, 2023 at 10:30:56AM +0800, Jason Wang wrote:
> > > > > > > > > On Mon, Jul 31, 2023 at 2:30 PM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > >
> > > > > > > > > > On Thu, Jul 27, 2023 at 5:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > On Thu, Jul 27, 2023 at 04:59:33PM +0800, Jason Wang wrote:
> > > > > > > > > > > > > They really shouldn't - any NIC that takes forever to
> > > > > > > > > > > > > program will create issues in the networking stack.
> > > > > > > > > > > >
> > > > > > > > > > > > Unfortunately, it's not rare as the device/cvq could be implemented
> > > > > > > > > > > > via firmware or software.
> > > > > > > > > > >
> > > > > > > > > > > Currently that means one either has sane firmware with a scheduler that
> > > > > > > > > > > can meet deadlines, or loses ability to report errors back.
> > > > > > > > > > >
> > > > > > > > > > > > > But if they do they can always set this flag too.
> > > > > > > > > > > >
> > > > > > > > > > > > This may have false negatives and may confuse the management.
> > > > > > > > > > > >
> > > > > > > > > > > > Maybe we can extend the networking core to allow some device specific
> > > > > > > > > > > > configurations to be done with device specific lock without rtnl. For
> > > > > > > > > > > > example, split the set_channels to
> > > > > > > > > > > >
> > > > > > > > > > > > pre_set_channels
> > > > > > > > > > > > set_channels
> > > > > > > > > > > > post_set_channels
> > > > > > > > > > > >
> > > > > > > > > > > > The device specific part could be done in pre and post without a rtnl lock?
> > > > > > > > > > > >
> > > > > > > > > > > > Thanks
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > Would the benefit be that errors can be reported to userspace then?
> > > > > > > > > > > Then maybe.  I think you will have to show how this works for at least
> > > > > > > > > > > one card besides virtio.
> > > > > > > > > >
> > > > > > > > > > Even for virtio, this seems not easy, as e.g the
> > > > > > > > > > virtnet_send_command() and netif_set_real_num_tx_queues() need to
> > > > > > > > > > appear to be atomic to the networking core.
> > > > > > > > > >
> > > > > > > > > > I wonder if we can re-consider the way of a timeout here and choose a
> > > > > > > > > > sane value as a start.
> > > > > > > > >
> > > > > > > > > Michael, any more input on this?
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > >
> > > > > > > > I think this is just mission creep. We are trying to fix
> > > > > > > > vduse - let's do that for starters.
> > > > > > > >
> > > > > > > > Recovering from firmware timeouts is far from trivial and
> > > > > > > > just assuming that just because it timed out it will not
> > > > > > > > access memory is just as likely to cause memory corruption
> > > > > > > > with worse results than an infinite spin.
> > > > > > >
> > > > > > > Yes, this might require support not only in the driver
> > > > > > >
> > > > > > > >
> > > > > > > > I propose we fix this for vduse and assume hardware/firmware
> > > > > > > > is well behaved.
> > > > > > >
> > > > > > > One major case is the re-connection, in that case it might take
> > > > > > > whatever longer than the kernel virtio-net driver expects.
> > > > > > > So we can have a timeout in VDUSE and trap CVQ then VDUSE can return
> > > > > > > and fail early?
> > > > > >
> > > > > > Ugh more mission creep. not at all my point. vduse should cache
> > > > > > values in the driver,
> > > > >
> > > > > What do you mean by values here? The cvq command?
> > > > >
> > > > > Thanks
> > > >
> > > > The card status generally.
> > >
> > > Just to make sure I understand here. The CVQ needs to be processed by
> > > the userspace now. How could we cache the status?
> > >
> > > Thanks
> >
> > vduse will have to process it in kernel.
> 
> Right, that's my understanding (trap CVQ).
> 
> Thanks

oh this is what you mean by trap. ok. I don't see a need
for a timeout then though.


> >
> > > >
> > > > > > until someone manages to change
> > > > > > net core to be more friendly to userspace devices.
> > > > > >
> > > > > > >
> > > > > > > > Or maybe not well behaved firmware will
> > > > > > > > set the flag losing error reporting ability.
> > > > > > >
> > > > > > > This might be hard since it means not only the set but also the get is
> > > > > > > unreliable.
> > > > > > >
> > > > > > > Thanks
> > > > > >
> > > > > > /me shrugs
> > > > > >
> > > > > >
> > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Thanks
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > --
> > > > > > > > > > > MST
> > > > > > > > > > >
> > > > > > > >
> > > > > >
> > > >
> >


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-24  6:46                 ` Michael S. Tsirkin
  2023-07-24  6:52                   ` Jason Wang
@ 2023-10-05 19:35                   ` Feng Liu
  2023-10-08  5:27                     ` Jason Wang
  1 sibling, 1 reply; 42+ messages in thread
From: Feng Liu @ 2023-10-05 19:35 UTC (permalink / raw)
  To: Jason Wang; +Cc: netdev, linux-kernel, virtualization, kuba, Bodong Wang



On 2023-07-24 a.m.2:46, Michael S. Tsirkin wrote:
> External email: Use caution opening links or attachments
> 
> 
> On Fri, Jul 21, 2023 at 10:18:03PM +0200, Maxime Coquelin wrote:
>>
>>
>> On 7/21/23 17:10, Michael S. Tsirkin wrote:
>>> On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
>>>>
>>>>
>>>> On 7/21/23 16:45, Michael S. Tsirkin wrote:
>>>>> On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
>>>>>>
>>>>>>
>>>>>> On 7/20/23 23:02, Michael S. Tsirkin wrote:
>>>>>>> On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
>>>>>>>> On 7/20/23 1:38 AM, Jason Wang wrote:
>>>>>>>>>
>>>>>>>>> Adding cond_resched() to the command waiting loop for a better
>>>>>>>>> co-operation with the scheduler. This allows to give CPU a breath to
>>>>>>>>> run other task(workqueue) instead of busy looping when preemption is
>>>>>>>>> not allowed on a device whose CVQ might be slow.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>>>>>>
>>>>>>>> This still leaves hung processes, but at least it doesn't pin the CPU any
>>>>>>>> more.  Thanks.
>>>>>>>> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
>>>>>>>>
>>>>>>>
>>>>>>> I'd like to see a full solution
>>>>>>> 1- block until interrupt
>>>>>>
>>>>>> Would it make sense to also have a timeout?
>>>>>> And when timeout expires, set FAILED bit in device status?
>>>>>
>>>>> virtio spec does not set any limits on the timing of vq
>>>>> processing.
>>>>
>>>> Indeed, but I thought the driver could decide it is too long for it.
>>>>
>>>> The issue is we keep waiting with rtnl locked, it can quickly make the
>>>> system unusable.
>>>
>>> if this is a problem we should find a way not to keep rtnl
>>> locked indefinitely.
>>
>>  From the tests I have done, I think it is. With OVS, a reconfiguration is
>> performed when the VDUSE device is added, and when a MLX5 device is
>> in the same bridge, it ends up doing an ioctl() that tries to take the
>> rtnl lock. In this configuration, it is not possible to kill OVS because
>> it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
>> net.
> 
> So for sure, we can queue up the work and process it later.
> The somewhat tricky part is limiting the memory consumption.
> 
> 


Hi Jason

Excuse me, is there any plan for when the v5 patch series will be sent
out? Will the v5 patches solve the problem of the ctrl vq's infinite
poll for buggy devices?

Thanks
Feng

>>>
>>>>>>> 2- still handle surprise removal correctly by waking in that case
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>>> ---
>>>>>>>>>       drivers/net/virtio_net.c | 4 +++-
>>>>>>>>>       1 file changed, 3 insertions(+), 1 deletion(-)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>>>>>>>> index 9f3b1d6ac33d..e7533f29b219 100644
>>>>>>>>> --- a/drivers/net/virtio_net.c
>>>>>>>>> +++ b/drivers/net/virtio_net.c
>>>>>>>>> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>>>>>>>>>               * into the hypervisor, so the request should be handled immediately.
>>>>>>>>>               */
>>>>>>>>>              while (!virtqueue_get_buf(vi->cvq, &tmp) &&
>>>>>>>>> -              !virtqueue_is_broken(vi->cvq))
>>>>>>>>> +              !virtqueue_is_broken(vi->cvq)) {
>>>>>>>>> +               cond_resched();
>>>>>>>>>                      cpu_relax();
>>>>>>>>> +       }
>>>>>>>>>
>>>>>>>>>              return vi->ctrl->status == VIRTIO_NET_OK;
>>>>>>>>>       }
>>>>>>>>> --
>>>>>>>>> 2.39.3
>>>>>>>>>
>>>>>>>>> _______________________________________________
>>>>>>>>> Virtualization mailing list
>>>>>>>>> Virtualization@lists.linux-foundation.org
>>>>>>>>> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
>>>>>>>
>>>>>
>>>
> 
> _______________________________________________
> Virtualization mailing list
> Virtualization@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-10-05 19:35                   ` Feng Liu
@ 2023-10-08  5:27                     ` Jason Wang
  0 siblings, 0 replies; 42+ messages in thread
From: Jason Wang @ 2023-10-08  5:27 UTC (permalink / raw)
  To: Feng Liu; +Cc: netdev, linux-kernel, virtualization, kuba, Bodong Wang

On Fri, Oct 6, 2023 at 3:35 AM Feng Liu <feliu@nvidia.com> wrote:
>
>
>
> On 2023-07-24 a.m.2:46, Michael S. Tsirkin wrote:
> > On Fri, Jul 21, 2023 at 10:18:03PM +0200, Maxime Coquelin wrote:
> >>
> >>
> >> On 7/21/23 17:10, Michael S. Tsirkin wrote:
> >>> On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> >>>>
> >>>>
> >>>> On 7/21/23 16:45, Michael S. Tsirkin wrote:
> >>>>> On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> >>>>>>
> >>>>>>
> >>>>>> On 7/20/23 23:02, Michael S. Tsirkin wrote:
> >>>>>>> On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> >>>>>>>> On 7/20/23 1:38 AM, Jason Wang wrote:
> >>>>>>>>>
> >>>>>>>>> Adding cond_resched() to the command waiting loop for a better
> >>>>>>>>> co-operation with the scheduler. This allows to give CPU a breath to
> >>>>>>>>> run other task(workqueue) instead of busy looping when preemption is
> >>>>>>>>> not allowed on a device whose CVQ might be slow.
> >>>>>>>>>
> >>>>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> >>>>>>>>
> >>>>>>>> This still leaves hung processes, but at least it doesn't pin the CPU any
> >>>>>>>> more.  Thanks.
> >>>>>>>> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> >>>>>>>>
> >>>>>>>
> >>>>>>> I'd like to see a full solution
> >>>>>>> 1- block until interrupt
> >>>>>>
> >>>>>> Would it make sense to also have a timeout?
> >>>>>> And when timeout expires, set FAILED bit in device status?
> >>>>>
> >>>>> virtio spec does not set any limits on the timing of vq
> >>>>> processing.
> >>>>
> >>>> Indeed, but I thought the driver could decide it is too long for it.
> >>>>
> >>>> The issue is we keep waiting with rtnl locked, it can quickly make the
> >>>> system unusable.
> >>>
> >>> if this is a problem we should find a way not to keep rtnl
> >>> locked indefinitely.
> >>
> >>  From the tests I have done, I think it is. With OVS, a reconfiguration is
> >> performed when the VDUSE device is added, and when a MLX5 device is
> >> in the same bridge, it ends up doing an ioctl() that tries to take the
> >> rtnl lock. In this configuration, it is not possible to kill OVS because
> >> it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> >> net.
> >
> > So for sure, we can queue up the work and process it later.
> > The somewhat tricky part is limiting the memory consumption.
> >
> >
>
>
> Hi Jason
>
> Excuse me, is there any plan for when the v5 patch series will be sent
> out? Will the v5 patches solve the problem of the ctrl vq's infinite
> poll for buggy devices?

We agreed to harden VDUSE.

It would be hard to solve this at the virtio-net level; see the
discussions before. It might require support from various layers
(e.g. the networking core).

We can use a workqueue etc. as a mitigation. If Michael is fine with
this, I can post v5.

Thanks

>
> Thanks
> Feng
>
> >>>
> >>>>>>> 2- still handle surprise removal correctly by waking in that case
> >>>>>>>
> >>>>>>>
> >>>>>>>
> >>>>>>>>> ---
> >>>>>>>>>       drivers/net/virtio_net.c | 4 +++-
> >>>>>>>>>       1 file changed, 3 insertions(+), 1 deletion(-)
> >>>>>>>>>
> >>>>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> >>>>>>>>> index 9f3b1d6ac33d..e7533f29b219 100644
> >>>>>>>>> --- a/drivers/net/virtio_net.c
> >>>>>>>>> +++ b/drivers/net/virtio_net.c
> >>>>>>>>> @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> >>>>>>>>>               * into the hypervisor, so the request should be handled immediately.
> >>>>>>>>>               */
> >>>>>>>>>              while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> >>>>>>>>> -              !virtqueue_is_broken(vi->cvq))
> >>>>>>>>> +              !virtqueue_is_broken(vi->cvq)) {
> >>>>>>>>> +               cond_resched();
> >>>>>>>>>                      cpu_relax();
> >>>>>>>>> +       }
> >>>>>>>>>
> >>>>>>>>>              return vi->ctrl->status == VIRTIO_NET_OK;
> >>>>>>>>>       }
> >>>>>>>>> --
> >>>>>>>>> 2.39.3
> >>>>>>>>>
> >>>>>>>>> _______________________________________________
> >>>>>>>>> Virtualization mailing list
> >>>>>>>>> Virtualization@lists.linux-foundation.org
> >>>>>>>>> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> >>>>>>>
> >>>>>
> >>>
> >
> > _______________________________________________
> > Virtualization mailing list
> > Virtualization@lists.linux-foundation.org
> > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2023-07-25  3:03                       ` Jason Wang
@ 2024-02-22 19:21                         ` Michael S. Tsirkin
  2024-02-26  5:08                           ` Jason Wang
  0 siblings, 1 reply; 42+ messages in thread
From: Michael S. Tsirkin @ 2024-02-22 19:21 UTC (permalink / raw)
  To: Jason Wang
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Tue, Jul 25, 2023 at 11:03:11AM +0800, Jason Wang wrote:
> On Mon, Jul 24, 2023 at 3:18 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Mon, Jul 24, 2023 at 02:52:49PM +0800, Jason Wang wrote:
> > > On Mon, Jul 24, 2023 at 2:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Fri, Jul 21, 2023 at 10:18:03PM +0200, Maxime Coquelin wrote:
> > > > >
> > > > >
> > > > > On 7/21/23 17:10, Michael S. Tsirkin wrote:
> > > > > > On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> > > > > > >
> > > > > > >
> > > > > > > On 7/21/23 16:45, Michael S. Tsirkin wrote:
> > > > > > > > On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > > > > > > > > > On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > > > > > > > > > > On 7/20/23 1:38 AM, Jason Wang wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > Adding cond_resched() to the command waiting loop for a better
> > > > > > > > > > > > co-operation with the scheduler. This allows to give CPU a breath to
> > > > > > > > > > > > run other task(workqueue) instead of busy looping when preemption is
> > > > > > > > > > > > not allowed on a device whose CVQ might be slow.
> > > > > > > > > > > >
> > > > > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > > > >
> > > > > > > > > > > This still leaves hung processes, but at least it doesn't pin the CPU any
> > > > > > > > > > > more.  Thanks.
> > > > > > > > > > > Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > I'd like to see a full solution
> > > > > > > > > > 1- block until interrupt
> > > > > > > > >
> > > > > > > > > Would it make sense to also have a timeout?
> > > > > > > > > And when timeout expires, set FAILED bit in device status?
> > > > > > > >
> > > > > > > > virtio spec does not set any limits on the timing of vq
> > > > > > > > processing.
> > > > > > >
> > > > > > > Indeed, but I thought the driver could decide it is too long for it.
> > > > > > >
> > > > > > > The issue is we keep waiting with rtnl locked, it can quickly make the
> > > > > > > system unusable.
> > > > > >
> > > > > > if this is a problem we should find a way not to keep rtnl
> > > > > > locked indefinitely.
> > > > >
> > > > > From the tests I have done, I think it is. With OVS, a reconfiguration is
> > > > > performed when the VDUSE device is added, and when a MLX5 device is
> > > > > in the same bridge, it ends up doing an ioctl() that tries to take the
> > > > > rtnl lock. In this configuration, it is not possible to kill OVS because
> > > > > it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> > > > > net.
> > > >
> > > > So for sure, we can queue up the work and process it later.
> > > > The somewhat tricky part is limiting the memory consumption.
> > >
> > > And it needs to sync with rtnl somehow, e.g device unregistering which
> > > seems not easy.
> > >
> > > Thanks
> >
> > since when does device unregister need to send cvq commands?
> 
> It doesn't do this now. But if we don't process the work under rtnl,
> we need to synchronize with device unregistering.
> 
> Thanks

But what's not easy about it?

> >
> > > >
> > > >
> > > > > >
> > > > > > > > > > 2- still handle surprise removal correctly by waking in that case
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > > ---
> > > > > > > > > > > >      drivers/net/virtio_net.c | 4 +++-
> > > > > > > > > > > >      1 file changed, 3 insertions(+), 1 deletion(-)
> > > > > > > > > > > >
> > > > > > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > > > > > index 9f3b1d6ac33d..e7533f29b219 100644
> > > > > > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > > > > > @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > > > > > > >              * into the hypervisor, so the request should be handled immediately.
> > > > > > > > > > > >              */
> > > > > > > > > > > >             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > > > > > > -              !virtqueue_is_broken(vi->cvq))
> > > > > > > > > > > > +              !virtqueue_is_broken(vi->cvq)) {
> > > > > > > > > > > > +               cond_resched();
> > > > > > > > > > > >                     cpu_relax();
> > > > > > > > > > > > +       }
> > > > > > > > > > > >
> > > > > > > > > > > >             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > > > > >      }
> > > > > > > > > > > > --
> > > > > > > > > > > > 2.39.3
> > > > > > > > > > > >
> > > > > > > > > > > > _______________________________________________
> > > > > > > > > > > > Virtualization mailing list
> > > > > > > > > > > > Virtualization@lists.linux-foundation.org
> > > > > > > > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > > > > >
> > > > > > > >
> > > > > >
> > > >
> >


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop
  2024-02-22 19:21                         ` Michael S. Tsirkin
@ 2024-02-26  5:08                           ` Jason Wang
  0 siblings, 0 replies; 42+ messages in thread
From: Jason Wang @ 2024-02-26  5:08 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Maxime Coquelin, Shannon Nelson, xuanzhuo, netdev, linux-kernel,
	virtualization, edumazet, kuba, pabeni, davem

On Fri, Feb 23, 2024 at 3:22 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Tue, Jul 25, 2023 at 11:03:11AM +0800, Jason Wang wrote:
> > On Mon, Jul 24, 2023 at 3:18 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Mon, Jul 24, 2023 at 02:52:49PM +0800, Jason Wang wrote:
> > > > On Mon, Jul 24, 2023 at 2:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Fri, Jul 21, 2023 at 10:18:03PM +0200, Maxime Coquelin wrote:
> > > > > >
> > > > > >
> > > > > > On 7/21/23 17:10, Michael S. Tsirkin wrote:
> > > > > > > On Fri, Jul 21, 2023 at 04:58:04PM +0200, Maxime Coquelin wrote:
> > > > > > > >
> > > > > > > >
> > > > > > > > On 7/21/23 16:45, Michael S. Tsirkin wrote:
> > > > > > > > > On Fri, Jul 21, 2023 at 04:37:00PM +0200, Maxime Coquelin wrote:
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > On 7/20/23 23:02, Michael S. Tsirkin wrote:
> > > > > > > > > > > On Thu, Jul 20, 2023 at 01:26:20PM -0700, Shannon Nelson wrote:
> > > > > > > > > > > > On 7/20/23 1:38 AM, Jason Wang wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > > Adding cond_resched() to the command waiting loop for a better
> > > > > > > > > > > > > co-operation with the scheduler. This allows to give CPU a breath to
> > > > > > > > > > > > > run other task(workqueue) instead of busy looping when preemption is
> > > > > > > > > > > > > not allowed on a device whose CVQ might be slow.
> > > > > > > > > > > > >
> > > > > > > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > > > > >
> > > > > > > > > > > > This still leaves hung processes, but at least it doesn't pin the CPU any
> > > > > > > > > > > > more.  Thanks.
> > > > > > > > > > > > Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
> > > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > I'd like to see a full solution
> > > > > > > > > > > 1- block until interrupt
> > > > > > > > > >
> > > > > > > > > > Would it make sense to also have a timeout?
> > > > > > > > > > And when timeout expires, set FAILED bit in device status?
> > > > > > > > >
> > > > > > > > > virtio spec does not set any limits on the timing of vq
> > > > > > > > > processing.
> > > > > > > >
> > > > > > > > Indeed, but I thought the driver could decide it is too long for it.
> > > > > > > >
> > > > > > > > The issue is we keep waiting with rtnl locked, it can quickly make the
> > > > > > > > system unusable.
> > > > > > >
> > > > > > > if this is a problem we should find a way not to keep rtnl
> > > > > > > locked indefinitely.
> > > > > >
> > > > > > From the tests I have done, I think it is. With OVS, a reconfiguration is
> > > > > > performed when the VDUSE device is added, and when a MLX5 device is
> > > > > > in the same bridge, it ends up doing an ioctl() that tries to take the
> > > > > > rtnl lock. In this configuration, it is not possible to kill OVS because
> > > > > > it is stuck trying to acquire rtnl lock for mlx5 that is held by virtio-
> > > > > > net.
> > > > >
> > > > > So for sure, we can queue up the work and process it later.
> > > > > The somewhat tricky part is limiting the memory consumption.
> > > >
> > > > And it needs to sync with rtnl somehow, e.g device unregistering which
> > > > seems not easy.
> > > >
> > > > Thanks
> > >
> > > since when does device unregister need to send cvq commands?
> >
> > It doesn't do this now. But if we don't process the work under rtnl,
> > we need to synchronize with device unregistering.
> >
> > Thanks
>
> But what's not easy about it?

Yes, so I'm wondering if we can simply have the cond_resched() merged
as the first step.

And leave the indefinite wait for future investigation?

Thanks

>
> > >
> > > > >
> > > > >
> > > > > > >
> > > > > > > > > > > 2- still handle surprise removal correctly by waking in that case
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > > ---
> > > > > > > > > > > > >      drivers/net/virtio_net.c | 4 +++-
> > > > > > > > > > > > >      1 file changed, 3 insertions(+), 1 deletion(-)
> > > > > > > > > > > > >
> > > > > > > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > > > > > > index 9f3b1d6ac33d..e7533f29b219 100644
> > > > > > > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > > > > > > @@ -2314,8 +2314,10 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > > > > > > > > > >              * into the hypervisor, so the request should be handled immediately.
> > > > > > > > > > > > >              */
> > > > > > > > > > > > >             while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > > > > > > > > > > > -              !virtqueue_is_broken(vi->cvq))
> > > > > > > > > > > > > +              !virtqueue_is_broken(vi->cvq)) {
> > > > > > > > > > > > > +               cond_resched();
> > > > > > > > > > > > >                     cpu_relax();
> > > > > > > > > > > > > +       }
> > > > > > > > > > > > >
> > > > > > > > > > > > >             return vi->ctrl->status == VIRTIO_NET_OK;
> > > > > > > > > > > > >      }
> > > > > > > > > > > > > --
> > > > > > > > > > > > > 2.39.3
> > > > > > > > > > > > >
> > > > > > > > > > > > > _______________________________________________
> > > > > > > > > > > > > Virtualization mailing list
> > > > > > > > > > > > > Virtualization@lists.linux-foundation.org
> > > > > > > > > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > > > > > >
> > > > > > > > >
> > > > > > >
> > > > >
> > >
>


^ permalink raw reply	[flat|nested] 42+ messages in thread

end of thread, other threads:[~2024-02-26  5:08 UTC | newest]

Thread overview: 42+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-20  8:38 [PATCH net-next v4 0/2] virtio-net: don't busy poll for cvq command Jason Wang
2023-07-20  8:38 ` [PATCH net-next v4 1/2] virtio-net: convert rx mode setting to use workqueue Jason Wang
2023-07-20 20:25   ` Shannon Nelson
2023-07-20  8:38 ` [PATCH net-next v4 2/2] virtio-net: add cond_resched() to the command waiting loop Jason Wang
2023-07-20 15:31   ` Shannon Nelson
2023-07-20 20:58     ` Michael S. Tsirkin
2023-07-20 20:26   ` Shannon Nelson
2023-07-20 21:02     ` Michael S. Tsirkin
2023-07-21 14:37       ` Maxime Coquelin
2023-07-21 14:45         ` Michael S. Tsirkin
2023-07-21 14:58           ` Maxime Coquelin
2023-07-21 15:10             ` Michael S. Tsirkin
2023-07-21 20:18               ` Maxime Coquelin
2023-07-24  6:46                 ` Michael S. Tsirkin
2023-07-24  6:52                   ` Jason Wang
2023-07-24  7:18                     ` Michael S. Tsirkin
2023-07-25  3:03                       ` Jason Wang
2024-02-22 19:21                         ` Michael S. Tsirkin
2024-02-26  5:08                           ` Jason Wang
2023-10-05 19:35                   ` Feng Liu
2023-10-08  5:27                     ` Jason Wang
2023-07-24  6:52                 ` Jason Wang
2023-07-24  7:17                   ` Michael S. Tsirkin
2023-07-25  3:07                     ` Jason Wang
2023-07-25  7:36                       ` Michael S. Tsirkin
2023-07-26  1:55                         ` Jason Wang
2023-07-26 11:37                           ` Michael S. Tsirkin
2023-07-27  6:03                             ` Jason Wang
2023-07-27  6:10                               ` Michael S. Tsirkin
2023-07-27  8:59                                 ` Jason Wang
2023-07-27  9:46                                   ` Michael S. Tsirkin
2023-07-31  6:30                                     ` Jason Wang
2023-08-08  2:30                                       ` Jason Wang
2023-08-10 19:41                                         ` Michael S. Tsirkin
2023-08-11  2:23                                           ` Jason Wang
2023-08-11  5:42                                             ` Michael S. Tsirkin
2023-08-11  9:18                                               ` Jason Wang
2023-08-11  9:21                                                 ` Michael S. Tsirkin
2023-08-11  9:43                                                   ` Jason Wang
2023-08-11  9:51                                                     ` Michael S. Tsirkin
2023-08-11  9:54                                                       ` Jason Wang
2023-08-11 10:12                                                         ` Michael S. Tsirkin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).