All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Julio Faracco <jcfaracco@gmail.com>
Cc: netdev@vger.kernel.org, dnmendes76@gmail.com,
	Jason Wang <jasowang@redhat.com>,
	"David S. Miller" <davem@davemloft.net>,
	virtualization@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH net-next v2] drivers: net: virtio_net: Implement a dev_watchdog handler
Date: Fri, 22 Nov 2019 05:31:19 -0500	[thread overview]
Message-ID: <20191122052506-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <20191122013636.1041-1-jcfaracco@gmail.com>

On Thu, Nov 21, 2019 at 10:36:36PM -0300, Julio Faracco wrote:
> Driver virtio_net is not handling error events for TX provided by
> dev_watchdog. This event is reached when transmission queue is having
> problems to transmit packets. This could happen for any reason. To
> enable it, driver should have .ndo_tx_timeout implemented.
> 
> This commit brings back virtnet_reset method to recover TX queues from a
> error state. That function is called by schedule_work method and it puts
> the reset function into work queue.
> 
> As the error cause is unknown at this moment, it would be better to
> reset all queues.
> 
> Signed-off-by: Julio Faracco <jcfaracco@gmail.com>
> Signed-off-by: Daiane Mendes <dnmendes76@gmail.com>
> Cc: Jason Wang <jasowang@redhat.com>
> ---
> v1-v2: Tag `net-next` was included to indentify where patch would be
> applied.
> ---
>  drivers/net/virtio_net.c | 95 +++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 94 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 4d7d5434cc5d..31890d77eaf2 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -75,6 +75,7 @@ struct virtnet_sq_stats {
>  	u64 xdp_tx;
>  	u64 xdp_tx_drops;
>  	u64 kicks;
> +	u64 tx_timeouts;
>  };
>  
>  struct virtnet_rq_stats {
> @@ -98,6 +99,7 @@ static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
>  	{ "xdp_tx",		VIRTNET_SQ_STAT(xdp_tx) },
>  	{ "xdp_tx_drops",	VIRTNET_SQ_STAT(xdp_tx_drops) },
>  	{ "kicks",		VIRTNET_SQ_STAT(kicks) },
> +	{ "tx_timeouts",	VIRTNET_SQ_STAT(tx_timeouts) },
>  };
>  
>  static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
> @@ -211,6 +213,9 @@ struct virtnet_info {
>  	/* Work struct for config space updates */
>  	struct work_struct config_work;
>  
> +	/* Work struct for resetting the virtio-net driver. */
> +	struct work_struct reset_work;
> +
>  	/* Does the affinity hint is set for virtqueues? */
>  	bool affinity_hint_set;
>  
> @@ -1721,7 +1726,7 @@ static void virtnet_stats(struct net_device *dev,
>  	int i;
>  
>  	for (i = 0; i < vi->max_queue_pairs; i++) {
> -		u64 tpackets, tbytes, rpackets, rbytes, rdrops;
> +		u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops;
>  		struct receive_queue *rq = &vi->rq[i];
>  		struct send_queue *sq = &vi->sq[i];
>  
> @@ -1729,6 +1734,7 @@ static void virtnet_stats(struct net_device *dev,
>  			start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
>  			tpackets = sq->stats.packets;
>  			tbytes   = sq->stats.bytes;
> +			terrors  = sq->stats.tx_timeouts;
>  		} while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
>  
>  		do {
> @@ -1743,6 +1749,7 @@ static void virtnet_stats(struct net_device *dev,
>  		tot->rx_bytes   += rbytes;
>  		tot->tx_bytes   += tbytes;
>  		tot->rx_dropped += rdrops;
> +		tot->tx_errors  += terrors;
>  	}
>  
>  	tot->tx_dropped = dev->stats.tx_dropped;
> @@ -2578,6 +2585,33 @@ static int virtnet_set_features(struct net_device *dev,
>  	return 0;
>  }
>  
> +static void virtnet_tx_timeout(struct net_device *dev)
> +{
> +	struct virtnet_info *vi = netdev_priv(dev);
> +	u32 i;
> +
> +	netdev_warn(dev, "TX timeout stats:\n");
> +	/* find the stopped queue the same way dev_watchdog() does */
> +	for (i = 0; i < vi->curr_queue_pairs; i++) {
> +		struct send_queue *sq = &vi->sq[i];
> +
> +		if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i))) {
> +			netdev_warn(dev, " Available send queue: %d, sq: %s, vq: %d, name: %s\n",
> +				    i, sq->name, sq->vq->index, sq->vq->name);

What does this mean?

> +			continue;
> +		}
> +
> +		u64_stats_update_begin(&sq->stats.syncp);
> +		sq->stats.tx_timeouts++;
> +		u64_stats_update_end(&sq->stats.syncp);
> +
> +		netdev_warn(dev, " Unavailable send queue: %d, sq: %s, vq: %d, name: %s\n",
> +			    i, sq->name, sq->vq->index, sq->vq->name);
> +	}

Can we make the warning less cryptic?
I wonder why don't we get the sq from timeout directly?
Would seem cleaner.

> +
> +	schedule_work(&vi->reset_work);
> +}
> +
>  static const struct net_device_ops virtnet_netdev = {
>  	.ndo_open            = virtnet_open,
>  	.ndo_stop   	     = virtnet_close,
> @@ -2593,6 +2627,7 @@ static const struct net_device_ops virtnet_netdev = {
>  	.ndo_features_check	= passthru_features_check,
>  	.ndo_get_phys_port_name	= virtnet_get_phys_port_name,
>  	.ndo_set_features	= virtnet_set_features,
> +	.ndo_tx_timeout		= virtnet_tx_timeout,
>  };
>  
>  static void virtnet_config_changed_work(struct work_struct *work)
> @@ -2982,6 +3017,62 @@ static int virtnet_validate(struct virtio_device *vdev)
>  	return 0;
>  }
>  
> +static void _remove_vq_common(struct virtnet_info *vi)
> +{
> +	vi->vdev->config->reset(vi->vdev);
> +
> +	/* Free unused buffers in both send and recv, if any. */
> +	free_unused_bufs(vi);
> +
> +	_free_receive_bufs(vi);
> +
> +	free_receive_page_frags(vi);
> +
> +	virtnet_del_vqs(vi);
> +}
> +
> +static int _virtnet_reset(struct virtnet_info *vi)
> +{
> +	struct virtio_device *vdev = vi->vdev;
> +	int ret;
> +
> +	virtio_config_disable(vdev);
> +	vdev->failed = vdev->config->get_status(vdev) & VIRTIO_CONFIG_S_FAILED;
> +
> +	virtnet_freeze_down(vdev);
> +	_remove_vq_common(vi);
> +
> +	virtio_add_status(vdev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
> +	virtio_add_status(vdev, VIRTIO_CONFIG_S_DRIVER);
> +
> +	ret = virtio_finalize_features(vdev);
> +	if (ret)
> +		goto err;
> +
> +	ret = virtnet_restore_up(vdev);
> +	if (ret)
> +		goto err;
> +
> +	ret = _virtnet_set_queues(vi, vi->curr_queue_pairs);
> +	if (ret)
> +		goto err;
> +
> +	virtio_add_status(vdev, VIRTIO_CONFIG_S_DRIVER_OK);
> +	virtio_config_enable(vdev);


Is this enough? E.g. all RX mode programming has been lost.



> +	return 0;
> +err:
> +	virtio_add_status(vdev, VIRTIO_CONFIG_S_FAILED);
> +	return ret;
> +}
> +
> +static void virtnet_reset(struct work_struct *work)
> +{
> +	struct virtnet_info *vi =
> +		container_of(work, struct virtnet_info, reset_work);
> +
> +	_virtnet_reset(vi);
> +}
> +
>  static int virtnet_probe(struct virtio_device *vdev)
>  {
>  	int i, err = -ENOMEM;
> @@ -3011,6 +3102,7 @@ static int virtnet_probe(struct virtio_device *vdev)
>  	dev->netdev_ops = &virtnet_netdev;
>  	dev->features = NETIF_F_HIGHDMA;
>  
> +	dev->watchdog_timeo = 5 * HZ;
>  	dev->ethtool_ops = &virtnet_ethtool_ops;
>  	SET_NETDEV_DEV(dev, &vdev->dev);
>

Is there a way to make this tuneable from ethtool?
  
> @@ -3068,6 +3160,7 @@ static int virtnet_probe(struct virtio_device *vdev)
>  	vdev->priv = vi;
>  
>  	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
> +	INIT_WORK(&vi->reset_work, virtnet_reset);
>  
>  	/* If we can receive ANY GSO packets, we must allocate large ones. */
>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
> -- 
> 2.17.1


  parent reply	other threads:[~2019-11-22 10:31 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-22  1:36 [PATCH net-next v2] drivers: net: virtio_net: Implement a dev_watchdog handler Julio Faracco
2019-11-22 10:31 ` Michael S. Tsirkin
2019-11-22 10:31 ` Michael S. Tsirkin [this message]
2019-11-22 12:59   ` Julio Faracco
2019-11-22 13:53     ` Michael S. Tsirkin
2019-11-22 13:53     ` Michael S. Tsirkin
2019-11-23 22:33       ` Julio Faracco
2019-11-23 22:33         ` Julio Faracco
2019-11-24 10:52         ` Michael S. Tsirkin
2019-11-24 10:52         ` Michael S. Tsirkin
2019-11-24 14:49       ` kbuild test robot
2019-11-24 15:05     ` Michael S. Tsirkin
2019-11-24 15:05     ` Michael S. Tsirkin
2019-11-24 15:05     ` Michael S. Tsirkin
2019-11-24 21:48       ` Michael S. Tsirkin
2019-11-24 21:48       ` Michael S. Tsirkin
2019-11-24 23:03         ` Jakub Kicinski
2019-11-24 23:03         ` Jakub Kicinski
2019-11-24 23:18           ` Michael S. Tsirkin
2019-11-24 23:18           ` Michael S. Tsirkin
2019-11-24 23:29           ` Michael S. Tsirkin
2019-11-24 23:37             ` Jakub Kicinski
2019-11-24 23:37             ` Jakub Kicinski
2019-11-25  8:42               ` Michael S. Tsirkin
2019-11-28 19:26                 ` kbuild test robot
2019-11-25  8:42               ` Michael S. Tsirkin
2019-11-25  8:20             ` kbuild test robot
2019-11-24 23:29           ` Michael S. Tsirkin
2019-11-22 12:59   ` Julio Faracco
  -- strict thread matches above, loose matches on Subject: below --
2019-11-22  1:36 Julio Faracco

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191122052506-mutt-send-email-mst@kernel.org \
    --to=mst@redhat.com \
    --cc=davem@davemloft.net \
    --cc=dnmendes76@gmail.com \
    --cc=jasowang@redhat.com \
    --cc=jcfaracco@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.