netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jiri Pirko <jiri@resnulli.us>
To: souradeep chakrabarti <schakrabarti@linux.microsoft.com>
Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
	decui@microsoft.com, davem@davemloft.net, edumazet@google.com,
	kuba@kernel.org, pabeni@redhat.com, longli@microsoft.com,
	sharmaajay@microsoft.com, leon@kernel.org, cai.huoqing@linux.dev,
	ssengar@linux.microsoft.com, vkuznets@redhat.com,
	tglx@linutronix.de, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-rdma@vger.kernel.org, stable@vger.kernel.org,
	schakrabarti@microsoft.com
Subject: Re: [PATCH V2 net] net: mana: Fix MANA VF unload when host is unresponsive
Date: Fri, 23 Jun 2023 11:43:57 +0200	[thread overview]
Message-ID: <ZJVpXR52RQxdYRY+@nanopsycho> (raw)
In-Reply-To: <1687505355-29212-1-git-send-email-schakrabarti@linux.microsoft.com>

Fri, Jun 23, 2023 at 09:29:15AM CEST, schakrabarti@linux.microsoft.com wrote:
>From: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
>
>This patch addresses  the VF unload issue, where mana_dealloc_queues()

double space here    ^^

>gets stuck in infinite while loop, because of host unresponsiveness.
>It adds a timeout in the while loop, to fix it.
>
>Also this patch adds a new attribute in mana_context, which gets set when
>mana_hwc_send_request() hits a timeout because of host unresponsiveness.
>This flag then helps to avoid the timeouts in successive calls.

You aparently combine 2 patches together. Please split.


>
>Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>

Please provide a "Fixes" tag with pointer to the commit that introduced
the problem.


>---
>V1 -> V2:
>* Added net branch
>* Removed the typecasting to (struct mana_context*) of void pointer
>* Repositioned timeout variable in mana_dealloc_queues()
>* Repositioned vf_unload_timeout in mana_context struct, to utilise the
>  6 bytes hole
>---
> .../net/ethernet/microsoft/mana/gdma_main.c   |  4 +++-
> .../net/ethernet/microsoft/mana/hw_channel.c  | 12 ++++++++++-
> drivers/net/ethernet/microsoft/mana/mana_en.c | 21 +++++++++++++++++--
> include/net/mana/mana.h                       |  2 ++
> 4 files changed, 35 insertions(+), 4 deletions(-)
>
>diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
>index 8f3f78b68592..6411f01be0d9 100644
>--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
>+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
>@@ -946,10 +946,12 @@ int mana_gd_deregister_device(struct gdma_dev *gd)
> 	struct gdma_context *gc = gd->gdma_context;
> 	struct gdma_general_resp resp = {};
> 	struct gdma_general_req req = {};
>+	struct mana_context *ac;
> 	int err;
> 
> 	if (gd->pdid == INVALID_PDID)
> 		return -EINVAL;
>+	ac = gd->driver_data;
> 
> 	mana_gd_init_req_hdr(&req.hdr, GDMA_DEREGISTER_DEVICE, sizeof(req),
> 			     sizeof(resp));
>@@ -957,7 +959,7 @@ int mana_gd_deregister_device(struct gdma_dev *gd)
> 	req.hdr.dev_id = gd->dev_id;
> 
> 	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
>-	if (err || resp.hdr.status) {
>+	if ((err || resp.hdr.status) && !ac->vf_unload_timeout) {
> 		dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n",
> 			err, resp.hdr.status);
> 		if (!err)
>diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
>index 9d1507eba5b9..492cb2c6e2cb 100644
>--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
>+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
>@@ -1,8 +1,10 @@
> // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
> /* Copyright (c) 2021, Microsoft Corporation. */
> 
>+#include "asm-generic/errno.h"
> #include <net/mana/gdma.h>
> #include <net/mana/hw_channel.h>
>+#include <net/mana/mana.h>
> 
> static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id)
> {
>@@ -786,12 +788,19 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
> 	struct hwc_wq *txq = hwc->txq;
> 	struct gdma_req_hdr *req_msg;
> 	struct hwc_caller_ctx *ctx;
>+	struct mana_context *ac;
> 	u32 dest_vrcq = 0;
> 	u32 dest_vrq = 0;
> 	u16 msg_id;
> 	int err;
> 
> 	mana_hwc_get_msg_index(hwc, &msg_id);
>+	ac = hwc->gdma_dev->driver_data;
>+	if (ac->vf_unload_timeout) {
>+		dev_err(hwc->dev, "HWC: vport is already unloaded.\n");
>+		err = -ETIMEDOUT;
>+		goto out;
>+	}
> 
> 	tx_wr = &txq->msg_buf->reqs[msg_id];
> 
>@@ -825,9 +834,10 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
> 		goto out;
> 	}
> 
>-	if (!wait_for_completion_timeout(&ctx->comp_event, 30 * HZ)) {
>+	if (!wait_for_completion_timeout(&ctx->comp_event, 5 * HZ)) {
> 		dev_err(hwc->dev, "HWC: Request timed out!\n");
> 		err = -ETIMEDOUT;
>+		ac->vf_unload_timeout = true;
> 		goto out;
> 	}
> 
>diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
>index d907727c7b7a..cb2080b3a00c 100644
>--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
>+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
>@@ -2329,7 +2329,10 @@ static int mana_dealloc_queues(struct net_device *ndev)
> {
> 	struct mana_port_context *apc = netdev_priv(ndev);
> 	struct gdma_dev *gd = apc->ac->gdma_dev;
>+	unsigned long timeout;
> 	struct mana_txq *txq;
>+	struct sk_buff *skb;
>+	struct mana_cq *cq;
> 	int i, err;
> 
> 	if (apc->port_is_up)
>@@ -2348,13 +2351,26 @@ static int mana_dealloc_queues(struct net_device *ndev)
> 	 *
> 	 * Drain all the in-flight TX packets
> 	 */
>+
>+	timeout = jiffies + 120 * HZ;
> 	for (i = 0; i < apc->num_queues; i++) {
> 		txq = &apc->tx_qp[i].txq;
>-
>-		while (atomic_read(&txq->pending_sends) > 0)
>+		while (atomic_read(&txq->pending_sends) > 0 &&
>+		       time_before(jiffies, timeout)) {
> 			usleep_range(1000, 2000);
>+		}
> 	}
> 
>+	for (i = 0; i < apc->num_queues; i++) {
>+		txq = &apc->tx_qp[i].txq;
>+		cq = &apc->tx_qp[i].tx_cq;
>+		while (atomic_read(&txq->pending_sends)) {
>+			skb = skb_dequeue(&txq->pending_skbs);
>+			mana_unmap_skb(skb, apc);
>+			napi_consume_skb(skb, cq->budget);
>+			atomic_sub(1, &txq->pending_sends);
>+		}
>+	}
> 	/* We're 100% sure the queues can no longer be woken up, because
> 	 * we're sure now mana_poll_tx_cq() can't be running.
> 	 */
>@@ -2605,6 +2621,7 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
> 		}
> 	}
> 
>+	ac->vf_unload_timeout = false;

Pointless init. You have the struct zeroed during allocation.


> 	err = add_adev(gd);
> out:
> 	if (err)
>diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
>index 9eef19972845..5f5affdca1eb 100644
>--- a/include/net/mana/mana.h
>+++ b/include/net/mana/mana.h
>@@ -358,6 +358,8 @@ struct mana_context {
> 
> 	u16 num_ports;
> 
>+	bool vf_unload_timeout;
>+
> 	struct mana_eq *eqs;
> 
> 	struct net_device *ports[MAX_PORTS_IN_MANA_DEV];
>-- 
>2.34.1
>
>

  reply	other threads:[~2023-06-23  9:44 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-23  7:29 [PATCH V2 net] net: mana: Fix MANA VF unload when host is unresponsive souradeep chakrabarti
2023-06-23  9:43 ` Jiri Pirko [this message]
2023-06-23 17:44 ` Simon Horman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZJVpXR52RQxdYRY+@nanopsycho \
    --to=jiri@resnulli.us \
    --cc=cai.huoqing@linux.dev \
    --cc=davem@davemloft.net \
    --cc=decui@microsoft.com \
    --cc=edumazet@google.com \
    --cc=haiyangz@microsoft.com \
    --cc=kuba@kernel.org \
    --cc=kys@microsoft.com \
    --cc=leon@kernel.org \
    --cc=linux-hyperv@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=longli@microsoft.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=schakrabarti@linux.microsoft.com \
    --cc=schakrabarti@microsoft.com \
    --cc=sharmaajay@microsoft.com \
    --cc=ssengar@linux.microsoft.com \
    --cc=stable@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=vkuznets@redhat.com \
    --cc=wei.liu@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).