From: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
To: Jijie Shao <shaojijie@huawei.com>
Cc: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, andrew+netdev@lunn.ch, horms@kernel.org,
shenjian15@huawei.com, wangpeiyang1@huawei.com,
liuyonglong@huawei.com, chenhao418@huawei.com,
jonathan.cameron@huawei.com,
shameerali.kolothum.thodi@huawei.com, salil.mehta@huawei.com,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH RESEND V2 net 1/7] net: hns3: fixed reset failure issues caused by the incorrect reset type
Date: Wed, 18 Dec 2024 10:02:59 +0100 [thread overview]
Message-ID: <Z2KPw9WYCI/SZIjg@mev-dev.igk.intel.com> (raw)
In-Reply-To: <20241217010839.1742227-2-shaojijie@huawei.com>
On Tue, Dec 17, 2024 at 09:08:33AM +0800, Jijie Shao wrote:
> From: Hao Lan <lanhao@huawei.com>
>
> When a reset type that is not supported by the driver is input, a reset
> pending flag bit of the HNAE3_NONE_RESET type is generated in
> reset_pending. The driver does not have a mechanism to clear this type
> of error. As a result, the driver considers that the reset is not
> complete. This patch provides a mechanism to clear the
> HNAE3_NONE_RESET flag and the parameter of
> hnae3_ae_ops.set_default_reset_request is verified.
>
> The error message:
> hns3 0000:39:01.0: cmd failed -16
> hns3 0000:39:01.0: hclge device re-init failed, VF is disabled!
> hns3 0000:39:01.0: failed to reset VF stack
> hns3 0000:39:01.0: failed to reset VF(4)
> hns3 0000:39:01.0: prepare reset(2) wait done
> hns3 0000:39:01.0 eth4: already uninitialized
>
> Use the crash tool to view struct hclgevf_dev:
> struct hclgevf_dev {
> ...
> default_reset_request = 0x20,
> reset_level = HNAE3_NONE_RESET,
> reset_pending = 0x100,
> reset_type = HNAE3_NONE_RESET,
> ...
> };
>
> Fixes: 720bd5837e37 ("net: hns3: add set_default_reset_request in the hnae3_ae_ops")
> Signed-off-by: Hao Lan <lanhao@huawei.com>
> Signed-off-by: Jijie Shao <shaojijie@huawei.com>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> .../hisilicon/hns3/hns3pf/hclge_main.c | 33 ++++++++++++++--
> .../hisilicon/hns3/hns3vf/hclgevf_main.c | 38 ++++++++++++++++---
> 2 files changed, 61 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> index 05942fa78b11..7d44dc777dc5 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> @@ -3574,6 +3574,17 @@ static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf,
> return ret;
> }
>
> +static void hclge_set_reset_pending(struct hclge_dev *hdev,
> + enum hnae3_reset_type reset_type)
> +{
> + /* When an incorrect reset type is executed, the get_reset_level
> + * function generates the HNAE3_NONE_RESET flag. As a result, this
> + * type do not need to pending.
> + */
> + if (reset_type != HNAE3_NONE_RESET)
> + set_bit(reset_type, &hdev->reset_pending);
> +}
> +
> static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
> {
> u32 cmdq_src_reg, msix_src_reg, hw_err_src_reg;
> @@ -3594,7 +3605,7 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
> */
> if (BIT(HCLGE_VECTOR0_IMPRESET_INT_B) & msix_src_reg) {
> dev_info(&hdev->pdev->dev, "IMP reset interrupt\n");
> - set_bit(HNAE3_IMP_RESET, &hdev->reset_pending);
> + hclge_set_reset_pending(hdev, HNAE3_IMP_RESET);
> set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state);
> *clearval = BIT(HCLGE_VECTOR0_IMPRESET_INT_B);
> hdev->rst_stats.imp_rst_cnt++;
> @@ -3604,7 +3615,7 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
> if (BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B) & msix_src_reg) {
> dev_info(&hdev->pdev->dev, "global reset interrupt\n");
> set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state);
> - set_bit(HNAE3_GLOBAL_RESET, &hdev->reset_pending);
> + hclge_set_reset_pending(hdev, HNAE3_GLOBAL_RESET);
> *clearval = BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B);
> hdev->rst_stats.global_rst_cnt++;
> return HCLGE_VECTOR0_EVENT_RST;
> @@ -4052,7 +4063,7 @@ static void hclge_do_reset(struct hclge_dev *hdev)
> case HNAE3_FUNC_RESET:
> dev_info(&pdev->dev, "PF reset requested\n");
> /* schedule again to check later */
> - set_bit(HNAE3_FUNC_RESET, &hdev->reset_pending);
> + hclge_set_reset_pending(hdev, HNAE3_FUNC_RESET);
> hclge_reset_task_schedule(hdev);
> break;
> default:
> @@ -4086,6 +4097,8 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev,
> clear_bit(HNAE3_FLR_RESET, addr);
> }
>
> + clear_bit(HNAE3_NONE_RESET, addr);
> +
> if (hdev->reset_type != HNAE3_NONE_RESET &&
> rst_level < hdev->reset_type)
> return HNAE3_NONE_RESET;
> @@ -4227,7 +4240,7 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev)
> return false;
> } else if (hdev->rst_stats.reset_fail_cnt < MAX_RESET_FAIL_CNT) {
> hdev->rst_stats.reset_fail_cnt++;
> - set_bit(hdev->reset_type, &hdev->reset_pending);
> + hclge_set_reset_pending(hdev, hdev->reset_type);
Sth is unclear for me here. Doesn't HNAE3_NONE_RESET mean that there is
no reset? If yes, why in this case reset_fail_cnt++ is increasing?
Maybe the check for NONE_RESET should be done in this else if check to
prevent reset_fail_cnt from increasing (and also solve the problem with
pending bit set)
> dev_info(&hdev->pdev->dev,
> "re-schedule reset task(%u)\n",
> hdev->rst_stats.reset_fail_cnt);
> @@ -4470,8 +4483,20 @@ static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle)
> static void hclge_set_def_reset_request(struct hnae3_ae_dev *ae_dev,
> enum hnae3_reset_type rst_type)
> {
> +#define HCLGE_SUPPORT_RESET_TYPE \
> + (BIT(HNAE3_FLR_RESET) | BIT(HNAE3_FUNC_RESET) | \
> + BIT(HNAE3_GLOBAL_RESET) | BIT(HNAE3_IMP_RESET))
> +
> struct hclge_dev *hdev = ae_dev->priv;
>
> + if (!(BIT(rst_type) & HCLGE_SUPPORT_RESET_TYPE)) {
> + /* To prevent reset triggered by hclge_reset_event */
> + set_bit(HNAE3_NONE_RESET, &hdev->default_reset_request);
> + dev_warn(&hdev->pdev->dev, "unsupported reset type %d\n",
> + rst_type);
> + return;
> + }
Maybe (nit):
if (...) {
rst_type =
dev_warn();
}
set_bit(rst_type, );
It is a little hard to follow with return in the if.
> +
> set_bit(rst_type, &hdev->default_reset_request);
> }
>
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
> index 2f6ffb88e700..fd0abe37fdd7 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
> @@ -1393,6 +1393,17 @@ static int hclgevf_notify_roce_client(struct hclgevf_dev *hdev,
> return ret;
> }
>
> +static void hclgevf_set_reset_pending(struct hclgevf_dev *hdev,
> + enum hnae3_reset_type reset_type)
> +{
> + /* When an incorrect reset type is executed, the get_reset_level
> + * function generates the HNAE3_NONE_RESET flag. As a result, this
> + * type do not need to pending.
> + */
> + if (reset_type != HNAE3_NONE_RESET)
> + set_bit(reset_type, &hdev->reset_pending);
> +}
You already have a way to share the code between PF and VF, so please
move the same functions to common file in one direction up.
> +
> static int hclgevf_reset_wait(struct hclgevf_dev *hdev)
> {
> #define HCLGEVF_RESET_WAIT_US 20000
> @@ -1542,7 +1553,7 @@ static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev)
> hdev->rst_stats.rst_fail_cnt);
>
> if (hdev->rst_stats.rst_fail_cnt < HCLGEVF_RESET_MAX_FAIL_CNT)
> - set_bit(hdev->reset_type, &hdev->reset_pending);
> + hclgevf_set_reset_pending(hdev, hdev->reset_type);
>
> if (hclgevf_is_reset_pending(hdev)) {
> set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
> @@ -1662,6 +1673,8 @@ static enum hnae3_reset_type hclgevf_get_reset_level(unsigned long *addr)
> clear_bit(HNAE3_FLR_RESET, addr);
> }
>
> + clear_bit(HNAE3_NONE_RESET, addr);
> +
> return rst_level;
> }
>
> @@ -1671,14 +1684,15 @@ static void hclgevf_reset_event(struct pci_dev *pdev,
> struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
> struct hclgevf_dev *hdev = ae_dev->priv;
>
> - dev_info(&hdev->pdev->dev, "received reset request from VF enet\n");
> -
> if (hdev->default_reset_request)
> hdev->reset_level =
> hclgevf_get_reset_level(&hdev->default_reset_request);
> else
> hdev->reset_level = HNAE3_VF_FUNC_RESET;
>
> + dev_info(&hdev->pdev->dev, "received reset request from VF enet, reset level is %d\n",
> + hdev->reset_level);
> +
> /* reset of this VF requested */
> set_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state);
> hclgevf_reset_task_schedule(hdev);
> @@ -1689,8 +1703,20 @@ static void hclgevf_reset_event(struct pci_dev *pdev,
> static void hclgevf_set_def_reset_request(struct hnae3_ae_dev *ae_dev,
> enum hnae3_reset_type rst_type)
> {
> +#define HCLGEVF_SUPPORT_RESET_TYPE \
> + (BIT(HNAE3_VF_RESET) | BIT(HNAE3_VF_FUNC_RESET) | \
> + BIT(HNAE3_VF_PF_FUNC_RESET) | BIT(HNAE3_VF_FULL_RESET) | \
> + BIT(HNAE3_FLR_RESET) | BIT(HNAE3_VF_EXP_RESET))
> +
> struct hclgevf_dev *hdev = ae_dev->priv;
>
> + if (!(BIT(rst_type) & HCLGEVF_SUPPORT_RESET_TYPE)) {
> + /* To prevent reset triggered by hclge_reset_event */
> + set_bit(HNAE3_NONE_RESET, &hdev->default_reset_request);
> + dev_info(&hdev->pdev->dev, "unsupported reset type %d\n",
> + rst_type);
> + return;
> + }
> set_bit(rst_type, &hdev->default_reset_request);
> }
>
> @@ -1847,14 +1873,14 @@ static void hclgevf_reset_service_task(struct hclgevf_dev *hdev)
> */
> if (hdev->reset_attempts > HCLGEVF_MAX_RESET_ATTEMPTS_CNT) {
> /* prepare for full reset of stack + pcie interface */
> - set_bit(HNAE3_VF_FULL_RESET, &hdev->reset_pending);
> + hclgevf_set_reset_pending(hdev, HNAE3_VF_FULL_RESET);
>
> /* "defer" schedule the reset task again */
> set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
> } else {
> hdev->reset_attempts++;
>
> - set_bit(hdev->reset_level, &hdev->reset_pending);
> + hclgevf_set_reset_pending(hdev, hdev->reset_level);
> set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
> }
> hclgevf_reset_task_schedule(hdev);
> @@ -1977,7 +2003,7 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev,
> rst_ing_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING);
> dev_info(&hdev->pdev->dev,
> "receive reset interrupt 0x%x!\n", rst_ing_reg);
> - set_bit(HNAE3_VF_RESET, &hdev->reset_pending);
> + hclgevf_set_reset_pending(hdev, HNAE3_VF_RESET);
> set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
> set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state);
> *clearval = ~(1U << HCLGEVF_VECTOR0_RST_INT_B);
> --
> 2.33.0
next prev parent reply other threads:[~2024-12-18 9:06 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-17 1:08 [PATCH RESEND V2 net 0/7] There are some bugfix for the HNS3 ethernet driver Jijie Shao
2024-12-17 1:08 ` [PATCH RESEND V2 net 1/7] net: hns3: fixed reset failure issues caused by the incorrect reset type Jijie Shao
2024-12-18 9:02 ` Michal Swiatkowski [this message]
2024-12-19 9:41 ` Paolo Abeni
2024-12-19 10:11 ` Michal Swiatkowski
2024-12-19 10:43 ` Paolo Abeni
2024-12-19 12:26 ` Jijie Shao
2025-01-06 14:41 ` Jijie Shao
2024-12-19 10:13 ` Michal Swiatkowski
2024-12-19 12:18 ` Jijie Shao
2024-12-17 1:08 ` [PATCH RESEND V2 net 2/7] net: hns3: fix missing features due to dev->features configuration too early Jijie Shao
2024-12-18 9:16 ` Michal Swiatkowski
2024-12-17 1:08 ` [PATCH RESEND V2 net 3/7] net: hns3: Resolved the issue that the debugfs query result is inconsistent Jijie Shao
2024-12-17 1:08 ` [PATCH RESEND V2 net 4/7] net: hns3: don't auto enable misc vector Jijie Shao
2024-12-17 1:08 ` [PATCH RESEND V2 net 5/7] net: hns3: initialize reset_timer before hclgevf_misc_irq_init() Jijie Shao
2024-12-18 9:20 ` Michal Swiatkowski
2024-12-19 11:48 ` Jijie Shao
2024-12-17 1:08 ` [PATCH RESEND V2 net 6/7] net: hns3: fixed hclge_fetch_pf_reg accesses bar space out of bounds issue Jijie Shao
2024-12-18 9:29 ` Michal Swiatkowski
2024-12-19 9:51 ` Paolo Abeni
2024-12-19 10:23 ` Michal Swiatkowski
2024-12-19 11:52 ` Jijie Shao
2024-12-17 1:08 ` [PATCH RESEND V2 net 7/7] net: hns3: fix kernel crash when 1588 is sent on HIP08 devices Jijie Shao
2024-12-18 9:30 ` Michal Swiatkowski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Z2KPw9WYCI/SZIjg@mev-dev.igk.intel.com \
--to=michal.swiatkowski@linux.intel.com \
--cc=andrew+netdev@lunn.ch \
--cc=chenhao418@huawei.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=jonathan.cameron@huawei.com \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=liuyonglong@huawei.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=salil.mehta@huawei.com \
--cc=shameerali.kolothum.thodi@huawei.com \
--cc=shaojijie@huawei.com \
--cc=shenjian15@huawei.com \
--cc=wangpeiyang1@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.