From: Jakub Kicinski <kuba@kernel.org>
To: jiawenwu@trustnetic.com
Cc: Jakub Kicinski <kuba@kernel.org>,
mengyuanlou@net-swift.com, horms@kernel.org,
netdev@vger.kernel.org, edumazet@google.com, davem@davemloft.net,
andrew+netdev@lunn.ch, pabeni@redhat.com
Subject: Re: [net-next,v1,7/7] net: wangxun: add pcie error handler
Date: Tue, 3 Feb 2026 18:22:49 -0800 [thread overview]
Message-ID: <20260204022249.1573252-1-kuba@kernel.org> (raw)
In-Reply-To: <20260203075759.5852-8-jiawenwu@trustnetic.com>
This is an AI-generated review of your patch. The human sending this
email says "Sending this out without checking since you have to
repost. Please note that the AI code review may be wrong especially
around defensive programming (which we recommend against)."
For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
---
net: wangxun: add pcie error handler
Support to check pcie error and invoke aer driver to recover pcie.
> diff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.c b/drivers/net/ethernet/wangxun/libwx/wx_err.c
> index bb77bdce69d2..36eb06bd1798 100644
> --- a/drivers/net/ethernet/wangxun/libwx/wx_err.c
> +++ b/drivers/net/ethernet/wangxun/libwx/wx_err.c
[ ... ]
> @@ -3,10 +3,228 @@
>
> #include <linux/netdevice.h>
> #include <linux/pci.h>
> +#include <linux/aer.h>
>
> #include "wx_type.h"
> #include "wx_lib.h"
> #include "wx_err.h"
> +#include "wx_hw.h"
> +
> +/**
> + * wx_io_error_detected - called when PCI error is detected
> + * @pdev: Pointer to PCI device
> + * @state: The current pci connection state
> + *
> + * This function is called after a PCI bus error affecting
> + * this device has been detected.
> + */
> +static pci_ers_result_t wx_io_error_detected(struct pci_dev *pdev,
> + pci_channel_state_t state)
> +{
> + struct wx *wx = pci_get_drvdata(pdev);
> + struct net_device *netdev;
> +
> + netdev = wx->netdev;
> + if (!netif_device_present(netdev))
> + return PCI_ERS_RESULT_DISCONNECT;
> +
> + rtnl_lock();
> + netif_device_detach(netdev);
> +
> + wx->io_err = true;
> + if (netif_running(netdev))
> + wx->close_suspend(wx);
> +
> + if (state == pci_channel_io_perm_failure) {
> + rtnl_unlock();
> + return PCI_ERS_RESULT_DISCONNECT;
> + }
> +
> + if (!test_and_set_bit(WX_STATE_DISABLED, wx->state))
> + pci_disable_device(pdev);
> + rtnl_unlock();
> +
> + /* Request a slot reset. */
> + return PCI_ERS_RESULT_NEED_RESET;
> +}
> +
> +/**
> + * wx_io_slot_reset - called after the pci bus has been reset.
> + * @pdev: Pointer to PCI device
> + *
> + * Restart the card from scratch, as if from a cold-boot.
> + */
> +static pci_ers_result_t wx_io_slot_reset(struct pci_dev *pdev)
> +{
> + struct wx *wx = pci_get_drvdata(pdev);
> + pci_ers_result_t result;
> +
> + if (pci_enable_device_mem(pdev)) {
> + wx_err(wx, "Cannot re-enable PCI device after reset.\n");
> + result = PCI_ERS_RESULT_DISCONNECT;
> + } else {
> + /* make all bar access done before reset. */
> + smp_mb__before_atomic();
> + clear_bit(WX_STATE_DISABLED, wx->state);
> + pci_set_master(pdev);
> + pci_restore_state(pdev);
> + pci_save_state(pdev);
> + pci_wake_from_d3(pdev, false);
> +
> + wx->do_reset(wx->netdev, false);
> + result = PCI_ERS_RESULT_RECOVERED;
> + }
> +
> + pci_aer_clear_nonfatal_status(pdev);
> +
> + return result;
> +}
> +
> +/**
> + * wx_io_resume - called when traffic can start flowing again.
> + * @pdev: Pointer to PCI device
> + *
> + * This callback is called when the error recovery driver tells us that
> + * its OK to resume normal operation.
> + */
> +static void wx_io_resume(struct pci_dev *pdev)
> +{
> + struct wx *wx = pci_get_drvdata(pdev);
> + struct net_device *netdev;
> +
> + netdev = wx->netdev;
> + rtnl_lock();
> + if (netif_running(netdev))
> + netdev->netdev_ops->ndo_open(netdev);
> +
> + wx->io_err = false;
> + netif_device_attach(netdev);
> + rtnl_unlock();
> +}
> +
> +const struct pci_error_handlers wx_err_handler = {
> + .error_detected = wx_io_error_detected,
> + .slot_reset = wx_io_slot_reset,
> + .resume = wx_io_resume,
> +};
> +EXPORT_SYMBOL(wx_err_handler);
> +
> +static bool wx_check_pcie_error(struct wx *wx)
> +{
> + u16 vid, pci_cmd, devctl2;
> + u32 value;
> +
> + pci_read_config_word(wx->pdev, PCI_VENDOR_ID, &vid);
> + wx_warn(wx, "PCI vendor id is 0x%x\n", vid);
> + pci_read_config_word(wx->pdev, PCI_COMMAND, &pci_cmd);
> + wx_warn(wx, "PCI command reg is 0x%x\n", pci_cmd);
> + pcie_capability_read_word(wx->pdev, PCI_EXP_DEVCTL2, &devctl2);
> + wx_warn(wx, "Device Control2 Register: 0x%04x\n", devctl2);
> +
> + value = rd32(wx, WX_MIS_PWR);
> + wx_warn(wx, "MIS_PWR value is 0x%08x\n", value);
> + value = rd32(wx, WX_PX_IMS(0));
> + wx_warn(wx, "PX_IMS0 value is 0x%08x\n", value);
> + if (test_bit(WX_FLAG_MULTI_64_FUNC, wx->flags)) {
> + value = rd32(wx, WX_PX_IMS(1));
> + wx_warn(wx, "PX_IMS1 value is 0x%08x\n", value);
> + }
> + value = rd32(wx, WX_TDB_TFCS);
> + wx_warn(wx, "Tx flow control Status[TDB_TFCS 0xCE00]: 0x%x\n", value);
> +
> + /* PCIe link loss or memory space can't access */
> + if (vid == WX_FAILED_READ_CFG_WORD || !(pci_cmd & 0x2))
> + return true;
> +
> + return false;
> +}
> +
> +static void wx_check_error_subtask(struct wx *wx)
> +{
> + u32 sm;
> +
> + if (test_bit(WX_FLAG_ERROR_CHECK, wx->flags)) {
> + /* get PF semaphore */
> + wr32(wx, WX_MIS_PF_SM, 1);
> + clear_bit(WX_FLAG_ERROR_CHECK, wx->flags);
> + }
> +
> + sm = rd32(wx, WX_MIS_PF_SM);
> + /* PCIe memory space access error */
> + if (sm == U32_MAX)
> + goto out;
> +
> + /* PCIe error may be occurred in another port */
> + if ((sm == 1 && wx_check_first_lan_up(wx)))
> + goto out;
> +
> + return;
> +out:
> + set_bit(WX_FLAG_NEED_PCIE_RECOVER, wx->flags);
> + wx_warn(wx, "Set PCIe recover on LAN %d\n", wx->bus.func);
> +}
> +
> +static bool wx_check_recovery_capability(struct pci_dev *dev)
> +{
> +#if defined(__i386__) || defined(__x86_64__)
> + return true;
> +#else
> + /* check upstream bridge is root or PLX bridge,
> + * or if CPU is kunpeng 920
> + */
> + if (dev->bus->self->vendor == PCI_VENDOR_ID_PLX ||
> + dev->bus->self->vendor == PCI_VENDOR_ID_HUAWEI)
Can this dereference dev->bus->self without checking if it's NULL first?
When the PCI device is directly attached to the root bus, dev->bus->self
is NULL. On non-x86 platforms, this code checks the upstream bridge
vendor without verifying that an upstream bridge exists.
The function comment states "check upstream bridge is root or PLX bridge"
but the code doesn't actually check for the root bus case. The x86 path
returns true unconditionally, but the non-x86 path assumes dev->bus->self
is non-NULL.
> + return true;
> + else
> + return false;
> +#endif
> +}
[ ... ]
> diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
> index 3171a98d81a6..653a98d9835b 100644
> --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
> +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
> @@ -234,6 +234,12 @@ static irqreturn_t __ngbe_msix_misc(struct wx *wx, u32 eicr)
> if (eicr & NGBE_PX_MISC_IC_VF_MBOX)
> wx_msg_task(wx);
>
> + if (eicr & NGBE_PX_MISC_PCIE_REQ_ERR) {
> + wx_warn(wx, "PCIe Request Error founded on Lan %d\n",
> + wx->bus.func);
This isn't a bug, but there's a typo: 'founded' should be 'found'.
> + wx_pcie_error_handler(wx);
> + }
> +
> if (unlikely(eicr & NGBE_PX_MISC_IC_TIMESYNC))
> wx_ptp_check_pps_event(wx);
>
[ ... ]
> diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
> index aa14958d439a..c832100bcb61 100644
> --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
> +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
> @@ -6,6 +6,7 @@
>
> #include "../libwx/wx_type.h"
> #include "../libwx/wx_lib.h"
> +#include "../libwx/wx_err.h"
> #include "../libwx/wx_ptp.h"
> #include "../libwx/wx_hw.h"
> #include "../libwx/wx_sriov.h"
> @@ -178,6 +179,12 @@ static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data)
> handle_nested_irq(sub_irq);
> nhandled++;
> }
> + if (eicr & TXGBE_PX_MISC_PCIE_REQ_ERR) {
> + wx_warn(wx, "PCIe Request Error founded on Lan %d\n",
> + wx->bus.func);
This isn't a bug, but there's a typo: 'founded' should be 'found'.
> + wx_pcie_error_handler(wx);
> + nhandled++;
> + }
> if (unlikely(eicr & TXGBE_PX_MISC_IC_TIMESYNC)) {
> wx_ptp_check_pps_event(wx);
> nhandled++;
next prev parent reply other threads:[~2026-02-04 2:22 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-03 7:57 [PATCH net-next v1 0/7] Wangxun improvement and new support Jiawen Wu
2026-02-03 7:57 ` [PATCH net-next v1 1/7] net: ngbe: move the WOL functions to libwx Jiawen Wu
2026-02-03 13:52 ` Andrew Lunn
2026-02-04 2:05 ` Jiawen Wu
2026-02-03 7:57 ` [PATCH net-next v1 2/7] net: ngbe: improve the reset flow Jiawen Wu
2026-02-03 13:56 ` Andrew Lunn
2026-02-04 2:20 ` Jiawen Wu
2026-02-03 7:57 ` [PATCH net-next v1 3/7] net: wangxun: move reusable PCI driver ops functions into libwx Jiawen Wu
2026-02-04 2:22 ` [net-next,v1,3/7] " Jakub Kicinski
2026-02-04 6:12 ` Jiawen Wu
2026-02-03 7:57 ` [PATCH net-next v1 4/7] net: txgbe: add power management support Jiawen Wu
2026-02-03 7:57 ` [PATCH net-next v1 5/7] net: wangxun: move ethtool_ops.set_channels into libwx Jiawen Wu
2026-02-03 7:57 ` [PATCH net-next v1 6/7] net: wangxun: add Tx timeout process Jiawen Wu
2026-02-03 7:57 ` [PATCH net-next v1 7/7] net: wangxun: add pcie error handler Jiawen Wu
2026-02-04 1:34 ` kernel test robot
2026-02-04 2:22 ` Jakub Kicinski [this message]
2026-02-04 6:10 ` [net-next,v1,7/7] " Jiawen Wu
2026-02-04 11:18 ` [PATCH net-next v1 7/7] " kernel test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260204022249.1573252-1-kuba@kernel.org \
--to=kuba@kernel.org \
--cc=andrew+netdev@lunn.ch \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=jiawenwu@trustnetic.com \
--cc=mengyuanlou@net-swift.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox