All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Jiawen Wu" <jiawenwu@trustnetic.com>
To: "'Jakub Kicinski'" <kuba@kernel.org>
Cc: <mengyuanlou@net-swift.com>, <horms@kernel.org>,
	<netdev@vger.kernel.org>, <edumazet@google.com>,
	<davem@davemloft.net>, <andrew+netdev@lunn.ch>,
	<pabeni@redhat.com>, <mengyuanlou@net-swift.com>,
	<horms@kernel.org>, <netdev@vger.kernel.org>,
	<edumazet@google.com>, <davem@davemloft.net>,
	<andrew+netdev@lunn.ch>, <pabeni@redhat.com>
Subject: RE: [net-next,v1,7/7] net: wangxun: add pcie error handler
Date: Wed, 4 Feb 2026 14:10:12 +0800	[thread overview]
Message-ID: <06f201dc959c$ec2b74e0$c4825ea0$@trustnetic.com> (raw)
In-Reply-To: <20260204022249.1573252-1-kuba@kernel.org>

On Wed, Feb 4, 2026 10:23 AM, Jakub Kicinski wrote:
> This is an AI-generated review of your patch. The human sending this
> email says "Sending this out without checking since you have to
> repost. Please note that the AI code review may be wrong especially
> around defensive programming (which we recommend against)."
> 
> For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
> ---
> net: wangxun: add pcie error handler
> 
> Support to check pcie error and invoke aer driver to recover pcie.
> 
> > diff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.c b/drivers/net/ethernet/wangxun/libwx/wx_err.c
> > index bb77bdce69d2..36eb06bd1798 100644
> > --- a/drivers/net/ethernet/wangxun/libwx/wx_err.c
> > +++ b/drivers/net/ethernet/wangxun/libwx/wx_err.c
> 
> [ ... ]
> 
> > @@ -3,10 +3,228 @@
> >
> >  #include <linux/netdevice.h>
> >  #include <linux/pci.h>
> > +#include <linux/aer.h>
> >
> >  #include "wx_type.h"
> >  #include "wx_lib.h"
> >  #include "wx_err.h"
> > +#include "wx_hw.h"
> > +
> > +/**
> > + * wx_io_error_detected - called when PCI error is detected
> > + * @pdev: Pointer to PCI device
> > + * @state: The current pci connection state
> > + *
> > + * This function is called after a PCI bus error affecting
> > + * this device has been detected.
> > + */
> > +static pci_ers_result_t wx_io_error_detected(struct pci_dev *pdev,
> > +					     pci_channel_state_t state)
> > +{
> > +	struct wx *wx = pci_get_drvdata(pdev);
> > +	struct net_device *netdev;
> > +
> > +	netdev = wx->netdev;
> > +	if (!netif_device_present(netdev))
> > +		return PCI_ERS_RESULT_DISCONNECT;
> > +
> > +	rtnl_lock();
> > +	netif_device_detach(netdev);
> > +
> > +	wx->io_err = true;
> > +	if (netif_running(netdev))
> > +		wx->close_suspend(wx);
> > +
> > +	if (state == pci_channel_io_perm_failure) {
> > +		rtnl_unlock();
> > +		return PCI_ERS_RESULT_DISCONNECT;
> > +	}
> > +
> > +	if (!test_and_set_bit(WX_STATE_DISABLED, wx->state))
> > +		pci_disable_device(pdev);
> > +	rtnl_unlock();
> > +
> > +	/* Request a slot reset. */
> > +	return PCI_ERS_RESULT_NEED_RESET;
> > +}
> > +
> > +/**
> > + * wx_io_slot_reset - called after the pci bus has been reset.
> > + * @pdev: Pointer to PCI device
> > + *
> > + * Restart the card from scratch, as if from a cold-boot.
> > + */
> > +static pci_ers_result_t wx_io_slot_reset(struct pci_dev *pdev)
> > +{
> > +	struct wx *wx = pci_get_drvdata(pdev);
> > +	pci_ers_result_t result;
> > +
> > +	if (pci_enable_device_mem(pdev)) {
> > +		wx_err(wx, "Cannot re-enable PCI device after reset.\n");
> > +		result = PCI_ERS_RESULT_DISCONNECT;
> > +	} else {
> > +		/* make all bar access done before reset. */
> > +		smp_mb__before_atomic();
> > +		clear_bit(WX_STATE_DISABLED, wx->state);
> > +		pci_set_master(pdev);
> > +		pci_restore_state(pdev);
> > +		pci_save_state(pdev);
> > +		pci_wake_from_d3(pdev, false);
> > +
> > +		wx->do_reset(wx->netdev, false);
> > +		result = PCI_ERS_RESULT_RECOVERED;
> > +	}
> > +
> > +	pci_aer_clear_nonfatal_status(pdev);
> > +
> > +	return result;
> > +}
> > +
> > +/**
> > + * wx_io_resume - called when traffic can start flowing again.
> > + * @pdev: Pointer to PCI device
> > + *
> > + * This callback is called when the error recovery driver tells us that
> > + * its OK to resume normal operation.
> > + */
> > +static void wx_io_resume(struct pci_dev *pdev)
> > +{
> > +	struct wx *wx = pci_get_drvdata(pdev);
> > +	struct net_device *netdev;
> > +
> > +	netdev = wx->netdev;
> > +	rtnl_lock();
> > +	if (netif_running(netdev))
> > +		netdev->netdev_ops->ndo_open(netdev);
> > +
> > +	wx->io_err = false;
> > +	netif_device_attach(netdev);
> > +	rtnl_unlock();
> > +}
> > +
> > +const struct pci_error_handlers wx_err_handler = {
> > +	.error_detected = wx_io_error_detected,
> > +	.slot_reset = wx_io_slot_reset,
> > +	.resume = wx_io_resume,
> > +};
> > +EXPORT_SYMBOL(wx_err_handler);
> > +
> > +static bool wx_check_pcie_error(struct wx *wx)
> > +{
> > +	u16 vid, pci_cmd, devctl2;
> > +	u32 value;
> > +
> > +	pci_read_config_word(wx->pdev, PCI_VENDOR_ID, &vid);
> > +	wx_warn(wx, "PCI vendor id is 0x%x\n", vid);
> > +	pci_read_config_word(wx->pdev, PCI_COMMAND, &pci_cmd);
> > +	wx_warn(wx, "PCI command reg is 0x%x\n", pci_cmd);
> > +	pcie_capability_read_word(wx->pdev, PCI_EXP_DEVCTL2, &devctl2);
> > +	wx_warn(wx, "Device Control2 Register: 0x%04x\n", devctl2);
> > +
> > +	value = rd32(wx, WX_MIS_PWR);
> > +	wx_warn(wx, "MIS_PWR value is 0x%08x\n", value);
> > +	value = rd32(wx, WX_PX_IMS(0));
> > +	wx_warn(wx, "PX_IMS0 value is 0x%08x\n", value);
> > +	if (test_bit(WX_FLAG_MULTI_64_FUNC, wx->flags)) {
> > +		value = rd32(wx, WX_PX_IMS(1));
> > +		wx_warn(wx, "PX_IMS1 value is 0x%08x\n", value);
> > +	}
> > +	value = rd32(wx, WX_TDB_TFCS);
> > +	wx_warn(wx, "Tx flow control Status[TDB_TFCS 0xCE00]: 0x%x\n", value);
> > +
> > +	/* PCIe link loss or memory space can't access */
> > +	if (vid == WX_FAILED_READ_CFG_WORD || !(pci_cmd & 0x2))
> > +		return true;
> > +
> > +	return false;
> > +}
> > +
> > +static void wx_check_error_subtask(struct wx *wx)
> > +{
> > +	u32 sm;
> > +
> > +	if (test_bit(WX_FLAG_ERROR_CHECK, wx->flags)) {
> > +		/* get PF semaphore */
> > +		wr32(wx, WX_MIS_PF_SM, 1);
> > +		clear_bit(WX_FLAG_ERROR_CHECK, wx->flags);
> > +	}
> > +
> > +	sm = rd32(wx, WX_MIS_PF_SM);
> > +	/* PCIe memory space access error */
> > +	if (sm == U32_MAX)
> > +		goto out;
> > +
> > +	/* PCIe error may be occurred in another port */
> > +	if ((sm == 1 && wx_check_first_lan_up(wx)))
> > +		goto out;
> > +
> > +	return;
> > +out:
> > +	set_bit(WX_FLAG_NEED_PCIE_RECOVER, wx->flags);
> > +	wx_warn(wx, "Set PCIe recover on LAN %d\n", wx->bus.func);
> > +}
> > +
> > +static bool wx_check_recovery_capability(struct pci_dev *dev)
> > +{
> > +#if defined(__i386__) || defined(__x86_64__)
> > +	return true;
> > +#else
> > +	/* check upstream bridge is root or PLX bridge,
> > +	 * or if CPU is kunpeng 920
> > +	 */
> > +	if (dev->bus->self->vendor == PCI_VENDOR_ID_PLX ||
> > +	    dev->bus->self->vendor == PCI_VENDOR_ID_HUAWEI)
> 
> Can this dereference dev->bus->self without checking if it's NULL first?
> 
> When the PCI device is directly attached to the root bus, dev->bus->self
> is NULL. On non-x86 platforms, this code checks the upstream bridge
> vendor without verifying that an upstream bridge exists.
> 
> The function comment states "check upstream bridge is root or PLX bridge"
> but the code doesn't actually check for the root bus case. The x86 path
> returns true unconditionally, but the non-x86 path assumes dev->bus->self
> is non-NULL.

I think the comment here should be revised. On the non-x86 path, it should
returns true only when the upstream bridge is PLX or CPU is Kunpeng 920.
When dev->bus->self is null, it is necessary to return false. Although in reality,
it is not null either when device is directly attached to the root bus.

> 
> > +		return true;
> > +	else
> > +		return false;
> > +#endif
> > +}
> 
> [ ... ]
> 
> > diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
> > index 3171a98d81a6..653a98d9835b 100644
> > --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
> > +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
> > @@ -234,6 +234,12 @@ static irqreturn_t __ngbe_msix_misc(struct wx *wx, u32 eicr)
> >  	if (eicr & NGBE_PX_MISC_IC_VF_MBOX)
> >  		wx_msg_task(wx);
> >
> > +	if (eicr & NGBE_PX_MISC_PCIE_REQ_ERR) {
> > +		wx_warn(wx, "PCIe Request Error founded on Lan %d\n",
> > +			wx->bus.func);
> 
> This isn't a bug, but there's a typo: 'founded' should be 'found'.
> 
> > +		wx_pcie_error_handler(wx);
> > +	}
> > +
> >  	if (unlikely(eicr & NGBE_PX_MISC_IC_TIMESYNC))
> >  		wx_ptp_check_pps_event(wx);
> >
> 
> [ ... ]
> 
> > diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
> > index aa14958d439a..c832100bcb61 100644
> > --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
> > +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
> > @@ -6,6 +6,7 @@
> >
> >  #include "../libwx/wx_type.h"
> >  #include "../libwx/wx_lib.h"
> > +#include "../libwx/wx_err.h"
> >  #include "../libwx/wx_ptp.h"
> >  #include "../libwx/wx_hw.h"
> >  #include "../libwx/wx_sriov.h"
> > @@ -178,6 +179,12 @@ static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data)
> >  		handle_nested_irq(sub_irq);
> >  		nhandled++;
> >  	}
> > +	if (eicr & TXGBE_PX_MISC_PCIE_REQ_ERR) {
> > +		wx_warn(wx, "PCIe Request Error founded on Lan %d\n",
> > +			wx->bus.func);
> 
> This isn't a bug, but there's a typo: 'founded' should be 'found'.
> 
> > +		wx_pcie_error_handler(wx);
> > +		nhandled++;
> > +	}
> >  	if (unlikely(eicr & TXGBE_PX_MISC_IC_TIMESYNC)) {
> >  		wx_ptp_check_pps_event(wx);
> >  		nhandled++;
> 


  reply	other threads:[~2026-02-04  6:11 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-03  7:57 [PATCH net-next v1 0/7] Wangxun improvement and new support Jiawen Wu
2026-02-03  7:57 ` [PATCH net-next v1 1/7] net: ngbe: move the WOL functions to libwx Jiawen Wu
2026-02-03 13:52   ` Andrew Lunn
2026-02-04  2:05     ` Jiawen Wu
2026-02-03  7:57 ` [PATCH net-next v1 2/7] net: ngbe: improve the reset flow Jiawen Wu
2026-02-03 13:56   ` Andrew Lunn
2026-02-04  2:20     ` Jiawen Wu
2026-02-03  7:57 ` [PATCH net-next v1 3/7] net: wangxun: move reusable PCI driver ops functions into libwx Jiawen Wu
2026-02-04  2:22   ` [net-next,v1,3/7] " Jakub Kicinski
2026-02-04  6:12     ` Jiawen Wu
2026-02-03  7:57 ` [PATCH net-next v1 4/7] net: txgbe: add power management support Jiawen Wu
2026-02-03  7:57 ` [PATCH net-next v1 5/7] net: wangxun: move ethtool_ops.set_channels into libwx Jiawen Wu
2026-02-03  7:57 ` [PATCH net-next v1 6/7] net: wangxun: add Tx timeout process Jiawen Wu
2026-02-03  7:57 ` [PATCH net-next v1 7/7] net: wangxun: add pcie error handler Jiawen Wu
2026-02-04  1:34   ` kernel test robot
2026-02-04  2:22   ` [net-next,v1,7/7] " Jakub Kicinski
2026-02-04  6:10     ` Jiawen Wu [this message]
2026-02-04 11:18   ` [PATCH net-next v1 7/7] " kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='06f201dc959c$ec2b74e0$c4825ea0$@trustnetic.com' \
    --to=jiawenwu@trustnetic.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=kuba@kernel.org \
    --cc=mengyuanlou@net-swift.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.