public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Raju Rangoju <Raju.Rangoju@amd.com>
To: <netdev@vger.kernel.org>
Cc: <linux-kernel@vger.kernel.org>, <pabeni@redhat.com>,
	<kuba@kernel.org>, <edumazet@google.com>, <davem@davemloft.net>,
	<andrew+netdev@lunn.ch>, <Thomas.Lendacky@amd.com>,
	<maxime.chevallier@bootlin.com>,
	Raju Rangoju <Raju.Rangoju@amd.com>
Subject: [PATCH net] amd-xgbe: synchronize KR training with device operations
Date: Fri, 13 Mar 2026 19:12:10 +0530	[thread overview]
Message-ID: <20260313134210.3824872-1-Raju.Rangoju@amd.com> (raw)

During 10GBASE-KR link training, the PHY state machine can be corrupted
if device stop or rate change operations are initiated while training is
in progress. This manifests as:

  - Link stability issues after interface down/up cycles
  - PHY state machine lockups requiring a full driver reset
  - Intermittent link failures on Inphi re-driver configurations

The root cause is that the firmware mailbox operations for device stop
and rate changes can interfere with ongoing KR training sequences,
leaving the PHY in an inconsistent state.

Add synchronization to prevent device operations from interrupting
active KR training:

  - Introduce a mailbox mutex to serialize firmware command access
  - Wait for KR training completion (or timeout) before proceeding
    with stop/rate change operations
  - Only wait when KR training is actually active (KR mode with
    autoneg enabled or Inphi re-driver present)
  - Use a 500ms timeout to handle hung training sequences

The mailbox mutex protects the critical section of firmware command
submission and completion checking, preventing concurrent mailbox
access from multiple code paths.

Testing on AMD platforms with both direct-attach and Inphi re-driver
configurations shows this eliminates PHY state corruption during
interface operations and link changes.

Fixes: 549b32af9f7c ("amd-xgbe: Simplify mailbox interface rate change code")
Signed-off-by: Raju Rangoju <Raju.Rangoju@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c    |  2 +
 drivers/net/ethernet/amd/xgbe/xgbe-main.c   |  1 +
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 51 ++++++++++++++++++++-
 drivers/net/ethernet/amd/xgbe/xgbe.h        |  5 ++
 4 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 23beea48ae26..3913eb7e1da3 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1321,6 +1321,8 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
 
 	DBGPR("-->xgbe_stop\n");
 
+	xgbe_check_kr_training_in_progress(pdata);
+
 	if (test_bit(XGBE_STOPPED, &pdata->dev_state))
 		return;
 
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index 7d45ea22a02e..5f3ab29707b7 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -78,6 +78,7 @@ struct xgbe_prv_data *xgbe_alloc_pdata(struct device *dev)
 
 	spin_lock_init(&pdata->xpcs_lock);
 	mutex_init(&pdata->rss_mutex);
+	mutex_init(&pdata->mailbox_lock);
 	spin_lock_init(&pdata->tstamp_lock);
 	mutex_init(&pdata->i2c_mutex);
 	init_completion(&pdata->i2c_complete);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index b8cf6ccfe641..142eb952a29c 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -2095,12 +2095,57 @@ static void xgbe_phy_pll_ctrl(struct xgbe_prv_data *pdata, bool enable)
 	usleep_range(100, 200);
 }
 
+static bool xgbe_phy_port_is_inphi(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+	/* Inphi port: a re-driver is present and its model is 4223 or 4227 */
+	return phy_data->redrv &&
+	       (phy_data->redrv_model == XGBE_PHY_REDRV_MODEL_4223 ||
+		phy_data->redrv_model == XGBE_PHY_REDRV_MODEL_4227);
+}
+
+void xgbe_check_kr_training_in_progress(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned long kr_timeout;
+	int wait;
+
+	/* Poll until KR training completes or the wait window expires.
+	 * Return immediately unless an Inphi re-driver is present or the
+	 * PHY is currently in KR mode with autoneg enabled.
+	 */
+	if (!xgbe_phy_port_is_inphi(pdata) &&
+	    !(phy_data->cur_mode == XGBE_MODE_KR &&
+	      pdata->phy.autoneg == AUTONEG_ENABLE))
+		return;
+
+	wait = XGBE_KR_TRAINING_WAIT_ITER;
+	while (wait--) {
+		/* Stop once past kr_start_time + AN timeout + KR wait */
+		kr_timeout = pdata->kr_start_time +
+			msecs_to_jiffies(XGBE_AN_MS_TIMEOUT +
+					XGBE_KR_TRAINING_WAIT_MS);
+		if (time_after(jiffies, kr_timeout))
+			break;
+
+		/* an_result reports completion - no need to keep waiting */
+		if (pdata->an_result == XGBE_AN_COMPLETE)
+			return;
+
+		usleep_range(10000, 11000);
+	}
+}
+
 static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
-					enum xgbe_mb_cmd cmd, enum xgbe_mb_subcmd sub_cmd)
+					enum xgbe_mb_cmd cmd,
+					enum xgbe_mb_subcmd sub_cmd)
 {
 	unsigned int s0 = 0;
 	unsigned int wait;
 
+	xgbe_check_kr_training_in_progress(pdata);
+
 	/* Disable PLL re-initialization during FW command processing */
 	xgbe_phy_pll_ctrl(pdata, false);
 
@@ -2115,7 +2160,9 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
 	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, cmd);
 	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, sub_cmd);
 
-	/* Issue the command */
+	/* Acquire mailbox lock for firmware command */
+	guard(mutex)(&pdata->mailbox_lock);
+
 	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
 	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
 	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 438033a71523..238eeee0d422 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -202,6 +202,7 @@
 #define XGBE_AN_MS_TIMEOUT		500
 #define XGBE_LINK_TIMEOUT		5
 #define XGBE_KR_TRAINING_WAIT_ITER	50
+#define XGBE_KR_TRAINING_WAIT_MS	100
 
 #define XGBE_SGMII_AN_LINK_DUPLEX	BIT(1)
 #define XGBE_SGMII_AN_LINK_SPEED	(BIT(2) | BIT(3))
@@ -1015,6 +1016,9 @@ struct xgbe_prv_data {
 	/* RSS addressing mutex */
 	struct mutex rss_mutex;
 
+	/* Firmware mailbox mutex */
+	struct mutex mailbox_lock;
+
 	/* Flags representing xgbe_state */
 	unsigned long dev_state;
 
@@ -1252,6 +1256,7 @@ struct xgbe_prv_data {
 };
 
 /* Function prototypes*/
+void xgbe_check_kr_training_in_progress(struct xgbe_prv_data *pdata);
 struct xgbe_prv_data *xgbe_alloc_pdata(struct device *);
 void xgbe_free_pdata(struct xgbe_prv_data *);
 void xgbe_set_counts(struct xgbe_prv_data *);
-- 
2.34.1


             reply	other threads:[~2026-03-13 13:42 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-13 13:42 Raju Rangoju [this message]
2026-03-17 12:08 ` [net] amd-xgbe: synchronize KR training with device operations Paolo Abeni
2026-03-17 12:09 ` [PATCH net] " Paolo Abeni

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260313134210.3824872-1-Raju.Rangoju@amd.com \
    --to=raju.rangoju@amd.com \
    --cc=Thomas.Lendacky@amd.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maxime.chevallier@bootlin.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox