* [PATCH v6 net-next 6/8] ice: implement CPI support for E825C
From: Grzegorz Nitka @ 2026-04-09 23:51 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, intel-wired-lan, poros, richardcochran,
andrew+netdev, przemyslaw.kitszel, anthony.l.nguyen,
Prathosh.Satish, ivecera, jiri, arkadiusz.kubalewski,
vadim.fedorenko, donald.hunter, horms, pabeni, kuba, davem,
edumazet, Grzegorz Nitka
In-Reply-To: <20260409235122.436749-1-grzegorz.nitka@intel.com>
Add full CPI (Converged PHY Interface) command handling required for
E825C devices. The CPI interface allows the driver to interact with
PHY-side control logic through the LM/PHY command registers, including
enabling/disabling/selection of PHY reference clock.
This patch introduces:
- a new CPI subsystem (ice_cpi.c / ice_cpi.h) implementing the CPI
request/acknowledge state machine, including REQ/ACK protocol,
command execution, and response handling
- helper functions for reading/writing PHY registers over Sideband
Queue
- CPI command execution API (ice_cpi_exec) and a helper for enabling or
disabling Tx reference clocks (CPI 0xF1 opcode 'Config PHY clocking')
- serialization of CPI transactions inside the CPI core.
CPI REQ/ACK is a multi-step handshake and must be executed
atomically per PHY. The lock is centralized in ice_cpi_exec() and
uses adapter-scoped per-PHY mutexes, which match the hardware sharing
model across PFs.
- addition of the non-posted write opcode (wr_np) to SBQ
- Makefile integration to build CPI support together with the PTP stack
This provides the infrastructure necessary to support PHY-side
configuration flows on E825C and is required for advanced link control
and Tx reference clock management.
Reviewed-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
Signed-off-by: Grzegorz Nitka <grzegorz.nitka@intel.com>
---
drivers/net/ethernet/intel/ice/Makefile | 2 +-
drivers/net/ethernet/intel/ice/ice_adapter.c | 4 +
drivers/net/ethernet/intel/ice/ice_adapter.h | 7 +
drivers/net/ethernet/intel/ice/ice_cpi.c | 364 +++++++++++++++++++
drivers/net/ethernet/intel/ice/ice_cpi.h | 61 ++++
drivers/net/ethernet/intel/ice/ice_sbq_cmd.h | 5 +-
drivers/net/ethernet/intel/ice/ice_type.h | 2 +
7 files changed, 442 insertions(+), 3 deletions(-)
create mode 100644 drivers/net/ethernet/intel/ice/ice_cpi.c
create mode 100644 drivers/net/ethernet/intel/ice/ice_cpi.h
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 5b2c666496e7..38db476ab2ec 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -54,7 +54,7 @@ ice-$(CONFIG_PCI_IOV) += \
ice_vf_mbx.o \
ice_vf_vsi_vlan_ops.o \
ice_vf_lib.o
-ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o ice_dpll.o ice_tspll.o
+ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o ice_dpll.o ice_tspll.o ice_cpi.o
ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o
ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
index cbb57060bd56..2dc3629d6d0f 100644
--- a/drivers/net/ethernet/intel/ice/ice_adapter.c
+++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
@@ -62,6 +62,8 @@ static struct ice_adapter *ice_adapter_new(struct pci_dev *pdev)
adapter->index = ice_adapter_index(pdev);
spin_lock_init(&adapter->ptp_gltsyn_time_lock);
spin_lock_init(&adapter->txq_ctx_lock);
+ for (int i = 0; i < ARRAY_SIZE(adapter->cpi_phy_lock); i++)
+ mutex_init(&adapter->cpi_phy_lock[i]);
refcount_set(&adapter->refcount, 1);
mutex_init(&adapter->ports.lock);
@@ -73,6 +75,8 @@ static struct ice_adapter *ice_adapter_new(struct pci_dev *pdev)
static void ice_adapter_free(struct ice_adapter *adapter)
{
WARN_ON(!list_empty(&adapter->ports.ports));
+ for (int i = 0; i < ARRAY_SIZE(adapter->cpi_phy_lock); i++)
+ mutex_destroy(&adapter->cpi_phy_lock[i]);
mutex_destroy(&adapter->ports.lock);
kfree(adapter);
diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h
index e95266c7f20b..fa238a6a0e1a 100644
--- a/drivers/net/ethernet/intel/ice/ice_adapter.h
+++ b/drivers/net/ethernet/intel/ice/ice_adapter.h
@@ -5,9 +5,12 @@
#define _ICE_ADAPTER_H_
#include <linux/types.h>
+#include <linux/mutex.h>
#include <linux/spinlock_types.h>
#include <linux/refcount_types.h>
+#include "ice_type.h"
+
struct pci_dev;
struct ice_pf;
@@ -31,6 +34,8 @@ struct ice_port_list {
* @ptp_gltsyn_time_lock: Spinlock protecting access to the GLTSYN_TIME
* register of the PTP clock.
* @txq_ctx_lock: Spinlock protecting access to the GLCOMM_QTX_CNTX_CTL register
+ * @cpi_phy_lock: Per-PHY mutex serializing CPI REQ/ACK transactions.
+ * Index 0 = PHY0, index 1 = PHY1. Only used on E825C.
* @ctrl_pf: Control PF of the adapter
* @ports: Ports list
* @index: 64-bit index cached for collision detection on 32bit systems
@@ -41,6 +46,8 @@ struct ice_adapter {
spinlock_t ptp_gltsyn_time_lock;
/* For access to GLCOMM_QTX_CNTX_CTL register */
spinlock_t txq_ctx_lock;
+ /* Serialize CPI REQ/ACK transactions per PHY (E825C only) */
+ struct mutex cpi_phy_lock[ICE_E825_MAX_PHYS];
struct ice_pf *ctrl_pf;
struct ice_port_list ports;
diff --git a/drivers/net/ethernet/intel/ice/ice_cpi.c b/drivers/net/ethernet/intel/ice/ice_cpi.c
new file mode 100644
index 000000000000..22c8d5a9f859
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_cpi.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2026 Intel Corporation */
+
+#include "ice_type.h"
+#include "ice_common.h"
+#include "ice_ptp_hw.h"
+#include "ice.h"
+#include "ice_cpi.h"
+
+/**
+ * ice_cpi_get_dest_dev - map a PHY index to its sideband queue device
+ * @hw: pointer to the HW struct
+ * @phy: phy index of port the CPI action is taken on
+ *
+ * Return: ice_sbq_dev_phy_0 or ice_sbq_dev_phy_0_peer.
+ */
+static enum ice_sbq_dev_id ice_cpi_get_dest_dev(struct ice_hw *hw, u8 phy)
+{
+	u8 local_phy = hw->lane_num / hw->ptp.ports_per_phy;
+	bool peer;
+
+	/* Lanes 4..7 seen by the driver are lanes 0..3 of a second PHY.
+	 * Single complex E825C: PHY 0 is device phy_0, PHY 1 is phy_0_peer.
+	 * Dual complex E825C: phy_0 is the PHY on the current complex and
+	 * phy_0_peer is the PHY on the other complex.
+	 */
+	if (ice_is_dual(hw))
+		peer = phy != local_phy;
+	else
+		peer = phy != 0;
+	return peer ? ice_sbq_dev_phy_0_peer : ice_sbq_dev_phy_0;
+}
+
+/**
+ * ice_cpi_write_phy - write a CPI register over the sideband queue
+ * @hw: pointer to the HW struct
+ * @phy: phy index of port the CPI action is taken on
+ * @addr: PHY register address
+ * @val: value to be written
+ *
+ * Sends a non-posted write to the device chosen by ice_cpi_get_dest_dev().
+ *
+ * Return: 0 on success, error code from ice_sbq_rw_reg() otherwise.
+ */
+static int ice_cpi_write_phy(struct ice_hw *hw, u8 phy, u32 addr, u32 val)
+{
+	struct ice_sbq_msg_input msg = {};
+	int ret;
+
+	msg.dest_dev = ice_cpi_get_dest_dev(hw, phy);
+	msg.opcode = ice_sbq_msg_wr_np;
+	msg.msg_addr_low = lower_16_bits(addr);
+	msg.msg_addr_high = upper_16_bits(addr);
+	msg.data = val;
+
+	ret = ice_sbq_rw_reg(hw, &msg, LIBIE_AQ_FLAG_RD);
+	if (!ret)
+		return 0;
+
+	ice_debug(hw, ICE_DBG_PTP,
+		  "Failed to write CPI msg to phy %d, err: %d\n", phy, ret);
+	return ret;
+}
+
+/**
+ * ice_cpi_read_phy - read a CPI register over the sideband queue
+ * @hw: pointer to the HW struct
+ * @phy: phy index of port the CPI action is taken on
+ * @addr: PHY register address
+ * @val: output, receives the register value; untouched on failure
+ *
+ * Sends a read to the device chosen by ice_cpi_get_dest_dev().
+ *
+ * Return: 0 on success, error code from ice_sbq_rw_reg() otherwise.
+ */
+static int ice_cpi_read_phy(struct ice_hw *hw, u8 phy, u32 addr, u32 *val)
+{
+	struct ice_sbq_msg_input msg = {};
+	int ret;
+
+	msg.dest_dev = ice_cpi_get_dest_dev(hw, phy);
+	msg.opcode = ice_sbq_msg_rd;
+	msg.msg_addr_low = lower_16_bits(addr);
+	msg.msg_addr_high = upper_16_bits(addr);
+
+	ret = ice_sbq_rw_reg(hw, &msg, LIBIE_AQ_FLAG_RD);
+	if (!ret) {
+		/* the value read is taken from msg.data after the call */
+		*val = msg.data;
+		return 0;
+	}
+
+	ice_debug(hw, ICE_DBG_PTP,
+		  "Failed to read CPI msg from phy %d, err: %d\n",
+		  phy, ret);
+	return ret;
+}
+
+/**
+ * ice_cpi_wait_req0_ack0 - wait until the CPI interface is idle
+ * @hw: pointer to the HW struct
+ * @phy: phy index of port the CPI action is taken on
+ *
+ * The interface is idle when both LM.CMD.REQ and PHY.CMD.ACK read as 0.
+ * While ACK is still set the registers are re-read up to
+ * CPI_RETRIES_COUNT times, sleeping CPI_RETRIES_CADENCE_MS in between.
+ *
+ * Return: 0 when idle, -EBUSY if LM.CMD.REQ is set, -EFAULT if the PHY
+ * reports an error, -ETIMEDOUT if ACK stays asserted, or a read error.
+ */
+static int ice_cpi_wait_req0_ack0(struct ice_hw *hw, u8 phy)
+{
+	for (int i = 0; i < CPI_RETRIES_COUNT; i++) {
+		u32 phy_val;
+		u32 lm_val;
+		int err;
+
+		/* REQ already set: another CPI client is using the interface */
+		err = ice_cpi_read_phy(hw, phy, CPI0_LM1_CMD_DATA, &lm_val);
+		if (err)
+			return err;
+		if (FIELD_GET(CPI_LM_CMD_REQ_M, lm_val))
+			return -EBUSY;
+
+		/* check if PHY.ACK is deasserted */
+		err = ice_cpi_read_phy(hw, phy, CPI0_PHY1_CMD_DATA, &phy_val);
+		if (err)
+			return err;
+		if (FIELD_GET(CPI_PHY_CMD_ERROR_M, phy_val))
+			return -EFAULT;
+		/* req0 and ack0 at this point - ready to go */
+		if (!FIELD_GET(CPI_PHY_CMD_ACK_M, phy_val))
+			return 0;
+
+		msleep(CPI_RETRIES_CADENCE_MS);
+	}
+
+	return -ETIMEDOUT;
+}
+
+/**
+ * ice_cpi_wait_ack - poll PHY.ACK until it reaches the requested state
+ * @hw: pointer to the HW struct
+ * @phy: phy index of port the CPI action is taken on
+ * @asserted: true to wait for ACK set, false to wait for ACK cleared
+ * @data: optional storage for the raw PHY command register value
+ *
+ * Reads CPI0_PHY1_CMD_DATA up to CPI_RETRIES_COUNT times, sleeping
+ * CPI_RETRIES_CADENCE_MS after each miss. @data is written only when
+ * @asserted is true, ACK was seen set and @data is not NULL.
+ *
+ * Return: 0 on success, -EFAULT if the PHY reports an error, -ETIMEDOUT
+ * when retries are exhausted, or the error from the register read.
+ */
+static int ice_cpi_wait_ack(struct ice_hw *hw, u8 phy, bool asserted,
+			    u32 *data)
+{
+	int tries = CPI_RETRIES_COUNT;
+
+	while (tries-- > 0) {
+		u32 reg;
+		int ret;
+
+		ret = ice_cpi_read_phy(hw, phy, CPI0_PHY1_CMD_DATA, &reg);
+		if (ret)
+			return ret;
+		if (FIELD_GET(CPI_PHY_CMD_ERROR_M, reg))
+			return -EFAULT;
+		if (asserted == !!FIELD_GET(CPI_PHY_CMD_ACK_M, reg)) {
+			if (asserted && data)
+				*data = reg;
+			return 0;
+		}
+
+		msleep(CPI_RETRIES_CADENCE_MS);
+	}
+
+	return -ETIMEDOUT;
+}
+
+/* Wait for PHY.ACK to clear; nothing is read back */
+#define ice_cpi_wait_ack0(hw, phy) ice_cpi_wait_ack(hw, phy, false, NULL)
+
+/* Wait for PHY.ACK to be set; raw PHY register value is stored in *data */
+#define ice_cpi_wait_ack1(hw, phy, data) ice_cpi_wait_ack(hw, phy, true, data)
+
+/**
+ * ice_cpi_req0 - write the command word with the LM.REQ bit cleared
+ * @hw: pointer to the HW struct
+ * @phy: phy index of port the CPI action is taken on
+ * @data: the command data; the REQ bit is masked off before the write
+ *
+ * Return: 0 on success, negative on CPI write error
+ */
+static int ice_cpi_req0(struct ice_hw *hw, u8 phy, u32 data)
+{
+	u32 lm_cmd = data & ~CPI_LM_CMD_REQ_M;
+
+	return ice_cpi_write_phy(hw, phy, CPI0_LM1_CMD_DATA, lm_cmd);
+}
+
+/**
+ * ice_cpi_exec_cmd - writes command data to CPI interface
+ * @hw: pointer to the HW struct
+ * @phy: phy index of port the CPI action is taken on
+ * @data: LM command word, written unmodified to CPI0_LM1_CMD_DATA
+ *
+ * Return: 0 on success, otherwise negative on error
+ */
+static int ice_cpi_exec_cmd(struct ice_hw *hw, u8 phy, u32 data)
+{
+	return ice_cpi_write_phy(hw, phy, CPI0_LM1_CMD_DATA, data);
+}
+
+/**
+ * ice_cpi_phy_lock - look up the adapter-wide CPI mutex of a PHY
+ * @hw: pointer to the HW struct
+ * @phy: PHY index
+ *
+ * Return: mutex for @phy, or %NULL if @phy is out of range or no adapter.
+ */
+static struct mutex *ice_cpi_phy_lock(struct ice_hw *hw, u8 phy)
+{
+	struct ice_pf *pf = hw->back;
+
+	if (phy < ICE_E825_MAX_PHYS && pf && pf->adapter)
+		return &pf->adapter->cpi_phy_lock[phy];
+
+	return NULL;
+}
+
+/**
+ * ice_cpi_exec - executes CPI command
+ * @hw: pointer to the HW struct
+ * @phy: phy index of port the CPI action is taken on
+ * @cmd: pointer to the command struct to execute
+ * @resp: user allocated response, written after the PHY echoed the opcode
+ *
+ * Runs the whole CPI REQ/ACK handshake under the per-PHY CPI mutex, so it
+ * may sleep. REQ is deasserted again even if waiting for ACK failed.
+ *
+ * Return: 0 on success, -EINVAL on NULL @cmd/@resp or no lock, else negative
+ */
+int ice_cpi_exec(struct ice_hw *hw, u8 phy,
+ const struct ice_cpi_cmd *cmd,
+ struct ice_cpi_resp *resp)
+{
+ struct mutex *cpi_lock;
+ u32 phy_cmd, lm_cmd = 0;
+ int err, err1 = 0;
+
+ if (!cmd || !resp)
+ return -EINVAL;
+
+ cpi_lock = ice_cpi_phy_lock(hw, phy);
+ if (!cpi_lock)
+ return -EINVAL;
+
+ mutex_lock(cpi_lock);
+
+ lm_cmd =
+ FIELD_PREP(CPI_LM_CMD_REQ_M, CPI_LM_CMD_REQ) |
+ FIELD_PREP(CPI_LM_CMD_GET_SET_M, cmd->set) |
+ FIELD_PREP(CPI_LM_CMD_OPCODE_M, cmd->opcode) |
+ FIELD_PREP(CPI_LM_CMD_PORTLANE_M, cmd->port) |
+ FIELD_PREP(CPI_LM_CMD_DATA_M, cmd->data);
+
+ /* 1. Try to acquire the bus, PHY ACK should be low before we begin */
+ err = ice_cpi_wait_req0_ack0(hw, phy);
+ if (err)
+ goto cpi_exec_exit;
+
+ /* 2. We start the CPI request */
+ err = ice_cpi_exec_cmd(hw, phy, lm_cmd);
+ if (err)
+ goto cpi_exec_exit;
+
+ /* 3. Wait for CPI confirmation, PHY ACK should be asserted and opcode
+ * echoed in the response
+ */
+ err = ice_cpi_wait_ack1(hw, phy, &phy_cmd);
+ if (err)
+ goto cpi_deassert;
+
+ if (FIELD_GET(CPI_PHY_CMD_ACK_M, phy_cmd) &&
+ FIELD_GET(CPI_LM_CMD_OPCODE_M, lm_cmd) !=
+ FIELD_GET(CPI_PHY_CMD_OPCODE_M, phy_cmd)) {
+ err = -EFAULT;
+ goto cpi_deassert;
+ }
+
+ resp->opcode = FIELD_GET(CPI_PHY_CMD_OPCODE_M, phy_cmd);
+ resp->data = FIELD_GET(CPI_PHY_CMD_DATA_M, phy_cmd);
+ resp->port = FIELD_GET(CPI_PHY_CMD_PORTLANE_M, phy_cmd);
+
+cpi_deassert:
+ /* 4. We deassert REQ */
+ err1 = ice_cpi_req0(hw, phy, lm_cmd);
+ if (err1)
+ goto cpi_exec_exit;
+
+ /* 5. PHY ACK should be deasserted in response */
+ err1 = ice_cpi_wait_ack0(hw, phy);
+
+cpi_exec_exit:
+ /* the first error wins; err1 is reported only if err is still 0 */
+ if (!err)
+ err = err1;
+
+ mutex_unlock(cpi_lock);
+
+ return err;
+}
+
+/**
+ * ice_cpi_set_cmd - issue a CPI command with the SET flag
+ * @hw: pointer to the HW struct
+ * @opcode: CPI command opcode
+ * @phy: phy index CPI command is applied for
+ * @port_lane: ephy index CPI command is applied for
+ * @data: CPI opcode context specific data
+ *
+ * Return: 0 on success, negative error code from ice_cpi_exec() otherwise.
+ */
+static int ice_cpi_set_cmd(struct ice_hw *hw, u16 opcode, u8 phy, u8 port_lane,
+			   u16 data)
+{
+	struct ice_cpi_resp unused_resp = {0};
+	struct ice_cpi_cmd set_cmd = {0};
+
+	set_cmd.port = port_lane;
+	set_cmd.opcode = opcode;
+	set_cmd.data = data;
+	set_cmd.set = true;
+	/* the response is required by ice_cpi_exec() but not consumed here */
+	return ice_cpi_exec(hw, phy, &set_cmd, &unused_resp);
+}
+
+/**
+ * ice_cpi_ena_dis_clk_ref - switch a Tx reference clock on or off
+ * @hw: pointer to the HW struct
+ * @phy: phy index of port for which Tx reference clock is enabled/disabled
+ * @clk: Tx reference clock to enable or disable
+ * @enable: true to enable the clock, false to disable it
+ *
+ * Sends CPI opcode CPI_OPCODE_PHY_CLK as a SET command with port/lane 0.
+ * @phy selects the PHY the command is sent to and fills the PHY_SEL field.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int ice_cpi_ena_dis_clk_ref(struct ice_hw *hw, u8 phy,
+			    enum ice_e825c_ref_clk clk, bool enable)
+{
+	u16 ctrl = enable ? CPI_OPCODE_PHY_CLK_ENABLE :
+			    CPI_OPCODE_PHY_CLK_DISABLE;
+	u16 val;
+
+	val = FIELD_PREP(CPI_OPCODE_PHY_CLK_PHY_SEL_M, phy) |
+	      FIELD_PREP(CPI_OPCODE_PHY_CLK_REF_CTRL_M, ctrl) |
+	      FIELD_PREP(CPI_OPCODE_PHY_CLK_REF_SEL_M, clk);
+
+	return ice_cpi_set_cmd(hw, CPI_OPCODE_PHY_CLK, phy, 0, val);
+}
+
diff --git a/drivers/net/ethernet/intel/ice/ice_cpi.h b/drivers/net/ethernet/intel/ice/ice_cpi.h
new file mode 100644
index 000000000000..932fe0c0824a
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_cpi.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2018-2025 Intel Corporation */
+
+#ifndef _ICE_CPI_H_
+#define _ICE_CPI_H_
+
+#define CPI0_PHY1_CMD_DATA 0x7FD028
+#define CPI0_LM1_CMD_DATA 0x7FD024
+#define CPI_RETRIES_COUNT 10
+#define CPI_RETRIES_CADENCE_MS 100
+
+/* CPI PHY CMD DATA register (CPI0_PHY1_CMD_DATA) */
+#define CPI_PHY_CMD_DATA_M GENMASK(15, 0)
+#define CPI_PHY_CMD_OPCODE_M GENMASK(23, 16)
+#define CPI_PHY_CMD_PORTLANE_M GENMASK(26, 24)
+#define CPI_PHY_CMD_RSVD_M GENMASK(29, 27)
+#define CPI_PHY_CMD_ERROR_M BIT(30)
+#define CPI_PHY_CMD_ACK_M BIT(31)
+
+/* CPI LM CMD DATA register (CPI0_LM1_CMD_DATA) */
+#define CPI_LM_CMD_DATA_M GENMASK(15, 0)
+#define CPI_LM_CMD_OPCODE_M GENMASK(23, 16)
+#define CPI_LM_CMD_PORTLANE_M GENMASK(26, 24)
+#define CPI_LM_CMD_RSVD_M GENMASK(28, 27)
+#define CPI_LM_CMD_GET_SET_M BIT(29)
+#define CPI_LM_CMD_RESET_M BIT(30)
+#define CPI_LM_CMD_REQ_M BIT(31)
+
+#define CPI_OPCODE_PHY_CLK 0xF1
+#define CPI_OPCODE_PHY_CLK_PHY_SEL_M GENMASK(9, 6)
+#define CPI_OPCODE_PHY_CLK_REF_CTRL_M GENMASK(5, 4)
+#define CPI_OPCODE_PHY_CLK_PORT_SEL 0
+#define CPI_OPCODE_PHY_CLK_DISABLE 1
+#define CPI_OPCODE_PHY_CLK_ENABLE 2
+#define CPI_OPCODE_PHY_CLK_REF_SEL_M GENMASK(3, 0)
+
+#define CPI_OPCODE_PHY_PCS_RESET 0xF0
+#define CPI_OPCODE_PHY_PCS_ONPI_RESET_VAL 0x3F
+
+#define CPI_LM_CMD_REQ 1
+#define CPI_LM_CMD_SET 1
+
+struct ice_cpi_cmd {
+ u8 port; /* CPI_LM_CMD_PORTLANE_M field of the LM command word */
+ u8 opcode; /* CPI_LM_CMD_OPCODE_M field */
+ u16 data; /* CPI_LM_CMD_DATA_M field */
+ bool set; /* CPI_LM_CMD_GET_SET_M bit */
+};
+
+struct ice_cpi_resp {
+ u8 port; /* CPI_PHY_CMD_PORTLANE_M field of the PHY response word */
+ u8 opcode; /* CPI_PHY_CMD_OPCODE_M field */
+ u16 data; /* CPI_PHY_CMD_DATA_M field */
+};
+
+/* @phy below is a PHY index (bounded by ICE_E825_MAX_PHYS), not a port */
+int ice_cpi_exec(struct ice_hw *hw, u8 phy, const struct ice_cpi_cmd *cmd,
+		 struct ice_cpi_resp *resp);
+int ice_cpi_ena_dis_clk_ref(struct ice_hw *hw, u8 phy,
+			    enum ice_e825c_ref_clk clk, bool enable);
+#endif /* _ICE_CPI_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h
index 21bb861febbf..226243d32968 100644
--- a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h
@@ -54,8 +54,9 @@ enum ice_sbq_dev_id {
};
enum ice_sbq_msg_opcode {
- ice_sbq_msg_rd = 0x00,
- ice_sbq_msg_wr = 0x01
+ ice_sbq_msg_rd = 0x00,
+ ice_sbq_msg_wr = 0x01,
+ ice_sbq_msg_wr_np = 0x02
};
#define ICE_SBQ_MSG_FLAGS 0x40
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 1e82f4c40b32..d9a5c1aae7c2 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -893,6 +893,8 @@ struct ice_ptp_hw {
u8 ports_per_phy;
};
+#define ICE_E825_MAX_PHYS 2
+
/* Port hardware description */
struct ice_hw {
u8 __iomem *hw_addr;
--
2.39.3
^ permalink raw reply related
* [PATCH v6 net-next 5/8] ice: introduce TXC DPLL device and TX ref clock pin framework for E825
From: Grzegorz Nitka @ 2026-04-09 23:51 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, intel-wired-lan, poros, richardcochran,
andrew+netdev, przemyslaw.kitszel, anthony.l.nguyen,
Prathosh.Satish, ivecera, jiri, arkadiusz.kubalewski,
vadim.fedorenko, donald.hunter, horms, pabeni, kuba, davem,
edumazet, Grzegorz Nitka
In-Reply-To: <20260409235122.436749-1-grzegorz.nitka@intel.com>
E825 devices provide a dedicated TX clock (TXC) domain which may be
driven by multiple reference clock sources, including external board
references and port-derived SyncE. To support future TX clock control
and observability through the Linux DPLL subsystem, introduce a
separate TXC DPLL device and a framework for representing TX reference
clock inputs.
This change adds a new DPLL pin type (TXCLK) and registers TX reference
clock pins for E825-based devices:
- EXT_EREF0: a board-level external electrical reference
- SYNCE: a port-derived SyncE reference described via firmware nodes
The TXC DPLL device is created and managed alongside the existing
PPS and EEC DPLL instances. TXCLK pins are registered directly or
deferred via a notifier when backed by fwnode-described firmware pins.
A per-pin attribute encodes the TX reference source associated with
each TXCLK pin.
At this stage, TXCLK pin state callbacks and TXC DPLL lock status
reporting are implemented as placeholders. Pin state getters always
return DISCONNECTED, and the TXC DPLL is initialized in the UNLOCKED
state. No hardware configuration or TX reference switching is
performed yet.
This patch establishes the structural groundwork required for
hardware-backed TX reference selection, verification, and
synchronization status reporting, which will be implemented in
subsequent patches.
Reviewed-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
Signed-off-by: Grzegorz Nitka <grzegorz.nitka@intel.com>
---
drivers/net/ethernet/intel/ice/ice_dpll.c | 296 ++++++++++++++++++--
drivers/net/ethernet/intel/ice/ice_dpll.h | 6 +
drivers/net/ethernet/intel/ice/ice_ptp_hw.h | 7 +
3 files changed, 286 insertions(+), 23 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c
index 62f75701d652..ab62aac77399 100644
--- a/drivers/net/ethernet/intel/ice/ice_dpll.c
+++ b/drivers/net/ethernet/intel/ice/ice_dpll.c
@@ -19,6 +19,11 @@
#define ICE_DPLL_SW_PIN_INPUT_BASE_QSFP 6
#define ICE_DPLL_SW_PIN_OUTPUT_BASE 0
+#define E825_EXT_EREF_PIN_IDX 0
+#define E825_EXT_SYNCE_PIN_IDX 1
+#define E825_RCLK_PARENT_0_PIN_IDX 0
+#define E825_RCLK_PARENT_1_PIN_IDX 1
+
#define ICE_DPLL_PIN_SW_INPUT_ABS(in_idx) \
(ICE_DPLL_SW_PIN_INPUT_BASE_SFP + (in_idx))
@@ -57,6 +62,7 @@
* @ICE_DPLL_PIN_TYPE_OUTPUT: output pin
* @ICE_DPLL_PIN_TYPE_RCLK_INPUT: recovery clock input pin
* @ICE_DPLL_PIN_TYPE_SOFTWARE: software controlled SMA/U.FL pins
+ * @ICE_DPLL_PIN_TYPE_TXCLK: transmit clock reference input pin
*/
enum ice_dpll_pin_type {
ICE_DPLL_PIN_INVALID,
@@ -64,6 +70,7 @@ enum ice_dpll_pin_type {
ICE_DPLL_PIN_TYPE_OUTPUT,
ICE_DPLL_PIN_TYPE_RCLK_INPUT,
ICE_DPLL_PIN_TYPE_SOFTWARE,
+ ICE_DPLL_PIN_TYPE_TXCLK,
};
static const char * const pin_type_name[] = {
@@ -71,10 +78,13 @@ static const char * const pin_type_name[] = {
[ICE_DPLL_PIN_TYPE_OUTPUT] = "output",
[ICE_DPLL_PIN_TYPE_RCLK_INPUT] = "rclk-input",
[ICE_DPLL_PIN_TYPE_SOFTWARE] = "software",
+ [ICE_DPLL_PIN_TYPE_TXCLK] = "txclk-input",
};
static const char * const ice_dpll_sw_pin_sma[] = { "SMA1", "SMA2" };
static const char * const ice_dpll_sw_pin_ufl[] = { "U.FL1", "U.FL2" };
+static const char * const ice_dpll_ext_eref_pin = "EXT_EREF0";
+static const char * const ice_dpll_fwnode_ext_synce = "clk_ref_synce";
static const struct dpll_pin_frequency ice_esync_range[] = {
DPLL_PIN_FREQUENCY_RANGE(0, DPLL_PIN_FREQUENCY_1_HZ),
@@ -2517,12 +2527,75 @@ ice_dpll_rclk_state_on_pin_get(const struct dpll_pin *pin, void *pin_priv,
return ret;
}
+/**
+ * ice_dpll_txclk_state_on_dpll_set - set a state on TX clk pin
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @state: state to be set on pin
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback for setting a Tx reference clock pin state.
+ *
+ * Return:
+ * * -EOPNOTSUPP - always, the callback is a placeholder for now
+ */
+static int
+ice_dpll_txclk_state_on_dpll_set(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll,
+ void *dpll_priv, enum dpll_pin_state state,
+ struct netlink_ext_ack *extack)
+{
+ /*
+ * TODO: set HW accordingly to selected TX reference clock.
+ * To be added in the follow up patches.
+ */
+ return -EOPNOTSUPP;
+}
+
+/**
+ * ice_dpll_txclk_state_on_dpll_get - get a state of Tx clk reference pin
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @state: on success holds pin state on parent pin
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback, reports the state of a TX clock reference pin.
+ *
+ * Return:
+ * * 0 - always; @state is set to DPLL_PIN_STATE_DISCONNECTED (placeholder)
+ */
+static int
+ice_dpll_txclk_state_on_dpll_get(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll,
+ void *dpll_priv,
+ enum dpll_pin_state *state,
+ struct netlink_ext_ack *extack)
+{
+ /*
+ * TODO: query HW status to determine if the TX reference is selected.
+ * To be added in the follow up patches.
+ */
+ *state = DPLL_PIN_STATE_DISCONNECTED;
+
+ return 0;
+}
+
static const struct dpll_pin_ops ice_dpll_rclk_ops = {
.state_on_pin_set = ice_dpll_rclk_state_on_pin_set,
.state_on_pin_get = ice_dpll_rclk_state_on_pin_get,
.direction_get = ice_dpll_input_direction,
};
+static const struct dpll_pin_ops ice_dpll_txclk_ops = {
+ .state_on_dpll_set = ice_dpll_txclk_state_on_dpll_set,
+ .state_on_dpll_get = ice_dpll_txclk_state_on_dpll_get,
+ .direction_get = ice_dpll_input_direction,
+};
+
static const struct dpll_pin_ops ice_dpll_pin_sma_ops = {
.state_on_dpll_set = ice_dpll_sma_pin_state_set,
.state_on_dpll_get = ice_dpll_sw_pin_state_get,
@@ -3023,9 +3096,13 @@ ice_dpll_unregister_pins(struct dpll_device *dpll, struct ice_dpll_pin *pins,
{
int i;
- for (i = 0; i < count; i++)
- if (!pins[i].hidden)
- dpll_pin_unregister(dpll, pins[i].pin, ops, &pins[i]);
+ for (i = 0; i < count; i++) {
+ if (pins[i].hidden)
+ continue;
+ if (IS_ERR_OR_NULL(pins[i].pin))
+ continue;
+ dpll_pin_unregister(dpll, pins[i].pin, ops, &pins[i]);
+ }
}
/**
@@ -3199,19 +3276,40 @@ static bool ice_dpll_is_fwnode_pin(struct ice_dpll_pin *pin)
return !IS_ERR_OR_NULL(pin->fwnode);
}
+static bool ice_dpll_fwnode_eq(const struct fwnode_handle *a,
+			       const struct fwnode_handle *b)
+{
+	return a ? a == b : false;
+}
+
static void ice_dpll_pin_notify_work(struct work_struct *work)
{
struct ice_dpll_pin_work *w = container_of(work,
struct ice_dpll_pin_work,
work);
struct ice_dpll_pin *pin, *parent = w->pin;
+ bool is_tx_synce_parent = false;
struct ice_pf *pf = parent->pf;
+ bool is_rclk_parent = false;
int ret;
wait_for_completion(&pf->dplls.dpll_init);
if (!test_bit(ICE_FLAG_DPLL, pf->flags))
goto out; /* DPLL initialization failed */
+ /* Decide which parent we are handling, defensively checking FWNs */
+ is_rclk_parent =
+ ice_dpll_fwnode_eq(parent->fwnode,
+ pf->dplls.inputs[E825_RCLK_PARENT_0_PIN_IDX].fwnode) ||
+ ice_dpll_fwnode_eq(parent->fwnode,
+ pf->dplls.inputs[E825_RCLK_PARENT_1_PIN_IDX].fwnode);
+
+ is_tx_synce_parent =
+ ice_dpll_fwnode_eq(parent->fwnode,
+ pf->dplls.txclks[E825_EXT_SYNCE_PIN_IDX].fwnode);
+ if (!is_rclk_parent && !is_tx_synce_parent)
+ goto out;
+
switch (w->action) {
case DPLL_PIN_CREATED:
if (!IS_ERR_OR_NULL(parent->pin)) {
@@ -3228,16 +3326,28 @@ static void ice_dpll_pin_notify_work(struct work_struct *work)
goto out;
}
- /* Register rclk pin */
- pin = &pf->dplls.rclk;
- ret = dpll_pin_on_pin_register(parent->pin, pin->pin,
- &ice_dpll_rclk_ops, pin);
- if (ret) {
- dev_err(ice_pf_to_dev(pf),
- "Failed to register pin: %pe\n", ERR_PTR(ret));
- dpll_pin_put(parent->pin, &parent->tracker);
- parent->pin = NULL;
- goto out;
+ if (is_rclk_parent) {
+ /* Register rclk pin via on-pin relationship */
+ pin = &pf->dplls.rclk;
+ ret = dpll_pin_on_pin_register(parent->pin, pin->pin,
+ &ice_dpll_rclk_ops, pin);
+ if (ret) {
+ dev_err(ice_pf_to_dev(pf),
+ "RCLK pin register failed: %pe\n",
+ ERR_PTR(ret));
+ goto drop_parent_ref;
+ }
+ } else if (is_tx_synce_parent) {
+ /* Register TX-CLK SYNCE pin directly to TXC DPLL */
+ pin = &pf->dplls.txclks[E825_EXT_SYNCE_PIN_IDX];
+ ret = dpll_pin_register(pf->dplls.txc.dpll, pin->pin,
+ &ice_dpll_txclk_ops, pin);
+ if (ret) {
+ dev_err(ice_pf_to_dev(pf),
+ "TX SYNCE pin register failed: %pe\n",
+ ERR_PTR(ret));
+ goto drop_parent_ref;
+ }
}
break;
case DPLL_PIN_DELETED:
@@ -3246,11 +3356,18 @@ static void ice_dpll_pin_notify_work(struct work_struct *work)
goto out;
}
- /* Unregister rclk pin */
- pin = &pf->dplls.rclk;
- dpll_pin_on_pin_unregister(parent->pin, pin->pin,
- &ice_dpll_rclk_ops, pin);
-
+ if (is_rclk_parent) {
+ /* Unregister rclk pin */
+ pin = &pf->dplls.rclk;
+ dpll_pin_on_pin_unregister(parent->pin, pin->pin,
+ &ice_dpll_rclk_ops, pin);
+ } else if (is_tx_synce_parent) {
+ /* Unregister TX-CLK SYNCE pin from TXC DPLL */
+ pin = &pf->dplls.txclks[E825_EXT_SYNCE_PIN_IDX];
+ dpll_pin_unregister(pf->dplls.txc.dpll, pin->pin,
+ &ice_dpll_txclk_ops, pin);
+ }
+drop_parent_ref:
/* Drop fwnode pin reference */
dpll_pin_put(parent->pin, &parent->tracker);
parent->pin = NULL;
@@ -3276,6 +3393,12 @@ static int ice_dpll_pin_notify(struct notifier_block *nb, unsigned long action,
if (pin->fwnode != info->fwnode)
return NOTIFY_DONE; /* Not this pin */
+ /* Ignore notifications that are the outcome of internal pin
+ * registration/unregistration calls - synce pin case.
+ */
+ if (info->src_clock_id == pin->pf->dplls.clock_id)
+ return NOTIFY_DONE;
+
work = kzalloc_obj(*work);
if (!work)
return NOTIFY_DONE;
@@ -3401,6 +3524,19 @@ ice_dpll_deinit_fwnode_pins(struct ice_pf *pf, struct ice_dpll_pin *pins,
destroy_workqueue(pf->dplls.wq);
}
+/* Tear down TXCLK pins: unregister all, release EREF0, deinit SYNCE */
+static int ice_dpll_deinit_txclk_pins(struct ice_pf *pf)
+{
+	struct ice_dpll_pin *txclks = pf->dplls.txclks;
+
+	ice_dpll_unregister_pins(pf->dplls.txc.dpll, txclks,
+				 &ice_dpll_txclk_ops,
+				 ARRAY_SIZE(pf->dplls.txclks));
+	ice_dpll_release_pins(&txclks[E825_EXT_EREF_PIN_IDX], 1);
+	ice_dpll_deinit_fwnode_pin(&txclks[E825_EXT_SYNCE_PIN_IDX]);
+	return 0;
+}
+
/**
* ice_dpll_deinit_pins - deinitialize direct pins
* @pf: board private structure
@@ -3420,8 +3556,10 @@ static void ice_dpll_deinit_pins(struct ice_pf *pf, bool cgu)
struct ice_dpll *dp = &d->pps;
ice_dpll_deinit_rclk_pin(pf);
- if (pf->hw.mac_type == ICE_MAC_GENERIC_3K_E825)
+ if (pf->hw.mac_type == ICE_MAC_GENERIC_3K_E825) {
+ ice_dpll_deinit_txclk_pins(pf);
ice_dpll_deinit_fwnode_pins(pf, pf->dplls.inputs, 0);
+ }
if (cgu) {
ice_dpll_unregister_pins(dp->dpll, inputs, &ice_dpll_input_ops,
num_inputs);
@@ -3552,6 +3690,58 @@ ice_dpll_init_fwnode_pins(struct ice_pf *pf, struct ice_dpll_pin *pins,
return ret;
}
+/* Initialize the TXCLK reference pins of the TXC DPLL: EXT_EREF0 is taken
+ * with ice_dpll_get_pins() at @start_idx and registered immediately, SYNCE
+ * is fwnode-backed and registered here only if its pin already exists.
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int ice_dpll_init_txclk_pins(struct ice_pf *pf, int start_idx)
+{
+	struct ice_dpll_pin *eref = &pf->dplls.txclks[E825_EXT_EREF_PIN_IDX];
+	struct ice_dpll_pin *synce = &pf->dplls.txclks[E825_EXT_SYNCE_PIN_IDX];
+	struct ice_dpll *txc = &pf->dplls.txc;
+	int err;
+
+	/* Configure EXT_EREF0 pin */
+	err = ice_dpll_get_pins(pf, eref, start_idx, 1, pf->dplls.clock_id);
+	if (err)
+		return err;
+
+	err = dpll_pin_register(txc->dpll, eref->pin, &ice_dpll_txclk_ops,
+				eref);
+	if (err)
+		goto release_eref;
+
+	/* SYNCE is fwnode-backed and may not exist yet; if so, leave the
+	 * registration to the notifier path and report success.
+	 */
+	err = ice_dpll_init_fwnode_pin(synce, ice_dpll_fwnode_ext_synce);
+	if (err)
+		goto unregister_eref;
+
+	if (IS_ERR_OR_NULL(synce->pin)) {
+		dev_dbg(ice_pf_to_dev(pf),
+			"Tx-clk SYNCE pin not registered yet\n");
+		return 0;
+	}
+
+	err = dpll_pin_register(txc->dpll, synce->pin, &ice_dpll_txclk_ops,
+				synce);
+	if (err)
+		goto deinit_synce;
+
+	return 0;
+
+deinit_synce:
+	ice_dpll_deinit_fwnode_pin(synce);
+unregister_eref:
+	dpll_pin_unregister(txc->dpll, eref->pin, &ice_dpll_txclk_ops, eref);
+release_eref:
+	ice_dpll_release_pins(eref, 1);
+
+	return err;
+}
+
/**
* ice_dpll_init_pins_e825 - init pins and register pins with a dplls
* @pf: board private structure
@@ -3574,6 +3764,15 @@ static int ice_dpll_init_pins_e825(struct ice_pf *pf)
ret = ice_dpll_init_rclk_pin(pf, DPLL_PIN_IDX_UNSPEC,
&ice_dpll_rclk_ops);
+
+ if (ret)
+ goto unregister_pins;
+
+ ret = ice_dpll_init_txclk_pins(pf, 0);
+ if (ret)
+ ice_dpll_deinit_rclk_pin(pf);
+
+unregister_pins:
if (ret) {
/* Inform DPLL notifier works that DPLL init was finished
* unsuccessfully (ICE_DPLL_FLAG not set).
@@ -3692,7 +3891,7 @@ static int ice_dpll_init_pins(struct ice_pf *pf, bool cgu)
static void
ice_dpll_deinit_dpll(struct ice_pf *pf, struct ice_dpll *d, bool cgu)
{
- if (cgu)
+ if (cgu || pf->hw.mac_type == ICE_MAC_GENERIC_3K_E825)
dpll_device_unregister(d->dpll, d->ops, d);
dpll_device_put(d->dpll, &d->tracker);
}
@@ -3727,12 +3926,13 @@ ice_dpll_init_dpll(struct ice_pf *pf, struct ice_dpll *d, bool cgu,
return ret;
}
d->pf = pf;
- if (cgu) {
+ if (cgu || pf->hw.mac_type == ICE_MAC_GENERIC_3K_E825) {
const struct dpll_device_ops *ops = &ice_dpll_ops;
if (type == DPLL_TYPE_PPS && ice_dpll_is_pps_phase_monitor(pf))
ops = &ice_dpll_pom_ops;
- ice_dpll_update_state(pf, d, true);
+ if (cgu)
+ ice_dpll_update_state(pf, d, true);
ret = dpll_device_register(d->dpll, type, ops, d);
if (ret) {
dpll_device_put(d->dpll, &d->tracker);
@@ -4081,6 +4281,36 @@ static int ice_dpll_init_info_sw_pins(struct ice_pf *pf)
return 0;
}
+/**
+ * ice_dpll_init_info_txclk_pins_e825c - initializes tx-clk pins information
+ * @pf: board private structure
+ *
+ * Init information for tx-clks pin, cache them in pf->dplls.txclks
+ *
+ * Return:
+ * * 0 - success
+ */
+static int ice_dpll_init_info_txclk_pins_e825c(struct ice_pf *pf)
+{
+ struct ice_dpll_pin *tx_pin;
+
+ for (int i = 0; i < ICE_DPLL_TXCLK_NUM_MAX; i++) {
+ tx_pin = &pf->dplls.txclks[i];
+ tx_pin->prop.type = DPLL_PIN_TYPE_EXT;
+ tx_pin->prop.capabilities |=
+ DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
+ tx_pin->pf = pf;
+ if (i == E825_EXT_EREF_PIN_IDX) {
+ tx_pin->prop.board_label = ice_dpll_ext_eref_pin;
+ tx_pin->tx_ref_src = ICE_REF_CLK_EREF0;
+ } else if (i == E825_EXT_SYNCE_PIN_IDX) {
+ tx_pin->tx_ref_src = ICE_REF_CLK_SYNCE;
+ }
+ }
+
+ return 0;
+}
+
/**
* ice_dpll_init_pins_info - init pins info wrapper
* @pf: board private structure
@@ -4106,6 +4336,9 @@ ice_dpll_init_pins_info(struct ice_pf *pf, enum ice_dpll_pin_type pin_type)
return ice_dpll_init_info_rclk_pin(pf);
case ICE_DPLL_PIN_TYPE_SOFTWARE:
return ice_dpll_init_info_sw_pins(pf);
+
+ case ICE_DPLL_PIN_TYPE_TXCLK:
+ return ice_dpll_init_info_txclk_pins_e825c(pf);
default:
return -EINVAL;
}
@@ -4139,11 +4372,15 @@ static void ice_dpll_deinit_info(struct ice_pf *pf)
static int ice_dpll_init_info_e825c(struct ice_pf *pf)
{
struct ice_dplls *d = &pf->dplls;
+ struct ice_dpll *dt = &d->txc;
int ret = 0;
int i;
d->clock_id = ice_generate_clock_id(pf);
d->num_inputs = ICE_SYNCE_CLK_NUM;
+ dt->dpll_state = DPLL_LOCK_STATUS_UNLOCKED;
+ dt->mode = DPLL_MODE_MANUAL;
+ dt->dpll_idx = pf->ptp.port.port_num;
d->inputs = kzalloc_objs(*d->inputs, d->num_inputs);
if (!d->inputs)
@@ -4160,6 +4397,11 @@ static int ice_dpll_init_info_e825c(struct ice_pf *pf)
ret = ice_dpll_init_pins_info(pf, ICE_DPLL_PIN_TYPE_RCLK_INPUT);
if (ret)
goto deinit_info;
+
+ ret = ice_dpll_init_pins_info(pf, ICE_DPLL_PIN_TYPE_TXCLK);
+ if (ret)
+ goto deinit_info;
+
dev_dbg(ice_pf_to_dev(pf),
"%s - success, inputs: %u, outputs: %u, rclk-parents: %u\n",
__func__, d->num_inputs, d->num_outputs, d->rclk.num_parents);
@@ -4292,6 +4534,9 @@ void ice_dpll_deinit(struct ice_pf *pf)
ice_dpll_deinit_dpll(pf, &pf->dplls.pps, cgu);
if (!IS_ERR_OR_NULL(pf->dplls.eec.dpll))
ice_dpll_deinit_dpll(pf, &pf->dplls.eec, cgu);
+ if (!IS_ERR_OR_NULL(pf->dplls.txc.dpll))
+ ice_dpll_deinit_dpll(pf, &pf->dplls.txc, false);
+
ice_dpll_deinit_info(pf);
mutex_destroy(&pf->dplls.lock);
}
@@ -4317,14 +4562,19 @@ static void ice_dpll_init_e825(struct ice_pf *pf)
err = ice_dpll_init_info_e825c(pf);
if (err)
goto err_exit;
- err = ice_dpll_init_pins_e825(pf);
+ err = ice_dpll_init_dpll(pf, &pf->dplls.txc, false, DPLL_TYPE_TXC);
if (err)
goto deinit_info;
+ err = ice_dpll_init_pins_e825(pf);
+ if (err)
+ goto deinit_txclk;
set_bit(ICE_FLAG_DPLL, pf->flags);
complete_all(&d->dpll_init);
return;
+deinit_txclk:
+ ice_dpll_deinit_dpll(pf, &pf->dplls.txc, false);
deinit_info:
ice_dpll_deinit_info(pf);
err_exit:
diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.h b/drivers/net/ethernet/intel/ice/ice_dpll.h
index ae42cdea0ee1..23f9d4da73c5 100644
--- a/drivers/net/ethernet/intel/ice/ice_dpll.h
+++ b/drivers/net/ethernet/intel/ice/ice_dpll.h
@@ -7,6 +7,7 @@
#include "ice.h"
#define ICE_DPLL_RCLK_NUM_MAX 4
+#define ICE_DPLL_TXCLK_NUM_MAX 2
/**
* enum ice_dpll_pin_sw - enumerate ice software pin indices:
@@ -63,6 +64,7 @@ struct ice_dpll_pin {
u8 ref_sync;
bool active;
bool hidden;
+ enum ice_e825c_ref_clk tx_ref_src;
};
/** ice_dpll - store info required for DPLL control
@@ -111,9 +113,11 @@ struct ice_dpll {
* @lock: locks access to configuration of a dpll
* @eec: pointer to EEC dpll dev
* @pps: pointer to PPS dpll dev
+ * @txc: pointer to TXC dpll dev
* @inputs: input pins pointer
* @outputs: output pins pointer
* @rclk: recovered pins pointer
+ * @txclks: TX clock reference pins pointer
* @num_inputs: number of input pins available on dpll
* @num_outputs: number of output pins available on dpll
* @cgu_state_acq_err_num: number of errors returned during periodic work
@@ -131,11 +135,13 @@ struct ice_dplls {
struct completion dpll_init;
struct ice_dpll eec;
struct ice_dpll pps;
+ struct ice_dpll txc;
struct ice_dpll_pin *inputs;
struct ice_dpll_pin *outputs;
struct ice_dpll_pin sma[ICE_DPLL_PIN_SW_NUM];
struct ice_dpll_pin ufl[ICE_DPLL_PIN_SW_NUM];
struct ice_dpll_pin rclk;
+ struct ice_dpll_pin txclks[ICE_DPLL_TXCLK_NUM_MAX];
u8 num_inputs;
u8 num_outputs;
u8 sma_data;
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
index 9bfd3e79c580..cbc9693179a1 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
@@ -265,6 +265,13 @@ struct ice_cgu_pin_desc {
struct dpll_pin_frequency *freq_supp;
};
+enum ice_e825c_ref_clk {
+ ICE_REF_CLK_ENET,
+ ICE_REF_CLK_SYNCE,
+ ICE_REF_CLK_EREF0,
+ ICE_REF_CLK_MAX,
+};
+
#define E810C_QSFP_C827_0_HANDLE 2
#define E810C_QSFP_C827_1_HANDLE 3
--
2.39.3
^ permalink raw reply related
* [PATCH v6 net-next 4/8] dpll: zl3073x: allow SyncE_Ref pin state change
From: Grzegorz Nitka @ 2026-04-09 23:51 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, intel-wired-lan, poros, richardcochran,
andrew+netdev, przemyslaw.kitszel, anthony.l.nguyen,
Prathosh.Satish, ivecera, jiri, arkadiusz.kubalewski,
vadim.fedorenko, donald.hunter, horms, pabeni, kuba, davem,
edumazet, Grzegorz Nitka, Aleksandr Loktionov
In-Reply-To: <20260409235122.436749-1-grzegorz.nitka@intel.com>
The SyncE_Ref pin may operate as either an active or inactive reference
depending on board design and system configuration. Some platforms need
to disable the SyncE reference dynamically (e.g., when selecting a
different recovered clock input). The hardware supports toggling this
pin, therefore advertise the STATE_CAN_CHANGE capability.
Reviewed-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Grzegorz Nitka <grzegorz.nitka@intel.com>
---
drivers/dpll/zl3073x/prop.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/dpll/zl3073x/prop.c b/drivers/dpll/zl3073x/prop.c
index ac9d41d0f978..acd7061a741a 100644
--- a/drivers/dpll/zl3073x/prop.c
+++ b/drivers/dpll/zl3073x/prop.c
@@ -215,6 +215,15 @@ struct zl3073x_pin_props *zl3073x_pin_props_get(struct zl3073x_dev *zldev,
props->dpll_props.type = DPLL_PIN_TYPE_GNSS;
+ /*
+ * The SyncE_Ref pin supports enabling/disabling dynamically.
+ * Some platforms may choose to expose this through firmware
+ * configuration later. For now, advertise this capability
+ * universally since the hardware allows state toggling.
+ */
+ props->dpll_props.capabilities |=
+ DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
+
/* The output pin phase adjustment granularity equals half of
* the synth frequency count.
*/
--
2.39.3
^ permalink raw reply related
* [PATCH v6 net-next 3/8] dpll: extend pin notifier and netlink events with notification source ID
From: Grzegorz Nitka @ 2026-04-09 23:51 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, intel-wired-lan, poros, richardcochran,
andrew+netdev, przemyslaw.kitszel, anthony.l.nguyen,
Prathosh.Satish, ivecera, jiri, arkadiusz.kubalewski,
vadim.fedorenko, donald.hunter, horms, pabeni, kuba, davem,
edumazet, Grzegorz Nitka, Aleksandr Loktionov
In-Reply-To: <20260409235122.436749-1-grzegorz.nitka@intel.com>
Extend the DPLL pin notification API to include a source identifier
indicating where the notification originates. This allows notifier
consumers and netlink listeners to distinguish between notifications
coming from an associated DPLL instance, a parent pin, or the pin
itself.
A new field, src_id, is added to struct dpll_pin_notifier_info and is
passed through all pin-related notification paths. Callers of
dpll_pin_notify() are updated to provide a meaningful source identifier
based on their context:
- pin registration/unregistration uses the DPLL's clock_id,
- pin-on-pin operations use the parent pin's clock_id,
- pin changes use the pin's own clock_id.
As introduced in the commit ("dpll: allow registering FW-identified pin
with a different DPLL"), it is possible to share the same physical pin
via firmware description (fwnode) with DPLL objects from different
kernel modules. This means that a given pin can be registered multiple
times.
Drivers such as ICE (E825 devices) rely on this mechanism when listening
for the event where a shared-fwnode pin appears, while avoiding reacting
to events triggered by their own registration logic.
This change only extends the notification metadata and does not alter
existing semantics for drivers that do not use the new field.
Reviewed-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Grzegorz Nitka <grzegorz.nitka@intel.com>
---
drivers/dpll/dpll_core.c | 14 ++++++++------
drivers/dpll/dpll_core.h | 3 ++-
drivers/dpll/dpll_netlink.c | 10 +++++-----
drivers/dpll/dpll_netlink.h | 4 ++--
include/linux/dpll.h | 1 +
5 files changed, 18 insertions(+), 14 deletions(-)
diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c
index afe4552bdcd4..857bd02da7ba 100644
--- a/drivers/dpll/dpll_core.c
+++ b/drivers/dpll/dpll_core.c
@@ -71,7 +71,8 @@ void dpll_device_notify(struct dpll_device *dpll, unsigned long action)
call_dpll_notifiers(action, &info);
}
-void dpll_pin_notify(struct dpll_pin *pin, unsigned long action)
+void dpll_pin_notify(struct dpll_pin *pin, u64 src_clock_id,
+ unsigned long action)
{
struct dpll_pin_notifier_info info = {
.pin = pin,
@@ -80,6 +81,7 @@ void dpll_pin_notify(struct dpll_pin *pin, unsigned long action)
.clock_id = pin->clock_id,
.fwnode = pin->fwnode,
.prop = &pin->prop,
+ .src_clock_id = src_clock_id,
};
call_dpll_notifiers(action, &info);
@@ -847,7 +849,7 @@ __dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
if (ret)
goto ref_pin_del;
xa_set_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED);
- dpll_pin_create_ntf(pin);
+ dpll_pin_create_ntf(pin, dpll->clock_id);
return ret;
@@ -949,7 +951,7 @@ void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin,
return;
mutex_lock(&dpll_lock);
- dpll_pin_delete_ntf(pin);
+ dpll_pin_delete_ntf(pin, dpll->clock_id);
__dpll_pin_unregister(dpll, pin, ops, priv, NULL);
mutex_unlock(&dpll_lock);
}
@@ -995,7 +997,7 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin,
stop = i;
goto dpll_unregister;
}
- dpll_pin_create_ntf(pin);
+ dpll_pin_create_ntf(pin, parent->clock_id);
}
mutex_unlock(&dpll_lock);
@@ -1006,7 +1008,7 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin,
if (i < stop) {
__dpll_pin_unregister(ref->dpll, pin, ops, priv,
parent);
- dpll_pin_delete_ntf(pin);
+ dpll_pin_delete_ntf(pin, parent->clock_id);
}
dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv, pin);
unlock:
@@ -1032,7 +1034,7 @@ void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin,
unsigned long i;
mutex_lock(&dpll_lock);
- dpll_pin_delete_ntf(pin);
+ dpll_pin_delete_ntf(pin, parent->clock_id);
dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv, pin);
xa_for_each(&pin->dpll_refs, i, ref)
__dpll_pin_unregister(ref->dpll, pin, ops, priv, parent);
diff --git a/drivers/dpll/dpll_core.h b/drivers/dpll/dpll_core.h
index 71ac88ef2017..92cb919317eb 100644
--- a/drivers/dpll/dpll_core.h
+++ b/drivers/dpll/dpll_core.h
@@ -98,6 +98,7 @@ extern struct xarray dpll_pin_xa;
extern struct mutex dpll_lock;
void dpll_device_notify(struct dpll_device *dpll, unsigned long action);
-void dpll_pin_notify(struct dpll_pin *pin, unsigned long action);
+void dpll_pin_notify(struct dpll_pin *pin, u64 src_clock_id,
+ unsigned long action);
#endif
diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c
index af7ce62ec55c..aeae5afe71c8 100644
--- a/drivers/dpll/dpll_netlink.c
+++ b/drivers/dpll/dpll_netlink.c
@@ -888,21 +888,21 @@ dpll_pin_event_send(enum dpll_cmd event, struct dpll_pin *pin)
return ret;
}
-int dpll_pin_create_ntf(struct dpll_pin *pin)
+int dpll_pin_create_ntf(struct dpll_pin *pin, u64 src_clock_id)
{
- dpll_pin_notify(pin, DPLL_PIN_CREATED);
+ dpll_pin_notify(pin, src_clock_id, DPLL_PIN_CREATED);
return dpll_pin_event_send(DPLL_CMD_PIN_CREATE_NTF, pin);
}
-int dpll_pin_delete_ntf(struct dpll_pin *pin)
+int dpll_pin_delete_ntf(struct dpll_pin *pin, u64 src_clock_id)
{
- dpll_pin_notify(pin, DPLL_PIN_DELETED);
+ dpll_pin_notify(pin, src_clock_id, DPLL_PIN_DELETED);
return dpll_pin_event_send(DPLL_CMD_PIN_DELETE_NTF, pin);
}
int __dpll_pin_change_ntf(struct dpll_pin *pin)
{
- dpll_pin_notify(pin, DPLL_PIN_CHANGED);
+ dpll_pin_notify(pin, pin->clock_id, DPLL_PIN_CHANGED);
return dpll_pin_event_send(DPLL_CMD_PIN_CHANGE_NTF, pin);
}
diff --git a/drivers/dpll/dpll_netlink.h b/drivers/dpll/dpll_netlink.h
index dd28b56d27c5..89fef266392f 100644
--- a/drivers/dpll/dpll_netlink.h
+++ b/drivers/dpll/dpll_netlink.h
@@ -8,8 +8,8 @@ int dpll_device_create_ntf(struct dpll_device *dpll);
int dpll_device_delete_ntf(struct dpll_device *dpll);
-int dpll_pin_create_ntf(struct dpll_pin *pin);
+int dpll_pin_create_ntf(struct dpll_pin *pin, u64 src_clock_id);
-int dpll_pin_delete_ntf(struct dpll_pin *pin);
+int dpll_pin_delete_ntf(struct dpll_pin *pin, u64 src_clock_id);
int __dpll_pin_change_ntf(struct dpll_pin *pin);
diff --git a/include/linux/dpll.h b/include/linux/dpll.h
index b7277a8b484d..299cef38b657 100644
--- a/include/linux/dpll.h
+++ b/include/linux/dpll.h
@@ -212,6 +212,7 @@ struct dpll_pin_notifier_info {
u64 clock_id;
const struct fwnode_handle *fwnode;
const struct dpll_pin_properties *prop;
+ u64 src_clock_id;
};
#if IS_ENABLED(CONFIG_DPLL)
--
2.39.3
^ permalink raw reply related
* [PATCH v6 net-next 2/8] dpll: allow registering FW-identified pin with a different DPLL
From: Grzegorz Nitka @ 2026-04-09 23:51 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, intel-wired-lan, poros, richardcochran,
andrew+netdev, przemyslaw.kitszel, anthony.l.nguyen,
Prathosh.Satish, ivecera, jiri, arkadiusz.kubalewski,
vadim.fedorenko, donald.hunter, horms, pabeni, kuba, davem,
edumazet, Grzegorz Nitka, Jiri Pirko, Aleksandr Loktionov
In-Reply-To: <20260409235122.436749-1-grzegorz.nitka@intel.com>
Relax the (module, clock_id) equality requirement when registering a
pin identified by firmware (pin->fwnode). Some platforms associate a
FW-described pin with a DPLL instance that differs from the pin's
(module, clock_id) tuple. For such pins, permit registration without
requiring the strict match. Non-FW pins still require equality.
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Grzegorz Nitka <grzegorz.nitka@intel.com>
---
drivers/dpll/dpll_core.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c
index cbb635db4321..afe4552bdcd4 100644
--- a/drivers/dpll/dpll_core.c
+++ b/drivers/dpll/dpll_core.c
@@ -883,11 +883,21 @@ dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
return -EINVAL;
mutex_lock(&dpll_lock);
- if (WARN_ON(!(dpll->module == pin->module &&
- dpll->clock_id == pin->clock_id)))
+
+ /*
+ * For pins identified via firmware (pin->fwnode), allow registration
+ * even if the pin's (module, clock_id) differs from the target DPLL.
+ * For non-fwnode pins, require a strict (module, clock_id) match.
+ */
+ if (!pin->fwnode &&
+ WARN_ON_ONCE(dpll->module != pin->module ||
+ dpll->clock_id != pin->clock_id)) {
ret = -EINVAL;
- else
- ret = __dpll_pin_register(dpll, pin, ops, priv, NULL);
+ goto out_unlock;
+ }
+
+ ret = __dpll_pin_register(dpll, pin, ops, priv, NULL);
+out_unlock:
mutex_unlock(&dpll_lock);
return ret;
--
2.39.3
^ permalink raw reply related
* [PATCH v6 net-next 1/8] dpll: add new DPLL type for transmit clock (TXC) usage
From: Grzegorz Nitka @ 2026-04-09 23:51 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, intel-wired-lan, poros, richardcochran,
andrew+netdev, przemyslaw.kitszel, anthony.l.nguyen,
Prathosh.Satish, ivecera, jiri, arkadiusz.kubalewski,
vadim.fedorenko, donald.hunter, horms, pabeni, kuba, davem,
edumazet, Grzegorz Nitka, Jiri Pirko, Aleksandr Loktionov
In-Reply-To: <20260409235122.436749-1-grzegorz.nitka@intel.com>
Extend the DPLL subsystem with a new DPLL type, DPLL_TYPE_TXC,
representing devices that drive a transmit reference clock. Certain
PHYs, MACs and SerDes blocks use a dedicated TX reference clock for
link operation, and this clock domain is distinct from PPS- and
EEC-driven synchronization sources. Defining a dedicated type allows
user space and drivers to correctly classify and configure DPLLs
intended for TX clock generation.
The corresponding netlink specification is updated to expose "txc".
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Grzegorz Nitka <grzegorz.nitka@intel.com>
---
Documentation/netlink/specs/dpll.yaml | 3 +++
drivers/dpll/dpll_nl.c | 2 +-
include/uapi/linux/dpll.h | 2 ++
3 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml
index 40465a3d7fc2..69e907850c01 100644
--- a/Documentation/netlink/specs/dpll.yaml
+++ b/Documentation/netlink/specs/dpll.yaml
@@ -138,6 +138,9 @@ definitions:
-
name: eec
doc: dpll drives the Ethernet Equipment Clock
+ -
+ name: txc
+ doc: dpll drives Tx reference clock
render-max: true
-
type: enum
diff --git a/drivers/dpll/dpll_nl.c b/drivers/dpll/dpll_nl.c
index 1e652340a5d7..9a3b70ea3ae0 100644
--- a/drivers/dpll/dpll_nl.c
+++ b/drivers/dpll/dpll_nl.c
@@ -34,7 +34,7 @@ const struct nla_policy dpll_reference_sync_nl_policy[DPLL_A_PIN_STATE + 1] = {
static const struct nla_policy dpll_device_id_get_nl_policy[DPLL_A_TYPE + 1] = {
[DPLL_A_MODULE_NAME] = { .type = NLA_NUL_STRING, },
[DPLL_A_CLOCK_ID] = { .type = NLA_U64, },
- [DPLL_A_TYPE] = NLA_POLICY_RANGE(NLA_U32, 1, 2),
+ [DPLL_A_TYPE] = NLA_POLICY_RANGE(NLA_U32, 1, 3),
};
/* DPLL_CMD_DEVICE_GET - do */
diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h
index 871685f7c353..b2045cb0a779 100644
--- a/include/uapi/linux/dpll.h
+++ b/include/uapi/linux/dpll.h
@@ -109,10 +109,12 @@ enum dpll_clock_quality_level {
* enum dpll_type - type of dpll, valid values for DPLL_A_TYPE attribute
* @DPLL_TYPE_PPS: dpll produces Pulse-Per-Second signal
* @DPLL_TYPE_EEC: dpll drives the Ethernet Equipment Clock
+ * @DPLL_TYPE_TXC: dpll drives Tx reference clock
*/
enum dpll_type {
DPLL_TYPE_PPS = 1,
DPLL_TYPE_EEC,
+ DPLL_TYPE_TXC,
/* private: */
__DPLL_TYPE_MAX,
--
2.39.3
^ permalink raw reply related
* [PATCH v6 net-next 0/8] dpll/ice: Add TXC DPLL type and full TX reference clock control for E825
From: Grzegorz Nitka @ 2026-04-09 23:51 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, intel-wired-lan, poros, richardcochran,
andrew+netdev, przemyslaw.kitszel, anthony.l.nguyen,
Prathosh.Satish, ivecera, jiri, arkadiusz.kubalewski,
vadim.fedorenko, donald.hunter, horms, pabeni, kuba, davem,
edumazet, Grzegorz Nitka
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=y, Size: 6345 bytes --]
NOTE: This series is intentionally submitted on net-next (not
intel-wired-lan) as early feedback of DPLL subsystem changes is
welcomed. In the past possible approaches were discussed in [1].
This series adds TX reference clock support for E825 devices and exposes
TX clock selection and synchronization status via the Linux DPLL
subsystem.
E825 hardware contains a dedicated Tx clock (TXC) domain that is distinct
from PPS and EEC. TX reference clock selection is device‑wide, shared
across ports, and mediated by firmware as part of the link bring‑up
process. As a result, TX clock selection intent may differ from the
effective hardware configuration, and software must verify the outcome
after link‑up.
To support this, the series introduces TXC support incrementally across
the DPLL core and the ice driver:
- add a new DPLL type (TXC) to represent transmit clock generators;
- relax DPLL pin registration rules for firmware‑described shared pins
and extend pin notifications with a source identifier;
- allow dynamic state control of SyncE reference pins where hardware
supports it;
- add CPI infrastructure for PHY‑side TX clock control on E825C;
- introduce a TXC DPLL device and TX reference clock pins (EXT_EREF0 and
SYNCE) in the ice driver;
- extend the Restart Auto‑Negotiation command to carry a TX reference
clock index;
- implement hardware‑backed TX reference clock switching, post‑link
verification, and TX synchronization reporting.
TXCLK pins report TX reference topology only. Actual synchronization
success is reported via the TXC DPLL lock status, which is updated after
hardware verification: external Tx references report LOCKED, while the
internal ENET/TXCO source reports UNLOCKED.
This provides reliable TX reference selection and observability on E825
devices using standard DPLL interfaces, without conflating user intent
with effective hardware behavior.
[1] https://lore.kernel.org/netdev/20250905160333.715c34ac@kernel.org/
Changes in v6:
- rebased
- AI-review: fix unprotected concurrent access to shared clock
bitmap (patch 8/8)
- AI-review: fix potential issue in tx-clk pin state request handling
('already set' early-exit based now on tx_clk_req comparison, patch 8/8)
- AI-review: CPI transaction serialization (patch 6/8)
Changes in v5:
- rebased
- reworded cover letter
- replace 'ntfy_src' new argument name with 'src_clock_id' and use it
consistently in DPLL notification calls (patch 3/8)
- reworded commit message (patch 5/8)
- use FIELD_PREP/GENMSK macros instead of struct bitfields (patch 6/8)
- reworded commit message (patch 5/8, patch 8/8)
- refactor the code to avoid sleeping while DPLL mutex is held (using
work_queue, patch 8/8)
- added TXCLK pins and TXC DPLL notifications (patch 8/8)
- removed 'unused clock disable' mechanism from the scope of this series
Changes in v4:
- rebased
- edited, shortened the commit message in 3/8 patch
- moved ice_get_ctrl_pf to the header file (patch 8/8) and
removed duplicated static definitions from ice_ptp and ice_txclk
modules
- add NULL/invalid pointer checker for returned pointer from
ice_get_ctrl_pf (patch 8/8)
- edited error message in case of AN restart failure (patch 8/8)
Changes in v3:
- improved commit message (patch 1/8, AI review comment)
- improved deinitialization path in ice_dpll_deinit_txclk_pins to
avoid potential NULL dereference. NULL checking moved to
ice_dpll_unregister_pins (patch 5/8, found by AI review)
- removed redundant semicolon (patch 6/8)
Changes in v2:
- rebased
- added autogenerated DPLL files (patch 1/8)
- fixed checkpatch 'parenthesis alignment' warning (patch 2/8)
- fixed error path in ice_dpll_init_txclk_pins (AI warning, patch 5/8)
- fixed kdoc warnings (patch 6/8, patch 8/8)
Grzegorz Nitka (8):
dpll: add new DPLL type for transmit clock (TXC) usage
dpll: allow registering FW-identified pin with a different DPLL
dpll: extend pin notifier and netlink events with notification source
ID
dpll: zl3073x: allow SyncE_Ref pin state change
ice: introduce TXC DPLL device and TX ref clock pin framework for E825
ice: implement CPI support for E825C
ice: add Tx reference clock index handling to AN restart command
ice: implement E825 TX ref clock control and TXC hardware sync status
Documentation/netlink/specs/dpll.yaml | 3 +
drivers/dpll/dpll_core.c | 32 +-
drivers/dpll/dpll_core.h | 3 +-
drivers/dpll/dpll_netlink.c | 10 +-
drivers/dpll/dpll_netlink.h | 4 +-
drivers/dpll/dpll_nl.c | 2 +-
drivers/dpll/zl3073x/prop.c | 9 +
drivers/net/ethernet/intel/ice/Makefile | 2 +-
drivers/net/ethernet/intel/ice/ice.h | 12 +
drivers/net/ethernet/intel/ice/ice_adapter.c | 4 +
drivers/net/ethernet/intel/ice/ice_adapter.h | 7 +
.../net/ethernet/intel/ice/ice_adminq_cmd.h | 2 +
drivers/net/ethernet/intel/ice/ice_common.c | 5 +-
drivers/net/ethernet/intel/ice/ice_common.h | 2 +-
drivers/net/ethernet/intel/ice/ice_cpi.c | 364 +++++++++++++++++
drivers/net/ethernet/intel/ice/ice_cpi.h | 61 +++
drivers/net/ethernet/intel/ice/ice_dpll.c | 380 ++++++++++++++++--
drivers/net/ethernet/intel/ice/ice_dpll.h | 10 +
drivers/net/ethernet/intel/ice/ice_lib.c | 3 +-
drivers/net/ethernet/intel/ice/ice_ptp.c | 26 +-
drivers/net/ethernet/intel/ice/ice_ptp.h | 7 +
drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 37 ++
drivers/net/ethernet/intel/ice/ice_ptp_hw.h | 34 ++
drivers/net/ethernet/intel/ice/ice_sbq_cmd.h | 5 +-
drivers/net/ethernet/intel/ice/ice_txclk.c | 251 ++++++++++++
drivers/net/ethernet/intel/ice/ice_txclk.h | 38 ++
drivers/net/ethernet/intel/ice/ice_type.h | 2 +
include/linux/dpll.h | 1 +
include/uapi/linux/dpll.h | 2 +
29 files changed, 1265 insertions(+), 53 deletions(-)
create mode 100644 drivers/net/ethernet/intel/ice/ice_cpi.c
create mode 100644 drivers/net/ethernet/intel/ice/ice_cpi.h
create mode 100644 drivers/net/ethernet/intel/ice/ice_txclk.c
create mode 100644 drivers/net/ethernet/intel/ice/ice_txclk.h
base-commit: b6e39e48469e37057fce27a1b87cf6d3e456aa42
--
2.39.3
^ permalink raw reply
* Re: [PATCH v9 02/10] x86/bhi: Make clear_bhb_loop() effective on newer CPUs
From: Pawan Gupta @ 2026-04-09 23:48 UTC (permalink / raw)
To: Jim Mattson
Cc: Dave Hansen, x86, Jon Kohler, Nikolay Borisov, H. Peter Anvin,
Josh Poimboeuf, David Kaplan, Sean Christopherson,
Borislav Petkov, Dave Hansen, Peter Zijlstra, Alexei Starovoitov,
Daniel Borkmann, Andrii Nakryiko, KP Singh, Jiri Olsa,
David S. Miller, David Laight, Andy Lutomirski, Thomas Gleixner,
Ingo Molnar, David Ahern, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, John Fastabend, Stanislav Fomichev,
Hao Luo, Paolo Bonzini, Jonathan Corbet, linux-kernel, kvm,
Asit Mallick, Tao Zhang, bpf, netdev, linux-doc, chao.gao
In-Reply-To: <CALMp9eQx3H+n3V3dQh+ZafQZ6uNBjSYk8tZsvG6ffcY43YTrnQ@mail.gmail.com>
On Thu, Apr 09, 2026 at 02:06:36PM -0700, Jim Mattson wrote:
> On Thu, Apr 9, 2026 at 1:36 PM Dave Hansen <dave.hansen@intel.com> wrote:
> >
> > On 4/7/26 17:47, Jim Mattson wrote:
> > > On Tue, Apr 7, 2026 at 4:41 PM Dave Hansen <dave.hansen@intel.com> wrote:
> > >> On 4/7/26 16:27, Jim Mattson wrote:
> > >>> What is your proposed BHI_DIS_S override mechanism, then?
> > >> Let me make sure I get this right. The desire is to:
> > >>
> > >> 1. Have hypervisors lie to guests about the CPU they are running on (for
> > >> the benefit of large/diverse migration pools)
> > >> 2. Have guests be allowed to boot with BHI_DIS_S for performance
> > >> 3. Have apps in those guests that care about security to opt back in to
> > >> BHI_DIS_S for themselves?
> > > I just want guests on heterogeneous migration pools to properly
> > > protect themselves from native BHI when running on host kernels at
> > > least as far back as Linux v6.6.
> > >
> > > To that end, I would be satisfied with using the longer BHB clearing
> > > sequence when HYPERVISOR is true and BHI_CTRL is false.
> >
> > If the guests can't get mitigation information from model/family because
> > the hypervisor is lying (or may lie), then it's on the hypervisor to
> > figure it out.
> >
> > I'm not sure we want to just assume that all hypervisors are going to
> > lie all the time about this.
>
> Without any information, that is exactly what we must assume. There is
> precedent for this.
>
> In vulnerable_to_its():
>
> /*
> * If a VMM did not expose ITS_NO, assume that a guest could
> * be running on a vulnerable hardware or may migrate to such
> * hardware.
> */
> if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
> return true;
>
>
> In cpu_set_bug_bits():
>
> /*
> * Intel parts with eIBRS are vulnerable to BHI attacks. Parts with
> * BHI_NO still need to use the BHI mitigation to prevent Intra-mode
> * attacks. When virtualized, eIBRS could be hidden, assume vulnerable.
> */
> if (!cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
> (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
> boot_cpu_has(X86_FEATURE_HYPERVISOR)))
> setup_force_cpu_bug(X86_BUG_BHI);
>
> ...and...
>
> if (c->x86_vendor == X86_VENDOR_AMD) {
> if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) ||
> !cpu_has(c, X86_FEATURE_TSA_L1_NO)) {
> if (cpu_matches(cpu_vuln_blacklist, TSA) ||
> /* Enable bug on Zen guests to allow for
> live migration. */
> (cpu_has(c, X86_FEATURE_HYPERVISOR) &&
> cpu_has(c, X86_FEATURE_ZEN)))
> setup_force_cpu_bug(X86_BUG_TSA);
> }
> }
>
>
> In check_null_seg_clears_base():
>
> /*
> * CPUID bit above wasn't set. If this kernel is still running
> * as a HV guest, then the HV has decided not to advertize
> * that CPUID bit for whatever reason. For example, one
> * member of the migration pool might be vulnerable. Which
> * means, the bug is present: set the BUG flag and return.
> */
> if (cpu_has(c, X86_FEATURE_HYPERVISOR)) {
> set_cpu_bug(c, X86_BUG_NULL_SEG);
> return;
> }
>
> The hypervisor could provide more information so that the guest can
> determine when it's safe to use the short sequence, but that's just
> icing on the cake. The default, out-of-the-box configuration must be
> safe.
In the above cases there was no practical way a VMM could have mitigated
the guest. So the only option for the guest was to take a conservative
approach. Secondly, in the BHI case, real world scenarios of migration
between pre and post ADL CPUs were unknown.
Nevertheless, Intel guidance covers this case by having KVM deploy
BHI_DIS_S for the guest using virtual-SPEC_CTRL. I understand that support
is missing currently, I am working on it. Hopefully, I will be able to
share the draft after this series settles down. We can work out the details
there.
In retrospect, it would have been ideal if this discussion had happened at
the time when virtual-SPEC_CTRL series was introduced.
^ permalink raw reply
* [PATCH net 3/3] nfc: llcp: fix OOB read of DM reason byte in nfc_llcp_recv_dm()
From: Lekë Hapçiu @ 2026-04-09 23:35 UTC (permalink / raw)
To: netdev
Cc: linux-nfc, stable, davem, edumazet, kuba, pabeni,
Lekë Hapçiu
In-Reply-To: <20260409233517.1891497-1-snowwlake@icloud.com>
nfc_llcp_recv_dm() reads skb->data[2] (the DM reason byte) without
verifying that the frame is at least LLCP_HEADER_SIZE + 1 bytes long.
A rogue NFC peer can send a 2-byte DM PDU (header only, no reason
byte), triggering a 1-byte out-of-bounds read of kernel heap memory.
The same missing guard also leaves the nfc_llcp_dsap() and
nfc_llcp_ssap() macro accesses to data[0]/data[1] technically
unprotected against a 0- or 1-byte frame.
Add a single skb->len < LLCP_HEADER_SIZE + 1 check before any field
access, consistent with the guard added to nfc_llcp_recv_snl() by
commit ef8ddc69c ("nfc: llcp: fix bounds check in
nfc_llcp_recv_snl()").
The DM PDU is dispatched unconditionally by nfc_llcp_rx_skb() with no
prior length check, so this path is reachable from RF without any prior
pairing or session establishment.
Fixes: 5c0560b7a5c6 ("NFC: Handle LLCP Disconnected Mode frames")
Cc: stable@vger.kernel.org
Signed-off-by: Lekë Hapçiu <snowwlake@icloud.com>
---
net/nfc/llcp_core.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -1247,6 +1247,10 @@
struct nfc_llcp_sock *llcp_sock;
struct sock *sk;
u8 dsap, ssap, reason;
+ if (skb->len < LLCP_HEADER_SIZE + 1) {
+ pr_err("Malformed DM PDU\n");
+ return;
+ }
dsap = nfc_llcp_dsap(skb);
ssap = nfc_llcp_ssap(skb);
--
2.34.1
^ permalink raw reply
* [PATCH net 2/3] nfc: llcp: fix TLV parsing OOB and length underflow in nfc_llcp_recv_snl
From: Lekë Hapçiu @ 2026-04-09 23:35 UTC (permalink / raw)
To: netdev
Cc: linux-nfc, stable, davem, edumazet, kuba, pabeni,
Lekë Hapçiu
In-Reply-To: <20260409233517.1891497-1-snowwlake@icloud.com>
nfc_llcp_recv_snl() contains four distinct vulnerabilities.
Issue 1 - missing minimum-length guard on skb:
nfc_llcp_dsap() and nfc_llcp_ssap() access pdu->data[0] and pdu->data[1]
unconditionally. The subsequent computation:
tlv_len = skb->len - LLCP_HEADER_SIZE; /* LLCP_HEADER_SIZE = 2 */
truncates to u16. If skb->len < 2, the unsigned subtraction wraps at
unsigned int width and the truncation to u16 yields 65534 or 65535, causing
the while loop to iterate far beyond the skb data. No guard exists at
the dispatch path to prevent this.
Fix: add `if (skb->len < LLCP_HEADER_SIZE) return;` before any skb->data
access, matching the pattern already used in nfc_llcp_recv_agf().
Issue 2 - missing per-iteration TLV header guard:
The loop reads tlv[0] and tlv[1] with no prior check that two bytes
remain. When one byte remains, tlv[1] is one byte past the array end.
Fix: `if (tlv_len - offset < 2) break;`
Issue 3 - peer-controlled `length` field advances tlv past skb end:
`length` (tlv[1]) is added unconditionally to both `offset` and `tlv`
without verifying that `length` bytes of TLV value exist. A malicious
peer sets `length` large enough that `offset` remains below `tlv_len` on
the next iteration while `tlv` points into adjacent kernel heap.
Fix: `if (tlv_len - offset - 2 < length) break;`
Issue 4 - per-type minimum-length hazards:
LLCP_TLV_SDREQ: `service_name_len = length - 1` is u8 arithmetic. When
length == 0 this wraps to 255, causing a 255-byte kernel memory scan via
strncmp. tlv[2] (tid) is also accessed unconditionally.
Fix: require length >= 1 before the tid/service_name access.
LLCP_TLV_SDRES: tlv[2] and tlv[3] are accessed without verifying
length >= 2. Unlike the GB/connection parsers, SDREQ/SDRES are not
processed via llcp_tlv8/16, so the llcp_tlv_length[] table provides no
protection here.
Fix: require length >= 2 before the tlv[2]/tlv[3] accesses.
In both cases a `break` from the inner switch falls through to the
unconditional `offset += length + 2; tlv += length + 2` at the loop
tail, correctly advancing past the malformed TLV. The outer two guards
break from the while loop entirely.
Reachability: SNL PDUs are processed during LLCP service discovery, before
any connection is established, from any NFC peer within ~4 cm with no
authentication or pairing.
Fixes: 7a06f0ee2823 ("NFC: llcp: Service Name Lookup implementation")
Cc: stable@vger.kernel.org
Signed-off-by: Lekë Hapçiu <snowwlake@icloud.com>
---
net/nfc/llcp_core.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index db5bc6a87..16acf7c2b 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -1284,6 +1284,11 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
size_t sdres_tlvs_len;
HLIST_HEAD(nl_sdres_list);
+ if (skb->len < LLCP_HEADER_SIZE) {
+ pr_err("Malformed SNL PDU\n");
+ return;
+ }
+
dsap = nfc_llcp_dsap(skb);
ssap = nfc_llcp_ssap(skb);
@@ -1300,11 +1305,17 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
sdres_tlvs_len = 0;
while (offset < tlv_len) {
+ if (tlv_len - offset < 2)
+ break;
type = tlv[0];
length = tlv[1];
+ if (tlv_len - offset - 2 < length)
+ break;
switch (type) {
case LLCP_TLV_SDREQ:
+ if (length < 1)
+ break;
tid = tlv[2];
service_name = (char *) &tlv[3];
service_name_len = length - 1;
@@ -1369,6 +1380,8 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
break;
case LLCP_TLV_SDRES:
+ if (length < 2)
+ break;
mutex_lock(&local->sdreq_lock);
pr_debug("LLCP_TLV_SDRES: searching tid %d\n", tlv[2]);
--
2.51.0
^ permalink raw reply related
* [PATCH net 1/3] nfc: llcp: add TLV length bounds checks in parse_gb_tlv and parse_connection_tlv
From: Lekë Hapçiu @ 2026-04-09 23:35 UTC (permalink / raw)
To: netdev
Cc: linux-nfc, stable, davem, edumazet, kuba, pabeni,
Lekë Hapçiu, Simon Horman
In-Reply-To: <20260409233517.1891497-1-snowwlake@icloud.com>
v1 of this fix promoted `offset` from u8 to u16 in both TLV parsers,
preventing the infinite loop when a connection TLV array exceeds 255 bytes.
During review, Simon Horman identified two additional issues that the u16
promotion alone does not address.
Issue 1 - truncated TLV header:
The loop guard `offset < tlv_array_len` is not sufficient to guarantee
that reading tlv[0] (type) and tlv[1] (length) is safe. When exactly
one byte remains (offset == tlv_array_len - 1) the loop body reads
tlv[1] one byte past the end of the array.
Issue 2 - peer-controlled `length` field:
`length` is read from peer-supplied frame data and is not checked against
the remaining array space before advancing `tlv` and `offset`:
offset += length + 2; /* always */
tlv += length + 2; /* may now point past buffer end */
A crafted `length` advances `tlv` past the array boundary; the following
iteration reads tlv[0]/tlv[1] from adjacent kernel memory.
For nfc_llcp_parse_gb_tlv() this is particularly impactful: its input is
&local->remote_gb[3], a field within nfc_llcp_local. A large `length`
can walk `tlv` into adjacent struct fields including sdreq_timer and
sdreq_timeout_work which contain kernel function pointers at approximately
+176 and +216 bytes past remote_gb[]. The parsed `type` byte at those
positions may match a recognized TLV type causing the parser to store
bytes from the function pointer into local->remote_miu, which is
subsequently readable via getsockopt().
Issue 3 - zero-length TLV value:
The llcp_tlv8() and llcp_tlv16() accessor helpers read tlv[2] and
tlv[2..3] respectively. The outer guard guarantees `length` bytes of
value are available past the two-byte header, but when length == 0 it
only guarantees offset+2 <= tlv_array_len (non-strict), leaving tlv[2]
out of bounds. Per-type minimum-length checks are required before each
accessor call. Note: llcp_tlv8/16 additionally validate against the
llcp_tlv_length[] table, providing a second safety layer; the per-type
checks here make the rejection explicit and avoid silent zero-defaults.
Fix: add two loop-level guards inside each parsing loop:
if (tlv_array_len - offset < 2) /* need type + length */
break;
[read type, length]
if (tlv_array_len - offset - 2 < length) /* need length value bytes */
break;
Both subtractions are safe: the loop condition guarantees offset <
tlv_array_len; the first guard then guarantees the difference is >= 2,
making the second subtraction non-negative.
Add per-type minimum-length checks before each accessor call:
- tlv8-based (VERSION, LTO, OPT, RW): require length >= 1
- tlv16-based (MIUX, WKS): require length >= 2
Reachability: nfc_llcp_parse_connection_tlv() is reached on receipt of a
CONNECT or CC PDU before any connection is established.
nfc_llcp_parse_gb_tlv() is reached during ATR_RES processing. Both are
triggerable from any NFC peer within ~4 cm with no authentication.
Reported-by: Simon Horman <horms@kernel.org>
Fixes: d646960f7986 ("NFC: Add LLCP sockets")
Cc: stable@vger.kernel.org
Signed-off-by: Lekë Hapçiu <snowwlake@icloud.com>
---
net/nfc/llcp_commands.c | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 6937dcb3b..7cc237a6d 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -202,25 +202,39 @@ int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
return -ENODEV;
while (offset < tlv_array_len) {
+ if (tlv_array_len - offset < 2)
+ break;
type = tlv[0];
length = tlv[1];
+ if (tlv_array_len - offset - 2 < length)
+ break;
pr_debug("type 0x%x length %d\n", type, length);
switch (type) {
case LLCP_TLV_VERSION:
+ if (length < 1)
+ break;
local->remote_version = llcp_tlv_version(tlv);
break;
case LLCP_TLV_MIUX:
+ if (length < 2)
+ break;
local->remote_miu = llcp_tlv_miux(tlv) + 128;
break;
case LLCP_TLV_WKS:
+ if (length < 2)
+ break;
local->remote_wks = llcp_tlv_wks(tlv);
break;
case LLCP_TLV_LTO:
+ if (length < 1)
+ break;
local->remote_lto = llcp_tlv_lto(tlv) * 10;
break;
case LLCP_TLV_OPT:
+ if (length < 1)
+ break;
local->remote_opt = llcp_tlv_opt(tlv);
break;
default:
@@ -253,16 +267,24 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
return -ENOTCONN;
while (offset < tlv_array_len) {
+ if (tlv_array_len - offset < 2)
+ break;
type = tlv[0];
length = tlv[1];
+ if (tlv_array_len - offset - 2 < length)
+ break;
pr_debug("type 0x%x length %d\n", type, length);
switch (type) {
case LLCP_TLV_MIUX:
+ if (length < 2)
+ break;
sock->remote_miu = llcp_tlv_miux(tlv) + 128;
break;
case LLCP_TLV_RW:
+ if (length < 1)
+ break;
sock->remote_rw = llcp_tlv_rw(tlv);
break;
case LLCP_TLV_SN:
--
2.51.0
^ permalink raw reply related
* [PATCH net 0/3] nfc: llcp: fix OOB reads in TLV parsers and PDU handlers
From: Lekë Hapçiu @ 2026-04-09 23:35 UTC (permalink / raw)
To: netdev
Cc: linux-nfc, stable, davem, edumazet, kuba, pabeni,
Lekë Hapçiu
This series fixes three out-of-bounds read vulnerabilities in the NFC
LLCP layer, all reachable from RF without prior pairing or session
establishment.
Patch 1 adds missing TLV length bounds checks in nfc_llcp_parse_gb_tlv()
and nfc_llcp_parse_connection_tlv() — a crafted CONNECT or SNL PDU
containing a short TLV value field can read beyond the skb tail.
Patch 2 fixes nfc_llcp_recv_snl(), which accessed TLV fields and
performed arithmetic on an uncapped length byte before any bounds
check, enabling a 1-byte heap OOB read and a u8 wrap-around.
Patch 3 fixes nfc_llcp_recv_dm(), which read the DM reason byte at
skb->data[2] without verifying the frame is at least 3 bytes long.
A 2-byte DM PDU (header only) from a rogue peer triggers a 1-byte
OOB heap read.
All three bugs are independently triggered via RF (AV:A, AC:L, no
authentication required).
Lekë Hapçiu (3):
nfc: llcp: add TLV length bounds checks in parse_gb_tlv and
parse_connection_tlv
nfc: llcp: fix TLV parsing OOB and length underflow in
nfc_llcp_recv_snl
nfc: llcp: fix OOB read of DM reason byte in nfc_llcp_recv_dm()
net/nfc/llcp_commands.c | 9 ++++++++-
net/nfc/llcp_core.c | 22 ++++++++++++++++++++++
2 files changed, 30 insertions(+), 1 deletion(-)
--
2.34.1
^ permalink raw reply
* Re: [PATCH RFC net-next 02/10] net: stmmac: rename dev_id to userver
From: Jitendra Vegiraju @ 2026-04-09 23:07 UTC (permalink / raw)
To: Russell King (Oracle)
Cc: Andrew Lunn, Alexandre Torgue, Andrew Lunn, Chen-Yu Tsai,
David S. Miller, Eric Dumazet, Jakub Kicinski, linux-arm-kernel,
linux-stm32, linux-sunxi, netdev, Paolo Abeni, Samuel Holland
In-Reply-To: <E1wAPBR-0000000F7ju-1fD9@rmk-PC.armlinux.org.uk>
[-- Attachment #1: Type: text/plain, Size: 4607 bytes --]
Hi Russell,
On Wed, Apr 8, 2026 at 2:27 AM Russell King (Oracle)
<rmk+kernel@armlinux.org.uk> wrote:
>
> The Synopsys Databook and several implementation TRMs identify bits
> 15:8 of the version register in dwmac v3.xx and v4.xx as "userver".
> We even print its value with "User ID". Rather than using "dev_id",
> use "userver" instead.
>
> Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
> ---
> drivers/net/ethernet/stmicro/stmmac/hwif.c | 18 +++++++++---------
> 1 file changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c
> index 3774af66db48..830ff816ab4f 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c
> @@ -15,7 +15,7 @@
>
> struct stmmac_version {
> u8 snpsver;
> - u8 dev_id;
> + u8 userver;
> };
According to the XGMAC databook that I have access to, bits(15:8) identify
the DEVID field of the MAC_version register.
The userver field is from bits(23:16) of the same register. This is a
customer defined field (configured with coreConsultant).
Currently stmmac doesn't care about bits(23:16).
I think the confusion comes from the macro name in common.h:
#define DWMAC_USERVER GENMASK_U32(15, 8)
This should be named
#define DWMAC_DEVID GENMASK_U32(15, 8)
Hope someone with access to another databook can confirm this.
>
> static void stmmac_get_version(struct stmmac_priv *priv,
> @@ -26,7 +26,7 @@ static void stmmac_get_version(struct stmmac_priv *priv,
> u32 version;
>
> ver->snpsver = 0;
> - ver->dev_id = 0;
> + ver->userver = 0;
>
> if (core_type == DWMAC_CORE_MAC100)
> return;
> @@ -48,7 +48,7 @@ static void stmmac_get_version(struct stmmac_priv *priv,
>
> ver->snpsver = FIELD_GET(DWMAC_SNPSVER, version);
> if (core_type == DWMAC_CORE_XGMAC)
> - ver->dev_id = FIELD_GET(DWMAC_USERVER, version);
> + ver->userver = FIELD_GET(DWMAC_USERVER, version);
> }
>
> static void stmmac_dwmac_mode_quirk(struct stmmac_priv *priv)
> @@ -111,7 +111,7 @@ int stmmac_reset(struct stmmac_priv *priv)
> static const struct stmmac_hwif_entry {
> enum dwmac_core_type core_type;
> u32 min_snpsver;
> - u32 dev_id;
> + u32 userver;
> const struct stmmac_regs_off regs;
> const void *desc;
> const void *dma;
> @@ -247,7 +247,7 @@ static const struct stmmac_hwif_entry {
> }, {
> .core_type = DWMAC_CORE_XGMAC,
> .min_snpsver = DWXGMAC_CORE_2_10,
> - .dev_id = DWXGMAC_ID,
> + .userver = DWXGMAC_ID,
> .regs = {
> .ptp_off = PTP_XGMAC_OFFSET,
> .mmc_off = MMC_XGMAC_OFFSET,
> @@ -269,7 +269,7 @@ static const struct stmmac_hwif_entry {
> }, {
> .core_type = DWMAC_CORE_XGMAC,
> .min_snpsver = DWXLGMAC_CORE_2_00,
> - .dev_id = DWXLGMAC_ID,
> + .userver = DWXLGMAC_ID,
> .regs = {
> .ptp_off = PTP_XGMAC_OFFSET,
> .mmc_off = MMC_XGMAC_OFFSET,
> @@ -291,7 +291,7 @@ static const struct stmmac_hwif_entry {
> };
>
> static const struct stmmac_hwif_entry *
> -stmmac_hwif_find(enum dwmac_core_type core_type, u8 snpsver, u8 dev_id)
> +stmmac_hwif_find(enum dwmac_core_type core_type, u8 snpsver, u8 userver)
> {
> const struct stmmac_hwif_entry *entry;
> int i;
> @@ -305,7 +305,7 @@ stmmac_hwif_find(enum dwmac_core_type core_type, u8 snpsver, u8 dev_id)
> if (snpsver < entry->min_snpsver)
> continue;
> if (core_type == DWMAC_CORE_XGMAC &&
> - dev_id != entry->dev_id)
> + userver != entry->userver)
> continue;
>
> return entry;
> @@ -358,7 +358,7 @@ int stmmac_hwif_init(struct stmmac_priv *priv)
> /* Fallback to generic HW */
>
> /* Use synopsys_id var because some setups can override this */
> - entry = stmmac_hwif_find(core_type, priv->synopsys_id, version.dev_id);
> + entry = stmmac_hwif_find(core_type, priv->synopsys_id, version.userver);
> if (!entry) {
> dev_err(priv->device,
> "Failed to find HW IF (id=0x%x, gmac=%d/%d)\n",
> --
> 2.47.3
>
[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 5435 bytes --]
^ permalink raw reply
* [PATCH net] selftests: netfilter: nft_tproxy.sh: adjust to socat changes
From: Florian Westphal @ 2026-04-09 22:45 UTC (permalink / raw)
To: netdev; +Cc: Florian Westphal, Jakub Kicinski
Like e65d8b6f3092 ("selftests: drv-net: adjust to socat changes") we
need to add shut-none for this test too.
The extra 0-packet can trigger a second (unexpected) reply from the server.
Fixes: 7e37e0eacd22 ("selftests: netfilter: nft_tproxy.sh: add tcp tests")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Closes: https://lore.kernel.org/netdev/20260408152432.24b8ad0d@kernel.org/
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
I'll leave it up to netdev maintainers to apply this to net-next
instead.
.../selftests/net/netfilter/nft_tproxy_udp.sh | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh
index d16de13fe5a7..1dc7b0450145 100755
--- a/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh
+++ b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh
@@ -190,13 +190,13 @@ table inet filter {
}
EOF
- timeout "$timeout" ip netns exec "$nsrouter" socat -u "$socat_ipproto" udp-listen:12345,fork,ip-transparent,reuseport udp:"$ns1_ip_port",ip-transparent,reuseport,bind="$ns2_ip_port" 2>/dev/null &
+ timeout "$timeout" ip netns exec "$nsrouter" socat -u "$socat_ipproto" udp-listen:12345,fork,ip-transparent,reuseport,shut-none udp:"$ns1_ip_port",ip-transparent,reuseport,bind="$ns2_ip_port",shut-none 2>/dev/null &
local tproxy_pid=$!
- timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS2" 2>/dev/null &
+ timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" udp-listen:8080,fork,shut-none SYSTEM:"echo PONG_NS2" 2>/dev/null &
local server2_pid=$!
- timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS3" 2>/dev/null &
+ timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" udp-listen:8080,fork,shut-none SYSTEM:"echo PONG_NS3" 2>/dev/null &
local server3_pid=$!
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" 12345 "-u"
@@ -205,7 +205,7 @@ EOF
local result
# request from ns1 to ns2 (forwarded traffic)
- result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",sourceport=18888)
+ result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",sourceport=18888,shut-none)
if [ "$result" == "$expect_ns1_ns2" ] ;then
echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2"
else
@@ -214,7 +214,7 @@ EOF
fi
# request from ns1 to ns3 (forwarded traffic)
- result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port")
+ result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port",shut-none)
if [ "$result" = "$expect_ns1_ns3" ] ;then
echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3"
else
@@ -223,7 +223,7 @@ EOF
fi
# request from nsrouter to ns2 (localy originated traffic)
- result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port")
+ result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",shut-none)
if [ "$result" == "$expect_nsrouter_ns2" ] ;then
echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2"
else
@@ -232,7 +232,7 @@ EOF
fi
# request from nsrouter to ns3 (localy originated traffic)
- result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port")
+ result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port",shut-none)
if [ "$result" = "$expect_nsrouter_ns3" ] ;then
echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3"
else
--
2.52.0
^ permalink raw reply related
* Re: [PATCH v2 1/2] drm/drm_ras: Add clear-error-counter netlink command to drm_ras
From: Zack McKevitt @ 2026-04-09 23:01 UTC (permalink / raw)
To: Tauro, Riana, intel-xe, dri-devel, netdev, rodrigo.vivi,
joonas.lahtinen, aravind.iddamsetty
Cc: anshuman.gupta, simona.vetter, airlied, pratik.bari,
joshua.santosh.ranjan, ashwin.kumar.kulkarni, shubham.kumar,
ravi.kishore.koppuravuri, raag.jadav, anvesh.bakwad,
maarten.lankhorst, Jakub Kicinski, Lijo Lazar, Hawking Zhang,
David S. Miller, Paolo Abeni, Eric Dumazet
In-Reply-To: <e7978696-eff6-4c1f-ab0f-047dccb18b59@intel.com>
On 4/9/2026 1:21 AM, Tauro, Riana wrote:
> Hi Zack
>
> Could you please take a look at this patch if applicable to your
> usecase. Please let me know if any
> changes are required
>
From a quick glance, I think this looks good from our end.
Thanks,
Zack
> @Rodrigo This is already reviewed by Jakub and Raag.
> If there are no opens, can this be merged via drm_misc
>
> Thanks
> Riana
>
> On 4/9/2026 1:03 PM, Riana Tauro wrote:
>> Introduce a new 'clear-error-counter' drm_ras command to reset the
>> counter
>> value for a specific error counter of a given node.
>>
>> The command is a 'do' netlink request with 'node-id' and 'error-id'
>> as parameters with no response payload.
>>
>> Usage:
>>
>> $ sudo ynl --family drm_ras --do clear-error-counter --json \
>> '{"node-id":1, "error-id":1}'
>> None
>>
>> Cc: Jakub Kicinski <kuba@kernel.org>
>> Cc: Zack McKevitt <zachary.mckevitt@oss.qualcomm.com>
>> Cc: Lijo Lazar <lijo.lazar@amd.com>
>> Cc: Hawking Zhang <Hawking.Zhang@amd.com>
>> Cc: David S. Miller <davem@davemloft.net>
>> Cc: Paolo Abeni <pabeni@redhat.com>
>> Cc: Eric Dumazet <edumazet@google.com>
>> Signed-off-by: Riana Tauro <riana.tauro@intel.com>
>> Reviewed-by: Jakub Kicinski <kuba@kernel.org>
>> Reviewed-by: Raag Jadav <raag.jadav@intel.com>
>> ---
>> Documentation/gpu/drm-ras.rst | 8 +++++
>> Documentation/netlink/specs/drm_ras.yaml | 13 ++++++-
>> drivers/gpu/drm/drm_ras.c | 43 +++++++++++++++++++++++-
>> drivers/gpu/drm/drm_ras_nl.c | 13 +++++++
>> drivers/gpu/drm/drm_ras_nl.h | 2 ++
>> include/drm/drm_ras.h | 11 ++++++
>> include/uapi/drm/drm_ras.h | 1 +
>> 7 files changed, 89 insertions(+), 2 deletions(-)
>>
>> diff --git a/Documentation/gpu/drm-ras.rst b/Documentation/gpu/drm-
>> ras.rst
>> index 70b246a78fc8..4636e68f5678 100644
>> --- a/Documentation/gpu/drm-ras.rst
>> +++ b/Documentation/gpu/drm-ras.rst
>> @@ -52,6 +52,8 @@ User space tools can:
>> as a parameter.
>> * Query specific error counter values with the ``get-error-counter``
>> command, using both
>> ``node-id`` and ``error-id`` as parameters.
>> +* Clear specific error counters with the ``clear-error-counter``
>> command, using both
>> + ``node-id`` and ``error-id`` as parameters.
>> YAML-based Interface
>> --------------------
>> @@ -101,3 +103,9 @@ Example: Query an error counter for a given node
>> sudo ynl --family drm_ras --do get-error-counter --json '{"node-
>> id":0, "error-id":1}'
>> {'error-id': 1, 'error-name': 'error_name1', 'error-value': 0}
>> +Example: Clear an error counter for a given node
>> +
>> +.. code-block:: bash
>> +
>> + sudo ynl --family drm_ras --do clear-error-counter --json
>> '{"node-id":0, "error-id":1}'
>> + None
>> diff --git a/Documentation/netlink/specs/drm_ras.yaml b/Documentation/
>> netlink/specs/drm_ras.yaml
>> index 79af25dac3c5..e113056f8c01 100644
>> --- a/Documentation/netlink/specs/drm_ras.yaml
>> +++ b/Documentation/netlink/specs/drm_ras.yaml
>> @@ -99,7 +99,7 @@ operations:
>> flags: [admin-perm]
>> do:
>> request:
>> - attributes:
>> + attributes: &id-attrs
>> - node-id
>> - error-id
>> reply:
>> @@ -113,3 +113,14 @@ operations:
>> - node-id
>> reply:
>> attributes: *errorinfo
>> + -
>> + name: clear-error-counter
>> + doc: >-
>> + Clear error counter for a given node.
>> + The request includes the error-id and node-id of the
>> + counter to be cleared.
>> + attribute-set: error-counter-attrs
>> + flags: [admin-perm]
>> + do:
>> + request:
>> + attributes: *id-attrs
>> diff --git a/drivers/gpu/drm/drm_ras.c b/drivers/gpu/drm/drm_ras.c
>> index b2fa5ab86d87..d6eab29a1394 100644
>> --- a/drivers/gpu/drm/drm_ras.c
>> +++ b/drivers/gpu/drm/drm_ras.c
>> @@ -26,7 +26,7 @@
>> * efficient lookup by ID. Nodes can be registered or unregistered
>> * dynamically at runtime.
>> *
>> - * A Generic Netlink family `drm_ras` exposes two main operations to
>> + * A Generic Netlink family `drm_ras` exposes the below operations to
>> * userspace:
>> *
>> * 1. LIST_NODES: Dump all currently registered RAS nodes.
>> @@ -37,6 +37,10 @@
>> * Returns all counters of a node if only Node ID is provided or
>> specific
>> * error counters.
>> *
>> + * 3. CLEAR_ERROR_COUNTER: Clear error counter of a given node.
>> + * Userspace must provide Node ID, Error ID.
>> + * Clears specific error counter of a node if supported.
>> + *
>> * Node registration:
>> *
>> * - drm_ras_node_register(): Registers a new node and assigns
>> @@ -66,6 +70,8 @@
>> * operation, fetching all counters from a specific node.
>> * - drm_ras_nl_get_error_counter_doit(): Implements the
>> GET_ERROR_COUNTER doit
>> * operation, fetching a counter value from a specific node.
>> + * - drm_ras_nl_clear_error_counter_doit(): Implements the
>> CLEAR_ERROR_COUNTER doit
>> + * operation, clearing a counter value from a specific node.
>> */
>> static DEFINE_XARRAY_ALLOC(drm_ras_xa);
>> @@ -314,6 +320,41 @@ int drm_ras_nl_get_error_counter_doit(struct
>> sk_buff *skb,
>> return doit_reply_value(info, node_id, error_id);
>> }
>> +/**
>> + * drm_ras_nl_clear_error_counter_doit() - Clear an error counter of
>> a node
>> + * @skb: Netlink message buffer
>> + * @info: Generic Netlink info containing attributes of the request
>> + *
>> + * Extracts the node ID and error ID from the netlink attributes and
>> + * clears the current value.
>> + *
>> + * Return: 0 on success, or negative errno on failure.
>> + */
>> +int drm_ras_nl_clear_error_counter_doit(struct sk_buff *skb,
>> + struct genl_info *info)
>> +{
>> + struct drm_ras_node *node;
>> + u32 node_id, error_id;
>> +
>> + if (!info->attrs ||
>> + GENL_REQ_ATTR_CHECK(info,
>> DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID) ||
>> + GENL_REQ_ATTR_CHECK(info,
>> DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_ID))
>> + return -EINVAL;
>> +
>> + node_id = nla_get_u32(info-
>> >attrs[DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID]);
>> + error_id = nla_get_u32(info-
>> >attrs[DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_ID]);
>> +
>> + node = xa_load(&drm_ras_xa, node_id);
>> + if (!node || !node->clear_error_counter)
>> + return -ENOENT;
>> +
>> + if (error_id < node->error_counter_range.first ||
>> + error_id > node->error_counter_range.last)
>> + return -EINVAL;
>> +
>> + return node->clear_error_counter(node, error_id);
>> +}
>> +
>> /**
>> * drm_ras_node_register() - Register a new RAS node
>> * @node: Node structure to register
>> diff --git a/drivers/gpu/drm/drm_ras_nl.c b/drivers/gpu/drm/drm_ras_nl.c
>> index 16803d0c4a44..dea1c1b2494e 100644
>> --- a/drivers/gpu/drm/drm_ras_nl.c
>> +++ b/drivers/gpu/drm/drm_ras_nl.c
>> @@ -22,6 +22,12 @@ static const struct nla_policy
>> drm_ras_get_error_counter_dump_nl_policy[DRM_RAS_
>> [DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID] = { .type = NLA_U32, },
>> };
>> +/* DRM_RAS_CMD_CLEAR_ERROR_COUNTER - do */
>> +static const struct nla_policy
>> drm_ras_clear_error_counter_nl_policy[DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_ID + 1] = {
>> + [DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID] = { .type = NLA_U32, },
>> + [DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_ID] = { .type = NLA_U32, },
>> +};
>> +
>> /* Ops table for drm_ras */
>> static const struct genl_split_ops drm_ras_nl_ops[] = {
>> {
>> @@ -43,6 +49,13 @@ static const struct genl_split_ops drm_ras_nl_ops[]
>> = {
>> .maxattr = DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID,
>> .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP,
>> },
>> + {
>> + .cmd = DRM_RAS_CMD_CLEAR_ERROR_COUNTER,
>> + .doit = drm_ras_nl_clear_error_counter_doit,
>> + .policy = drm_ras_clear_error_counter_nl_policy,
>> + .maxattr = DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_ID,
>> + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
>> + },
>> };
>> struct genl_family drm_ras_nl_family __ro_after_init = {
>> diff --git a/drivers/gpu/drm/drm_ras_nl.h b/drivers/gpu/drm/drm_ras_nl.h
>> index 06ccd9342773..a398643572a5 100644
>> --- a/drivers/gpu/drm/drm_ras_nl.h
>> +++ b/drivers/gpu/drm/drm_ras_nl.h
>> @@ -18,6 +18,8 @@ int drm_ras_nl_get_error_counter_doit(struct sk_buff
>> *skb,
>> struct genl_info *info);
>> int drm_ras_nl_get_error_counter_dumpit(struct sk_buff *skb,
>> struct netlink_callback *cb);
>> +int drm_ras_nl_clear_error_counter_doit(struct sk_buff *skb,
>> + struct genl_info *info);
>> extern struct genl_family drm_ras_nl_family;
>> diff --git a/include/drm/drm_ras.h b/include/drm/drm_ras.h
>> index 5d50209e51db..f2a787bc4f64 100644
>> --- a/include/drm/drm_ras.h
>> +++ b/include/drm/drm_ras.h
>> @@ -58,6 +58,17 @@ struct drm_ras_node {
>> int (*query_error_counter)(struct drm_ras_node *node, u32 error_id,
>> const char **name, u32 *val);
>> + /**
>> + * @clear_error_counter:
>> + *
>> + * This callback is used by drm_ras to clear a specific error
>> counter.
>> + * Driver should implement this callback to support clearing
>> error counters
>> + * of a node.
>> + *
>> + * Returns: 0 on success, negative error code on failure.
>> + */
>> + int (*clear_error_counter)(struct drm_ras_node *node, u32 error_id);
>> +
>> /** @priv: Driver private data */
>> void *priv;
>> };
>> diff --git a/include/uapi/drm/drm_ras.h b/include/uapi/drm/drm_ras.h
>> index 5f40fa5b869d..218a3ee86805 100644
>> --- a/include/uapi/drm/drm_ras.h
>> +++ b/include/uapi/drm/drm_ras.h
>> @@ -41,6 +41,7 @@ enum {
>> enum {
>> DRM_RAS_CMD_LIST_NODES = 1,
>> DRM_RAS_CMD_GET_ERROR_COUNTER,
>> + DRM_RAS_CMD_CLEAR_ERROR_COUNTER,
>> __DRM_RAS_CMD_MAX,
>> DRM_RAS_CMD_MAX = (__DRM_RAS_CMD_MAX - 1)
^ permalink raw reply
* [PATCH net-next v2 2/2] KEYS: annotate struct user_key_payload with __counted_by
From: Thorsten Blum @ 2026-04-09 22:57 UTC (permalink / raw)
To: David Howells, Jarkko Sakkinen, Kees Cook, Gustavo A. R. Silva
Cc: Thorsten Blum, netdev, keyrings, linux-kernel, linux-hardening
In-Reply-To: <20260409225703.158552-4-thorsten.blum@linux.dev>
Add the __counted_by() compiler attribute to the flexible array member
'data' to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and
CONFIG_FORTIFY_SOURCE.
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
---
Changes in v2:
- Use __aligned(8) as suggested by David
- v1: https://lore.kernel.org/lkml/20260409073711.57020-6-thorsten.blum@linux.dev/
Cc: netdev@vger.kernel.org
---
include/keys/user-type.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/keys/user-type.h b/include/keys/user-type.h
index 386c31432789..c29ed9f5d300 100644
--- a/include/keys/user-type.h
+++ b/include/keys/user-type.h
@@ -27,7 +27,8 @@
struct user_key_payload {
struct rcu_head rcu; /* RCU destructor */
unsigned short datalen; /* length of this data */
- char data[] __aligned(__alignof__(u64)); /* actual data */
+ char data[] /* actual data */
+ __aligned(8) __counted_by(datalen);
};
extern struct key_type key_type_user;
^ permalink raw reply related
* [PATCH net-next v2 1/2] keys, dns: drop unused upayload->data NUL terminator
From: Thorsten Blum @ 2026-04-09 22:57 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Thorsten Blum, Tim Bird
Cc: netdev, linux-kernel
In dns_resolver_preparse(), do not NUL-terminate ->data and allocate one
byte less. The NUL terminator is never used and only ->datalen bytes are
accessed.
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
---
No changes in patch 1/2.
---
net/dns_resolver/dns_key.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index c3c8c3240ef9..451247864a63 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -203,7 +203,7 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
kdebug("store result");
prep->quotalen = result_len;
- upayload = kmalloc_flex(*upayload, data, result_len + 1);
+ upayload = kmalloc_flex(*upayload, data, result_len);
if (!upayload) {
kleave(" = -ENOMEM");
return -ENOMEM;
@@ -211,7 +211,6 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
upayload->datalen = result_len;
memcpy(upayload->data, data, result_len);
- upayload->data[result_len] = '\0';
prep->payload.data[dns_key_data] = upayload;
kleave(" = 0");
^ permalink raw reply related
* Re: [PATCH] udp: Force compute_score to always inline
From: Gabriel Krisman Bertazi @ 2026-04-09 22:50 UTC (permalink / raw)
To: Eric Dumazet
Cc: willemdebruijn.kernel, davem, dsahern, kuba, pabeni, kuniyu,
horms, netdev
In-Reply-To: <CANn89iKQhLOdtn-_viyDN8ytjJtR-4p0gteXL6gGSHoUYZp5Hw@mail.gmail.com>
Eric Dumazet <edumazet@google.com> writes:
> On Thu, Apr 9, 2026 at 3:16 PM Gabriel Krisman Bertazi <krisman@suse.de> wrote:
>
>>
>> Back in 2024 I reported a 7-12% regression on an iperf3 UDP loopback
>> throughput test that we traced to the extra overhead of calling
>> compute_score in two places, introduced by commit f0ea27e7bfe1 ("udp:
>> re-score reuseport groups when connected sockets are present"). At the
>> time, I pointed out the overhead was caused by the multiple calls,
>> associated with cpu-specific mitigations, and merged commit
>> 50aee97d1511 ("udp: Avoid call to compute_score on multiple sites") to
>> jump back explicitly, to force the rescore call in a single place.
>>
>> Recently though, we got another regression report against a newer distro
>> version, which a team colleague traced back to the same root-cause.
>> Turns out that once we updated to gcc-13, the compiler got smart enough
>> to unroll the loop, undoing my previous mitigation. Let's bite the
>> bullet and __always_inline compute_score on both ipv4 and ipv6 to
>> prevent gcc from de-optimizing it again in the future. These functions
>> are only called in two places each, udpX_lib_lookup1 and
>> udpX_lib_lookup2, so the extra size shouldn't be a problem and it is hot
>> enough to be very visible in profilings. In fact, with gcc13, forcing
>> the inline will prevent gcc from unrolling the fix from commit
>> 50aee97d1511, so we don't end up increasing udpX_lib_lookup2 at all.
>>
>> I haven't recollected the results myself, as I don't have access to the
>> machine at the moment. But the same colleague reported 4.67%
>> improvement with this patch in the loopback benchmark, solving the
>> regression report within noise margins.
>
> You could include scripts/bloat-o-meter results, so that we can sense
> the cost of such a change.
>
> $ scripts/bloat-o-meter -t vmlinux.old vmlinux.new
> add/remove: 0/2 grow/shrink: 6/1 up/down: 622/-410 (212)
> Function old new delta
> __udp6_lib_lookup 797 1007 +210
> __udp4_lib_lookup 838 984 +146
> udp6_lib_lookup2 404 536 +132
> udp4_lib_lookup2 396 498 +102
> udpv6_rcv 3018 3034 +16
> udp_init_sock 244 260 +16
> bpf_iter_udp_batch 953 937 -16
> __pfx_compute_score 32 - -32
> compute_score 362 - -362
> Total: Before=30269687, After=30269899, chg +0.00%
>
> No change for clang.
>
> Reviewed-by: Eric Dumazet <edumazet@google.com>
Apologies, I wasn't aware of that tool. I did some calculations by hand
and found something like 200 bytes extra in udp6_lib_lookup2.
For gcc-13:
scripts/bloat-o-meter vmlinux vmlinux-inline
add/remove: 0/2 grow/shrink: 4/0 up/down: 616/-416 (200)
Function old new delta
udp6_lib_lookup2 762 949 +187
__udp6_lib_lookup 810 975 +165
udp4_lib_lookup2 757 906 +149
__udp4_lib_lookup 871 986 +115
__pfx_compute_score 32 - -32
compute_score 384 - -384
Total: Before=35011784, After=35011984, chg +0.00%
--
Gabriel Krisman Bertazi
^ permalink raw reply
* Re: [PATCH] udp: Force compute_score to always inline
From: Eric Dumazet @ 2026-04-09 22:36 UTC (permalink / raw)
To: Gabriel Krisman Bertazi
Cc: willemdebruijn.kernel, davem, dsahern, kuba, pabeni, kuniyu,
horms, netdev
In-Reply-To: <20260409221532.69090-1-krisman@suse.de>
On Thu, Apr 9, 2026 at 3:16 PM Gabriel Krisman Bertazi <krisman@suse.de> wrote:
>
> Back in 2024 I reported a 7-12% regression on an iperf3 UDP loopback
> throughput test that we traced to the extra overhead of calling
> compute_score in two places, introduced by commit f0ea27e7bfe1 ("udp:
> re-score reuseport groups when connected sockets are present"). At the
> time, I pointed out the overhead was caused by the multiple calls,
> associated with cpu-specific mitigations, and merged commit
> 50aee97d1511 ("udp: Avoid call to compute_score on multiple sites") to
> jump back explicitly, to force the rescore call in a single place.
>
> Recently though, we got another regression report against a newer distro
> version, which a team colleague traced back to the same root-cause.
> Turns out that once we updated to gcc-13, the compiler got smart enough
> to unroll the loop, undoing my previous mitigation. Let's bite the
> bullet and __always_inline compute_score on both ipv4 and ipv6 to
> prevent gcc from de-optimizing it again in the future. These functions
> are only called in two places each, udpX_lib_lookup1 and
> udpX_lib_lookup2, so the extra size shouldn't be a problem and it is hot
> enough to be very visible in profilings. In fact, with gcc13, forcing
> the inline will prevent gcc from unrolling the fix from commit
> 50aee97d1511, so we don't end up increasing udpX_lib_lookup2 at all.
>
> I haven't recollected the results myself, as I don't have access to the
> machine at the moment. But the same colleague reported 4.67%
> improvement with this patch in the loopback benchmark, solving the
> regression report within noise margins.
You could include scripts/bloat-o-meter results, so that we can sense
the cost of such a change.
$ scripts/bloat-o-meter -t vmlinux.old vmlinux.new
add/remove: 0/2 grow/shrink: 6/1 up/down: 622/-410 (212)
Function old new delta
__udp6_lib_lookup 797 1007 +210
__udp4_lib_lookup 838 984 +146
udp6_lib_lookup2 404 536 +132
udp4_lib_lookup2 396 498 +102
udpv6_rcv 3018 3034 +16
udp_init_sock 244 260 +16
bpf_iter_udp_batch 953 937 -16
__pfx_compute_score 32 - -32
compute_score 362 - -362
Total: Before=30269687, After=30269899, chg +0.00%
No change for clang.
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply
* [PATCH v3][next] netfilter: x_tables: Avoid a couple -Wflex-array-member-not-at-end warnings
From: Gustavo A. R. Silva @ 2026-04-09 22:34 UTC (permalink / raw)
To: Pablo Neira Ayuso, Florian Westphal, Phil Sutter, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman
Cc: netfilter-devel, coreteam, netdev, linux-kernel,
Gustavo A. R. Silva, linux-hardening, Kees Cook
-Wflex-array-member-not-at-end was introduced in GCC-14, and we are
getting ready to enable it, globally.
Use the TRAILING_OVERLAP() helper to fix the following warnings:
1 net/netfilter/x_tables.c:816:39: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end]
1 net/netfilter/x_tables.c:811:39: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end]
This helper creates a union between a flexible-array member (FAM)
and a set of members that would otherwise follow it. This overlays
the trailing members onto the FAM while preserving the original
memory layout.
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
Changes in v3:
- Use the TRAILING_OVERLAP() helper.
- Update changelog text.
Changes in v2:
- Update verdict after (compat_uint_t *)st->data;
- Link: https://lore.kernel.org/linux-hardening/adgL5wPm9VpaV3MO@kspp/
v1:
- Link: https://lore.kernel.org/linux-hardening/adbIKC0cZcK7VcCF@kspp/
net/netfilter/x_tables.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index b39017c80548..9f837fb5ceb4 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -819,13 +819,17 @@ EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
/* non-compat version may have padding after verdict */
struct compat_xt_standard_target {
- struct compat_xt_entry_target t;
- compat_uint_t verdict;
+ /* Must be last as it ends in a flexible-array member. */
+ TRAILING_OVERLAP(struct compat_xt_entry_target, t, data,
+ compat_uint_t verdict;
+ );
};
struct compat_xt_error_target {
- struct compat_xt_entry_target t;
- char errorname[XT_FUNCTION_MAXNAMELEN];
+ /* Must be last as it ends in a flexible-array member. */
+ TRAILING_OVERLAP(struct compat_xt_entry_target, t, data,
+ char errorname[XT_FUNCTION_MAXNAMELEN];
+ );
};
int xt_compat_check_entry_offsets(const void *base, const char *elems,
--
2.43.0
^ permalink raw reply related
* [PATCH net 4/4] nfc: digital: Fix OOB read of DID byte in digital_tg_recv_dep_req()
From: Lekë Hapçiu @ 2026-04-09 22:34 UTC (permalink / raw)
To: netdev; +Cc: linux-wireless, stable, krzysztof.kozlowski,
Lekë Hapçiu
In-Reply-To: <20260409223436.1887988-1-snowwlake@icloud.com>
digital_tg_recv_dep_req() guards against short frames with:
if (resp->len < size ...) /* size = sizeof(struct digital_dep_req_res) = 3 */
This guarantees resp->len >= 3 (dir + cmd + pfb). However, when the
DID bit is set in pfb, the code immediately accesses resp->data[3] — the
DID byte — which is one byte past the guaranteed minimum:
if (DIGITAL_NFC_DEP_DID_BIT_SET(pfb)) {
if (ddev->did && (ddev->did == resp->data[3])) {
A remote NFC-DEP initiator can trigger this with a 3-byte DEP_REQ frame
that has the DID bit set in the PFB field, causing a 1-byte
out-of-bounds read of kernel heap memory.
Increment the minimum required length to 4 when the DID bit is present
before accessing resp->data[3], mirroring the pattern used for the
size++ / check at the end of the DID block.
Fixes: 7d0911c07b44 ("NFC Digital: Implement NFC-DEP target TX and RX")
Cc: stable@vger.kernel.org
Signed-off-by: Lekë Hapçiu <snowwlake@icloud.com>
---
net/nfc/digital_dep.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index XXXXXXX..XXXXXXX 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -1117,6 +1117,11 @@ static int digital_tg_recv_dep_req(struct nfc_digital_dev *ddev, void *arg,
pfb = dep_req->pfb;
if (DIGITAL_NFC_DEP_DID_BIT_SET(pfb)) {
+ if (resp->len < size + 1) {
+ rc = -EIO;
+ goto exit;
+ }
+
if (ddev->did && (ddev->did == resp->data[3])) {
size++;
} else {
--
2.34.1
^ permalink raw reply
* [PATCH net 3/4] nfc: digital: Fix OOB read of RTOX byte in digital_in_recv_dep_res()
From: Lekë Hapçiu @ 2026-04-09 22:34 UTC (permalink / raw)
To: netdev; +Cc: linux-wireless, stable, krzysztof.kozlowski,
Lekë Hapçiu
In-Reply-To: <20260409223436.1887988-1-snowwlake@icloud.com>
In the SUPERVISOR_PDU / timeout (RTOX) branch of digital_in_recv_dep_res(),
the RTOX value byte is read from resp->data[0] after skb_pull() has
stripped the 3-byte DEP_RES header:
skb_pull(resp, size); /* size = sizeof(struct digital_dep_req_res) = 3 */
...
case DIGITAL_NFC_DEP_PFB_SUPERVISOR_PDU:
...
rtox = DIGITAL_NFC_DEP_RTOX_VALUE(resp->data[0]);
If the remote device sends a DEP_RES frame that is exactly the minimum
length (3 bytes -- dir + cmd + pfb only, no payload), the skb_pull leaves
resp->len == 0 and the read of resp->data[0] is a 1-byte out-of-bounds
read of kernel heap memory beyond the socket buffer.
The I-PDU and ACK/NACK branches are not affected because they either
pass resp directly to upper layers or perform a separate minimum-length
check before accessing payload bytes. Only the RTOX branch is missing
its guard.
Add a resp->len >= 1 check before the RTOX value read.
Fixes: 4b60cfce7aba ("NFC Digital: Implement NFC-DEP initiator TX and RX")
Cc: stable@vger.kernel.org
Signed-off-by: Lekë Hapçiu <snowwlake@icloud.com>
---
net/nfc/digital_dep.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index XXXXXXX..XXXXXXX 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -866,6 +866,12 @@
goto error;
}
+ if (!resp->len) {
+ PROTOCOL_ERR("14.8.4.1");
+ rc = -EIO;
+ goto error;
+ }
+
rtox = DIGITAL_NFC_DEP_RTOX_VALUE(resp->data[0]);
if (!rtox || rtox > DIGITAL_NFC_DEP_RTOX_MAX) {
PROTOCOL_ERR("14.8.4.1");
--
2.34.1
^ permalink raw reply
* [PATCH net 2/4] nfc: digital: Fix check-after-read in digital_tg_recv_sens_req()
From: Lekë Hapçiu @ 2026-04-09 22:34 UTC (permalink / raw)
To: netdev; +Cc: linux-wireless, stable, krzysztof.kozlowski,
Lekë Hapçiu
In-Reply-To: <20260409223436.1887988-1-snowwlake@icloud.com>
digital_tg_recv_sens_req() reads resp->data[0] into sens_req at line
1092 before the !resp->len guard fires at line 1094. A zero-length
frame causes an unconditional 1-byte out-of-bounds read before any
length check has taken place.
The root cause is that the assignment and the length check are split
across two statements: resp->data[0] is read unconditionally into
sens_req, and only then is resp->len tested as part of a compound
condition. Even though the || operator correctly short-circuits, the
read on the previous line is already done.
Move the length guard before the data access by splitting the combined
condition into an early resp->len check followed by the data read and
the command comparison.
Fixes: 2e7a3e7ee80d ("NFC Digital: Add target mode for NFC-A/ISO14443A")
Cc: stable@vger.kernel.org
Signed-off-by: Lekë Hapçiu <snowwlake@icloud.com>
---
net/nfc/digital_technology.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index XXXXXXX..XXXXXXX 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -1090,11 +1090,14 @@ void digital_tg_recv_sens_req(struct nfc_digital_dev *ddev, void *arg,
}
- sens_req = resp->data[0];
-
- if (!resp->len || (sens_req != DIGITAL_CMD_SENS_REQ &&
- sens_req != DIGITAL_CMD_ALL_REQ)) {
+ if (!resp->len) {
rc = -EINVAL;
goto exit;
}
+
+ sens_req = resp->data[0];
+ if (sens_req != DIGITAL_CMD_SENS_REQ && sens_req != DIGITAL_CMD_ALL_REQ) {
+ rc = -EINVAL;
+ goto exit;
+ }
rc = digital_tg_send_sens_res(ddev);
--
2.34.1
^ permalink raw reply
* [PATCH net 1/4] nfc: digital: Fix stack buffer overflow in digital_in_recv_sensf_res()
From: Lekë Hapçiu @ 2026-04-09 22:34 UTC (permalink / raw)
To: netdev; +Cc: linux-wireless, stable, krzysztof.kozlowski,
Lekë Hapçiu
In-Reply-To: <20260409223436.1887988-1-snowwlake@icloud.com>
The function digital_in_recv_sensf_res() validates that the incoming
SENSF_RES frame is at least DIGITAL_SENSF_RES_MIN_LENGTH (17) bytes,
but does not check that it is at most NFC_SENSF_RES_MAXSIZE (18) bytes
before copying into the 18-byte target.sensf_res stack buffer.
After skb_pull(resp, 1) removes the framing byte, resp->len can range
from 16 up to 253 — an NFC-F frame carries a 1-byte length field with
maximum value 255, from which the driver status byte (pulled here) and
the protocol length byte are subtracted. The memcpy() at line 775 then
writes up to 235 bytes past the end of target.sensf_res, overflowing
into adjacent stack data including saved registers and the return address.
A device in NFC-F polling mode can trigger this condition without any
prior pairing or authentication by responding to a SENSF_REQ with an
oversized frame. No user interaction is required on the victim device
while NFC discovery is active.
The NCI code path handles this correctly; nci/ntf.c line 508:
nfcf_poll->sensf_res_len = min_t(__u8, *data++, NFC_SENSF_RES_MAXSIZE);
Apply the equivalent upper-bound check to the digital protocol path by
rejecting frames whose post-strip length exceeds NFC_SENSF_RES_MAXSIZE.
Fixes: 8c0695e4998d ("NFC Digital: Add NFC-F technology support")
Cc: stable@vger.kernel.org
Signed-off-by: Lekë Hapçiu <snowwlake@icloud.com>
---
net/nfc/digital_technology.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index XXXXXXX..XXXXXXX 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -768,6 +768,11 @@ static void digital_in_recv_sensf_res(struct nfc_digital_dev *ddev, void *arg,
skb_pull(resp, 1);
+ if (resp->len > NFC_SENSF_RES_MAXSIZE) {
+ rc = -EIO;
+ goto exit;
+ }
+
memset(&target, 0, sizeof(struct nfc_target));
sensf_res = (struct digital_sensf_res *)resp->data;
--
2.34.1
^ permalink raw reply
* [PATCH net 0/4] nfc: digital: Fix missing and misplaced length checks
From: Lekë Hapçiu @ 2026-04-09 22:34 UTC (permalink / raw)
To: netdev; +Cc: linux-wireless, stable, krzysztof.kozlowski,
Lekë Hapçiu
This series fixes four length-check bugs in the NFC Digital Protocol stack.
All are reachable from RF without authentication:
- Patch 1: Missing upper-bound check before memcpy into target.sensf_res
in the NFC-F initiator polling path. An oversized SENSF_RES overflows
an 18-byte stack buffer by up to 235 bytes. CVSS 8.1.
- Patch 2: Check-after-read in the NFC-A target receive path.
resp->data[0] is read before the resp->len != 0 guard fires.
A zero-length frame triggers a 1-byte OOB read.
- Patch 3: Missing post-pull length check in the RTOX handler inside
digital_in_recv_dep_res(). After skb_pull strips the 3-byte DEP
header, resp->data[0] is read with no guarantee that any payload byte
remains.
- Patch 4: DID byte accessed at resp->data[3] after only a
sizeof(struct digital_dep_req_res) == 3 byte guard in
digital_tg_recv_dep_req(). An attacker with DID bit set and a 3-byte
frame triggers a 1-byte OOB read.
Patches 1-4 are independent and can be applied in any order.
Security Research (4):
nfc: digital: Fix stack buffer overflow in digital_in_recv_sensf_res()
nfc: digital: Fix check-after-read in digital_tg_recv_sens_req()
nfc: digital: Fix OOB read of RTOX byte in digital_in_recv_dep_res()
nfc: digital: Fix OOB read of DID byte in digital_tg_recv_dep_req()
net/nfc/digital_dep.c | 11 +++++++++++
net/nfc/digital_technology.c | 16 ++++++++++++----
2 files changed, 23 insertions(+), 4 deletions(-)
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox