Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next v05 4/6] hinic3: Add ethtool rss ops
From: Fan Gong @ 2026-04-11  3:37 UTC (permalink / raw)
  To: Fan Gong, Zhu Yikai, netdev, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Simon Horman, Andrew Lunn,
	Ioana Ciornei, Mohsin Bashir
  Cc: linux-kernel, linux-doc, luosifu, Xin Guo, Zhou Shuai, Wu Like,
	Shi Jing, Zheng Jiezhen, Maxime Chevallier
In-Reply-To: <cover.1775711066.git.zhuyikai1@h-partners.com>

  Implement the following ethtool callback functions:
.get_rxnfc
.set_rxnfc
.get_channels
.set_channels
.get_rxfh_indir_size
.get_rxfh_key_size
.get_rxfh
.set_rxfh

  These callbacks allow users to use ethtool for detailed
RSS parameter configuration and monitoring.

Co-developed-by: Zhu Yikai <zhuyikai1@h-partners.com>
Signed-off-by: Zhu Yikai <zhuyikai1@h-partners.com>
Signed-off-by: Fan Gong <gongfan1@huawei.com>
---
 .../ethernet/huawei/hinic3/hinic3_ethtool.c   |   9 +
 .../huawei/hinic3/hinic3_mgmt_interface.h     |   2 +
 .../net/ethernet/huawei/hinic3/hinic3_rss.c   | 487 +++++++++++++++++-
 .../net/ethernet/huawei/hinic3/hinic3_rss.h   |  19 +
 4 files changed, 515 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_ethtool.c b/drivers/net/ethernet/huawei/hinic3/hinic3_ethtool.c
index f0fb9a30840b..69663ee70cbd 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_ethtool.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_ethtool.c
@@ -15,6 +15,7 @@
 #include "hinic3_hw_comm.h"
 #include "hinic3_nic_dev.h"
 #include "hinic3_nic_cfg.h"
+#include "hinic3_rss.h"
 
 #define HINIC3_MGMT_VERSION_MAX_LEN     32
 /* Coalesce time properties in microseconds */
@@ -1231,6 +1232,14 @@ static const struct ethtool_ops hinic3_ethtool_ops = {
 	.get_pause_stats                = hinic3_get_pause_stats,
 	.get_coalesce                   = hinic3_get_coalesce,
 	.set_coalesce                   = hinic3_set_coalesce,
+	.get_rxnfc                      = hinic3_get_rxnfc,
+	.set_rxnfc                      = hinic3_set_rxnfc,
+	.get_channels                   = hinic3_get_channels,
+	.set_channels                   = hinic3_set_channels,
+	.get_rxfh_indir_size            = hinic3_get_rxfh_indir_size,
+	.get_rxfh_key_size              = hinic3_get_rxfh_key_size,
+	.get_rxfh                       = hinic3_get_rxfh,
+	.set_rxfh                       = hinic3_set_rxfh,
 };
 
 void hinic3_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt_interface.h b/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt_interface.h
index 76c691f82703..3c1263ff99ff 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt_interface.h
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt_interface.h
@@ -282,6 +282,7 @@ enum l2nic_cmd {
 	L2NIC_CMD_SET_VLAN_FILTER_EN  = 26,
 	L2NIC_CMD_SET_RX_VLAN_OFFLOAD = 27,
 	L2NIC_CMD_CFG_RSS             = 60,
+	L2NIC_CMD_GET_RSS_CTX_TBL     = 62,
 	L2NIC_CMD_CFG_RSS_HASH_KEY    = 63,
 	L2NIC_CMD_CFG_RSS_HASH_ENGINE = 64,
 	L2NIC_CMD_SET_RSS_CTX_TBL     = 65,
@@ -301,6 +302,7 @@ enum l2nic_ucode_cmd {
 	L2NIC_UCODE_CMD_MODIFY_QUEUE_CTX  = 0,
 	L2NIC_UCODE_CMD_CLEAN_QUEUE_CTX   = 1,
 	L2NIC_UCODE_CMD_SET_RSS_INDIR_TBL = 4,
+	L2NIC_UCODE_CMD_GET_RSS_INDIR_TBL = 6,
 };
 
 /* hilink mac group command */
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_rss.c b/drivers/net/ethernet/huawei/hinic3/hinic3_rss.c
index 25db74d8c7dd..b40d5fa885c2 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_rss.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_rss.c
@@ -155,7 +155,7 @@ static int hinic3_set_rss_type(struct hinic3_hwdev *hwdev,
 				       L2NIC_CMD_SET_RSS_CTX_TBL, &msg_params);
 
 	if (ctx_tbl.msg_head.status == MGMT_STATUS_CMD_UNSUPPORTED) {
-		return MGMT_STATUS_CMD_UNSUPPORTED;
+		return -EOPNOTSUPP;
 	} else if (err || ctx_tbl.msg_head.status) {
 		dev_err(hwdev->dev, "mgmt Failed to set rss context offload, err: %d, status: 0x%x\n",
 			err, ctx_tbl.msg_head.status);
@@ -165,6 +165,39 @@ static int hinic3_set_rss_type(struct hinic3_hwdev *hwdev,
 	return 0;
 }
 
+static int hinic3_get_rss_type(struct hinic3_hwdev *hwdev,
+			       struct hinic3_rss_type *rss_type)
+{
+	struct l2nic_cmd_rss_ctx_tbl ctx_tbl = {};
+	struct mgmt_msg_params msg_params = {};
+	int err;
+
+	ctx_tbl.func_id = hinic3_global_func_id(hwdev);
+
+	mgmt_msg_params_init_default(&msg_params, &ctx_tbl, sizeof(ctx_tbl));
+
+	err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_L2NIC,
+				       L2NIC_CMD_GET_RSS_CTX_TBL,
+				       &msg_params);
+	if (err || ctx_tbl.msg_head.status) {
+		dev_err(hwdev->dev, "Failed to get hash type, err: %d, status: 0x%x\n",
+			err, ctx_tbl.msg_head.status);
+		return -EINVAL;
+	}
+
+	rss_type->ipv4         = L2NIC_RSS_TYPE_GET(ctx_tbl.context, IPV4);
+	rss_type->ipv6         = L2NIC_RSS_TYPE_GET(ctx_tbl.context, IPV6);
+	rss_type->ipv6_ext     = L2NIC_RSS_TYPE_GET(ctx_tbl.context, IPV6_EXT);
+	rss_type->tcp_ipv4     = L2NIC_RSS_TYPE_GET(ctx_tbl.context, TCP_IPV4);
+	rss_type->tcp_ipv6     = L2NIC_RSS_TYPE_GET(ctx_tbl.context, TCP_IPV6);
+	rss_type->tcp_ipv6_ext = L2NIC_RSS_TYPE_GET(ctx_tbl.context,
+						    TCP_IPV6_EXT);
+	rss_type->udp_ipv4     = L2NIC_RSS_TYPE_GET(ctx_tbl.context, UDP_IPV4);
+	rss_type->udp_ipv6     = L2NIC_RSS_TYPE_GET(ctx_tbl.context, UDP_IPV6);
+
+	return 0;
+}
+
 static int hinic3_rss_cfg_hash_type(struct hinic3_hwdev *hwdev, u8 opcode,
 				    enum hinic3_rss_hash_type *type)
 {
@@ -264,7 +297,8 @@ static int hinic3_set_hw_rss_parameters(struct net_device *netdev, u8 rss_en)
 	if (err)
 		return err;
 
-	hinic3_fillout_indir_tbl(netdev, nic_dev->rss_indir);
+	if (!netif_is_rxfh_configured(netdev))
+		hinic3_fillout_indir_tbl(netdev, nic_dev->rss_indir);
 
 	err = hinic3_config_rss_hw_resource(netdev, nic_dev->rss_indir);
 	if (err)
@@ -334,3 +368,452 @@ void hinic3_try_to_enable_rss(struct net_device *netdev)
 	clear_bit(HINIC3_RSS_ENABLE, &nic_dev->flags);
 	nic_dev->q_params.num_qps = nic_dev->max_qps;
 }
+
+static int hinic3_set_l4_rss_hash_ops(const struct ethtool_rxnfc *cmd,
+				      struct hinic3_rss_type *rss_type)
+{
+	u8 rss_l4_en;
+
+	switch (cmd->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+	case 0:
+		rss_l4_en = 0;
+		break;
+	case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+		rss_l4_en = 1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		rss_type->tcp_ipv4 = rss_l4_en;
+		break;
+	case TCP_V6_FLOW:
+		rss_type->tcp_ipv6 = rss_l4_en;
+		break;
+	case UDP_V4_FLOW:
+		rss_type->udp_ipv4 = rss_l4_en;
+		break;
+	case UDP_V6_FLOW:
+		rss_type->udp_ipv6 = rss_l4_en;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hinic3_update_rss_hash_opts(struct net_device *netdev,
+				       struct ethtool_rxnfc *cmd,
+				       struct hinic3_rss_type *rss_type)
+{
+	int err;
+
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+	case UDP_V4_FLOW:
+	case UDP_V6_FLOW:
+		err = hinic3_set_l4_rss_hash_ops(cmd, rss_type);
+		if (err)
+			return err;
+
+		break;
+	case IPV4_FLOW:
+		rss_type->ipv4 = 1;
+		break;
+	case IPV6_FLOW:
+		rss_type->ipv6 = 1;
+		break;
+	default:
+		netdev_err(netdev, "Unsupported flow type\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hinic3_set_rss_hash_opts(struct net_device *netdev,
+				    struct ethtool_rxnfc *cmd)
+{
+	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
+	struct hinic3_rss_type rss_type;
+	int err;
+
+	if (!test_bit(HINIC3_RSS_ENABLE, &nic_dev->flags)) {
+		cmd->data = 0;
+		netdev_err(netdev, "RSS is disable, not support to set flow-hash\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* RSS only supports hashing of IP addresses and L4 ports */
+	if (cmd->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	/* Both IP addresses must be part of the hash tuple */
+	if (!(cmd->data & RXH_IP_SRC) || !(cmd->data & RXH_IP_DST))
+		return -EINVAL;
+
+	err = hinic3_get_rss_type(nic_dev->hwdev, &rss_type);
+	if (err) {
+		netdev_err(netdev, "Failed to get rss type\n");
+		return err;
+	}
+
+	err = hinic3_update_rss_hash_opts(netdev, cmd, &rss_type);
+	if (err)
+		return err;
+
+	err = hinic3_set_rss_type(nic_dev->hwdev, rss_type);
+	if (err) {
+		netdev_err(netdev, "Failed to set rss type\n");
+		return err;
+	}
+
+	nic_dev->rss_type = rss_type;
+
+	return 0;
+}
+
+static void convert_rss_type(u8 rss_opt, struct ethtool_rxnfc *cmd)
+{
+	if (rss_opt)
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+}
+
+static int hinic3_convert_rss_type(struct net_device *netdev,
+				   struct hinic3_rss_type *rss_type,
+				   struct ethtool_rxnfc *cmd)
+{
+	cmd->data = RXH_IP_SRC | RXH_IP_DST;
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		convert_rss_type(rss_type->tcp_ipv4, cmd);
+		break;
+	case TCP_V6_FLOW:
+		convert_rss_type(rss_type->tcp_ipv6, cmd);
+		break;
+	case UDP_V4_FLOW:
+		convert_rss_type(rss_type->udp_ipv4, cmd);
+		break;
+	case UDP_V6_FLOW:
+		convert_rss_type(rss_type->udp_ipv6, cmd);
+		break;
+	case IPV4_FLOW:
+	case IPV6_FLOW:
+		break;
+	default:
+		netdev_err(netdev, "Unsupported flow type\n");
+		cmd->data = 0;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hinic3_get_rss_hash_opts(struct net_device *netdev,
+				    struct ethtool_rxnfc *cmd)
+{
+	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
+	struct hinic3_rss_type rss_type;
+	int err;
+
+	cmd->data = 0;
+
+	if (!test_bit(HINIC3_RSS_ENABLE, &nic_dev->flags))
+		return 0;
+
+	err = hinic3_get_rss_type(nic_dev->hwdev, &rss_type);
+	if (err) {
+		netdev_err(netdev, "Failed to get rss type\n");
+		return err;
+	}
+
+	return hinic3_convert_rss_type(netdev, &rss_type, cmd);
+}
+
+int hinic3_get_rxnfc(struct net_device *netdev,
+		     struct ethtool_rxnfc *cmd, u32 *rule_locs)
+{
+	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
+	int err = 0;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = nic_dev->q_params.num_qps;
+		break;
+	case ETHTOOL_GRXFH:
+		err = hinic3_get_rss_hash_opts(netdev, cmd);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+int hinic3_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+	int err;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXFH:
+		err = hinic3_set_rss_hash_opts(netdev, cmd);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+static u16 hinic3_max_channels(struct net_device *netdev)
+{
+	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
+	u8 tcs = netdev_get_num_tc(netdev);
+
+	return tcs ? nic_dev->max_qps / tcs : nic_dev->max_qps;
+}
+
+static u16 hinic3_curr_channels(struct net_device *netdev)
+{
+	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
+
+	if (netif_running(netdev))
+		return nic_dev->q_params.num_qps ?
+				nic_dev->q_params.num_qps : 1;
+	else
+		return min_t(u16, hinic3_max_channels(netdev),
+			     nic_dev->q_params.num_qps);
+}
+
+void hinic3_get_channels(struct net_device *netdev,
+			 struct ethtool_channels *channels)
+{
+	channels->max_rx = 0;
+	channels->max_tx = 0;
+	channels->max_other = 0;
+	/* report maximum channels */
+	channels->max_combined = hinic3_max_channels(netdev);
+	channels->rx_count = 0;
+	channels->tx_count = 0;
+	channels->other_count = 0;
+	/* report flow director queues as maximum channels */
+	channels->combined_count = hinic3_curr_channels(netdev);
+}
+
+static int
+hinic3_validate_channel_parameter(struct net_device *netdev,
+				  const struct ethtool_channels *channels)
+{
+	u16 max_channel = hinic3_max_channels(netdev);
+	unsigned int count = channels->combined_count;
+
+	if (!count) {
+		netdev_err(netdev, "Unsupported combined_count=0\n");
+		return -EINVAL;
+	}
+
+	if (channels->tx_count || channels->rx_count || channels->other_count) {
+		netdev_err(netdev, "Setting rx/tx/other count not supported\n");
+		return -EINVAL;
+	}
+
+	if (count > max_channel) {
+		netdev_err(netdev, "Combined count %u exceed limit %u\n", count,
+			   max_channel);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int hinic3_set_channels(struct net_device *netdev,
+			struct ethtool_channels *channels)
+{
+	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
+	unsigned int count = channels->combined_count;
+	struct hinic3_dyna_txrxq_params q_params;
+	int err;
+
+	if (hinic3_validate_channel_parameter(netdev, channels))
+		return -EINVAL;
+
+	if (!test_bit(HINIC3_RSS_ENABLE, &nic_dev->flags)) {
+		netdev_err(netdev, "This function doesn't support RSS, only support 1 queue pair\n");
+		return -EOPNOTSUPP;
+	}
+
+	netdev_dbg(netdev, "Set max combined queue number from %u to %u\n",
+		   nic_dev->q_params.num_qps, count);
+
+	if (netif_running(netdev)) {
+		q_params = nic_dev->q_params;
+		q_params.num_qps = (u16)count;
+		q_params.txqs_res = NULL;
+		q_params.rxqs_res = NULL;
+		q_params.irq_cfg = NULL;
+
+		err = hinic3_change_channel_settings(netdev, &q_params);
+		if (err) {
+			netdev_err(netdev, "Failed to change channel settings\n");
+			return err;
+		}
+	} else {
+		nic_dev->q_params.num_qps = (u16)count;
+	}
+
+	return 0;
+}
+
+u32 hinic3_get_rxfh_indir_size(struct net_device *netdev)
+{
+	return L2NIC_RSS_INDIR_SIZE;
+}
+
+static int hinic3_set_rss_rxfh(struct net_device *netdev,
+			       const u32 *indir, u8 *key)
+{
+	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
+	int err;
+	u32 i;
+
+	if (indir) {
+		for (i = 0; i < L2NIC_RSS_INDIR_SIZE; i++)
+			nic_dev->rss_indir[i] = (u16)indir[i];
+
+		err = hinic3_rss_set_indir_tbl(nic_dev->hwdev,
+					       nic_dev->rss_indir);
+		if (err) {
+			netdev_err(netdev, "Failed to set rss indir table\n");
+			return err;
+		}
+	}
+
+	if (key) {
+		err = hinic3_rss_set_hash_key(nic_dev->hwdev, key);
+		if (err) {
+			netdev_err(netdev, "Failed to set rss key\n");
+			return err;
+		}
+
+		memcpy(nic_dev->rss_hkey, key, L2NIC_RSS_KEY_SIZE);
+	}
+
+	return 0;
+}
+
+u32 hinic3_get_rxfh_key_size(struct net_device *netdev)
+{
+	return L2NIC_RSS_KEY_SIZE;
+}
+
+static int hinic3_rss_get_indir_tbl(struct hinic3_hwdev *hwdev,
+				    u32 *indir_table)
+{
+	struct hinic3_cmd_buf_pair pair;
+	__le16 *indir_tbl = NULL;
+	int err, i;
+
+	err = hinic3_cmd_buf_pair_init(hwdev, &pair);
+	if (err) {
+		dev_err(hwdev->dev, "Failed to allocate cmd_buf.\n");
+		return err;
+	}
+
+	err = hinic3_cmdq_detail_resp(hwdev, MGMT_MOD_L2NIC,
+				      L2NIC_UCODE_CMD_GET_RSS_INDIR_TBL,
+				      pair.in, pair.out, NULL);
+	if (err) {
+		dev_err(hwdev->dev, "Failed to get rss indir table\n");
+		goto err_get_indir_tbl;
+	}
+
+	indir_tbl = (__le16 *)pair.out->buf;
+	for (i = 0; i < L2NIC_RSS_INDIR_SIZE; i++)
+		indir_table[i] = le16_to_cpu(*(indir_tbl + i));
+
+err_get_indir_tbl:
+	hinic3_cmd_buf_pair_uninit(hwdev, &pair);
+
+	return err;
+}
+
+int hinic3_get_rxfh(struct net_device *netdev,
+		    struct ethtool_rxfh_param *rxfh)
+{
+	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
+	int err = 0;
+
+	if (!test_bit(HINIC3_RSS_ENABLE, &nic_dev->flags)) {
+		netdev_err(netdev, "Rss is disabled\n");
+		return -EOPNOTSUPP;
+	}
+
+	rxfh->hfunc =
+		nic_dev->rss_hash_type == HINIC3_RSS_HASH_ENGINE_TYPE_XOR ?
+		ETH_RSS_HASH_XOR : ETH_RSS_HASH_TOP;
+
+	if (rxfh->indir) {
+		err = hinic3_rss_get_indir_tbl(nic_dev->hwdev, rxfh->indir);
+		if (err)
+			return err;
+	}
+
+	if (rxfh->key)
+		memcpy(rxfh->key, nic_dev->rss_hkey, L2NIC_RSS_KEY_SIZE);
+
+	return err;
+}
+
+static int hinic3_update_hash_func_type(struct net_device *netdev, u8 hfunc)
+{
+	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
+	enum hinic3_rss_hash_type new_rss_hash_type;
+
+	switch (hfunc) {
+	case ETH_RSS_HASH_NO_CHANGE:
+		return 0;
+	case ETH_RSS_HASH_XOR:
+		new_rss_hash_type = HINIC3_RSS_HASH_ENGINE_TYPE_XOR;
+		break;
+	case ETH_RSS_HASH_TOP:
+		new_rss_hash_type = HINIC3_RSS_HASH_ENGINE_TYPE_TOEP;
+		break;
+	default:
+		netdev_err(netdev, "Unsupported hash func %u\n", hfunc);
+		return -EOPNOTSUPP;
+	}
+
+	if (new_rss_hash_type == nic_dev->rss_hash_type)
+		return 0;
+
+	nic_dev->rss_hash_type = new_rss_hash_type;
+	return hinic3_rss_set_hash_type(nic_dev->hwdev, nic_dev->rss_hash_type);
+}
+
+int hinic3_set_rxfh(struct net_device *netdev,
+		    struct ethtool_rxfh_param *rxfh,
+		    struct netlink_ext_ack *extack)
+{
+	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
+	int err;
+
+	if (!test_bit(HINIC3_RSS_ENABLE, &nic_dev->flags)) {
+		netdev_err(netdev, "Not support to set rss parameters when rss is disable\n");
+		return -EOPNOTSUPP;
+	}
+
+	err = hinic3_update_hash_func_type(netdev, rxfh->hfunc);
+	if (err)
+		return err;
+
+	err = hinic3_set_rss_rxfh(netdev, rxfh->indir, rxfh->key);
+
+	return err;
+}
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_rss.h b/drivers/net/ethernet/huawei/hinic3/hinic3_rss.h
index 78d82c2aca06..9f1b77780cd4 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_rss.h
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_rss.h
@@ -5,10 +5,29 @@
 #define _HINIC3_RSS_H_
 
 #include <linux/netdevice.h>
+#include <linux/ethtool.h>
 
 int hinic3_rss_init(struct net_device *netdev);
 void hinic3_rss_uninit(struct net_device *netdev);
 void hinic3_try_to_enable_rss(struct net_device *netdev);
 void hinic3_clear_rss_config(struct net_device *netdev);
 
+int hinic3_get_rxnfc(struct net_device *netdev,
+		     struct ethtool_rxnfc *cmd, u32 *rule_locs);
+int hinic3_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd);
+
+void hinic3_get_channels(struct net_device *netdev,
+			 struct ethtool_channels *channels);
+int hinic3_set_channels(struct net_device *netdev,
+			struct ethtool_channels *channels);
+
+u32 hinic3_get_rxfh_indir_size(struct net_device *netdev);
+u32 hinic3_get_rxfh_key_size(struct net_device *netdev);
+
+int hinic3_get_rxfh(struct net_device *netdev,
+		    struct ethtool_rxfh_param *rxfh);
+int hinic3_set_rxfh(struct net_device *netdev,
+		    struct ethtool_rxfh_param *rxfh,
+		    struct netlink_ext_ack *extack);
+
 #endif
-- 
2.43.0


^ permalink raw reply related

* [PATCH net-next v05 6/6] hinic3: Remove unneeded coalesce parameters
From: Fan Gong @ 2026-04-11  3:37 UTC (permalink / raw)
  To: Fan Gong, Zhu Yikai, netdev, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Simon Horman, Andrew Lunn,
	Ioana Ciornei, Mohsin Bashir
  Cc: linux-kernel, linux-doc, luosifu, Xin Guo, Zhou Shuai, Wu Like,
	Shi Jing, Zheng Jiezhen, Maxime Chevallier
In-Reply-To: <cover.1775711066.git.zhuyikai1@h-partners.com>

  Remove unneeded coalesce parameters in irq handling.

Co-developed-by: Zhu Yikai <zhuyikai1@h-partners.com>
Signed-off-by: Zhu Yikai <zhuyikai1@h-partners.com>
Signed-off-by: Fan Gong <gongfan1@huawei.com>
---
 drivers/net/ethernet/huawei/hinic3/hinic3_irq.c | 6 +-----
 drivers/net/ethernet/huawei/hinic3/hinic3_rx.h  | 3 ---
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c b/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c
index d3b3927b5408..42464c007174 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c
@@ -156,13 +156,9 @@ static int hinic3_set_interrupt_moder(struct net_device *netdev, u16 q_id,
 	spin_unlock_irqrestore(&nic_dev->channel_res_lock, flags);
 
 	err = hinic3_set_interrupt_cfg(nic_dev->hwdev, info);
-	if (err) {
+	if (err)
 		netdev_err(netdev,
 			   "Failed to modify moderation for Queue: %u\n", q_id);
-	} else {
-		nic_dev->rxqs[q_id].last_coalesc_timer_cfg = coalesc_timer_cfg;
-		nic_dev->rxqs[q_id].last_pending_limit = pending_limit;
-	}
 
 	return err;
 }
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_rx.h b/drivers/net/ethernet/huawei/hinic3/hinic3_rx.h
index c11d080408a7..2ab691ed11a9 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_rx.h
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_rx.h
@@ -111,9 +111,6 @@ struct hinic3_rxq {
 	dma_addr_t             cqe_start_paddr;
 
 	struct dim             dim;
-
-	u8                     last_coalesc_timer_cfg;
-	u8                     last_pending_limit;
 } ____cacheline_aligned;
 
 struct hinic3_dyna_rxq_res {
-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH net v4 1/2] flow_dissector: do not dissect PPPoE PFC frames
From: Qingfang Deng @ 2026-04-11  3:56 UTC (permalink / raw)
  To: Simon Horman
  Cc: linux-ppp, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Guillaume Nault, Wojciech Drewek, Tony Nguyen,
	linux-kernel, netdev, Paul Mackerras, Jaco Kroon, James Carlson,
	Marcin Szycik
In-Reply-To: <20260410171056.GD469338@kernel.org>

Hi,

On 4/11/2026 1:10 AM, Simon Horman wrote:
> On Fri, Apr 10, 2026 at 11:36:20AM +0800, Qingfang Deng wrote:
>> @@ -1361,7 +1376,7 @@ bool __skb_flow_dissect(const struct net *net,
>>   			struct pppoe_hdr hdr;
>>   			__be16 proto;
>>   		} *hdr, _hdr;
>> -		u16 ppp_proto;
>> +		__be16 ppp_proto;
> 
> I'm unclear of the relationship between changing the type of ppp_proto
> and the problem described in the patch description. And it
> is creating a lot of churn in this patch. I suggest dropping it.

The intention is to restore the original behavior before the blamed 
commit. If you find it too verbose for a fix, I can drop it and then 
repost that part later to net-next.

>> @@ -1374,27 +1389,19 @@ bool __skb_flow_dissect(const struct net *net,
>>   			break;
>>   		}
>>   
>> -		/* least significant bit of the most significant octet
>> -		 * indicates if protocol field was compressed
>> -		 */
>> -		ppp_proto = ntohs(hdr->proto);
>> -		if (ppp_proto & 0x0100) {
>> -			ppp_proto = ppp_proto >> 8;
>> -			nhoff += PPPOE_SES_HLEN - 1;
>> -		} else {
>> -			nhoff += PPPOE_SES_HLEN;
>> -		}
> 
> Could we go for something like this?
> 
> 		ppp_proto = ntohs(hdr->proto);
> 		nhoff += PPPOE_SES_HLEN;
> 
> 		/* Explanation of what is going on */
> 		if (ppp_proto & 0x0100)
> 			ppp_proto = some invalid value like 0
>

I think it is redundant. ppp_proto_is_valid() already requires 
uncompressed frames.

>> +		ppp_proto = hdr->proto;
>> +		nhoff += PPPOE_SES_HLEN;
>>   
>> -		if (ppp_proto == PPP_IP) {
>> +		if (ppp_proto == htons(PPP_IP)) {
>>   			proto = htons(ETH_P_IP);
>>   			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
>> -		} else if (ppp_proto == PPP_IPV6) {
>> +		} else if (ppp_proto == htons(PPP_IPV6)) {
>>   			proto = htons(ETH_P_IPV6);
>>   			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
>> -		} else if (ppp_proto == PPP_MPLS_UC) {
>> +		} else if (ppp_proto == htons(PPP_MPLS_UC)) {
>>   			proto = htons(ETH_P_MPLS_UC);
>>   			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
>> -		} else if (ppp_proto == PPP_MPLS_MC) {
>> +		} else if (ppp_proto == htons(PPP_MPLS_MC)) {
>>   			proto = htons(ETH_P_MPLS_MC);
>>   			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
>>   		} else if (ppp_proto_is_valid(ppp_proto)) {

^ permalink raw reply

* [PATCH net-next] r8169: Use napi_schedule_irqoff()
From: Matt Vollrath @ 2026-04-11  4:16 UTC (permalink / raw)
  To: netdev; +Cc: Matt Vollrath

napi_schedule() masks hard interrupts while doing its work, which is
redundant when called from an interrupt handler where hard interrupts
are already masked. Use napi_schedule_irqoff() instead to bypass this
redundant masking. This is an optimization.

Tested on a Lenovo RTL8168h/8111h.

Signed-off-by: Matt Vollrath <tactii@gmail.com>
---
 drivers/net/ethernet/realtek/r8169_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 791277e750ba..4c0ad0de3410 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4873,7 +4873,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 		phy_mac_interrupt(tp->phydev);
 
 	rtl_irq_disable(tp);
-	napi_schedule(&tp->napi);
+	napi_schedule_irqoff(&tp->napi);
 out:
 	rtl_ack_events(tp, status);
 
-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH bpf-next v2 0/3] Use kmalloc_nolock() universally in BPF local storage
From: patchwork-bot+netdevbpf @ 2026-04-11  4:30 UTC (permalink / raw)
  To: Amery Hung
  Cc: bpf, netdev, alexei.starovoitov, andrii, daniel, martin.lau,
	memxor, kernel-team
In-Reply-To: <20260411015419.114016-1-ameryhung@gmail.com>

Hello:

This series was applied to bpf/bpf-next.git (master)
by Alexei Starovoitov <ast@kernel.org>:

On Fri, 10 Apr 2026 18:54:15 -0700 you wrote:
> Socket local storage did not convert to use kmalloc_nolock() since there
> was observable performance degradation due to kfree_nolock() hitting the
> slow path and the lack of kfree_rcu()-like batching freeing. Now that
> these concerns have been addressed in slub, convert all remaining local
> storage flavors to use kmalloc_nolock().
> 
> v1 -> v2:
>   - Fix build (CI, Alexei)
> 
> [...]

Here is the summary with links:
  - [bpf-next,v2,1/3] selftests/bpf: Remove kmalloc tracing from local storage create bench
    https://git.kernel.org/bpf/bpf-next/c/78ee02a966ad
  - [bpf-next,v2,2/3] bpf: Use kmalloc_nolock() universally in local storage
    https://git.kernel.org/bpf/bpf-next/c/5063e7758899
  - [bpf-next,v2,3/3] bpf: Remove gfp_flags plumbing from bpf_local_storage_update()
    https://git.kernel.org/bpf/bpf-next/c/136deea435dc

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH bpf-next v2 2/3] bpf: Use kmalloc_nolock() universally in local storage
From: Alexei Starovoitov @ 2026-04-11  4:39 UTC (permalink / raw)
  To: bot+bpf-ci
  Cc: Amery Hung, bpf, Network Development, Andrii Nakryiko,
	Daniel Borkmann, Martin KaFai Lau, Kumar Kartikeya Dwivedi,
	Kernel Team, Alexei Starovoitov, Eduard, Yonghong Song,
	Chris Mason, Ihor Solodrai
In-Reply-To: <efdb35c2d8151489fb031ad63ce8bd6000b8bbaa2bf5a523c927907399c93ab0@mail.kernel.org>

On Fri, Apr 10, 2026 at 7:36 PM <bot+bpf-ci@kernel.org> wrote:
>
>
> This allows value sizes up to ~65KB. Before this patch, socket and
> inode storage used bpf_map_kzalloc() (backed by regular kmalloc)
> which could handle those large sizes. After this patch, any
> elem_size above KMALLOC_MAX_CACHE_SIZE will silently fail: the map
> creation succeeds via bpf_local_storage_map_alloc_check() but every
> element allocation returns NULL.
>
> Should BPF_LOCAL_STORAGE_MAX_VALUE_SIZE be updated to use
> KMALLOC_MAX_CACHE_SIZE instead of KMALLOC_MAX_SIZE now that all
> storage types go through kmalloc_nolock()?
>
> Slava Imameev raised the same concern for task storage in
> https://lore.kernel.org/bpf/20260410014341.47043-1-slava.imameev@crowdstrike.com/

Right. Let's update it, but I don't think it's a regression.
On a loaded system kmalloc_large() rarely succeeds for order 2+.
That's why kmalloc_nolock() doesn't attempt to bridge that gap.
One or two contiguous physical pages is the best one can expect.
In early bpf days we picked KMALLOC_MAX_SIZE assuming that
it's a realistic max for kmalloc().
It turned out to be wishful thinking.
kmalloc_large concept should really be removed.
It deceives users into thinking that it's usable.

^ permalink raw reply

* Re: [PATCH net-next 4/5] net/sched: netem: add per-impairment extended statistics
From: Stephen Hemminger @ 2026-04-11  5:08 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: netdev, Jamal Hadi Salim, Jiri Pirko, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Simon Horman, open list
In-Reply-To: <36e86b68-f7d9-4455-a0b5-613f717e670a@redhat.com>

On Thu, 9 Apr 2026 11:30:00 +0200
Paolo Abeni <pabeni@redhat.com> wrote:

> On 4/4/26 12:52 AM, Stephen Hemminger wrote:
> > diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> > index 66e8072f44df..fada10cb9b7b 100644
> > --- a/include/uapi/linux/pkt_sched.h
> > +++ b/include/uapi/linux/pkt_sched.h
> > @@ -569,6 +569,15 @@ struct tc_netem_gemodel {
> >  #define NETEM_DIST_SCALE	8192
> >  #define NETEM_DIST_MAX		16384
> >  
> > +struct tc_netem_xstats {
> > +	__u32	delayed;	/* packets delayed */
> > +	__u32	dropped;	/* packets dropped by loss model      */
> > +	__u32	corrupted;	/* packets with bit errors injected   */
> > +	__u32	duplicated;	/* duplicate packets generated        */
> > +	__u32	reordered;	/* packets sent out of order          */
> > +	__u32	ecn_marked;	/* packets ECN CE-marked (not dropped)*/
> > +};  
> 
> Sashiko notes that the counters size will be set in stone by the uAPI,
> and u32 can wraparound very quickly (especially for unconditional delay).
> 
> I see other qdiscs generally use __u32, but some have __u64 too, so I
> assume there is no architectural blocker to larger counters.
> 
> Could you please move to __u64 above?
> 
> Thanks,
> 
> Paolo
> 

Sure, larger counters are fine; I was mostly just following the herd.
I assume we don't need to worry about 32-bit tearing when reading these?

^ permalink raw reply

* [PATCH net v5 0/8] net/sched: netem bug fixes
From: Stephen Hemminger @ 2026-04-11  5:15 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger

These bugs were identified while using AI-assisted code review of
sch_netem.c to analyze the packet duplication re-entrancy problem
(CVE-2025-37890, CVE-2025-38001), which are addressed in a separate
series.

The review uncovered several additional issues:

- probability gaps in the 4-state Markov loss model where
  boundary values produce no state transition
- queue limit check not accounting for reordered packets
- PRNG reseeded on every tc change, breaking reproducibility
- the core dequeue re-entrancy issue with child qdiscs
  causing HFSC eltree corruption and DRR class stalls
- missing NULL termination on the tfifo linear list tail
- slot delay configuration not validated for inverted ranges
- slot delay arithmetic overflow for ranges above ~2.1 seconds

v5 - fix slot dynamics in the dequeue change

v4 - split refactoring and fix for dequeue into two patches

Stephen Hemminger (8):
  net/sched: netem: fix probability gaps in 4-state loss model
  net/sched: netem: fix queue limit check to include reordered packets
  net/sched: netem: only reseed PRNG when seed is explicitly provided
  net/sched: netem: refactor dequeue into helper functions
  net/sched: netem: batch-transfer ready packets to avoid child
    re-entrancy
  net/sched: netem: null-terminate tfifo linear queue tail
  net/sched: netem: check for invalid slot range
  net/sched: netem: fix slot delay calculation overflow

 net/sched/sch_netem.c | 238 ++++++++++++++++++++++++++++--------------
 1 file changed, 159 insertions(+), 79 deletions(-)

-- 
2.53.0


^ permalink raw reply

* [PATCH net v5 1/8] net/sched: netem: fix probability gaps in 4-state loss model
From: Stephen Hemminger @ 2026-04-11  5:15 UTC (permalink / raw)
  To: netdev
  Cc: Stephen Hemminger, Simon Horman, Jamal Hadi Salim, Jiri Pirko,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	open list
In-Reply-To: <20260411051700.311679-1-stephen@networkplumber.org>

The 4-state Markov chain in loss_4state() has gaps at the boundaries
between transition probability ranges. The comparisons use:

  if (rnd < a4)
  else if (a4 < rnd && rnd < a1 + a4)

When rnd equals a boundary value exactly, neither branch matches and
no state transition occurs. The redundant lower-bound check (a4 < rnd)
is already implied by being in the else branch.

Remove the unnecessary lower-bound comparisons so the ranges are
contiguous and every random value produces a transition, matching
the GI (General and Intuitive) loss model specification.

This bug goes back to the original implementation of this model.

Fixes: 661b79725fea ("netem: revised correlated loss generator")
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 net/sched/sch_netem.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 20df1c08b1e9..8ee72cac1faf 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -227,10 +227,10 @@ static bool loss_4state(struct netem_sched_data *q)
 		if (rnd < clg->a4) {
 			clg->state = LOST_IN_GAP_PERIOD;
 			return true;
-		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
+		} else if (rnd < clg->a1 + clg->a4) {
 			clg->state = LOST_IN_BURST_PERIOD;
 			return true;
-		} else if (clg->a1 + clg->a4 < rnd) {
+		} else {
 			clg->state = TX_IN_GAP_PERIOD;
 		}
 
@@ -247,9 +247,9 @@ static bool loss_4state(struct netem_sched_data *q)
 	case LOST_IN_BURST_PERIOD:
 		if (rnd < clg->a3)
 			clg->state = TX_IN_BURST_PERIOD;
-		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
+		else if (rnd < clg->a2 + clg->a3) {
 			clg->state = TX_IN_GAP_PERIOD;
-		} else if (clg->a2 + clg->a3 < rnd) {
+		} else {
 			clg->state = LOST_IN_BURST_PERIOD;
 			return true;
 		}
-- 
2.53.0


^ permalink raw reply related

* [PATCH net v5 2/8] net/sched: netem: fix queue limit check to include reordered packets
From: Stephen Hemminger @ 2026-04-11  5:15 UTC (permalink / raw)
  To: netdev
  Cc: Stephen Hemminger, Simon Horman, Jamal Hadi Salim, Jiri Pirko,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	open list
In-Reply-To: <20260411051700.311679-1-stephen@networkplumber.org>

The queue limit check in netem_enqueue() uses q->t_len which only
counts packets in the internal tfifo. Packets placed in sch->q by
the reorder path (__qdisc_enqueue_head) are not counted, allowing
the total queue occupancy to exceed sch->limit under reordering.

Include sch->q.qlen in the limit check.

Fixes: 50612537e9ab ("netem: fix classful handling")
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 net/sched/sch_netem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 8ee72cac1faf..d400a730eadd 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -524,7 +524,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 				1 << get_random_u32_below(8);
 	}
 
-	if (unlikely(q->t_len >= sch->limit)) {
+	if (unlikely(sch->q.qlen >= sch->limit)) {
 		/* re-link segs, so that qdisc_drop_all() frees them all */
 		skb->next = segs;
 		qdisc_drop_all(skb, sch, to_free);
-- 
2.53.0


^ permalink raw reply related

* [PATCH net v5 3/8] net/sched: netem: only reseed PRNG when seed is explicitly provided
From: Stephen Hemminger @ 2026-04-11  5:15 UTC (permalink / raw)
  To: netdev
  Cc: Stephen Hemminger, Simon Horman, Jamal Hadi Salim, Jiri Pirko,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	François Michel, open list
In-Reply-To: <20260411051700.311679-1-stephen@networkplumber.org>

netem_change() unconditionally reseeds the PRNG on every tc change
command. If TCA_NETEM_PRNG_SEED is not specified, a new random seed
is generated, destroying reproducibility for users who set a
deterministic seed on a previous change.

Move the initial random seed generation to netem_init() and only
reseed in netem_change() when TCA_NETEM_PRNG_SEED is explicitly
provided by the user.

Fixes: 4072d97ddc44 ("netem: add prng attribute to netem_sched_data")
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 net/sched/sch_netem.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index d400a730eadd..556f9747f0e7 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -1112,11 +1112,10 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
 	/* capping jitter to the range acceptable by tabledist() */
 	q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
 
-	if (tb[TCA_NETEM_PRNG_SEED])
+	if (tb[TCA_NETEM_PRNG_SEED]) {
 		q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]);
-	else
-		q->prng.seed = get_random_u64();
-	prandom_seed_state(&q->prng.prng_state, q->prng.seed);
+		prandom_seed_state(&q->prng.prng_state, q->prng.seed);
+	}
 
 unlock:
 	sch_tree_unlock(sch);
@@ -1139,6 +1138,9 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt,
 		return -EINVAL;
 
 	q->loss_model = CLG_RANDOM;
+	q->prng.seed = get_random_u64();
+	prandom_seed_state(&q->prng.prng_state, q->prng.seed);
+
 	ret = netem_change(sch, opt, extack);
 	if (ret)
 		pr_info("netem: change failed\n");
-- 
2.53.0


^ permalink raw reply related

* [PATCH net v5 4/8] net/sched: netem: refactor dequeue into helper functions
From: Stephen Hemminger @ 2026-04-11  5:15 UTC (permalink / raw)
  To: netdev
  Cc: Stephen Hemminger, Simon Horman, Jamal Hadi Salim, Jiri Pirko,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	open list
In-Reply-To: <20260411051700.311679-1-stephen@networkplumber.org>

Extract the tfifo removal, slot accounting, and child/direct dequeue
paths from the monolithic netem_dequeue() into separate helpers:

  netem_pull_tfifo()    - remove head packet from tfifo
  netem_slot_account()  - update slot pacing counters
  netem_dequeue_child() - enqueue to child, then dequeue from child
  netem_dequeue_direct()- dequeue from tfifo when no child

This replaces the goto-based control flow with straightforward function
calls, making the code easier to follow and modify.

No functional change intended.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 net/sched/sch_netem.c | 190 +++++++++++++++++++++++++++---------------
 1 file changed, 123 insertions(+), 67 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 556f9747f0e7..e264f7aefb97 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -689,99 +689,155 @@ static struct sk_buff *netem_peek(struct netem_sched_data *q)
 	return q->t_head;
 }
 
-static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
+/*
+ * Pop the head packet from the tfifo and prepare it for delivery.
+ * skb->dev shares the rbnode area and must be restored after removal.
+ */
+static struct sk_buff *netem_pull_tfifo(struct netem_sched_data *q,
+					struct Qdisc *sch)
 {
-	if (skb == q->t_head) {
+	struct sk_buff *skb;
+
+	if (q->t_head) {
+		skb = q->t_head;
 		q->t_head = skb->next;
 		if (!q->t_head)
 			q->t_tail = NULL;
 	} else {
-		rb_erase(&skb->rbnode, &q->t_root);
+		struct rb_node *p = rb_first(&q->t_root);
+
+		if (!p)
+			return NULL;
+		skb = rb_to_skb(p);
+		rb_erase(p, &q->t_root);
 	}
+
+	q->t_len--;
+	skb->next = NULL;
+	skb->prev = NULL;
+	skb->dev = qdisc_dev(sch);
+
+	return skb;
 }
 
-static struct sk_buff *netem_dequeue(struct Qdisc *sch)
+/* Update slot pacing counters after releasing a packet */
+static void netem_slot_account(struct netem_sched_data *q,
+			       const struct sk_buff *skb, u64 now)
+{
+	if (!q->slot.slot_next)
+		return;
+
+	q->slot.packets_left--;
+	q->slot.bytes_left -= qdisc_pkt_len(skb);
+	if (q->slot.packets_left <= 0 || q->slot.bytes_left <= 0)
+		get_slot_next(q, now);
+}
+
+/*
+ * Transfer time-ready packets from the tfifo into the child qdisc,
+ * then dequeue from the child.
+ */
+static struct sk_buff *netem_dequeue_child(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
+	u64 now = ktime_get_ns();
 	struct sk_buff *skb;
 
-tfifo_dequeue:
-	skb = __qdisc_dequeue_head(&sch->q);
-	if (skb) {
-deliver:
-		qdisc_qstats_backlog_dec(sch, skb);
-		qdisc_bstats_update(sch, skb);
-		return skb;
-	}
 	skb = netem_peek(q);
 	if (skb) {
-		u64 time_to_send;
-		u64 now = ktime_get_ns();
+		u64 time_to_send = netem_skb_cb(skb)->time_to_send;
 
-		/* if more time remaining? */
-		time_to_send = netem_skb_cb(skb)->time_to_send;
 		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
 			get_slot_next(q, now);
 
 		if (time_to_send <= now && q->slot.slot_next <= now) {
-			netem_erase_head(q, skb);
-			q->t_len--;
-			skb->next = NULL;
-			skb->prev = NULL;
-			/* skb->dev shares skb->rbnode area,
-			 * we need to restore its value.
-			 */
-			skb->dev = qdisc_dev(sch);
-
-			if (q->slot.slot_next) {
-				q->slot.packets_left--;
-				q->slot.bytes_left -= qdisc_pkt_len(skb);
-				if (q->slot.packets_left <= 0 ||
-				    q->slot.bytes_left <= 0)
-					get_slot_next(q, now);
-			}
-
-			if (q->qdisc) {
-				unsigned int pkt_len = qdisc_pkt_len(skb);
-				struct sk_buff *to_free = NULL;
-				int err;
-
-				err = qdisc_enqueue(skb, q->qdisc, &to_free);
-				kfree_skb_list(to_free);
-				if (err != NET_XMIT_SUCCESS) {
-					if (net_xmit_drop_count(err))
-						qdisc_qstats_drop(sch);
-					sch->qstats.backlog -= pkt_len;
-					sch->q.qlen--;
-					qdisc_tree_reduce_backlog(sch, 1, pkt_len);
-				}
-				goto tfifo_dequeue;
-			}
-			sch->q.qlen--;
-			goto deliver;
-		}
-
-		if (q->qdisc) {
-			skb = q->qdisc->ops->dequeue(q->qdisc);
-			if (skb) {
+			struct sk_buff *to_free = NULL;
+			unsigned int pkt_len;
+			int err;
+
+			skb = netem_pull_tfifo(q, sch);
+			netem_slot_account(q, skb, now);
+
+			pkt_len = qdisc_pkt_len(skb);
+			err = qdisc_enqueue(skb, q->qdisc, &to_free);
+			kfree_skb_list(to_free);
+			if (err != NET_XMIT_SUCCESS) {
+				if (net_xmit_drop_count(err))
+					qdisc_qstats_drop(sch);
+				sch->qstats.backlog -= pkt_len;
 				sch->q.qlen--;
-				goto deliver;
+				qdisc_tree_reduce_backlog(sch, 1, pkt_len);
 			}
 		}
-
-		qdisc_watchdog_schedule_ns(&q->watchdog,
-					   max(time_to_send,
-					       q->slot.slot_next));
 	}
 
-	if (q->qdisc) {
-		skb = q->qdisc->ops->dequeue(q->qdisc);
-		if (skb) {
-			sch->q.qlen--;
-			goto deliver;
-		}
+	skb = q->qdisc->ops->dequeue(q->qdisc);
+	if (skb)
+		sch->q.qlen--;
+
+	return skb;
+}
+
+/* Dequeue directly from the tfifo when no child qdisc is configured. */
+static struct sk_buff *netem_dequeue_direct(struct Qdisc *sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+	u64 time_to_send;
+	u64 now;
+
+	skb = netem_peek(q);
+	if (!skb)
+		return NULL;
+
+	now = ktime_get_ns();
+	time_to_send = netem_skb_cb(skb)->time_to_send;
+
+	if (q->slot.slot_next && q->slot.slot_next < time_to_send)
+		get_slot_next(q, now);
+
+	if (time_to_send > now || q->slot.slot_next > now)
+		return NULL;
+
+	skb = netem_pull_tfifo(q, sch);
+	netem_slot_account(q, skb, now);
+	sch->q.qlen--;
+
+	return skb;
+}
+
+static struct sk_buff *netem_dequeue(struct Qdisc *sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	/* First check the reorder queue */
+	skb = __qdisc_dequeue_head(&sch->q);
+	if (skb)
+		goto deliver;
+
+	if (q->qdisc)
+		skb = netem_dequeue_child(sch);
+	else
+		skb = netem_dequeue_direct(sch);
+
+	if (skb)
+		goto deliver;
+
+	/* Nothing ready — schedule watchdog for next packet */
+	skb = netem_peek(q);
+	if (skb) {
+		u64 time_to_send = netem_skb_cb(skb)->time_to_send;
+
+		qdisc_watchdog_schedule_ns(&q->watchdog,
+					   max(time_to_send, q->slot.slot_next));
 	}
 	return NULL;
+
+deliver:
+	qdisc_qstats_backlog_dec(sch, skb);
+	qdisc_bstats_update(sch, skb);
+	return skb;
 }
 
 static void netem_reset(struct Qdisc *sch)
-- 
2.53.0


^ permalink raw reply related

* [PATCH net v5 5/8] net/sched: netem: batch-transfer ready packets to avoid child re-entrancy
From: Stephen Hemminger @ 2026-04-11  5:15 UTC (permalink / raw)
  To: netdev
  Cc: Stephen Hemminger, Jamal Hadi Salim, Jiri Pirko, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	open list
In-Reply-To: <20260411051700.311679-1-stephen@networkplumber.org>

netem_dequeue_child() previously transferred one packet from the tfifo
to the child qdisc per dequeue call. Parents like HFSC that track
class active/inactive state on qlen transitions could see an enqueue
during dequeue, causing double-insertion into the eltree
(CVE-2025-37890, CVE-2025-38001). Non-work-conserving children like
TBF could also refuse to return a just-enqueued packet, making netem
return NULL despite having backlog, which causes parents like DRR to
incorrectly deactivate the class.

Move all time-ready packets into the child before calling its dequeue.
This separates the enqueue and dequeue phases so the parent sees
consistent qlen transitions.

Fixes: 50612537e9ab ("netem: fix classful handling")
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 net/sched/sch_netem.c | 48 +++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index e264f7aefb97..98931bb4354b 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -734,8 +734,10 @@ static void netem_slot_account(struct netem_sched_data *q,
 }
 
 /*
- * Transfer time-ready packets from the tfifo into the child qdisc,
- * then dequeue from the child.
+ * Transfer all time-ready packets from the tfifo into the child qdisc,
+ * then dequeue from the child.  Batching the transfers avoids calling
+ * qdisc_enqueue() inside the parent's dequeue path, which confuses
+ * parents that track active/inactive state on qlen transitions (HFSC).
  */
 static struct sk_buff *netem_dequeue_child(struct Qdisc *sch)
 {
@@ -743,31 +745,33 @@ static struct sk_buff *netem_dequeue_child(struct Qdisc *sch)
 	u64 now = ktime_get_ns();
 	struct sk_buff *skb;
 
-	skb = netem_peek(q);
-	if (skb) {
-		u64 time_to_send = netem_skb_cb(skb)->time_to_send;
+	while ((skb = netem_peek(q)) != NULL) {
+		struct sk_buff *to_free = NULL;
+		unsigned int pkt_len;
+		u64 time_to_send;
+		int err;
 
+		time_to_send = netem_skb_cb(skb)->time_to_send;
 		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
 			get_slot_next(q, now);
 
-		if (time_to_send <= now && q->slot.slot_next <= now) {
-			struct sk_buff *to_free = NULL;
-			unsigned int pkt_len;
-			int err;
-
-			skb = netem_pull_tfifo(q, sch);
-			netem_slot_account(q, skb, now);
+		if (time_to_send > now)
+			break;
+		if (q->slot.slot_next > now)
+			break;
 
-			pkt_len = qdisc_pkt_len(skb);
-			err = qdisc_enqueue(skb, q->qdisc, &to_free);
-			kfree_skb_list(to_free);
-			if (err != NET_XMIT_SUCCESS) {
-				if (net_xmit_drop_count(err))
-					qdisc_qstats_drop(sch);
-				sch->qstats.backlog -= pkt_len;
-				sch->q.qlen--;
-				qdisc_tree_reduce_backlog(sch, 1, pkt_len);
-			}
+		skb = netem_pull_tfifo(q, sch);
+		netem_slot_account(q, skb, now);
+
+		pkt_len = qdisc_pkt_len(skb);
+		err = qdisc_enqueue(skb, q->qdisc, &to_free);
+		kfree_skb_list(to_free);
+		if (unlikely(err != NET_XMIT_SUCCESS)) {
+			if (net_xmit_drop_count(err))
+				qdisc_qstats_drop(sch);
+			sch->qstats.backlog -= pkt_len;
+			sch->q.qlen--;
+			qdisc_tree_reduce_backlog(sch, 1, pkt_len);
 		}
 	}
 
-- 
2.53.0


^ permalink raw reply related

* [PATCH net v5 6/8] net/sched: netem: null-terminate tfifo linear queue tail
From: Stephen Hemminger @ 2026-04-11  5:15 UTC (permalink / raw)
  To: netdev
  Cc: Stephen Hemminger, Simon Horman, Jamal Hadi Salim, Jiri Pirko,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Peter Oskolkov, open list
In-Reply-To: <20260411051700.311679-1-stephen@networkplumber.org>

When tfifo_enqueue() appends a packet to the linear queue tail,
nskb->next is never set to NULL. The list terminates correctly
only by accident if the skb arrived with next already NULL.

Explicitly null-terminate the tail to prevent list corruption.

Fixes: d66280b12bd7 ("net: netem: use a list in addition to rbtree")
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 net/sched/sch_netem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 98931bb4354b..14d22fc7365d 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -398,6 +398,7 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 			q->t_tail->next = nskb;
 		else
 			q->t_head = nskb;
+		nskb->next = NULL;
 		q->t_tail = nskb;
 	} else {
 		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
-- 
2.53.0


^ permalink raw reply related

* [PATCH net v5 7/8] net/sched: netem: check for invalid slot range
From: Stephen Hemminger @ 2026-04-11  5:15 UTC (permalink / raw)
  To: netdev
  Cc: Stephen Hemminger, Simon Horman, Jamal Hadi Salim, Jiri Pirko,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Neal Cardwell, Yousuk Seung, open list
In-Reply-To: <20260411051700.311679-1-stephen@networkplumber.org>

Reject slot configuration where min_delay exceeds max_delay.
The delay range computation in get_slot_next() underflows in
this case, producing bogus results.

Fixes: 0a9fe5c375b5 ("netem: slotting with non-uniform distribution")
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 net/sched/sch_netem.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 14d22fc7365d..ef4965f20f17 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -888,6 +888,18 @@ static int get_dist_table(struct disttable **tbl, const struct nlattr *attr)
 	return 0;
 }
 
+static int validate_slot(const struct nlattr *attr,
+			 struct netlink_ext_ack *extack)
+{
+	const struct tc_netem_slot *c = nla_data(attr);
+
+	if (c->min_delay > c->max_delay) {
+		NL_SET_ERR_MSG(extack, "slot min delay greater than max delay");
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
 {
 	const struct tc_netem_slot *c = nla_data(attr);
@@ -1101,6 +1113,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
 			goto table_free;
 	}
 
+	if (tb[TCA_NETEM_SLOT]) {
+		ret = validate_slot(tb[TCA_NETEM_SLOT], extack);
+		if (ret)
+			goto table_free;
+	}
+
 	sch_tree_lock(sch);
 	/* backup q->clg and q->loss_model */
 	old_clg = q->clg;
-- 
2.53.0


^ permalink raw reply related

* [PATCH net v5 8/8] net/sched: netem: fix slot delay calculation overflow
From: Stephen Hemminger @ 2026-04-11  5:15 UTC (permalink / raw)
  To: netdev
  Cc: Stephen Hemminger, Simon Horman, Jamal Hadi Salim, Jiri Pirko,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Neal Cardwell, Yousuk Seung, open list
In-Reply-To: <20260411051700.311679-1-stephen@networkplumber.org>

get_slot_next() computes a random delay between min_delay and
max_delay using:

  get_random_u32() * (max_delay - min_delay) >> 32

This overflows signed 64-bit arithmetic when the delay range exceeds
approximately 2.1 seconds (2^31 nanoseconds), producing a negative
result that effectively disables slot-based pacing. This is a
realistic configuration for WAN emulation (e.g., slot 1s 5s).

Use mul_u64_u32_shr() which handles the widening multiply without
overflow.

Fixes: 0a9fe5c375b5 ("netem: slotting with non-uniform distribution")
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 net/sched/sch_netem.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index ef4965f20f17..6a09627bafa0 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -660,9 +660,8 @@ static void get_slot_next(struct netem_sched_data *q, u64 now)
 
 	if (!q->slot_dist)
 		next_delay = q->slot_config.min_delay +
-				(get_random_u32() *
-				 (q->slot_config.max_delay -
-				  q->slot_config.min_delay) >> 32);
+			mul_u64_u32_shr(q->slot_config.max_delay - q->slot_config.min_delay,
+					get_random_u32(), 32);
 	else
 		next_delay = tabledist(q->slot_config.dist_delay,
 				       (s32)(q->slot_config.dist_jitter),
-- 
2.53.0


^ permalink raw reply related

* Re: [PATCH] net: Optimize flush calculation in inet_gro_receive()
From: Kuniyuki Iwashima @ 2026-04-11  5:19 UTC (permalink / raw)
  To: deller; +Cc: davem, dsahern, linux-kernel, linux-parisc, netdev, edumazet
In-Reply-To: <adkMqgP6QeBPgqP9@p100>

From: Helge Deller <deller@kernel.org>
Date: Fri, 10 Apr 2026 16:43:54 +0200
> For the calculation of the flush variable, use the get_unaligned_xxx() helpers
> to access only relevant bits of the IP header.
> 
> Note: Since I don't know the network details, I'm not sure if "& ~IP_DF"
> (& ~0x4000) is correct, or if "& IP_OFFSET" (& 0x1FFF) should be used instead

~IP_DF is correct (MF bit needs to be checked), see

commit db8caf3dbc77599dc90f4ea0a803cd1d97116f30
Author: Eric Dumazet <edumazet@google.com>
Date:   Fri May 31 11:18:10 2013

    gro: should aggregate frames without DF


> (which I believe would be more correct). Instead of possibly breaking things I
> left it as is, but maybe some expert can check?
> 
> Signed-off-by: Helge Deller <deller@gmx.de>
> 
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index c7731e300a44..58cad2687c2c 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -1479,7 +1479,7 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
>  	struct sk_buff *p;
>  	unsigned int hlen;
>  	unsigned int off;
> -	int flush = 1;
> +	u16 flush = 1;
>  	int proto;
>  
>  	off = skb_gro_offset(skb);
> @@ -1504,7 +1504,8 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
>  		goto out;
>  
>  	NAPI_GRO_CB(skb)->proto = proto;
> -	flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (ntohl(*(__be32 *)&iph->id) & ~IP_DF));
> +	flush = (get_unaligned_be16(&iph->tot_len) ^ skb_gro_len(skb)) |
> +	        (get_unaligned_be16(&iph->frag_off) & ~IP_DF);

I think here we intentionally use 32-bit loads:

commit 1075f3f65d0e0f49351b7d4310e9f94483972a51
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Tue May 26 18:50:29 2009

    ipv4: Use 32-bit loads for ID and length in GRO


Before your patch, 32-bit load + bswap are used while
16-bit load + rol 8 after the change.

I feel the 4-byte aligned load + bswap is faster than
misaligned access + 8 times shift (Is this internally
optimised like xchg for a single word size ?)

Do you have some numbers ?


Before:
	flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb))
mov    edx,DWORD PTR [rcx]
bswap  edx
	return skb->len - NAPI_GRO_CB(skb)->data_offset;
mov    r8d,DWORD PTR [rsi+0x38]
mov    r9d,DWORD PTR [rsi+0x70]
sub    r9d,r8d
xor    r9d,edx
	| (ntohl(*(__be32 *)&iph->id) & ~IP_DF));
mov    ebp,0xffbfffff
and    ebp,DWORD PTR [rcx+0x4]
bswap  ebp
or     ebp,r9d


After:
	flush = (get_unaligned_be16(&iph->tot_len) ^ skb_gro_len(skb))
movzx  edx,WORD PTR [rcx+0x2]
rol    dx,0x8
	return skb->len - NAPI_GRO_CB(skb)->data_offset;
mov    r8d,DWORD PTR [rsi+0x38]
mov    r9d,DWORD PTR [rsi+0x70]
sub    r9d,r8d
xor    r9d,edx
	| (get_unaligned_be16(&iph->frag_off) & ~IP_DF);
movzx  ebp,WORD PTR [rcx+0x6]
and    ebp,0xffffffbf
rol    bp,0x8
or     ebp,r9d

^ permalink raw reply

* Re: [PATCH iwl-next 2/2] idpf: implement pci error handlers
From: Lukas Wunner @ 2026-04-11  5:43 UTC (permalink / raw)
  To: Emil Tantilov
  Cc: intel-wired-lan, netdev, przemyslaw.kitszel, jay.bhat,
	ivan.d.barrera, aleksandr.loktionov, larysa.zaremba,
	anthony.l.nguyen, andrew+netdev, davem, edumazet, kuba, pabeni,
	aleksander.lobakin, linux-pci, madhu.chittim, decot, willemb,
	sheenamo
In-Reply-To: <20260411003959.30959-3-emil.s.tantilov@intel.com>

On Fri, Apr 10, 2026 at 05:39:59PM -0700, Emil Tantilov wrote:
> +static pci_ers_result_t
> +idpf_pci_err_slot_reset(struct pci_dev *pdev)
> +{
> +	struct idpf_adapter *adapter = pci_get_drvdata(pdev);
> +
> +	pci_restore_state(pdev);
> +	pci_set_master(pdev);
> +	pci_wake_from_d3(pdev, false);
> +	if (readl(adapter->reset_reg.rstat) != 0xFFFFFFFF) {
> +		pci_save_state(pdev);
> +		return PCI_ERS_RESULT_RECOVERED;
> +	}

The pci_save_state() is no longer necessary here, please drop it.
See commits a2f1e22390ac and 383d89699c50 for details.

Thanks,

Lukas

^ permalink raw reply

* Re: [PATCH net] netrom: do some basic forms of validation on incoming frames
From: Greg KH @ 2026-04-11  5:50 UTC (permalink / raw)
  To: hugh
  Cc: Kuniyuki Iwashima, kuba, davem, edumazet, horms, linux-hams,
	linux-kernel, netdev, pabeni, stable, workflows, yizhe
In-Reply-To: <4f5810a7-c792-4d6b-9f7c-6c6b289def19@blemings.org>

On Sat, Apr 11, 2026 at 08:25:19AM +1000, Hugh Blemings wrote:
> 
> On 11/4/2026 08:11, Kuniyuki Iwashima wrote:
> > From: Jakub Kicinski <kuba@kernel.org>
> > Date: Fri, 10 Apr 2026 14:54:48 -0700
> > > On Fri, 10 Apr 2026 14:30:42 -0700 Jakub Kicinski wrote:
> > > > On Fri, 10 Apr 2026 07:24:36 +0200 Greg Kroah-Hartman wrote:
> > > > > On Thu, Apr 09, 2026 at 08:32:35PM -0700, Jakub Kicinski wrote:
> > > > > > Or for simplicity we could also be testing against skb_headlen()
> > > > > > since we don't expect any legit non-linear frames here? Dunno.
> > > > > I'll be glad to change this either way, your call.  Given that this is
> > > > > an obsolete protocol that seems to only be a target for drive-by fuzzers
> > > > > to attack, whatever the simplest thing to do to quiet them up I'll be
> > > > > glad to implement.
> > > > > 
> > > > > Or can we just delete this stuff entirely?  :)
> > > > Yes.
> > > > 
> > > > My thinking is to delete hamradio, nfc, atm, caif.. [more to come]
> > > > Create GH repos which provide them as OOT modules.
> > > > Hopefully we can convince any existing users to switch to that.
> > > > 
> > > > The only thing stopping me is the concern that this is just the softest
> > > > target and the LLMs will find something else to focus on which we can't
> > > > delete. I suspect any PCIe driver can be flooded with "aren't you
> > > > trusting the HW to provide valid responses here?" bullshit.
> > > > 
> > > > But hey, let's try. I'll post a patch nuking all of hamradio later
> > > > today.
> > > Well, either we "expunge" this code to OOT repos, or we mark it
> > > as broken and tell everyone that we don't take security fixes
> > > for anything that depends on BROKEN. I'd personally rather expunge.
> > +1 for "expunge" to prevent LLM-based patch flood.
> > 
> > IIRC, we did that recently for one driver only used by OpenWRT ?
> > 
> > 
> If the main concern here is ongoing maintenance of these Ham Radio related
> protocols/drivers, can we pause for a moment on anything as dramatic as
> removing from the tree entirely ?

Sure, but:

> There is a good cohort of capable kernel folks that either are or were ham
> radio operators who I believe, upon realising that things have got to this
> point, will be happy to redouble efforts to ensure this code maintained and
> tested to a satisfactory standard.

We need this code to be maintained, because as is being shown, there are
reported problems with it that will affect these devices/networks that
you all are using.  So all we need is a maintainer for this to be able
to take reports that we get and fix things up as needed.  I know you
have that experience, want to come back to kernel development, we've
missed you :)

thanks,

greg k-h

^ permalink raw reply

* [PATCH net] openvswitch: fix kernel panic from oversized vport upcall PID arrays
From: Weiming Shi @ 2026-04-11  5:59 UTC (permalink / raw)
  To: Aaron Conole, Eelco Chaudron, Ilya Maximets, David S . Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Pravin B Shelar, Flavio Leitner, Mark Gray, netdev,
	dev, linux-kernel, Xiang Mei, Weiming Shi

The vport netlink reply helpers allocate a fixed-size skb with
nlmsg_new(NLMSG_DEFAULT_SIZE, ...) but serialize the full upcall PID
array via ovs_vport_get_upcall_portids(). Since
ovs_vport_set_upcall_portids() accepts any non-zero multiple of
sizeof(u32) with no upper bound, a CAP_NET_ADMIN user can install a
PID array large enough to overflow the reply buffer. When the
subsequent nla_put() fails with -EMSGSIZE, five BUG_ON(err < 0) sites
fire and panic the kernel. On systems with unprivileged user namespaces
enabled (e.g., Ubuntu default), this is reachable via unshare -Urn.

 kernel BUG at net/openvswitch/datapath.c:2414!
 Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI
 CPU: 1 UID: 0 PID: 65 Comm: poc Not tainted 7.0.0-rc7-00195-geb216e422044 #1
 RIP: 0010:ovs_vport_cmd_set (net/openvswitch/datapath.c:2414 (discriminator 1))
 Call Trace:
  <TASK>
  genl_family_rcv_msg_doit (net/netlink/genetlink.c:1116)
  genl_rcv_msg (net/netlink/genetlink.c:1194 net/netlink/genetlink.c:1209)
  netlink_rcv_skb (net/netlink/af_netlink.c:2550)
  genl_rcv (net/netlink/genetlink.c:1219)
  netlink_unicast (net/netlink/af_netlink.c:1319 net/netlink/af_netlink.c:1344)
  netlink_sendmsg (net/netlink/af_netlink.c:1894)
  __sys_sendto (net/socket.c:2206 (discriminator 1))
  __x64_sys_sendto (net/socket.c:2209)
  do_syscall_64 (arch/x86/entry/syscall_64.c:63 (discriminator 1))
  entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
  </TASK>
 Kernel panic - not syncing: Fatal exception

Dynamically compute the reply skb size based on the vport's actual PID
array length instead of using a fixed NLMSG_DEFAULT_SIZE, and replace
the BUG_ON() calls with WARN_ON_ONCE() plus graceful error returns.

Fixes: b83d23a2a38b ("openvswitch: Introduce per-cpu upcall dispatch")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
---
 net/openvswitch/datapath.c | 95 ++++++++++++++++++++++++++------------
 1 file changed, 66 insertions(+), 29 deletions(-)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index e209099218b4..3649a1f2a3f5 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2184,9 +2184,17 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 	return err;
 }
 
-static struct sk_buff *ovs_vport_cmd_alloc_info(void)
+/* Must be called with ovs_mutex or rcu_read_lock. */
+static size_t ovs_vport_cmd_msg_size(const struct vport *vport)
 {
-	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	const struct vport_portids *ids;
+	size_t msgsize = NLMSG_DEFAULT_SIZE;
+
+	ids = rcu_dereference_ovsl(vport->upcall_portids);
+	if (ids && (vport->dp->user_features & OVS_DP_F_VPORT_PIDS))
+		msgsize += ids->n_ids * sizeof(u32);
+
+	return msgsize;
 }
 
 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
@@ -2196,13 +2204,16 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
 	struct sk_buff *skb;
 	int retval;
 
-	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	skb = nlmsg_new(ovs_vport_cmd_msg_size(vport), GFP_KERNEL);
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
 	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
 					 GFP_KERNEL);
-	BUG_ON(retval < 0);
+	if (WARN_ON_ONCE(retval < 0)) {
+		kfree_skb(skb);
+		return ERR_PTR(retval);
+	}
 
 	return skb;
 }
@@ -2303,7 +2314,8 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	if (port_no >= DP_MAX_PORTS)
 		return -EFBIG;
 
-	reply = ovs_vport_cmd_alloc_info();
+	reply = genlmsg_new(NLMSG_DEFAULT_SIZE +
+			    nla_len(a[OVS_VPORT_ATTR_UPCALL_PID]), GFP_KERNEL);
 	if (!reply)
 		return -ENOMEM;
 
@@ -2358,7 +2370,9 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	else
 		netdev_set_rx_headroom(vport->dev, dp->max_headroom);
 
-	BUG_ON(err < 0);
+	if (WARN_ON_ONCE(err < 0))
+		goto exit_unlock_free;
+
 	ovs_unlock();
 
 	ovs_notify(&dp_vport_genl_family, reply, info);
@@ -2377,49 +2391,52 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct vport *vport;
 	int err;
 
-	reply = ovs_vport_cmd_alloc_info();
-	if (!reply)
-		return -ENOMEM;
-
 	ovs_lock();
 	vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
-		goto exit_unlock_free;
+		goto exit_unlock;
 
 	if (a[OVS_VPORT_ATTR_TYPE] &&
 	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
 		err = -EINVAL;
-		goto exit_unlock_free;
+		goto exit_unlock;
 	}
 
 	if (a[OVS_VPORT_ATTR_OPTIONS]) {
 		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
 		if (err)
-			goto exit_unlock_free;
+			goto exit_unlock;
 	}
 
-
 	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
 		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
 
 		err = ovs_vport_set_upcall_portids(vport, ids);
 		if (err)
-			goto exit_unlock_free;
+			goto exit_unlock;
+	}
+
+	reply = genlmsg_new(ovs_vport_cmd_msg_size(vport), GFP_KERNEL);
+	if (!reply) {
+		err = -ENOMEM;
+		goto exit_unlock;
 	}
 
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
 				      OVS_VPORT_CMD_SET, GFP_KERNEL);
-	BUG_ON(err < 0);
+	if (WARN_ON_ONCE(err < 0)) {
+		kfree_skb(reply);
+		goto exit_unlock;
+	}
 
 	ovs_unlock();
 	ovs_notify(&dp_vport_genl_family, reply, info);
 	return 0;
 
-exit_unlock_free:
+exit_unlock:
 	ovs_unlock();
-	kfree_skb(reply);
 	return err;
 }
 
@@ -2433,25 +2450,30 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	unsigned int new_headroom;
 	int err;
 
-	reply = ovs_vport_cmd_alloc_info();
-	if (!reply)
-		return -ENOMEM;
-
 	ovs_lock();
 	vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
-		goto exit_unlock_free;
+		goto exit_unlock;
 
 	if (vport->port_no == OVSP_LOCAL) {
 		err = -EINVAL;
-		goto exit_unlock_free;
+		goto exit_unlock;
+	}
+
+	reply = genlmsg_new(ovs_vport_cmd_msg_size(vport), GFP_KERNEL);
+	if (!reply) {
+		err = -ENOMEM;
+		goto exit_unlock;
 	}
 
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
 				      OVS_VPORT_CMD_DEL, GFP_KERNEL);
-	BUG_ON(err < 0);
+	if (WARN_ON_ONCE(err < 0)) {
+		kfree_skb(reply);
+		goto exit_unlock;
+	}
 
 	/* the vport deletion may trigger dp headroom update */
 	dp = vport->dp;
@@ -2472,9 +2494,8 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	ovs_notify(&dp_vport_genl_family, reply, info);
 	return 0;
 
-exit_unlock_free:
+exit_unlock:
 	ovs_unlock();
-	kfree_skb(reply);
 	return err;
 }
 
@@ -2484,9 +2505,20 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	struct ovs_header *ovs_header = genl_info_userhdr(info);
 	struct sk_buff *reply;
 	struct vport *vport;
+	size_t msg_size;
 	int err;
 
-	reply = ovs_vport_cmd_alloc_info();
+	rcu_read_lock();
+	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
+	err = PTR_ERR(vport);
+	if (IS_ERR(vport)) {
+		rcu_read_unlock();
+		return err;
+	}
+	msg_size = ovs_vport_cmd_msg_size(vport);
+	rcu_read_unlock();
+
+	reply = genlmsg_new(msg_size, GFP_KERNEL);
 	if (!reply)
 		return -ENOMEM;
 
@@ -2495,12 +2527,17 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
 		goto exit_unlock_free;
+
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
 				      OVS_VPORT_CMD_GET, GFP_ATOMIC);
-	BUG_ON(err < 0);
 	rcu_read_unlock();
 
+	if (err < 0) {
+		kfree_skb(reply);
+		return err;
+	}
+
 	return genlmsg_reply(reply, info);
 
 exit_unlock_free:
-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH net-next 1/3] psp: add crypt-offset and spi-threshold get/set attributes
From: Willem de Bruijn @ 2026-04-11  6:11 UTC (permalink / raw)
  To: Jakub Kicinski, Akhilesh Samineni
  Cc: Willem de Bruijn, davem, edumazet, pabeni, andrew+netdev, horms,
	willemb, daniel.zahka, netdev, linux-kernel,
	jayakrishnan.udayavarma, ajit.khaparde, kiran.kella, sachin.suman
In-Reply-To: <20260410135736.4bc7ed46@kernel.org>

Jakub Kicinski wrote:
> On Sat, 11 Apr 2026 01:06:06 +0530 Akhilesh Samineni wrote:
> > On Wed, Apr 8, 2026 at 6:34 AM Jakub Kicinski <kuba@kernel.org> wrote:
> > > On Tue, 07 Apr 2026 17:37:41 -0400 Willem de Bruijn wrote:  
> > > > PSP defines a 6-bit field in 4 octet units. Does this need bounds checking?  
> > >
> > > More fundamentally, were we to support this -- is it a device property
> > > or an assoc property?  
> > 
> > It's a device property. All associations under the device will share
> > the same crypt-offset.
> 
> I don't think there's anything in the spec that says the crypto
> offset is device level.
> At the very least every L4 proto may want to have a different offset.
> We should probably hold off adding this until a real user appears.

On how it is configured, the spec says "The crypt offset can be
specified by the transmit descriptor or by configuration".

So some devices might indeed selectively set it per-packet, e.g.,
for specific protocols.

One real use case is network telemetry, exposing the inner transport
protocol ports. For that to be useful it would have to be enabled on
most if not all packets.

^ permalink raw reply

* Re: [PATCH net v5 6/8] net/sched: netem: null-terminate tfifo linear queue tail
From: Eric Dumazet @ 2026-04-11  6:38 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: netdev, Simon Horman, Jamal Hadi Salim, Jiri Pirko,
	David S. Miller, Jakub Kicinski, Paolo Abeni, Peter Oskolkov,
	open list
In-Reply-To: <20260411051700.311679-7-stephen@networkplumber.org>

On Fri, Apr 10, 2026 at 10:17 PM Stephen Hemminger
<stephen@networkplumber.org> wrote:
>
> When tfifo_enqueue() appends a packet to the linear queue tail,
> nskb->next is never set to NULL. The list terminates correctly
> only by accident if the skb arrived with next already NULL.
>
> Explicitly null-terminate the tail to prevent list corruption.
>
> Fixes: d66280b12bd7 ("net: netem: use a list in addition to rbtree")
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> Reviewed-by: Simon Horman <horms@kernel.org>

Can you explain how skb->next could be not NULL ?

This would be a bug in the upper stack.

Only TCQ_F_NOLOCK qdiscs (pfifo_fast) can possibly get such skbs, and
it would not care.

Other qdiscs already get skbs with skb_mark_not_on_list(skb).

^ permalink raw reply

* Re: [PATCH net] netrom: do some basic forms of validation on incoming frames
From: Hugh Blemings @ 2026-04-11  7:24 UTC (permalink / raw)
  To: Greg KH, hugh
  Cc: Kuniyuki Iwashima, kuba, davem, edumazet, horms, linux-hams,
	linux-kernel, netdev, pabeni, stable, workflows, yizhe
In-Reply-To: <2026041135-shindig-trekker-5d06@gregkh>


On 11/4/2026 15:50, Greg KH wrote:
> On Sat, Apr 11, 2026 at 08:25:19AM +1000, Hugh Blemings wrote:
>> On 11/4/2026 08:11, Kuniyuki Iwashima wrote:
>>> From: Jakub Kicinski <kuba@kernel.org>
>>> Date: Fri, 10 Apr 2026 14:54:48 -0700
>>>> On Fri, 10 Apr 2026 14:30:42 -0700 Jakub Kicinski wrote:
>>>>> On Fri, 10 Apr 2026 07:24:36 +0200 Greg Kroah-Hartman wrote:
>>>>>> On Thu, Apr 09, 2026 at 08:32:35PM -0700, Jakub Kicinski wrote:
>>>>>>> Or for simplicity we could also be testing against skb_headlen()
>>>>>>> since we don't expect any legit non-linear frames here? Dunno.
>>>>>> I'll be glad to change this either way, your call.  Given that this is
>>>>>> an obsolete protocol that seems to only be a target for drive-by fuzzers
>>>>>> to attack, whatever the simplest thing to do to quiet them up I'll be
>>>>>> glad to implement.
>>>>>>
>>>>>> Or can we just delete this stuff entirely?  :)
>>>>> Yes.
>>>>>
>>>>> My thinking is to delete hamradio, nfc, atm, caif.. [more to come]
>>>>> Create GH repos which provide them as OOT modules.
>>>>> Hopefully we can convince any existing users to switch to that.
>>>>>
>>>>> The only thing stopping me is the concern that this is just the softest
>>>>> target and the LLMs will find something else to focus on which we can't
>>>>> delete. I suspect any PCIe driver can be flooded with "aren't you
>>>>> trusting the HW to provide valid responses here?" bullshit.
>>>>>
>>>>> But hey, let's try. I'll post a patch nuking all of hamradio later
>>>>> today.
>>>> Well, either we "expunge" this code to OOT repos, or we mark it
>>>> as broken and tell everyone that we don't take security fixes
>>>> for anything that depends on BROKEN. I'd personally rather expunge.
>>> +1 for "expunge" to prevent LLM-based patch flood.
>>>
>>> IIRC, we did that recently for one driver only used by OpenWRT ?
>>>
>>>
>> If the main concern here is ongoing maintenance of these Ham Radio related
>> protocols/drivers, can we pause for a moment on anything as dramatic as
>> removing from the tree entirely ?
> Sure, but:
>
>> There is a good cohort of capable kernel folks that either are or were ham
>> radio operators who I believe, upon realising that things have got to this
>> point, will be happy to redouble efforts to ensure this code maintained and
>> tested to a satisfactory standard.
> We need this code to be maintained, because as is being shown, there are
> reported problems with it that will affect these devices/networks that
> you all are using.  So all we need is a maintainer for this to be able
> to take reports that we get and fix things up as needed.  I know you
> have that experience, want to come back to kernel development, we've
> missed you :)

That's most kind Greg, thank you, have missed all you cool kids too :)

More seriously though - I'd be up for doing it, but I think there may be 
others better placed than I who haven't yet realised we have this 
conundrum. I'm nudging a few folks offline on this front.

I've also kicked off a thread in linux-hams to discuss some of the 
broader questions raised about staying in tree, going to out of tree or 
looking at userspace solutions instead.

We'll try to get a cohesive picture back over the next few days.

Cheers,
Hugh

-- 
I am slowly moving to hugh@blemings.id.au as my main email address.
If you're using hugh@blemings.org please update your address book accordingly.
Thank you :)


^ permalink raw reply

* [PATCH v2 0/6] bus: mhi: host: mhi_phc: Add support for PHC over MHI
From: Krishna Chaitanya Chundru @ 2026-04-11  8:12 UTC (permalink / raw)
  To: Manivannan Sadhasivam, Richard Cochran
  Cc: mhi, linux-arm-msm, linux-kernel, netdev,
	Krishna Chaitanya Chundru, Vivek Pernamitta, Sivareddy Surasani,
	Vivek Pernamitta, Imran Shaik, Taniya Das

This series introduces the MHI PHC (PTP Hardware Clock) driver, which
registers a PTP (Precision Time Protocol) clock and communicates with
the MHI core to get the device side timestamps. These timestamps are
then exposed to the PTP subsystem, enabling precise time synchronization
between the host and the device.

The device exposes these through MHI time sync capability registers.

The following diagram illustrates the architecture and data flow:

 +-------------+    +--------------------+    +--------------+
 |Userspace App|<-->|Kernel PTP framework|<-->|MHI PHC Driver|
 +-------------+    +--------------------+    +--------------+
                                                     |
                                                     v
 +-------------------------------+         +-----------------+
 | MHI Device (Timestamp source) |<------->| MHI Core Driver |
 +-------------------------------+         +-----------------+

- User space applications use the standard Linux PTP interface.
- The PTP subsystem routes IOCTLs to the MHI PHC driver.
- The MHI PHC driver communicates with the MHI core to fetch timestamps.
- The MHI core interacts with the device to retrieve accurate time data.

Signed-off-by: Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
---
Changes in V2:
- Rebased onto the latest code.
- Patch 1 (bus: mhi: host: Add support to read MHI capabilities), which
is a dependency, was brought in here because the series this patch was
originally part of needs to be redesigned and will take time: https://lore.kernel.org/all/CAFEp6-0ik4B20cRyid9w0f+UgibGciPof9HCWTJ=uBOPvHG35Q@mail.gmail.com/.
- Link to v1: https://lore.kernel.org/all/20250818-tsc_time_sync-v1-0-2747710693ba@oss.qualcomm.com/

---
Imran Shaik (1):
      bus: mhi: host: mhi_phc: Add support for PHC over MHI

Krishna Chaitanya Chundru (3):
      bus: mhi: host: Add support for 64bit register reads and writes
      bus: mhi: pci_generic: Add support for 64 bit register read & write
      bus: mhi: host: Update the Time sync logic to read 64 bit register value

Vivek Pernamitta (2):
      bus: mhi: host: Add support to read MHI capabilities
      bus: mhi: host: Add support for non-posted TSC timesync feature

 drivers/bus/mhi/common.h           |  15 ++++
 drivers/bus/mhi/host/Kconfig       |   8 ++
 drivers/bus/mhi/host/Makefile      |   1 +
 drivers/bus/mhi/host/init.c        |  60 +++++++++++++++
 drivers/bus/mhi/host/internal.h    |   9 +++
 drivers/bus/mhi/host/main.c        |  97 ++++++++++++++++++++++++
 drivers/bus/mhi/host/mhi_phc.c     | 150 +++++++++++++++++++++++++++++++++++++
 drivers/bus/mhi/host/mhi_phc.h     |  28 +++++++
 drivers/bus/mhi/host/pci_generic.c |  46 ++++++++++++
 include/linux/mhi.h                |  43 +++++++++++
 10 files changed, 457 insertions(+)
---
base-commit: e774d5f1bc27a85f858bce7688509e866f8e8a4e
change-id: 20260411-tsc_timesync-f877a0394393

Best regards,
-- 
Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>


^ permalink raw reply

* [PATCH v2 1/6] bus: mhi: host: Add support to read MHI capabilities
From: Krishna Chaitanya Chundru @ 2026-04-11  8:12 UTC (permalink / raw)
  To: Manivannan Sadhasivam, Richard Cochran
  Cc: mhi, linux-arm-msm, linux-kernel, netdev,
	Krishna Chaitanya Chundru, Vivek Pernamitta, Sivareddy Surasani
In-Reply-To: <20260411-tsc_timesync-v2-0-6f25f72987b3@oss.qualcomm.com>

From: Vivek Pernamitta <vivek.pernamitta@oss.qualcomm.com>

As per MHI spec v1.2, sec 6.6, MHI has capability registers which are
located after the ERDB array. The location of this group of registers is
indicated by the MISCOFF register. Each capability has a capability ID to
determine which functionality is supported and each capability will point
to the next capability supported.

Add a basic function to read those capability offsets.

Signed-off-by: Vivek Pernamitta <vivek.pernamitta@oss.qualcomm.com>
Signed-off-by: Sivareddy Surasani <sivareddy.surasani@oss.qualcomm.com>
Signed-off-by: Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
---
 drivers/bus/mhi/common.h    | 11 +++++++++++
 drivers/bus/mhi/host/init.c | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)

diff --git a/drivers/bus/mhi/common.h b/drivers/bus/mhi/common.h
index dda340aaed95a5573a2ec776ca712e11a1ed0b52..4c316f3d5a68beb01f15cf575b03747096fdcf2c 100644
--- a/drivers/bus/mhi/common.h
+++ b/drivers/bus/mhi/common.h
@@ -16,6 +16,7 @@
 #define MHICFG				0x10
 #define CHDBOFF				0x18
 #define ERDBOFF				0x20
+#define MISCOFF				0x24
 #define BHIOFF				0x28
 #define BHIEOFF				0x2c
 #define DEBUGOFF			0x30
@@ -113,6 +114,9 @@
 #define MHISTATUS_MHISTATE_MASK		GENMASK(15, 8)
 #define MHISTATUS_SYSERR_MASK		BIT(2)
 #define MHISTATUS_READY_MASK		BIT(0)
+#define MISC_CAP_MASK			GENMASK(31, 0)
+#define CAP_CAPID_MASK			GENMASK(31, 24)
+#define CAP_NEXT_CAP_MASK		GENMASK(23, 12)
 
 /* Command Ring Element macros */
 /* No operation command */
@@ -204,6 +208,13 @@
 #define MHI_RSCTRE_DATA_DWORD1		cpu_to_le32(FIELD_PREP(GENMASK(23, 16), \
 							       MHI_PKT_TYPE_COALESCING))
 
+#define MHI_CAP_ID_INTX			0x1
+#define MHI_CAP_ID_TIME_SYNC		0x2
+#define MHI_CAP_ID_BW_SCALE		0x3
+#define MHI_CAP_ID_TSC_TIME_SYNC	0x4
+#define MHI_CAP_ID_MAX_TRB_LEN		0x5
+#define MHI_CAP_ID_MAX			0x6
+
 enum mhi_pkt_type {
 	MHI_PKT_TYPE_INVALID = 0x0,
 	MHI_PKT_TYPE_NOOP_CMD = 0x1,
diff --git a/drivers/bus/mhi/host/init.c b/drivers/bus/mhi/host/init.c
index 0a728ca2c494836b0e0ce4c3f4aea41794c0868b..c2162aa04e810e45ccfbedd20aaa62f892420d31 100644
--- a/drivers/bus/mhi/host/init.c
+++ b/drivers/bus/mhi/host/init.c
@@ -466,6 +466,38 @@ static int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl)
 	return ret;
 }
 
+static int mhi_find_capability(struct mhi_controller *mhi_cntrl, u32 capability)
+{
+	u32 val, cur_cap, next_offset, cur_offset;
+	int ret;
+
+	/* Get the first supported capability offset */
+	ret = mhi_read_reg_field(mhi_cntrl, mhi_cntrl->regs, MISCOFF, MISC_CAP_MASK, &cur_offset);
+	if (ret)
+		return 0;
+
+	do {
+		if (cur_offset >= mhi_cntrl->reg_len)
+			return 0;
+
+		ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->regs, cur_offset, &val);
+		if (ret)
+			return 0;
+
+		cur_cap = FIELD_GET(CAP_CAPID_MASK, val);
+		next_offset = FIELD_GET(CAP_NEXT_CAP_MASK, val);
+		if (cur_cap >= MHI_CAP_ID_MAX)
+			return 0;
+
+		if (cur_cap == capability)
+			return cur_offset;
+
+		cur_offset = next_offset;
+	} while (next_offset);
+
+	return 0;
+}
+
 int mhi_init_mmio(struct mhi_controller *mhi_cntrl)
 {
 	u32 val;

-- 
2.34.1


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox