Re: [PATCH net-next v05 1/6] hinic3: Add ethtool queue ops

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Jakub Kicinski <kuba@kernel.org>
To: Fan Gong <gongfan1@huawei.com>
Cc: Zhu Yikai <zhuyikai1@h-partners.com>, <netdev@vger.kernel.org>,
	"David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Paolo Abeni <pabeni@redhat.com>, Simon Horman <horms@kernel.org>,
	Andrew Lunn <andrew+netdev@lunn.ch>,
	Ioana Ciornei <ioana.ciornei@nxp.com>,
	Mohsin Bashir <mohsin.bashr@gmail.com>,
	<linux-kernel@vger.kernel.org>, <linux-doc@vger.kernel.org>,
	luosifu <luosifu@huawei.com>, Xin Guo <guoxin09@huawei.com>,
	Zhou Shuai <zhoushuai28@huawei.com>, Wu Like <wulike1@huawei.com>,
	Shi Jing <shijing34@huawei.com>,
	Zheng Jiezhen <zhengjiezhen@h-partners.com>,
	Maxime Chevallier <maxime.chevallier@bootlin.com>
Subject: Re: [PATCH net-next v05 1/6] hinic3: Add ethtool queue ops
Date: Mon, 13 Apr 2026 17:18:17 -0700	[thread overview]
Message-ID: <20260413171817.1e1eca30@kernel.org> (raw)
In-Reply-To: <157d5cc6e757ffa77eee01dfdc3f2159dc97905f.1775711066.git.zhuyikai1@h-partners.com>

On Sat, 11 Apr 2026 11:36:59 +0800 Fan Gong wrote:
>   Implement following ethtool callback function:
> .get_ringparam
> .set_ringparam
> 
>   These callbacks allow users to utilize ethtool for detailed
> queue depth configuration and monitoring.

> +static int hinic3_check_ringparam_valid(struct net_device *netdev,
> +					const struct ethtool_ringparam *ring)
> +{
> +	if (ring->rx_jumbo_pending || ring->rx_mini_pending) {

Can driver actually be called with non-zero values if max is not set?

> +		netdev_err(netdev, "Unsupported rx_jumbo_pending/rx_mini_pending\n");
> +		return -EINVAL;
> +	}


> +	if (ring->tx_pending > HINIC3_MAX_TX_QUEUE_DEPTH ||
> +	    ring->tx_pending < HINIC3_MIN_QUEUE_DEPTH ||
> +	    ring->rx_pending > HINIC3_MAX_RX_QUEUE_DEPTH ||
> +	    ring->rx_pending < HINIC3_MIN_QUEUE_DEPTH) {

similar question - do you need to check the upper bound?
kernel should check the input against max returned by .get

> +		netdev_err(netdev,

please use extack for errors

> +			   "Queue depth out of range tx[%d-%d] rx[%d-%d]\n",
> +			   HINIC3_MIN_QUEUE_DEPTH, HINIC3_MAX_TX_QUEUE_DEPTH,
> +			   HINIC3_MIN_QUEUE_DEPTH, HINIC3_MAX_RX_QUEUE_DEPTH);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static int hinic3_set_ringparam(struct net_device *netdev,
> +				struct ethtool_ringparam *ring,
> +				struct kernel_ethtool_ringparam *kernel_ring,
> +				struct netlink_ext_ack *extack)
> +{
> +	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
> +	struct hinic3_dyna_txrxq_params q_params = {};
> +	u32 new_sq_depth, new_rq_depth;
> +	int err;
> +
> +	err = hinic3_check_ringparam_valid(netdev, ring);
> +	if (err)
> +		return err;
> +
> +	new_sq_depth = 1U << ilog2(ring->tx_pending);
> +	new_rq_depth = 1U << ilog2(ring->rx_pending);
> +	if (new_sq_depth == nic_dev->q_params.sq_depth &&
> +	    new_rq_depth == nic_dev->q_params.rq_depth)
> +		return 0;
> +
> +	if (new_sq_depth != ring->tx_pending)
> +		netdev_info(netdev, "Requested Tx depth trimmed to %d\n",
> +			    new_sq_depth);

please use extack for warnings

> +	if (new_rq_depth != ring->rx_pending)
> +		netdev_info(netdev, "Requested Rx depth trimmed to %d\n",
> +			    new_rq_depth);
> +
> +	netdev_info(netdev, "Change Tx/Rx ring depth from %u/%u to %u/%u\n",
> +		    nic_dev->q_params.sq_depth, nic_dev->q_params.rq_depth,
> +		    new_sq_depth, new_rq_depth);
> +
> +	if (!netif_running(netdev)) {
> +		hinic3_update_qp_depth(netdev, new_sq_depth, new_rq_depth);
> +	} else {
> +		q_params = nic_dev->q_params;
> +		q_params.sq_depth = new_sq_depth;
> +		q_params.rq_depth = new_rq_depth;
> +
> +		err = hinic3_change_channel_settings(netdev, &q_params);
> +		if (err) {
> +			netdev_err(netdev, "Failed to change channel settings\n");
> +			return err;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  static const struct ethtool_ops hinic3_ethtool_ops = {
>  	.supported_coalesce_params      = ETHTOOL_COALESCE_USECS |
>  					  ETHTOOL_COALESCE_PKT_RATE_RX_USECS,
> @@ -417,6 +516,8 @@ static const struct ethtool_ops hinic3_ethtool_ops = {
>  	.get_msglevel                   = hinic3_get_msglevel,
>  	.set_msglevel                   = hinic3_set_msglevel,
>  	.get_link                       = ethtool_op_get_link,
> +	.get_ringparam                  = hinic3_get_ringparam,
> +	.set_ringparam                  = hinic3_set_ringparam,
>  };

> diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_main.c b/drivers/net/ethernet/huawei/hinic3/hinic3_main.c
> index 0a888fe4c975..3b470978714a 100644
> --- a/drivers/net/ethernet/huawei/hinic3/hinic3_main.c
> +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_main.c
> @@ -179,6 +179,8 @@ static int hinic3_sw_init(struct net_device *netdev)
>  	int err;
>  
>  	mutex_init(&nic_dev->port_state_mutex);
> +	mutex_init(&nic_dev->channel_cfg_lock);

Why do you need this mutex?
Aren't all the places you take it under rtnl_lock anyway?

> +	spin_lock_init(&nic_dev->channel_res_lock);
>  
>  	nic_dev->q_params.sq_depth = HINIC3_SQ_DEPTH;
>  	nic_dev->q_params.rq_depth = HINIC3_RQ_DEPTH;
> @@ -314,6 +316,15 @@ static void hinic3_link_status_change(struct net_device *netdev,
>  				      bool link_status_up)
>  {
>  	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
> +	unsigned long flags;
> +	bool valid;
> +
> +	spin_lock_irqsave(&nic_dev->channel_res_lock, flags);
> +	valid = HINIC3_CHANNEL_RES_VALID(nic_dev);
> +	spin_unlock_irqrestore(&nic_dev->channel_res_lock, flags);
> +
> +	if (!valid)

Why are you checking valid here? What if the state changes immediately
after unlocking?

> +		return;
>  
>  	if (link_status_up) {
>  		if (netif_carrier_ok(netdev))
> diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c b/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c
> index da73811641a9..d652a5ffdc2c 100644
> --- a/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c
> +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c
> @@ -428,6 +428,85 @@ static void hinic3_vport_down(struct net_device *netdev)
>  	}
>  }
>  
> +int
> +hinic3_change_channel_settings(struct net_device *netdev,
> +			       struct hinic3_dyna_txrxq_params *trxq_params)
> +{
> +	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
> +	struct hinic3_dyna_txrxq_params old_qp_params = {};
> +	struct hinic3_dyna_qp_params new_qp_params = {};
> +	struct hinic3_dyna_qp_params cur_qp_params = {};
> +	bool need_teardown = false;
> +	unsigned long flags;
> +	int err;
> +
> +	mutex_lock(&nic_dev->channel_cfg_lock);
> +
> +	hinic3_config_num_qps(netdev, trxq_params);
> +
> +	err = hinic3_alloc_channel_resources(netdev, &new_qp_params,
> +					     trxq_params);
> +	if (err) {
> +		netdev_err(netdev, "Failed to alloc channel resources\n");
> +		mutex_unlock(&nic_dev->channel_cfg_lock);
> +		return err;
> +	}
> +
> +	spin_lock_irqsave(&nic_dev->channel_res_lock, flags);
> +	if (!test_and_set_bit(HINIC3_CHANGE_RES_INVALID, &nic_dev->flags))
> +		need_teardown = true;
> +	spin_unlock_irqrestore(&nic_dev->channel_res_lock, flags);
> +
> +	if (need_teardown) {
> +		hinic3_vport_down(netdev);
> +		hinic3_close_channel(netdev);
> +		hinic3_uninit_qps(nic_dev, &cur_qp_params);
> +		hinic3_free_channel_resources(netdev, &cur_qp_params,
> +					      &nic_dev->q_params);
> +	}
> +
> +	if (nic_dev->num_qp_irq > trxq_params->num_qps)
> +		hinic3_qp_irq_change(netdev, trxq_params->num_qps);
> +
> +	spin_lock_irqsave(&nic_dev->channel_res_lock, flags);
> +	old_qp_params = nic_dev->q_params;
> +	nic_dev->q_params = *trxq_params;
> +	spin_unlock_irqrestore(&nic_dev->channel_res_lock, flags);
> +
> +	hinic3_init_qps(nic_dev, &new_qp_params);
> +
> +	err = hinic3_open_channel(netdev);

This "open" function allocates Rx buffers, and fails if it couldn't get
even one. That's no good.

> +	if (err)
> +		goto err_uninit_qps;
> +
> +	err = hinic3_vport_up(netdev);
> +	if (err)
> +		goto err_close_channel;
> +
> +	spin_lock_irqsave(&nic_dev->channel_res_lock, flags);
> +	clear_bit(HINIC3_CHANGE_RES_INVALID, &nic_dev->flags);
> +	spin_unlock_irqrestore(&nic_dev->channel_res_lock, flags);
> +
> +	mutex_unlock(&nic_dev->channel_cfg_lock);
> +
> +	return 0;
> +
> +err_close_channel:
> +	hinic3_close_channel(netdev);
> +err_uninit_qps:
> +	spin_lock_irqsave(&nic_dev->channel_res_lock, flags);
> +	nic_dev->q_params = old_qp_params;
> +	clear_bit(HINIC3_CHANGE_RES_INVALID, &nic_dev->flags);
> +	spin_unlock_irqrestore(&nic_dev->channel_res_lock, flags);
> +
> +	hinic3_uninit_qps(nic_dev, &new_qp_params);
> +	hinic3_free_channel_resources(netdev, &new_qp_params, trxq_params);
> +
> +	mutex_unlock(&nic_dev->channel_cfg_lock);

AI says:

  Can this error path lead to memory corruption?
  If need_teardown was true, the old channel resources were freed earlier in
  the function. If hinic3_open_channel() or hinic3_vport_up() fails, the code
  jumps to err_uninit_qps and restores nic_dev->q_params = old_qp_params.
  However, it doesn't appear to re-allocate those old resources or mark the
  interface as down. Could a subsequent administrative teardown or network
  traffic dereference these freed pointers?

> +
>  static int hinic3_open(struct net_device *netdev)
>  {
>  	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
> @@ -487,16 +566,33 @@ static int hinic3_close(struct net_device *netdev)
>  {
>  	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
>  	struct hinic3_dyna_qp_params qp_params;
> +	bool need_teardown = false;
> +	unsigned long flags;
>  
>  	if (!test_and_clear_bit(HINIC3_INTF_UP, &nic_dev->flags)) {
>  		netdev_dbg(netdev, "Netdev already close, do nothing\n");
>  		return 0;
>  	}
>  
> -	hinic3_vport_down(netdev);
> -	hinic3_close_channel(netdev);
> -	hinic3_uninit_qps(nic_dev, &qp_params);
> -	hinic3_free_channel_resources(netdev, &qp_params, &nic_dev->q_params);
> +	mutex_lock(&nic_dev->channel_cfg_lock);
> +
> +	spin_lock_irqsave(&nic_dev->channel_res_lock, flags);
> +	if (!test_and_set_bit(HINIC3_CHANGE_RES_INVALID, &nic_dev->flags))
> +		need_teardown = true;
> +	spin_unlock_irqrestore(&nic_dev->channel_res_lock, flags);
> +
> +	if (need_teardown) {
> +		hinic3_vport_down(netdev);
> +		hinic3_close_channel(netdev);
> +		hinic3_uninit_qps(nic_dev, &qp_params);
> +		hinic3_free_channel_resources(netdev, &qp_params,
> +					      &nic_dev->q_params);
> +	}
> +
> +	hinic3_free_nicio_res(nic_dev);
> +	hinic3_destroy_num_qps(netdev);
> +
> +	mutex_unlock(&nic_dev->channel_cfg_lock);
>  
>  	return 0;
>  }
> diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h
> index 9502293ff710..55b280888ad8 100644
> --- a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h
> +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h
> @@ -10,6 +10,9 @@
>  #include "hinic3_hw_cfg.h"
>  #include "hinic3_hwdev.h"
>  #include "hinic3_mgmt_interface.h"
> +#include "hinic3_nic_io.h"
> +#include "hinic3_tx.h"
> +#include "hinic3_rx.h"
>  
>  #define HINIC3_VLAN_BITMAP_BYTE_SIZE(nic_dev)  (sizeof(*(nic_dev)->vlan_bitmap))
>  #define HINIC3_VLAN_BITMAP_SIZE(nic_dev)  \
> @@ -20,8 +23,13 @@ enum hinic3_flags {
>  	HINIC3_MAC_FILTER_CHANGED,
>  	HINIC3_RSS_ENABLE,
>  	HINIC3_UPDATE_MAC_FILTER,
> +	HINIC3_CHANGE_RES_INVALID,
>  };
>  
> +#define HINIC3_CHANNEL_RES_VALID(nic_dev) \
> +	(test_bit(HINIC3_INTF_UP, &(nic_dev)->flags) && \
> +	 !test_bit(HINIC3_CHANGE_RES_INVALID, &(nic_dev)->flags))

I don't get why you need to check both of these bits.
Can't there be one bit for "resources valid" ?
And it will only be set while device is up (of course) so no need to
also check UP (this way checking can be atomic without the spin lock).

>  enum hinic3_event_work_flags {
>  	HINIC3_EVENT_WORK_TX_TIMEOUT,
>  };
> @@ -129,6 +137,10 @@ struct hinic3_nic_dev {
>  	struct work_struct              rx_mode_work;
>  	/* lock for enable/disable port */
>  	struct mutex                    port_state_mutex;
> +	/* lock for channel configuration */
> +	struct mutex                    channel_cfg_lock;
> +	/* lock for channel resources */
> +	spinlock_t                      channel_res_lock;
>  
>  	struct list_head                uc_filter_list;
>  	struct list_head                mc_filter_list;
> @@ -143,6 +155,10 @@ struct hinic3_nic_dev {
>  
>  void hinic3_set_netdev_ops(struct net_device *netdev);
>  int hinic3_set_hw_features(struct net_device *netdev);
> +int
> +hinic3_change_channel_settings(struct net_device *netdev,
> +			       struct hinic3_dyna_txrxq_params *trxq_params);
> +
>  int hinic3_qps_irq_init(struct net_device *netdev);
>  void hinic3_qps_irq_uninit(struct net_device *netdev);
>  
> diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_io.h b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_io.h
> index 12eefabcf1db..3791b9bc865b 100644
> --- a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_io.h
> +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_io.h
> @@ -14,6 +14,10 @@ struct hinic3_nic_dev;
>  #define HINIC3_RQ_WQEBB_SHIFT      3
>  #define HINIC3_SQ_WQEBB_SIZE       BIT(HINIC3_SQ_WQEBB_SHIFT)
>  
> +#define HINIC3_MAX_TX_QUEUE_DEPTH  65536
> +#define HINIC3_MAX_RX_QUEUE_DEPTH  16384
> +#define HINIC3_MIN_QUEUE_DEPTH     128
> +
>  /* ******************** RQ_CTRL ******************** */
>  enum hinic3_rq_wqe_type {
>  	HINIC3_NORMAL_RQ_WQE = 1,

next prev parent reply	other threads:[~2026-04-14  0:18 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-11  3:36 [PATCH net-next v05 0/6] net: hinic3: PF initialization Fan Gong
2026-04-11  3:36 ` [PATCH net-next v05 1/6] hinic3: Add ethtool queue ops Fan Gong
2026-04-14  0:18   ` Jakub Kicinski [this message]
2026-04-11  3:37 ` [PATCH net-next v05 2/6] hinic3: Add ethtool statistic ops Fan Gong
2026-04-11  3:37 ` [PATCH net-next v05 3/6] hinic3: Add ethtool coalesce ops Fan Gong
2026-04-11  3:37 ` [PATCH net-next v05 4/6] hinic3: Add ethtool rss ops Fan Gong
2026-04-11  3:37 ` [PATCH net-next v05 5/6] hinic3: Configure netdev->watchdog_timeo to set nic tx timeout Fan Gong
2026-04-11  3:37 ` [PATCH net-next v05 6/6] hinic3: Remove unneeded coalesce parameters Fan Gong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260413171817.1e1eca30@kernel.org \
    --to=kuba@kernel.org \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=gongfan1@huawei.com \
    --cc=guoxin09@huawei.com \
    --cc=horms@kernel.org \
    --cc=ioana.ciornei@nxp.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luosifu@huawei.com \
    --cc=maxime.chevallier@bootlin.com \
    --cc=mohsin.bashr@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=shijing34@huawei.com \
    --cc=wulike1@huawei.com \
    --cc=zhengjiezhen@h-partners.com \
    --cc=zhoushuai28@huawei.com \
    --cc=zhuyikai1@h-partners.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox