Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 09/12] amd-xgbe: Always attempt link training in KR mode
From: Tom Lendacky @ 2018-05-21 21:59 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

Link training is always attempted when in KR mode, but the code is
structured to check if link training has been enabled before attempting
to perform it.  Since that check will always be true, simplify the code
to always enable and start link training during KR auto-negotiation.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-mdio.c |   69 +++++++----------------------
 1 file changed, 16 insertions(+), 53 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 9c39c72..450b89c 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -216,31 +216,8 @@ static void xgbe_an_clear_interrupts_all(struct xgbe_prv_data *pdata)
 	xgbe_an37_clear_interrupts(pdata);
 }
 
-static void xgbe_an73_enable_kr_training(struct xgbe_prv_data *pdata)
-{
-	unsigned int reg;
-
-	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
-
-	reg |= XGBE_KR_TRAINING_ENABLE;
-	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
-}
-
-static void xgbe_an73_disable_kr_training(struct xgbe_prv_data *pdata)
-{
-	unsigned int reg;
-
-	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
-
-	reg &= ~XGBE_KR_TRAINING_ENABLE;
-	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
-}
-
 static void xgbe_kr_mode(struct xgbe_prv_data *pdata)
 {
-	/* Enable KR training */
-	xgbe_an73_enable_kr_training(pdata);
-
 	/* Set MAC to 10G speed */
 	pdata->hw_if.set_speed(pdata, SPEED_10000);
 
@@ -250,9 +227,6 @@ static void xgbe_kr_mode(struct xgbe_prv_data *pdata)
 
 static void xgbe_kx_2500_mode(struct xgbe_prv_data *pdata)
 {
-	/* Disable KR training */
-	xgbe_an73_disable_kr_training(pdata);
-
 	/* Set MAC to 2.5G speed */
 	pdata->hw_if.set_speed(pdata, SPEED_2500);
 
@@ -262,9 +236,6 @@ static void xgbe_kx_2500_mode(struct xgbe_prv_data *pdata)
 
 static void xgbe_kx_1000_mode(struct xgbe_prv_data *pdata)
 {
-	/* Disable KR training */
-	xgbe_an73_disable_kr_training(pdata);
-
 	/* Set MAC to 1G speed */
 	pdata->hw_if.set_speed(pdata, SPEED_1000);
 
@@ -278,9 +249,6 @@ static void xgbe_sfi_mode(struct xgbe_prv_data *pdata)
 	if (pdata->kr_redrv)
 		return xgbe_kr_mode(pdata);
 
-	/* Disable KR training */
-	xgbe_an73_disable_kr_training(pdata);
-
 	/* Set MAC to 10G speed */
 	pdata->hw_if.set_speed(pdata, SPEED_10000);
 
@@ -290,9 +258,6 @@ static void xgbe_sfi_mode(struct xgbe_prv_data *pdata)
 
 static void xgbe_x_mode(struct xgbe_prv_data *pdata)
 {
-	/* Disable KR training */
-	xgbe_an73_disable_kr_training(pdata);
-
 	/* Set MAC to 1G speed */
 	pdata->hw_if.set_speed(pdata, SPEED_1000);
 
@@ -302,9 +267,6 @@ static void xgbe_x_mode(struct xgbe_prv_data *pdata)
 
 static void xgbe_sgmii_1000_mode(struct xgbe_prv_data *pdata)
 {
-	/* Disable KR training */
-	xgbe_an73_disable_kr_training(pdata);
-
 	/* Set MAC to 1G speed */
 	pdata->hw_if.set_speed(pdata, SPEED_1000);
 
@@ -314,9 +276,6 @@ static void xgbe_sgmii_1000_mode(struct xgbe_prv_data *pdata)
 
 static void xgbe_sgmii_100_mode(struct xgbe_prv_data *pdata)
 {
-	/* Disable KR training */
-	xgbe_an73_disable_kr_training(pdata);
-
 	/* Set MAC to 1G speed */
 	pdata->hw_if.set_speed(pdata, SPEED_1000);
 
@@ -425,6 +384,12 @@ static void xgbe_an73_set(struct xgbe_prv_data *pdata, bool enable,
 {
 	unsigned int reg;
 
+	/* Disable KR training for now */
+	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
+	reg &= ~XGBE_KR_TRAINING_ENABLE;
+	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
+
+	/* Update AN settings */
 	reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1);
 	reg &= ~MDIO_AN_CTRL1_ENABLE;
 
@@ -522,21 +487,19 @@ static enum xgbe_an xgbe_an73_tx_training(struct xgbe_prv_data *pdata,
 	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_FECCTRL, reg);
 
 	/* Start KR training */
-	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
-	if (reg & XGBE_KR_TRAINING_ENABLE) {
-		if (pdata->phy_if.phy_impl.kr_training_pre)
-			pdata->phy_if.phy_impl.kr_training_pre(pdata);
+	if (pdata->phy_if.phy_impl.kr_training_pre)
+		pdata->phy_if.phy_impl.kr_training_pre(pdata);
 
-		reg |= XGBE_KR_TRAINING_START;
-		XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL,
-			    reg);
+	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
+	reg |= XGBE_KR_TRAINING_ENABLE;
+	reg |= XGBE_KR_TRAINING_START;
+	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
 
-		netif_dbg(pdata, link, pdata->netdev,
-			  "KR training initiated\n");
+	netif_dbg(pdata, link, pdata->netdev,
+		  "KR training initiated\n");
 
-		if (pdata->phy_if.phy_impl.kr_training_post)
-			pdata->phy_if.phy_impl.kr_training_post(pdata);
-	}
+	if (pdata->phy_if.phy_impl.kr_training_post)
+		pdata->phy_if.phy_impl.kr_training_post(pdata);
 
 	return XGBE_AN_PAGE_RECEIVED;
 }

^ permalink raw reply related

* [PATCH net-next 08/12] amd-xgbe: Add ethtool show/set channels support
From: Tom Lendacky @ 2018-05-21 21:59 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

Add ethtool support to show and set the device channel configuration.
Changing the channel configuration will result in a device restart.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c     |   25 +++++
 drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c |  131 ++++++++++++++++++++++++++
 drivers/net/ethernet/amd/xgbe/xgbe.h         |    4 +
 3 files changed, 160 insertions(+)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 397e3a0..24f1053 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1329,6 +1329,17 @@ static int xgbe_alloc_memory(struct xgbe_prv_data *pdata)
 	struct net_device *netdev = pdata->netdev;
 	int ret;
 
+	if (pdata->new_tx_ring_count) {
+		pdata->tx_ring_count = pdata->new_tx_ring_count;
+		pdata->tx_q_count = pdata->tx_ring_count;
+		pdata->new_tx_ring_count = 0;
+	}
+
+	if (pdata->new_rx_ring_count) {
+		pdata->rx_ring_count = pdata->new_rx_ring_count;
+		pdata->new_rx_ring_count = 0;
+	}
+
 	/* Calculate the Rx buffer size before allocating rings */
 	pdata->rx_buf_size = xgbe_calc_rx_buf_size(netdev, netdev->mtu);
 
@@ -1482,6 +1493,20 @@ static void xgbe_stopdev(struct work_struct *work)
 	netdev_alert(pdata->netdev, "device stopped\n");
 }
 
+void xgbe_full_restart_dev(struct xgbe_prv_data *pdata)
+{
+	/* If not running, "restart" will happen on open */
+	if (!netif_running(pdata->netdev))
+		return;
+
+	xgbe_stop(pdata);
+
+	xgbe_free_memory(pdata);
+	xgbe_alloc_memory(pdata);
+
+	xgbe_start(pdata);
+}
+
 void xgbe_restart_dev(struct xgbe_prv_data *pdata)
 {
 	/* If not running, "restart" will happen on open */
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index d12f982..d26fd95 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -705,6 +705,135 @@ static int xgbe_set_ringparam(struct net_device *netdev,
 	return 0;
 }
 
+static void xgbe_get_channels(struct net_device *netdev,
+			      struct ethtool_channels *channels)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	unsigned int rx, tx, combined;
+
+	/* Calculate maximums allowed:
+	 *   - Take into account the number of available IRQs
+	 *   - Do not take into account the number of online CPUs so that
+	 *     the user can over-subscribe if desired
+	 *   - Tx is additionally limited by the number of hardware queues
+	 */
+	rx = min(pdata->hw_feat.rx_ch_cnt, pdata->rx_max_channel_count);
+	rx = min(rx, pdata->channel_irq_count);
+	tx = min(pdata->hw_feat.tx_ch_cnt, pdata->tx_max_channel_count);
+	tx = min(tx, pdata->channel_irq_count);
+	tx = min(tx, pdata->tx_max_q_count);
+
+	combined = min(rx, tx);
+
+	channels->max_combined = combined;
+	channels->max_rx = rx;
+	channels->max_tx = tx;
+
+	/* Current running settings */
+	rx = pdata->rx_ring_count;
+	tx = pdata->tx_ring_count;
+
+	combined = min(rx, tx);
+	rx -= combined;
+	tx -= combined;
+
+	channels->combined_count = combined;
+	channels->rx_count = rx;
+	channels->tx_count = tx;
+}
+
+static void xgbe_print_set_channels_input(struct net_device *netdev,
+					  struct ethtool_channels *channels)
+{
+	netdev_err(netdev, "channel inputs: combined=%u, rx-only=%u, tx-only=%u\n",
+		   channels->combined_count, channels->rx_count,
+		   channels->tx_count);
+}
+
+static int xgbe_set_channels(struct net_device *netdev,
+			     struct ethtool_channels *channels)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	unsigned int rx, tx, combined;
+
+	/* Calculate maximums allowed:
+	 *   - Take into account the number of available IRQs
+	 *   - Do not take into account the number of online CPUs so that
+	 *     the user can over-subscribe if desired
+	 *   - Tx is additionally limited by the number of hardware queues
+	 */
+	rx = min(pdata->hw_feat.rx_ch_cnt, pdata->rx_max_channel_count);
+	rx = min(rx, pdata->channel_irq_count);
+	tx = min(pdata->hw_feat.tx_ch_cnt, pdata->tx_max_channel_count);
+	tx = min(tx, pdata->tx_max_q_count);
+	tx = min(tx, pdata->channel_irq_count);
+
+	combined = min(rx, tx);
+
+	/* Should not be setting other count */
+	if (channels->other_count) {
+		netdev_err(netdev,
+			   "other channel count must be zero\n");
+		return -EINVAL;
+	}
+
+	/* Require at least one Rx and Tx channel */
+	if (!channels->combined_count) {
+		if (!channels->rx_count || !channels->tx_count) {
+			netdev_err(netdev,
+				   "at least one Rx and one Tx channel is required\n");
+			xgbe_print_set_channels_input(netdev, channels);
+			return -EINVAL;
+		}
+	}
+
+	/* Check combined channels */
+	if (channels->combined_count > combined) {
+		netdev_err(netdev,
+			   "combined channel count cannot exceed %u\n",
+			   combined);
+		xgbe_print_set_channels_input(netdev, channels);
+		return -EINVAL;
+	}
+
+	/* Check for Rx/Tx specific channels */
+	combined = channels->combined_count;
+	tx -= combined;
+	rx -= combined;
+	if (channels->rx_count > rx) {
+		netdev_err(netdev,
+			   "Rx channel count cannot exceed %u when combined channel count is %u\n",
+			   rx, combined);
+		xgbe_print_set_channels_input(netdev, channels);
+		return -EINVAL;
+	}
+
+	if (channels->tx_count > tx) {
+		netdev_err(netdev,
+			   "Tx channel count cannot exceed %u when combined channel count is %u\n",
+			   tx, combined);
+		xgbe_print_set_channels_input(netdev, channels);
+		return -EINVAL;
+	}
+
+	rx = combined + channels->rx_count;
+	tx = combined + channels->tx_count;
+	netdev_notice(netdev, "final channel count assignment: combined=%u, rx-only=%u, tx-only=%u\n",
+		      min(rx, tx), rx - min(rx, tx), tx - min(rx, tx));
+
+	if ((rx == pdata->rx_ring_count) &&
+	    (tx == pdata->tx_ring_count))
+		goto out;
+
+	pdata->new_rx_ring_count = rx;
+	pdata->new_tx_ring_count = tx;
+
+	xgbe_full_restart_dev(pdata);
+
+out:
+	return 0;
+}
+
 static const struct ethtool_ops xgbe_ethtool_ops = {
 	.get_drvinfo = xgbe_get_drvinfo,
 	.get_msglevel = xgbe_get_msglevel,
@@ -729,6 +858,8 @@ static int xgbe_set_ringparam(struct net_device *netdev,
 	.get_module_eeprom = xgbe_get_module_eeprom,
 	.get_ringparam = xgbe_get_ringparam,
 	.set_ringparam = xgbe_set_ringparam,
+	.get_channels = xgbe_get_channels,
+	.set_channels = xgbe_set_channels,
 };
 
 const struct ethtool_ops *xgbe_get_ethtool_ops(void)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 7dc0fac..7a412cf 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -1122,6 +1122,9 @@ struct xgbe_prv_data {
 	unsigned int rx_ring_count;
 	unsigned int rx_desc_count;
 
+	unsigned int new_tx_ring_count;
+	unsigned int new_rx_ring_count;
+
 	unsigned int tx_max_q_count;
 	unsigned int rx_max_q_count;
 	unsigned int tx_q_count;
@@ -1336,6 +1339,7 @@ void xgbe_dump_rx_desc(struct xgbe_prv_data *, struct xgbe_ring *,
 void xgbe_init_rx_coalesce(struct xgbe_prv_data *);
 void xgbe_init_tx_coalesce(struct xgbe_prv_data *);
 void xgbe_restart_dev(struct xgbe_prv_data *pdata);
+void xgbe_full_restart_dev(struct xgbe_prv_data *pdata);
 
 #ifdef CONFIG_DEBUG_FS
 void xgbe_debugfs_init(struct xgbe_prv_data *);

^ permalink raw reply related

* [PATCH net-next 07/12] amd-xgbe: Prepare for ethtool set-channel support
From: Tom Lendacky @ 2018-05-21 21:59 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

In order to support being able to dynamically set/change the number of
Rx and Tx channels, update the code to:
 - Move alloc and free of device memory into callable functions
 - Move setting of the real number of Rx and Tx channels to device startup
 - Move mapping of the RSS channels to device startup

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c  |  108 ++++++++++++++++++-----------
 drivers/net/ethernet/amd/xgbe/xgbe-main.c |   20 -----
 2 files changed, 68 insertions(+), 60 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 2646c08..397e3a0 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1312,14 +1312,72 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller)
 	return 0;
 }
 
+static void xgbe_free_memory(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_desc_if *desc_if = &pdata->desc_if;
+
+	/* Free the ring descriptors and buffers */
+	desc_if->free_ring_resources(pdata);
+
+	/* Free the channel and ring structures */
+	xgbe_free_channels(pdata);
+}
+
+static int xgbe_alloc_memory(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_desc_if *desc_if = &pdata->desc_if;
+	struct net_device *netdev = pdata->netdev;
+	int ret;
+
+	/* Calculate the Rx buffer size before allocating rings */
+	pdata->rx_buf_size = xgbe_calc_rx_buf_size(netdev, netdev->mtu);
+
+	/* Allocate the channel and ring structures */
+	ret = xgbe_alloc_channels(pdata);
+	if (ret)
+		return ret;
+
+	/* Allocate the ring descriptors and buffers */
+	ret = desc_if->alloc_ring_resources(pdata);
+	if (ret)
+		goto err_channels;
+
+	/* Initialize the service and Tx timers */
+	xgbe_init_timers(pdata);
+
+	return 0;
+
+err_channels:
+	xgbe_free_memory(pdata);
+
+	return ret;
+}
+
 static int xgbe_start(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
 	struct xgbe_phy_if *phy_if = &pdata->phy_if;
 	struct net_device *netdev = pdata->netdev;
+	unsigned int i;
 	int ret;
 
-	DBGPR("-->xgbe_start\n");
+	/* Set the number of queues */
+	ret = netif_set_real_num_tx_queues(netdev, pdata->tx_ring_count);
+	if (ret) {
+		netdev_err(netdev, "error setting real tx queue count\n");
+		return ret;
+	}
+
+	ret = netif_set_real_num_rx_queues(netdev, pdata->rx_ring_count);
+	if (ret) {
+		netdev_err(netdev, "error setting real rx queue count\n");
+		return ret;
+	}
+
+	/* Set RSS lookup table data for programming */
+	for (i = 0; i < XGBE_RSS_MAX_TABLE_SIZE; i++)
+		XGMAC_SET_BITS(pdata->rss_table[i], MAC_RSSDR, DMCH,
+			       i % pdata->rx_ring_count);
 
 	ret = hw_if->init(pdata);
 	if (ret)
@@ -1347,8 +1405,6 @@ static int xgbe_start(struct xgbe_prv_data *pdata)
 
 	clear_bit(XGBE_STOPPED, &pdata->dev_state);
 
-	DBGPR("<--xgbe_start\n");
-
 	return 0;
 
 err_irqs:
@@ -1823,11 +1879,8 @@ static void xgbe_packet_info(struct xgbe_prv_data *pdata,
 static int xgbe_open(struct net_device *netdev)
 {
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
-	struct xgbe_desc_if *desc_if = &pdata->desc_if;
 	int ret;
 
-	DBGPR("-->xgbe_open\n");
-
 	/* Create the various names based on netdev name */
 	snprintf(pdata->an_name, sizeof(pdata->an_name) - 1, "%s-pcs",
 		 netdev_name(netdev));
@@ -1872,43 +1925,25 @@ static int xgbe_open(struct net_device *netdev)
 		goto err_sysclk;
 	}
 
-	/* Calculate the Rx buffer size before allocating rings */
-	ret = xgbe_calc_rx_buf_size(netdev, netdev->mtu);
-	if (ret < 0)
-		goto err_ptpclk;
-	pdata->rx_buf_size = ret;
-
-	/* Allocate the channel and ring structures */
-	ret = xgbe_alloc_channels(pdata);
-	if (ret)
-		goto err_ptpclk;
-
-	/* Allocate the ring descriptors and buffers */
-	ret = desc_if->alloc_ring_resources(pdata);
-	if (ret)
-		goto err_channels;
-
 	INIT_WORK(&pdata->service_work, xgbe_service);
 	INIT_WORK(&pdata->restart_work, xgbe_restart);
 	INIT_WORK(&pdata->stopdev_work, xgbe_stopdev);
 	INIT_WORK(&pdata->tx_tstamp_work, xgbe_tx_tstamp);
-	xgbe_init_timers(pdata);
+
+	ret = xgbe_alloc_memory(pdata);
+	if (ret)
+		goto err_ptpclk;
 
 	ret = xgbe_start(pdata);
 	if (ret)
-		goto err_rings;
+		goto err_mem;
 
 	clear_bit(XGBE_DOWN, &pdata->dev_state);
 
-	DBGPR("<--xgbe_open\n");
-
 	return 0;
 
-err_rings:
-	desc_if->free_ring_resources(pdata);
-
-err_channels:
-	xgbe_free_channels(pdata);
+err_mem:
+	xgbe_free_memory(pdata);
 
 err_ptpclk:
 	clk_disable_unprepare(pdata->ptpclk);
@@ -1928,18 +1963,11 @@ static int xgbe_open(struct net_device *netdev)
 static int xgbe_close(struct net_device *netdev)
 {
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
-	struct xgbe_desc_if *desc_if = &pdata->desc_if;
-
-	DBGPR("-->xgbe_close\n");
 
 	/* Stop the device */
 	xgbe_stop(pdata);
 
-	/* Free the ring descriptors and buffers */
-	desc_if->free_ring_resources(pdata);
-
-	/* Free the channel and ring structures */
-	xgbe_free_channels(pdata);
+	xgbe_free_memory(pdata);
 
 	/* Disable the clocks */
 	clk_disable_unprepare(pdata->ptpclk);
@@ -1953,8 +1981,6 @@ static int xgbe_close(struct net_device *netdev)
 
 	set_bit(XGBE_DOWN, &pdata->dev_state);
 
-	DBGPR("<--xgbe_close\n");
-
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index 441d0973..b41f236 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -265,7 +265,6 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata)
 {
 	struct net_device *netdev = pdata->netdev;
 	struct device *dev = pdata->dev;
-	unsigned int i;
 	int ret;
 
 	netdev->irq = pdata->dev_irq;
@@ -324,26 +323,9 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata)
 				pdata->tx_ring_count, pdata->rx_ring_count);
 	}
 
-	/* Set the number of queues */
-	ret = netif_set_real_num_tx_queues(netdev, pdata->tx_ring_count);
-	if (ret) {
-		dev_err(dev, "error setting real tx queue count\n");
-		return ret;
-	}
-
-	ret = netif_set_real_num_rx_queues(netdev, pdata->rx_ring_count);
-	if (ret) {
-		dev_err(dev, "error setting real rx queue count\n");
-		return ret;
-	}
-
-	/* Initialize RSS hash key and lookup table */
+	/* Initialize RSS hash key */
 	netdev_rss_key_fill(pdata->rss_key, sizeof(pdata->rss_key));
 
-	for (i = 0; i < XGBE_RSS_MAX_TABLE_SIZE; i++)
-		XGMAC_SET_BITS(pdata->rss_table[i], MAC_RSSDR, DMCH,
-			       i % pdata->rx_ring_count);
-
 	XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, IP2TE, 1);
 	XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, TCP4TE, 1);
 	XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, UDP4TE, 1);

^ permalink raw reply related

* [PATCH net-next 06/12] amd-xgbe: Add ethtool show/set ring parameter support
From: Tom Lendacky @ 2018-05-21 21:59 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

Add ethtool support to show and set the number of the Rx and Tx ring
descriptors.  Changing the ring configuration will result in a device
restart.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c     |    6 --
 drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c |   65 ++++++++++++++++++++++++++
 drivers/net/ethernet/amd/xgbe/xgbe.h         |    6 ++
 3 files changed, 72 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 7c204f0..2646c08 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1426,10 +1426,8 @@ static void xgbe_stopdev(struct work_struct *work)
 	netdev_alert(pdata->netdev, "device stopped\n");
 }
 
-static void xgbe_restart_dev(struct xgbe_prv_data *pdata)
+void xgbe_restart_dev(struct xgbe_prv_data *pdata)
 {
-	DBGPR("-->xgbe_restart_dev\n");
-
 	/* If not running, "restart" will happen on open */
 	if (!netif_running(pdata->netdev))
 		return;
@@ -1440,8 +1438,6 @@ static void xgbe_restart_dev(struct xgbe_prv_data *pdata)
 	xgbe_free_rx_data(pdata);
 
 	xgbe_start(pdata);
-
-	DBGPR("<--xgbe_restart_dev\n");
 }
 
 static void xgbe_restart(struct work_struct *work)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index 57394b77..d12f982 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -642,6 +642,69 @@ static int xgbe_get_module_eeprom(struct net_device *netdev,
 	return pdata->phy_if.module_eeprom(pdata, eeprom, data);
 }
 
+static void xgbe_get_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ringparam)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+
+	ringparam->rx_max_pending = XGBE_RX_DESC_CNT_MAX;
+	ringparam->tx_max_pending = XGBE_TX_DESC_CNT_MAX;
+	ringparam->rx_pending = pdata->rx_desc_count;
+	ringparam->tx_pending = pdata->tx_desc_count;
+}
+
+static int xgbe_set_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ringparam)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	unsigned int rx, tx;
+
+	if (ringparam->rx_mini_pending || ringparam->rx_jumbo_pending) {
+		netdev_err(netdev, "unsupported ring parameter\n");
+		return -EINVAL;
+	}
+
+	if ((ringparam->rx_pending < XGBE_RX_DESC_CNT_MIN) ||
+	    (ringparam->rx_pending > XGBE_RX_DESC_CNT_MAX)) {
+		netdev_err(netdev,
+			   "rx ring parameter must be between %u and %u\n",
+			   XGBE_RX_DESC_CNT_MIN, XGBE_RX_DESC_CNT_MAX);
+		return -EINVAL;
+	}
+
+	if ((ringparam->tx_pending < XGBE_TX_DESC_CNT_MIN) ||
+	    (ringparam->tx_pending > XGBE_TX_DESC_CNT_MAX)) {
+		netdev_err(netdev,
+			   "tx ring parameter must be between %u and %u\n",
+			   XGBE_TX_DESC_CNT_MIN, XGBE_TX_DESC_CNT_MAX);
+		return -EINVAL;
+	}
+
+	rx = __rounddown_pow_of_two(ringparam->rx_pending);
+	if (rx != ringparam->rx_pending)
+		netdev_notice(netdev,
+			      "rx ring parameter rounded to power of two: %u\n",
+			      rx);
+
+	tx = __rounddown_pow_of_two(ringparam->tx_pending);
+	if (tx != ringparam->tx_pending)
+		netdev_notice(netdev,
+			      "tx ring parameter rounded to power of two: %u\n",
+			      tx);
+
+	if ((rx == pdata->rx_desc_count) &&
+	    (tx == pdata->tx_desc_count))
+		goto out;
+
+	pdata->rx_desc_count = rx;
+	pdata->tx_desc_count = tx;
+
+	xgbe_restart_dev(pdata);
+
+out:
+	return 0;
+}
+
 static const struct ethtool_ops xgbe_ethtool_ops = {
 	.get_drvinfo = xgbe_get_drvinfo,
 	.get_msglevel = xgbe_get_msglevel,
@@ -664,6 +727,8 @@ static int xgbe_get_module_eeprom(struct net_device *netdev,
 	.set_link_ksettings = xgbe_set_link_ksettings,
 	.get_module_info = xgbe_get_module_info,
 	.get_module_eeprom = xgbe_get_module_eeprom,
+	.get_ringparam = xgbe_get_ringparam,
+	.set_ringparam = xgbe_set_ringparam,
 };
 
 const struct ethtool_ops *xgbe_get_ethtool_ops(void)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index f0f455b..7dc0fac 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -144,6 +144,11 @@
 #define XGBE_TX_DESC_MAX_PROC	(XGBE_TX_DESC_CNT >> 1)
 #define XGBE_RX_DESC_CNT	512
 
+#define XGBE_TX_DESC_CNT_MIN	64
+#define XGBE_TX_DESC_CNT_MAX	4096
+#define XGBE_RX_DESC_CNT_MIN	64
+#define XGBE_RX_DESC_CNT_MAX	4096
+
 #define XGBE_TX_MAX_BUF_SIZE	(0x3fff & ~(64 - 1))
 
 /* Descriptors required for maximum contiguous TSO/GSO packet */
@@ -1330,6 +1335,7 @@ void xgbe_dump_rx_desc(struct xgbe_prv_data *, struct xgbe_ring *,
 int xgbe_powerdown(struct net_device *, unsigned int);
 void xgbe_init_rx_coalesce(struct xgbe_prv_data *);
 void xgbe_init_tx_coalesce(struct xgbe_prv_data *);
+void xgbe_restart_dev(struct xgbe_prv_data *pdata);
 
 #ifdef CONFIG_DEBUG_FS
 void xgbe_debugfs_init(struct xgbe_prv_data *);

^ permalink raw reply related

* [PATCH net-next 05/12] amd-xgbe: Add ethtool support to retrieve SFP module info
From: Tom Lendacky @ 2018-05-21 21:59 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

Add support to get SFP module information using ethtool.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c |   18 +++
 drivers/net/ethernet/amd/xgbe/xgbe-mdio.c    |   21 ++++
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c  |  137 ++++++++++++++++++++++++++
 drivers/net/ethernet/amd/xgbe/xgbe.h         |   13 ++
 4 files changed, 189 insertions(+)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index ff397bb..57394b77 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -626,6 +626,22 @@ static int xgbe_get_ts_info(struct net_device *netdev,
 	return 0;
 }
 
+static int xgbe_get_module_info(struct net_device *netdev,
+				struct ethtool_modinfo *modinfo)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+
+	return pdata->phy_if.module_info(pdata, modinfo);
+}
+
+static int xgbe_get_module_eeprom(struct net_device *netdev,
+				  struct ethtool_eeprom *eeprom, u8 *data)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+
+	return pdata->phy_if.module_eeprom(pdata, eeprom, data);
+}
+
 static const struct ethtool_ops xgbe_ethtool_ops = {
 	.get_drvinfo = xgbe_get_drvinfo,
 	.get_msglevel = xgbe_get_msglevel,
@@ -646,6 +662,8 @@ static int xgbe_get_ts_info(struct net_device *netdev,
 	.get_ts_info = xgbe_get_ts_info,
 	.get_link_ksettings = xgbe_get_link_ksettings,
 	.set_link_ksettings = xgbe_set_link_ksettings,
+	.get_module_info = xgbe_get_module_info,
+	.get_module_eeprom = xgbe_get_module_eeprom,
 };
 
 const struct ethtool_ops *xgbe_get_ethtool_ops(void)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 1b45cd7..9c39c72 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -126,6 +126,24 @@
 #include "xgbe.h"
 #include "xgbe-common.h"
 
+static int xgbe_phy_module_eeprom(struct xgbe_prv_data *pdata,
+				  struct ethtool_eeprom *eeprom, u8 *data)
+{
+	if (!pdata->phy_if.phy_impl.module_eeprom)
+		return -ENXIO;
+
+	return pdata->phy_if.phy_impl.module_eeprom(pdata, eeprom, data);
+}
+
+static int xgbe_phy_module_info(struct xgbe_prv_data *pdata,
+				struct ethtool_modinfo *modinfo)
+{
+	if (!pdata->phy_if.phy_impl.module_info)
+		return -ENXIO;
+
+	return pdata->phy_if.phy_impl.module_info(pdata, modinfo);
+}
+
 static void xgbe_an37_clear_interrupts(struct xgbe_prv_data *pdata)
 {
 	int reg;
@@ -1639,4 +1657,7 @@ void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *phy_if)
 	phy_if->phy_valid_speed = xgbe_phy_valid_speed;
 
 	phy_if->an_isr          = xgbe_an_combined_isr;
+
+	phy_if->module_info     = xgbe_phy_module_info;
+	phy_if->module_eeprom   = xgbe_phy_module_eeprom;
 }
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index cb15caf..141bb13 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -119,6 +119,7 @@
 #include <linux/kmod.h>
 #include <linux/mdio.h>
 #include <linux/phy.h>
+#include <linux/ethtool.h>
 
 #include "xgbe.h"
 #include "xgbe-common.h"
@@ -270,6 +271,15 @@ struct xgbe_sfp_eeprom {
 	u8 vendor[32];
 };
 
+#define XGBE_SFP_DIAGS_SUPPORTED(_x)			\
+	((_x)->extd[XGBE_SFP_EXTD_SFF_8472] &&		\
+	 !((_x)->extd[XGBE_SFP_EXTD_DIAG] & XGBE_SFP_EXTD_DIAG_ADDR_CHANGE))
+
+#define XGBE_SFP_EEPROM_BASE_LEN	256
+#define XGBE_SFP_EEPROM_DIAG_LEN	256
+#define XGBE_SFP_EEPROM_MAX		(XGBE_SFP_EEPROM_BASE_LEN +	\
+					 XGBE_SFP_EEPROM_DIAG_LEN)
+
 #define XGBE_BEL_FUSE_VENDOR	"BEL-FUSE        "
 #define XGBE_BEL_FUSE_PARTNO	"1GBT-SFP06      "
 
@@ -1301,6 +1311,130 @@ static void xgbe_phy_sfp_detect(struct xgbe_prv_data *pdata)
 	xgbe_phy_put_comm_ownership(pdata);
 }
 
+static int xgbe_phy_module_eeprom(struct xgbe_prv_data *pdata,
+				  struct ethtool_eeprom *eeprom, u8 *data)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	u8 eeprom_addr, eeprom_data[XGBE_SFP_EEPROM_MAX];
+	struct xgbe_sfp_eeprom *sfp_eeprom;
+	unsigned int i, j, rem;
+	int ret;
+
+	rem = eeprom->len;
+
+	if (!eeprom->len) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	if ((eeprom->offset + eeprom->len) > XGBE_SFP_EEPROM_MAX) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	if (phy_data->port_mode != XGBE_PORT_MODE_SFP) {
+		ret = -ENXIO;
+		goto done;
+	}
+
+	if (!netif_running(pdata->netdev)) {
+		ret = -EIO;
+		goto done;
+	}
+
+	if (phy_data->sfp_mod_absent) {
+		ret = -EIO;
+		goto done;
+	}
+
+	ret = xgbe_phy_get_comm_ownership(pdata);
+	if (ret) {
+		ret = -EIO;
+		goto done;
+	}
+
+	ret = xgbe_phy_sfp_get_mux(pdata);
+	if (ret) {
+		netdev_err(pdata->netdev, "I2C error setting SFP MUX\n");
+		ret = -EIO;
+		goto put_own;
+	}
+
+	/* Read the SFP serial ID eeprom */
+	eeprom_addr = 0;
+	ret = xgbe_phy_i2c_read(pdata, XGBE_SFP_SERIAL_ID_ADDRESS,
+				&eeprom_addr, sizeof(eeprom_addr),
+				eeprom_data, XGBE_SFP_EEPROM_BASE_LEN);
+	if (ret) {
+		netdev_err(pdata->netdev,
+			   "I2C error reading SFP EEPROM\n");
+		ret = -EIO;
+		goto put_mux;
+	}
+
+	sfp_eeprom = (struct xgbe_sfp_eeprom *)eeprom_data;
+
+	if (XGBE_SFP_DIAGS_SUPPORTED(sfp_eeprom)) {
+		/* Read the SFP diagnostic eeprom */
+		eeprom_addr = 0;
+		ret = xgbe_phy_i2c_read(pdata, XGBE_SFP_DIAG_INFO_ADDRESS,
+					&eeprom_addr, sizeof(eeprom_addr),
+					eeprom_data + XGBE_SFP_EEPROM_BASE_LEN,
+					XGBE_SFP_EEPROM_DIAG_LEN);
+		if (ret) {
+			netdev_err(pdata->netdev,
+				   "I2C error reading SFP DIAGS\n");
+			ret = -EIO;
+			goto put_mux;
+		}
+	}
+
+	for (i = 0, j = eeprom->offset; i < eeprom->len; i++, j++) {
+		if ((j >= XGBE_SFP_EEPROM_BASE_LEN) &&
+		    !XGBE_SFP_DIAGS_SUPPORTED(sfp_eeprom))
+			break;
+
+		data[i] = eeprom_data[j];
+		rem--;
+	}
+
+put_mux:
+	xgbe_phy_sfp_put_mux(pdata);
+
+put_own:
+	xgbe_phy_put_comm_ownership(pdata);
+
+done:
+	eeprom->len -= rem;
+
+	return ret;
+}
+
+static int xgbe_phy_module_info(struct xgbe_prv_data *pdata,
+				struct ethtool_modinfo *modinfo)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+	if (phy_data->port_mode != XGBE_PORT_MODE_SFP)
+		return -ENXIO;
+
+	if (!netif_running(pdata->netdev))
+		return -EIO;
+
+	if (phy_data->sfp_mod_absent)
+		return -EIO;
+
+	if (XGBE_SFP_DIAGS_SUPPORTED(&phy_data->sfp_eeprom)) {
+		modinfo->type = ETH_MODULE_SFF_8472;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+	} else {
+		modinfo->type = ETH_MODULE_SFF_8079;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+	}
+
+	return 0;
+}
+
 static void xgbe_phy_phydev_flowctrl(struct xgbe_prv_data *pdata)
 {
 	struct ethtool_link_ksettings *lks = &pdata->phy.lks;
@@ -3196,4 +3330,7 @@ void xgbe_init_function_ptrs_phy_v2(struct xgbe_phy_if *phy_if)
 
 	phy_impl->kr_training_pre	= xgbe_phy_kr_training_pre;
 	phy_impl->kr_training_post	= xgbe_phy_kr_training_post;
+
+	phy_impl->module_info		= xgbe_phy_module_info;
+	phy_impl->module_eeprom		= xgbe_phy_module_eeprom;
 }
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 54e43ad3..f0f455b 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -835,6 +835,7 @@ struct xgbe_hw_if {
  *   Optional routines:
  *     an_pre, an_post
  *     kr_training_pre, kr_training_post
+ *     module_info, module_eeprom
  */
 struct xgbe_phy_impl_if {
 	/* Perform Setup/teardown actions */
@@ -883,6 +884,12 @@ struct xgbe_phy_impl_if {
 	/* Pre/Post KR training enablement support */
 	void (*kr_training_pre)(struct xgbe_prv_data *);
 	void (*kr_training_post)(struct xgbe_prv_data *);
+
+	/* SFP module related info */
+	int (*module_info)(struct xgbe_prv_data *pdata,
+			   struct ethtool_modinfo *modinfo);
+	int (*module_eeprom)(struct xgbe_prv_data *pdata,
+			     struct ethtool_eeprom *eeprom, u8 *data);
 };
 
 struct xgbe_phy_if {
@@ -905,6 +912,12 @@ struct xgbe_phy_if {
 	/* For single interrupt support */
 	irqreturn_t (*an_isr)(struct xgbe_prv_data *);
 
+	/* For ethtool PHY support */
+	int (*module_info)(struct xgbe_prv_data *pdata,
+			   struct ethtool_modinfo *modinfo);
+	int (*module_eeprom)(struct xgbe_prv_data *pdata,
+			     struct ethtool_eeprom *eeprom, u8 *data);
+
 	/* PHY implementation specific services */
 	struct xgbe_phy_impl_if phy_impl;
 };

^ permalink raw reply related

* [PATCH net-next 04/12] amd-xgbe: Remove field that indicates SFP diagnostic support
From: Tom Lendacky @ 2018-05-21 21:58 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

The driver currently sets an indication of whether the SFP supports, and
that the driver can obtain, diagnostics data.  This isn't currently used
by the driver and the logic to set this indicator is flawed because the
field is cleared each time the SFP is checked and only set when a new SFP
is detected.  Remove this field and the logic supporting it.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c |    9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 05003be..cb15caf 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -343,7 +343,6 @@ struct xgbe_phy_data {
 	unsigned int sfp_rx_los;
 	unsigned int sfp_tx_fault;
 	unsigned int sfp_mod_absent;
-	unsigned int sfp_diags;
 	unsigned int sfp_changed;
 	unsigned int sfp_phy_avail;
 	unsigned int sfp_cable_len;
@@ -1211,13 +1210,6 @@ static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata)
 
 		memcpy(&phy_data->sfp_eeprom, &sfp_eeprom, sizeof(sfp_eeprom));
 
-		if (sfp_eeprom.extd[XGBE_SFP_EXTD_SFF_8472]) {
-			u8 diag_type = sfp_eeprom.extd[XGBE_SFP_EXTD_DIAG];
-
-			if (!(diag_type & XGBE_SFP_EXTD_DIAG_ADDR_CHANGE))
-				phy_data->sfp_diags = 1;
-		}
-
 		xgbe_phy_free_phy_device(pdata);
 	} else {
 		phy_data->sfp_changed = 0;
@@ -1267,7 +1259,6 @@ static void xgbe_phy_sfp_reset(struct xgbe_phy_data *phy_data)
 	phy_data->sfp_rx_los = 0;
 	phy_data->sfp_tx_fault = 0;
 	phy_data->sfp_mod_absent = 1;
-	phy_data->sfp_diags = 0;
 	phy_data->sfp_base = XGBE_SFP_BASE_UNKNOWN;
 	phy_data->sfp_cable = XGBE_SFP_CABLE_UNKNOWN;
 	phy_data->sfp_speed = XGBE_SFP_SPEED_UNKNOWN;

^ permalink raw reply related

* [PATCH net-next 03/12] amd-xgbe: Remove use of comm_owned field
From: Tom Lendacky @ 2018-05-21 21:58 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

The comm_owned field can hide logic where double locking is attempted
and prevent multiple threads for the same device from accessing the
mutex properly.  Remove the comm_owned field and use the mutex API
exclusively for gaining ownership.  The current driver has been audited
and is obtaining communications ownership properly.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c |   16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 123ceb0..05003be 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -327,8 +327,6 @@ struct xgbe_phy_data {
 
 	unsigned int mdio_addr;
 
-	unsigned int comm_owned;
-
 	/* SFP Support */
 	enum xgbe_sfp_comm sfp_comm;
 	unsigned int sfp_mux_address;
@@ -382,12 +380,6 @@ struct xgbe_phy_data {
 static int xgbe_phy_i2c_xfer(struct xgbe_prv_data *pdata,
 			     struct xgbe_i2c_op *i2c_op)
 {
-	struct xgbe_phy_data *phy_data = pdata->phy_data;
-
-	/* Be sure we own the bus */
-	if (WARN_ON(!phy_data->comm_owned))
-		return -EIO;
-
 	return pdata->i2c_if.i2c_xfer(pdata, i2c_op);
 }
 
@@ -549,10 +541,6 @@ static int xgbe_phy_sfp_get_mux(struct xgbe_prv_data *pdata)
 
 static void xgbe_phy_put_comm_ownership(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_phy_data *phy_data = pdata->phy_data;
-
-	phy_data->comm_owned = 0;
-
 	mutex_unlock(&xgbe_phy_comm_lock);
 }
 
@@ -562,9 +550,6 @@ static int xgbe_phy_get_comm_ownership(struct xgbe_prv_data *pdata)
 	unsigned long timeout;
 	unsigned int mutex_id;
 
-	if (phy_data->comm_owned)
-		return 0;
-
 	/* The I2C and MDIO/GPIO bus is multiplexed between multiple devices,
 	 * the driver needs to take the software mutex and then the hardware
 	 * mutexes before being able to use the busses.
@@ -593,7 +578,6 @@ static int xgbe_phy_get_comm_ownership(struct xgbe_prv_data *pdata)
 		XP_IOWRITE(pdata, XP_I2C_MUTEX, mutex_id);
 		XP_IOWRITE(pdata, XP_MDIO_MUTEX, mutex_id);
 
-		phy_data->comm_owned = 1;
 		return 0;
 	}
 

^ permalink raw reply related

* [PATCH net-next 02/12] amd-xgbe: Read and save the port property registers during probe
From: Tom Lendacky @ 2018-05-21 21:58 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

Read and save the port property registers once during the device probe
and then use the saved values as they are needed.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-pci.c    |   34 ++++++++++----
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c |   68 ++++++++++++---------------
 drivers/net/ethernet/amd/xgbe/xgbe.h        |    7 +++
 3 files changed, 62 insertions(+), 47 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
index 7b63521..7b86240 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
@@ -335,12 +335,29 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	pdata->awcr = XGBE_DMA_PCI_AWCR;
 	pdata->awarcr = XGBE_DMA_PCI_AWARCR;
 
+	/* Read the port property registers */
+	pdata->pp0 = XP_IOREAD(pdata, XP_PROP_0);
+	pdata->pp1 = XP_IOREAD(pdata, XP_PROP_1);
+	pdata->pp2 = XP_IOREAD(pdata, XP_PROP_2);
+	pdata->pp3 = XP_IOREAD(pdata, XP_PROP_3);
+	pdata->pp4 = XP_IOREAD(pdata, XP_PROP_4);
+	if (netif_msg_probe(pdata)) {
+		dev_dbg(dev, "port property 0 = %#010x\n", pdata->pp0);
+		dev_dbg(dev, "port property 1 = %#010x\n", pdata->pp1);
+		dev_dbg(dev, "port property 2 = %#010x\n", pdata->pp2);
+		dev_dbg(dev, "port property 3 = %#010x\n", pdata->pp3);
+		dev_dbg(dev, "port property 4 = %#010x\n", pdata->pp4);
+	}
+
 	/* Set the maximum channels and queues */
-	reg = XP_IOREAD(pdata, XP_PROP_1);
-	pdata->tx_max_channel_count = XP_GET_BITS(reg, XP_PROP_1, MAX_TX_DMA);
-	pdata->rx_max_channel_count = XP_GET_BITS(reg, XP_PROP_1, MAX_RX_DMA);
-	pdata->tx_max_q_count = XP_GET_BITS(reg, XP_PROP_1, MAX_TX_QUEUES);
-	pdata->rx_max_q_count = XP_GET_BITS(reg, XP_PROP_1, MAX_RX_QUEUES);
+	pdata->tx_max_channel_count = XP_GET_BITS(pdata->pp1, XP_PROP_1,
+						  MAX_TX_DMA);
+	pdata->rx_max_channel_count = XP_GET_BITS(pdata->pp1, XP_PROP_1,
+						  MAX_RX_DMA);
+	pdata->tx_max_q_count = XP_GET_BITS(pdata->pp1, XP_PROP_1,
+					    MAX_TX_QUEUES);
+	pdata->rx_max_q_count = XP_GET_BITS(pdata->pp1, XP_PROP_1,
+					    MAX_RX_QUEUES);
 	if (netif_msg_probe(pdata)) {
 		dev_dbg(dev, "max tx/rx channel count = %u/%u\n",
 			pdata->tx_max_channel_count,
@@ -353,12 +370,13 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	xgbe_set_counts(pdata);
 
 	/* Set the maximum fifo amounts */
-	reg = XP_IOREAD(pdata, XP_PROP_2);
-	pdata->tx_max_fifo_size = XP_GET_BITS(reg, XP_PROP_2, TX_FIFO_SIZE);
+	pdata->tx_max_fifo_size = XP_GET_BITS(pdata->pp2, XP_PROP_2,
+					      TX_FIFO_SIZE);
 	pdata->tx_max_fifo_size *= 16384;
 	pdata->tx_max_fifo_size = min(pdata->tx_max_fifo_size,
 				      pdata->vdata->tx_max_fifo_size);
-	pdata->rx_max_fifo_size = XP_GET_BITS(reg, XP_PROP_2, RX_FIFO_SIZE);
+	pdata->rx_max_fifo_size = XP_GET_BITS(pdata->pp2, XP_PROP_2,
+					      RX_FIFO_SIZE);
 	pdata->rx_max_fifo_size *= 16384;
 	pdata->rx_max_fifo_size = min(pdata->rx_max_fifo_size,
 				      pdata->vdata->rx_max_fifo_size);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index aac8843..123ceb0 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -2421,22 +2421,21 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
 static void xgbe_phy_sfp_gpio_setup(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	unsigned int reg;
-
-	reg = XP_IOREAD(pdata, XP_PROP_3);
 
 	phy_data->sfp_gpio_address = XGBE_GPIO_ADDRESS_PCA9555 +
-				     XP_GET_BITS(reg, XP_PROP_3, GPIO_ADDR);
+				     XP_GET_BITS(pdata->pp3, XP_PROP_3,
+						 GPIO_ADDR);
 
-	phy_data->sfp_gpio_mask = XP_GET_BITS(reg, XP_PROP_3, GPIO_MASK);
+	phy_data->sfp_gpio_mask = XP_GET_BITS(pdata->pp3, XP_PROP_3,
+					      GPIO_MASK);
 
-	phy_data->sfp_gpio_rx_los = XP_GET_BITS(reg, XP_PROP_3,
+	phy_data->sfp_gpio_rx_los = XP_GET_BITS(pdata->pp3, XP_PROP_3,
 						GPIO_RX_LOS);
-	phy_data->sfp_gpio_tx_fault = XP_GET_BITS(reg, XP_PROP_3,
+	phy_data->sfp_gpio_tx_fault = XP_GET_BITS(pdata->pp3, XP_PROP_3,
 						  GPIO_TX_FAULT);
-	phy_data->sfp_gpio_mod_absent = XP_GET_BITS(reg, XP_PROP_3,
+	phy_data->sfp_gpio_mod_absent = XP_GET_BITS(pdata->pp3, XP_PROP_3,
 						    GPIO_MOD_ABS);
-	phy_data->sfp_gpio_rate_select = XP_GET_BITS(reg, XP_PROP_3,
+	phy_data->sfp_gpio_rate_select = XP_GET_BITS(pdata->pp3, XP_PROP_3,
 						     GPIO_RATE_SELECT);
 
 	if (netif_msg_probe(pdata)) {
@@ -2458,18 +2457,17 @@ static void xgbe_phy_sfp_gpio_setup(struct xgbe_prv_data *pdata)
 static void xgbe_phy_sfp_comm_setup(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	unsigned int reg, mux_addr_hi, mux_addr_lo;
+	unsigned int mux_addr_hi, mux_addr_lo;
 
-	reg = XP_IOREAD(pdata, XP_PROP_4);
-
-	mux_addr_hi = XP_GET_BITS(reg, XP_PROP_4, MUX_ADDR_HI);
-	mux_addr_lo = XP_GET_BITS(reg, XP_PROP_4, MUX_ADDR_LO);
+	mux_addr_hi = XP_GET_BITS(pdata->pp4, XP_PROP_4, MUX_ADDR_HI);
+	mux_addr_lo = XP_GET_BITS(pdata->pp4, XP_PROP_4, MUX_ADDR_LO);
 	if (mux_addr_lo == XGBE_SFP_DIRECT)
 		return;
 
 	phy_data->sfp_comm = XGBE_SFP_COMM_PCA9545;
 	phy_data->sfp_mux_address = (mux_addr_hi << 2) + mux_addr_lo;
-	phy_data->sfp_mux_channel = XP_GET_BITS(reg, XP_PROP_4, MUX_CHAN);
+	phy_data->sfp_mux_channel = XP_GET_BITS(pdata->pp4, XP_PROP_4,
+						MUX_CHAN);
 
 	if (netif_msg_probe(pdata)) {
 		dev_dbg(pdata->dev, "SFP: mux_address=%#x\n",
@@ -2592,13 +2590,11 @@ static bool xgbe_phy_redrv_error(struct xgbe_phy_data *phy_data)
 static int xgbe_phy_mdio_reset_setup(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	unsigned int reg;
 
 	if (phy_data->conn_type != XGBE_CONN_TYPE_MDIO)
 		return 0;
 
-	reg = XP_IOREAD(pdata, XP_PROP_3);
-	phy_data->mdio_reset = XP_GET_BITS(reg, XP_PROP_3, MDIO_RESET);
+	phy_data->mdio_reset = XP_GET_BITS(pdata->pp3, XP_PROP_3, MDIO_RESET);
 	switch (phy_data->mdio_reset) {
 	case XGBE_MDIO_RESET_NONE:
 	case XGBE_MDIO_RESET_I2C_GPIO:
@@ -2612,12 +2608,12 @@ static int xgbe_phy_mdio_reset_setup(struct xgbe_prv_data *pdata)
 
 	if (phy_data->mdio_reset == XGBE_MDIO_RESET_I2C_GPIO) {
 		phy_data->mdio_reset_addr = XGBE_GPIO_ADDRESS_PCA9555 +
-					    XP_GET_BITS(reg, XP_PROP_3,
+					    XP_GET_BITS(pdata->pp3, XP_PROP_3,
 							MDIO_RESET_I2C_ADDR);
-		phy_data->mdio_reset_gpio = XP_GET_BITS(reg, XP_PROP_3,
+		phy_data->mdio_reset_gpio = XP_GET_BITS(pdata->pp3, XP_PROP_3,
 							MDIO_RESET_I2C_GPIO);
 	} else if (phy_data->mdio_reset == XGBE_MDIO_RESET_INT_GPIO) {
-		phy_data->mdio_reset_gpio = XP_GET_BITS(reg, XP_PROP_3,
+		phy_data->mdio_reset_gpio = XP_GET_BITS(pdata->pp3, XP_PROP_3,
 							MDIO_RESET_INT_GPIO);
 	}
 
@@ -2707,12 +2703,9 @@ static bool xgbe_phy_conn_type_mismatch(struct xgbe_prv_data *pdata)
 
 static bool xgbe_phy_port_enabled(struct xgbe_prv_data *pdata)
 {
-	unsigned int reg;
-
-	reg = XP_IOREAD(pdata, XP_PROP_0);
-	if (!XP_GET_BITS(reg, XP_PROP_0, PORT_SPEEDS))
+	if (!XP_GET_BITS(pdata->pp0, XP_PROP_0, PORT_SPEEDS))
 		return false;
-	if (!XP_GET_BITS(reg, XP_PROP_0, CONN_TYPE))
+	if (!XP_GET_BITS(pdata->pp0, XP_PROP_0, CONN_TYPE))
 		return false;
 
 	return true;
@@ -2921,7 +2914,6 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
 	struct ethtool_link_ksettings *lks = &pdata->phy.lks;
 	struct xgbe_phy_data *phy_data;
 	struct mii_bus *mii;
-	unsigned int reg;
 	int ret;
 
 	/* Check if enabled */
@@ -2940,12 +2932,11 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
 		return -ENOMEM;
 	pdata->phy_data = phy_data;
 
-	reg = XP_IOREAD(pdata, XP_PROP_0);
-	phy_data->port_mode = XP_GET_BITS(reg, XP_PROP_0, PORT_MODE);
-	phy_data->port_id = XP_GET_BITS(reg, XP_PROP_0, PORT_ID);
-	phy_data->port_speeds = XP_GET_BITS(reg, XP_PROP_0, PORT_SPEEDS);
-	phy_data->conn_type = XP_GET_BITS(reg, XP_PROP_0, CONN_TYPE);
-	phy_data->mdio_addr = XP_GET_BITS(reg, XP_PROP_0, MDIO_ADDR);
+	phy_data->port_mode = XP_GET_BITS(pdata->pp0, XP_PROP_0, PORT_MODE);
+	phy_data->port_id = XP_GET_BITS(pdata->pp0, XP_PROP_0, PORT_ID);
+	phy_data->port_speeds = XP_GET_BITS(pdata->pp0, XP_PROP_0, PORT_SPEEDS);
+	phy_data->conn_type = XP_GET_BITS(pdata->pp0, XP_PROP_0, CONN_TYPE);
+	phy_data->mdio_addr = XP_GET_BITS(pdata->pp0, XP_PROP_0, MDIO_ADDR);
 	if (netif_msg_probe(pdata)) {
 		dev_dbg(pdata->dev, "port mode=%u\n", phy_data->port_mode);
 		dev_dbg(pdata->dev, "port id=%u\n", phy_data->port_id);
@@ -2954,12 +2945,11 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
 		dev_dbg(pdata->dev, "mdio addr=%u\n", phy_data->mdio_addr);
 	}
 
-	reg = XP_IOREAD(pdata, XP_PROP_4);
-	phy_data->redrv = XP_GET_BITS(reg, XP_PROP_4, REDRV_PRESENT);
-	phy_data->redrv_if = XP_GET_BITS(reg, XP_PROP_4, REDRV_IF);
-	phy_data->redrv_addr = XP_GET_BITS(reg, XP_PROP_4, REDRV_ADDR);
-	phy_data->redrv_lane = XP_GET_BITS(reg, XP_PROP_4, REDRV_LANE);
-	phy_data->redrv_model = XP_GET_BITS(reg, XP_PROP_4, REDRV_MODEL);
+	phy_data->redrv = XP_GET_BITS(pdata->pp4, XP_PROP_4, REDRV_PRESENT);
+	phy_data->redrv_if = XP_GET_BITS(pdata->pp4, XP_PROP_4, REDRV_IF);
+	phy_data->redrv_addr = XP_GET_BITS(pdata->pp4, XP_PROP_4, REDRV_ADDR);
+	phy_data->redrv_lane = XP_GET_BITS(pdata->pp4, XP_PROP_4, REDRV_LANE);
+	phy_data->redrv_model = XP_GET_BITS(pdata->pp4, XP_PROP_4, REDRV_MODEL);
 	if (phy_data->redrv && netif_msg_probe(pdata)) {
 		dev_dbg(pdata->dev, "redrv present\n");
 		dev_dbg(pdata->dev, "redrv i/f=%u\n", phy_data->redrv_if);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 95d4b56..54e43ad3 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -1027,6 +1027,13 @@ struct xgbe_prv_data {
 	void __iomem *xprop_regs;	/* XGBE property registers */
 	void __iomem *xi2c_regs;	/* XGBE I2C CSRs */
 
+	/* Port property registers */
+	unsigned int pp0;
+	unsigned int pp1;
+	unsigned int pp2;
+	unsigned int pp3;
+	unsigned int pp4;
+
 	/* Overall device lock */
 	spinlock_t lock;
 

^ permalink raw reply related

* [PATCH net-next 01/12] amd-xgbe: Fix debug output of max channel counts
From: Tom Lendacky @ 2018-05-21 21:58 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

A debug output print statement uses the wrong variable to output the
maximum Rx channel count (cut and paste error, basically).  Fix the
statement to use the proper variable.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-pci.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
index 82d1f41..7b63521 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
@@ -344,7 +344,7 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (netif_msg_probe(pdata)) {
 		dev_dbg(dev, "max tx/rx channel count = %u/%u\n",
 			pdata->tx_max_channel_count,
-			pdata->tx_max_channel_count);
+			pdata->rx_max_channel_count);
 		dev_dbg(dev, "max tx/rx hw queue count = %u/%u\n",
 			pdata->tx_max_q_count, pdata->rx_max_q_count);
 	}

^ permalink raw reply related

* [PATCH net-next 00/12] amd-xgbe: AMD XGBE driver updates 2018-05-21
From: Tom Lendacky @ 2018-05-21 21:58 UTC (permalink / raw)
  To: netdev; +Cc: David Miller

The following updates are included in this driver update series:

- Fix the debug output for the max channels count
- Read (once) and save the port property registers during probe
- Remove the use of the comm_owned field
- Remove unused SFP diagnostic support indicator field
- Add ethtool --module-info support
- Add ethtool --show-ring/--set-ring support
- Update the driver in preparation for ethtool --set-channels support
- Add ethtool --show-channels/--set-channels support
- Update the driver to always perform link training in KR mode
- Advertise FEC support when using a KR re-driver
- Update the BelFuse quirk to now support SGMII
- Improve 100Mbps auto-negotiation for BelFuse parts

This patch series is based on net-next.

---

Tom Lendacky (12):
      amd-xgbe: Fix debug output of max channel counts
      amd-xgbe: Read and save the port property registers during probe
      amd-xgbe: Remove use of comm_owned field
      amd-xgbe: Remove field that indicates SFP diagnostic support
      amd-xgbe: Add ethtool support to retrieve SFP module info
      amd-xgbe: Add ethtool show/set ring parameter support
      amd-xgbe: Prepare for ethtool set-channel support
      amd-xgbe: Add ethtool show/set channels support
      amd-xgbe: Always attempt link training in KR mode
      amd-xgbe: Advertise FEC support with the KR re-driver
      amd-xgbe: Update the BelFuse quirk to support SGMII
      amd-xgbe: Improve SFP 100Mbps auto-negotiation


 drivers/net/ethernet/amd/xgbe/xgbe-drv.c     |  137 +++++++---
 drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c |  214 ++++++++++++++++
 drivers/net/ethernet/amd/xgbe/xgbe-main.c    |   20 -
 drivers/net/ethernet/amd/xgbe/xgbe-mdio.c    |  167 ++++++------
 drivers/net/ethernet/amd/xgbe/xgbe-pci.c     |   36 ++-
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c  |  349 +++++++++++++++++++-------
 drivers/net/ethernet/amd/xgbe/xgbe.h         |   31 ++
 7 files changed, 696 insertions(+), 258 deletions(-)

-- 
Tom Lendacky

^ permalink raw reply

* Re: [PATCH bpf-next 2/7] bpf: btf: Change how section is supported in btf_header
From: Martin KaFai Lau @ 2018-05-21 21:47 UTC (permalink / raw)
  To: Yonghong Song; +Cc: netdev, Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <eee61147-90a6-b1fb-5278-beb61afe7784@fb.com>

On Mon, May 21, 2018 at 01:15:24PM -0700, Yonghong Song wrote:
> 
> 
> On 5/18/18 5:16 PM, Martin KaFai Lau wrote:
> > There are currently unused section descriptions in the btf_header.  Those
> > sections are here to support future BTF use cases.  For example, the
> > func section (func_off) is to support function signature (e.g. the BPF
> > prog function signature).
> > 
> > Instead of spelling out all potential sections up-front in the btf_header.
> > This patch makes changes to btf_header such that extending it (e.g. adding
> > a section) is possible later.  The unused ones can be removed for now and
> > they can be added back later.
> > 
> > This patch:
> > 1. adds a hdr_len to the btf_header.  It will allow adding
> > sections (and other info like parent_label and parent_name)
> > later.  The check is similar to the existing bpf_attr.
> > If a user passes in a longer hdr_len, the kernel
> > ensures the extra tailing bytes are 0.
> > 
> > 2. allows the section order in the BTF object to be
> > different from its sec_off order in btf_header.
> > 
> > 3. each sec_off is followed by a sec_len.  It must not have gap or
> > overlapping among sections.
> > 
> > The string section is ensured to be at the end due to the 4 bytes
> > alignment requirement of the type section.
> > 
> > The above changes will allow enough flexibility to
> > add new sections (and other info) to the btf_header later.
> > 
> > This patch also removes an unnecessary !err check
> > at the end of btf_parse().
> > 
> > Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> > ---
> >   include/uapi/linux/btf.h |   8 +-
> >   kernel/bpf/btf.c         | 207 +++++++++++++++++++++++++++++++++++------------
> >   2 files changed, 158 insertions(+), 57 deletions(-)
> > 
> > diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
> > index bcb56ee47014..4fa479741a02 100644
> > --- a/include/uapi/linux/btf.h
> > +++ b/include/uapi/linux/btf.h
> > @@ -12,15 +12,11 @@ struct btf_header {
> >   	__u16	magic;
> >   	__u8	version;
> >   	__u8	flags;
> > -
> > -	__u32	parent_label;
> > -	__u32	parent_name;
> > +	__u32	hdr_len;
> >   	/* All offsets are in bytes relative to the end of this header */
> > -	__u32	label_off;	/* offset of label section	*/
> > -	__u32	object_off;	/* offset of data object section*/
> > -	__u32	func_off;	/* offset of function section	*/
> >   	__u32	type_off;	/* offset of type section	*/
> > +	__u32	type_len;	/* length of type section	*/
> >   	__u32	str_off;	/* offset of string section	*/
> >   	__u32	str_len;	/* length of string section	*/
> >   };
> > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > index ded10ab47b8a..536e5981ad8c 100644
> > --- a/kernel/bpf/btf.c
> > +++ b/kernel/bpf/btf.c
> > @@ -12,6 +12,7 @@
> >   #include <linux/uaccess.h>
> >   #include <linux/kernel.h>
> >   #include <linux/idr.h>
> > +#include <linux/sort.h>
> >   #include <linux/bpf_verifier.h>
> >   #include <linux/btf.h>
> > @@ -184,15 +185,13 @@ static DEFINE_IDR(btf_idr);
> >   static DEFINE_SPINLOCK(btf_idr_lock);
> >   struct btf {
> > -	union {
> > -		struct btf_header *hdr;
> > -		void *data;
> > -	};
> > +	void *data;
> >   	struct btf_type **types;
> >   	u32 *resolved_ids;
> >   	u32 *resolved_sizes;
> >   	const char *strings;
> >   	void *nohdr_data;
> > +	struct btf_header hdr;
> >   	u32 nr_types;
> >   	u32 types_size;
> >   	u32 data_size;
> > @@ -227,6 +226,12 @@ enum resolve_mode {
> >   };
> >   #define MAX_RESOLVE_DEPTH 32
> > +#define NR_SECS 2
> 
> Not sure whether it is necessary to define NR_SECS 2 here or not.
> See below.
> 
> > +
> > +struct btf_sec_info {
> > +	u32 off;
> > +	u32 len;
> > +};
> >   struct btf_verifier_env {
> >   	struct btf *btf;
> > @@ -418,14 +423,14 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
> >   static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
> >   {
> >   	return !BTF_STR_TBL_ELF_ID(offset) &&
> > -		BTF_STR_OFFSET(offset) < btf->hdr->str_len;
> > +		BTF_STR_OFFSET(offset) < btf->hdr.str_len;
> >   }
> >   static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
> >   {
> >   	if (!BTF_STR_OFFSET(offset))
> >   		return "(anon)";
> > -	else if (BTF_STR_OFFSET(offset) < btf->hdr->str_len)
> > +	else if (BTF_STR_OFFSET(offset) < btf->hdr.str_len)
> >   		return &btf->strings[BTF_STR_OFFSET(offset)];
> >   	else
> >   		return "(invalid-name-offset)";
> > @@ -536,7 +541,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
> >   	__btf_verifier_log(log, "\n");
> >   }
> > -static void btf_verifier_log_hdr(struct btf_verifier_env *env)
> > +static void btf_verifier_log_hdr(struct btf_verifier_env *env,
> > +				 u32 btf_data_size)
> >   {
> >   	struct bpf_verifier_log *log = &env->log;
> >   	const struct btf *btf = env->btf;
> > @@ -545,19 +551,16 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env)
> >   	if (!bpf_verifier_log_needed(log))
> >   		return;
> > -	hdr = btf->hdr;
> > +	hdr = &btf->hdr;
> >   	__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
> >   	__btf_verifier_log(log, "version: %u\n", hdr->version);
> >   	__btf_verifier_log(log, "flags: 0x%x\n", hdr->flags);
> > -	__btf_verifier_log(log, "parent_label: %u\n", hdr->parent_label);
> > -	__btf_verifier_log(log, "parent_name: %u\n", hdr->parent_name);
> > -	__btf_verifier_log(log, "label_off: %u\n", hdr->label_off);
> > -	__btf_verifier_log(log, "object_off: %u\n", hdr->object_off);
> > -	__btf_verifier_log(log, "func_off: %u\n", hdr->func_off);
> > +	__btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len);
> >   	__btf_verifier_log(log, "type_off: %u\n", hdr->type_off);
> > +	__btf_verifier_log(log, "type_len: %u\n", hdr->type_len);
> >   	__btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
> >   	__btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
> > -	__btf_verifier_log(log, "btf_total_size: %u\n", btf->data_size);
> > +	__btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size);
> >   }
> >   static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
> > @@ -1754,9 +1757,9 @@ static int btf_check_all_metas(struct btf_verifier_env *env)
> >   	struct btf_header *hdr;
> >   	void *cur, *end;
> > -	hdr = btf->hdr;
> > +	hdr = &btf->hdr;
> >   	cur = btf->nohdr_data + hdr->type_off;
> > -	end = btf->nohdr_data + hdr->str_off;
> > +	end = btf->nohdr_data + hdr->type_len;
> >   	env->log_type_id = 1;
> >   	while (cur < end) {
> > @@ -1866,8 +1869,20 @@ static int btf_check_all_types(struct btf_verifier_env *env)
> >   static int btf_parse_type_sec(struct btf_verifier_env *env)
> >   {
> > +	const struct btf_header *hdr = &env->btf->hdr;
> >   	int err;
> > +	/* Type section must align to 4 bytes */
> > +	if (hdr->type_off & (sizeof(u32) - 1)) {
> > +		btf_verifier_log(env, "Unaligned type_off");
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (!hdr->type_len) {
> > +		btf_verifier_log(env, "No type found");
> > +		return -EINVAL;
> > +	}
> > +
> >   	err = btf_check_all_metas(env);
> >   	if (err)
> >   		return err;
> > @@ -1881,10 +1896,15 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
> >   	struct btf *btf = env->btf;
> >   	const char *start, *end;
> > -	hdr = btf->hdr;
> > +	hdr = &btf->hdr;
> >   	start = btf->nohdr_data + hdr->str_off;
> >   	end = start + hdr->str_len;
> > +	if (end != btf->data + btf->data_size) {
> > +		btf_verifier_log(env, "String section is not at the end");
> > +		return -EINVAL;
> > +	}
> > +
> >   	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
> >   	    start[0] || end[-1]) {
> >   		btf_verifier_log(env, "Invalid string section");
> > @@ -1896,20 +1916,119 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
> >   	return 0;
> >   }
> > -static int btf_parse_hdr(struct btf_verifier_env *env)
> > +static const size_t btf_sec_info_offset[] = {
> > +	offsetof(struct btf_header, type_off),
> > +	offsetof(struct btf_header, str_off),
> > +};
> 
> Maybe we can define NR_SECS is
> ARRAY_SIZE(btf_sec_info_offset)/sizeof(size_t)?
> 
> > +
> > +static int btf_sec_info_cmp(const void *a, const void *b)
> >   {
> > +	const struct btf_sec_info *x = a;
> > +	const struct btf_sec_info *y = b;
> > +
> > +	return (int)(x->off - y->off) ? : (int)(x->len - y->len);
> > +}
> > +
> > +static int btf_check_sec_info(struct btf_verifier_env *env,
> > +			      u32 btf_data_size)
> > +{
> > +	struct btf_sec_info secs[NR_SECS];
> > +	u32 total, expected_total, i;
> >   	const struct btf_header *hdr;
> > -	struct btf *btf = env->btf;
> > -	u32 meta_left;
> > +	const struct btf *btf;
> > +
> > +	BUILD_BUG_ON(ARRAY_SIZE(btf_sec_info_offset) != NR_SECS);
> 
> If we define NR_SECS depending on the size of btf_sec_info_offset, this
> BUILD_BUG_ON is not needed.
Agree, BUILD_BUG_ON() can be avoided.  Will change.

> 
> > +
> > +	btf = env->btf;
> > +	hdr = &btf->hdr;
> > +
> > +	/* Populate the secs from hdr */
> > +	for (i = 0; i < NR_SECS; i++)
> > +		secs[i] = *(struct btf_sec_info *)((void *)hdr +
> > +						   btf_sec_info_offset[i]);
> > +
> > +	sort(secs, NR_SECS, sizeof(struct btf_sec_info),
> > +	     btf_sec_info_cmp, NULL);
> > +
> > +	/* Check for gaps and overlap among sections */
> > +	total = 0;
> > +	expected_total = btf_data_size - hdr->hdr_len;
> > +	for (i = 0; i < NR_SECS; i++) {
> > +		if (expected_total < secs[i].off) {
> > +			btf_verifier_log(env, "Invalid section offset");
> > +			return -EINVAL;
> > +		}
> > +		if (total < secs[i].off) {
> > +			/* gap */
> > +			btf_verifier_log(env, "Unsupported section found");
> > +			return -EINVAL;
> > +		}
> > +		if (total > secs[i].off) {
> > +			btf_verifier_log(env, "Section overlap found");
> > +			return -EINVAL;
> > +		}
> > +		if (expected_total - total < secs[i].len) {
> > +			btf_verifier_log(env,
> > +					 "Total section length too long");
> > +			return -EINVAL;
> > +		}
> > +		total += secs[i].len;
> > +	}
> > +
> > +	/* There is data other than hdr and known sections */
> > +	if (expected_total != total) {
> > +		btf_verifier_log(env, "Unsupported section found");
> > +		return -EINVAL;
> > +	}
> 
> Does this patch try to address compatibility issue? That is, the old btf
> format with 2 sections should still work with future kernel with more
> than 2 sections? The above comparision seems suggesting that newer kernel
> will not support non-compatible number of sections?
Yes, it should.  When an older btf passed in, the newer sec_off
and sec_len in btf->hdr will be zeros.  They will still pass the
for loop.

> 
> > +
> > +	return 0;
> > +}
> > +
> > +static int btf_parse_hdr(struct btf_verifier_env *env, void __user *btf_data,
> > +			 u32 btf_data_size)
> > +{
> > +	const struct btf_header *hdr;
> > +	u32 hdr_len, hdr_copy;
> > +	struct btf_min_header {
> > +		u16	magic;
> > +		u8	version;
> > +		u8	flags;
> > +		u32	hdr_len;
> > +	} __user *min_hdr;
> 
> This is the partial structure with the first four fields
> for struct btf_header.
> It is okay, but I feel a little bit duplication here.
> 
> Looks like only sizeof(*min_hdr) and min_hdr->hdr_len is used.
> Maybe we can just cast bpf_data to bpf_header and
> sizeof(*min_hdr) being offsetof(bpf_data, type_off), and
> min_hdr->hdr_len being bpf_data->hdr_len.
> 
> Do this work?
offsetof() was what I had.  I found a few lines of offsetof() become
so long that is actually hard to read.  Also, I think spelling out the
'btf_min_header' name here is helpful, so I did not leave it as
anon struct.

Agree that there is duplication but it will never be changed once we
release it to uapi.  I can make some comments here instead.

> 
> > +	struct btf *btf;
> > +	int err;
> > +
> > +	btf = env->btf;
> > +	min_hdr = btf_data;
> > +
> > +	if (btf_data_size < sizeof(*min_hdr)) {
> > +		btf_verifier_log(env, "hdr_len not found");
> > +		return -EINVAL;
> > +	}
> > -	if (btf->data_size < sizeof(*hdr)) {
> > +	if (get_user(hdr_len, &min_hdr->hdr_len))
> > +		return -EFAULT;
> > +
> > +	if (btf_data_size < hdr_len) {
> >   		btf_verifier_log(env, "btf_header not found");
> >   		return -EINVAL;
> >   	}
> > -	btf_verifier_log_hdr(env);
> > +	err = bpf_check_uarg_tail_zero(btf_data, sizeof(btf->hdr), hdr_len);
> > +	if (err) {
> > +		if (err == -E2BIG)
> > +			btf_verifier_log(env, "Unsupported btf_header");
> > +		return err;
> > +	}
> > +
> > +	hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr));
> > +	if (copy_from_user(&btf->hdr, btf_data, hdr_copy))
> > +		return -EFAULT;
> > +
> > +	hdr = &btf->hdr;
> > +
> > +	btf_verifier_log_hdr(env, btf_data_size);
> > -	hdr = btf->hdr;
> >   	if (hdr->magic != BTF_MAGIC) {
> >   		btf_verifier_log(env, "Invalid magic");
> >   		return -EINVAL;
> > @@ -1925,26 +2044,14 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
> >   		return -ENOTSUPP;
> >   	}
> > -	meta_left = btf->data_size - sizeof(*hdr);
> > -	if (!meta_left) {
> > +	if (btf_data_size == hdr->hdr_len) {
> >   		btf_verifier_log(env, "No data");
> >   		return -EINVAL;
> >   	}
> > -	if (meta_left < hdr->type_off || hdr->str_off <= hdr->type_off ||
> > -	    /* Type section must align to 4 bytes */
> > -	    hdr->type_off & (sizeof(u32) - 1)) {
> > -		btf_verifier_log(env, "Invalid type_off");
> > -		return -EINVAL;
> > -	}
> > -
> > -	if (meta_left < hdr->str_off ||
> > -	    meta_left - hdr->str_off < hdr->str_len) {
> > -		btf_verifier_log(env, "Invalid str_off or str_len");
> > -		return -EINVAL;
> > -	}
> > -
> > -	btf->nohdr_data = btf->hdr + 1;
> > +	err = btf_check_sec_info(env, btf_data_size);
> > +	if (err)
> > +		return err;
> >   	return 0;
> >   }
> > @@ -1987,6 +2094,11 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
> >   		err = -ENOMEM;
> >   		goto errout;
> >   	}
> > +	env->btf = btf;
> > +
> > +	err = btf_parse_hdr(env, btf_data, btf_data_size);
> > +	if (err)
> > +		goto errout;
> >   	data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN);
> >   	if (!data) {
> > @@ -1996,18 +2108,13 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
> >   	btf->data = data;
> >   	btf->data_size = btf_data_size;
> > +	btf->nohdr_data = btf->data + btf->hdr.hdr_len;
> >   	if (copy_from_user(data, btf_data, btf_data_size)) {
> >   		err = -EFAULT;
> >   		goto errout;
> >   	}
> > -	env->btf = btf;
> > -
> > -	err = btf_parse_hdr(env);
> > -	if (err)
> > -		goto errout;
> > -
> >   	err = btf_parse_str_sec(env);
> >   	if (err)
> >   		goto errout;
> > @@ -2016,16 +2123,14 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
> >   	if (err)
> >   		goto errout;
> > -	if (!err && log->level && bpf_verifier_log_full(log)) {
> > +	if (log->level && bpf_verifier_log_full(log)) {
> >   		err = -ENOSPC;
> >   		goto errout;
> >   	}
> > -	if (!err) {
> > -		btf_verifier_env_free(env);
> > -		refcount_set(&btf->refcnt, 1);
> > -		return btf;
> > -	}
> > +	btf_verifier_env_free(env);
> > +	refcount_set(&btf->refcnt, 1);
> > +	return btf;
> >   errout:
> >   	btf_verifier_env_free(env);
> > 

^ permalink raw reply

* Re: [PATCH] bpf: prevent memory disambiguation attack
From: Daniel Borkmann @ 2018-05-21 21:46 UTC (permalink / raw)
  To: Alexei Starovoitov, David S . Miller; +Cc: netdev, linux-kernel, kernel-team
In-Reply-To: <20180521211743.1492305-1-ast@kernel.org>

On 05/21/2018 11:17 PM, Alexei Starovoitov wrote:
> Detect code patterns where malicious 'speculative store bypass' can be used
> and sanitize such patterns.
> 
>  39: (bf) r3 = r10
>  40: (07) r3 += -216
>  41: (79) r8 = *(u64 *)(r7 +0)   // slow read
>  42: (7a) *(u64 *)(r10 -72) = 0  // verifier inserts this instruction
>  43: (7b) *(u64 *)(r8 +0) = r3   // this store becomes slow due to r8
>  44: (79) r1 = *(u64 *)(r6 +0)   // cpu speculatively executes this load
>  45: (71) r2 = *(u8 *)(r1 +0)    // speculatively arbitrary 'load byte'
>                                  // is now sanitized
> 
> Above code after x86 JIT becomes:
>  e5: mov    %rbp,%rdx
>  e8: add    $0xffffffffffffff28,%rdx
>  ef: mov    0x0(%r13),%r14
>  f3: movq   $0x0,-0x48(%rbp)
>  fb: mov    %rdx,0x0(%r14)
>  ff: mov    0x0(%rbx),%rdi
> 103: movzbq 0x0(%rdi),%rsi
> 
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>

(No further action needed since already in Linus tree [1]. This went via the
 batch of x86 fixes on the speculative store buffer bypass from today [2].)

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=af86ca4e3088fe5eacf2f7e58c01fa68ca067672
[2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3b78ce4a34b761c7fe13520de822984019ff1a8f

^ permalink raw reply

* [PATCH] pcnet32: add an error handling path in pcnet32_probe_pci()
From: Bo Chen @ 2018-05-21 21:44 UTC (permalink / raw)
  To: pcnet32; +Cc: davem, netdev, linux-kernel, Bo Chen

Make sure to invoke pci_disable_device() when errors occur in
pcnet32_probe_pci().

Signed-off-by: Bo Chen <chenbo@pdx.edu>
---
 drivers/net/ethernet/amd/pcnet32.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index a561705f232c..be198cc0b10c 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -1552,22 +1552,26 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!ioaddr) {
 		if (pcnet32_debug & NETIF_MSG_PROBE)
 			pr_err("card has no PCI IO resources, aborting\n");
-		return -ENODEV;
+		err = -ENODEV;
+		goto err_disable_dev;
 	}
 
 	err = pci_set_dma_mask(pdev, PCNET32_DMA_MASK);
 	if (err) {
 		if (pcnet32_debug & NETIF_MSG_PROBE)
 			pr_err("architecture does not support 32bit PCI busmaster DMA\n");
-		return err;
+		goto err_disable_dev;
 	}
 	if (!request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci")) {
 		if (pcnet32_debug & NETIF_MSG_PROBE)
 			pr_err("io address range already allocated\n");
-		return -EBUSY;
+		err = -EBUSY;
+		goto err_disable_dev;
 	}
 
 	err = pcnet32_probe1(ioaddr, 1, pdev);
+
+err_disable_dev:
 	if (err < 0)
 		pci_disable_device(pdev);
 
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH 07/33] iwlwifi: mvm: use match_string() helper
From: Andy Shevchenko @ 2018-05-21 21:43 UTC (permalink / raw)
  To: Yisheng Xie
  Cc: Linux Kernel Mailing List, Kalle Valo, Intel Linux Wireless,
	Johannes Berg, Emmanuel Grumbach, open list:TI WILINK WIRELES...,
	netdev
In-Reply-To: <1526903890-35761-8-git-send-email-xieyisheng1@huawei.com>

On Mon, May 21, 2018 at 2:57 PM, Yisheng Xie <xieyisheng1@huawei.com> wrote:
> match_string() returns the index of an array for a matching string,
> which can be used intead of open coded variant.

>         int ret, bt_force_ant_mode;
>
> -       for (bt_force_ant_mode = 0;
> -            bt_force_ant_mode < ARRAY_SIZE(modes_str);
> -            bt_force_ant_mode++) {
> -               if (!strcmp(buf, modes_str[bt_force_ant_mode]))
> -                       break;
> -       }
> -
> -       if (bt_force_ant_mode >= ARRAY_SIZE(modes_str))

> +       bt_force_ant_mode = match_string(modes_str,
> +                                        ARRAY_SIZE(modes_str), buf);

One line?

> +       if (bt_force_ant_mode < 0)
>                 return -EINVAL;

I would rather use

ret = match_string();
if (ret < 0)
 return ret;

bt_force_... = ret;

But it's up tu Loca.

>
>         ret = 0;



-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply

* Re: [PATCH 05/33] cxgb4: use match_string() helper
From: Andy Shevchenko @ 2018-05-21 21:39 UTC (permalink / raw)
  To: Yisheng Xie; +Cc: Linux Kernel Mailing List, Ganesh Goudar, netdev
In-Reply-To: <1526903890-35761-6-git-send-email-xieyisheng1@huawei.com>

On Mon, May 21, 2018 at 2:57 PM, Yisheng Xie <xieyisheng1@huawei.com> wrote:
> match_string() returns the index of an array for a matching string,
> which can be used intead of open coded variant.

> -       for (i = 0; i < ARRAY_SIZE(cudbg_region); i++) {
> -               if (!strcmp(cudbg_region[i], region_name)) {
> -                       found = 1;
> -                       idx = i;
> -                       break;
> -               }
> -       }
> -       if (!found)
> -               return -EINVAL;
> +       rc = match_string(cudbg_region, ARRAY_SIZE(cudbg_region), region_name);
> +       if (rc < 0)
> +               return rc;
>
> -       found = 0;
> +       idx = rc;

Is found still in use after this?
If so, is it initialized properly now?

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply

* Re: [PATCH net-next] r8169: perform reset synchronously in __rtl8169_resume
From: Francois Romieu @ 2018-05-21 21:24 UTC (permalink / raw)
  To: Heiner Kallweit
  Cc: David Miller, Realtek linux nic maintainers,
	netdev@vger.kernel.org
In-Reply-To: <040eb6ae-bb21-2d27-14e0-b291cb4cc1c9@gmail.com>

Heiner Kallweit <hkallweit1@gmail.com> :
[...]
> diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
> index 75dfac024..1eb4f625a 100644
> --- a/drivers/net/ethernet/realtek/r8169.c
> +++ b/drivers/net/ethernet/realtek/r8169.c
> @@ -7327,9 +7327,9 @@ static void __rtl8169_resume(struct net_device *dev)
>  	rtl_lock_work(tp);
>  	napi_enable(&tp->napi);
>  	set_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags);
> +	if (netif_running(dev))
> +		rtl_reset_work(tp);
>  	rtl_unlock_work(tp);
> -
> -	rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
>  }
>  
>  static int rtl8169_resume(struct device *device)

netif_running() returns true before rtl_open(): issuing rtl_reset_work()
synchronously against uninitialized rings right at the start of rtl_open()
won't work as expected.

pm_runtime_get_sync() could be issued later in rtl_open() (but I would
not mind something different with runtime_{resume/suspend} in open/close).

-- 
Ueimor

^ permalink raw reply

* [net-next 6/6] net/mlx5e: Receive buffer support for DCBX
From: Saeed Mahameed @ 2018-05-21 21:05 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Huy Nguyen, Saeed Mahameed
In-Reply-To: <20180521210502.11082-1-saeedm@mellanox.com>

From: Huy Nguyen <huyn@mellanox.com>

Add dcbnl's set/get buffer configuration callback that allows user to
set/get buffer size configuration and priority to buffer mapping.

By default, firmware controls receive buffer configuration and priority
of buffer mapping based on the changes in pfc settings. When set buffer
call back is triggered, the buffer configuration changes to manual mode.

The manual mode means mlx5 driver will adjust the buffer configuration
accordingly based on the changes in pfc settings.

Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   1 +
 .../ethernet/mellanox/mlx5/core/en_dcbnl.c    | 131 +++++++++++++++++-
 2 files changed, 125 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9ab7158a7ce7..c5c7a6d687ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -278,6 +278,7 @@ struct mlx5e_dcbx {
 	u8                         cap;
 
 	/* Buffer configuration */
+	bool                       manual_buffer;
 	u32                        cable_len;
 	u32                        xoff;
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index c641d5656b2d..fa6cd8aa077c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -32,8 +32,8 @@
 #include <linux/device.h>
 #include <linux/netdevice.h>
 #include "en.h"
-
-#define MLX5E_MAX_PRIORITY 8
+#include "en/port.h"
+#include "en/port_buffer.h"
 
 #define MLX5E_100MB (100000)
 #define MLX5E_1GB   (1000000)
@@ -41,6 +41,9 @@
 #define MLX5E_CEE_STATE_UP    1
 #define MLX5E_CEE_STATE_DOWN  0
 
+/* Max supported cable length is 1000 meters */
+#define MLX5E_MAX_CABLE_LENGTH 1000
+
 enum {
 	MLX5E_VENDOR_TC_GROUP_NUM = 7,
 	MLX5E_LOWEST_PRIO_GROUP   = 0,
@@ -338,6 +341,9 @@ static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev,
 		pfc->indications[i] = PPORT_PER_PRIO_GET(pstats, i, rx_pause);
 	}
 
+	if (MLX5_BUFFER_SUPPORTED(mdev))
+		pfc->delay = priv->dcbx.cable_len;
+
 	return mlx5_query_port_pfc(mdev, &pfc->pfc_en, NULL);
 }
 
@@ -346,16 +352,39 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev,
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5_core_dev *mdev = priv->mdev;
+	u32 old_cable_len = priv->dcbx.cable_len;
+	struct ieee_pfc pfc_new;
+	u32 changed = 0;
 	u8 curr_pfc_en;
-	int ret;
+	int ret = 0;
 
+	/* pfc_en */
 	mlx5_query_port_pfc(mdev, &curr_pfc_en, NULL);
+	if (pfc->pfc_en != curr_pfc_en) {
+		ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en);
+		if (ret)
+			return ret;
+		mlx5_toggle_port_link(mdev);
+		changed |= MLX5E_PORT_BUFFER_PFC;
+	}
 
-	if (pfc->pfc_en == curr_pfc_en)
-		return 0;
+	if (pfc->delay &&
+	    pfc->delay < MLX5E_MAX_CABLE_LENGTH &&
+	    pfc->delay != priv->dcbx.cable_len) {
+		priv->dcbx.cable_len = pfc->delay;
+		changed |= MLX5E_PORT_BUFFER_CABLE_LEN;
+	}
 
-	ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en);
-	mlx5_toggle_port_link(mdev);
+	if (MLX5_BUFFER_SUPPORTED(mdev)) {
+		pfc_new.pfc_en = (changed & MLX5E_PORT_BUFFER_PFC) ? pfc->pfc_en : curr_pfc_en;
+		if (priv->dcbx.manual_buffer)
+			ret = mlx5e_port_manual_buffer_config(priv, changed,
+							      dev->mtu, &pfc_new,
+							      NULL, NULL);
+
+		if (ret && (changed & MLX5E_PORT_BUFFER_CABLE_LEN))
+			priv->dcbx.cable_len = old_cable_len;
+	}
 
 	if (!ret) {
 		mlx5e_dbg(HW, priv,
@@ -873,6 +902,89 @@ static void mlx5e_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
 	cee_cfg->pfc_enable = state;
 }
 
+static int mlx5e_dcbnl_getbuffer(struct net_device *dev,
+				 struct dcbnl_buffer *dcb_buffer)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5e_port_buffer port_buffer;
+	u8 buffer[MLX5E_MAX_PRIORITY];
+	int i, err;
+
+	if (!MLX5_BUFFER_SUPPORTED(mdev))
+		return -EOPNOTSUPP;
+
+	err = mlx5e_port_query_priority2buffer(mdev, buffer);
+	if (err)
+		return err;
+
+	for (i = 0; i < MLX5E_MAX_PRIORITY; i++)
+		dcb_buffer->prio2buffer[i] = buffer[i];
+
+	err = mlx5e_port_query_buffer(priv, &port_buffer);
+	if (err)
+		return err;
+
+	for (i = 0; i < MLX5E_MAX_BUFFER; i++)
+		dcb_buffer->buffer_size[i] = port_buffer.buffer[i].size;
+
+	return 0;
+}
+
+static int mlx5e_dcbnl_setbuffer(struct net_device *dev,
+				 struct dcbnl_buffer *dcb_buffer)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5e_port_buffer port_buffer;
+	u8 old_prio2buffer[MLX5E_MAX_PRIORITY];
+	u32 *buffer_size = NULL;
+	u8 *prio2buffer = NULL;
+	u32 changed = 0;
+	int i, err;
+
+	if (!MLX5_BUFFER_SUPPORTED(mdev))
+		return -EOPNOTSUPP;
+
+	for (i = 0; i < DCBX_MAX_BUFFERS; i++)
+		mlx5_core_dbg(mdev, "buffer[%d]=%d\n", i, dcb_buffer->buffer_size[i]);
+
+	for (i = 0; i < MLX5E_MAX_PRIORITY; i++)
+		mlx5_core_dbg(mdev, "priority %d buffer%d\n", i, dcb_buffer->prio2buffer[i]);
+
+	err = mlx5e_port_query_priority2buffer(mdev, old_prio2buffer);
+	if (err)
+		return err;
+
+	for (i = 0; i < MLX5E_MAX_PRIORITY; i++) {
+		if (dcb_buffer->prio2buffer[i] != old_prio2buffer[i]) {
+			changed |= MLX5E_PORT_BUFFER_PRIO2BUFFER;
+			prio2buffer = dcb_buffer->prio2buffer;
+			break;
+		}
+	}
+
+	err = mlx5e_port_query_buffer(priv, &port_buffer);
+	if (err)
+		return err;
+
+	for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+		if (port_buffer.buffer[i].size != dcb_buffer->buffer_size[i]) {
+			changed |= MLX5E_PORT_BUFFER_SIZE;
+			buffer_size = dcb_buffer->buffer_size;
+			break;
+		}
+	}
+
+	if (!changed)
+		return 0;
+
+	priv->dcbx.manual_buffer = true;
+	err = mlx5e_port_manual_buffer_config(priv, changed, dev->mtu, NULL,
+					      buffer_size, prio2buffer);
+	return err;
+}
+
 const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
 	.ieee_getets	= mlx5e_dcbnl_ieee_getets,
 	.ieee_setets	= mlx5e_dcbnl_ieee_setets,
@@ -884,6 +996,8 @@ const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
 	.ieee_delapp    = mlx5e_dcbnl_ieee_delapp,
 	.getdcbx	= mlx5e_dcbnl_getdcbx,
 	.setdcbx	= mlx5e_dcbnl_setdcbx,
+	.dcbnl_getbuffer = mlx5e_dcbnl_getbuffer,
+	.dcbnl_setbuffer = mlx5e_dcbnl_setbuffer,
 
 /* CEE interfaces */
 	.setall         = mlx5e_dcbnl_setall,
@@ -1091,5 +1205,8 @@ void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv)
 	if (priv->dcbx.mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
 		priv->dcbx.cap |= DCB_CAP_DCBX_HOST;
 
+	priv->dcbx.manual_buffer = false;
+	priv->dcbx.cable_len = MLX5E_DEFAULT_CABLE_LEN;
+
 	mlx5e_ets_init(priv);
 }
-- 
2.17.0

^ permalink raw reply related

* [net-next 5/6] net/mlx5e: Receive buffer configuration
From: Saeed Mahameed @ 2018-05-21 21:05 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Huy Nguyen, Saeed Mahameed
In-Reply-To: <20180521210502.11082-1-saeedm@mellanox.com>

From: Huy Nguyen <huyn@mellanox.com>

Add APIs for buffer configuration based on the changes in
pfc configuration, cable len, buffer size configuration,
and priority to buffer mapping.

Note that the xoff fomula is as below
  xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]
  xoff_threshold = buffer_size - xoff
  xon_threshold = xoff_threshold - MTU

Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   5 +
 .../mellanox/mlx5/core/en/port_buffer.c       | 327 ++++++++++++++++++
 .../mellanox/mlx5/core/en/port_buffer.h       |  75 ++++
 4 files changed, 408 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 651cf3640420..9efbf193ad5a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -21,7 +21,7 @@ mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
 
 mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o en_rep.o en_tc.o
 
-mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
+mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o en/port_buffer.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib_vlan.o
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index d13a86a1d702..9ab7158a7ce7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -65,6 +65,7 @@ struct page_pool;
 #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
 #define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu))
 
+#define MLX5E_MAX_PRIORITY      8
 #define MLX5E_MAX_DSCP          64
 #define MLX5E_MAX_NUM_TC	8
 
@@ -275,6 +276,10 @@ struct mlx5e_dcbx {
 	/* The only setting that cannot be read from FW */
 	u8                         tc_tsa[IEEE_8021QAZ_MAX_TCS];
 	u8                         cap;
+
+	/* Buffer configuration */
+	u32                        cable_len;
+	u32                        xoff;
 };
 
 struct mlx5e_dcbx_dp {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
new file mode 100644
index 000000000000..c047da8752da
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "port_buffer.h"
+
+int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
+			    struct mlx5e_port_buffer *port_buffer)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+	u32 total_used = 0;
+	void *buffer;
+	void *out;
+	int err;
+	int i;
+
+	out = kzalloc(sz, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	err = mlx5e_port_query_pbmc(mdev, out);
+	if (err)
+		goto out;
+
+	for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+		buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]);
+		port_buffer->buffer[i].lossy =
+			MLX5_GET(bufferx_reg, buffer, lossy);
+		port_buffer->buffer[i].epsb =
+			MLX5_GET(bufferx_reg, buffer, epsb);
+		port_buffer->buffer[i].size =
+			MLX5_GET(bufferx_reg, buffer, size) << MLX5E_BUFFER_CELL_SHIFT;
+		port_buffer->buffer[i].xon =
+			MLX5_GET(bufferx_reg, buffer, xon_threshold) << MLX5E_BUFFER_CELL_SHIFT;
+		port_buffer->buffer[i].xoff =
+			MLX5_GET(bufferx_reg, buffer, xoff_threshold) << MLX5E_BUFFER_CELL_SHIFT;
+		total_used += port_buffer->buffer[i].size;
+
+		mlx5e_dbg(HW, priv, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", i,
+			  port_buffer->buffer[i].size,
+			  port_buffer->buffer[i].xon,
+			  port_buffer->buffer[i].xoff,
+			  port_buffer->buffer[i].epsb,
+			  port_buffer->buffer[i].lossy);
+	}
+
+	port_buffer->port_buffer_size =
+		MLX5_GET(pbmc_reg, out, port_buffer_size) << MLX5E_BUFFER_CELL_SHIFT;
+	port_buffer->spare_buffer_size =
+		port_buffer->port_buffer_size - total_used;
+
+	mlx5e_dbg(HW, priv, "total buffer size=%d, spare buffer size=%d\n",
+		  port_buffer->port_buffer_size,
+		  port_buffer->spare_buffer_size);
+out:
+	kfree(out);
+	return err;
+}
+
+static int port_set_buffer(struct mlx5e_priv *priv,
+			   struct mlx5e_port_buffer *port_buffer)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+	void *buffer;
+	void *in;
+	int err;
+	int i;
+
+	in = kzalloc(sz, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	err = mlx5e_port_query_pbmc(mdev, in);
+	if (err)
+		goto out;
+
+	for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+		buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]);
+
+		MLX5_SET(bufferx_reg, buffer, size,
+			 port_buffer->buffer[i].size >> MLX5E_BUFFER_CELL_SHIFT);
+		MLX5_SET(bufferx_reg, buffer, lossy,
+			 port_buffer->buffer[i].lossy);
+		MLX5_SET(bufferx_reg, buffer, xoff_threshold,
+			 port_buffer->buffer[i].xoff >> MLX5E_BUFFER_CELL_SHIFT);
+		MLX5_SET(bufferx_reg, buffer, xon_threshold,
+			 port_buffer->buffer[i].xon >> MLX5E_BUFFER_CELL_SHIFT);
+	}
+
+	err = mlx5e_port_set_pbmc(mdev, in);
+out:
+	kfree(in);
+	return err;
+}
+
+/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) */
+static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu)
+{
+	u32 speed;
+	u32 xoff;
+	int err;
+
+	err = mlx5e_port_linkspeed(priv->mdev, &speed);
+	if (err)
+		return 0;
+
+	xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100;
+
+	mlx5e_dbg(HW, priv, "%s: xoff=%d\n", __func__, xoff);
+	return xoff;
+}
+
+static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
+				 u32 xoff, unsigned int mtu)
+{
+	int i;
+
+	for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+		if (port_buffer->buffer[i].lossy) {
+			port_buffer->buffer[i].xoff = 0;
+			port_buffer->buffer[i].xon  = 0;
+			continue;
+		}
+
+		if (port_buffer->buffer[i].size <
+		    (xoff + mtu + (1 << MLX5E_BUFFER_CELL_SHIFT)))
+			return -ENOMEM;
+
+		port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff;
+		port_buffer->buffer[i].xon  = port_buffer->buffer[i].xoff - mtu;
+	}
+
+	return 0;
+}
+
+/**
+ * update_buffer_lossy()
+ *   mtu: device's MTU
+ *   pfc_en: <input> current pfc configuration
+ *   buffer: <input> current prio to buffer mapping
+ *   xoff:   <input> xoff value
+ *   port_buffer: <output> port receive buffer configuration
+ *   change: <output>
+ *
+ *   Update buffer configuration based on pfc configuraiton and priority
+ *   to buffer mapping.
+ *   Buffer's lossy bit is changed to:
+ *     lossless if there is at least one PFC enabled priority mapped to this buffer
+ *     lossy if all priorities mapped to this buffer are PFC disabled
+ *
+ *   Return:
+ *     Return 0 if no error.
+ *     Set change to true if buffer configuration is modified.
+ */
+static int update_buffer_lossy(unsigned int mtu,
+			       u8 pfc_en, u8 *buffer, u32 xoff,
+			       struct mlx5e_port_buffer *port_buffer,
+			       bool *change)
+{
+	bool changed = false;
+	u8 lossy_count;
+	u8 prio_count;
+	u8 lossy;
+	int prio;
+	int err;
+	int i;
+
+	for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+		prio_count = 0;
+		lossy_count = 0;
+
+		for (prio = 0; prio < MLX5E_MAX_PRIORITY; prio++) {
+			if (buffer[prio] != i)
+				continue;
+
+			prio_count++;
+			lossy_count += !(pfc_en & (1 << prio));
+		}
+
+		if (lossy_count == prio_count)
+			lossy = 1;
+		else /* lossy_count < prio_count */
+			lossy = 0;
+
+		if (lossy != port_buffer->buffer[i].lossy) {
+			port_buffer->buffer[i].lossy = lossy;
+			changed = true;
+		}
+	}
+
+	if (changed) {
+		err = update_xoff_threshold(port_buffer, xoff, mtu);
+		if (err)
+			return err;
+
+		*change = true;
+	}
+
+	return 0;
+}
+
+int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
+				    u32 change, unsigned int mtu,
+				    struct ieee_pfc *pfc,
+				    u32 *buffer_size,
+				    u8 *prio2buffer)
+{
+	struct mlx5e_port_buffer port_buffer;
+	u32 xoff = calculate_xoff(priv, mtu);
+	bool update_prio2buffer = false;
+	u8 buffer[MLX5E_MAX_PRIORITY];
+	bool update_buffer = false;
+	u32 total_used = 0;
+	u8 curr_pfc_en;
+	int err;
+	int i;
+
+	mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change);
+
+	err = mlx5e_port_query_buffer(priv, &port_buffer);
+	if (err)
+		return err;
+
+	if (change & MLX5E_PORT_BUFFER_CABLE_LEN) {
+		update_buffer = true;
+		err = update_xoff_threshold(&port_buffer, xoff, mtu);
+		if (err)
+			return err;
+	}
+
+	if (change & MLX5E_PORT_BUFFER_PFC) {
+		err = mlx5e_port_query_priority2buffer(priv->mdev, buffer);
+		if (err)
+			return err;
+
+		err = update_buffer_lossy(mtu, pfc->pfc_en, buffer, xoff,
+					  &port_buffer, &update_buffer);
+		if (err)
+			return err;
+	}
+
+	if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) {
+		update_prio2buffer = true;
+		err = mlx5_query_port_pfc(priv->mdev, &curr_pfc_en, NULL);
+		if (err)
+			return err;
+
+		err = update_buffer_lossy(mtu, curr_pfc_en, prio2buffer, xoff,
+					  &port_buffer, &update_buffer);
+		if (err)
+			return err;
+	}
+
+	if (change & MLX5E_PORT_BUFFER_SIZE) {
+		for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+			mlx5e_dbg(HW, priv, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]);
+			if (!port_buffer.buffer[i].lossy && !buffer_size[i]) {
+				mlx5e_dbg(HW, priv, "%s: lossless buffer[%d] size cannot be zero\n",
+					  __func__, i);
+				return -EINVAL;
+			}
+
+			port_buffer.buffer[i].size = buffer_size[i];
+			total_used += buffer_size[i];
+		}
+
+		mlx5e_dbg(HW, priv, "%s: total buffer requested=%d\n", __func__, total_used);
+
+		if (total_used > port_buffer.port_buffer_size)
+			return -EINVAL;
+
+		update_buffer = true;
+		err = update_xoff_threshold(&port_buffer, xoff, mtu);
+		if (err)
+			return err;
+	}
+
+	/* Need to update buffer configuration if xoff value is changed */
+	if (!update_buffer && xoff != priv->dcbx.xoff) {
+		update_buffer = true;
+		err = update_xoff_threshold(&port_buffer, xoff, mtu);
+		if (err)
+			return err;
+	}
+	priv->dcbx.xoff = xoff;
+
+	/* Apply the settings */
+	if (update_buffer) {
+		err = port_set_buffer(priv, &port_buffer);
+		if (err)
+			return err;
+	}
+
+	if (update_prio2buffer)
+		err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer);
+
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
new file mode 100644
index 000000000000..34f55b81a0de
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_PORT_BUFFER_H__
+#define __MLX5_EN_PORT_BUFFER_H__
+
+#include "en.h"
+#include "port.h"
+
+#define MLX5E_MAX_BUFFER 8
+#define MLX5E_BUFFER_CELL_SHIFT 7
+#define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */
+
+#define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \
+				     MLX5_CAP_PCAM_REG(mdev, pbmc) && \
+				     MLX5_CAP_PCAM_REG(mdev, pptb))
+
+enum {
+	MLX5E_PORT_BUFFER_CABLE_LEN   = BIT(0),
+	MLX5E_PORT_BUFFER_PFC         = BIT(1),
+	MLX5E_PORT_BUFFER_PRIO2BUFFER = BIT(2),
+	MLX5E_PORT_BUFFER_SIZE        = BIT(3),
+};
+
+struct mlx5e_bufferx_reg {
+	u8   lossy;
+	u8   epsb;
+	u32  size;
+	u32  xoff;
+	u32  xon;
+};
+
+struct mlx5e_port_buffer {
+	u32                       port_buffer_size;
+	u32                       spare_buffer_size;
+	struct mlx5e_bufferx_reg  buffer[MLX5E_MAX_BUFFER];
+};
+
+int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
+				    u32 change, unsigned int mtu,
+				    struct ieee_pfc *pfc,
+				    u32 *buffer_size,
+				    u8 *prio2buffer);
+
+int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
+			    struct mlx5e_port_buffer *port_buffer);
+#endif
-- 
2.17.0

^ permalink raw reply related

* [net-next 4/6] net/mlx5e: PPTB and PBMC register firmware command support
From: Saeed Mahameed @ 2018-05-21 21:05 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Huy Nguyen, Saeed Mahameed
In-Reply-To: <20180521210502.11082-1-saeedm@mellanox.com>

From: Huy Nguyen <huyn@mellanox.com>

Add firmware command interface to read and write PPTB and PBMC
registers.

PPTB register enables mappings priority to a specific receive buffer.

PBMC registers enables changing the receive buffer's configuration such
as buffer size, xon/xoff thresholds, buffer's lossy property and
buffer's shared property.

Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/port.c | 108 ++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/en/port.h |   5 +
 include/linux/mlx5/driver.h                   |   2 +
 include/linux/mlx5/mlx5_ifc.h                 |  35 ++++++
 4 files changed, 150 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 9f04542f3661..24e3b564964f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -127,3 +127,111 @@ u32 mlx5e_port_speed2linkmodes(u32 speed)
 
 	return link_modes;
 }
+
+int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out)
+{
+	int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+	void *in;
+	int err;
+
+	in = kzalloc(sz, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(pbmc_reg, in, local_port, 1);
+	err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 0);
+
+	kfree(in);
+	return err;
+}
+
+int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in)
+{
+	int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+	void *out;
+	int err;
+
+	out = kzalloc(sz, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	MLX5_SET(pbmc_reg, in, local_port, 1);
+	err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 1);
+
+	kfree(out);
+	return err;
+}
+
+/* buffer[i]: buffer that priority i mapped to */
+int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer)
+{
+	int sz = MLX5_ST_SZ_BYTES(pptb_reg);
+	u32 prio_x_buff;
+	void *out;
+	void *in;
+	int prio;
+	int err;
+
+	in = kzalloc(sz, GFP_KERNEL);
+	out = kzalloc(sz, GFP_KERNEL);
+	if (!in || !out) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	MLX5_SET(pptb_reg, in, local_port, 1);
+	err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0);
+	if (err)
+		goto out;
+
+	prio_x_buff = MLX5_GET(pptb_reg, out, prio_x_buff);
+	for (prio = 0; prio < 8; prio++) {
+		buffer[prio] = (u8)(prio_x_buff >> (4 * prio)) & 0xF;
+		mlx5_core_dbg(mdev, "prio %d, buffer %d\n", prio, buffer[prio]);
+	}
+out:
+	kfree(in);
+	kfree(out);
+	return err;
+}
+
+int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer)
+{
+	int sz = MLX5_ST_SZ_BYTES(pptb_reg);
+	u32 prio_x_buff;
+	void *out;
+	void *in;
+	int prio;
+	int err;
+
+	in = kzalloc(sz, GFP_KERNEL);
+	out = kzalloc(sz, GFP_KERNEL);
+	if (!in || !out) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* First query the pptb register */
+	MLX5_SET(pptb_reg, in, local_port, 1);
+	err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0);
+	if (err)
+		goto out;
+
+	memcpy(in, out, sz);
+	MLX5_SET(pptb_reg, in, local_port, 1);
+
+	/* Update the pm and prio_x_buff */
+	MLX5_SET(pptb_reg, in, pm, 0xFF);
+
+	prio_x_buff = 0;
+	for (prio = 0; prio < 8; prio++)
+		prio_x_buff |= (buffer[prio] << (4 * prio));
+	MLX5_SET(pptb_reg, in, prio_x_buff, prio_x_buff);
+
+	err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 1);
+
+out:
+	kfree(in);
+	kfree(out);
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
index 7aae38e98a65..f8cbd8194179 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -40,4 +40,9 @@ u32 mlx5e_port_ptys2speed(u32 eth_proto_oper);
 int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
 int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
 u32 mlx5e_port_speed2linkmodes(u32 speed);
+
+int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out);
+int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in);
+int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
+int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
 #endif
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 2a156c5dfadd..395fc1a9e378 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -124,6 +124,8 @@ enum {
 	MLX5_REG_PAOS		 = 0x5006,
 	MLX5_REG_PFCC            = 0x5007,
 	MLX5_REG_PPCNT		 = 0x5008,
+	MLX5_REG_PPTB            = 0x500b,
+	MLX5_REG_PBMC            = 0x500c,
 	MLX5_REG_PMAOS		 = 0x5012,
 	MLX5_REG_PUDE		 = 0x5009,
 	MLX5_REG_PMPE		 = 0x5010,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f687989d336b..edbddeaacc88 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -8788,6 +8788,41 @@ struct mlx5_ifc_qpts_reg_bits {
 	u8         trust_state[0x3];
 };
 
+struct mlx5_ifc_pptb_reg_bits {
+	u8         reserved_at_0[0x2];
+	u8         mm[0x2];
+	u8         reserved_at_4[0x4];
+	u8         local_port[0x8];
+	u8         reserved_at_10[0x6];
+	u8         cm[0x1];
+	u8         um[0x1];
+	u8         pm[0x8];
+
+	u8         prio_x_buff[0x20];
+
+	u8         pm_msb[0x8];
+	u8         reserved_at_48[0x10];
+	u8         ctrl_buff[0x4];
+	u8         untagged_buff[0x4];
+};
+
+struct mlx5_ifc_pbmc_reg_bits {
+	u8         reserved_at_0[0x8];
+	u8         local_port[0x8];
+	u8         reserved_at_10[0x10];
+
+	u8         xoff_timer_value[0x10];
+	u8         xoff_refresh[0x10];
+
+	u8         reserved_at_40[0x9];
+	u8         fullness_threshold[0x7];
+	u8         port_buffer_size[0x10];
+
+	struct mlx5_ifc_bufferx_reg_bits buffer[10];
+
+	u8         reserved_at_2e0[0x40];
+};
+
 struct mlx5_ifc_qtct_reg_bits {
 	u8         reserved_at_0[0x8];
 	u8         port_number[0x8];
-- 
2.17.0

^ permalink raw reply related

* [net-next 3/6] net/mlx5e: Move port speed code from en_ethtool.c to en/port.c
From: Saeed Mahameed @ 2018-05-21 21:04 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Huy Nguyen, Saeed Mahameed
In-Reply-To: <20180521210502.11082-1-saeedm@mellanox.com>

From: Huy Nguyen <huyn@mellanox.com>

Move four below functions from en_ethtool.c to en/port.c. These
functions are used by both en_ethtool.c and en_main.c. Downstream
patches will use these functions without ethtool link mode dependency.
  u32 mlx5e_port_ptys2speed(u32 eth_proto_oper);
  int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
  int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
  u32 mlx5e_port_speed2linkmodes(u32 speed);

Delete the speed field from table mlx5e_build_ptys2ethtool_map. This
table only keeps the mapping between the mlx5e link mode and
ethtool link mode. Add new table mlx5e_link_speed for translation
from mlx5e link mode to actual speed.

Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   2 -
 .../ethernet/mellanox/mlx5/core/en/Makefile   |   1 +
 .../net/ethernet/mellanox/mlx5/core/en/port.c | 129 ++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/en/port.h |  43 ++++++
 .../ethernet/mellanox/mlx5/core/en_ethtool.c  | 102 +++++---------
 .../net/ethernet/mellanox/mlx5/core/en_main.c |   3 +-
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   |   3 +-
 8 files changed, 213 insertions(+), 72 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/Makefile
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/port.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/port.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index a7135f5d5cf6..651cf3640420 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -15,7 +15,7 @@ mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
 		en_tx.o en_rx.o en_dim.o en_txrx.o en_stats.o vxlan.o \
-		en_arfs.o en_fs_ethtool.o en_selftest.o
+		en_arfs.o en_fs_ethtool.o en_selftest.o en/port.o
 
 mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index bc91a7335c93..d13a86a1d702 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -932,8 +932,6 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
 
 void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
 				   int num_channels);
-int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
-
 void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
 				 u8 cq_period_mode);
 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile
new file mode 100644
index 000000000000..d8e17110f25d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
new file mode 100644
index 000000000000..9f04542f3661
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "port.h"
+
+/* speed in units of 1Mb */
+static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = {
+	[MLX5E_1000BASE_CX_SGMII] = 1000,
+	[MLX5E_1000BASE_KX]       = 1000,
+	[MLX5E_10GBASE_CX4]       = 10000,
+	[MLX5E_10GBASE_KX4]       = 10000,
+	[MLX5E_10GBASE_KR]        = 10000,
+	[MLX5E_20GBASE_KR2]       = 20000,
+	[MLX5E_40GBASE_CR4]       = 40000,
+	[MLX5E_40GBASE_KR4]       = 40000,
+	[MLX5E_56GBASE_R4]        = 56000,
+	[MLX5E_10GBASE_CR]        = 10000,
+	[MLX5E_10GBASE_SR]        = 10000,
+	[MLX5E_10GBASE_ER]        = 10000,
+	[MLX5E_40GBASE_SR4]       = 40000,
+	[MLX5E_40GBASE_LR4]       = 40000,
+	[MLX5E_50GBASE_SR2]       = 50000,
+	[MLX5E_100GBASE_CR4]      = 100000,
+	[MLX5E_100GBASE_SR4]      = 100000,
+	[MLX5E_100GBASE_KR4]      = 100000,
+	[MLX5E_100GBASE_LR4]      = 100000,
+	[MLX5E_100BASE_TX]        = 100,
+	[MLX5E_1000BASE_T]        = 1000,
+	[MLX5E_10GBASE_T]         = 10000,
+	[MLX5E_25GBASE_CR]        = 25000,
+	[MLX5E_25GBASE_KR]        = 25000,
+	[MLX5E_25GBASE_SR]        = 25000,
+	[MLX5E_50GBASE_CR2]       = 50000,
+	[MLX5E_50GBASE_KR2]       = 50000,
+};
+
+u32 mlx5e_port_ptys2speed(u32 eth_proto_oper)
+{
+	unsigned long temp = eth_proto_oper;
+	u32 speed = 0;
+	int i;
+
+	i = find_first_bit(&temp, MLX5E_LINK_MODES_NUMBER);
+	if (i < MLX5E_LINK_MODES_NUMBER)
+		speed = mlx5e_link_speed[i];
+
+	return speed;
+}
+
+int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
+{
+	u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {};
+	u32 eth_proto_oper;
+	int err;
+
+	err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
+	if (err)
+		return err;
+
+	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
+	*speed = mlx5e_port_ptys2speed(eth_proto_oper);
+	if (!(*speed)) {
+		mlx5_core_warn(mdev, "cannot get port speed\n");
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
+{
+	u32 max_speed = 0;
+	u32 proto_cap;
+	int err;
+	int i;
+
+	err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN);
+	if (err)
+		return err;
+
+	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i)
+		if (proto_cap & MLX5E_PROT_MASK(i))
+			max_speed = max(max_speed, mlx5e_link_speed[i]);
+
+	*speed = max_speed;
+	return 0;
+}
+
+u32 mlx5e_port_speed2linkmodes(u32 speed)
+{
+	u32 link_modes = 0;
+	int i;
+
+	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
+		if (mlx5e_link_speed[i] == speed)
+			link_modes |= MLX5E_PROT_MASK(i);
+	}
+
+	return link_modes;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
new file mode 100644
index 000000000000..7aae38e98a65
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5E_EN_PORT_H
+#define __MLX5E_EN_PORT_H
+
+#include <linux/mlx5/driver.h>
+#include "en.h"
+
+u32 mlx5e_port_ptys2speed(u32 eth_proto_oper);
+int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
+int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
+u32 mlx5e_port_speed2linkmodes(u32 speed);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 2b786c4d3dab..42bd256e680d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -31,6 +31,7 @@
  */
 
 #include "en.h"
+#include "en/port.h"
 
 void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
 			       struct ethtool_drvinfo *drvinfo)
@@ -59,18 +60,16 @@ static void mlx5e_get_drvinfo(struct net_device *dev,
 struct ptys2ethtool_config {
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(advertised);
-	u32 speed;
 };
 
 static struct ptys2ethtool_config ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER];
 
-#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, speed_, ...)               \
+#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, ...)                       \
 	({                                                              \
 		struct ptys2ethtool_config *cfg;                        \
 		const unsigned int modes[] = { __VA_ARGS__ };           \
 		unsigned int i;                                         \
 		cfg = &ptys2ethtool_table[reg_];                        \
-		cfg->speed = speed_;                                    \
 		bitmap_zero(cfg->supported,                             \
 			    __ETHTOOL_LINK_MODE_MASK_NBITS);            \
 		bitmap_zero(cfg->advertised,                            \
@@ -83,55 +82,55 @@ static struct ptys2ethtool_config ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER];
 
 void mlx5e_build_ptys2ethtool_map(void)
 {
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, SPEED_1000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII,
 				       ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, SPEED_1000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX,
 				       ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, SPEED_10000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4,
 				       ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, SPEED_10000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4,
 				       ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, SPEED_10000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR,
 				       ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, SPEED_20000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2,
 				       ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, SPEED_40000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4,
 				       ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, SPEED_40000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4,
 				       ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, SPEED_56000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4,
 				       ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, SPEED_10000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR,
 				       ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, SPEED_10000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR,
 				       ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, SPEED_10000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER,
 				       ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, SPEED_40000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4,
 				       ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, SPEED_40000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4,
 				       ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, SPEED_50000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2,
 				       ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, SPEED_100000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4,
 				       ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, SPEED_100000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4,
 				       ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, SPEED_100000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4,
 				       ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, SPEED_100000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4,
 				       ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, SPEED_10000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T,
 				       ETHTOOL_LINK_MODE_10000baseT_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, SPEED_25000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR,
 				       ETHTOOL_LINK_MODE_25000baseCR_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, SPEED_25000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR,
 				       ETHTOOL_LINK_MODE_25000baseKR_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, SPEED_25000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR,
 				       ETHTOOL_LINK_MODE_25000baseSR_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, SPEED_50000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2,
 				       ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, SPEED_50000,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2,
 				       ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT);
 }
 
@@ -617,43 +616,24 @@ static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings
 	}
 }
 
-int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
-{
-	u32 max_speed = 0;
-	u32 proto_cap;
-	int err;
-	int i;
-
-	err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN);
-	if (err)
-		return err;
-
-	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i)
-		if (proto_cap & MLX5E_PROT_MASK(i))
-			max_speed = max(max_speed, ptys2ethtool_table[i].speed);
-
-	*speed = max_speed;
-	return 0;
-}
-
 static void get_speed_duplex(struct net_device *netdev,
 			     u32 eth_proto_oper,
 			     struct ethtool_link_ksettings *link_ksettings)
 {
-	int i;
 	u32 speed = SPEED_UNKNOWN;
 	u8 duplex = DUPLEX_UNKNOWN;
 
 	if (!netif_carrier_ok(netdev))
 		goto out;
 
-	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
-		if (eth_proto_oper & MLX5E_PROT_MASK(i)) {
-			speed = ptys2ethtool_table[i].speed;
-			duplex = DUPLEX_FULL;
-			break;
-		}
+	speed = mlx5e_port_ptys2speed(eth_proto_oper);
+	if (!speed) {
+		speed = SPEED_UNKNOWN;
+		goto out;
 	}
+
+	duplex = DUPLEX_FULL;
+
 out:
 	link_ksettings->base.speed = speed;
 	link_ksettings->base.duplex = duplex;
@@ -811,18 +791,6 @@ static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes)
 	return ptys_modes;
 }
 
-static u32 mlx5e_ethtool2ptys_speed_link(u32 speed)
-{
-	u32 i, speed_links = 0;
-
-	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
-		if (ptys2ethtool_table[i].speed == speed)
-			speed_links |= MLX5E_PROT_MASK(i);
-	}
-
-	return speed_links;
-}
-
 static int mlx5e_set_link_ksettings(struct net_device *netdev,
 				    const struct ethtool_link_ksettings *link_ksettings)
 {
@@ -842,7 +810,7 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev,
 
 	link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ?
 		mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) :
-		mlx5e_ethtool2ptys_speed_link(speed);
+		mlx5e_port_speed2linkmodes(speed);
 
 	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
 	if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b5a7580b12fe..cee44c21766c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -46,6 +46,7 @@
 #include "accel/ipsec.h"
 #include "accel/tls.h"
 #include "vxlan.h"
+#include "en/port.h"
 
 struct mlx5e_rq_param {
 	u32			rqc[MLX5_ST_SZ_DW(rqc)];
@@ -4082,7 +4083,7 @@ static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
 	u32 link_speed = 0;
 	u32 pci_bw = 0;
 
-	mlx5e_get_max_linkspeed(mdev, &link_speed);
+	mlx5e_port_max_linkspeed(mdev, &link_speed);
 	pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
 	mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
 			   link_speed, pci_bw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 674f1d7d2737..a9c96fe8e4fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -52,6 +52,7 @@
 #include "eswitch.h"
 #include "vxlan.h"
 #include "fs_core.h"
+#include "en/port.h"
 
 struct mlx5_nic_flow_attr {
 	u32 action;
@@ -613,7 +614,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
 
 	params.q_counter = priv->q_counter;
 	/* set hairpin pair per each 50Gbs share of the link */
-	mlx5e_get_max_linkspeed(priv->mdev, &link_speed);
+	mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
 	link_speed = max_t(u32, link_speed, 50000);
 	link_speed64 = link_speed;
 	do_div(link_speed64, 50000);
-- 
2.17.0

^ permalink raw reply related

* [net-next 2/6] net/mlx5: Add pbmc and pptb in the port_access_reg_cap_mask
From: Saeed Mahameed @ 2018-05-21 21:04 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Huy Nguyen, Saeed Mahameed
In-Reply-To: <20180521210502.11082-1-saeedm@mellanox.com>

From: Huy Nguyen <huyn@mellanox.com>

Add pbmc and pptb in the port_access_reg_cap_mask. These two
bits determine if device supports receive buffer configuration.

Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h   |  3 +++
 include/linux/mlx5/mlx5_ifc.h | 12 ++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 2bc27f8c5b87..db0332a6d23c 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1152,6 +1152,9 @@ enum mlx5_qcam_feature_groups {
 #define MLX5_CAP_PCAM_FEATURE(mdev, fld) \
 	MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld)
 
+#define MLX5_CAP_PCAM_REG(mdev, reg) \
+	MLX5_GET(pcam_reg, (mdev)->caps.pcam, port_access_reg_cap_mask.regs_5000_to_507f.reg)
+
 #define MLX5_CAP_MCAM_REG(mdev, reg) \
 	MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_access_reg_cap_mask.access_regs.reg)
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index b4ea8a9914c4..f687989d336b 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -8003,6 +8003,17 @@ struct mlx5_ifc_pcam_enhanced_features_bits {
 	u8         ppcnt_statistical_group[0x1];
 };
 
+struct mlx5_ifc_pcam_regs_5000_to_507f_bits {
+	u8         port_access_reg_cap_mask_127_to_96[0x20];
+	u8         port_access_reg_cap_mask_95_to_64[0x20];
+	u8         port_access_reg_cap_mask_63_to_32[0x20];
+
+	u8         port_access_reg_cap_mask_31_to_13[0x13];
+	u8         pbmc[0x1];
+	u8         pptb[0x1];
+	u8         port_access_reg_cap_mask_10_to_0[0xb];
+};
+
 struct mlx5_ifc_pcam_reg_bits {
 	u8         reserved_at_0[0x8];
 	u8         feature_group[0x8];
@@ -8012,6 +8023,7 @@ struct mlx5_ifc_pcam_reg_bits {
 	u8         reserved_at_20[0x20];
 
 	union {
+		struct mlx5_ifc_pcam_regs_5000_to_507f_bits regs_5000_to_507f;
 		u8         reserved_at_0[0x80];
 	} port_access_reg_cap_mask;
 
-- 
2.17.0

^ permalink raw reply related

* [net-next 1/6] net/dcb: Add dcbnl buffer attribute
From: Saeed Mahameed @ 2018-05-21 21:04 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Huy Nguyen, Saeed Mahameed
In-Reply-To: <20180521210502.11082-1-saeedm@mellanox.com>

From: Huy Nguyen <huyn@mellanox.com>

In this patch, we add dcbnl buffer attribute to allow user
change the NIC's buffer configuration such as priority
to buffer mapping and buffer size of individual buffer.

This attribute combined with pfc attribute allows advance user to
fine tune the qos setting for specific priority queue. For example,
user can give dedicated buffer for one or more prirorities or user
can give large buffer to certain priorities.

We present an use case scenario where dcbnl buffer attribute configured
by advance user helps reduce the latency of messages of different sizes.

Scenarios description:
On ConnectX-5, we run latency sensitive traffic with
small/medium message sizes ranging from 64B to 256KB and bandwidth sensitive
traffic with large messages sizes 512KB and 1MB. We group small, medium,
and large message sizes to their own pfc enables priorities as follow.
  Priorities 1 & 2 (64B, 256B and 1KB)
  Priorities 3 & 4 (4KB, 8KB, 16KB, 64KB, 128KB and 256KB)
  Priorities 5 & 6 (512KB and 1MB)

By default, ConnectX-5 maps all pfc enabled priorities to a single
lossless fixed buffer size of 50% of total available buffer space. The
other 50% is assigned to lossy buffer. Using dcbnl buffer attribute,
we create three equal size lossless buffers. Each buffer has 25% of total
available buffer space. Thus, the lossy buffer size reduces to 25%. Priority
to lossless  buffer mappings are set as follow.
  Priorities 1 & 2 on lossless buffer #1
  Priorities 3 & 4 on lossless buffer #2
  Priorities 5 & 6 on lossless buffer #3

We observe improvements in latency for small and medium message sizes
as follows. Please note that the large message sizes bandwidth performance is
reduced but the total bandwidth remains the same.
  256B message size (42 % latency reduction)
  4K message size (21% latency reduction)
  64K message size (16% latency reduction)

Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/net/dcbnl.h        |  4 ++++
 include/uapi/linux/dcbnl.h | 10 ++++++++++
 net/dcb/dcbnl.c            | 20 ++++++++++++++++++++
 3 files changed, 34 insertions(+)

diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index 207d9ba1f92c..0e5e91be2d30 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -101,6 +101,10 @@ struct dcbnl_rtnl_ops {
 	/* CEE peer */
 	int (*cee_peer_getpg) (struct net_device *, struct cee_pg *);
 	int (*cee_peer_getpfc) (struct net_device *, struct cee_pfc *);
+
+	/* buffer settings */
+	int (*dcbnl_getbuffer)(struct net_device *, struct dcbnl_buffer *);
+	int (*dcbnl_setbuffer)(struct net_device *, struct dcbnl_buffer *);
 };
 
 #endif /* __NET_DCBNL_H__ */
diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h
index 2c0c6453c3f4..1ddc0a44c172 100644
--- a/include/uapi/linux/dcbnl.h
+++ b/include/uapi/linux/dcbnl.h
@@ -163,6 +163,15 @@ struct ieee_pfc {
 	__u64	indications[IEEE_8021QAZ_MAX_TCS];
 };
 
+#define IEEE_8021Q_MAX_PRIORITIES 8
+#define DCBX_MAX_BUFFERS  8
+struct dcbnl_buffer {
+	/* priority to buffer mapping */
+	__u8    prio2buffer[IEEE_8021Q_MAX_PRIORITIES];
+	/* buffer size in Bytes */
+	__u32   buffer_size[DCBX_MAX_BUFFERS];
+};
+
 /* CEE DCBX std supported values */
 #define CEE_DCBX_MAX_PGS	8
 #define CEE_DCBX_MAX_PRIO	8
@@ -406,6 +415,7 @@ enum ieee_attrs {
 	DCB_ATTR_IEEE_MAXRATE,
 	DCB_ATTR_IEEE_QCN,
 	DCB_ATTR_IEEE_QCN_STATS,
+	DCB_ATTR_DCB_BUFFER,
 	__DCB_ATTR_IEEE_MAX
 };
 #define DCB_ATTR_IEEE_MAX (__DCB_ATTR_IEEE_MAX - 1)
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index bae7d78aa068..d2f4e0c1faaf 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -176,6 +176,7 @@ static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = {
 	[DCB_ATTR_IEEE_MAXRATE]   = {.len = sizeof(struct ieee_maxrate)},
 	[DCB_ATTR_IEEE_QCN]         = {.len = sizeof(struct ieee_qcn)},
 	[DCB_ATTR_IEEE_QCN_STATS]   = {.len = sizeof(struct ieee_qcn_stats)},
+	[DCB_ATTR_DCB_BUFFER]       = {.len = sizeof(struct dcbnl_buffer)},
 };
 
 /* DCB number of traffic classes nested attributes. */
@@ -1094,6 +1095,16 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
 			return -EMSGSIZE;
 	}
 
+	if (ops->dcbnl_getbuffer) {
+		struct dcbnl_buffer buffer;
+
+		memset(&buffer, 0, sizeof(buffer));
+		err = ops->dcbnl_getbuffer(netdev, &buffer);
+		if (!err &&
+		    nla_put(skb, DCB_ATTR_DCB_BUFFER, sizeof(buffer), &buffer))
+			return -EMSGSIZE;
+	}
+
 	app = nla_nest_start(skb, DCB_ATTR_IEEE_APP_TABLE);
 	if (!app)
 		return -EMSGSIZE;
@@ -1453,6 +1464,15 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
 			goto err;
 	}
 
+	if (ieee[DCB_ATTR_DCB_BUFFER] && ops->dcbnl_setbuffer) {
+		struct dcbnl_buffer *buffer =
+			nla_data(ieee[DCB_ATTR_DCB_BUFFER]);
+
+		err = ops->dcbnl_setbuffer(netdev, buffer);
+		if (err)
+			goto err;
+	}
+
 	if (ieee[DCB_ATTR_IEEE_APP_TABLE]) {
 		struct nlattr *attr;
 		int rem;
-- 
2.17.0

^ permalink raw reply related

* [pull request][net-next 0/6] Mellanox, mlx5e updates 2018-05-19
From: Saeed Mahameed @ 2018-05-21 21:04 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Saeed Mahameed

Hi Dave,

This is a mlx5e only pull request, for more information please see tag
log below.

Please pull and let me know if there's any problem.

Thanks,
Saeed.

---

The following changes since commit eb38401c779d350e9e31396471ea072fa29aec9b:

  net: stmmac: Populate missing callbacks in HWIF initialization (2018-05-18 13:56:08 -0400)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git tags/mlx5e-updates-2018-05-19

for you to fetch changes up to e4362d6f7191732f47f5bc63cc4c736494f1f964:

  net/mlx5e: Receive buffer support for DCBX (2018-05-19 06:09:23 -0700)

----------------------------------------------------------------
mlx5e-updates-2018-05-19

This series contains updates for mlx5e netdevice driver with one subject,
DSCP to priority mapping, in the first patch Huy adds the needed API in
dcbnl, the second patch adds the needed mlx5 core capability bits for the
feature, and all other patches are mlx5e (netdev) only changes to add
support for the feature.

From: Huy Nguyen

Dscp to priority mapping for Ethernet packet:

These patches enable differentiated services code point (dscp) to
priority mapping for Ethernet packet. Once this feature is
enabled, the packet is routed to the corresponding priority based on its
dscp. User can combine this feature with priority flow control (pfc)
feature to have priority flow control based on the dscp.

Firmware interface:
Mellanox firmware provides two control knobs for this feature:
  QPTS register allow changing the trust state between dscp and
  pcp mode. The default is pcp mode. Once in dscp mode, firmware will
  route the packet based on its dscp value if the dscp field exists.

  QPDPM register allow mapping a specific dscp (0 to 63) to a
  specific priority (0 to 7). By default, all the dscps are mapped to
  priority zero.

Software interface:
This feature is controlled via application priority TLV. IEEE
specification P802.1Qcd/D2.1 defines priority selector id 5 for
application priority TLV. This APP TLV selector defines DSCP to priority
map. This APP TLV can be sent by the switch or can be set locally using
software such as lldptool. In mlx5 drivers, we add the support for net
dcb's getapp and setapp call back. Mlx5 driver only handles the selector
id 5 application entry (dscp application priority application entry).
If user sends multiple dscp to priority APP TLV entries on the same
dscp, the last sent one will take effect. All the previous sent will be
deleted.

The firmware trust state (in QPTS register) is changed based on the
number of dscp to priority application entries. When the first dscp to
priority application entry is added by the user, the trust state is
changed to dscp. When the last dscp to priority application entry is
deleted by the user, the trust state is changed to pcp.

When the port is in DSCP trust state, the transmit queue is selected
based on the dscp of the skb.

When the port is in DSCP trust state and vport inline mode is not NONE,
firmware requires mlx5 driver to copy the IP header to the
wqe ethernet segment inline header if the skb has it.
This is done by changing the transmit queue sq's min inline mode to L3.
Note that the min inline mode of sqs that belong to other features
such as xdpsq, icosq are not modified.

----------------------------------------------------------------
Huy Nguyen (6):
      net/dcb: Add dcbnl buffer attribute
      net/mlx5: Add pbmc and pptb in the port_access_reg_cap_mask
      net/mlx5e: Move port speed code from en_ethtool.c to en/port.c
      net/mlx5e: PPTB and PBMC register firmware command support
      net/mlx5e: Receive buffer configuration
      net/mlx5e: Receive buffer support for DCBX

 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |   8 +-
 .../net/ethernet/mellanox/mlx5/core/en/Makefile    |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/en/port.c  | 237 +++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en/port.h  |  48 +++
 .../ethernet/mellanox/mlx5/core/en/port_buffer.c   | 327 +++++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/en/port_buffer.h   |  75 +++++
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 131 ++++++++-
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 102 +++----
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |   3 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |   3 +-
 include/linux/mlx5/device.h                        |   3 +
 include/linux/mlx5/driver.h                        |   2 +
 include/linux/mlx5/mlx5_ifc.h                      |  47 +++
 include/net/dcbnl.h                                |   4 +
 include/uapi/linux/dcbnl.h                         |  10 +
 net/dcb/dcbnl.c                                    |  20 ++
 17 files changed, 945 insertions(+), 80 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/Makefile
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/port.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/port.h
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h

^ permalink raw reply

* [PATCH net-next 3/3] net/ipv4: Remove tracepoint in fib_validate_source
From: dsahern @ 2018-05-21 21:24 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern
In-Reply-To: <20180521212443.23612-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Tracepoint does not add value and the call to fib_lookup follows
it which shows the same information and the fib lookup result.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/trace/events/fib.h | 35 -----------------------------------
 net/ipv4/fib_frontend.c    |  2 --
 2 files changed, 37 deletions(-)

diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h
index f5a1d4c518d8..9763cddd0594 100644
--- a/include/trace/events/fib.h
+++ b/include/trace/events/fib.h
@@ -87,41 +87,6 @@ TRACE_EVENT(fib_table_lookup,
 		  __entry->tos, __entry->scope, __entry->flags,
 		  __get_str(name), __entry->gw, __entry->saddr, __entry->err)
 );
-
-TRACE_EVENT(fib_validate_source,
-
-	TP_PROTO(const struct net_device *dev, const struct flowi4 *flp),
-
-	TP_ARGS(dev, flp),
-
-	TP_STRUCT__entry(
-		__string(	name,	dev->name	)
-		__field(	int,	oif		)
-		__field(	int,	iif		)
-		__field(	__u8,	tos		)
-		__array(	__u8,	src,	4	)
-		__array(	__u8,	dst,	4	)
-	),
-
-	TP_fast_assign(
-		__be32 *p32;
-
-		__assign_str(name, dev ? dev->name : "not set");
-		__entry->oif = flp->flowi4_oif;
-		__entry->iif = flp->flowi4_iif;
-		__entry->tos = flp->flowi4_tos;
-
-		p32 = (__be32 *) __entry->src;
-		*p32 = flp->saddr;
-
-		p32 = (__be32 *) __entry->dst;
-		*p32 = flp->daddr;
-	),
-
-	TP_printk("dev %s oif %d iif %d tos %d src %pI4 dst %pI4",
-		  __get_str(name), __entry->oif, __entry->iif, __entry->tos,
-		  __entry->src, __entry->dst)
-);
 #endif /* _TRACE_FIB_H */
 
 /* This part must be outside protection */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4d622112bf95..58696b829065 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -354,8 +354,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 		fl4.fl4_dport = 0;
 	}
 
-	trace_fib_validate_source(dev, &fl4);
-
 	if (fib_lookup(net, &fl4, &res, 0))
 		goto last_resort;
 	if (res.type != RTN_UNICAST &&
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 2/3] net/ipv6: Udate fib6_table_lookup tracepoint
From: dsahern @ 2018-05-21 21:24 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern
In-Reply-To: <20180521212443.23612-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Commit bb0ad1987e96 ("ipv6: fib6_rules: support for match on sport, dport
and ip proto") added support for protocol and ports to FIB rules.
Update the FIB lookup tracepoint to dump the parameters.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h       |  2 ++
 include/trace/events/fib6.h | 29 ++++++++++++++++++++++-------
 net/ipv6/route.c            |  2 +-
 3 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index cc70f6da8462..63fd9888f006 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -286,6 +286,8 @@ static inline void fib6_info_release(struct fib6_info *f6i)
 		fib6_info_destroy(f6i);
 }
 
+int ip6_rt_type_to_error(u8 fib6_type);
+
 enum fib6_walk_state {
 #ifdef CONFIG_IPV6_SUBTREES
 	FWS_S,
diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h
index 1b8d951e3c12..b088b54d699c 100644
--- a/include/trace/events/fib6.h
+++ b/include/trace/events/fib6.h
@@ -19,7 +19,7 @@ TRACE_EVENT(fib6_table_lookup,
 
 	TP_STRUCT__entry(
 		__field(	u32,	tb_id		)
-
+		__field(	int,	err		)
 		__field(	int,	oif		)
 		__field(	int,	iif		)
 		__field(	__u8,	tos		)
@@ -27,7 +27,10 @@ TRACE_EVENT(fib6_table_lookup,
 		__field(	__u8,	flags		)
 		__array(	__u8,	src,	16	)
 		__array(	__u8,	dst,	16	)
-
+		__field(        u16,	sport		)
+		__field(        u16,	dport		)
+		__field(        u8,	proto		)
+		__field(        u8,	rt_type		)
 		__dynamic_array(	char,	name,	IFNAMSIZ )
 		__array(		__u8,	gw,	16	 )
 	),
@@ -36,6 +39,7 @@ TRACE_EVENT(fib6_table_lookup,
 		struct in6_addr *in6;
 
 		__entry->tb_id = table->tb6_id;
+		__entry->err = ip6_rt_type_to_error(f6i->fib6_type);
 		__entry->oif = flp->flowi6_oif;
 		__entry->iif = flp->flowi6_iif;
 		__entry->tos = ip6_tclass(flp->flowlabel);
@@ -48,10 +52,20 @@ TRACE_EVENT(fib6_table_lookup,
 		in6 = (struct in6_addr *)__entry->dst;
 		*in6 = flp->daddr;
 
+		__entry->proto = flp->flowi6_proto;
+		if (__entry->proto == IPPROTO_TCP ||
+		    __entry->proto == IPPROTO_UDP) {
+			__entry->sport = ntohs(flp->fl6_sport);
+			__entry->dport = ntohs(flp->fl6_dport);
+		} else {
+			__entry->sport = 0;
+			__entry->dport = 0;
+		}
+
 		if (f6i->fib6_nh.nh_dev) {
 			__assign_str(name, f6i->fib6_nh.nh_dev);
 		} else {
-			__assign_str(name, "");
+			__assign_str(name, "-");
 		}
 		if (f6i == net->ipv6.fib6_null_entry) {
 			struct in6_addr in6_zero = {};
@@ -65,10 +79,11 @@ TRACE_EVENT(fib6_table_lookup,
 		}
 	),
 
-	TP_printk("table %3u oif %d iif %d src %pI6c dst %pI6c tos %d scope %d flags %x ==> dev %s gw %pI6c",
-		  __entry->tb_id, __entry->oif, __entry->iif,
-		  __entry->src, __entry->dst, __entry->tos, __entry->scope,
-		  __entry->flags, __get_str(name), __entry->gw)
+	TP_printk("table %3u oif %d iif %d proto %u %pI6c/%u -> %pI6c/%u tos %d scope %d flags %x ==> dev %s gw %pI6c err %d",
+		  __entry->tb_id, __entry->oif, __entry->iif, __entry->proto,
+		  __entry->src, __entry->sport, __entry->dst, __entry->dport,
+		  __entry->tos, __entry->scope, __entry->flags,
+		  __get_str(name), __entry->gw, __entry->err)
 );
 
 #endif /* _TRACE_FIB6_H */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cc24ed3bc334..ae27e57c5997 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -897,7 +897,7 @@ static const int fib6_prop[RTN_MAX + 1] = {
 	[RTN_XRESOLVE]	= -EINVAL,
 };
 
-static int ip6_rt_type_to_error(u8 fib6_type)
+int ip6_rt_type_to_error(u8 fib6_type)
 {
 	return fib6_prop[fib6_type];
 }
-- 
2.11.0

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox