Netdev List

Netdev List
 help / color / mirror / Atom feed

* [net-next v2 12/13] i40e: Persistent LLDP support
From: Jeff Kirsher @ 2019-08-22 20:30 UTC (permalink / raw)
  To: davem
  Cc: Sylwia Wnuczko, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190822203039.15668-1-jeffrey.t.kirsher@intel.com>

From: Sylwia Wnuczko <sylwia.wnuczko@intel.com>

This patch adds a function to read NVM module data and uses it to
read current LLDP agent configuration from NVM API version 1.8.

Signed-off-by: Sylwia Wnuczko <sylwia.wnuczko@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_dcb.c    |  18 +++-
 drivers/net/ethernet/intel/i40e/i40e_dcb.h    |   2 +
 drivers/net/ethernet/intel/i40e/i40e_nvm.c    | 101 ++++++++++++++++++
 .../net/ethernet/intel/i40e/i40e_prototype.h  |   6 ++
 drivers/net/ethernet/intel/i40e/i40e_type.h   |   1 +
 5 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
index 292eeb3def10..200a1cb3b536 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -877,7 +877,23 @@ i40e_status i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change)
 		return I40E_NOT_SUPPORTED;
 
 	/* Read LLDP NVM area */
-	ret = i40e_read_lldp_cfg(hw, &lldp_cfg);
+	if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT) {
+		u8 offset = 0;
+
+		if (hw->mac.type == I40E_MAC_XL710)
+			offset = I40E_LLDP_CURRENT_STATUS_XL710_OFFSET;
+		else if (hw->mac.type == I40E_MAC_X722)
+			offset = I40E_LLDP_CURRENT_STATUS_X722_OFFSET;
+		else
+			return I40E_NOT_SUPPORTED;
+
+		ret = i40e_read_nvm_module_data(hw,
+						I40E_SR_EMP_SR_SETTINGS_PTR,
+						offset, 1,
+						&lldp_cfg.adminstatus);
+	} else {
+		ret = i40e_read_lldp_cfg(hw, &lldp_cfg);
+	}
 	if (ret)
 		return I40E_ERR_NOT_READY;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
index ddb48ae7cce4..2a80c5daa376 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
@@ -30,6 +30,8 @@
 #define I40E_CEE_SUBTYPE_APP_PRI	4
 
 #define I40E_CEE_MAX_FEAT_TYPE		3
+#define I40E_LLDP_CURRENT_STATUS_XL710_OFFSET	0x2B
+#define I40E_LLDP_CURRENT_STATUS_X722_OFFSET	0x31
 /* Defines for LLDP TLV header */
 #define I40E_LLDP_TLV_LEN_SHIFT		0
 #define I40E_LLDP_TLV_LEN_MASK		(0x01FF << I40E_LLDP_TLV_LEN_SHIFT)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index c508b75c3c09..e4d8d20baf3b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -321,6 +321,77 @@ i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
 	return ret_code;
 }
 
+/**
+ * i40e_read_nvm_module_data - Reads NVM Buffer to specified memory location
+ * @hw: pointer to the HW structure
+ * @module_ptr: Pointer to module in words with respect to NVM beginning
+ * @offset: offset in words from module start
+ * @words_data_size: Words to read from NVM
+ * @data_ptr: Pointer to memory location where resulting buffer will be stored
+ **/
+i40e_status i40e_read_nvm_module_data(struct i40e_hw *hw,
+				      u8 module_ptr, u16 offset,
+				      u16 words_data_size,
+				      u16 *data_ptr)
+{
+	i40e_status status;
+	u16 ptr_value = 0;
+	u32 flat_offset;
+
+	if (module_ptr != 0) {
+		status = i40e_read_nvm_word(hw, module_ptr, &ptr_value);
+		if (status) {
+			i40e_debug(hw, I40E_DEBUG_ALL,
+				   "Reading nvm word failed.Error code: %d.\n",
+				   status);
+			return I40E_ERR_NVM;
+		}
+	}
+#define I40E_NVM_INVALID_PTR_VAL 0x7FFF
+#define I40E_NVM_INVALID_VAL 0xFFFF
+
+	/* Pointer not initialized */
+	if (ptr_value == I40E_NVM_INVALID_PTR_VAL ||
+	    ptr_value == I40E_NVM_INVALID_VAL)
+		return I40E_ERR_BAD_PTR;
+
+	/* Check whether the module is in SR mapped area or outside */
+	if (ptr_value & I40E_PTR_TYPE) {
+		/* Pointer points outside of the Shared RAM mapped area */
+		ptr_value &= ~I40E_PTR_TYPE;
+
+		/* PtrValue in 4kB units, need to convert to words */
+		ptr_value /= 2;
+		flat_offset = ((u32)ptr_value * 0x1000) + (u32)offset;
+		status = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+		if (!status) {
+			status = i40e_aq_read_nvm(hw, 0, 2 * flat_offset,
+						  2 * words_data_size,
+						  data_ptr, true, NULL);
+			i40e_release_nvm(hw);
+			if (status) {
+				i40e_debug(hw, I40E_DEBUG_ALL,
+					   "Reading nvm aq failed.Error code: %d.\n",
+					   status);
+				return I40E_ERR_NVM;
+			}
+		} else {
+			return I40E_ERR_NVM;
+		}
+	} else {
+		/* Read from the Shadow RAM */
+		status = i40e_read_nvm_buffer(hw, ptr_value + offset,
+					      &words_data_size, data_ptr);
+		if (status) {
+			i40e_debug(hw, I40E_DEBUG_ALL,
+				   "Reading nvm buffer failed.Error code: %d.\n",
+				   status);
+		}
+	}
+
+	return status;
+}
+
 /**
  * i40e_read_nvm_buffer_srctl - Reads Shadow RAM buffer via SRCTL register
  * @hw: pointer to the HW structure
@@ -429,6 +500,36 @@ static i40e_status __i40e_read_nvm_buffer(struct i40e_hw *hw,
 	return i40e_read_nvm_buffer_srctl(hw, offset, words, data);
 }
 
+/**
+ * i40e_read_nvm_buffer - Reads Shadow RAM buffer and acquire lock if necessary
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
+ * @words: (in) number of words to read; (out) number of words actually read
+ * @data: words read from the Shadow RAM
+ *
+ * Reads 16 bit words (data buffer) from the SR using the i40e_read_nvm_srrd()
+ * method. The buffer read is preceded by the NVM ownership take
+ * and followed by the release.
+ **/
+i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
+				 u16 *words, u16 *data)
+{
+	i40e_status ret_code = 0;
+
+	if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) {
+		ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+		if (!ret_code) {
+			ret_code = i40e_read_nvm_buffer_aq(hw, offset, words,
+							   data);
+			i40e_release_nvm(hw);
+		}
+	} else {
+		ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data);
+	}
+
+	return ret_code;
+}
+
 /**
  * i40e_write_nvm_aq - Writes Shadow RAM.
  * @hw: pointer to the HW structure.
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index 9c810d54df1c..5250441bf75b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -315,6 +315,12 @@ i40e_status i40e_acquire_nvm(struct i40e_hw *hw,
 void i40e_release_nvm(struct i40e_hw *hw);
 i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
 					 u16 *data);
+i40e_status i40e_read_nvm_module_data(struct i40e_hw *hw,
+				      u8 module_ptr, u16 offset,
+				      u16 words_data_size,
+				      u16 *data_ptr);
+i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
+				 u16 *words, u16 *data);
 i40e_status i40e_update_nvm_checksum(struct i40e_hw *hw);
 i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
 						 u16 *checksum);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 7c1d57683b53..b43ec94a0f29 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -1318,6 +1318,7 @@ struct i40e_hw_port_stats {
 #define I40E_SR_VPD_PTR				0x2F
 #define I40E_SR_PCIE_ALT_AUTO_LOAD_PTR		0x3E
 #define I40E_SR_SW_CHECKSUM_WORD		0x3F
+#define I40E_SR_EMP_SR_SETTINGS_PTR		0x48
 
 /* Auxiliary field, mask and shift definition for Shadow RAM and NVM Flash */
 #define I40E_SR_VPD_MODULE_MAX_SIZE		1024
-- 
2.21.0


^ permalink raw reply related

* [net-next v2 11/13] i40e: allow reset in recovery mode
From: Jeff Kirsher @ 2019-08-22 20:30 UTC (permalink / raw)
  To: davem
  Cc: Piotr Kwapulinski, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190822203039.15668-1-jeffrey.t.kirsher@intel.com>

From: Piotr Kwapulinski <piotr.kwapulinski@intel.com>

Driver waits after issuing a reset. When a reset takes too long a driver
gives up. Implemented by invoking PF reset in a loop. After defined
number of unsuccessful PF reset trials it returns error.
Without this patch PF reset fails when NIC is in recovery mode.

So make i40e_set_mac_type() public. i40e driver requires i40e_set_mac_type()
to be public. It is required for recovery mode handling. Without this patch
recovery mode could not be detected in i40e_probe().

Signed-off-by: Piotr Kwapulinski <piotr.kwapulinski@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_common.c |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_main.c   | 67 +++++++++++++++++--
 .../net/ethernet/intel/i40e/i40e_prototype.h  |  2 +
 3 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 7af1b7477140..de996a80013e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -13,7 +13,7 @@
  * This function sets the mac type of the adapter based on the
  * vendor ID and device ID stored in the hw structure.
  **/
-static i40e_status i40e_set_mac_type(struct i40e_hw *hw)
+i40e_status i40e_set_mac_type(struct i40e_hw *hw)
 {
 	i40e_status status = 0;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 8d6b9515b595..fdf43d87e983 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -14564,6 +14564,51 @@ static bool i40e_check_recovery_mode(struct i40e_pf *pf)
 	return false;
 }
 
+/**
+ * i40e_pf_loop_reset - perform reset in a loop.
+ * @pf: board private structure
+ *
+ * This function is useful when a NIC is about to enter recovery mode.
+ * When a NIC's internal data structures are corrupted the NIC's
+ * firmware is going to enter recovery mode.
+ * Right after a POR it takes about 7 minutes for firmware to enter
+ * recovery mode. Until that time a NIC is in some kind of intermediate
+ * state. After that time period the NIC almost surely enters
+ * recovery mode. The only way for a driver to detect intermediate
+ * state is to issue a series of pf-resets and check a return value.
+ * If a PF reset returns success then the firmware could be in recovery
+ * mode so the caller of this code needs to check for recovery mode
+ * if this function returns success. There is a little chance that
+ * firmware will hang in intermediate state forever.
+ * Since waiting 7 minutes is quite a lot of time this function waits
+ * 10 seconds and then gives up by returning an error.
+ *
+ * Return 0 on success, negative on failure.
+ **/
+static i40e_status i40e_pf_loop_reset(struct i40e_pf *pf)
+{
+	const unsigned short MAX_CNT = 1000;
+	const unsigned short MSECS = 10;
+	struct i40e_hw *hw = &pf->hw;
+	i40e_status ret;
+	int cnt;
+
+	for (cnt = 0; cnt < MAX_CNT; ++cnt) {
+		ret = i40e_pf_reset(hw);
+		if (!ret)
+			break;
+		msleep(MSECS);
+	}
+
+	if (cnt == MAX_CNT) {
+		dev_info(&pf->pdev->dev, "PF reset failed: %d\n", ret);
+		return ret;
+	}
+
+	pf->pfr_count++;
+	return ret;
+}
+
 /**
  * i40e_init_recovery_mode - initialize subsystems needed in recovery mode
  * @pf: board private structure
@@ -14792,14 +14837,22 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	/* Reset here to make sure all is clean and to define PF 'n' */
 	i40e_clear_hw(hw);
-	if (!i40e_check_recovery_mode(pf)) {
-		err = i40e_pf_reset(hw);
-		if (err) {
-			dev_info(&pdev->dev, "Initial pf_reset failed: %d\n", err);
-			goto err_pf_reset;
-		}
-		pf->pfr_count++;
+
+	err = i40e_set_mac_type(hw);
+	if (err) {
+		dev_warn(&pdev->dev, "unidentified MAC or BLANK NVM: %d\n",
+			 err);
+		goto err_pf_reset;
 	}
+
+	err = i40e_pf_loop_reset(pf);
+	if (err) {
+		dev_info(&pdev->dev, "Initial pf_reset failed: %d\n", err);
+		goto err_pf_reset;
+	}
+
+	i40e_check_recovery_mode(pf);
+
 	hw->aq.num_arq_entries = I40E_AQ_LEN;
 	hw->aq.num_asq_entries = I40E_AQ_LEN;
 	hw->aq.arq_buf_size = I40E_MAX_AQ_BUF_SIZE;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index eac88bcc6c06..9c810d54df1c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -326,6 +326,8 @@ void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode,
 void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw);
 void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status);
 
+i40e_status i40e_set_mac_type(struct i40e_hw *hw);
+
 extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[];
 
 static inline struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype)
-- 
2.21.0


^ permalink raw reply related

* [net-next v2 10/13] i40e: Remove function i40e_update_dcb_config()
From: Jeff Kirsher @ 2019-08-22 20:30 UTC (permalink / raw)
  To: davem
  Cc: Grzegorz Siwik, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190822203039.15668-1-jeffrey.t.kirsher@intel.com>

From: Grzegorz Siwik <grzegorz.siwik@intel.com>

This patch removes function i40e_update_dcb_config(). Instead of
i40e_update_dcb_config() we use i40e_init_dcb(), which implements the
correct NVM read.

Signed-off-by: Grzegorz Siwik <grzegorz.siwik@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 46 +--------------------
 1 file changed, 1 insertion(+), 45 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 5c280c025085..8d6b9515b595 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6419,50 +6419,6 @@ static int i40e_resume_port_tx(struct i40e_pf *pf)
 	return ret;
 }
 
-/**
- * i40e_update_dcb_config
- * @hw: pointer to the HW struct
- * @enable_mib_change: enable MIB change event
- *
- * Update DCB configuration from the firmware
- **/
-static enum i40e_status_code
-i40e_update_dcb_config(struct i40e_hw *hw, bool enable_mib_change)
-{
-	struct i40e_lldp_variables lldp_cfg;
-	i40e_status ret;
-
-	if (!hw->func_caps.dcb)
-		return I40E_NOT_SUPPORTED;
-
-	/* Read LLDP NVM area */
-	ret = i40e_read_lldp_cfg(hw, &lldp_cfg);
-	if (ret)
-		return I40E_ERR_NOT_READY;
-
-	/* Get DCBX status */
-	ret = i40e_get_dcbx_status(hw, &hw->dcbx_status);
-	if (ret)
-		return ret;
-
-	/* Check the DCBX Status */
-	if (hw->dcbx_status == I40E_DCBX_STATUS_DONE ||
-	    hw->dcbx_status == I40E_DCBX_STATUS_IN_PROGRESS) {
-		/* Get current DCBX configuration */
-		ret = i40e_get_dcb_config(hw);
-		if (ret)
-			return ret;
-	} else if (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED) {
-		return I40E_ERR_NOT_READY;
-	}
-
-	/* Configure the LLDP MIB change event */
-	if (enable_mib_change)
-		ret = i40e_aq_cfg_lldp_mib_change_event(hw, true, NULL);
-
-	return ret;
-}
-
 /**
  * i40e_init_pf_dcb - Initialize DCB configuration
  * @pf: PF being configured
@@ -6485,7 +6441,7 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf)
 		goto out;
 	}
 
-	err = i40e_update_dcb_config(hw, true);
+	err = i40e_init_dcb(hw, true);
 	if (!err) {
 		/* Device/Function is not DCBX capable */
 		if ((!hw->func_caps.dcb) ||
-- 
2.21.0


^ permalink raw reply related

* [net-next v2 09/13] i40e: Fix crash caused by stress setting of VF MAC addresses
From: Jeff Kirsher @ 2019-08-22 20:30 UTC (permalink / raw)
  To: davem; +Cc: Slawomir Laba, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190822203039.15668-1-jeffrey.t.kirsher@intel.com>

From: Slawomir Laba <slawomirx.laba@intel.com>

Add update to the VSI pointer passed to the i40e_set_vf_mac function.
If VF is in reset state the driver waits in i40e_set_vf_mac function
for the reset to be complete, yet after reset the vsi pointer
that was passed into this function is no longer valid.

The patch updates local VSI pointer directly from pf->vsi array,
by using the id stored in VF pointer (lan_vsi_idx).

Without this commit the driver might occasionally invoke general
protection fault in kernel and disable the OS entirely.

Signed-off-by: Slawomir Laba <slawomirx.laba@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 4601f9e4e998..f8aa4deceb5e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -3967,10 +3967,15 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 	/* When the VF is resetting wait until it is done.
 	 * It can take up to 200 milliseconds,
 	 * but wait for up to 300 milliseconds to be safe.
+	 * If the VF is indeed in reset, the vsi pointer has
+	 * to show on the newly loaded vsi under pf->vsi[id].
 	 */
 	for (i = 0; i < 15; i++) {
-		if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states))
+		if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
+			if (i > 0)
+				vsi = pf->vsi[vf->lan_vsi_idx];
 			break;
+		}
 		msleep(20);
 	}
 	if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
-- 
2.21.0


^ permalink raw reply related

* [net-next v2 08/13] i40e: reset veb.tc_stats when resetting veb.stats
From: Jeff Kirsher @ 2019-08-22 20:30 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190822203039.15668-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

The stats structure for the VEB switch statistics is reset periodically,
but the tc_stats are not reset at the same time.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 4551d97771c9..5c280c025085 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -534,6 +534,10 @@ void i40e_pf_reset_stats(struct i40e_pf *pf)
 			       sizeof(pf->veb[i]->stats));
 			memset(&pf->veb[i]->stats_offsets, 0,
 			       sizeof(pf->veb[i]->stats_offsets));
+			memset(&pf->veb[i]->tc_stats, 0,
+			       sizeof(pf->veb[i]->tc_stats));
+			memset(&pf->veb[i]->tc_stats_offsets, 0,
+			       sizeof(pf->veb[i]->tc_stats_offsets));
 			pf->veb[i]->stat_offsets_loaded = false;
 		}
 	}
-- 
2.21.0


^ permalink raw reply related

* [net-next v2 13/13] i40e: fix retrying in i40e_aq_get_phy_capabilities
From: Jeff Kirsher @ 2019-08-22 20:30 UTC (permalink / raw)
  To: davem
  Cc: Marcin Formela, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190822203039.15668-1-jeffrey.t.kirsher@intel.com>

From: Marcin Formela <marcin.formela@intel.com>

Fixed a bug where driver was breaking out of the loop and
reporting an error without retrying first.

Signed-off-by: Marcin Formela <marcin.formela@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_common.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index de996a80013e..46e649c09f72 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1577,19 +1577,22 @@ i40e_status i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
 		status = i40e_asq_send_command(hw, &desc, abilities,
 					       abilities_size, cmd_details);
 
-		if (status)
-			break;
-
-		if (hw->aq.asq_last_status == I40E_AQ_RC_EIO) {
+		switch (hw->aq.asq_last_status) {
+		case I40E_AQ_RC_EIO:
 			status = I40E_ERR_UNKNOWN_PHY;
 			break;
-		} else if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN) {
+		case I40E_AQ_RC_EAGAIN:
 			usleep_range(1000, 2000);
 			total_delay++;
 			status = I40E_ERR_TIMEOUT;
+			break;
+		/* also covers I40E_AQ_RC_OK */
+		default:
+			break;
 		}
-	} while ((hw->aq.asq_last_status != I40E_AQ_RC_OK) &&
-		 (total_delay < max_delay));
+
+	} while ((hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN) &&
+		(total_delay < max_delay));
 
 	if (status)
 		return status;
-- 
2.21.0


^ permalink raw reply related

* [net-next v2 04/13] i40e: fix shifts of signed values
From: Jeff Kirsher @ 2019-08-22 20:30 UTC (permalink / raw)
  To: davem
  Cc: Beilei Xing, netdev, nhorman, sassmann, Ferruh Yigit,
	Andrew Bowers, Jeff Kirsher
In-Reply-To: <20190822203039.15668-1-jeffrey.t.kirsher@intel.com>

From: Beilei Xing <beilei.xing@intel.com>

This patch fixes following error reported by cppcheck:
(error) Shifting signed 32-bit value by 31 bits is undefined behaviour

Signed-off-by: Beilei Xing <beilei.xing@intel.com>
Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_register.h   | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index 52e3680c57f8..330ac19a5dae 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -58,7 +58,7 @@
 #define I40E_PF_ARQLEN_ARQCRIT_SHIFT 30
 #define I40E_PF_ARQLEN_ARQCRIT_MASK I40E_MASK(0x1, I40E_PF_ARQLEN_ARQCRIT_SHIFT)
 #define I40E_PF_ARQLEN_ARQENABLE_SHIFT 31
-#define I40E_PF_ARQLEN_ARQENABLE_MASK I40E_MASK(0x1, I40E_PF_ARQLEN_ARQENABLE_SHIFT)
+#define I40E_PF_ARQLEN_ARQENABLE_MASK I40E_MASK(0x1u, I40E_PF_ARQLEN_ARQENABLE_SHIFT)
 #define I40E_PF_ARQT 0x00080480 /* Reset: EMPR */
 #define I40E_PF_ARQT_ARQT_SHIFT 0
 #define I40E_PF_ARQT_ARQT_MASK I40E_MASK(0x3FF, I40E_PF_ARQT_ARQT_SHIFT)
@@ -81,7 +81,7 @@
 #define I40E_PF_ATQLEN_ATQCRIT_SHIFT 30
 #define I40E_PF_ATQLEN_ATQCRIT_MASK I40E_MASK(0x1, I40E_PF_ATQLEN_ATQCRIT_SHIFT)
 #define I40E_PF_ATQLEN_ATQENABLE_SHIFT 31
-#define I40E_PF_ATQLEN_ATQENABLE_MASK I40E_MASK(0x1, I40E_PF_ATQLEN_ATQENABLE_SHIFT)
+#define I40E_PF_ATQLEN_ATQENABLE_MASK I40E_MASK(0x1u, I40E_PF_ATQLEN_ATQENABLE_SHIFT)
 #define I40E_PF_ATQT 0x00080400 /* Reset: EMPR */
 #define I40E_PF_ATQT_ATQT_SHIFT 0
 #define I40E_PF_ATQT_ATQT_MASK I40E_MASK(0x3FF, I40E_PF_ATQT_ATQT_SHIFT)
@@ -108,7 +108,7 @@
 #define I40E_VF_ARQLEN_ARQCRIT_SHIFT 30
 #define I40E_VF_ARQLEN_ARQCRIT_MASK I40E_MASK(0x1, I40E_VF_ARQLEN_ARQCRIT_SHIFT)
 #define I40E_VF_ARQLEN_ARQENABLE_SHIFT 31
-#define I40E_VF_ARQLEN_ARQENABLE_MASK I40E_MASK(0x1, I40E_VF_ARQLEN_ARQENABLE_SHIFT)
+#define I40E_VF_ARQLEN_ARQENABLE_MASK I40E_MASK(0x1u, I40E_VF_ARQLEN_ARQENABLE_SHIFT)
 #define I40E_VF_ARQT(_VF) (0x00082C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
 #define I40E_VF_ARQT_MAX_INDEX 127
 #define I40E_VF_ARQT_ARQT_SHIFT 0
@@ -136,7 +136,7 @@
 #define I40E_VF_ATQLEN_ATQCRIT_SHIFT 30
 #define I40E_VF_ATQLEN_ATQCRIT_MASK I40E_MASK(0x1, I40E_VF_ATQLEN_ATQCRIT_SHIFT)
 #define I40E_VF_ATQLEN_ATQENABLE_SHIFT 31
-#define I40E_VF_ATQLEN_ATQENABLE_MASK I40E_MASK(0x1, I40E_VF_ATQLEN_ATQENABLE_SHIFT)
+#define I40E_VF_ATQLEN_ATQENABLE_MASK I40E_MASK(0x1u, I40E_VF_ATQLEN_ATQENABLE_SHIFT)
 #define I40E_VF_ATQT(_VF) (0x00082800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
 #define I40E_VF_ATQT_MAX_INDEX 127
 #define I40E_VF_ATQT_ATQT_SHIFT 0
@@ -259,7 +259,7 @@
 #define I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT 30
 #define I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT)
 #define I40E_PRTDCB_RETSTCC_ETSTC_SHIFT 31
-#define I40E_PRTDCB_RETSTCC_ETSTC_MASK I40E_MASK(0x1, I40E_PRTDCB_RETSTCC_ETSTC_SHIFT)
+#define I40E_PRTDCB_RETSTCC_ETSTC_MASK I40E_MASK(0x1u, I40E_PRTDCB_RETSTCC_ETSTC_SHIFT)
 #define I40E_PRTDCB_RPPMC 0x001223A0 /* Reset: CORER */
 #define I40E_PRTDCB_RPPMC_LANRPPM_SHIFT 0
 #define I40E_PRTDCB_RPPMC_LANRPPM_MASK I40E_MASK(0xFF, I40E_PRTDCB_RPPMC_LANRPPM_SHIFT)
@@ -503,7 +503,7 @@
 #define I40E_GLGEN_MSCA_MDICMD_SHIFT 30
 #define I40E_GLGEN_MSCA_MDICMD_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_MDICMD_SHIFT)
 #define I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT 31
-#define I40E_GLGEN_MSCA_MDIINPROGEN_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT)
+#define I40E_GLGEN_MSCA_MDIINPROGEN_MASK I40E_MASK(0x1u, I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT)
 #define I40E_GLGEN_MSRWD(_i) (0x0008819C + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
 #define I40E_GLGEN_MSRWD_MAX_INDEX 3
 #define I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT 0
@@ -1242,14 +1242,14 @@
 #define I40E_GLLAN_TXPRE_QDIS_SET_QDIS_SHIFT 30
 #define I40E_GLLAN_TXPRE_QDIS_SET_QDIS_MASK I40E_MASK(0x1, I40E_GLLAN_TXPRE_QDIS_SET_QDIS_SHIFT)
 #define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT 31
-#define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_MASK I40E_MASK(0x1, I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT)
+#define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_MASK I40E_MASK(0x1u, I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT)
 #define I40E_PFLAN_QALLOC 0x001C0400 /* Reset: CORER */
 #define I40E_PFLAN_QALLOC_FIRSTQ_SHIFT 0
 #define I40E_PFLAN_QALLOC_FIRSTQ_MASK I40E_MASK(0x7FF, I40E_PFLAN_QALLOC_FIRSTQ_SHIFT)
 #define I40E_PFLAN_QALLOC_LASTQ_SHIFT 16
 #define I40E_PFLAN_QALLOC_LASTQ_MASK I40E_MASK(0x7FF, I40E_PFLAN_QALLOC_LASTQ_SHIFT)
 #define I40E_PFLAN_QALLOC_VALID_SHIFT 31
-#define I40E_PFLAN_QALLOC_VALID_MASK I40E_MASK(0x1, I40E_PFLAN_QALLOC_VALID_SHIFT)
+#define I40E_PFLAN_QALLOC_VALID_MASK I40E_MASK(0x1u, I40E_PFLAN_QALLOC_VALID_SHIFT)
 #define I40E_QRX_ENA(_Q) (0x00120000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: PFR */
 #define I40E_QRX_ENA_MAX_INDEX 1535
 #define I40E_QRX_ENA_QENA_REQ_SHIFT 0
@@ -1658,7 +1658,7 @@
 #define I40E_GLNVM_SRCTL_START_SHIFT 30
 #define I40E_GLNVM_SRCTL_START_MASK I40E_MASK(0x1, I40E_GLNVM_SRCTL_START_SHIFT)
 #define I40E_GLNVM_SRCTL_DONE_SHIFT 31
-#define I40E_GLNVM_SRCTL_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_SRCTL_DONE_SHIFT)
+#define I40E_GLNVM_SRCTL_DONE_MASK I40E_MASK(0x1u, I40E_GLNVM_SRCTL_DONE_SHIFT)
 #define I40E_GLNVM_SRDATA 0x000B6114 /* Reset: POR */
 #define I40E_GLNVM_SRDATA_WRDATA_SHIFT 0
 #define I40E_GLNVM_SRDATA_WRDATA_MASK I40E_MASK(0xFFFF, I40E_GLNVM_SRDATA_WRDATA_SHIFT)
@@ -3025,7 +3025,7 @@
 #define I40E_PF_VT_PFALLOC_LASTVF_SHIFT 8
 #define I40E_PF_VT_PFALLOC_LASTVF_MASK I40E_MASK(0xFF, I40E_PF_VT_PFALLOC_LASTVF_SHIFT)
 #define I40E_PF_VT_PFALLOC_VALID_SHIFT 31
-#define I40E_PF_VT_PFALLOC_VALID_MASK I40E_MASK(0x1, I40E_PF_VT_PFALLOC_VALID_SHIFT)
+#define I40E_PF_VT_PFALLOC_VALID_MASK I40E_MASK(0x1u, I40E_PF_VT_PFALLOC_VALID_SHIFT)
 #define I40E_VP_MDET_RX(_VF) (0x0012A000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
 #define I40E_VP_MDET_RX_MAX_INDEX 127
 #define I40E_VP_MDET_RX_VALID_SHIFT 0
@@ -3161,7 +3161,7 @@
 #define I40E_VF_ARQLEN1_ARQCRIT_SHIFT 30
 #define I40E_VF_ARQLEN1_ARQCRIT_MASK I40E_MASK(0x1, I40E_VF_ARQLEN1_ARQCRIT_SHIFT)
 #define I40E_VF_ARQLEN1_ARQENABLE_SHIFT 31
-#define I40E_VF_ARQLEN1_ARQENABLE_MASK I40E_MASK(0x1, I40E_VF_ARQLEN1_ARQENABLE_SHIFT)
+#define I40E_VF_ARQLEN1_ARQENABLE_MASK I40E_MASK(0x1u, I40E_VF_ARQLEN1_ARQENABLE_SHIFT)
 #define I40E_VF_ARQT1 0x00007000 /* Reset: EMPR */
 #define I40E_VF_ARQT1_ARQT_SHIFT 0
 #define I40E_VF_ARQT1_ARQT_MASK I40E_MASK(0x3FF, I40E_VF_ARQT1_ARQT_SHIFT)
@@ -3184,7 +3184,7 @@
 #define I40E_VF_ATQLEN1_ATQCRIT_SHIFT 30
 #define I40E_VF_ATQLEN1_ATQCRIT_MASK I40E_MASK(0x1, I40E_VF_ATQLEN1_ATQCRIT_SHIFT)
 #define I40E_VF_ATQLEN1_ATQENABLE_SHIFT 31
-#define I40E_VF_ATQLEN1_ATQENABLE_MASK I40E_MASK(0x1, I40E_VF_ATQLEN1_ATQENABLE_SHIFT)
+#define I40E_VF_ATQLEN1_ATQENABLE_MASK I40E_MASK(0x1u, I40E_VF_ATQLEN1_ATQENABLE_SHIFT)
 #define I40E_VF_ATQT1 0x00008400 /* Reset: EMPR */
 #define I40E_VF_ATQT1_ATQT_SHIFT 0
 #define I40E_VF_ATQT1_ATQT_MASK I40E_MASK(0x3FF, I40E_VF_ATQT1_ATQT_SHIFT)
-- 
2.21.0


^ permalink raw reply related

* [net-next v2 00/13][pull request] 40GbE Intel Wired LAN Driver Updates 2019-08-22
From: Jeff Kirsher @ 2019-08-22 20:30 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, nhorman, sassmann

This series contains updates to i40e driver only.

Arnd Bergmann reduces the stack usage which was causing warnings on
32-bit architectures due to large structure sizes for 2 functions
getting inlined, so use noinline_for_stack to prevent the compilers from
combining the 2 functions.

Mauro S. M. Rodrigues fixes an issue when reading an EEPROM from SFP
modules that comply with SFF-8472 but do not implement the Digital
Diagnostic Monitoring (DDM) interface for i40e.

Huhai found we were not checking the return value for configuring the
transmit ring and continuing with XDP configuration of the transmit
ring.

Beilei fixes an issue of shifting signed 32-bit integers.

Sylwia adds support for "packet drop mode" to the MAC configuration for
admin queue command.  This bit controls the behavior when a no-drop
packet is blocking a TC queue.  Adds support for persistent LLDP by
checking the LLDP flag and reading the LLDP from the NVM when enabled.

Adrian fixes the "recovery mode" check to take into account which device
we are on, since x710 devices have 4 register values to check for status
and x722 devices only have 2 register values to check.

Piotr Azarewicz bumps the supported firmware API version to 1.9 which
extends the PHY access admin queue command support.

Jake makes sure the traffic class stats for a VEB are reset when the VEB
stats are reset.

Slawomir fixes a NULL pointer dereference where the VSI pointer was not
updated before passing it to the i40e_set_vf_mac() when the VF is in a
reset state, so wait for the reset to complete.

Grzegorz removes the i40e_update_dcb_config() which was not using the
correct NVM reads, so call i40e_init_dcb() in its place to correctly
update the DCB configuration.

Piotr Kwapulinski expands the scope of i40e_set_mac_type() since this is
needed during probe to determine if we are in recovery mode.  Fixed the
driver reset path when in recovery mode.

Marcin fixed an issue where we were breaking out of a loop too early
when trying to get the PHY capabilities.

v2: Combined patch 7 & 9 in the original series, since both patches
    bumped firmware API version.  Also combined patches 12 & 13 in the
    original series, since one increased the scope of checking for MAC
    and the follow-on patch made use of function within the new scope.

The following are changes since commit c76c992525245ec1c7b6738bf887c42099abab02:
  nexthops: remove redundant assignment to variable err
and are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue 40GbE

Adrian Podlawski (1):
  i40e: check_recovery_mode had wrong if statement

Arnd Bergmann (1):
  i40e: reduce stack usage in i40e_set_fc

Beilei Xing (1):
  i40e: fix shifts of signed values

Grzegorz Siwik (1):
  i40e: Remove function i40e_update_dcb_config()

Jacob Keller (1):
  i40e: reset veb.tc_stats when resetting veb.stats

Marcin Formela (1):
  i40e: fix retrying in i40e_aq_get_phy_capabilities

Mauro S. M. Rodrigues (1):
  i40e: Check if transceiver implements DDM before access

Piotr Azarewicz (1):
  i40e: Update FW API version to 1.9

Piotr Kwapulinski (1):
  i40e: allow reset in recovery mode

Slawomir Laba (1):
  i40e: Fix crash caused by stress setting of VF MAC addresses

Sylwia Wnuczko (2):
  i40e: Add drop mode parameter to set mac config
  i40e: Persistent LLDP support

huhai (1):
  i40e: add check on i40e_configure_tx_ring() return value

 drivers/net/ethernet/intel/i40e/i40e_adminq.c |   4 +-
 .../net/ethernet/intel/i40e/i40e_adminq_cmd.h |  33 ++---
 drivers/net/ethernet/intel/i40e/i40e_common.c | 110 +++++++-------
 drivers/net/ethernet/intel/i40e/i40e_dcb.c    |  18 ++-
 drivers/net/ethernet/intel/i40e/i40e_dcb.h    |   2 +
 .../net/ethernet/intel/i40e/i40e_ethtool.c    |   6 +
 drivers/net/ethernet/intel/i40e/i40e_main.c   | 136 ++++++++++--------
 drivers/net/ethernet/intel/i40e/i40e_nvm.c    | 101 +++++++++++++
 .../net/ethernet/intel/i40e/i40e_prototype.h  |   8 ++
 .../net/ethernet/intel/i40e/i40e_register.h   |  30 ++--
 drivers/net/ethernet/intel/i40e/i40e_type.h   |   3 +
 .../ethernet/intel/i40e/i40e_virtchnl_pf.c    |   7 +-
 12 files changed, 323 insertions(+), 135 deletions(-)

-- 
2.21.0

^ permalink raw reply

* [PATCH net v2] openvswitch: Fix conntrack cache with timeout
From: Yi-Hung Wei @ 2019-08-22 20:17 UTC (permalink / raw)
  To: netdev, pshelar; +Cc: Yi-Hung Wei

This patch addresses a conntrack cache issue with timeout policy.
Currently, we do not check if the timeout extension is set properly in the
cached conntrack entry.  Thus, after packet recirculate from conntrack
action, the timeout policy is not applied properly.  This patch fixes the
aforementioned issue.

Fixes: 06bd2bdf19d2 ("openvswitch: Add timeout support to ct action")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
---
v1->v2: Fix rcu dereference issue reported by kbuild test robot.
---
 net/openvswitch/conntrack.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 848c6eb55064..4d7896135e73 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -67,6 +67,7 @@ struct ovs_conntrack_info {
 	struct md_mark mark;
 	struct md_labels labels;
 	char timeout[CTNL_TIMEOUT_NAME_MAX];
+	struct nf_ct_timeout *nf_ct_timeout;
 #if IS_ENABLED(CONFIG_NF_NAT)
 	struct nf_nat_range2 range;  /* Only present for SRC NAT and DST NAT. */
 #endif
@@ -697,6 +698,14 @@ static bool skb_nfct_cached(struct net *net,
 		if (help && rcu_access_pointer(help->helper) != info->helper)
 			return false;
 	}
+	if (info->nf_ct_timeout) {
+		struct nf_conn_timeout *timeout_ext;
+
+		timeout_ext = nf_ct_timeout_find(ct);
+		if (!timeout_ext || info->nf_ct_timeout !=
+		    rcu_dereference(timeout_ext->timeout))
+			return false;
+	}
 	/* Force conntrack entry direction to the current packet? */
 	if (info->force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
 		/* Delete the conntrack entry if confirmed, else just release
@@ -1657,6 +1666,10 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
 				      ct_info.timeout))
 			pr_info_ratelimited("Failed to associated timeout "
 					    "policy `%s'\n", ct_info.timeout);
+		else
+			ct_info.nf_ct_timeout = rcu_dereference(
+				nf_ct_timeout_find(ct_info.ct)->timeout);
+
 	}
 
 	if (helper) {
-- 
2.7.4


^ permalink raw reply related

* Re: [PATCH net-next,v4, 3/6] net/mlx5: Add wrappers for HyperV PCIe operations
From: Eran Ben Elisha @ 2019-08-22 20:20 UTC (permalink / raw)
  To: Leon Romanovsky, haiyangz
  Cc: sashal@kernel.org, davem@davemloft.net, Saeed Mahameed,
	lorenzo.pieralisi@arm.com, bhelgaas@google.com,
	linux-pci@vger.kernel.org, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org, kys, Stephen Hemminger,
	linux-kernel@vger.kernel.org
In-Reply-To: <20190822173813.GM29433@mtr-leonro.mtl.com>



On 8/22/2019 8:38 PM, Leon Romanovsky wrote:
> On Thu, Aug 22, 2019 at 05:05:47AM +0000, Haiyang Zhang wrote:
>> From: Eran Ben Elisha <eranbe@mellanox.com>
>>
>> Add wrapper functions for HyperV PCIe read / write /
>> block_invalidate_register operations.  This will be used as an
>> infrastructure in the downstream patch for software communication.
>>
>> This will be enabled by default if CONFIG_PCI_HYPERV_INTERFACE is set.
>>
>> Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
>> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
>> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
>> ---
>>   drivers/net/ethernet/mellanox/mlx5/core/Makefile |  1 +
>>   drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 ++++++++++++++++++++++++
>>   drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++++++++
>>   3 files changed, 87 insertions(+)
>>   create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
>>   create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> index bcf3655..fd32a5b 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> @@ -45,6 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offlo
>>   mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
>>   mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
>>   mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
>> +mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o
>>
>>   #
>>   # Ipoib netdev
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
>> new file mode 100644
>> index 0000000..cf08d02
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
>> @@ -0,0 +1,64 @@
>> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>> +// Copyright (c) 2018 Mellanox Technologies
>> +
>> +#include <linux/hyperv.h>
>> +#include "mlx5_core.h"
>> +#include "lib/hv.h"
>> +
>> +static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len,
>> +				 int offset, bool read)
>> +{
>> +	int rc = -EOPNOTSUPP;
>> +	int bytes_returned;
>> +	int block_id;
>> +
>> +	if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len % HV_CONFIG_BLOCK_SIZE_MAX)
>> +		return -EINVAL;
>> +
>> +	block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
>> +
>> +	rc = read ?
>> +	     hyperv_read_cfg_blk(dev->pdev, buf,
>> +				 HV_CONFIG_BLOCK_SIZE_MAX, block_id,
>> +				 &bytes_returned) :
>> +	     hyperv_write_cfg_blk(dev->pdev, buf,
>> +				  HV_CONFIG_BLOCK_SIZE_MAX, block_id);
>> +
>> +	/* Make sure len bytes were read successfully  */
>> +	if (read)
>> +		rc |= !(len == bytes_returned);
> 
> Unclear what do you want to achieve here, for read == true, "rc" will
> have result of hyperv_read_cfg_blk(), which can be an error too.
> It means that your "rc |= .." will give interesting results.
Will fix.
> 
>> +
>> +	if (rc) {
>> +		mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n",
>> +			      read ? "read" : "write", rc, len,
>> +			      offset);
>> +		return rc;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
>> +			int offset)
>> +{
>> +	return mlx5_hv_config_common(dev, buf, len, offset, true);
>> +}
>> +
>> +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
>> +			 int offset)
>> +{
>> +	return mlx5_hv_config_common(dev, buf, len, offset, false);
>> +}
>> +
>> +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
>> +				void (*block_invalidate)(void *context,
>> +							 u64 block_mask))
>> +{
>> +	return hyperv_reg_block_invalidate(dev->pdev, context,
>> +					   block_invalidate);
>> +}
>> +
>> +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev)
>> +{
>> +	hyperv_reg_block_invalidate(dev->pdev, NULL, NULL);
>> +}
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
>> new file mode 100644
>> index 0000000..f9a4557
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
>> @@ -0,0 +1,22 @@
>> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
>> +/* Copyright (c) 2019 Mellanox Technologies. */
>> +
>> +#ifndef __LIB_HV_H__
>> +#define __LIB_HV_H__
>> +
>> +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
> 
> The common way to write it if(IS_ENABLED(CONFIG_FOO)) inside the code
all IS_ENABLED in mlx5 are written without parenthesis, I will stay 
aligned with it.
> and #ifdef CONFIG_FOO for header files.
This will not work for loadable modules such as PCI_HYPERV_INTERFACE. 
Need true here also for =m in h files as well.

> 
>> +
>> +#include <linux/hyperv.h>
>> +#include <linux/mlx5/driver.h>
>> +
>> +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
>> +			int offset);
>> +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
>> +			 int offset);
>> +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
>> +				void (*block_invalidate)(void *context,
>> +							 u64 block_mask));
>> +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev);
>> +#endif
>> +
>> +#endif /* __LIB_HV_H__ */
>> --
>> 1.8.3.1
>>

^ permalink raw reply

* Re: [PATCH net-next] net: dsa: remove bitmap operations
From: Vivien Didelot @ 2019-08-22 20:17 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, olteanv
In-Reply-To: <20190821062423.18735-1-vivien.didelot@gmail.com>

On Wed, 21 Aug 2019 02:24:23 -0400, Vivien Didelot <vivien.didelot@gmail.com> wrote:
> The bitmap operations were introduced to simplify the switch drivers
> in the future, since most of them could implement the common VLAN and
> MDB operations (add, del, dump) with simple functions taking all target
> ports at once, and thus limiting the number of hardware accesses.
> 
> Programming an MDB or VLAN this way in a single operation would clearly
> simplify the drivers a lot but would require a new get-set interface
> in DSA. The usage of such bitmap from the stack also raised concerned
> in the past, leading to the dynamic allocation of a new ds->_bitmap
> member in the dsa_switch structure. So let's get rid of them for now.
> 
> This commit nicely wraps the ds->ops->port_{mdb,vlan}_{prepare,add}
> switch operations into new dsa_switch_{mdb,vlan}_{prepare,add}
> variants not using any bitmap argument anymore.
> 
> New dsa_switch_{mdb,vlan}_match helpers have been introduced to make
> clear which local port of a switch must be programmed with the target
> object. While the targeted user port is an obvious candidate, the
> DSA links must also be programmed, as well as the CPU port for VLANs.
> 
> While at it, also remove local variables that are only used once.
> 
> Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>

David, I've included this patch into a new series with other related patches,
you can ignore this one now.


Thanks,

	Vivien

^ permalink raw reply

* Re: New skb extension for use by LSMs (skb "security blob")?
From: Florian Westphal @ 2019-08-22 20:15 UTC (permalink / raw)
  To: Casey Schaufler
  Cc: Paul Moore, Florian Westphal, netdev, linux-security-module,
	selinux
In-Reply-To: <00ab1a3e-fd57-fe42-04fa-d82c1585b360@schaufler-ca.com>

Casey Schaufler <casey@schaufler-ca.com> wrote:
> Given that the original objection to using a skb extension for a
> security blob was that an extension is dynamic, and that the ubiquitous
> nature of LSM use makes that unreasonable, it would seem that supporting
> the security blob as a basic part if the skb would be the obvious and
> correct solution. If the normal case is that there is an LSM that would
> befit from the native (unextended) support of a blob, it would seem
> that that is the case that should be optimized.

What is this "blob"? i.e., what would you like to add to sk_buff to make
whatever use cases you have in mind work?

^ permalink raw reply

* [PATCH net-next 5/6] net: dsa: program VLAN on CPU port from slave
From: Vivien Didelot @ 2019-08-22 20:13 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, olteanv, Vivien Didelot
In-Reply-To: <20190822201323.1292-1-vivien.didelot@gmail.com>

DSA currently programs a VLAN on the CPU port implicitly after the
related notifier is received by a switch.

While we still need to do this transparent programmation of the DSA
links in the fabric, programming the CPU port this way may cause
problems in some corners such as the tag_8021q driver.

Because the dedicated CPU port is specific to a slave, make their
programmation explicit a few layers up, in the slave code.

Note that technically, DSA links have a dedicated CPU port as well,
but since they are only used as conduit between interconnected switches
of a fabric, programming them transparently this way is fine.

Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
---
 net/dsa/slave.c  | 14 ++++++++++++++
 net/dsa/switch.c |  5 ++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 82e48d247b81..8267c156a51a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -332,6 +332,10 @@ static int dsa_slave_vlan_add(struct net_device *dev,
 	if (err)
 		return err;
 
+	err = dsa_port_vlan_add(dp->cpu_dp, &vlan, trans);
+	if (err)
+		return err;
+
 	return 0;
 }
 
@@ -383,6 +387,9 @@ static int dsa_slave_vlan_del(struct net_device *dev,
 	if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev))
 		return 0;
 
+	/* Do not deprogram the CPU port as it may be shared with other user
+	 * ports which can be members of this VLAN as well.
+	 */
 	return dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
 }
 
@@ -1121,6 +1128,10 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
 	if (ret && ret != -EOPNOTSUPP)
 		return ret;
 
+	ret = dsa_port_vid_add(dp->cpu_dp, vid, 0);
+	if (ret && ret != -EOPNOTSUPP)
+		return ret;
+
 	return 0;
 }
 
@@ -1151,6 +1162,9 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
 	if (ret == -EOPNOTSUPP)
 		ret = 0;
 
+	/* Do not deprogram the CPU port as it may be shared with other user
+	 * ports which can be members of this VLAN as well.
+	 */
 	return ret;
 }
 
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 489eb7b430a4..6a9607518823 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -232,7 +232,7 @@ static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port,
 	if (ds->index == info->sw_index && port == info->port)
 		return true;
 
-	if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+	if (dsa_is_dsa_port(ds, port))
 		return true;
 
 	return false;
@@ -288,6 +288,9 @@ static int dsa_switch_vlan_del(struct dsa_switch *ds,
 	if (ds->index == info->sw_index)
 		return ds->ops->port_vlan_del(ds, info->port, info->vlan);
 
+	/* Do not deprogram the DSA links as they may be used as conduit
+	 * for other VLAN members in the fabric.
+	 */
 	return 0;
 }
 
-- 
2.23.0


^ permalink raw reply related

* [PATCH net-next 6/6] net: dsa: clear VLAN flags for CPU port
From: Vivien Didelot @ 2019-08-22 20:13 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, olteanv, Vivien Didelot
In-Reply-To: <20190822201323.1292-1-vivien.didelot@gmail.com>

When the bridge offloads a VLAN on a slave port, we also need to
program its dedicated CPU port as a member of the VLAN.

Drivers may handle the CPU port's membership as they want. For example,
Marvell as a special "Unmodified" mode to pass frames as is through
such ports.

Even though DSA expects the drivers to handle the CPU port membership,
they are unlikely to program such VLANs untagged, and certainly not as
PVID. This patch clears the VLAN flags before programming the CPU port.

Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
Suggested-by: Vladimir Oltean <olteanv@gmail.com>
---
 net/dsa/slave.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 8267c156a51a..48df48f76c67 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -332,6 +332,12 @@ static int dsa_slave_vlan_add(struct net_device *dev,
 	if (err)
 		return err;

+	/* We need the dedicated CPU port to be a member of the VLAN as well.
+	 * Even though drivers often handle CPU membership in special ways,
+	 * CPU ports are likely to be tagged, so clear the VLAN flags.
+	 */
+	vlan.flags = 0;
+
 	err = dsa_port_vlan_add(dp->cpu_dp, &vlan, trans);
 	if (err)
 		return err;
-- 
2.23.0

^ permalink raw reply related

* [PATCH net-next 4/6] net: dsa: check bridge VLAN in slave operations
From: Vivien Didelot @ 2019-08-22 20:13 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, olteanv, Vivien Didelot
In-Reply-To: <20190822201323.1292-1-vivien.didelot@gmail.com>

The bridge VLANs are not offloaded by dsa_port_vlan_* if the port is
not bridged or if its bridge is not VLAN aware.

This is a good thing but other corners of DSA, such as the tag_8021q
driver, may need to program VLANs regardless the bridge state.

And also because bridge_dev is specific to user ports anyway, move
these checks were it belongs, one layer up in the slave code.

Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
Suggested-by: Vladimir Oltean <olteanv@gmail.com>
---
 net/dsa/port.c  | 10 ++--------
 net/dsa/slave.c | 12 ++++++++++++
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/net/dsa/port.c b/net/dsa/port.c
index ef28df7ecbde..9b54e5a76297 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -348,10 +348,7 @@ int dsa_port_vlan_add(struct dsa_port *dp,
 		.vlan = vlan,
 	};
 
-	if (!dp->bridge_dev || br_vlan_enabled(dp->bridge_dev))
-		return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
-
-	return 0;
+	return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
 }
 
 int dsa_port_vlan_del(struct dsa_port *dp,
@@ -363,10 +360,7 @@ int dsa_port_vlan_del(struct dsa_port *dp,
 		.vlan = vlan,
 	};
 
-	if (!dp->bridge_dev || br_vlan_enabled(dp->bridge_dev))
-		return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
-
-	return 0;
+	return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
 }
 
 int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 8f5126c41282..82e48d247b81 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -323,6 +323,9 @@ static int dsa_slave_vlan_add(struct net_device *dev,
 	if (obj->orig_dev != dev)
 		return -EOPNOTSUPP;
 
+	if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev))
+		return 0;
+
 	vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj);
 
 	err = dsa_port_vlan_add(dp, &vlan, trans);
@@ -377,6 +380,9 @@ static int dsa_slave_vlan_del(struct net_device *dev,
 	if (obj->orig_dev != dev)
 		return -EOPNOTSUPP;
 
+	if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev))
+		return 0;
+
 	return dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
 }
 
@@ -1099,6 +1105,9 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
 	 * need to emulate the switchdev prepare + commit phase.
 	 */
 	if (dp->bridge_dev) {
+		if (!br_vlan_enabled(dp->bridge_dev))
+			return 0;
+
 		/* br_vlan_get_info() returns -EINVAL or -ENOENT if the
 		 * device, respectively the VID is not found, returning
 		 * 0 means success, which is a failure for us here.
@@ -1126,6 +1135,9 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
 	 * need to emulate the switchdev prepare + commit phase.
 	 */
 	if (dp->bridge_dev) {
+		if (!br_vlan_enabled(dp->bridge_dev))
+			return 0;
+
 		/* br_vlan_get_info() returns -EINVAL or -ENOENT if the
 		 * device, respectively the VID is not found, returning
 		 * 0 means success, which is a failure for us here.
-- 
2.23.0


^ permalink raw reply related

* [PATCH net-next 3/6] net: dsa: add slave VLAN helpers
From: Vivien Didelot @ 2019-08-22 20:13 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, olteanv, Vivien Didelot
In-Reply-To: <20190822201323.1292-1-vivien.didelot@gmail.com>

Add dsa_slave_vlan_add and dsa_slave_vlan_del helpers to handle
SWITCHDEV_OBJ_ID_PORT_VLAN switchdev objects. Also copy the
switchdev_obj_port_vlan structure on add since we will modify it in
future patches.

Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
---
 net/dsa/slave.c | 40 +++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9d61d9dbf001..8f5126c41282 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -312,6 +312,26 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
 	return ret;
 }
 
+static int dsa_slave_vlan_add(struct net_device *dev,
+			      const struct switchdev_obj *obj,
+			      struct switchdev_trans *trans)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct switchdev_obj_port_vlan vlan;
+	int err;
+
+	if (obj->orig_dev != dev)
+		return -EOPNOTSUPP;
+
+	vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj);
+
+	err = dsa_port_vlan_add(dp, &vlan, trans);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static int dsa_slave_port_obj_add(struct net_device *dev,
 				  const struct switchdev_obj *obj,
 				  struct switchdev_trans *trans,
@@ -339,10 +359,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 				       trans);
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
-		if (obj->orig_dev != dev)
-			return -EOPNOTSUPP;
-		err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj),
-					trans);
+		err = dsa_slave_vlan_add(dev, obj, trans);
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -352,6 +369,17 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 	return err;
 }
 
+static int dsa_slave_vlan_del(struct net_device *dev,
+			      const struct switchdev_obj *obj)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+
+	if (obj->orig_dev != dev)
+		return -EOPNOTSUPP;
+
+	return dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
+}
+
 static int dsa_slave_port_obj_del(struct net_device *dev,
 				  const struct switchdev_obj *obj)
 {
@@ -371,9 +399,7 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
 		err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
-		if (obj->orig_dev != dev)
-			return -EOPNOTSUPP;
-		err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
+		err = dsa_slave_vlan_del(dev, obj);
 		break;
 	default:
 		err = -EOPNOTSUPP;
-- 
2.23.0


^ permalink raw reply related

* [PATCH net-next 2/6] net: dsa: do not skip -EOPNOTSUPP in dsa_port_vid_add
From: Vivien Didelot @ 2019-08-22 20:13 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, olteanv, Vivien Didelot
In-Reply-To: <20190822201323.1292-1-vivien.didelot@gmail.com>

Currently dsa_port_vid_add returns 0 if the switch returns -EOPNOTSUPP.

This function is used in the tag_8021q.c code to offload the PVID of
ports, which would simply not work if .port_vlan_add is not supported
by the underlying switch.

Do not skip -EOPNOTSUPP in dsa_port_vid_add but only when necessary,
that is to say in dsa_slave_vlan_rx_add_vid.

Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
---
 net/dsa/port.c  | 4 ++--
 net/dsa/slave.c | 7 +++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/dsa/port.c b/net/dsa/port.c
index f75301456430..ef28df7ecbde 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -382,8 +382,8 @@ int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags)
 
 	trans.ph_prepare = true;
 	err = dsa_port_vlan_add(dp, &vlan, &trans);
-	if (err == -EOPNOTSUPP)
-		return 0;
+	if (err)
+		return err;
 
 	trans.ph_prepare = false;
 	return dsa_port_vlan_add(dp, &vlan, &trans);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 33f41178afcc..9d61d9dbf001 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1082,8 +1082,11 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
 			return -EBUSY;
 	}
 
-	/* This API only allows programming tagged, non-PVID VIDs */
-	return dsa_port_vid_add(dp, vid, 0);
+	ret = dsa_port_vid_add(dp, vid, 0);
+	if (ret && ret != -EOPNOTSUPP)
+		return ret;
+
+	return 0;
 }
 
 static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
-- 
2.23.0


^ permalink raw reply related

* [PATCH net-next 1/6] net: dsa: remove bitmap operations
From: Vivien Didelot @ 2019-08-22 20:13 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, olteanv, Vivien Didelot
In-Reply-To: <20190822201323.1292-1-vivien.didelot@gmail.com>

The bitmap operations were introduced to simplify the switch drivers
in the future, since most of them could implement the common VLAN and
MDB operations (add, del, dump) with simple functions taking all target
ports at once, and thus limiting the number of hardware accesses.

Programming an MDB or VLAN this way in a single operation would clearly
simplify the drivers a lot but would require a new get-set interface
in DSA. The usage of such bitmap from the stack also raised concerned
in the past, leading to the dynamic allocation of a new ds->_bitmap
member in the dsa_switch structure. So let's get rid of them for now.

This commit nicely wraps the ds->ops->port_{mdb,vlan}_{prepare,add}
switch operations into new dsa_switch_{mdb,vlan}_{prepare,add}
variants not using any bitmap argument anymore.

New dsa_switch_{mdb,vlan}_match helpers have been introduced to make
clear which local port of a switch must be programmed with the target
object. While the targeted user port is an obvious candidate, the
DSA links must also be programmed, as well as the CPU port for VLANs.

While at it, also remove local variables that are only used once.

Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
---
 include/net/dsa.h |   3 --
 net/dsa/dsa2.c    |  14 -----
 net/dsa/switch.c  | 132 +++++++++++++++++++++-------------------------
 3 files changed, 59 insertions(+), 90 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 147b757ef8ea..96acb14ec1a8 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -275,9 +275,6 @@ struct dsa_switch {
 	 */
 	bool			vlan_filtering;
 
-	unsigned long		*bitmap;
-	unsigned long		_bitmap;
-
 	/* Dynamically allocated ports, keep last */
 	size_t num_ports;
 	struct dsa_port ports[];
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 8c4eccb0cfe6..f8445fa73448 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -834,20 +834,6 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
 	if (!ds)
 		return NULL;
 
-	/* We avoid allocating memory outside dsa_switch
-	 * if it is not needed.
-	 */
-	if (n <= sizeof(ds->_bitmap) * 8) {
-		ds->bitmap = &ds->_bitmap;
-	} else {
-		ds->bitmap = devm_kcalloc(dev,
-					  BITS_TO_LONGS(n),
-					  sizeof(unsigned long),
-					  GFP_KERNEL);
-		if (unlikely(!ds->bitmap))
-			return NULL;
-	}
-
 	ds->dev = dev;
 	ds->num_ports = n;
 
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 09d9286b27cc..489eb7b430a4 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -128,57 +128,51 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds,
 	return ds->ops->port_fdb_del(ds, port, info->addr, info->vid);
 }
 
-static int
-dsa_switch_mdb_prepare_bitmap(struct dsa_switch *ds,
-			      const struct switchdev_obj_port_mdb *mdb,
-			      const unsigned long *bitmap)
+static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port,
+				 struct dsa_notifier_mdb_info *info)
+{
+	if (ds->index == info->sw_index && port == info->port)
+		return true;
+
+	if (dsa_is_dsa_port(ds, port))
+		return true;
+
+	return false;
+}
+
+static int dsa_switch_mdb_prepare(struct dsa_switch *ds,
+				  struct dsa_notifier_mdb_info *info)
 {
 	int port, err;
 
 	if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
 		return -EOPNOTSUPP;
 
-	for_each_set_bit(port, bitmap, ds->num_ports) {
-		err = ds->ops->port_mdb_prepare(ds, port, mdb);
-		if (err)
-			return err;
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_switch_mdb_match(ds, port, info)) {
+			err = ds->ops->port_mdb_prepare(ds, port, info->mdb);
+			if (err)
+				return err;
+		}
 	}
 
 	return 0;
 }
 
-static void dsa_switch_mdb_add_bitmap(struct dsa_switch *ds,
-				      const struct switchdev_obj_port_mdb *mdb,
-				      const unsigned long *bitmap)
-{
-	int port;
-
-	if (!ds->ops->port_mdb_add)
-		return;
-
-	for_each_set_bit(port, bitmap, ds->num_ports)
-		ds->ops->port_mdb_add(ds, port, mdb);
-}
-
 static int dsa_switch_mdb_add(struct dsa_switch *ds,
 			      struct dsa_notifier_mdb_info *info)
 {
-	const struct switchdev_obj_port_mdb *mdb = info->mdb;
-	struct switchdev_trans *trans = info->trans;
 	int port;
 
-	/* Build a mask of Multicast group members */
-	bitmap_zero(ds->bitmap, ds->num_ports);
-	if (ds->index == info->sw_index)
-		set_bit(info->port, ds->bitmap);
-	for (port = 0; port < ds->num_ports; port++)
-		if (dsa_is_dsa_port(ds, port))
-			set_bit(port, ds->bitmap);
+	if (switchdev_trans_ph_prepare(info->trans))
+		return dsa_switch_mdb_prepare(ds, info);
 
-	if (switchdev_trans_ph_prepare(trans))
-		return dsa_switch_mdb_prepare_bitmap(ds, mdb, ds->bitmap);
+	if (!ds->ops->port_mdb_add)
+		return 0;
 
-	dsa_switch_mdb_add_bitmap(ds, mdb, ds->bitmap);
+	for (port = 0; port < ds->num_ports; port++)
+		if (dsa_switch_mdb_match(ds, port, info))
+			ds->ops->port_mdb_add(ds, port, info->mdb);
 
 	return 0;
 }
@@ -186,13 +180,11 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
 static int dsa_switch_mdb_del(struct dsa_switch *ds,
 			      struct dsa_notifier_mdb_info *info)
 {
-	const struct switchdev_obj_port_mdb *mdb = info->mdb;
-
 	if (!ds->ops->port_mdb_del)
 		return -EOPNOTSUPP;
 
 	if (ds->index == info->sw_index)
-		return ds->ops->port_mdb_del(ds, info->port, mdb);
+		return ds->ops->port_mdb_del(ds, info->port, info->mdb);
 
 	return 0;
 }
@@ -234,59 +226,55 @@ static int dsa_port_vlan_check(struct dsa_switch *ds, int port,
 			     (void *)vlan);
 }
 
-static int
-dsa_switch_vlan_prepare_bitmap(struct dsa_switch *ds,
-			       const struct switchdev_obj_port_vlan *vlan,
-			       const unsigned long *bitmap)
+static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port,
+				  struct dsa_notifier_vlan_info *info)
+{
+	if (ds->index == info->sw_index && port == info->port)
+		return true;
+
+	if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+		return true;
+
+	return false;
+}
+
+static int dsa_switch_vlan_prepare(struct dsa_switch *ds,
+				   struct dsa_notifier_vlan_info *info)
 {
 	int port, err;
 
 	if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
 		return -EOPNOTSUPP;
 
-	for_each_set_bit(port, bitmap, ds->num_ports) {
-		err = dsa_port_vlan_check(ds, port, vlan);
-		if (err)
-			return err;
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_switch_vlan_match(ds, port, info)) {
+			err = dsa_port_vlan_check(ds, port, info->vlan);
+			if (err)
+				return err;
 
-		err = ds->ops->port_vlan_prepare(ds, port, vlan);
-		if (err)
-			return err;
+			err = ds->ops->port_vlan_prepare(ds, port, info->vlan);
+			if (err)
+				return err;
+		}
 	}
 
 	return 0;
 }
 
-static void
-dsa_switch_vlan_add_bitmap(struct dsa_switch *ds,
-			   const struct switchdev_obj_port_vlan *vlan,
-			   const unsigned long *bitmap)
-{
-	int port;
-
-	for_each_set_bit(port, bitmap, ds->num_ports)
-		ds->ops->port_vlan_add(ds, port, vlan);
-}
-
 static int dsa_switch_vlan_add(struct dsa_switch *ds,
 			       struct dsa_notifier_vlan_info *info)
 {
-	const struct switchdev_obj_port_vlan *vlan = info->vlan;
-	struct switchdev_trans *trans = info->trans;
 	int port;
 
-	/* Build a mask of VLAN members */
-	bitmap_zero(ds->bitmap, ds->num_ports);
-	if (ds->index == info->sw_index)
-		set_bit(info->port, ds->bitmap);
-	for (port = 0; port < ds->num_ports; port++)
-		if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
-			set_bit(port, ds->bitmap);
+	if (switchdev_trans_ph_prepare(info->trans))
+		return dsa_switch_vlan_prepare(ds, info);
 
-	if (switchdev_trans_ph_prepare(trans))
-		return dsa_switch_vlan_prepare_bitmap(ds, vlan, ds->bitmap);
+	if (!ds->ops->port_vlan_add)
+		return 0;
 
-	dsa_switch_vlan_add_bitmap(ds, vlan, ds->bitmap);
+	for (port = 0; port < ds->num_ports; port++)
+		if (dsa_switch_vlan_match(ds, port, info))
+			ds->ops->port_vlan_add(ds, port, info->vlan);
 
 	return 0;
 }
@@ -294,13 +282,11 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
 static int dsa_switch_vlan_del(struct dsa_switch *ds,
 			       struct dsa_notifier_vlan_info *info)
 {
-	const struct switchdev_obj_port_vlan *vlan = info->vlan;
-
 	if (!ds->ops->port_vlan_del)
 		return -EOPNOTSUPP;
 
 	if (ds->index == info->sw_index)
-		return ds->ops->port_vlan_del(ds, info->port, vlan);
+		return ds->ops->port_vlan_del(ds, info->port, info->vlan);
 
 	return 0;
 }
-- 
2.23.0


^ permalink raw reply related

* [PATCH net-next 0/6] net: dsa: explicit programmation of VLAN on CPU ports
From: Vivien Didelot @ 2019-08-22 20:13 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, olteanv, Vivien Didelot

When a VLAN is programmed on a user port, every switch of the fabric also
program the CPU ports and the DSA links as part of the VLAN. To do that,
DSA makes use of bitmaps to prepare all members of a VLAN.

While this is expected for DSA links which are used as conduit between
interconnected switches, only the dedicated CPU port of the slave must be
programmed, not all CPU ports of the fabric. This may also cause problems in
other corners of DSA such as the tag_8021q.c driver, which needs to program
its ports manually, CPU port included.

We need the dsa_port_vlan_{add,del} functions and its dsa_port_vid_{add,del}
variants to simply trigger the VLAN programmation without any logic in them,
but they may currently skip the operation based on the bridge device state.

This patchset gets rid of the bitmap operations, and moves the bridge device
check as well as the explicit programmation of CPU ports where they belong,
in the slave code.

While at it, clear the VLAN flags before programming a CPU port, as it
doesn't make sense to forward the PVID flag for example for such ports.

Vivien Didelot (6):
  net: dsa: remove bitmap operations
  net: dsa: do not skip -EOPNOTSUPP in dsa_port_vid_add
  net: dsa: add slave VLAN helpers
  net: dsa: check bridge VLAN in slave operations
  net: dsa: program VLAN on CPU port from slave
  net: dsa: clear VLAN flags for CPU port

 include/net/dsa.h |   3 --
 net/dsa/dsa2.c    |  14 -----
 net/dsa/port.c    |  14 ++---
 net/dsa/slave.c   |  79 +++++++++++++++++++++++----
 net/dsa/switch.c  | 135 +++++++++++++++++++++-------------------------
 5 files changed, 136 insertions(+), 109 deletions(-)

-- 
2.23.0

^ permalink raw reply

* Re: New skb extension for use by LSMs (skb "security blob")?
From: Casey Schaufler @ 2019-08-22 20:10 UTC (permalink / raw)
  To: Paul Moore, Florian Westphal
  Cc: netdev, linux-security-module, selinux, casey
In-Reply-To: <CAHC9VhQ_+3ywPu0QRzP3cSgPH2i9Br994wJttp-yXy2GA4FrNg@mail.gmail.com>

On 8/22/2019 9:32 AM, Paul Moore wrote:
> On Thu, Aug 22, 2019 at 3:03 AM Florian Westphal <fw@strlen.de> wrote:
>> Paul Moore <paul@paul-moore.com> wrote:
>>> Hello netdev,
>>>
>>> I was just made aware of the skb extension work, and it looks very
>>> appealing from a LSM perspective.  As some of you probably remember,
>>> we (the LSM folks) have wanted a proper security blob in the skb for
>>> quite some time, but netdev has been resistant to this idea thus far.
>> Is that "blob" in addition to skb->secmark, or a replacement?
> That's a good question.  While I thought about that, I wasn't sure if
> that was worth bringing up as previous attempts to trade the secmark
> field for a void pointer met with failure.  Last time I played with it
> I was able to take the additional 32-bits from holes in the skb, and
> possibly even improve some of the cacheline groupings (but that is
> always going to be a dependent on use case I think), but that wasn't
> enough.
>
> I think we could consider freeing up the secmark in the main skb, and
> move it to a skb extension, but this would potentially increase the
> chances that we would need to add an extension to a skb.  I don't have
> any hard numbers, but based on discussions and questions I suspect
> Secmark is more widely used than NetLabel and/or labeled IPsec;
> although I'm confident it is still a minor percentage of the overall
> Linux installed base.

Smack uses both extensively. As far as Smack is concerned giving up
the secmark for a blob would be just fine.

I am also working on security module stacking, and a blob in the
skb would dramatically improve the options for making that work
rationally.

> For me the big question is what would it take for us to get a security
> blob associated with the skb?  Would moving the secmark into the skb
> extension be enough?  Something else?  Or is this simply never going
> to happen?  I want to remain optimistic, but I've been trying for this
> off-and-on for over a decade and keep running into a brick wall ;)

Given that the original objection to using a skb extension for a
security blob was that an extension is dynamic, and that the ubiquitous
nature of LSM use makes that unreasonable, it would seem that supporting
the security blob as a basic part if the skb would be the obvious and
correct solution. If the normal case is that there is an LSM that would
befit from the native (unextended) support of a blob, it would seem
that that is the case that should be optimized.

^ permalink raw reply

* Re: [PATCH 1/3] net: Add HW_BRIDGE offload feature
From: Andrew Lunn @ 2019-08-22 20:08 UTC (permalink / raw)
  To: Horatiu Vultur
  Cc: roopa, nikolay, davem, UNGLinuxDriver, alexandre.belloni,
	allan.nielsen, netdev, linux-kernel, bridge
In-Reply-To: <1566500850-6247-2-git-send-email-horatiu.vultur@microchip.com>

> +/* Determin if the SW bridge can be offloaded to HW. Return true if all
> + * the interfaces of the bridge have the feature NETIF_F_HW_SWITCHDEV set
> + * and have the same netdev_ops.
> + */

Hi Horatiu

Why do you need these restrictions. The HW bridge should be able to
learn that a destination MAC address can be reached via the SW
bridge. The software bridge can then forward it out the correct
interface.

Or are you saying your hardware cannot learn from frames which come
from the CPU?

	Andrew

^ permalink raw reply

* net/dst_cache.c: preemption bug in net/dst_cache.c
From: Bharath Vedartham @ 2019-08-22 19:51 UTC (permalink / raw)
  To: davem, gregkh, allison; +Cc: tglx, netdev, linux-kernel

Hi all,

I just want to bring attention to the syzbot bug [1]

Even though syzbot claims the bug to be in net/tipc, I feel it is in
net/dst_cache.c. Please correct me if I am wrong.

This bug is being triggered a lot of times by syzbot since the day it
was reported. Also given that this is core networking code, I felt it
was important to bring this to attention.

It looks like preemption needs to be disabled before using this_cpu_ptr
or maybe we would be better of using a get_cpu_var and put_cpu_var combo
here.

[1] https://syzkaller.appspot.com/bug?id=dc6352b92862eb79373fe03fdf9af5928753e057

Thank you
Bharath

^ permalink raw reply

* Re: [PATCH net-next v2 2/3] net: ethernet: mediatek: Re-add support SGMII
From: René van Dorst @ 2019-08-22 19:50 UTC (permalink / raw)
  To: Russell King - ARM Linux admin
  Cc: John Crispin, Sean Wang, Nelson Chang, David S . Miller,
	Matthias Brugger, Frank Wunderlich, netdev, linux-mips,
	linux-mediatek, Stefan Roese, linux-arm-kernel
In-Reply-To: <20190822144433.GT13294@shell.armlinux.org.uk>

Hi Russell,

Quoting Russell King - ARM Linux admin <linux@armlinux.org.uk>:

> On Wed, Aug 21, 2019 at 04:43:35PM +0200, René van Dorst wrote:
>> +	if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_SGMII)) {
>> +		if (state->interface != PHY_INTERFACE_MODE_2500BASEX) {
>>  			phylink_set(mask, 1000baseT_Full);
>>  			phylink_set(mask, 1000baseX_Full);
>> +		} else {
>> +			phylink_set(mask, 2500baseT_Full);
>> +			phylink_set(mask, 2500baseX_Full);
>> +		}
>
> If you can dynamically switch between 1000BASE-X and 2500BASE-X, then
> you need to have both set.  See mvneta.c:
>
>         if (pp->comphy || state->interface != PHY_INTERFACE_MODE_2500BASEX) {
>                 phylink_set(mask, 1000baseT_Full);
>                 phylink_set(mask, 1000baseX_Full);
>         }
>         if (pp->comphy || state->interface == PHY_INTERFACE_MODE_2500BASEX) {
>                 phylink_set(mask, 2500baseT_Full);
>                 phylink_set(mask, 2500baseX_Full);
>         }
>
> What this is saying is, if we have a comphy (which is the serdes lane
> facing component, where the data rate is setup) then we can support
> both speeds (and so mask ends up with all four bits set.)  Otherwise,
> we only support a single-speed (1000Gbps for non-2500BASE-X etc.)
>
>> +	} else {
>> +		if (state->interface == PHY_INTERFACE_MODE_TRGMII) {
>> +			phylink_set(mask, 1000baseT_Full);
>> +		} else {
>> +			phylink_set(mask, 10baseT_Half);
>> +			phylink_set(mask, 10baseT_Full);
>> +			phylink_set(mask, 100baseT_Half);
>> +			phylink_set(mask, 100baseT_Full);
>> +
>> +			if (state->interface != PHY_INTERFACE_MODE_MII) {
>> +				phylink_set(mask, 1000baseT_Half);
>> +				phylink_set(mask, 1000baseT_Full);
>> +				phylink_set(mask, 1000baseX_Full);
>> +			}
>
> I'm also wondering about the "MTK_HAS_CAPS(mac->hw->soc->caps,
> MTK_SGMII)" above.

This totally wrong.
MTK_HAS_CAPS(mac->hw->soc->caps, MTK_SGMII) tells me that the SOC has SGMII
lane(s). Having a SGMII block doesn't mean that other functions aren't
supported. I have to redo this!

> (Here comes a reason why using SGMII to cover all single-lane serdes
> modes causes confusion - unfortunately, some folk use SGMII to describe
> all these modes.  So, I'm going to use the terminology "Cisco SGMII"
> to mean exactly the SGMII format published by Cisco, "802.3 1000BASE-X"
> to mean the original IEEE 802.3 format running at 1.25Gbps, and
> "up-clocked 2500BASE-X" to mean the 3.125Gbps version of the 802.3
> 1000BASE-X protocol.)

Thanks for the explanation. In your previous review v1 you also explained it.
I did change the forced modes for x-BaseX modes and auto negotiation for Cisco
SGMII. But I seems to miss the link that I also have to improve this  
validation
part.

>
> Isn't this set for Cisco SGMII as well as for 802.3 1000BASE-X and
> the up-clocked 2500BASE-X modes?
>
> If so, is there a reason why 10Mbps and 100Mbps speeds aren't
> supported on Cisco SGMII links?

I can only tell a bit about the mt7622 SOC, datasheet tells me that:

The SGMII is the interface between 10/100/1000/2500 Mbps PHY and Ethernet MAC,
the spec is raised by Cisco in 1999, which is aims for pin reduction compare
with the GMII. It uses 2 differential data pair for TX and RX with clock
embedded bit stream to convey frame data and port ability information.
The core leverages the 1000Base-X PCS and Auto-Negotiation from IEEE 802.3
specification (clause 36/37). This IP can support up to 3.125G baud  
for 2.5Gbps
(proprietary 2500Base-X) data rate of MAC by overclocking.

Also features tells me: Support 10/100/1000/2500 Mbps in full duplex mode and
10/100 Mbps in half duplex mode.

I going make a new version.

Greats,

René

> --
> RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
> According to speedtest.net: 11.9Mbps down 500kbps up




^ permalink raw reply

* Re: [PATCH net-next,v4, 4/6] net/mlx5: Add HV VHCA infrastructure
From: Eran Ben Elisha @ 2019-08-22 19:33 UTC (permalink / raw)
  To: Leon Romanovsky, haiyangz
  Cc: sashal@kernel.org, davem@davemloft.net, Saeed Mahameed,
	lorenzo.pieralisi@arm.com, bhelgaas@google.com,
	linux-pci@vger.kernel.org, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org, kys, Stephen Hemminger,
	linux-kernel@vger.kernel.org
In-Reply-To: <20190822185847.GP29433@mtr-leonro.mtl.com>



On 8/22/2019 9:58 PM, Leon Romanovsky wrote:
> On Thu, Aug 22, 2019 at 05:05:51AM +0000, Haiyang Zhang wrote:
>> From: Eran Ben Elisha <eranbe@mellanox.com>
>>
>> HV VHCA is a layer which provides PF to VF communication channel based on
>> HyperV PCI config channel. It implements Mellanox's Inter VHCA control
>> communication protocol. The protocol contains control block in order to
>> pass messages between the PF and VF drivers, and data blocks in order to
>> pass actual data.
>>
>> The infrastructure is agent based. Each agent will be responsible of
>> contiguous buffer blocks in the VHCA config space. This infrastructure will
>> bind agents to their blocks, and those agents can only access read/write
>> the buffer blocks assigned to them. Each agent will provide three
>> callbacks (control, invalidate, cleanup). Control will be invoked when
>> block-0 is invalidated with a command that concerns this agent. Invalidate
>> callback will be invoked if one of the blocks assigned to this agent was
>> invalidated. Cleanup will be invoked before the agent is being freed in
>> order to clean all of its open resources or deferred works.
>>
>> Block-0 serves as the control block. All execution commands from the PF
>> will be written by the PF over this block. VF will ack on those by
>> writing on block-0 as well. Its format is described by struct
>> mlx5_hv_vhca_control_block layout.
>>
>> Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
>> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
>> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
>> ---
>>   drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
>>   .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c  | 253 +++++++++++++++++++++
>>   .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h  | 102 +++++++++
>>   drivers/net/ethernet/mellanox/mlx5/core/main.c     |   7 +
>>   include/linux/mlx5/driver.h                        |   2 +
>>   5 files changed, 365 insertions(+), 1 deletion(-)
>>   create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>>   create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> index fd32a5b..8d443fc 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> @@ -45,7 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offlo
>>   mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
>>   mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
>>   mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
>> -mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o
>> +mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o
>>
>>   #
>>   # Ipoib netdev
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>> new file mode 100644
>> index 0000000..84d1d75
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>> @@ -0,0 +1,253 @@
>> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>> +// Copyright (c) 2018 Mellanox Technologies
>> +
>> +#include <linux/hyperv.h>
>> +#include "mlx5_core.h"
>> +#include "lib/hv.h"
>> +#include "lib/hv_vhca.h"
>> +
>> +struct mlx5_hv_vhca {
>> +	struct mlx5_core_dev       *dev;
>> +	struct workqueue_struct    *work_queue;
>> +	struct mlx5_hv_vhca_agent  *agents[MLX5_HV_VHCA_AGENT_MAX];
>> +	struct mutex                agents_lock; /* Protect agents array */
>> +};
>> +
>> +struct mlx5_hv_vhca_work {
>> +	struct work_struct     invalidate_work;
>> +	struct mlx5_hv_vhca   *hv_vhca;
>> +	u64                    block_mask;
>> +};
>> +
>> +struct mlx5_hv_vhca_data_block {
>> +	u16     sequence;
>> +	u16     offset;
>> +	u8      reserved[4];
>> +	u64     data[15];
>> +};
>> +
>> +struct mlx5_hv_vhca_agent {
>> +	enum mlx5_hv_vhca_agent_type	 type;
>> +	struct mlx5_hv_vhca		*hv_vhca;
>> +	void				*priv;
>> +	u16                              seq;
>> +	void (*control)(struct mlx5_hv_vhca_agent *agent,
>> +			struct mlx5_hv_vhca_control_block *block);
>> +	void (*invalidate)(struct mlx5_hv_vhca_agent *agent,
>> +			   u64 block_mask);
>> +	void (*cleanup)(struct mlx5_hv_vhca_agent *agent);
>> +};
>> +
>> +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
>> +{
>> +	struct mlx5_hv_vhca *hv_vhca = NULL;
>> +
>> +	hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL);
>> +	if (!hv_vhca)
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca");
> 
> I was under impression that usage of create_* interfaces is discouraged,
> It has WQ_MEMORY_LEGACY flag inside and commit b71ab8c2025ca talks about
> this interface as legacy one.

mlx5 driver has dozens of singlethread workqueues. A general effort to 
remove them can be done later.

> 
>> +	if (!hv_vhca->work_queue) {
>> +		kfree(hv_vhca);
>> +		return ERR_PTR(-ENOMEM);
>> +	}
>> +
>> +	hv_vhca->dev = dev;
>> +	mutex_init(&hv_vhca->agents_lock);
>> +
>> +	return hv_vhca;
>> +}
>> +
>> +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +	if (IS_ERR_OR_NULL(hv_vhca))
>> +		return;
>> +
>> +	destroy_workqueue(hv_vhca->work_queue);
>> +	kfree(hv_vhca);
>> +}
>> +
>> +static void mlx5_hv_vhca_invalidate_work(struct work_struct *work)
>> +{
>> +	struct mlx5_hv_vhca_work *hwork;
>> +	struct mlx5_hv_vhca *hv_vhca;
>> +	int i;
>> +
>> +	hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work);
>> +	hv_vhca = hwork->hv_vhca;
>> +
>> +	mutex_lock(&hv_vhca->agents_lock);
>> +	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
>> +		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
>> +
>> +		if (!agent || !agent->invalidate)
>> +			continue;
>> +
>> +		if (!(BIT(agent->type) & hwork->block_mask))
>> +			continue;
>> +
>> +		agent->invalidate(agent, hwork->block_mask);
>> +	}
>> +	mutex_unlock(&hv_vhca->agents_lock);
>> +
>> +	kfree(hwork);
>> +}
>> +
>> +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
>> +{
>> +	struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context;
>> +	struct mlx5_hv_vhca_work *work;
>> +
>> +	work = kzalloc(sizeof(*work), GFP_ATOMIC);
>> +	if (!work)
>> +		return;
>> +
>> +	INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work);
>> +	work->hv_vhca    = hv_vhca;
>> +	work->block_mask = block_mask;
>> +
>> +	queue_work(hv_vhca->work_queue, &work->invalidate_work);
>> +}
>> +
>> +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +	if (IS_ERR_OR_NULL(hv_vhca))
>> +		return IS_ERR_OR_NULL(hv_vhca);
>> +
>> +	return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
>> +					   mlx5_hv_vhca_invalidate);
>> +}
>> +
>> +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +	int i;
>> +
>> +	if (IS_ERR_OR_NULL(hv_vhca))
>> +		return;
>> +
>> +	mutex_lock(&hv_vhca->agents_lock);
>> +	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
>> +		WARN_ON(hv_vhca->agents[i]);
>> +
>> +	mutex_unlock(&hv_vhca->agents_lock);
>> +
>> +	mlx5_hv_unregister_invalidate(hv_vhca->dev);
>> +}
>> +
>> +struct mlx5_hv_vhca_agent *
>> +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
>> +			  enum mlx5_hv_vhca_agent_type type,
>> +			  void (*control)(struct mlx5_hv_vhca_agent*,
>> +					  struct mlx5_hv_vhca_control_block *block),
>> +			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
>> +					     u64 block_mask),
>> +			  void (*cleaup)(struct mlx5_hv_vhca_agent *agent),
>> +			  void *priv)
>> +{
>> +	struct mlx5_hv_vhca_agent *agent;
>> +
>> +	if (IS_ERR_OR_NULL(hv_vhca))
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	if (type >= MLX5_HV_VHCA_AGENT_MAX)
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	mutex_lock(&hv_vhca->agents_lock);
>> +	if (hv_vhca->agents[type]) {
>> +		mutex_unlock(&hv_vhca->agents_lock);
>> +		return ERR_PTR(-EINVAL);
>> +	}
>> +	mutex_unlock(&hv_vhca->agents_lock);
>> +
>> +	agent = kzalloc(sizeof(*agent), GFP_KERNEL);
>> +	if (!agent)
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	agent->type      = type;
>> +	agent->hv_vhca   = hv_vhca;
>> +	agent->priv      = priv;
>> +	agent->control   = control;
>> +	agent->invalidate = invalidate;
>> +	agent->cleanup   = cleaup;
>> +
>> +	mutex_lock(&hv_vhca->agents_lock);
>> +	hv_vhca->agents[type] = agent;
>> +	mutex_unlock(&hv_vhca->agents_lock);
>> +
>> +	return agent;
>> +}
>> +
>> +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
>> +{
>> +	struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
>> +
>> +	mutex_lock(&hv_vhca->agents_lock);
>> +
>> +	if (WARN_ON(agent != hv_vhca->agents[agent->type])) {
>> +		mutex_unlock(&hv_vhca->agents_lock);
>> +		return;
>> +	}
>> +
>> +	hv_vhca->agents[agent->type] = NULL;
>> +	mutex_unlock(&hv_vhca->agents_lock);
>> +
>> +	if (agent->cleanup)
>> +		agent->cleanup(agent);
>> +
>> +	kfree(agent);
>> +}
>> +
>> +static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
>> +					   struct mlx5_hv_vhca_data_block *data_block,
>> +					   void *src, int len, int *offset)
>> +{
>> +	int bytes = min_t(int, (int)sizeof(data_block->data), len);
>> +
>> +	data_block->sequence = agent->seq;
>> +	data_block->offset   = (*offset)++;
>> +	memcpy(data_block->data, src, bytes);
>> +
>> +	return bytes;
>> +}
>> +
>> +static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent)
>> +{
>> +	agent->seq++;
>> +}
>> +
>> +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
>> +			     void *buf, int len)
>> +{
>> +	int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX;
>> +	int block_offset = 0;
>> +	int total = 0;
>> +	int err;
>> +
>> +	while (len) {
>> +		struct mlx5_hv_vhca_data_block data_block = {0};
>> +		int bytes;
>> +
>> +		bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block,
>> +							buf + total,
>> +							len, &block_offset);
>> +		if (!bytes)
>> +			return -ENOMEM;
>> +
>> +		err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block,
>> +					   sizeof(data_block), offset);
>> +		if (err)
>> +			return err;
>> +
>> +		total += bytes;
>> +		len   -= bytes;
>> +	}
>> +
>> +	mlx5_hv_vhca_agent_seq_update(agent);
>> +
>> +	return 0;
>> +}
>> +
>> +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent)
>> +{
>> +	return agent->priv;
>> +}
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>> new file mode 100644
>> index 0000000..cdf1303
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>> @@ -0,0 +1,102 @@
>> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
>> +/* Copyright (c) 2019 Mellanox Technologies. */
>> +
>> +#ifndef __LIB_HV_VHCA_H__
>> +#define __LIB_HV_VHCA_H__
>> +
>> +#include "en.h"
>> +#include "lib/hv.h"
>> +
>> +struct mlx5_hv_vhca_agent;
>> +struct mlx5_hv_vhca;
>> +struct mlx5_hv_vhca_control_block;
>> +
>> +enum mlx5_hv_vhca_agent_type {
>> +	MLX5_HV_VHCA_AGENT_MAX = 32,
>> +};
>> +
>> +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
>> +
>> +struct mlx5_hv_vhca_control_block {
>> +	u32     capabilities;
>> +	u32     control;
>> +	u16     command;
>> +	u16     command_ack;
>> +	u16     version;
>> +	u16     rings;
>> +	u32     reserved1[28];
>> +};
>> +
>> +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev);
>> +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca);
>> +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca);
>> +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca);
>> +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask);
>> +
>> +struct mlx5_hv_vhca_agent *
>> +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
>> +			  enum mlx5_hv_vhca_agent_type type,
>> +			  void (*control)(struct mlx5_hv_vhca_agent*,
>> +					  struct mlx5_hv_vhca_control_block *block),
>> +			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
>> +					     u64 block_mask),
>> +			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
>> +			  void *context);
>> +
>> +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent);
>> +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
>> +			     void *buf, int len);
>> +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent);
>> +
>> +#else
>> +
>> +static inline struct mlx5_hv_vhca *
>> +mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
>> +{
>> +	return NULL;
>> +}
>> +
>> +static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +}
>> +
>> +static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +	return 0;
>> +}
>> +
>> +static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +}
>> +
>> +static inline void mlx5_hv_vhca_invalidate(void *context,
>> +					   u64 block_mask)
>> +{
>> +}
>> +
>> +static inline struct mlx5_hv_vhca_agent *
>> +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
>> +			  enum mlx5_hv_vhca_agent_type type,
>> +			  void (*control)(struct mlx5_hv_vhca_agent*,
>> +					  struct mlx5_hv_vhca_control_block *block),
>> +			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
>> +					     u64 block_mask),
>> +			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
>> +			  void *context)
>> +{
>> +	return NULL;
>> +}
>> +
>> +static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
>> +{
>> +}
>> +
>> +static inline int
>> +mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent,
>> +			 void *buf, int len)
>> +{
>> +	return 0;
>> +}
>> +#endif
>> +
>> +#endif /* __LIB_HV_VHCA_H__ */
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
>> index 0b70b1d..61388ca 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
>> @@ -69,6 +69,7 @@
>>   #include "lib/pci_vsc.h"
>>   #include "diag/fw_tracer.h"
>>   #include "ecpf.h"
>> +#include "lib/hv_vhca.h"
>>
>>   MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
>>   MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
>> @@ -870,6 +871,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
>>   	}
>>
>>   	dev->tracer = mlx5_fw_tracer_create(dev);
>> +	dev->hv_vhca = mlx5_hv_vhca_create(dev);
>>
>>   	return 0;
>>
>> @@ -900,6 +902,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
>>
>>   static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
>>   {
>> +	mlx5_hv_vhca_destroy(dev->hv_vhca);
>>   	mlx5_fw_tracer_destroy(dev->tracer);
>>   	mlx5_fpga_cleanup(dev);
>>   	mlx5_eswitch_cleanup(dev->priv.eswitch);
>> @@ -1067,6 +1070,8 @@ static int mlx5_load(struct mlx5_core_dev *dev)
>>   		goto err_fw_tracer;
>>   	}
>>
>> +	mlx5_hv_vhca_init(dev->hv_vhca);
> 
> What is the point to declare this function as "int ..." if you are not
> interested in result?
> 
>> +
>>   	err = mlx5_fpga_device_start(dev);
>>   	if (err) {
>>   		mlx5_core_err(dev, "fpga device start failed %d\n", err);
>> @@ -1122,6 +1127,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
>>   err_ipsec_start:
>>   	mlx5_fpga_device_stop(dev);
>>   err_fpga_start:
>> +	mlx5_hv_vhca_cleanup(dev->hv_vhca);
>>   	mlx5_fw_tracer_cleanup(dev->tracer);
>>   err_fw_tracer:
>>   	mlx5_eq_table_destroy(dev);
>> @@ -1142,6 +1148,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
>>   	mlx5_accel_ipsec_cleanup(dev);
>>   	mlx5_accel_tls_cleanup(dev);
>>   	mlx5_fpga_device_stop(dev);
>> +	mlx5_hv_vhca_cleanup(dev->hv_vhca);
>>   	mlx5_fw_tracer_cleanup(dev->tracer);
>>   	mlx5_eq_table_destroy(dev);
>>   	mlx5_irq_table_destroy(dev);
>> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
>> index df23f17..13b4cf2 100644
>> --- a/include/linux/mlx5/driver.h
>> +++ b/include/linux/mlx5/driver.h
>> @@ -659,6 +659,7 @@ struct mlx5_clock {
>>   struct mlx5_fw_tracer;
>>   struct mlx5_vxlan;
>>   struct mlx5_geneve;
>> +struct mlx5_hv_vhca;
>>
>>   struct mlx5_core_dev {
>>   	struct device *device;
>> @@ -706,6 +707,7 @@ struct mlx5_core_dev {
>>   	struct mlx5_ib_clock_info  *clock_info;
>>   	struct mlx5_fw_tracer   *tracer;
>>   	u32                      vsc_addr;
>> +	struct mlx5_hv_vhca	*hv_vhca;
>>   };
>>
>>   struct mlx5_db {
>> --
>> 1.8.3.1
>>

^ permalink raw reply

* Re: [PATCH] nexthops: remove redundant assignment to variable err
From: David Miller @ 2019-08-22 19:14 UTC (permalink / raw)
  To: colin.king
  Cc: dsahern, kuznet, yoshfuji, netdev, kernel-janitors, linux-kernel
In-Reply-To: <20190822125340.30783-1-colin.king@canonical.com>

From: Colin King <colin.king@canonical.com>
Date: Thu, 22 Aug 2019 13:53:40 +0100

> From: Colin Ian King <colin.king@canonical.com>
> 
> Variable err is initialized to a value that is never read and it is
> re-assigned later. The initialization is redundant and can be removed.
> 
> Addresses-Coverity: ("Unused Value")
> Signed-off-by: Colin Ian King <colin.king@canonical.com>

Applied to net-next.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox