* [net-next 10/16] ice: Check for DCB capability before initializing DCB
From: Jeff Kirsher @ 2019-09-05 20:34 UTC (permalink / raw)
To: davem
Cc: Anirudh Venkataramanan, netdev, nhorman, sassmann, Andrew Bowers,
Jeff Kirsher
In-Reply-To: <20190905203406.4152-1-jeffrey.t.kirsher@intel.com>
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Check the ICE_FLAG_DCB_CAPABLE before calling ice_init_pf_dcb.
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 3 ---
drivers/net/ethernet/intel/ice/ice_main.c | 15 ++++++++-------
2 files changed, 8 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index e922adf1fa15..20f440a64650 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -474,7 +474,6 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
}
pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
- set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
return 0;
}
@@ -483,8 +482,6 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
/* DCBX in FW and LLDP enabled in FW */
pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED | DCB_CAP_DCBX_VER_IEEE;
- set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
-
err = ice_dcb_init_cfg(pf, locked);
if (err)
goto dcb_init_err;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 703fc7bf2b31..8bb3b81876a9 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2252,6 +2252,8 @@ static void ice_deinit_pf(struct ice_pf *pf)
static int ice_init_pf(struct ice_pf *pf)
{
bitmap_zero(pf->flags, ICE_PF_FLAGS_NBITS);
+ if (pf->hw.func_caps.common_cap.dcb)
+ set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
#ifdef CONFIG_PCI_IOV
if (pf->hw.func_caps.common_cap.sr_iov_1_1) {
struct ice_hw *hw = &pf->hw;
@@ -2529,13 +2531,12 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
goto err_init_pf_unroll;
}
- err = ice_init_pf_dcb(pf, false);
- if (err) {
- clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
- clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
-
- /* do not fail overall init if DCB init fails */
- err = 0;
+ if (test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)) {
+ /* Note: DCB init failure is non-fatal to load */
+ if (ice_init_pf_dcb(pf, false)) {
+ clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+ clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+ }
}
ice_determine_q_usage(pf);
--
2.21.0
^ permalink raw reply related
* [net-next 13/16] ice: Allow for delayed LLDP MIB change registration
From: Jeff Kirsher @ 2019-09-05 20:34 UTC (permalink / raw)
To: davem
Cc: Dave Ertman, netdev, nhorman, sassmann, Tony Nguyen,
Andrew Bowers, Jeff Kirsher
In-Reply-To: <20190905203406.4152-1-jeffrey.t.kirsher@intel.com>
From: Dave Ertman <david.m.ertman@intel.com>
Add an additional boolean parameter to the ice_init_dcb
function. This boolean controls whether ice_init_dcb registers
for LLDP MIB change events. Also, add a new function,
ice_cfg_lldp_mib_change. The additional function
is necessary to be able to register for LLDP MIB change
events after calling ice_init_dcb. The net effect of these
two changes is to allow a delayed registration for MIB change
events so that the driver is not accepting events before it
is ready for them.
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ice/ice_dcb.c | 39 ++++++++++++++++++--
drivers/net/ethernet/intel/ice/ice_dcb.h | 11 ++----
drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 4 +-
drivers/net/ethernet/intel/ice/ice_ethtool.c | 10 ++++-
drivers/net/ethernet/intel/ice/ice_main.c | 2 +
5 files changed, 51 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
index c5ee8d930611..dd7efff121bd 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -60,7 +60,7 @@ ice_aq_get_lldp_mib(struct ice_hw *hw, u8 bridge_type, u8 mib_type, void *buf,
* Enable or Disable posting of an event on ARQ when LLDP MIB
* associated with the interface changes (0x0A01)
*/
-enum ice_status
+static enum ice_status
ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update,
struct ice_sq_cd *cd)
{
@@ -943,10 +943,11 @@ enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi)
/**
* ice_init_dcb
* @hw: pointer to the HW struct
+ * @enable_mib_change: enable MIB change event
*
* Update DCB configuration from the Firmware
*/
-enum ice_status ice_init_dcb(struct ice_hw *hw)
+enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
{
struct ice_port_info *pi = hw->port_info;
enum ice_status ret = 0;
@@ -972,9 +973,39 @@ enum ice_status ice_init_dcb(struct ice_hw *hw)
}
/* Configure the LLDP MIB change event */
- ret = ice_aq_cfg_lldp_mib_change(hw, true, NULL);
+ if (enable_mib_change) {
+ ret = ice_aq_cfg_lldp_mib_change(hw, true, NULL);
+ if (!ret)
+ pi->is_sw_lldp = false;
+ }
+
+ return ret;
+}
+
+/**
+ * ice_cfg_lldp_mib_change
+ * @hw: pointer to the HW struct
+ * @ena_mib: enable/disable MIB change event
+ *
+ * Configure (disable/enable) MIB
+ */
+enum ice_status ice_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_mib)
+{
+ struct ice_port_info *pi = hw->port_info;
+ enum ice_status ret;
+
+ if (!hw->func_caps.common_cap.dcb)
+ return ICE_ERR_NOT_SUPPORTED;
+
+ /* Get DCBX status */
+ pi->dcbx_status = ice_get_dcbx_status(hw);
+
+ if (pi->dcbx_status == ICE_DCBX_STATUS_DIS)
+ return ICE_ERR_NOT_READY;
+
+ ret = ice_aq_cfg_lldp_mib_change(hw, ena_mib, NULL);
if (!ret)
- pi->is_sw_lldp = false;
+ pi->is_sw_lldp = !ena_mib;
return ret;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.h b/drivers/net/ethernet/intel/ice/ice_dcb.h
index 522e1452abe2..ee138f9bdc7c 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.h
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.h
@@ -125,7 +125,7 @@ ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
struct ice_dcbx_cfg *dcbcfg);
enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi);
enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi);
-enum ice_status ice_init_dcb(struct ice_hw *hw);
+enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change);
enum ice_status
ice_query_port_ets(struct ice_port_info *pi,
struct ice_aqc_port_ets_elem *buf, u16 buf_size,
@@ -139,9 +139,7 @@ ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd);
enum ice_status
ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent,
bool *dcbx_agent_status, struct ice_sq_cd *cd);
-enum ice_status
-ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update,
- struct ice_sq_cd *cd);
+enum ice_status ice_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_mib);
#else /* CONFIG_DCB */
static inline enum ice_status
ice_aq_stop_lldp(struct ice_hw __always_unused *hw,
@@ -172,9 +170,8 @@ ice_aq_start_stop_dcbx(struct ice_hw __always_unused *hw,
}
static inline enum ice_status
-ice_aq_cfg_lldp_mib_change(struct ice_hw __always_unused *hw,
- bool __always_unused ena_update,
- struct ice_sq_cd __always_unused *cd)
+ice_cfg_lldp_mib_change(struct ice_hw __always_unused *hw,
+ bool __always_unused ena_mib)
{
return 0;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index 20f440a64650..97c22d4aae1d 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -318,7 +318,7 @@ void ice_dcb_rebuild(struct ice_pf *pf)
goto dcb_error;
}
- ice_init_dcb(&pf->hw);
+ ice_init_dcb(&pf->hw, true);
if (pf->hw.port_info->dcbx_status == ICE_DCBX_STATUS_DIS)
pf->hw.port_info->is_sw_lldp = true;
else
@@ -451,7 +451,7 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
port_info = hw->port_info;
- err = ice_init_dcb(hw);
+ err = ice_init_dcb(hw, false);
if (err && !port_info->is_sw_lldp) {
dev_err(&pf->pdev->dev, "Error initializing DCB %d\n", err);
goto dcb_init_err;
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index ae9921b7de7b..d5db1426d484 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -1206,8 +1206,8 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
enum ice_status status;
/* Disable FW LLDP engine */
- status = ice_aq_cfg_lldp_mib_change(&pf->hw, false,
- NULL);
+ status = ice_cfg_lldp_mib_change(&pf->hw, false);
+
/* If unregistering for LLDP events fails, this is
* not an error state, as there shouldn't be any
* events to respond to.
@@ -1273,6 +1273,12 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
* The FW LLDP engine will now be consuming them.
*/
ice_cfg_sw_lldp(vsi, false, false);
+
+ /* Register for MIB change events */
+ status = ice_cfg_lldp_mib_change(&pf->hw, true);
+ if (status)
+ dev_dbg(&pf->pdev->dev,
+ "Fail to enable MIB change events\n");
}
}
clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 8bb3b81876a9..2d92d8591a8a 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2536,6 +2536,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
if (ice_init_pf_dcb(pf, false)) {
clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+ } else {
+ ice_cfg_lldp_mib_change(&pf->hw, true);
}
}
--
2.21.0
^ permalink raw reply related
* [net-next 16/16] ice: Rework around device/function capabilities
From: Jeff Kirsher @ 2019-09-05 20:34 UTC (permalink / raw)
To: davem
Cc: Anirudh Venkataramanan, netdev, nhorman, sassmann, Tony Nguyen,
Andrew Bowers, Jeff Kirsher
In-Reply-To: <20190905203406.4152-1-jeffrey.t.kirsher@intel.com>
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
ice_parse_caps is printing capabilities in a different way when
compared to the variable names. This makes it difficult to search for
the right strings in the debug logs. So this patch updates the
print strings to be exactly the same as the field names in the
structure.
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ice/ice_common.c | 40 ++++++++++-----------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index e8397e5b6267..8b2c46615834 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1551,29 +1551,29 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
case ICE_AQC_CAPS_VALID_FUNCTIONS:
caps->valid_functions = number;
ice_debug(hw, ICE_DBG_INIT,
- "%s: valid functions = %d\n", prefix,
+ "%s: valid_functions (bitmap) = %d\n", prefix,
caps->valid_functions);
break;
case ICE_AQC_CAPS_SRIOV:
caps->sr_iov_1_1 = (number == 1);
ice_debug(hw, ICE_DBG_INIT,
- "%s: SR-IOV = %d\n", prefix,
+ "%s: sr_iov_1_1 = %d\n", prefix,
caps->sr_iov_1_1);
break;
case ICE_AQC_CAPS_VF:
if (dev_p) {
dev_p->num_vfs_exposed = number;
ice_debug(hw, ICE_DBG_INIT,
- "%s: VFs exposed = %d\n", prefix,
+ "%s: num_vfs_exposed = %d\n", prefix,
dev_p->num_vfs_exposed);
} else if (func_p) {
func_p->num_allocd_vfs = number;
func_p->vf_base_id = logical_id;
ice_debug(hw, ICE_DBG_INIT,
- "%s: VFs allocated = %d\n", prefix,
+ "%s: num_allocd_vfs = %d\n", prefix,
func_p->num_allocd_vfs);
ice_debug(hw, ICE_DBG_INIT,
- "%s: VF base_id = %d\n", prefix,
+ "%s: vf_base_id = %d\n", prefix,
func_p->vf_base_id);
}
break;
@@ -1581,17 +1581,17 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
if (dev_p) {
dev_p->num_vsi_allocd_to_host = number;
ice_debug(hw, ICE_DBG_INIT,
- "%s: num VSI alloc to host = %d\n",
+ "%s: num_vsi_allocd_to_host = %d\n",
prefix,
dev_p->num_vsi_allocd_to_host);
} else if (func_p) {
func_p->guar_num_vsi =
ice_get_num_per_func(hw, ICE_MAX_VSI);
ice_debug(hw, ICE_DBG_INIT,
- "%s: num guaranteed VSI (fw) = %d\n",
+ "%s: guar_num_vsi (fw) = %d\n",
prefix, number);
ice_debug(hw, ICE_DBG_INIT,
- "%s: num guaranteed VSI = %d\n",
+ "%s: guar_num_vsi = %d\n",
prefix, func_p->guar_num_vsi);
}
break;
@@ -1600,56 +1600,56 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
caps->active_tc_bitmap = logical_id;
caps->maxtc = phys_id;
ice_debug(hw, ICE_DBG_INIT,
- "%s: DCB = %d\n", prefix, caps->dcb);
+ "%s: dcb = %d\n", prefix, caps->dcb);
ice_debug(hw, ICE_DBG_INIT,
- "%s: active TC bitmap = %d\n", prefix,
+ "%s: active_tc_bitmap = %d\n", prefix,
caps->active_tc_bitmap);
ice_debug(hw, ICE_DBG_INIT,
- "%s: TC max = %d\n", prefix, caps->maxtc);
+ "%s: maxtc = %d\n", prefix, caps->maxtc);
break;
case ICE_AQC_CAPS_RSS:
caps->rss_table_size = number;
caps->rss_table_entry_width = logical_id;
ice_debug(hw, ICE_DBG_INIT,
- "%s: RSS table size = %d\n", prefix,
+ "%s: rss_table_size = %d\n", prefix,
caps->rss_table_size);
ice_debug(hw, ICE_DBG_INIT,
- "%s: RSS table width = %d\n", prefix,
+ "%s: rss_table_entry_width = %d\n", prefix,
caps->rss_table_entry_width);
break;
case ICE_AQC_CAPS_RXQS:
caps->num_rxq = number;
caps->rxq_first_id = phys_id;
ice_debug(hw, ICE_DBG_INIT,
- "%s: num Rx queues = %d\n", prefix,
+ "%s: num_rxq = %d\n", prefix,
caps->num_rxq);
ice_debug(hw, ICE_DBG_INIT,
- "%s: Rx first queue ID = %d\n", prefix,
+ "%s: rxq_first_id = %d\n", prefix,
caps->rxq_first_id);
break;
case ICE_AQC_CAPS_TXQS:
caps->num_txq = number;
caps->txq_first_id = phys_id;
ice_debug(hw, ICE_DBG_INIT,
- "%s: num Tx queues = %d\n", prefix,
+ "%s: num_txq = %d\n", prefix,
caps->num_txq);
ice_debug(hw, ICE_DBG_INIT,
- "%s: Tx first queue ID = %d\n", prefix,
+ "%s: txq_first_id = %d\n", prefix,
caps->txq_first_id);
break;
case ICE_AQC_CAPS_MSIX:
caps->num_msix_vectors = number;
caps->msix_vector_first_id = phys_id;
ice_debug(hw, ICE_DBG_INIT,
- "%s: MSIX vector count = %d\n", prefix,
+ "%s: num_msix_vectors = %d\n", prefix,
caps->num_msix_vectors);
ice_debug(hw, ICE_DBG_INIT,
- "%s: MSIX first vector index = %d\n", prefix,
+ "%s: msix_vector_first_id = %d\n", prefix,
caps->msix_vector_first_id);
break;
case ICE_AQC_CAPS_MAX_MTU:
caps->max_mtu = number;
- ice_debug(hw, ICE_DBG_INIT, "%s: max MTU = %d\n",
+ ice_debug(hw, ICE_DBG_INIT, "%s: max_mtu = %d\n",
prefix, caps->max_mtu);
break;
default:
--
2.21.0
^ permalink raw reply related
* [net-next 09/16] ice: report link down for VF when PF's queues are not enabled
From: Jeff Kirsher @ 2019-09-05 20:33 UTC (permalink / raw)
To: davem
Cc: Lukasz Czapnik, netdev, nhorman, sassmann, Tony Nguyen,
Andrew Bowers, Jeff Kirsher
In-Reply-To: <20190905203406.4152-1-jeffrey.t.kirsher@intel.com>
From: Lukasz Czapnik <lukasz.czapnik@intel.com>
This is a port of a fix from i40e commit 2ad1274fa35a ("i40e: don't
report link up for a VF who hasn't enabled queues")
Older VF drivers do not respond well to receiving a link
up notification before queues are enabled. This can cause their state
machine to think that it is safe to send traffic. This results in a Tx
hang on the VF.
Record whether the PF has actually enabled queues for the VF. When
reporting link status, always report link down if the queues aren't
enabled. In this way, the VF driver will never receive a link up
notification until after its queues are enabled.
Signed-off-by: Lukasz Czapnik <lukasz.czapnik@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 3ba6613048ef..1ec2a037a369 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -129,7 +129,10 @@ static void ice_vc_notify_vf_link_state(struct ice_vf *vf)
pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
pfe.severity = PF_EVENT_SEVERITY_INFO;
- if (vf->link_forced)
+ /* Always report link is down if the VF queues aren't enabled */
+ if (!vf->num_qs_ena)
+ ice_set_pfe_link(vf, &pfe, ICE_AQ_LINK_SPEED_UNKNOWN, false);
+ else if (vf->link_forced)
ice_set_pfe_link_forced(vf, &pfe, vf->link_up);
else
ice_set_pfe_link(vf, &pfe, ls->link_speed, ls->link_info &
--
2.21.0
^ permalink raw reply related
* [net-next 01/16] ice: Update fields in ice_vsi_set_num_qs when reconfiguring
From: Jeff Kirsher @ 2019-09-05 20:33 UTC (permalink / raw)
To: davem; +Cc: Brett Creeley, netdev, nhorman, sassmann, Andrew Bowers,
Jeff Kirsher
In-Reply-To: <20190905203406.4152-1-jeffrey.t.kirsher@intel.com>
From: Brett Creeley <brett.creeley@intel.com>
Currently when vsi->req_txqs or vsi->req_rxqs are set we don't
correctly set vsi->num_q_vectors. Fix this by
setting the number of queue vectors to the maximum of
vsi->alloc_txq and vsi->alloc_rxq.
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ice/ice_lib.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index a39767e8c2a2..6cc01ebc0b01 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -345,7 +345,7 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
case ICE_VSI_PF:
vsi->alloc_txq = pf->num_lan_tx;
vsi->alloc_rxq = pf->num_lan_rx;
- vsi->num_q_vectors = max_t(int, pf->num_lan_rx, pf->num_lan_tx);
+ vsi->num_q_vectors = max_t(int, vsi->alloc_rxq, vsi->alloc_txq);
break;
case ICE_VSI_VF:
vf = &pf->vf[vsi->vf_id];
--
2.21.0
^ permalink raw reply related
* [net-next 14/16] ice: Minor refactor in queue management
From: Jeff Kirsher @ 2019-09-05 20:34 UTC (permalink / raw)
To: davem
Cc: Anirudh Venkataramanan, netdev, nhorman, sassmann, Tony Nguyen,
Andrew Bowers, Jeff Kirsher
In-Reply-To: <20190905203406.4152-1-jeffrey.t.kirsher@intel.com>
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Remove q_left_tx and q_left_rx from the PF struct as these can be
obtained by calling ice_get_avail_txq_count and ice_get_avail_rxq_count
respectively.
The function ice_determine_q_usage is only setting num_lan_tx and
num_lan_rx in the PF structure, and these are later assigned to
vsi->alloc_txq and vsi->alloc_rxq respectively. This is an unnecessary
indirection, so remove ice_determine_q_usage and just assign values
for vsi->alloc_txq and vsi->alloc_rxq in ice_vsi_set_num_qs and use
these to set num_lan_tx and num_lan_rx respectively.
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ice/ice.h | 4 +-
drivers/net/ethernet/intel/ice/ice_lib.c | 25 ++++++----
drivers/net/ethernet/intel/ice/ice_main.c | 50 +++++++++++--------
.../net/ethernet/intel/ice/ice_virtchnl_pf.c | 14 +++---
4 files changed, 54 insertions(+), 39 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index c7f234688499..6c4faf7551f6 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -368,8 +368,6 @@ struct ice_pf {
u32 num_lan_msix; /* Total MSIX vectors for base driver */
u16 num_lan_tx; /* num LAN Tx queues setup */
u16 num_lan_rx; /* num LAN Rx queues setup */
- u16 q_left_tx; /* remaining num Tx queues left unclaimed */
- u16 q_left_rx; /* remaining num Rx queues left unclaimed */
u16 next_vsi; /* Next free slot in pf->vsi[] - 0-based! */
u16 num_alloc_vsi;
u16 corer_count; /* Core reset count */
@@ -438,6 +436,8 @@ static inline struct ice_vsi *ice_get_main_vsi(struct ice_pf *pf)
int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
void ice_set_ethtool_ops(struct net_device *netdev);
+u16 ice_get_avail_txq_count(struct ice_pf *pf);
+u16 ice_get_avail_rxq_count(struct ice_pf *pf);
void ice_update_vsi_stats(struct ice_vsi *vsi);
void ice_update_pf_stats(struct ice_pf *pf);
int ice_up(struct ice_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 5f7c75c3b24b..7cd8c5d13bcc 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -343,8 +343,20 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
switch (vsi->type) {
case ICE_VSI_PF:
- vsi->alloc_txq = pf->num_lan_tx;
- vsi->alloc_rxq = pf->num_lan_rx;
+ vsi->alloc_txq = min_t(int, ice_get_avail_txq_count(pf),
+ num_online_cpus());
+
+ pf->num_lan_tx = vsi->alloc_txq;
+
+ /* only 1 Rx queue unless RSS is enabled */
+ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+ vsi->alloc_rxq = 1;
+ else
+ vsi->alloc_rxq = min_t(int, ice_get_avail_rxq_count(pf),
+ num_online_cpus());
+
+ pf->num_lan_rx = vsi->alloc_rxq;
+
vsi->num_q_vectors = max_t(int, vsi->alloc_rxq, vsi->alloc_txq);
break;
case ICE_VSI_VF:
@@ -2577,9 +2589,6 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
if (ret)
goto unroll_vector_base;
- pf->q_left_tx -= vsi->alloc_txq;
- pf->q_left_rx -= vsi->alloc_rxq;
-
/* Do not exit if configuring RSS had an issue, at least
* receive traffic on first queue. Hence no need to capture
* return value
@@ -2643,8 +2652,6 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
ice_vsi_delete(vsi);
unroll_get_qs:
ice_vsi_put_qs(vsi);
- pf->q_left_tx += vsi->alloc_txq;
- pf->q_left_rx += vsi->alloc_rxq;
ice_vsi_clear(vsi);
return NULL;
@@ -2992,8 +2999,6 @@ int ice_vsi_release(struct ice_vsi *vsi)
ice_vsi_clear_rings(vsi);
ice_vsi_put_qs(vsi);
- pf->q_left_tx += vsi->alloc_txq;
- pf->q_left_rx += vsi->alloc_rxq;
/* retain SW VSI data structure since it is needed to unregister and
* free VSI netdev when PF is not in reset recovery pending state,\
@@ -3102,8 +3107,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
if (ret)
goto err_vectors;
- pf->q_left_tx -= vsi->alloc_txq;
- pf->q_left_rx -= vsi->alloc_rxq;
break;
default:
break;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 2d92d8591a8a..f8be9ada2447 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2192,36 +2192,48 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
ice_vsi_free_q_vectors(vsi);
ice_vsi_delete(vsi);
ice_vsi_put_qs(vsi);
- pf->q_left_tx += vsi->alloc_txq;
- pf->q_left_rx += vsi->alloc_rxq;
ice_vsi_clear(vsi);
}
return status;
}
/**
- * ice_determine_q_usage - Calculate queue distribution
- * @pf: board private structure
- *
- * Return -ENOMEM if we don't get enough queues for all ports
+ * ice_get_avail_q_count - Get count of available (not in use) queues
+ * @pf_qmap: bitmap to get queue use count from
+ * @lock: pointer to a mutex that protects access to pf_qmap
+ * @size: size of the bitmap
*/
-static void ice_determine_q_usage(struct ice_pf *pf)
+static u16
+ice_get_avail_q_count(unsigned long *pf_qmap, struct mutex *lock, u16 size)
{
- u16 q_left_tx, q_left_rx;
+ u16 count = 0, bit;
- q_left_tx = pf->hw.func_caps.common_cap.num_txq;
- q_left_rx = pf->hw.func_caps.common_cap.num_rxq;
+ mutex_lock(lock);
+ for_each_clear_bit(bit, pf_qmap, size)
+ count++;
+ mutex_unlock(lock);
- pf->num_lan_tx = min_t(int, q_left_tx, num_online_cpus());
+ return count;
+}
- /* only 1 Rx queue unless RSS is enabled */
- if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags))
- pf->num_lan_rx = 1;
- else
- pf->num_lan_rx = min_t(int, q_left_rx, num_online_cpus());
+/**
+ * ice_get_avail_txq_count - Get count of available (not in use) Tx queues
+ * @pf: pointer to an ice_pf instance
+ */
+u16 ice_get_avail_txq_count(struct ice_pf *pf)
+{
+ return ice_get_avail_q_count(pf->avail_txqs, &pf->avail_q_mutex,
+ pf->max_pf_txqs);
+}
- pf->q_left_tx = q_left_tx - pf->num_lan_tx;
- pf->q_left_rx = q_left_rx - pf->num_lan_rx;
+/**
+ * ice_get_avail_rxq_count - Get count of available (not in use) Rx queues
+ * @pf: pointer to an ice_pf instance
+ */
+u16 ice_get_avail_rxq_count(struct ice_pf *pf)
+{
+ return ice_get_avail_q_count(pf->avail_rxqs, &pf->avail_q_mutex,
+ pf->max_pf_rxqs);
}
/**
@@ -2541,8 +2553,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
}
}
- ice_determine_q_usage(pf);
-
pf->num_alloc_vsi = hw->func_caps.guar_num_vsi;
if (!pf->num_alloc_vsi) {
err = -EIO;
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 30e8e6166a59..64de05ccbc47 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -595,7 +595,8 @@ static int ice_alloc_vf_res(struct ice_vf *vf)
/* Update number of VF queues, in case VF had requested for queue
* changes
*/
- tx_rx_queue_left = min_t(int, pf->q_left_tx, pf->q_left_rx);
+ tx_rx_queue_left = min_t(int, ice_get_avail_txq_count(pf),
+ ice_get_avail_rxq_count(pf));
tx_rx_queue_left += ICE_DFLT_QS_PER_VF;
if (vf->num_req_qs && vf->num_req_qs <= tx_rx_queue_left &&
vf->num_req_qs != vf->num_vf_qs)
@@ -898,11 +899,11 @@ static int ice_check_avail_res(struct ice_pf *pf)
* at runtime through Virtchnl, that is the reason we start by reserving
* few queues.
*/
- num_txq = ice_determine_res(pf, pf->q_left_tx, ICE_DFLT_QS_PER_VF,
- ICE_MIN_QS_PER_VF);
+ num_txq = ice_determine_res(pf, ice_get_avail_txq_count(pf),
+ ICE_DFLT_QS_PER_VF, ICE_MIN_QS_PER_VF);
- num_rxq = ice_determine_res(pf, pf->q_left_rx, ICE_DFLT_QS_PER_VF,
- ICE_MIN_QS_PER_VF);
+ num_rxq = ice_determine_res(pf, ice_get_avail_rxq_count(pf),
+ ICE_DFLT_QS_PER_VF, ICE_MIN_QS_PER_VF);
if (!num_txq || !num_rxq)
return -EIO;
@@ -2511,7 +2512,8 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
}
cur_queues = vf->num_vf_qs;
- tx_rx_queue_left = min_t(u16, pf->q_left_tx, pf->q_left_rx);
+ tx_rx_queue_left = min_t(u16, ice_get_avail_txq_count(pf),
+ ice_get_avail_rxq_count(pf));
max_allowed_vf_queues = tx_rx_queue_left + cur_queues;
if (!req_queues) {
dev_err(&pf->pdev->dev,
--
2.21.0
^ permalink raw reply related
* [net-next 15/16] ice: change default number of receive descriptors
From: Jeff Kirsher @ 2019-09-05 20:34 UTC (permalink / raw)
To: davem
Cc: Jesse Brandeburg, netdev, nhorman, sassmann, Andrew Bowers,
Jeff Kirsher
In-Reply-To: <20190905203406.4152-1-jeffrey.t.kirsher@intel.com>
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
The driver should start out with a reasonable number of descriptors that
can prevent drops due to a CPU being in a power management state.
Change the default number of descriptors to 2048.
The user can always change the value at runtime. Transmit descriptor
counts are not modified because they don't need to change due to the
speed of the interface, or for power managed CPUs, but the code is
simplified to a fixed value for the transmit default.
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ice/ice.h | 19 ++-----------------
1 file changed, 2 insertions(+), 17 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 6c4faf7551f6..b36e1cf0e461 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -47,23 +47,8 @@ extern const char ice_drv_ver[];
#define ICE_MIN_NUM_DESC 64
#define ICE_MAX_NUM_DESC 8160
#define ICE_DFLT_MIN_RX_DESC 512
-/* if the default number of Rx descriptors between ICE_MAX_NUM_DESC and the
- * number of descriptors to fill up an entire page is greater than or equal to
- * ICE_DFLT_MIN_RX_DESC set it based on page size, otherwise set it to
- * ICE_DFLT_MIN_RX_DESC
- */
-#define ICE_DFLT_NUM_RX_DESC \
- min_t(u16, ICE_MAX_NUM_DESC, \
- max_t(u16, ALIGN(PAGE_SIZE / sizeof(union ice_32byte_rx_desc), \
- ICE_REQ_DESC_MULTIPLE), \
- ICE_DFLT_MIN_RX_DESC))
-/* set default number of Tx descriptors to the minimum between ICE_MAX_NUM_DESC
- * and the number of descriptors to fill up an entire page
- */
-#define ICE_DFLT_NUM_TX_DESC min_t(u16, ICE_MAX_NUM_DESC, \
- ALIGN(PAGE_SIZE / \
- sizeof(struct ice_tx_desc), \
- ICE_REQ_DESC_MULTIPLE))
+#define ICE_DFLT_NUM_TX_DESC 256
+#define ICE_DFLT_NUM_RX_DESC 2048
#define ICE_DFLT_TRAFFIC_CLASS BIT(0)
#define ICE_INT_NAME_STR_LEN (IFNAMSIZ + 16)
--
2.21.0
^ permalink raw reply related
* [net-next 12/16] ice: update Tx context struct
From: Jeff Kirsher @ 2019-09-05 20:34 UTC (permalink / raw)
To: davem; +Cc: Ashish Shah, netdev, nhorman, sassmann, Andrew Bowers,
Jeff Kirsher
In-Reply-To: <20190905203406.4152-1-jeffrey.t.kirsher@intel.com>
From: Ashish Shah <ashish.n.shah@intel.com>
Add internal usage flag, bit 91 as described in spec.
Update width of internal queue state to 122 also as described in spec.
Signed-off-by: Ashish Shah <ashish.n.shah@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ice/ice_common.c | 3 ++-
drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 9492cd34b09d..e8397e5b6267 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1132,6 +1132,7 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = {
ICE_CTX_STORE(ice_tlan_ctx, vmvf_type, 2, 78),
ICE_CTX_STORE(ice_tlan_ctx, src_vsi, 10, 80),
ICE_CTX_STORE(ice_tlan_ctx, tsyn_ena, 1, 90),
+ ICE_CTX_STORE(ice_tlan_ctx, internal_usage_flag, 1, 91),
ICE_CTX_STORE(ice_tlan_ctx, alt_vlan, 1, 92),
ICE_CTX_STORE(ice_tlan_ctx, cpuid, 8, 93),
ICE_CTX_STORE(ice_tlan_ctx, wb_mode, 1, 101),
@@ -1150,7 +1151,7 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = {
ICE_CTX_STORE(ice_tlan_ctx, drop_ena, 1, 165),
ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx, 2, 166),
ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx, 3, 168),
- ICE_CTX_STORE(ice_tlan_ctx, int_q_state, 110, 171),
+ ICE_CTX_STORE(ice_tlan_ctx, int_q_state, 122, 171),
{ 0 }
};
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 57ea6811fe2c..2aac8f13daeb 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -428,6 +428,7 @@ struct ice_tlan_ctx {
#define ICE_TLAN_CTX_VMVF_TYPE_PF 2
u16 src_vsi;
u8 tsyn_ena;
+ u8 internal_usage_flag;
u8 alt_vlan;
u16 cpuid; /* bigger than needed, see above for reason */
u8 wb_mode;
--
2.21.0
^ permalink raw reply related
* [net-next 11/16] ice: Report VF link status with opcode to get resources
From: Jeff Kirsher @ 2019-09-05 20:34 UTC (permalink / raw)
To: davem
Cc: Akeem G Abodunrin, netdev, nhorman, sassmann, Andrew Bowers,
Jeff Kirsher
In-Reply-To: <20190905203406.4152-1-jeffrey.t.kirsher@intel.com>
From: Akeem G Abodunrin <akeem.g.abodunrin@intel.com>
This patch changes how and when the driver reports link status. Instead of
waiting until the call to enable queues for the VF, we should report link
status earlier, with the opcode to get VF resources, so as to avoid reporting
erroneous information, especially when queues have not been configured.
In addition, we can also make a call to get and report link status change
after the queues are enabled, at least to report netdev or PHY link status.
This is in accordance with how link speed is being reported for the PF.
Signed-off-by: Akeem G Abodunrin <akeem.g.abodunrin@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 1ec2a037a369..30e8e6166a59 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -2934,6 +2934,7 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
break;
case VIRTCHNL_OP_GET_VF_RESOURCES:
err = ice_vc_get_vf_res_msg(vf, msg);
+ ice_vc_notify_vf_link_state(vf);
break;
case VIRTCHNL_OP_RESET_VF:
ice_vc_reset_vf_msg(vf);
--
2.21.0
^ permalink raw reply related
* [net-next 08/16] ice: Reliably reset VFs
From: Jeff Kirsher @ 2019-09-05 20:33 UTC (permalink / raw)
To: davem
Cc: Mitch Williams, netdev, nhorman, sassmann, Andrew Bowers,
Jeff Kirsher
In-Reply-To: <20190905203406.4152-1-jeffrey.t.kirsher@intel.com>
From: Mitch Williams <mitch.a.williams@intel.com>
When a PFR (or bigger reset) occurs, the device clears the VF_MBX_ARQLEN
register for all VFs. But if a VFR is triggered by a VF, the device does
NOT clear this register, and the VF driver will never see the reset.
When this happens, the VF driver will eventually timeout and attempt
recovery, and usually it will be successful. But this makes resets take
a long time and there are occasional failures.
We cannot just blithely clear this register on every reset; this has
been shown to cause synchronization problems when a PFR is triggered
with a large number of VFs.
Fix this by clearing VF_MBX_ARQLEN when the reset source is not PFR.
GlobR will trigger PFR, so this test catches that occurrence as well.
Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index c38939b1d496..3ba6613048ef 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -353,12 +353,13 @@ void ice_free_vfs(struct ice_pf *pf)
* ice_trigger_vf_reset - Reset a VF on HW
* @vf: pointer to the VF structure
* @is_vflr: true if VFLR was issued, false if not
+ * @is_pfr: true if the reset was triggered due to a previous PFR
*
* Trigger hardware to start a reset for a particular VF. Expects the caller
* to wait the proper amount of time to allow hardware to reset the VF before
* it cleans up and restores VF functionality.
*/
-static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr)
+static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
{
struct ice_pf *pf = vf->pf;
u32 reg, reg_idx, bit_idx;
@@ -379,10 +380,13 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr)
*/
clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
- /* Clear the VF's ARQLEN register. This is how the VF detects reset,
- * since the VFGEN_RSTAT register doesn't stick at 0 after reset.
+ /* VF_MBX_ARQLEN is cleared by PFR, so the driver needs to clear it
+ * in the case of VFR. If this is done for PFR, it can mess up VF
+ * resets because the VF driver may already have started cleanup
+ * by the time we get here.
*/
- wr32(hw, VF_MBX_ARQLEN(vf_abs_id), 0);
+ if (!is_pfr)
+ wr32(hw, VF_MBX_ARQLEN(vf_abs_id), 0);
/* In the case of a VFLR, the HW has already reset the VF and we
* just need to clean up, so don't hit the VFRTRIG register.
@@ -1072,7 +1076,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
/* Begin reset on all VFs at once */
for (v = 0; v < pf->num_alloc_vfs; v++)
- ice_trigger_vf_reset(&pf->vf[v], is_vflr);
+ ice_trigger_vf_reset(&pf->vf[v], is_vflr, true);
for (v = 0; v < pf->num_alloc_vfs; v++) {
struct ice_vsi *vsi;
@@ -1172,7 +1176,7 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
if (test_and_set_bit(ICE_VF_STATE_DIS, vf->vf_states))
return false;
- ice_trigger_vf_reset(vf, is_vflr);
+ ice_trigger_vf_reset(vf, is_vflr, false);
vsi = pf->vsi[vf->lan_vsi_idx];
--
2.21.0
^ permalink raw reply related
* [net-next 07/16] ice: change work limit to a constant
From: Jeff Kirsher @ 2019-09-05 20:33 UTC (permalink / raw)
To: davem
Cc: Jesse Brandeburg, netdev, nhorman, sassmann, Andrew Bowers,
Jeff Kirsher
In-Reply-To: <20190905203406.4152-1-jeffrey.t.kirsher@intel.com>
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
The driver has supported a transmit work limit
that was configurable from ethtool for a long time, but
there are no good use cases for having it be a variable
that can be changed at run time. In addition, this
variable was noted to be causing performance overhead
due to cache misses.
Just remove the variable and let the code use a constant
so that the functionality is maintained (a limit on the
number of transmits that will be cleaned in any one call
to the clean routines) without the cache miss.
Removes code, removes a variable, removes testing surface. Yay.
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ice/ice.h | 3 ---
drivers/net/ethernet/intel/ice/ice_ethtool.c | 14 ++------------
drivers/net/ethernet/intel/ice/ice_lib.c | 2 +-
3 files changed, 3 insertions(+), 16 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index bbb3c290a0bf..c7f234688499 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -247,9 +247,6 @@ struct ice_vsi {
u16 vsi_num; /* HW (absolute) index of this VSI */
u16 idx; /* software index in pf->vsi[] */
- /* Interrupt thresholds */
- u16 work_lmt;
-
s16 vf_id; /* VF ID for SR-IOV VSIs */
u16 ethtype; /* Ethernet protocol for pause frame */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index edba5bd79097..ae9921b7de7b 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3214,12 +3214,6 @@ __ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
if (ice_get_q_coalesce(vsi, ec, q_num))
return -EINVAL;
- if (q_num < vsi->num_txq)
- ec->tx_max_coalesced_frames_irq = vsi->work_lmt;
-
- if (q_num < vsi->num_rxq)
- ec->rx_max_coalesced_frames_irq = vsi->work_lmt;
-
return 0;
}
@@ -3399,17 +3393,13 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
if (ice_set_q_coalesce(vsi, ec, i))
return -EINVAL;
}
- goto set_work_lmt;
+ goto set_complete;
}
if (ice_set_q_coalesce(vsi, ec, q_num))
return -EINVAL;
-set_work_lmt:
-
- if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
- vsi->work_lmt = max(ec->tx_max_coalesced_frames_irq,
- ec->rx_max_coalesced_frames_irq);
+set_complete:
return 0;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 6cc01ebc0b01..5f7c75c3b24b 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -548,8 +548,8 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id)
vsi->type = type;
vsi->back = pf;
set_bit(__ICE_DOWN, vsi->state);
+
vsi->idx = pf->next_vsi;
- vsi->work_lmt = ICE_DFLT_IRQ_WORK;
if (type == ICE_VSI_VF)
ice_vsi_set_num_qs(vsi, vf_id);
--
2.21.0
^ permalink raw reply related
* [net-next 05/16] ice: move code closer together
From: Jeff Kirsher @ 2019-09-05 20:33 UTC (permalink / raw)
To: davem
Cc: Jesse Brandeburg, netdev, nhorman, sassmann, Andrew Bowers,
Jeff Kirsher
In-Reply-To: <20190905203406.4152-1-jeffrey.t.kirsher@intel.com>
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
This is a simple patch to move the assignment to a local variable
closer to the site where the local variable is used. This
can help readability and also maybe performance, although the
performance enhancement is really dependent upon the compiler.
No functional change.
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ice/ice_txrx.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 4fe1b332e67e..ec581b1f0fcb 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1068,9 +1068,6 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
continue;
}
- rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
- ICE_RX_FLEX_DESC_PTYPE_M;
-
stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
if (ice_test_staterr(rx_desc, stat_err_bits))
vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
@@ -1087,6 +1084,9 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
total_rx_bytes += skb->len;
/* populate checksum, VLAN, and protocol */
+ rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+ ICE_RX_FLEX_DESC_PTYPE_M;
+
ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
/* send completed skb up the stack */
--
2.21.0
^ permalink raw reply related
* [net-next 06/16] ice: small efficiency fixes
From: Jeff Kirsher @ 2019-09-05 20:33 UTC (permalink / raw)
To: davem
Cc: Jesse Brandeburg, netdev, nhorman, sassmann, Andrew Bowers,
Jeff Kirsher
In-Reply-To: <20190905203406.4152-1-jeffrey.t.kirsher@intel.com>
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Add a small bit of efficiency to the code by adding a
prefetch of the port_info structure in order to help
avoid a cache miss a little later on in execution.
Also add an unlikely statement to a branch which
generally will never happen in normal operation.
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ice/ice_txrx.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index ec581b1f0fcb..33dd103035dc 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1226,6 +1226,8 @@ ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc)
if (time_after(next_update, rc->next_update))
goto clear_counts;
+ prefetch(q_vector->vsi->port_info);
+
packets = rc->total_pkts;
bytes = rc->total_bytes;
@@ -1486,7 +1488,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
clean_complete = false;
/* Handle case where we are called by netpoll with a budget of 0 */
- if (budget <= 0)
+ if (unlikely(budget <= 0))
return budget;
/* normally we have 1 Rx ring per q_vector */
--
2.21.0
^ permalink raw reply related
* Re: [PATCH] net: sched: taprio: Fix potential integer overflow in taprio_set_picos_per_byte
From: Vladimir Oltean @ 2019-09-05 20:47 UTC (permalink / raw)
To: Vinicius Costa Gomes
Cc: Eric Dumazet, Gustavo A. R. Silva, Jamal Hadi Salim, Cong Wang,
Jiri Pirko, David S. Miller, netdev, lkml
In-Reply-To: <8736hd9ilm.fsf@intel.com>
Hi Vinicius,
On Wed, 4 Sep 2019 at 00:26, Vinicius Costa Gomes
<vinicius.gomes@intel.com> wrote:
>
> Hi,
>
> Vladimir Oltean <olteanv@gmail.com> writes:
>
> > Right. And while we're at it, there's still the potential
> > division-by-zero problem which I still don't know how to solve without
> > implementing a full-blown __ethtool_get_link_ksettings parser that
> > checks against all the possible outputs it can have under the "no
> > carrier" condition - see "[RFC PATCH 1/1] phylink: Set speed to
> > SPEED_UNKNOWN when there is no PHY connected" for details.
> > And there's also a third fix to be made: the netdev_dbg should be made
> > to print "speed" instead of "ecmd.base.speed".
>
> For the ksettings part I am thinking on adding something like this to
> ethtool.c. Do you think anything is missing (apart from the
> documentation)?
>
> ->
>
> diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
> index 95991e43..d37c80b 100644
> --- a/include/linux/ethtool.h
> +++ b/include/linux/ethtool.h
> @@ -177,6 +177,9 @@ void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst,
> bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
> const unsigned long *src);
>
> +u32 ethtool_link_ksettings_to_speed(const struct ethtool_link_ksettings *settings,
> + u32 default_speed);
> +
> /**
> * struct ethtool_ops - optional netdev operations
> * @get_drvinfo: Report driver/device information. Should only set the
> diff --git a/net/core/ethtool.c b/net/core/ethtool.c
> index 6288e69..80e3db3 100644
> --- a/net/core/ethtool.c
> +++ b/net/core/ethtool.c
> @@ -539,6 +539,18 @@ struct ethtool_link_usettings {
> } link_modes;
> };
>
> +u32 ethtool_link_ksettings_to_speed(const struct ethtool_link_ksettings *settings,
> + u32 default_speed)
> +{
> + if (settings->base.speed == SPEED_UNKNOWN)
> + return default_speed;
> +
> + if (settings->base.speed == 0)
> + return default_speed;
> +
> + return settings->base.speed;
> +}
> +
> /* Internal kernel helper to query a device ethtool_link_settings. */
> int __ethtool_get_link_ksettings(struct net_device *dev,
> struct ethtool_link_ksettings *link_ksettings)
Looks ok to me, but I have no saying over ethtool API. Actually I
don't even know whom to ask - the output of
./scripts/get_maintainer.pl net/core/ethtool.c is a bit overwhelming.
To avoid conflicts, there needs to be somebody out of us who takes
Eric's simplification, with Gustavo's Reported-by tag, and the 2
ethtool & taprio patches to avoid division by zero, and the printing
fix, and maybe do the same in cbs. Will you be the one? Should I?
Thanks,
-Vladimir
^ permalink raw reply
* RE: [PATCH] net: sched: taprio: Fix potential integer overflow in taprio_set_picos_per_byte
From: Gomes, Vinicius @ 2019-09-05 21:03 UTC (permalink / raw)
To: Vladimir Oltean
Cc: Eric Dumazet, Gustavo A. R. Silva, Jamal Hadi Salim, Cong Wang,
Jiri Pirko, David S. Miller, netdev, lkml
In-Reply-To: <CA+h21hqtuGuJm0rMx_SZAy_HCjSVD_UK1j8wa7fv+p_zUGNV7A@mail.gmail.com>
Hi Vladimir,
> Looks ok to me, but I have no saying over ethtool API. Actually I don't even
> know whom to ask - the output of ./scripts/get_maintainer.pl
> net/core/ethtool.c is a bit overwhelming.
> To avoid conflicts, there needs to be somebody out of us who takes Eric's
> simplification, with Gustavo's Reported-by tag, and the 2 ethtool & taprio
> patches to avoid division by zero, and the printing fix, and maybe do the same in
> cbs. Will you be the one? Should I?
If you have the cycles to do it, go for it. I would only be able to work on this next week.
>
> Thanks,
> -Vladimir
Thanks a lot,
--
Vinicius
^ permalink raw reply
* [PATCH] kcm: use BPF_PROG_RUN
From: Sami Tolvanen @ 2019-09-05 21:15 UTC (permalink / raw)
To: David S. Miller, Tom Herbert; +Cc: netdev, bpf, linux-kernel, Sami Tolvanen
Instead of invoking struct bpf_prog::bpf_func directly, use the
BPF_PROG_RUN macro.
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
net/kcm/kcmsock.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 5dbc0c48f8cb..f350c613bd7d 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -379,7 +379,7 @@ static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
struct bpf_prog *prog = psock->bpf_prog;
- return (*prog->bpf_func)(skb, prog->insnsi);
+ return BPF_PROG_RUN(prog, skb);
}
static int kcm_read_sock_done(struct strparser *strp, int err)
--
2.23.0.187.g17f5b7556c-goog
^ permalink raw reply related
* [PATCH] net/ibmvnic: free reset work of removed device from queue
From: Juliet Kim @ 2019-09-05 21:30 UTC (permalink / raw)
To: netdev; +Cc: julietk, linuxppc-dev
Commit 36f1031c51a2 ("ibmvnic: Do not process reset during or after
device removal") made the change to exit reset if the driver has been
removed, but does not free reset work items of the adapter from queue.
Ensure all reset work items are freed when breaking out of the loop early.
Fixes: 36f1031c51a2 ("ibmvnic: Do not process reset during or after device removal")
Signed-off-by: Juliet Kim <julietk@linux.vnet.ibm.com>
---
drivers/net/ethernet/ibm/ibmvnic.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index fa4bb940665c..6644cabc8e75 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1985,7 +1985,10 @@ static void __ibmvnic_reset(struct work_struct *work)
while (rwi) {
if (adapter->state == VNIC_REMOVING ||
adapter->state == VNIC_REMOVED)
- goto out;
+ kfree(rwi);
+ rc = EBUSY;
+ break;
+ }
if (adapter->force_reset_recovery) {
adapter->force_reset_recovery = false;
@@ -2011,7 +2014,7 @@ static void __ibmvnic_reset(struct work_struct *work)
netdev_dbg(adapter->netdev, "Reset failed\n");
free_all_rwi(adapter);
}
-out:
+
adapter->resetting = false;
if (we_lock_rtnl)
rtnl_unlock();
--
2.16.4
^ permalink raw reply related
* Re: general protection fault in dev_map_hash_update_elem
From: Alexei Starovoitov @ 2019-09-05 21:44 UTC (permalink / raw)
To: syzbot
Cc: bpf, Daniel Borkmann, Jesper Dangaard Brouer, LKML,
Network Development, syzkaller-bugs,
Toke Høiland-Jørgensen
In-Reply-To: <0000000000005091a70591d3e1d9@google.com>
On Thu, Sep 5, 2019 at 1:08 PM syzbot
<syzbot+4e7a85b1432052e8d6f8@syzkaller.appspotmail.com> wrote:
>
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit: 6d028043 Add linux-next specific files for 20190830
> git tree: linux-next
> console output: https://syzkaller.appspot.com/x/log.txt?x=135c1a92600000
> kernel config: https://syzkaller.appspot.com/x/.config?x=82a6bec43ab0cb69
> dashboard link: https://syzkaller.appspot.com/bug?extid=4e7a85b1432052e8d6f8
> compiler: gcc (GCC) 9.0.0 20181231 (experimental)
> syz repro: https://syzkaller.appspot.com/x/repro.syz?x=109124e1600000
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+4e7a85b1432052e8d6f8@syzkaller.appspotmail.com
>
> kasan: CONFIG_KASAN_INLINE enabled
> kasan: GPF could be caused by NULL-ptr deref or user memory access
> general protection fault: 0000 [#1] PREEMPT SMP KASAN
> CPU: 1 PID: 10235 Comm: syz-executor.0 Not tainted 5.3.0-rc6-next-20190830
> #75
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> RIP: 0010:__write_once_size include/linux/compiler.h:203 [inline]
> RIP: 0010:__hlist_del include/linux/list.h:795 [inline]
> RIP: 0010:hlist_del_rcu include/linux/rculist.h:475 [inline]
> RIP: 0010:__dev_map_hash_update_elem kernel/bpf/devmap.c:668 [inline]
> RIP: 0010:dev_map_hash_update_elem+0x3c8/0x6e0 kernel/bpf/devmap.c:691
> Code: 48 89 f1 48 89 75 c8 48 c1 e9 03 80 3c 11 00 0f 85 d3 02 00 00 48 b9
> 00 00 00 00 00 fc ff df 48 8b 53 10 48 89 d6 48 c1 ee 03 <80> 3c 0e 00 0f
> 85 97 02 00 00 48 85 c0 48 89 02 74 38 48 89 55 b8
> RSP: 0018:ffff88808d607c30 EFLAGS: 00010046
> RAX: 0000000000000000 RBX: ffff8880a7f14580 RCX: dffffc0000000000
> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8880a7f14588
> RBP: ffff88808d607c78 R08: 0000000000000004 R09: ffffed1011ac0f73
> R10: ffffed1011ac0f72 R11: 0000000000000003 R12: ffff88809f4e9400
> R13: ffff88809b06ba00 R14: 0000000000000000 R15: ffff88809f4e9528
> FS: 00007f3a3d50c700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00007feb3fcd0000 CR3: 00000000986b9000 CR4: 00000000001406e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> Call Trace:
> map_update_elem+0xc82/0x10b0 kernel/bpf/syscall.c:966
> __do_sys_bpf+0x8b5/0x3350 kernel/bpf/syscall.c:2854
> __se_sys_bpf kernel/bpf/syscall.c:2825 [inline]
> __x64_sys_bpf+0x73/0xb0 kernel/bpf/syscall.c:2825
> do_syscall_64+0xfa/0x760 arch/x86/entry/common.c:290
> entry_SYSCALL_64_after_hwframe+0x49/0xbe
> RIP: 0033:0x459879
> Code: fd b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7
> 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff
> ff 0f 83 cb b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00
> RSP: 002b:00007f3a3d50bc78 EFLAGS: 00000246 ORIG_RAX: 0000000000000141
> RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000459879
> RDX: 0000000000000020 RSI: 0000000020000040 RDI: 0000000000000002
> RBP: 000000000075bf20 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000246 R12: 00007f3a3d50c6d4
> R13: 00000000004bfc86 R14: 00000000004d1960 R15: 00000000ffffffff
> Modules linked in:
> ---[ end trace 083223e21dbd0ae5 ]---
> RIP: 0010:__write_once_size include/linux/compiler.h:203 [inline]
> RIP: 0010:__hlist_del include/linux/list.h:795 [inline]
> RIP: 0010:hlist_del_rcu include/linux/rculist.h:475 [inline]
> RIP: 0010:__dev_map_hash_update_elem kernel/bpf/devmap.c:668 [inline]
> RIP: 0010:dev_map_hash_update_elem+0x3c8/0x6e0 kernel/bpf/devmap.c:691
Toke,
please take a look.
Thanks!
^ permalink raw reply
* Re: [PATCH net-next v4 1/1] net: openvswitch: Set OvS recirc_id from tc chain index
From: Pravin Shelar @ 2019-09-05 21:48 UTC (permalink / raw)
To: Paul Blakey
Cc: Linux Kernel Network Developers, David S. Miller, Justin Pettit,
Simon Horman, Marcelo Ricardo Leitner, Vlad Buslov, Jiri Pirko,
Roi Dayan, Yossi Kuperman, Rony Efraim, Oz Shlomo
In-Reply-To: <1567605397-14060-2-git-send-email-paulb@mellanox.com>
On Wed, Sep 4, 2019 at 6:56 AM Paul Blakey <paulb@mellanox.com> wrote:
>
> Offloaded OvS datapath rules are translated one to one to tc rules,
> for example the following simplified OvS rule:
>
> recirc_id(0),in_port(dev1),eth_type(0x0800),ct_state(-trk) actions:ct(),recirc(2)
>
> Will be translated to the following tc rule:
>
> $ tc filter add dev dev1 ingress \
> prio 1 chain 0 proto ip \
> flower tcp ct_state -trk \
> action ct pipe \
> action goto chain 2
>
> Received packets will first travel through tc, and if they aren't stolen
> by it, like in the above rule, they will continue to OvS datapath.
> Since we already did some actions (action ct in this case) which might
> modify the packets, and updated action stats, we would like to continue
> the processing with the correct recirc_id in OvS (here recirc_id(2))
> where we left off.
>
> To support this, introduce a new skb extension for tc, which
> will be used for translating tc chain to ovs recirc_id to
> handle these miss cases. Last tc chain index will be set
> by tc goto chain action and read by OvS datapath.
>
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
> Acked-by: Jiri Pirko <jiri@mellanox.com>
Looks good to me.
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Thanks,
Pravin.
^ permalink raw reply
* Re: [PATCH net] net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list
From: Alexander Duyck @ 2019-09-05 21:49 UTC (permalink / raw)
To: Shmulik Ladkani
Cc: Daniel Borkmann, Eric Dumazet, Willem de Bruijn, eyal, netdev,
Shmulik Ladkani
In-Reply-To: <20190905183633.8144-1-shmulik.ladkani@gmail.com>
On Thu, Sep 5, 2019 at 11:36 AM Shmulik Ladkani
<shmulik@metanetworks.com> wrote:
>
> Historically, support for frag_list packets entering skb_segment() was
> limited to frag_list members terminating on exact same gso_size
> boundaries. This is verified with a BUG_ON since commit 89319d3801d1
> ("net: Add frag_list support to skb_segment"), quote:
>
> As such we require all frag_list members terminate on exact MSS
> boundaries. This is checked using BUG_ON.
> As there should only be one producer in the kernel of such packets,
> namely GRO, this requirement should not be difficult to maintain.
>
> However, since commit 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper"),
> the "exact MSS boundaries" assumption no longer holds:
> An eBPF program using bpf_skb_change_proto() DOES modify 'gso_size', but
> leaves the frag_list members as originally merged by GRO with the
> original 'gso_size'. Example of such programs are bpf-based NAT46 or
> NAT64.
>
> This leads to a kernel BUG_ON for flows involving:
> - GRO generating a frag_list skb
> - bpf program performing bpf_skb_change_proto() or bpf_skb_adjust_room()
> - skb_segment() of the skb
>
> See example BUG_ON reports in [0].
>
> In commit 13acc94eff12 ("net: permit skb_segment on head_frag frag_list skb"),
> skb_segment() was modified to support the "gso_size mangling" case of
> a frag_list GRO'ed skb, but *only* for frag_list members having
> head_frag==true (having a page-fragment head).
>
> Alas, GRO packets having frag_list members with a linear kmalloced head
> (head_frag==false) still hit the BUG_ON.
>
> This commit adds support to skb_segment() for a 'head_skb' packet having
> a frag_list whose members are *non* head_frag, with gso_size mangled, by
> disabling SG and thus falling-back to copying the data from the given
> 'head_skb' into the generated segmented skbs - as suggested by Willem de
> Bruijn [1].
>
> Since this approach involves the penalty of skb_copy_and_csum_bits()
> when building the segments, care was taken in order to enable this
> solution only when required:
> - untrusted gso_size, by testing SKB_GSO_DODGY is set
> (SKB_GSO_DODGY is set by any gso_size mangling functions in
> net/core/filter.c)
> - the frag_list is non empty, its item is a non head_frag, *and* the
> headlen of the given 'head_skb' does not match the gso_size.
>
> [0]
> https://lore.kernel.org/netdev/20190826170724.25ff616f@pixies/
> https://lore.kernel.org/netdev/9265b93f-253d-6b8c-f2b8-4b54eff1835c@fb.com/
>
> [1]
> https://lore.kernel.org/netdev/CA+FuTSfVsgNDi7c=GUU8nMg2hWxF2SjCNLXetHeVPdnxAW5K-w@mail.gmail.com/
>
> Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper")
> Suggested-by: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Eric Dumazet <eric.dumazet@gmail.com>
> Cc: Alexander Duyck <alexander.duyck@gmail.com>
> Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
> ---
> net/core/skbuff.c | 18 ++++++++++++++++++
> 1 file changed, 18 insertions(+)
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index ea8e8d332d85..c4bd1881acff 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -3678,6 +3678,24 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> sg = !!(features & NETIF_F_SG);
> csum = !!can_checksum_protocol(features, proto);
>
> + if (mss != GSO_BY_FRAGS &&
> + (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
> + /* gso_size is untrusted.
> + *
> + * If head_skb has a frag_list with a linear non head_frag
> + * item, and head_skb's headlen does not fit requested
> + * gso_size, fall back to copying the skbs - by disabling sg.
> + *
> + * We assume checking the first frag suffices, i.e if either of
> + * the frags have non head_frag data, then the first frag is
> + * too.
> + */
> + if (list_skb && skb_headlen(list_skb) && !list_skb->head_frag &&
> + (mss != skb_headlen(head_skb) - doffset)) {
> + sg = false;
> + }
> + }
> +
I would change the order of the tests you use here so that we can
eliminate the possibility of needing to perform many tests for the
more common cases. You could probably swap "list_skb" and "mss !=
GSO_BY_FRAGS" since list_skb is more likely to be false for many of
the common cases such as a standard TSO send from a socket. You might
even consider moving the GSO_BY_FRAGS check toward the end of your
checks since SCTP is the only protocol that I believe uses it and the
likelihood of encountering it is much lower compared to other
protocols.
You could probably test for !list_skb->head_frag before seeing if
there is a headlen since many NICs would be generating frames using
head_frag, so in the GRO case you mentioned above it could probably
save you some effort on a number of NICs.
You might also consider moving this code up before we push the mac
header back on and instead of setting sg to false you could just clear
the NETIF_F_SG flag from features. It would save you from having to
then remove doffset in your last check.
> if (sg && csum && (mss != GSO_BY_FRAGS)) {
> if (!(features & NETIF_F_GSO_PARTIAL)) {
> struct sk_buff *iter;
> --
> 2.19.1
>
^ permalink raw reply
* [pull request][net-next 00/14] Mellanox, mlx5 cleanups & port congestion stats
From: Saeed Mahameed @ 2019-09-05 21:50 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev@vger.kernel.org, Saeed Mahameed
Hi Dave,
This series provides 12 mlx5 cleanup patches, and the last 2 patches provide
port congestion stats to ethtool.
For more information please see tag log below.
Please pull and let me know if there is any problem.
Thanks,
Saeed.
---
The following changes since commit 0e5b36bc4c1fccfc18dd851d960781589c16dae8:
r8152: adjust the settings of ups flags (2019-09-05 12:41:11 +0200)
are available in the Git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git tags/mlx5-updates-2019-09-05
for you to fetch changes up to 1297d97f4862ad690d882ae5b0487e3d1ff15953:
net/mlx5e: Add port buffer's congestion counters (2019-09-05 14:44:43 -0700)
----------------------------------------------------------------
mlx5-updates-2019-09-05
1) Assorted mlx5 cleanups
2) Added port congestion counters to ethtool stats:
Add 3 counters per priority to ethtool using PPCNT:
2.1) rx_prio[p]_buf_discard - the number of packets discarded by device
due to lack of per host receive buffers
2.2) rx_prio[p]_cong_discard - the number of packets discarded by device
due to per host congestion
2.3) rx_prio[p]_marked - the number of packets ECN marked by device due
to per host congestion
----------------------------------------------------------------
Aya Levin (2):
net/mlx5: Expose HW capability bits for port buffer per priority congestion counters
net/mlx5e: Add port buffer's congestion counters
Colin Ian King (2):
net/mlx5: fix spelling mistake "offlaods" -> "offloads"
net/mlx5: fix missing assignment of variable err
Eran Ben Elisha (1):
net/mlx5e: Fix static checker warning of potential pointer math issue
Mao Wenan (1):
net/mlx5: Kconfig: Fix MLX5_CORE dependency with PCI_HYPERV_INTERFACE
Maxim Mikityanskiy (1):
net/mlx5e: Remove unnecessary clear_bit()s
Roi Dayan (1):
net/mlx5e: Remove leftover declaration
Saeed Mahameed (2):
net/mlx5e: Use ipv6_stub to avoid dependency with ipv6 being a module
net/mlx5: DR, Remove redundant dev_name print from err log
Tariq Toukan (1):
net/mlx5e: kTLS, Remove unused function parameter
Wei Yongjun (2):
net/mlx5: DR, Remove useless set memory to zero use memset()
net/mlx5: DR, Fix error return code in dr_domain_init_resources()
zhong jiang (1):
net/mlx5: Use PTR_ERR_OR_ZERO rather than its implementation
drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 2 +-
drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 +-
.../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c | 9 +-
.../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 6 +-
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 -
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 23 ++--
drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 1 -
drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 149 ++++++++++++++++++++-
drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 2 +
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 7 +-
.../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +-
.../mellanox/mlx5/core/steering/dr_domain.c | 18 ++-
.../ethernet/mellanox/mlx5/core/steering/dr_send.c | 1 -
include/linux/mlx5/device.h | 1 +
include/linux/mlx5/mlx5_ifc.h | 29 +++-
15 files changed, 207 insertions(+), 47 deletions(-)
^ permalink raw reply
* [net-next 01/14] net/mlx5e: Fix static checker warning of potential pointer math issue
From: Saeed Mahameed @ 2019-09-05 21:50 UTC (permalink / raw)
To: David S. Miller
Cc: netdev@vger.kernel.org, Eran Ben Elisha, Dan Carpenter,
Saeed Mahameed
In-Reply-To: <20190905215034.22713-1-saeedm@mellanox.com>
From: Eran Ben Elisha <eranbe@mellanox.com>
The cited patch has an issue in the WARN_ON_ONCE check: the wrong address
ranges are compared. Fix that by changing pointer types from u64* to void*.
This will also make the code simpler to read.
In addition mlx5e_hv_vhca_fill_ring_stats can get void pointer, so remove
the unnecessary casting when calling it.
Found by static checker:
drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c:41 mlx5e_hv_vhca_fill_stats()
warn: potential pointer math issue ('buf' is a u64 pointer)
Fixes: cef35af34d6d ("net/mlx5e: Add mlx5e HV VHCA stats agent")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
.../net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
index c37b4acd9bd5..b3a249b2a482 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -30,22 +30,21 @@ mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch,
}
}
-static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, u64 *data,
+static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, void *data,
int buf_len)
{
int ch, i = 0;
for (ch = 0; ch < priv->max_nch; ch++) {
- u64 *buf = data + i;
+ void *buf = data + i;
if (WARN_ON_ONCE(buf +
sizeof(struct mlx5e_hv_vhca_per_ring_stats) >
data + buf_len))
return;
- mlx5e_hv_vhca_fill_ring_stats(priv, ch,
- (struct mlx5e_hv_vhca_per_ring_stats *)buf);
- i += sizeof(struct mlx5e_hv_vhca_per_ring_stats) / sizeof(u64);
+ mlx5e_hv_vhca_fill_ring_stats(priv, ch, buf);
+ i += sizeof(struct mlx5e_hv_vhca_per_ring_stats);
}
}
--
2.21.0
^ permalink raw reply related
* [net-next 02/14] net/mlx5: Kconfig: Fix MLX5_CORE dependency with PCI_HYPERV_INTERFACE
From: Saeed Mahameed @ 2019-09-05 21:50 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev@vger.kernel.org, Mao Wenan, Saeed Mahameed
In-Reply-To: <20190905215034.22713-1-saeedm@mellanox.com>
From: Mao Wenan <maowenan@huawei.com>
When MLX5_CORE=y and PCI_HYPERV_INTERFACE=m, the following link errors occur:
drivers/net/ethernet/mellanox/mlx5/core/en_main.o: In function `mlx5e_nic_enable':
en_main.c:(.text+0xb649): undefined reference to `mlx5e_hv_vhca_stats_create'
drivers/net/ethernet/mellanox/mlx5/core/en_main.o: In function `mlx5e_nic_disable':
en_main.c:(.text+0xb8c4): undefined reference to `mlx5e_hv_vhca_stats_destroy'
Fix this by making MLX5_CORE imply PCI_HYPERV_INTERFACE.
Fixes: cef35af34d6d ("net/mlx5e: Add mlx5e HV VHCA stats agent")
Signed-off-by: Mao Wenan <maowenan@huawei.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 0d8dd885b7d6..a496f2ac20b0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -10,6 +10,7 @@ config MLX5_CORE
imply PTP_1588_CLOCK
imply VXLAN
imply MLXFW
+ imply PCI_HYPERV_INTERFACE
default n
---help---
Core driver for low level functionality of the ConnectX-4 and
--
2.21.0
^ permalink raw reply related
* [net-next 04/14] net/mlx5e: Remove leftover declaration
From: Saeed Mahameed @ 2019-09-05 21:51 UTC (permalink / raw)
To: David S. Miller
Cc: netdev@vger.kernel.org, Roi Dayan, Vlad Buslov, Saeed Mahameed
In-Reply-To: <20190905215034.22713-1-saeedm@mellanox.com>
From: Roi Dayan <roid@mellanox.com>
This function was removed in the cited commit below.
Fixes: 13e509a4c194 ("net/mlx5e: Remove leftover code from the PF netdev being uplink rep")
Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 8e512216deb8..31f83c8adcc9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -183,7 +183,6 @@ struct mlx5e_rep_sq {
struct list_head list;
};
-void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev);
void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev);
void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev);
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
--
2.21.0
^ permalink raw reply related
* [net-next 03/14] net/mlx5e: Use ipv6_stub to avoid dependency with ipv6 being a module
From: Saeed Mahameed @ 2019-09-05 21:50 UTC (permalink / raw)
To: David S. Miller
Cc: netdev@vger.kernel.org, Saeed Mahameed, Walter Harms, Mark Bloch,
Vlad Buslov
In-Reply-To: <20190905215034.22713-1-saeedm@mellanox.com>
mlx5 depends on the IPv6 tristate because we use ipv6's nd_tbl directly.
Alternatively, we can use ipv6_stub->nd_tbl and remove the dependency.
Reported-by: Walter Harms <wharms@bfs.de>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
.../net/ethernet/mellanox/mlx5/core/Kconfig | 1 -
.../net/ethernet/mellanox/mlx5/core/en_rep.c | 23 +++++++++++--------
.../net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +-
3 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index a496f2ac20b0..0dba272a5b2f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -33,7 +33,6 @@ config MLX5_FPGA
config MLX5_CORE_EN
bool "Mellanox 5th generation network adapters (ConnectX series) Ethernet support"
depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
- depends on IPV6=y || IPV6=n || MLX5_CORE=m
select PAGE_POOL
select DIMLIB
default n
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 1623cd32f303..95892a3b63a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -38,6 +38,7 @@
#include <net/netevent.h>
#include <net/arp.h>
#include <net/devlink.h>
+#include <net/ipv6_stubs.h>
#include "eswitch.h"
#include "en.h"
@@ -499,16 +500,18 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
mlx5e_sqs2vport_stop(esw, rep);
}
+static unsigned long mlx5e_rep_ipv6_interval(void)
+{
+ if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
+ return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
+
+ return ~0UL;
+}
+
static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
{
-#if IS_ENABLED(CONFIG_IPV6)
- unsigned long ipv6_interval = NEIGH_VAR(&nd_tbl.parms,
- DELAY_PROBE_TIME);
-#else
- unsigned long ipv6_interval = ~0UL;
-#endif
- unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms,
- DELAY_PROBE_TIME);
+ unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
+ unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -917,7 +920,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb,
case NETEVENT_NEIGH_UPDATE:
n = ptr;
#if IS_ENABLED(CONFIG_IPV6)
- if (n->tbl != &nd_tbl && n->tbl != &arp_tbl)
+ if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
#else
if (n->tbl != &arp_tbl)
#endif
@@ -944,7 +947,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb,
* done per device delay prob time parameter.
*/
#if IS_ENABLED(CONFIG_IPV6)
- if (!p->dev || (p->tbl != &nd_tbl && p->tbl != &arp_tbl))
+ if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
#else
if (!p->dev || p->tbl != &arp_tbl)
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 30d26eba75a3..98d1f7a48304 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1492,7 +1492,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
else if (m_neigh->family == AF_INET6)
- tbl = &nd_tbl;
+ tbl = ipv6_stub->nd_tbl;
#endif
else
return;
--
2.21.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox