Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next v2 01/14] bnxt_en: Add TC to hardware QoS queue mapping logic.
From: Michael Chan @ 2018-04-26 21:44 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1524779084-4016-1-git-send-email-michael.chan@broadcom.com>

The current driver maps MQPRIO traffic classes directly 1:1 to the
internal hardware queues (TC0 maps to hardware queue 0, etc).  This
direct mapping requires the internal hardware queues to be reconfigured
from lossless to lossy and vice versa when necessary.  This
involves reconfiguring internal buffer thresholds which is
disruptive and not always reliable.

Implement a new scheme to map TCs to internal hardware queues by
matching up their PFC requirements.  This will eliminate the need
to reconfigure a hardware queue's internal buffers at run time.  After
remapping, the NIC is closed and opened for the new TC to hardware
queue mapping to take effect.

This patch only adds the basic mapping logic.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |  5 ++-
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  1 +
 drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 65 +++++++++++++++++----------
 3 files changed, 47 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index f83769d..bda618d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2383,6 +2383,7 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
 	for (i = 0, j = 0; i < bp->tx_nr_rings; i++) {
 		struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
 		struct bnxt_ring_struct *ring;
+		u8 qidx;
 
 		ring = &txr->tx_ring_struct;
 
@@ -2411,7 +2412,8 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
 
 			memset(txr->tx_push, 0, sizeof(struct tx_push_bd));
 		}
-		ring->queue_id = bp->q_info[j].queue_id;
+		qidx = bp->tc_to_qidx[j];
+		ring->queue_id = bp->q_info[qidx].queue_id;
 		if (i < bp->tx_nr_rings_xdp)
 			continue;
 		if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1))
@@ -5309,6 +5311,7 @@ static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp)
 	for (i = 0; i < bp->max_tc; i++) {
 		bp->q_info[i].queue_id = *qptr++;
 		bp->q_info[i].queue_profile = *qptr++;
+		bp->tc_to_qidx[i] = i;
 	}
 
 qportcfg_exit:
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 3d55d3b..057f8a2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1242,6 +1242,7 @@ struct bnxt {
 	u8			max_tc;
 	u8			max_lltc;	/* lossless TCs */
 	struct bnxt_queue_info	q_info[BNXT_MAX_QUEUE];
+	u8			tc_to_qidx[BNXT_MAX_QUEUE];
 
 	unsigned int		current_interval;
 #define BNXT_TIMER_INTERVAL	HZ
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index 3c746f2..1b72f8a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -21,6 +21,21 @@
 #include "bnxt_dcb.h"
 
 #ifdef CONFIG_BNXT_DCB
+static int bnxt_queue_to_tc(struct bnxt *bp, u8 queue_id)
+{
+	int i, j;
+
+	for (i = 0; i < bp->max_tc; i++) {
+		if (bp->q_info[i].queue_id == queue_id) {
+			for (j = 0; j < bp->max_tc; j++) {
+				if (bp->tc_to_qidx[j] == i)
+					return j;
+			}
+		}
+	}
+	return -EINVAL;
+}
+
 static int bnxt_hwrm_queue_pri2cos_cfg(struct bnxt *bp, struct ieee_ets *ets)
 {
 	struct hwrm_queue_pri2cos_cfg_input req = {0};
@@ -33,10 +48,13 @@ static int bnxt_hwrm_queue_pri2cos_cfg(struct bnxt *bp, struct ieee_ets *ets)
 
 	pri2cos = &req.pri0_cos_queue_id;
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		u8 qidx;
+
 		req.enables |= cpu_to_le32(
 			QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI0_COS_QUEUE_ID << i);
 
-		pri2cos[i] = bp->q_info[ets->prio_tc[i]].queue_id;
+		qidx = bp->tc_to_qidx[ets->prio_tc[i]];
+		pri2cos[i] = bp->q_info[qidx].queue_id;
 	}
 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	return rc;
@@ -55,17 +73,15 @@ static int bnxt_hwrm_queue_pri2cos_qcfg(struct bnxt *bp, struct ieee_ets *ets)
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (!rc) {
 		u8 *pri2cos = &resp->pri0_cos_queue_id;
-		int i, j;
+		int i;
 
 		for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
 			u8 queue_id = pri2cos[i];
+			int tc;
 
-			for (j = 0; j < bp->max_tc; j++) {
-				if (bp->q_info[j].queue_id == queue_id) {
-					ets->prio_tc[i] = j;
-					break;
-				}
-			}
+			tc = bnxt_queue_to_tc(bp, queue_id);
+			if (tc >= 0)
+				ets->prio_tc[i] = tc;
 		}
 	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
@@ -81,13 +97,15 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets,
 	void *data;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_CFG, -1, -1);
-	data = &req.unused_0;
-	for (i = 0; i < max_tc; i++, data += sizeof(cos2bw) - 4) {
+	for (i = 0; i < max_tc; i++) {
+		u8 qidx;
+
 		req.enables |= cpu_to_le32(
 			QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << i);
 
 		memset(&cos2bw, 0, sizeof(cos2bw));
-		cos2bw.queue_id = bp->q_info[i].queue_id;
+		qidx = bp->tc_to_qidx[i];
+		cos2bw.queue_id = bp->q_info[qidx].queue_id;
 		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_STRICT) {
 			cos2bw.tsa =
 				QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP;
@@ -103,8 +121,9 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets,
 				cpu_to_le32((ets->tc_tx_bw[i] * 100) |
 					    BW_VALUE_UNIT_PERCENT1_100);
 		}
+		data = &req.unused_0 + qidx * (sizeof(cos2bw) - 4);
 		memcpy(data, &cos2bw.queue_id, sizeof(cos2bw) - 4);
-		if (i == 0) {
+		if (qidx == 0) {
 			req.queue_id0 = cos2bw.queue_id;
 			req.unused_0 = 0;
 		}
@@ -132,22 +151,22 @@ static int bnxt_hwrm_queue_cos2bw_qcfg(struct bnxt *bp, struct ieee_ets *ets)
 
 	data = &resp->queue_id0 + offsetof(struct bnxt_cos2bw_cfg, queue_id);
 	for (i = 0; i < bp->max_tc; i++, data += sizeof(cos2bw) - 4) {
-		int j;
+		int tc;
 
 		memcpy(&cos2bw.queue_id, data, sizeof(cos2bw) - 4);
 		if (i == 0)
 			cos2bw.queue_id = resp->queue_id0;
 
-		for (j = 0; j < bp->max_tc; j++) {
-			if (bp->q_info[j].queue_id != cos2bw.queue_id)
-				continue;
-			if (cos2bw.tsa ==
-			    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP) {
-				ets->tc_tsa[j] = IEEE_8021QAZ_TSA_STRICT;
-			} else {
-				ets->tc_tsa[j] = IEEE_8021QAZ_TSA_ETS;
-				ets->tc_tx_bw[j] = cos2bw.bw_weight;
-			}
+		tc = bnxt_queue_to_tc(bp, cos2bw.queue_id);
+		if (tc < 0)
+			continue;
+
+		if (cos2bw.tsa ==
+		    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP) {
+			ets->tc_tsa[tc] = IEEE_8021QAZ_TSA_STRICT;
+		} else {
+			ets->tc_tsa[tc] = IEEE_8021QAZ_TSA_ETS;
+			ets->tc_tx_bw[tc] = cos2bw.bw_weight;
 		}
 	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2 02/14] bnxt_en: Remap TC to hardware queues when configuring PFC.
From: Michael Chan @ 2018-04-26 21:44 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1524779084-4016-1-git-send-email-michael.chan@broadcom.com>

Initially, the MQPRIO TCs are mapped 1:1 directly to the hardware
queues.  Some of these hardware queues are configured to be lossless.
When PFC is enabled on one or more TCs, we now need to remap the
TCs that have PFC enabled to the lossless hardware queues.

After remapping, we need to close and open the NIC for the new
mapping to take effect.  We also need to reprogram all ETS parameters.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 101 +++++++++++++++-----------
 1 file changed, 60 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index 1b72f8a..d5bc72c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -173,44 +173,59 @@ static int bnxt_hwrm_queue_cos2bw_qcfg(struct bnxt *bp, struct ieee_ets *ets)
 	return 0;
 }
 
-static int bnxt_hwrm_queue_cfg(struct bnxt *bp, unsigned int lltc_mask)
+static int bnxt_queue_remap(struct bnxt *bp, unsigned int lltc_mask)
 {
-	struct hwrm_queue_cfg_input req = {0};
-	int i;
+	unsigned long qmap = 0;
+	int max = bp->max_tc;
+	int i, j, rc;
 
-	if (netif_running(bp->dev))
-		bnxt_tx_disable(bp);
+	/* Assign lossless TCs first */
+	for (i = 0, j = 0; i < max; ) {
+		if (lltc_mask & (1 << i)) {
+			if (BNXT_LLQ(bp->q_info[j].queue_profile)) {
+				bp->tc_to_qidx[i] = j;
+				__set_bit(j, &qmap);
+				i++;
+			}
+			j++;
+			continue;
+		}
+		i++;
+	}
 
-	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_CFG, -1, -1);
-	req.flags = cpu_to_le32(QUEUE_CFG_REQ_FLAGS_PATH_BIDIR);
-	req.enables = cpu_to_le32(QUEUE_CFG_REQ_ENABLES_SERVICE_PROFILE);
+	for (i = 0, j = 0; i < max; i++) {
+		if (lltc_mask & (1 << i))
+			continue;
+		j = find_next_zero_bit(&qmap, max, j);
+		bp->tc_to_qidx[i] = j;
+		__set_bit(j, &qmap);
+		j++;
+	}
 
-	/* Configure lossless queues to lossy first */
-	req.service_profile = QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY;
-	for (i = 0; i < bp->max_tc; i++) {
-		if (BNXT_LLQ(bp->q_info[i].queue_profile)) {
-			req.queue_id = cpu_to_le32(bp->q_info[i].queue_id);
-			hwrm_send_message(bp, &req, sizeof(req),
-					  HWRM_CMD_TIMEOUT);
-			bp->q_info[i].queue_profile =
-				QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY;
+	if (netif_running(bp->dev)) {
+		bnxt_close_nic(bp, false, false);
+		rc = bnxt_open_nic(bp, false, false);
+		if (rc) {
+			netdev_warn(bp->dev, "failed to open NIC, rc = %d\n", rc);
+			return rc;
 		}
 	}
-
-	/* Now configure desired queues to lossless */
-	req.service_profile = QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS;
-	for (i = 0; i < bp->max_tc; i++) {
-		if (lltc_mask & (1 << i)) {
-			req.queue_id = cpu_to_le32(bp->q_info[i].queue_id);
-			hwrm_send_message(bp, &req, sizeof(req),
-					  HWRM_CMD_TIMEOUT);
-			bp->q_info[i].queue_profile =
-				QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS;
+	if (bp->ieee_ets) {
+		int tc = netdev_get_num_tc(bp->dev);
+
+		if (!tc)
+			tc = 1;
+		rc = bnxt_hwrm_queue_cos2bw_cfg(bp, bp->ieee_ets, tc);
+		if (rc) {
+			netdev_warn(bp->dev, "failed to config BW, rc = %d\n", rc);
+			return rc;
+		}
+		rc = bnxt_hwrm_queue_pri2cos_cfg(bp, bp->ieee_ets);
+		if (rc) {
+			netdev_warn(bp->dev, "failed to config prio, rc = %d\n", rc);
+			return rc;
 		}
 	}
-	if (netif_running(bp->dev))
-		bnxt_tx_enable(bp);
-
 	return 0;
 }
 
@@ -220,7 +235,7 @@ static int bnxt_hwrm_queue_pfc_cfg(struct bnxt *bp, struct ieee_pfc *pfc)
 	struct ieee_ets *my_ets = bp->ieee_ets;
 	unsigned int tc_mask = 0, pri_mask = 0;
 	u8 i, pri, lltc_count = 0;
-	bool need_q_recfg = false;
+	bool need_q_remap = false;
 	int rc;
 
 	if (!my_ets)
@@ -240,21 +255,25 @@ static int bnxt_hwrm_queue_pfc_cfg(struct bnxt *bp, struct ieee_pfc *pfc)
 	if (lltc_count > bp->max_lltc)
 		return -EINVAL;
 
-	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_CFG, -1, -1);
-	req.flags = cpu_to_le32(pri_mask);
-	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (rc)
-		return rc;
-
 	for (i = 0; i < bp->max_tc; i++) {
 		if (tc_mask & (1 << i)) {
-			if (!BNXT_LLQ(bp->q_info[i].queue_profile))
-				need_q_recfg = true;
+			u8 qidx = bp->tc_to_qidx[i];
+
+			if (!BNXT_LLQ(bp->q_info[qidx].queue_profile)) {
+				need_q_remap = true;
+				break;
+			}
 		}
 	}
 
-	if (need_q_recfg)
-		rc = bnxt_hwrm_queue_cfg(bp, tc_mask);
+	if (need_q_remap)
+		rc = bnxt_queue_remap(bp, tc_mask);
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_CFG, -1, -1);
+	req.flags = cpu_to_le32(pri_mask);
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
+		return rc;
 
 	return rc;
 }
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2 03/14] bnxt_en: Check the lengths of encapsulated firmware responses.
From: Michael Chan @ 2018-04-26 21:44 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1524779084-4016-1-git-send-email-michael.chan@broadcom.com>

Firmware messages that are forwarded from PF to VFs are encapsulated.
The size of these encapsulated messages must not exceed the maximum
defined message size.  Add appropriate checks to avoid oversize
messages.  Firmware messages may be expanded in future specs and
this will provide some guardrails to avoid data corruption.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c |  9 +++++++++
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h | 12 ++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index f952963..18ee471 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -809,6 +809,9 @@ static int bnxt_hwrm_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
 	struct hwrm_fwd_resp_input req = {0};
 	struct hwrm_fwd_resp_output *resp = bp->hwrm_cmd_resp_addr;
 
+	if (BNXT_FWD_RESP_SIZE_ERR(msg_size))
+		return -EINVAL;
+
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_RESP, -1, -1);
 
 	/* Set the new target id */
@@ -845,6 +848,9 @@ static int bnxt_hwrm_fwd_err_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
 	struct hwrm_reject_fwd_resp_input req = {0};
 	struct hwrm_reject_fwd_resp_output *resp = bp->hwrm_cmd_resp_addr;
 
+	if (BNXT_REJ_FWD_RESP_SIZE_ERR(msg_size))
+		return -EINVAL;
+
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_REJECT_FWD_RESP, -1, -1);
 	/* Set the new target id */
 	req.target_id = cpu_to_le16(vf->fw_fid);
@@ -877,6 +883,9 @@ static int bnxt_hwrm_exec_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
 	struct hwrm_exec_fwd_resp_input req = {0};
 	struct hwrm_exec_fwd_resp_output *resp = bp->hwrm_cmd_resp_addr;
 
+	if (BNXT_EXEC_FWD_RESP_SIZE_ERR(msg_size))
+		return -EINVAL;
+
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_EXEC_FWD_RESP, -1, -1);
 	/* Set the new target id */
 	req.target_id = cpu_to_le16(vf->fw_fid);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
index d10f6f6..6f6d850 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
@@ -11,6 +11,18 @@
 #ifndef BNXT_SRIOV_H
 #define BNXT_SRIOV_H
 
+#define BNXT_FWD_RESP_SIZE_ERR(n)					\
+	((offsetof(struct hwrm_fwd_resp_input, encap_resp) + n) >	\
+	 sizeof(struct hwrm_fwd_resp_input))
+
+#define BNXT_EXEC_FWD_RESP_SIZE_ERR(n)					\
+	((offsetof(struct hwrm_exec_fwd_resp_input, encap_request) + n) >\
+	 offsetof(struct hwrm_exec_fwd_resp_input, encap_resp_target_id))
+
+#define BNXT_REJ_FWD_RESP_SIZE_ERR(n)					\
+	((offsetof(struct hwrm_reject_fwd_resp_input, encap_request) + n) >\
+	 offsetof(struct hwrm_reject_fwd_resp_input, encap_resp_target_id))
+
 int bnxt_get_vf_config(struct net_device *, int, struct ifla_vf_info *);
 int bnxt_set_vf_mac(struct net_device *, int, u8 *);
 int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2 04/14] bnxt_en: Do not set firmware time from VF driver on older firmware.
From: Michael Chan @ 2018-04-26 21:44 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1524779084-4016-1-git-send-email-michael.chan@broadcom.com>

Older firmware will reject this call and cause an error message to
be printed by the VF driver.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index bda618d..aff4b4e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -5379,7 +5379,8 @@ int bnxt_hwrm_fw_set_time(struct bnxt *bp)
 	struct tm tm;
 	time64_t now = ktime_get_real_seconds();

-	if (bp->hwrm_spec_code < 0x10400)
+	if ((BNXT_VF(bp) && bp->hwrm_spec_code < 0x10901) ||
+	    bp->hwrm_spec_code < 0x10400)
 		return -EOPNOTSUPP;

 	time64_to_tm(now, 0, &tm);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2 05/14] bnxt_en: Simplify ring alloc/free error messages.
From: Michael Chan @ 2018-04-26 21:44 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1524779084-4016-1-git-send-email-michael.chan@broadcom.com>

Replace switch statements printing different messages for every ring type
with a common message.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 43 +++++--------------------------
 1 file changed, 6 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index aff4b4e..b83c2ac 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4336,26 +4336,9 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
 	mutex_unlock(&bp->hwrm_cmd_lock);
 
 	if (rc || err) {
-		switch (ring_type) {
-		case RING_FREE_REQ_RING_TYPE_L2_CMPL:
-			netdev_err(bp->dev, "hwrm_ring_alloc cp failed. rc:%x err:%x\n",
-				   rc, err);
-			return -1;
-
-		case RING_FREE_REQ_RING_TYPE_RX:
-			netdev_err(bp->dev, "hwrm_ring_alloc rx failed. rc:%x err:%x\n",
-				   rc, err);
-			return -1;
-
-		case RING_FREE_REQ_RING_TYPE_TX:
-			netdev_err(bp->dev, "hwrm_ring_alloc tx failed. rc:%x err:%x\n",
-				   rc, err);
-			return -1;
-
-		default:
-			netdev_err(bp->dev, "Invalid ring\n");
-			return -1;
-		}
+		netdev_err(bp->dev, "hwrm_ring_alloc type %d failed. rc:%x err:%x\n",
+			   ring_type, rc, err);
+		return -EIO;
 	}
 	ring->fw_ring_id = ring_id;
 	return rc;
@@ -4479,23 +4462,9 @@ static int hwrm_ring_free_send_msg(struct bnxt *bp,
 	mutex_unlock(&bp->hwrm_cmd_lock);
 
 	if (rc || error_code) {
-		switch (ring_type) {
-		case RING_FREE_REQ_RING_TYPE_L2_CMPL:
-			netdev_err(bp->dev, "hwrm_ring_free cp failed. rc:%d\n",
-				   rc);
-			return rc;
-		case RING_FREE_REQ_RING_TYPE_RX:
-			netdev_err(bp->dev, "hwrm_ring_free rx failed. rc:%d\n",
-				   rc);
-			return rc;
-		case RING_FREE_REQ_RING_TYPE_TX:
-			netdev_err(bp->dev, "hwrm_ring_free tx failed. rc:%d\n",
-				   rc);
-			return rc;
-		default:
-			netdev_err(bp->dev, "Invalid ring\n");
-			return -1;
-		}
+		netdev_err(bp->dev, "hwrm_ring_free type %d failed. rc:%x err:%x\n",
+			   ring_type, rc, error_code);
+		return -EIO;
 	}
 	return 0;
 }
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2 06/14] bnxt_en: Display function level rx/tx_discard_pkts via ethtool
From: Michael Chan @ 2018-04-26 21:44 UTC (permalink / raw)
  To: davem; +Cc: netdev, Vasundhara Volam
In-Reply-To: <1524779084-4016-1-git-send-email-michael.chan@broadcom.com>

From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>

Add counters to display sum of rx/tx_discard_pkts of all rings as
function level statistics via ethtool.

Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 33 +++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 8ba14ae..0ea8466 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -140,6 +140,19 @@ static int bnxt_set_coalesce(struct net_device *dev,
 #define BNXT_RX_STATS_EXT_ENTRY(counter)	\
 	{ BNXT_RX_STATS_EXT_OFFSET(counter), __stringify(counter) }
 
+enum {
+	RX_TOTAL_DISCARDS,
+	TX_TOTAL_DISCARDS,
+};
+
+static struct {
+	u64			counter;
+	char			string[ETH_GSTRING_LEN];
+} bnxt_sw_func_stats[] = {
+	{0, "rx_total_discard_pkts"},
+	{0, "tx_total_discard_pkts"},
+};
+
 static const struct {
 	long offset;
 	char string[ETH_GSTRING_LEN];
@@ -237,6 +250,7 @@ static int bnxt_set_coalesce(struct net_device *dev,
 	BNXT_RX_STATS_EXT_ENTRY(resume_roce_pause_events),
 };
 
+#define BNXT_NUM_SW_FUNC_STATS	ARRAY_SIZE(bnxt_sw_func_stats)
 #define BNXT_NUM_PORT_STATS ARRAY_SIZE(bnxt_port_stats_arr)
 #define BNXT_NUM_PORT_STATS_EXT ARRAY_SIZE(bnxt_port_stats_ext_arr)
 
@@ -244,6 +258,8 @@ static int bnxt_get_num_stats(struct bnxt *bp)
 {
 	int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings;
 
+	num_stats += BNXT_NUM_SW_FUNC_STATS;
+
 	if (bp->flags & BNXT_FLAG_PORT_STATS)
 		num_stats += BNXT_NUM_PORT_STATS;
 
@@ -279,6 +295,9 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
 	if (!bp->bnapi)
 		return;
 
+	for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++)
+		bnxt_sw_func_stats[i].counter = 0;
+
 	for (i = 0; i < bp->cp_nr_rings; i++) {
 		struct bnxt_napi *bnapi = bp->bnapi[i];
 		struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
@@ -288,7 +307,16 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
 		for (k = 0; k < stat_fields; j++, k++)
 			buf[j] = le64_to_cpu(hw_stats[k]);
 		buf[j++] = cpr->rx_l4_csum_errors;
+
+		bnxt_sw_func_stats[RX_TOTAL_DISCARDS].counter +=
+			le64_to_cpu(cpr->hw_stats->rx_discard_pkts);
+		bnxt_sw_func_stats[TX_TOTAL_DISCARDS].counter +=
+			le64_to_cpu(cpr->hw_stats->tx_discard_pkts);
 	}
+
+	for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++, j++)
+		buf[j] = bnxt_sw_func_stats[i].counter;
+
 	if (bp->flags & BNXT_FLAG_PORT_STATS) {
 		__le64 *port_stats = (__le64 *)bp->hw_rx_port_stats;
 
@@ -359,6 +387,11 @@ static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 			sprintf(buf, "[%d]: rx_l4_csum_errors", i);
 			buf += ETH_GSTRING_LEN;
 		}
+		for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++) {
+			strcpy(buf, bnxt_sw_func_stats[i].string);
+			buf += ETH_GSTRING_LEN;
+		}
+
 		if (bp->flags & BNXT_FLAG_PORT_STATS) {
 			for (i = 0; i < BNXT_NUM_PORT_STATS; i++) {
 				strcpy(buf, bnxt_port_stats_arr[i].string);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2 07/14] bnxt_en: Do not allow VF to read EEPROM.
From: Michael Chan @ 2018-04-26 21:44 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1524779084-4016-1-git-send-email-michael.chan@broadcom.com>

Firmware does not allow the operation and would return failure, causing
a warning in dmesg.  So check for VF and disallow it in the driver.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 0ea8466..a699ca54 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -1818,6 +1818,11 @@ static int nvm_get_dir_info(struct net_device *dev, u32 *entries, u32 *length)

 static int bnxt_get_eeprom_len(struct net_device *dev)
 {
+	struct bnxt *bp = netdev_priv(dev);
+
+	if (BNXT_VF(bp))
+		return 0;
+
 	/* The -1 return value allows the entire 32-bit range of offsets to be
 	 * passed via the ethtool command-line utility.
 	 */
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2 08/14] bnxt_en: Increase RING_IDLE minimum threshold to 50
From: Michael Chan @ 2018-04-26 21:44 UTC (permalink / raw)
  To: davem; +Cc: netdev, Andy Gospodarek
In-Reply-To: <1524779084-4016-1-git-send-email-michael.chan@broadcom.com>

From: Andy Gospodarek <gospo@broadcom.com>

This keeps the RING_IDLE flag set in hardware for higher coalesce
settings by default and improves latency.

Signed-off-by: Andy Gospodarek <gospo@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b83c2ac..a221a10 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -7702,7 +7702,7 @@ static void bnxt_init_dflt_coal(struct bnxt *bp)
 	coal->coal_bufs = 30;
 	coal->coal_ticks_irq = 1;
 	coal->coal_bufs_irq = 2;
-	coal->idle_thresh = 25;
+	coal->idle_thresh = 50;
 	coal->bufs_per_record = 2;
 	coal->budget = 64;		/* NAPI budget */
 
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2 09/14] bnxt_en: reduce timeout on initial HWRM calls
From: Michael Chan @ 2018-04-26 21:44 UTC (permalink / raw)
  To: davem; +Cc: netdev, Andy Gospodarek
In-Reply-To: <1524779084-4016-1-git-send-email-michael.chan@broadcom.com>

From: Andy Gospodarek <gospo@broadcom.com>

Testing with DIM enabled on older kernels indicated that firmware calls
were slower than expected.  More detailed analysis indicated that the
default 25us delay was higher than necessary.  Reducing the time spent in
usleep_range() for the first several calls would reduce the overall
latency of firmware calls on newer Intel processors.

Signed-off-by: Andy Gospodarek <gospo@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 26 +++++++++++++++++++++++---
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  6 ++++++
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index a221a10..ff9a5cd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3495,15 +3495,29 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 
 	if (!timeout)
 		timeout = DFLT_HWRM_CMD_TIMEOUT;
+	/* convert timeout to usec */
+	timeout *= 1000;
 
 	i = 0;
-	tmo_count = timeout * 40;
+	/* Short timeout for the first few iterations:
+	 * number of loops = number of loops for short timeout +
+	 * number of loops for standard timeout.
+	 */
+	tmo_count = HWRM_SHORT_TIMEOUT_COUNTER;
+	timeout = timeout - HWRM_SHORT_MIN_TIMEOUT * HWRM_SHORT_TIMEOUT_COUNTER;
+	tmo_count += DIV_ROUND_UP(timeout, HWRM_MIN_TIMEOUT);
 	resp_len = bp->hwrm_cmd_resp_addr + HWRM_RESP_LEN_OFFSET;
 	if (intr_process) {
 		/* Wait until hwrm response cmpl interrupt is processed */
 		while (bp->hwrm_intr_seq_id != HWRM_SEQ_ID_INVALID &&
 		       i++ < tmo_count) {
-			usleep_range(25, 40);
+			/* on first few passes, just barely sleep */
+			if (i < HWRM_SHORT_TIMEOUT_COUNTER)
+				usleep_range(HWRM_SHORT_MIN_TIMEOUT,
+					     HWRM_SHORT_MAX_TIMEOUT);
+			else
+				usleep_range(HWRM_MIN_TIMEOUT,
+					     HWRM_MAX_TIMEOUT);
 		}
 
 		if (bp->hwrm_intr_seq_id != HWRM_SEQ_ID_INVALID) {
@@ -3521,7 +3535,13 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 			      HWRM_RESP_LEN_SFT;
 			if (len)
 				break;
-			usleep_range(25, 40);
+			/* on first few passes, just barely sleep */
+			if (i < DFLT_HWRM_CMD_TIMEOUT)
+				usleep_range(HWRM_SHORT_MIN_TIMEOUT,
+					     HWRM_SHORT_MAX_TIMEOUT);
+			else
+				usleep_range(HWRM_MIN_TIMEOUT,
+					     HWRM_MAX_TIMEOUT);
 		}
 
 		if (i >= tmo_count) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 057f8a2..7fa4a45 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -532,6 +532,12 @@ struct rx_tpa_end_cmp_ext {
 #define BNXT_HWRM_REQ_MAX_SIZE		128
 #define BNXT_HWRM_REQS_PER_PAGE		(BNXT_PAGE_SIZE /	\
 					 BNXT_HWRM_REQ_MAX_SIZE)
+#define HWRM_SHORT_MIN_TIMEOUT		3
+#define HWRM_SHORT_MAX_TIMEOUT		10
+#define HWRM_SHORT_TIMEOUT_COUNTER	5
+
+#define HWRM_MIN_TIMEOUT		25
+#define HWRM_MAX_TIMEOUT		40
 
 #define BNXT_RX_EVENT	1
 #define BNXT_AGG_EVENT	2
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2 10/14] bnxt_en: add debugfs support for DIM
From: Michael Chan @ 2018-04-26 21:44 UTC (permalink / raw)
  To: davem; +Cc: netdev, Andy Gospodarek
In-Reply-To: <1524779084-4016-1-git-send-email-michael.chan@broadcom.com>

From: Andy Gospodarek <gospo@broadcom.com>

This adds debugfs support for bnxt_en with the purpose of allowing users
to examine the current DIM profile in use for each receive queue.  This
was instrumental in debugging issues found with DIM and ensuring that
the profiles we expect to use are the profiles being used.

Signed-off-by: Andy Gospodarek <gospo@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/Makefile       |   1 +
 drivers/net/ethernet/broadcom/bnxt/bnxt.c         |   6 ++
 drivers/net/ethernet/broadcom/bnxt/bnxt.h         |   2 +
 drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c | 124 ++++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.h |  23 ++++
 5 files changed, 156 insertions(+)
 create mode 100644 drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c
 create mode 100644 drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.h

diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile
index 7c560d5..5a779b1 100644
--- a/drivers/net/ethernet/broadcom/bnxt/Makefile
+++ b/drivers/net/ethernet/broadcom/bnxt/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_BNXT) += bnxt_en.o
 
 bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o
 bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o
+bnxt_en-$(CONFIG_DEBUG_FS) += bnxt_debugfs.o
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index ff9a5cd..a45e692 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -62,6 +62,7 @@
 #include "bnxt_vfr.h"
 #include "bnxt_tc.h"
 #include "bnxt_devlink.h"
+#include "bnxt_debugfs.h"
 
 #define BNXT_TX_TIMEOUT		(5 * HZ)
 
@@ -6870,6 +6871,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 	}
 
 	bnxt_enable_napi(bp);
+	bnxt_debug_dev_init(bp);
 
 	rc = bnxt_init_nic(bp, irq_re_init);
 	if (rc) {
@@ -6902,6 +6904,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 	return 0;
 
 open_err:
+	bnxt_debug_dev_exit(bp);
 	bnxt_disable_napi(bp);
 	bnxt_del_napi(bp);
 
@@ -6995,6 +6998,7 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init,
 
 	/* TODO CHIMP_FW: Link/PHY related cleanup if (link_re_init) */
 
+	bnxt_debug_dev_exit(bp);
 	bnxt_disable_napi(bp);
 	del_timer_sync(&bp->timer);
 	bnxt_free_skbs(bp);
@@ -9071,6 +9075,7 @@ static void bnxt_io_resume(struct pci_dev *pdev)
 
 static int __init bnxt_init(void)
 {
+	bnxt_debug_init();
 	return pci_register_driver(&bnxt_pci_driver);
 }
 
@@ -9079,6 +9084,7 @@ static void __exit bnxt_exit(void)
 	pci_unregister_driver(&bnxt_pci_driver);
 	if (bnxt_pf_wq)
 		destroy_workqueue(bnxt_pf_wq);
+	bnxt_debug_exit();
 }
 
 module_init(bnxt_init);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 7fa4a45..8df1d8b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1391,6 +1391,8 @@ struct bnxt {
 	u16			*cfa_code_map; /* cfa_code -> vf_idx map */
 	u8			switch_id[8];
 	struct bnxt_tc_info	*tc_info;
+	struct dentry		*debugfs_pdev;
+	struct dentry		*debugfs_dim;
 };
 
 #define BNXT_RX_STATS_OFFSET(counter)			\
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c
new file mode 100644
index 0000000..94e208e
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c
@@ -0,0 +1,124 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2017-2018 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "bnxt_hsi.h"
+#include <linux/net_dim.h>
+#include "bnxt.h"
+#include "bnxt_debugfs.h"
+
+static struct dentry *bnxt_debug_mnt;
+
+static ssize_t debugfs_dim_read(struct file *filep,
+				char __user *buffer,
+				size_t count, loff_t *ppos)
+{
+	struct net_dim *dim = filep->private_data;
+	int len;
+	char *buf;
+
+	if (*ppos)
+		return 0;
+	if (!dim)
+		return -ENODEV;
+	buf = kasprintf(GFP_KERNEL,
+			"state = %d\n" \
+			"profile_ix = %d\n" \
+			"mode = %d\n" \
+			"tune_state = %d\n" \
+			"steps_right = %d\n" \
+			"steps_left = %d\n" \
+			"tired = %d\n",
+			dim->state,
+			dim->profile_ix,
+			dim->mode,
+			dim->tune_state,
+			dim->steps_right,
+			dim->steps_left,
+			dim->tired);
+	if (!buf)
+		return -ENOMEM;
+	if (count < strlen(buf)) {
+		kfree(buf);
+		return -ENOSPC;
+	}
+	len = simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+	kfree(buf);
+	return len;
+}
+
+static const struct file_operations debugfs_dim_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = debugfs_dim_read,
+};
+
+static struct dentry *debugfs_dim_ring_init(struct net_dim *dim, int ring_idx,
+					    struct dentry *dd)
+{
+	static char qname[16];
+
+	snprintf(qname, 10, "%d", ring_idx);
+	return debugfs_create_file(qname, 0600, dd,
+				   dim, &debugfs_dim_fops);
+}
+
+void bnxt_debug_dev_init(struct bnxt *bp)
+{
+	const char *pname = pci_name(bp->pdev);
+	struct dentry *pdevf;
+	int i;
+
+	bp->debugfs_pdev = debugfs_create_dir(pname, bnxt_debug_mnt);
+	if (bp->debugfs_pdev) {
+		pdevf = debugfs_create_dir("dim", bp->debugfs_pdev);
+		if (!pdevf) {
+			pr_err("failed to create debugfs entry %s/dim\n",
+			       pname);
+			return;
+		}
+		bp->debugfs_dim = pdevf;
+		/* create files for each rx ring */
+		for (i = 0; i < bp->cp_nr_rings; i++) {
+			struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring;
+
+			if (cpr && bp->bnapi[i]->rx_ring) {
+				pdevf = debugfs_dim_ring_init(&cpr->dim, i,
+							      bp->debugfs_dim);
+				if (!pdevf)
+					pr_err("failed to create debugfs entry %s/dim/%d\n",
+					       pname, i);
+			}
+		}
+	} else {
+		pr_err("failed to create debugfs entry %s\n", pname);
+	}
+}
+
+void bnxt_debug_dev_exit(struct bnxt *bp)
+{
+	if (bp) {
+		debugfs_remove_recursive(bp->debugfs_pdev);
+		bp->debugfs_pdev = NULL;
+	}
+}
+
+void bnxt_debug_init(void)
+{
+	bnxt_debug_mnt = debugfs_create_dir("bnxt_en", NULL);
+	if (!bnxt_debug_mnt)
+		pr_err("failed to init bnxt_en debugfs\n");
+}
+
+void bnxt_debug_exit(void)
+{
+	debugfs_remove_recursive(bnxt_debug_mnt);
+}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.h
new file mode 100644
index 0000000..d0bb488
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.h
@@ -0,0 +1,23 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2017-2018 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+
+#ifdef CONFIG_DEBUG_FS
+void bnxt_debug_init(void);
+void bnxt_debug_exit(void);
+void bnxt_debug_dev_init(struct bnxt *bp);
+void bnxt_debug_dev_exit(struct bnxt *bp);
+#else
+static inline void bnxt_debug_init(void) {}
+static inline void bnxt_debug_exit(void) {}
+static inline void bnxt_debug_dev_init(struct bnxt *bp) {}
+static inline void bnxt_debug_dev_exit(struct bnxt *bp) {}
+#endif
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2 11/14] bnxt_en: Reserve rings in bnxt_set_channels() if device is down.
From: Michael Chan @ 2018-04-26 21:44 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1524779084-4016-1-git-send-email-michael.chan@broadcom.com>

The current code does not reserve rings during ethtool -L when the device
is down.  The rings will be reserved when the device is later opened.

Change it to reserve rings during ethtool -L when the device is down.
This provides a better guarantee that the device open will be successful
when the rings are reserved ahead of time.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index a699ca54..ad98b78 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -584,6 +584,8 @@ static int bnxt_set_channels(struct net_device *dev,
 			 * to renable
 			 */
 		}
+	} else {
+		rc = bnxt_reserve_rings(bp);
 	}

 	return rc;
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2 12/14] bnxt_en: Don't reserve rings on VF when min rings were not provisioned by PF.
From: Michael Chan @ 2018-04-26 21:44 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1524779084-4016-1-git-send-email-michael.chan@broadcom.com>

When rings are more limited and the PF has not provisioned minimum
guaranteed rings to the VF, do not reserve rings during driver probe.
Wait till device open before reserving rings when they will be used.
Device open will succeed if some minimum rings can be successfully
reserved and allocated.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index a45e692..0884e49 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -5952,6 +5952,9 @@ static int bnxt_init_msix(struct bnxt *bp)
 	if (total_vecs > max)
 		total_vecs = max;
 
+	if (!total_vecs)
+		return 0;
+
 	msix_ent = kcalloc(total_vecs, sizeof(struct msix_entry), GFP_KERNEL);
 	if (!msix_ent)
 		return -ENOMEM;
@@ -7276,6 +7279,25 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp)
 	return rc;
 }
 
+static bool bnxt_can_reserve_rings(struct bnxt *bp)
+{
+#ifdef CONFIG_BNXT_SRIOV
+	if ((bp->flags & BNXT_FLAG_NEW_RM) && BNXT_VF(bp)) {
+		struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
+
+		/* No minimum rings were provisioned by the PF.  Don't
+		 * reserve rings by default when device is down.
+		 */
+		if (hw_resc->min_tx_rings || hw_resc->resv_tx_rings)
+			return true;
+
+		if (!netif_running(bp->dev))
+			return false;
+	}
+#endif
+	return true;
+}
+
 /* If the chip and firmware supports RFS */
 static bool bnxt_rfs_supported(struct bnxt *bp)
 {
@@ -7292,7 +7314,7 @@ static bool bnxt_rfs_capable(struct bnxt *bp)
 #ifdef CONFIG_RFS_ACCEL
 	int vnics, max_vnics, max_rss_ctxs;
 
-	if (!(bp->flags & BNXT_FLAG_MSIX_CAP))
+	if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp))
 		return false;
 
 	vnics = 1 + bp->rx_nr_rings;
@@ -8526,6 +8548,9 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
 {
 	int dflt_rings, max_rx_rings, max_tx_rings, rc;
 
+	if (!bnxt_can_reserve_rings(bp))
+		return 0;
+
 	if (sh)
 		bp->flags |= BNXT_FLAG_SHARED_RINGS;
 	dflt_rings = netif_get_num_default_rss_queues();
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2 13/14] bnxt_en: Reserve RSS and L2 contexts for VF.
From: Michael Chan @ 2018-04-26 21:44 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1524779084-4016-1-git-send-email-michael.chan@broadcom.com>

For completeness and correctness, the VF driver needs to reserve these
RSS and L2 contexts.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c       |  4 ++++
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 10 +++++-----
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h |  5 +++++
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 0884e49..fee1c0d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4713,6 +4713,10 @@ int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings)
 
 	__bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
 				     cp_rings, vnics);
+	req.enables |= cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS |
+				   FUNC_VF_CFG_REQ_ENABLES_NUM_L2_CTXS);
+	req.num_rsscos_ctxs = cpu_to_le16(BNXT_VF_MAX_RSS_CTX);
+	req.num_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (rc)
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 18ee471..cc21d87 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -462,13 +462,13 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
 	vf_vnics = hw_resc->max_vnics - bp->nr_vnics;
 	vf_vnics = min_t(u16, vf_vnics, vf_rx_rings);
 
-	req.min_rsscos_ctx = cpu_to_le16(1);
-	req.max_rsscos_ctx = cpu_to_le16(1);
+	req.min_rsscos_ctx = cpu_to_le16(BNXT_VF_MIN_RSS_CTX);
+	req.max_rsscos_ctx = cpu_to_le16(BNXT_VF_MAX_RSS_CTX);
 	if (pf->vf_resv_strategy == BNXT_VF_RESV_STRATEGY_MINIMAL) {
 		req.min_cmpl_rings = cpu_to_le16(1);
 		req.min_tx_rings = cpu_to_le16(1);
 		req.min_rx_rings = cpu_to_le16(1);
-		req.min_l2_ctxs = cpu_to_le16(1);
+		req.min_l2_ctxs = cpu_to_le16(BNXT_VF_MIN_L2_CTX);
 		req.min_vnics = cpu_to_le16(1);
 		req.min_stat_ctx = cpu_to_le16(1);
 		req.min_hw_ring_grps = cpu_to_le16(1);
@@ -483,7 +483,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
 		req.min_cmpl_rings = cpu_to_le16(vf_cp_rings);
 		req.min_tx_rings = cpu_to_le16(vf_tx_rings);
 		req.min_rx_rings = cpu_to_le16(vf_rx_rings);
-		req.min_l2_ctxs = cpu_to_le16(4);
+		req.min_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
 		req.min_vnics = cpu_to_le16(vf_vnics);
 		req.min_stat_ctx = cpu_to_le16(vf_stat_ctx);
 		req.min_hw_ring_grps = cpu_to_le16(vf_ring_grps);
@@ -491,7 +491,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
 	req.max_cmpl_rings = cpu_to_le16(vf_cp_rings);
 	req.max_tx_rings = cpu_to_le16(vf_tx_rings);
 	req.max_rx_rings = cpu_to_le16(vf_rx_rings);
-	req.max_l2_ctxs = cpu_to_le16(4);
+	req.max_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
 	req.max_vnics = cpu_to_le16(vf_vnics);
 	req.max_stat_ctx = cpu_to_le16(vf_stat_ctx);
 	req.max_hw_ring_grps = cpu_to_le16(vf_ring_grps);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
index 6f6d850..e9b20cd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
@@ -23,6 +23,11 @@
 	((offsetof(struct hwrm_reject_fwd_resp_input, encap_request) + n) >\
 	 offsetof(struct hwrm_reject_fwd_resp_input, encap_resp_target_id))
 
+#define BNXT_VF_MIN_RSS_CTX	1
+#define BNXT_VF_MAX_RSS_CTX	1
+#define BNXT_VF_MIN_L2_CTX	1
+#define BNXT_VF_MAX_L2_CTX	4
+
 int bnxt_get_vf_config(struct net_device *, int, struct ifla_vf_info *);
 int bnxt_set_vf_mac(struct net_device *, int, u8 *);
 int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2 14/14] bnxt_en: Reserve rings at driver open if none was reserved at probe time.
From: Michael Chan @ 2018-04-26 21:44 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1524779084-4016-1-git-send-email-michael.chan@broadcom.com>

Add logic to reserve default rings at driver open time if none was
reserved during probe time.  This will happen when the PF driver did
not provision minimum rings to the VF, due to more limited resources.

Driver open will only succeed if some minimum rings can be reserved.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index fee1c0d..efe5c72 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6844,6 +6844,8 @@ static void bnxt_preset_reg_win(struct bnxt *bp)
 	}
 }
 
+static int bnxt_init_dflt_ring_mode(struct bnxt *bp);
+
 static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 {
 	int rc = 0;
@@ -6851,6 +6853,12 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 	bnxt_preset_reg_win(bp);
 	netif_carrier_off(bp->dev);
 	if (irq_re_init) {
+		/* Reserve rings now if none were reserved at driver probe. */
+		rc = bnxt_init_dflt_ring_mode(bp);
+		if (rc) {
+			netdev_err(bp->dev, "Failed to reserve default rings at open\n");
+			return rc;
+		}
 		rc = bnxt_reserve_rings(bp);
 		if (rc)
 			return rc;
@@ -8600,6 +8608,29 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
 	return rc;
 }
 
+static int bnxt_init_dflt_ring_mode(struct bnxt *bp)
+{
+	int rc;
+
+	if (bp->tx_nr_rings)
+		return 0;
+
+	rc = bnxt_set_dflt_rings(bp, true);
+	if (rc) {
+		netdev_err(bp->dev, "Not enough rings available.\n");
+		return rc;
+	}
+	rc = bnxt_init_int_mode(bp);
+	if (rc)
+		return rc;
+	bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
+	if (bnxt_rfs_supported(bp) && bnxt_rfs_capable(bp)) {
+		bp->flags |= BNXT_FLAG_RFS;
+		bp->dev->features |= NETIF_F_NTUPLE;
+	}
+	return 0;
+}
+
 int bnxt_restore_pf_fw_resources(struct bnxt *bp)
 {
 	int rc;
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH iproute2] ipaddress: strengthen check on 'label' input
From: Stephen Hemminger @ 2018-04-26 21:45 UTC (permalink / raw)
  To: Patrick Talbert; +Cc: netdev
In-Reply-To: <1524578901-28278-1-git-send-email-ptalbert@redhat.com>

On Tue, 24 Apr 2018 16:08:21 +0200
Patrick Talbert <ptalbert@redhat.com> wrote:

> As mentioned in the ip-address man page, an address label must
> be equal to the device name or prefixed by the device name
> followed by a colon. Currently the only check on this input is
> to see if the device name appears at the beginning of the label
> string.
> 
> This commit adds an additional check to ensure label == dev or
> continues with a colon.
> 
> Signed-off-by: Patrick Talbert <ptalbert@redhat.com>
> ---
>  ip/ipaddress.c | 11 ++++++++---
>  1 file changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/ip/ipaddress.c b/ip/ipaddress.c
> index aecc9a1..edcf821 100644
> --- a/ip/ipaddress.c
> +++ b/ip/ipaddress.c
> @@ -2168,9 +2168,14 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv)
>  		fprintf(stderr, "Not enough information: \"dev\" argument is required.\n");
>  		return -1;
>  	}
> -	if (l && matches(d, l) != 0) {
> -		fprintf(stderr, "\"dev\" (%s) must match \"label\" (%s).\n", d, l);
> -		return -1;
> +	if (l) {
> +		size_t d_len = strlen(d);
> +
> +		if (!(matches(d, l) == 0 && (l[d_len] == '\0' || l[d_len] == ':'))) {

matches() is not what you want here: it does a prefix match (i.e. matches("eth0", "eth") == 0).
Also, what if the label is shorter than the device name? You would end up dereferencing past
the end of the string!

I think you want something like:

/*
 * Return true if @label is a valid address label for device @dev: it must
 * be either exactly the device name, or the device name immediately
 * followed by a colon (and then an arbitrary suffix).
 *
 * The whole device name has to match.  Comparing only as many characters
 * as precede the first ':' in the label would accept "eth:1" (or even
 * ":1") for device "eth0".
 */
static bool is_valid_label(const char *dev, const char *label)
{
	size_t dev_len = strlen(dev);

	/*
	 * strncmp() stops at the first NUL, so a label shorter than the
	 * device name simply compares unequal and is never read past its end.
	 */
	if (strncmp(dev, label, dev_len) != 0)
		return false;

	/*
	 * The prefix matched, so label[dev_len] is within the string: it is
	 * at worst the terminating NUL.
	 */
	return label[dev_len] == '\0' || label[dev_len] == ':';
}

> +			fprintf(stderr, "\"label\" (%s) must match \"dev\" (%s) or be prefixed by"
> +				" \"dev\" with a colon.\n", l, d);
> +			return -1;
> +		}
>  	}
>  
>  	if (peer_len == 0 && local_len) {

^ permalink raw reply

* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Mikulas Patocka @ 2018-04-26 21:50 UTC (permalink / raw)
  To: John Stoffel
  Cc: Andrew, eric.dumazet, mst, edumazet, netdev, Randy Dunlap,
	linux-kernel, Matthew Wilcox, Hocko, James Bottomley, Michal,
	dm-devel, David Miller, David Rientjes, Morton, virtualization,
	linux-mm, Vlastimil Babka
In-Reply-To: <23266.8532.619051.784274@quad.stoffel.home>

On Thu, 26 Apr 2018, John Stoffel wrote:

> >>>>> "James" == James Bottomley <James.Bottomley@HansenPartnership.com> writes:
> 
> James> I may be an atypical developer but I'd rather have a root canal
> James> than browse through menuconfig options.  The way to get people
> James> to learn about new debugging options is to blog about it (or
> James> write an lwn.net article) which google will find the next time
> James> I ask it how I debug XXX.  Google (probably as a service to
> James> humanity) rarely turns up Kconfig options in response to a
> James> query.
> 
> I agree with James here.  Looking at the SLAB vs SLUB Kconfig entries
> tells me *nothing* about why I should pick one or the other, as an
> example.
> 
> John

I see your point - and I think the misunderstanding is this.

This patch is not really helping people to debug existing crashes. It is 
not like "you get a crash" - "you google for some keywords" - "you get a 
page that suggests to turn this option on" - "you turn it on and solve the 
crash".

What this patch really does is make the kernel deliberately crash in
situations where the code violates the specification but would otherwise
not crash, or would crash only very rarely. It helps to detect
specification violations.

If the kernel developer (or tester) doesn't use this option, his buggy 
code won't crash - and if it won't crash, he won't fix the bug or report 
it. How is the user or developer supposed to learn about this option, if 
he gets no crash at all?

Mikulas

^ permalink raw reply

* Re: [PATCH] net/mlx5: report persistent netdev stats across ifdown/ifup commands
From: Saeed Mahameed @ 2018-04-26 21:50 UTC (permalink / raw)
  To: Qing Huang, Eran Ben Elisha
  Cc: linux-kernel, RDMA mailing list, Linux Netdev List,
	Leon Romanovsky, Matan Barak, Saeed Mahameed
In-Reply-To: <1524775057-8012-1-git-send-email-qing.huang@oracle.com>

On Thu, Apr 26, 2018 at 1:37 PM, Qing Huang <qing.huang@oracle.com> wrote:
> Current stats collecting scheme in mlx5 driver is to periodically fetch
> aggregated stats from all the active mlx5 software channels associated
> with the device. However when a mlx5 interface is brought down(ifdown),
> all the channels will be deactivated and closed. A new set of channels
> will be created when next ifup command or a similar command is called.
> Unfortunately the new channels will have all stats reset to 0. So you
> lose the accumulated stats information. This behavior is different from
> other netdev drivers including the mlx4 driver. In order to fix it, we
> now save prior mlx5 software stats into netdev stats fields, so all the
> accumulated stats will survive multiple runs of ifdown/ifup commands and
> be shown correctly.
>
> Orabug: 27548610
>
> Signed-off-by: Qing Huang <qing.huang@oracle.com>
> ---

Hi Qing,

I am adding Eran since he is currently working on a similar patch.
He is also taking care of all cores/rings stats to make them
persistent, so you won't have a discrepancy between
ethtool and ifconfig stats.

I am ok with this patch, but this means Eran has to work his way around it.

so we have two options:

1. Temporarily accept this patch, and change it later with Eran's work.
2. Wait for Eran's work.

I am ok with either one of them, please let me know.

Thanks !


>  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 30 +++++++++++++++++++----
>  1 file changed, 25 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index f1fe490..5d50e69 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -2621,6 +2621,23 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev)
>                 netdev_set_tc_queue(netdev, tc, nch, 0);
>  }
>
> +static void mlx5e_netdev_save_stats(struct mlx5e_priv *priv)
> +{
> +       struct net_device *netdev = priv->netdev;
> +
> +       netdev->stats.rx_packets += priv->stats.sw.rx_packets;
> +       netdev->stats.rx_bytes   += priv->stats.sw.rx_bytes;
> +       netdev->stats.tx_packets += priv->stats.sw.tx_packets;
> +       netdev->stats.tx_bytes   += priv->stats.sw.tx_bytes;
> +       netdev->stats.tx_dropped += priv->stats.sw.tx_queue_dropped;
> +
> +       priv->stats.sw.rx_packets       = 0;
> +       priv->stats.sw.rx_bytes         = 0;
> +       priv->stats.sw.tx_packets       = 0;
> +       priv->stats.sw.tx_bytes         = 0;
> +       priv->stats.sw.tx_queue_dropped = 0;
> +}
> +

This means that we are now explicitly clearing channels stats on
ifconfig down or switch_channels,
and now after ifconfig down, ethtool will always show 0; before this
patch it didn't.
Anyway, the update sw stats function will always override them with the new
channels stats the next time we load new channels,
so it is not that big of a deal.


>  static void mlx5e_build_channels_tx_maps(struct mlx5e_priv *priv)
>  {
>         struct mlx5e_channel *c;
> @@ -2691,6 +2708,7 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
>                 netif_set_real_num_tx_queues(netdev, new_num_txqs);
>
>         mlx5e_deactivate_priv_channels(priv);
> +       mlx5e_netdev_save_stats(priv);
>         mlx5e_close_channels(&priv->channels);
>
>         priv->channels = *new_chs;
> @@ -2770,6 +2788,7 @@ int mlx5e_close_locked(struct net_device *netdev)
>
>         netif_carrier_off(priv->netdev);
>         mlx5e_deactivate_priv_channels(priv);
> +       mlx5e_netdev_save_stats(priv);
>         mlx5e_close_channels(&priv->channels);
>
>         return 0;
> @@ -3215,11 +3234,12 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
>                 stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
>                 stats->tx_bytes   = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
>         } else {
> -               stats->rx_packets = sstats->rx_packets;
> -               stats->rx_bytes   = sstats->rx_bytes;
> -               stats->tx_packets = sstats->tx_packets;
> -               stats->tx_bytes   = sstats->tx_bytes;
> -               stats->tx_dropped = sstats->tx_queue_dropped;
> +               stats->rx_packets = sstats->rx_packets + dev->stats.rx_packets;
> +               stats->rx_bytes   = sstats->rx_bytes + dev->stats.rx_bytes;
> +               stats->tx_packets = sstats->tx_packets + dev->stats.tx_packets;
> +               stats->tx_bytes   = sstats->tx_bytes + dev->stats.tx_bytes;
> +               stats->tx_dropped = sstats->tx_queue_dropped +
> +                                   dev->stats.tx_dropped;
>         }
>
>         stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
> --
> 1.8.3.1
>

^ permalink raw reply

* [RFC/PATCH] net: ethernet: nixge: Use of_get_mac_address()
From: Moritz Fischer @ 2018-04-26 21:57 UTC (permalink / raw)
  To: linux-kernel
  Cc: devicetree, netdev, davem, robh+dt, mark.rutland, Moritz Fischer

Make nixge driver work with 'mac-address' property instead of
'address' property. There are currently no in-tree users and
the only users of this driver are devices that use overlays
we control to instantiate the device together with the corresponding
FPGA images.

Signed-off-by: Moritz Fischer <mdf@kernel.org>
---

Hi David, Rob,

with Mike's change that enables the generic 'mac-address'
binding, which I barely missed with the submission of this
driver, I was wondering if we can still change the binding.

I'm aware that this is generally a no-no, since the binding
is considered API, but since there are no users outside of the
devicetree overlays that we ship with our devices, I thought I'd ask.

If you don't think that's a good idea do you think supporting both
would be worthwhile?

Thanks,

Moritz

---
 .../devicetree/bindings/net/nixge.txt         |  4 ++--
 drivers/net/ethernet/ni/nixge.c               | 20 ++-----------------
 2 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/nixge.txt b/Documentation/devicetree/bindings/net/nixge.txt
index e55af7f0881a..9bc1ecfb6762 100644
--- a/Documentation/devicetree/bindings/net/nixge.txt
+++ b/Documentation/devicetree/bindings/net/nixge.txt
@@ -8,7 +8,7 @@ Required properties:
 - phy-mode: See ethernet.txt file in the same directory.
 - phy-handle: See ethernet.txt file in the same directory.
 - nvmem-cells: Phandle of nvmem cell containing the MAC address
-- nvmem-cell-names: Should be "address"
+- nvmem-cell-names: Should be "mac-address"
 
 Examples (10G generic PHY):
 	nixge0: ethernet@40000000 {
@@ -16,7 +16,7 @@ Examples (10G generic PHY):
 		reg = <0x40000000 0x6000>;
 
 		nvmem-cells = <&eth1_addr>;
-		nvmem-cell-names = "address";
+		nvmem-cell-names = "mac-address";
 
 		interrupts = <0 29 IRQ_TYPE_LEVEL_HIGH>, <0 30 IRQ_TYPE_LEVEL_HIGH>;
 		interrupt-names = "rx", "tx";
diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
index 27364b7572fc..7918c7b7273b 100644
--- a/drivers/net/ethernet/ni/nixge.c
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -1162,22 +1162,6 @@ static int nixge_mdio_setup(struct nixge_priv *priv, struct device_node *np)
 	return of_mdiobus_register(bus, np);
 }
 
-static void *nixge_get_nvmem_address(struct device *dev)
-{
-	struct nvmem_cell *cell;
-	size_t cell_size;
-	char *mac;
-
-	cell = nvmem_cell_get(dev, "address");
-	if (IS_ERR(cell))
-		return cell;
-
-	mac = nvmem_cell_read(cell, &cell_size);
-	nvmem_cell_put(cell);
-
-	return mac;
-}
-
 static int nixge_probe(struct platform_device *pdev)
 {
 	struct nixge_priv *priv;
@@ -1201,8 +1185,8 @@ static int nixge_probe(struct platform_device *pdev)
 	ndev->min_mtu = 64;
 	ndev->max_mtu = NIXGE_JUMBO_MTU;
 
-	mac_addr = nixge_get_nvmem_address(&pdev->dev);
-	if (mac_addr && is_valid_ether_addr(mac_addr))
+	mac_addr = of_get_mac_address(np);
+	if (mac_addr)
 		ether_addr_copy(ndev->dev_addr, mac_addr);
 	else
 		eth_hw_addr_random(ndev);
-- 
2.17.0

^ permalink raw reply related

* Re: [RFC/PATCH] net: ethernet: nixge: Use of_get_mac_address()
From: Moritz Fischer @ 2018-04-26 22:04 UTC (permalink / raw)
  To: Moritz Fischer
  Cc: linux-kernel, devicetree, netdev, davem, robh+dt, mark.rutland
In-Reply-To: <20180426215742.18966-1-mdf@kernel.org>

On Thu, Apr 26, 2018 at 02:57:42PM -0700, Moritz Fischer wrote:
> Make nixge driver work with 'mac-address' property instead of
> 'address' property. There are currently no in-tree users and
> the only users of this driver are devices that use overlays
> we control to instantiate the device together with the corresponding
> FPGA images.
> 
> Signed-off-by: Moritz Fischer <mdf@kernel.org>
> ---
> 
> Hi David, Rob,
> 
> with Mike's change that enable the generic 'mac-address'
> binding that I barely missed with the submission of this
> driver I was wondering if we can still change the binding.
> 
> I'm aware that this generally is a nonono case, since the binding
> is considered API, but since there are no users outside of our
> devicetree overlays that we ship with our devices I thought I'd ask.
> 
> If you don't think that's a good idea do you think supporting both
> would be worthwhile?
> 
> Thanks,
> 
> Moritz
> 
> ---
>  .../devicetree/bindings/net/nixge.txt         |  4 ++--
>  drivers/net/ethernet/ni/nixge.c               | 20 ++-----------------
>  2 files changed, 4 insertions(+), 20 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/net/nixge.txt b/Documentation/devicetree/bindings/net/nixge.txt
> index e55af7f0881a..9bc1ecfb6762 100644
> --- a/Documentation/devicetree/bindings/net/nixge.txt
> +++ b/Documentation/devicetree/bindings/net/nixge.txt
> @@ -8,7 +8,7 @@ Required properties:
>  - phy-mode: See ethernet.txt file in the same directory.
>  - phy-handle: See ethernet.txt file in the same directory.
>  - nvmem-cells: Phandle of nvmem cell containing the MAC address
> -- nvmem-cell-names: Should be "address"
> +- nvmem-cell-names: Should be "mac-address"
>  
>  Examples (10G generic PHY):
>  	nixge0: ethernet@40000000 {
> @@ -16,7 +16,7 @@ Examples (10G generic PHY):
>  		reg = <0x40000000 0x6000>;
>  
>  		nvmem-cells = <&eth1_addr>;
> -		nvmem-cell-names = "address";
> +		nvmem-cell-names = "mac-address";
>  
>  		interrupts = <0 29 IRQ_TYPE_LEVEL_HIGH>, <0 30 IRQ_TYPE_LEVEL_HIGH>;
>  		interrupt-names = "rx", "tx";
> diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
> index 27364b7572fc..7918c7b7273b 100644
> --- a/drivers/net/ethernet/ni/nixge.c
> +++ b/drivers/net/ethernet/ni/nixge.c
> @@ -1162,22 +1162,6 @@ static int nixge_mdio_setup(struct nixge_priv *priv, struct device_node *np)
>  	return of_mdiobus_register(bus, np);
>  }
>  
> -static void *nixge_get_nvmem_address(struct device *dev)
> -{
> -	struct nvmem_cell *cell;
> -	size_t cell_size;
> -	char *mac;
> -
> -	cell = nvmem_cell_get(dev, "address");
> -	if (IS_ERR(cell))
> -		return cell;
> -
> -	mac = nvmem_cell_read(cell, &cell_size);
> -	nvmem_cell_put(cell);
> -
> -	return mac;
> -}
> -
>  static int nixge_probe(struct platform_device *pdev)
>  {
>  	struct nixge_priv *priv;
> @@ -1201,8 +1185,8 @@ static int nixge_probe(struct platform_device *pdev)
>  	ndev->min_mtu = 64;
>  	ndev->max_mtu = NIXGE_JUMBO_MTU;
>  
> -	mac_addr = nixge_get_nvmem_address(&pdev->dev);
> -	if (mac_addr && is_valid_ether_addr(mac_addr))
> +	mac_addr = of_get_mac_address(np);
Sorry, that should be &pdev->dev.of_node here ... I'll resubmit if
general idea ok.

> +	if (mac_addr)
>  		ether_addr_copy(ndev->dev_addr, mac_addr);
>  	else
>  		eth_hw_addr_random(ndev);
> -- 
> 2.17.0
> 

- Moritz

^ permalink raw reply

* Re: [PATCH v7 net-next 4/4] netvsc: refactor notifier/event handling code to use the failover framework
From: Siwei Liu @ 2018-04-26 22:14 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Stephen Hemminger, Jiri Pirko, Sridhar Samudrala, David Miller,
	Netdev, virtualization, virtio-dev, Brandeburg, Jesse,
	Alexander Duyck, Jakub Kicinski, Jason Wang
In-Reply-To: <20180426050934-mutt-send-email-mst@kernel.org>

On Wed, Apr 25, 2018 at 7:28 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Wed, Apr 25, 2018 at 03:57:57PM -0700, Siwei Liu wrote:
>> On Wed, Apr 25, 2018 at 3:22 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>> > On Wed, Apr 25, 2018 at 02:38:57PM -0700, Siwei Liu wrote:
>> >> On Mon, Apr 23, 2018 at 1:06 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>> >> > On Mon, Apr 23, 2018 at 12:44:39PM -0700, Siwei Liu wrote:
>> >> >> On Mon, Apr 23, 2018 at 10:56 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
>> >> >> > On Mon, Apr 23, 2018 at 10:44:40AM -0700, Stephen Hemminger wrote:
>> >> >> >> On Mon, 23 Apr 2018 20:24:56 +0300
>> >> >> >> "Michael S. Tsirkin" <mst@redhat.com> wrote:
>> >> >> >>
>> >> >> >> > On Mon, Apr 23, 2018 at 10:04:06AM -0700, Stephen Hemminger wrote:
>> >> >> >> > > > >
>> >> >> >> > > > >I will NAK patches to change to common code for netvsc especially the
>> >> >> >> > > > >three device model.  MS worked hard with distro vendors to support transparent
>> >> >> >> > > > >mode, and we really can't have a new model; or do backport.
>> >> >> >> > > > >
>> >> >> >> > > > >Plus, DPDK is now dependent on existing model.
>> >> >> >> > > >
>> >> >> >> > > > Sorry, but nobody here cares about dpdk or other similar oddities.
>> >> >> >> > >
>> >> >> >> > > The network device model is a userspace API, and DPDK is a userspace application.
>> >> >> >> >
>> >> >> >> > It is userspace but are you sure dpdk is actually poking at netdevs?
>> >> >> >> > AFAIK it's normally banging device registers directly.
>> >> >> >> >
>> >> >> >> > > You can't go breaking userspace even if you don't like the application.
>> >> >> >> >
>> >> >> >> > Could you please explain how is the proposed patchset breaking
>> >> >> >> > userspace? Ignoring DPDK for now, I don't think it changes the userspace
>> >> >> >> > API at all.
>> >> >> >> >
>> >> >> >>
>> >> >> >> The DPDK has a device driver vdev_netvsc which scans the Linux network devices
>> >> >> >> to look for Linux netvsc device and the paired VF device and setup the
>> >> >> >> DPDK environment.  This setup creates a DPDK failsafe (bondingish) instance
>> >> >> >> and sets up TAP support over the Linux netvsc device as well as the Mellanox
>> >> >> >> VF device.
>> >> >> >>
>> >> >> >> So it depends on existing 2 device model. You can't go to a 3 device model
>> >> >> >> or start hiding devices from userspace.
>> >> >> >
>> >> >> > Okay so how does the existing patch break that? IIUC does not go to
>> >> >> > a 3 device model since netvsc calls failover_register directly.
>> >> >> >
>> >> >> >> Also, I am working on associating netvsc and VF device based on serial number
>> >> >> >> rather than MAC address. The serial number is how Windows works now, and it makes
>> >> >> >> sense for Linux and Windows to use the same mechanism if possible.
>> >> >> >
>> >> >> > Maybe we should support same for virtio ...
>> >> >> > Which serial do you mean? From vpd?
>> >> >> >
>> >> >> > I guess you will want to keep supporting MAC for old hypervisors?
>> >> >> >
>> >> >> > It all seems like a reasonable thing to support in the generic core.
>> >> >>
>> >> >> That's the reason why I chose explicit identifier rather than rely on
>> >> >> MAC address to bind/pair a device. MAC address can change. Even if it
>> >> >> can't, malicious guest user can fake MAC address to skip binding.
>> >> >>
>> >> >> -Siwei
>> >> >
>> >> > Address should be sampled at device creation to prevent this
>> >> > kind of hack. Not that it buys the malicious user much:
>> >> > if you can poke at MAC addresses you probably already can
>> >> > break networking.
>> >>
>> >> I don't understand why poking at MAC address may potentially break
>> >> networking.
>> >
>> > Set a MAC address to match another device on the same LAN,
>> > packets will stop reaching that MAC.
>>
>> What I meant was guest users may create a virtual link, say veth that
>> has exactly the same MAC address as that for the VF, which can easily
>> get around of the binding procedure.
>
> This patchset limits binding to PCI devices so it won't be affected
> by any hacks around virtual devices.

Wait, I vaguely recall you seemed to like to generalize this feature
to non-PCI device. But now you're saying it should stick to PCI. It's
not that I'm reluctant with sticking to PCI. The fact is that I don't
think we can go with implementation until the semantics of the
so-called _F_STANDBY feature can be clearly defined into the spec.
Previously the boundary of using MAC address as the identifier for
bonding was quite confusing to me. And now PCI adds to the matrix.
However it still does not guarantee uniqueness I think. It's almost
incorrect of choosing MAC address as the ID in the beginning since
that has the implication of breaking existing configs. I don't think
libvirt or QEMU today restricts the MAC address to be unique per VM
instance. Neither the virtio spec mentions that.

In addition, the fact that it's difficult to fake a PCI device on Linux
does not mean the same applies to other OSes that are going to implement
this VirtIO feature. It's a fragile assumption IMHO.

>
>> There's no explicit flag to
>> identify a VF or pass-through device AFAIK. And sometimes this happens
>> maybe due to user misconfiguring the link. This process should be
>> hardened to avoid from any potential configuration errors.
>
> They are still PCI devices though.
>
>> >
>> >> Unlike VF, passthrough PCI endpoint device has its freedom
>> >> to change the MAC address. Even on a VF setup it's not necessarily
>> >> always safe to assume the VF's MAC address cannot or shouldn't be
>> >> changed. That depends on the specific need whether the host admin
>> >> wants to restrict guest from changing the MAC address, although in
>> >> most cases it's true.
>> >>
>> >> I understand we can use the perm_addr to distinguish. But as said,
>> >> this will pose limitation of flexible configuration where one can
>> >> assign VFs with identical MAC address at all while each VF belongs to
>> >> different PF and/or different subnet for e.g. load balancing.
>> >> And
>> >> furthermore, the QEMU device model never uses MAC address to be
>> >> interpreted as an identifier, which requires to be unique per VM
>> >> instance. Why we're introducing this inconsistency?
>> >>
>> >> -Siwei
>> >
>> > Because it addresses most of the issues and is simple.  That's already
>> > much better than what we have now which is nothing unless guest
>> > configures things manually.
>>
>> Did you see my QEMU patch for using BDF as the grouping identifier?
>
> Yes. And I don't think it can work because bus numbers are
> guest specified.

I know it's not ideal but perhaps it's the best one can do in the KVM
world without adding complex config e.g. PCI bridge. Even if bus
number is guest specified, it's readily available in the guest and
recognizable by any OS, while on the QEMU configuration users specify
an id instead of the bus number. Unlike Hyper-V PCI bus, I don't think
there exists a para-virtual PCI bus in QEMU backend to expose VPD
capability to a passthrough device.

>
>> And there can be others like what you suggested, but the point is that
>> it's required to support explicit grouping mechanism from day one,
>> before the backup property cast into stones.
>
> Let's start with addressing simple configs with just two NICs.
>
> Down the road I can see possible extensions that can work: for example,
> require that devices are on the same pci bridge. Or we could even make
> the virtio device actually include a pci bridge (as part of same
> or a child function), the PT would have to be
> behind it.
>
> As long as we are not breaking anything, adding more flags to fix
> non-working configurations is always fair game.

While it may work, the PCI bridge has NUMA and IOMMU implications that
would restrict the current flexibility to group devices. I'm not sure
if vIOMMU would have to be introduced inadvertently for
isolation/protection of devices under the PCI bridge which may cause
negative performance impact on the VF.

>
>> This is orthogonal to
>> device model being proposed, be it 1-netdev or not. Delaying it would
>> just mean support and compatibility burden, appearing more like a
>> design flaw rather than a feature to add later on.
>
> Well it's mostly myself who gets to support it, and I see the device
> model as much more fundamental as userspace will come to depend
> on it. So I'm not too worried, let's take this one step at a time.
>
>> >
>> > I think ideally the infrastructure should support flexible matching of
>> > NICs - netvsc is already reported to be moving to some kind of serial
>> > address.
>> >
>> As Stephen said, Hyper-V supports the serial UUID thing from day-one.
>> It's just the Linux netvsc guest driver itself does not leverage that
>> ID from the very beginning.
>>
>> Regards,
>> -Siwei
>
> We could add something like this, too. For example,
> we could add a virtual VPD capability with a UUID.

I'm not an expert on that and wonder how you could do this (add a
virtual VPD capability with a UUID to passthrough device) with
existing QEMU emulation model and native PCI bus.

>
> Do you know how exactly does hyperv pass the UUID for NICs?

Stephen might know it more and can correct me. But my personal
interpretation is that the SN is a host generated 32 bit sequence
number which is unique per VM instance and gets propagated to guest
via the para-virtual Hyper-V PCI bus.

Regards,
-Siwei

>
>> >
>> >> >
>> >> >
>> >> >
>> >> >
>> >> >>
>> >> >> >
>> >> >> > --
>> >> >> > MST

^ permalink raw reply

* Re: [PATCH net-next 0/2] net/sctp: Avoid allocating high order memory with kmalloc()
From: Oleg Babin @ 2018-04-26 22:14 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner
  Cc: netdev, linux-sctp, David S. Miller, Vlad Yasevich, Neil Horman,
	Xin Long, Andrey Ryabinin
In-Reply-To: <20180423213314.GG3711@localhost.localdomain>

Hi Marcelo,

On 04/24/2018 12:33 AM, Marcelo Ricardo Leitner wrote:
> Hi,
> 
> On Mon, Apr 23, 2018 at 09:41:04PM +0300, Oleg Babin wrote:
>> Each SCTP association can have up to 65535 input and output streams.
>> For each stream type an array of sctp_stream_in or sctp_stream_out
>> structures is allocated using kmalloc_array() function. This function
>> allocates physically contiguous memory regions, so this can lead
>> to allocation of memory regions of very high order, i.e.:
>>
>>   sizeof(struct sctp_stream_out) == 24,
>>   ((65535 * 24) / 4096) == 383 memory pages (4096 byte per page),
>>   which means 9th memory order.
>>
>> This can lead to memory allocation failures on the systems
>> under a memory stress.
> 
> Did you do performance tests while actually using these 65k streams
> and with 256 (so it gets 2 pages)?
> 
> This will introduce another deref on each access to an element, but
> I'm not expecting any impact due to it.
> 

No, I didn't do such tests. Could you please tell me what methodology
do you usually use to measure performance properly?

I'm trying to do measurements with iperf3 on unmodified kernel and get
very strange results like this:

ovbabin@ovbabin-laptop:~$ ~/programs/iperf/bin/iperf3 -c 169.254.11.150 --sctp
Connecting to host 169.254.11.150, port 5201
[  5] local 169.254.11.150 port 46330 connected to 169.254.11.150 port 5201
[ ID] Interval           Transfer     Bitrate
[  5]   0.00-1.00   sec  9.88 MBytes  82.8 Mbits/sec                  
[  5]   1.00-2.00   sec   226 MBytes  1.90 Gbits/sec                  
[  5]   2.00-3.00   sec   832 KBytes  6.82 Mbits/sec                  
[  5]   3.00-4.00   sec   640 KBytes  5.24 Mbits/sec                  
[  5]   4.00-5.00   sec   756 MBytes  6.34 Gbits/sec                  
[  5]   5.00-6.00   sec   522 MBytes  4.38 Gbits/sec                  
[  5]   6.00-7.00   sec   896 KBytes  7.34 Mbits/sec                  
[  5]   7.00-8.00   sec   519 MBytes  4.35 Gbits/sec                  
[  5]   8.00-9.00   sec   504 MBytes  4.23 Gbits/sec                  
[  5]   9.00-10.00  sec   475 MBytes  3.98 Gbits/sec                  
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval           Transfer     Bitrate
[  5]   0.00-10.00  sec  2.94 GBytes  2.53 Gbits/sec                  sender
[  5]   0.00-10.04  sec  2.94 GBytes  2.52 Gbits/sec                  receiver

iperf Done.

The values are spread enormously from hundreds of kilobits to gigabits.
I get similar results with netperf. This particular result was obtained
with client and server running on the same machine. Also I tried this
on different machines with different kernel versions - situation was similar.
I compiled latest versions of iperf and netperf from sources.

Could it possibly be that I am missing something very obvious? 

Thanks!

-- 
Best regards,
Oleg
  
>   Marcelo
> --
> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> .
>

^ permalink raw reply

* Re: [PATCH] netfilter: fix nf_tables filter chain type build
From: Pablo Neira Ayuso @ 2018-04-26 22:15 UTC (permalink / raw)
  To: Randy Dunlap
  Cc: netdev@vger.kernel.org, LKML, coreteam, netfilter-devel,
	Florian Westphal, Jozsef Kadlecsik, kbuild test robot
In-Reply-To: <c8e42e35-ccff-471c-8c45-61f44855e21a@infradead.org>

On Sat, Apr 21, 2018 at 09:10:09PM -0700, Randy Dunlap wrote:
> From: Randy Dunlap <rdunlap@infradead.org>
> 
> Fix build errors due to a missing Kconfig dependency term.
> Fixes these build errors:
> 
> net/ipv6/netfilter/nft_chain_nat_ipv6.o: In function `nft_nat_do_chain':
> net/ipv6/netfilter/nft_chain_nat_ipv6.c:37: undefined reference to `nft_do_chain'
> net/ipv6/netfilter/nft_chain_nat_ipv6.o: In function `nft_chain_nat_ipv6_exit':
> net/ipv6/netfilter/nft_chain_nat_ipv6.c:94: undefined reference to `nft_unregister_chain_type'
> net/ipv6/netfilter/nft_chain_nat_ipv6.o: In function `nft_chain_nat_ipv6_init':
> net/ipv6/netfilter/nft_chain_nat_ipv6.c:87: undefined reference to `nft_register_chain_type'

Thanks for sending a patch for this Randy.

We have a patch to address this that should fix this:

https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git/commit/?id=39f2ff0816e5421476c2bc538b68b4bb0708a78e

Thanks!

^ permalink raw reply

* Re: [PATCH net-next 1/2] net/sctp: Make wrappers for accessing in/out streams
From: Oleg Babin @ 2018-04-26 22:19 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner
  Cc: netdev, linux-sctp, David S. Miller, Vlad Yasevich, Neil Horman,
	Xin Long, Andrey Ryabinin
In-Reply-To: <20180423213331.GH3711@localhost.localdomain>

On 04/24/2018 12:33 AM, Marcelo Ricardo Leitner wrote:
> On Mon, Apr 23, 2018 at 09:41:05PM +0300, Oleg Babin wrote:
>> This patch introduces wrappers for accessing in/out streams indirectly.
>> This will enable to replace physically contiguous memory arrays
>> of streams with flexible arrays (or maybe any other appropriate
>> mechanism) which do memory allocation on a per-page basis.
>>
>> Signed-off-by: Oleg Babin <obabin@virtuozzo.com>
>> ---
>>  include/net/sctp/structs.h   |  30 +++++++-----
>>  net/sctp/chunk.c             |   6 ++-
>>  net/sctp/outqueue.c          |  11 +++--
>>  net/sctp/socket.c            |   4 +-
>>  net/sctp/stream.c            | 107 +++++++++++++++++++++++++------------------
>>  net/sctp/stream_interleave.c |   2 +-
>>  net/sctp/stream_sched.c      |  13 +++---
>>  net/sctp/stream_sched_prio.c |  22 ++++-----
>>  net/sctp/stream_sched_rr.c   |   8 ++--
>>  9 files changed, 116 insertions(+), 87 deletions(-)
>>
>> diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
>> index a0ec462..578bb40 100644
>> --- a/include/net/sctp/structs.h
>> +++ b/include/net/sctp/structs.h
>> @@ -394,37 +394,37 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
>>
>>  /* What is the current SSN number for this stream? */
>>  #define sctp_ssn_peek(stream, type, sid) \
>> -	((stream)->type[sid].ssn)
>> +	(sctp_stream_##type##_ptr((stream), (sid))->ssn)
>>
>>  /* Return the next SSN number for this stream.	*/
>>  #define sctp_ssn_next(stream, type, sid) \
>> -	((stream)->type[sid].ssn++)
>> +	(sctp_stream_##type##_ptr((stream), (sid))->ssn++)
>>
>>  /* Skip over this ssn and all below. */
>>  #define sctp_ssn_skip(stream, type, sid, ssn) \
>> -	((stream)->type[sid].ssn = ssn + 1)
>> +	(sctp_stream_##type##_ptr((stream), (sid))->ssn = ssn + 1)
>>
>>  /* What is the current MID number for this stream? */
>>  #define sctp_mid_peek(stream, type, sid) \
>> -	((stream)->type[sid].mid)
>> +	(sctp_stream_##type##_ptr((stream), (sid))->mid)
>>
>>  /* Return the next MID number for this stream.  */
>>  #define sctp_mid_next(stream, type, sid) \
>> -	((stream)->type[sid].mid++)
>> +	(sctp_stream_##type##_ptr((stream), (sid))->mid++)
>>
>>  /* Skip over this mid and all below. */
>>  #define sctp_mid_skip(stream, type, sid, mid) \
>> -	((stream)->type[sid].mid = mid + 1)
>> +	(sctp_stream_##type##_ptr((stream), (sid))->mid = mid + 1)
>>
>> -#define sctp_stream_in(asoc, sid) (&(asoc)->stream.in[sid])
>> +#define sctp_stream_in(asoc, sid) sctp_stream_in_ptr(&(asoc)->stream, (sid))
> 
> This will get confusing:
> - sctp_stream_in(asoc, sid)
> - sctp_stream_in_ptr(stream, sid)
> 
> Considering all usages of sctp_stream_in(), seems you can just update
> them to do the ->stream deref and keep only the later implementation.
> Which then don't need the _ptr suffix.
Ok, I'll change that in the next patch version.

-- 
Best regards,
Oleg Babin

^ permalink raw reply

* Re: [GIT PULL 0/5] IPVS Updates for v4.18
From: Pablo Neira Ayuso @ 2018-04-26 22:19 UTC (permalink / raw)
  To: Simon Horman
  Cc: lvs-devel, netdev, netfilter-devel, Wensong Zhang,
	Julian Anastasov
In-Reply-To: <20180419085614.7437-1-horms@verge.net.au>

On Thu, Apr 19, 2018 at 10:56:09AM +0200, Simon Horman wrote:
> Hi Pablo,
> 
> please consider these IPVS enhancements for v4.18.
> 
> * Whitespace cleanup
> 
> * Add Maglev hashing algorithm as a IPVS scheduler
> 
>   Inju Song says "Implements the Google's Maglev hashing algorithm as a
>   IPVS scheduler.  Basically it provides consistent hashing but offers some
>   special features about disruption and load balancing.
> 
>   1) minimal disruption: when the set of destinations changes,
>      a connection will likely be sent to the same destination
>      as it was before.
> 
>   2) load balancing: each destination will receive an almost
>      equal number of connections.
> 
>  See also: [3.4 Consistent Hashing] in
>  https://www.usenix.org/system/files/conference/nsdi16/nsdi16-paper-eisenbud.pdf
>  "
> 
> * Fix to correct implementation of Knuth's multiplicative hashing
>   which is used in sh/dh/lblc/lblcr algorithms. Instead the
>   implementation provided by the hash_32() macro is used.

Pulled, thanks Simon.

^ permalink raw reply

* Re: [Patch nf] ipvs: initialize tbl->entries after allocation
From: Pablo Neira Ayuso @ 2018-04-26 22:21 UTC (permalink / raw)
  To: Simon Horman
  Cc: Julian Anastasov, Cong Wang, netdev, lvs-devel, netfilter-devel
In-Reply-To: <20180426121423.5c7iy2ddhjy2clzf@verge.net.au>

On Thu, Apr 26, 2018 at 02:14:25PM +0200, Simon Horman wrote:
> On Tue, Apr 24, 2018 at 08:16:14AM +0300, Julian Anastasov wrote:
> > 
> > 	Hello,
> > 
> > On Mon, 23 Apr 2018, Cong Wang wrote:
> > 
> > > tbl->entries is not initialized after kmalloc(), therefore
> > > causes an uninit-value warning in ip_vs_lblc_check_expire()
> > > as reported by syzbot.
> > > 
> > > Reported-by: <syzbot+3dfdea57819073a04f21@syzkaller.appspotmail.com>
> > > Cc: Simon Horman <horms@verge.net.au>
> > > Cc: Julian Anastasov <ja@ssi.bg>
> > > Cc: Pablo Neira Ayuso <pablo@netfilter.org>
> > > Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
> > 
> > 	Thanks!
> > 
> > Acked-by: Julian Anastasov <ja@ssi.bg>
> 
> Thanks.
> 
> Pablo, could you take this into nf?
> 
> Acked-by: Simon Horman <horms@verge.net.au>

Done, thanks Simon.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox