Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next 09/11] bnxt_en: bnxt: add TC flower filter offload support
From: Michael Chan @ 2017-08-28 17:40 UTC (permalink / raw)
  To: davem; +Cc: netdev, Sathya Perla
In-Reply-To: <1503942035-24924-1-git-send-email-michael.chan@broadcom.com>

From: Sathya Perla <sathya.perla@broadcom.com>

This patch adds support for offloading TC based flow
rules and actions for the 'flower' classifier in the bnxt_en driver.
It includes logic to parse flow rules and actions received from the
TC subsystem, store them and issue the corresponding
hwrm_cfa_flow_alloc/free FW cmds. L2/IPv4/IPv6 flows and drop,
redir, vlan push/pop actions are supported in this patch.

In this patch the hwrm_cfa_flow_xxx routines are just stubs.
The code for these routines is introduced in the next patch for easier
review. Also, the code to query the TC/flower action stats will
be introduced in a subsequent patch.

Signed-off-by: Sathya Perla <sathya.perla@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/Kconfig         |   9 +
 drivers/net/ethernet/broadcom/bnxt/Makefile   |   2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |  39 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  23 +
 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c  | 602 ++++++++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h  | 158 +++++++
 drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c |  18 +
 drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h |   8 +
 8 files changed, 850 insertions(+), 9 deletions(-)
 create mode 100644 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
 create mode 100644 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h

diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 1456cb1..67134ec 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -212,6 +212,15 @@ config BNXT_SRIOV
 	  Virtualization support in the NetXtreme-C/E products. This
 	  allows for virtual function acceleration in virtual environments.
 
+config BNXT_FLOWER_OFFLOAD
+	bool "TC Flower offload support for NetXtreme-C/E"
+	depends on BNXT
+	default y
+	---help---
+	  This configuration parameter enables TC Flower packet classifier
+	  offload for eswitch.  This option enables SR-IOV switchdev eswitch
+	  offload.
+
 config BNXT_DCB
 	bool "Data Center Bridging (DCB) Support"
 	default n
diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile
index d141a22..4f0cb8e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/Makefile
+++ b/drivers/net/ethernet/broadcom/bnxt/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_BNXT) += bnxt_en.o
 
-bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o
+bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_tc.o
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 4406f91..d6367c1 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -50,6 +50,7 @@
 #include <linux/bitmap.h>
 #include <linux/cpu_rmap.h>
 #include <linux/cpumask.h>
+#include <net/pkt_cls.h>
 
 #include "bnxt_hsi.h"
 #include "bnxt.h"
@@ -59,6 +60,7 @@
 #include "bnxt_dcb.h"
 #include "bnxt_xdp.h"
 #include "bnxt_vfr.h"
+#include "bnxt_tc.h"
 
 #define BNXT_TX_TIMEOUT		(5 * HZ)
 
@@ -7305,17 +7307,33 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
 	return 0;
 }
 
-static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			 void *type_data)
+static int bnxt_setup_flower(struct net_device *dev,
+			     struct tc_cls_flower_offload *cls_flower)
 {
-	struct tc_mqprio_qopt *mqprio = type_data;
+	struct bnxt *bp = netdev_priv(dev);
 
-	if (type != TC_SETUP_MQPRIO)
+	if (BNXT_VF(bp))
 		return -EOPNOTSUPP;
 
-	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, cls_flower);
+}
+
+static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			 void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return bnxt_setup_flower(dev, type_data);
+	case TC_SETUP_MQPRIO: {
+		struct tc_mqprio_qopt *mqprio = type_data;
+
+		mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 
-	return bnxt_setup_mq_tc(dev, mqprio->num_tc);
+		return bnxt_setup_mq_tc(dev, mqprio->num_tc);
+	}
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
 #ifdef CONFIG_RFS_ACCEL
@@ -7711,6 +7729,7 @@ static void bnxt_remove_one(struct pci_dev *pdev)
 
 	pci_disable_pcie_error_reporting(pdev);
 	unregister_netdev(dev);
+	bnxt_shutdown_tc(bp);
 	cancel_work_sync(&bp->sp_task);
 	bp->sp_event = 0;
 
@@ -8102,9 +8121,12 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	else
 		device_set_wakeup_capable(&pdev->dev, false);
 
+	if (BNXT_PF(bp))
+		bnxt_init_tc(bp);
+
 	rc = register_netdev(dev);
 	if (rc)
-		goto init_err_clr_int;
+		goto init_err_cleanup_tc;
 
 	if (BNXT_PF(bp))
 		bnxt_dl_register(bp);
@@ -8117,7 +8139,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	return 0;
 
-init_err_clr_int:
+init_err_cleanup_tc:
+	bnxt_shutdown_tc(bp);
 	bnxt_clear_int_mode(bp);
 
 init_err_pci_clean:
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 86af8ea..7b888d4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -19,6 +19,7 @@
 #define DRV_VER_UPD	0
 
 #include <linux/interrupt.h>
+#include <linux/rhashtable.h>
 #include <net/devlink.h>
 #include <net/dst_metadata.h>
 #include <net/switchdev.h>
@@ -943,6 +944,27 @@ struct bnxt_test_info {
 #define BNXT_CAG_REG_LEGACY_INT_STATUS	0x4014
 #define BNXT_CAG_REG_BASE		0x300000
 
+struct bnxt_tc_info {
+	bool				enabled;
+
+	/* hash table to store TC offloaded flows */
+	struct rhashtable		flow_table;
+	struct rhashtable_params	flow_ht_params;
+
+	/* hash table to store L2 keys of TC flows */
+	struct rhashtable		l2_table;
+	struct rhashtable_params	l2_ht_params;
+
+	/* lock to atomically add/del an l2 node when a flow is
+	 * added or deleted.
+	 */
+	struct mutex			lock;
+
+	/* Stat counter mask (width) */
+	u64				bytes_mask;
+	u64				packets_mask;
+};
+
 struct bnxt_vf_rep_stats {
 	u64			packets;
 	u64			bytes;
@@ -1289,6 +1311,7 @@ struct bnxt {
 	enum devlink_eswitch_mode eswitch_mode;
 	struct bnxt_vf_rep	**vf_reps; /* array of vf-rep ptrs */
 	u16			*cfa_code_map; /* cfa_code -> vf_idx map */
+	struct bnxt_tc_info	tc_info;
 };
 
 #define BNXT_RX_STATS_OFFSET(counter)			\
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
new file mode 100644
index 0000000..a10df27
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -0,0 +1,602 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2017 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/if_vlan.h>
+#include <net/flow_dissector.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_skbedit.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+#include "bnxt_sriov.h"
+#include "bnxt_tc.h"
+#include "bnxt_vfr.h"
+
+#ifdef CONFIG_BNXT_FLOWER_OFFLOAD
+
+#define BNXT_FID_INVALID			0xffff
+#define VLAN_TCI(vid, prio)	((vid) | ((prio) << VLAN_PRIO_SHIFT))
+
+/* Return the dst fid of the func for flow forwarding
+ * For PFs: src_fid is the fid of the PF
+ * For VF-reps: src_fid the fid of the VF
+ */
+static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev)
+{
+	struct bnxt *bp;
+
+	/* check if dev belongs to the same switch */
+	if (!switchdev_port_same_parent_id(pf_bp->dev, dev)) {
+		netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch",
+			    dev->ifindex);
+		return BNXT_FID_INVALID;
+	}
+
+	/* Is dev a VF-rep? */
+	if (dev != pf_bp->dev)
+		return bnxt_vf_rep_get_fid(dev);
+
+	bp = netdev_priv(dev);
+	return bp->pf.fw_fid;
+}
+
+static int bnxt_tc_parse_redir(struct bnxt *bp,
+			       struct bnxt_tc_actions *actions,
+			       const struct tc_action *tc_act)
+{
+	int ifindex = tcf_mirred_ifindex(tc_act);
+	struct net_device *dev;
+	u16 dst_fid;
+
+	dev = __dev_get_by_index(dev_net(bp->dev), ifindex);
+	if (!dev) {
+		netdev_info(bp->dev, "no dev for ifindex=%d", ifindex);
+		return -EINVAL;
+	}
+
+	/* find the FID from dev */
+	dst_fid = bnxt_flow_get_dst_fid(bp, dev);
+	if (dst_fid == BNXT_FID_INVALID) {
+		netdev_info(bp->dev, "can't get fid for ifindex=%d", ifindex);
+		return -EINVAL;
+	}
+
+	actions->flags |= BNXT_TC_ACTION_FLAG_FWD;
+	actions->dst_fid = dst_fid;
+	actions->dst_dev = dev;
+	return 0;
+}
+
+static void bnxt_tc_parse_vlan(struct bnxt *bp,
+			       struct bnxt_tc_actions *actions,
+			       const struct tc_action *tc_act)
+{
+	if (tcf_vlan_action(tc_act) == TCA_VLAN_ACT_POP) {
+		actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN;
+	} else if (tcf_vlan_action(tc_act) == TCA_VLAN_ACT_PUSH) {
+		actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN;
+		actions->push_vlan_tci = htons(tcf_vlan_push_vid(tc_act));
+		actions->push_vlan_tpid = tcf_vlan_push_proto(tc_act);
+	}
+}
+
+static int bnxt_tc_parse_actions(struct bnxt *bp,
+				 struct bnxt_tc_actions *actions,
+				 struct tcf_exts *tc_exts)
+{
+	const struct tc_action *tc_act;
+	LIST_HEAD(tc_actions);
+	int rc;
+
+	if (!tcf_exts_has_actions(tc_exts)) {
+		netdev_info(bp->dev, "no actions");
+		return -EINVAL;
+	}
+
+	tcf_exts_to_list(tc_exts, &tc_actions);
+	list_for_each_entry(tc_act, &tc_actions, list) {
+		/* Drop action */
+		if (is_tcf_gact_shot(tc_act)) {
+			actions->flags |= BNXT_TC_ACTION_FLAG_DROP;
+			return 0; /* don't bother with other actions */
+		}
+
+		/* Redirect action */
+		if (is_tcf_mirred_egress_redirect(tc_act)) {
+			rc = bnxt_tc_parse_redir(bp, actions, tc_act);
+			if (rc)
+				return rc;
+			continue;
+		}
+
+		/* Push/pop VLAN */
+		if (is_tcf_vlan(tc_act)) {
+			bnxt_tc_parse_vlan(bp, actions, tc_act);
+			continue;
+		}
+	}
+
+	return 0;
+}
+
+#define GET_KEY(flow_cmd, key_type)					\
+		skb_flow_dissector_target((flow_cmd)->dissector, key_type,\
+					  (flow_cmd)->key)
+#define GET_MASK(flow_cmd, key_type)					\
+		skb_flow_dissector_target((flow_cmd)->dissector, key_type,\
+					  (flow_cmd)->mask)
+
+static int bnxt_tc_parse_flow(struct bnxt *bp,
+			      struct tc_cls_flower_offload *tc_flow_cmd,
+			      struct bnxt_tc_flow *flow)
+{
+	struct flow_dissector *dissector = tc_flow_cmd->dissector;
+	u16 addr_type = 0;
+
+	/* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */
+	if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 ||
+	    (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) == 0) {
+		netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x",
+			    dissector->used_keys);
+		return -EOPNOTSUPP;
+	}
+
+	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_dissector_key_control *key =
+			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_CONTROL);
+
+		addr_type = key->addr_type;
+	}
+
+	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_dissector_key_basic *key =
+			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_BASIC);
+		struct flow_dissector_key_basic *mask =
+			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_BASIC);
+
+		flow->l2_key.ether_type = key->n_proto;
+		flow->l2_mask.ether_type = mask->n_proto;
+
+		if (key->n_proto == htons(ETH_P_IP) ||
+		    key->n_proto == htons(ETH_P_IPV6)) {
+			flow->l4_key.ip_proto = key->ip_proto;
+			flow->l4_mask.ip_proto = mask->ip_proto;
+		}
+	}
+
+	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_dissector_key_eth_addrs *key =
+			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ETH_ADDRS);
+		struct flow_dissector_key_eth_addrs *mask =
+			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ETH_ADDRS);
+
+		flow->flags |= BNXT_TC_FLOW_FLAGS_ETH_ADDRS;
+		ether_addr_copy(flow->l2_key.dmac, key->dst);
+		ether_addr_copy(flow->l2_mask.dmac, mask->dst);
+		ether_addr_copy(flow->l2_key.smac, key->src);
+		ether_addr_copy(flow->l2_mask.smac, mask->src);
+	}
+
+	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_dissector_key_vlan *key =
+			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_VLAN);
+		struct flow_dissector_key_vlan *mask =
+			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_VLAN);
+
+		flow->l2_key.inner_vlan_tci =
+		   cpu_to_be16(VLAN_TCI(key->vlan_id, key->vlan_priority));
+		flow->l2_mask.inner_vlan_tci =
+		   cpu_to_be16((VLAN_TCI(mask->vlan_id, mask->vlan_priority)));
+		flow->l2_key.inner_vlan_tpid = htons(ETH_P_8021Q);
+		flow->l2_mask.inner_vlan_tpid = htons(0xffff);
+		flow->l2_key.num_vlans = 1;
+	}
+
+	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		struct flow_dissector_key_ipv4_addrs *key =
+			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+		struct flow_dissector_key_ipv4_addrs *mask =
+			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+
+		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV4_ADDRS;
+		flow->l3_key.ipv4.daddr.s_addr = key->dst;
+		flow->l3_mask.ipv4.daddr.s_addr = mask->dst;
+		flow->l3_key.ipv4.saddr.s_addr = key->src;
+		flow->l3_mask.ipv4.saddr.s_addr = mask->src;
+	} else if (dissector_uses_key(dissector,
+				      FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+		struct flow_dissector_key_ipv6_addrs *key =
+			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV6_ADDRS);
+		struct flow_dissector_key_ipv6_addrs *mask =
+			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV6_ADDRS);
+
+		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV6_ADDRS;
+		flow->l3_key.ipv6.daddr = key->dst;
+		flow->l3_mask.ipv6.daddr = mask->dst;
+		flow->l3_key.ipv6.saddr = key->src;
+		flow->l3_mask.ipv6.saddr = mask->src;
+	}
+
+	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_dissector_key_ports *key =
+			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_PORTS);
+		struct flow_dissector_key_ports *mask =
+			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_PORTS);
+
+		flow->flags |= BNXT_TC_FLOW_FLAGS_PORTS;
+		flow->l4_key.ports.dport = key->dst;
+		flow->l4_mask.ports.dport = mask->dst;
+		flow->l4_key.ports.sport = key->src;
+		flow->l4_mask.ports.sport = mask->src;
+	}
+
+	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ICMP)) {
+		struct flow_dissector_key_icmp *key =
+			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ICMP);
+		struct flow_dissector_key_icmp *mask =
+			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ICMP);
+
+		flow->flags |= BNXT_TC_FLOW_FLAGS_ICMP;
+		flow->l4_key.icmp.type = key->type;
+		flow->l4_key.icmp.code = key->code;
+		flow->l4_mask.icmp.type = mask->type;
+		flow->l4_mask.icmp.code = mask->code;
+	}
+
+	return bnxt_tc_parse_actions(bp, &flow->actions, tc_flow_cmd->exts);
+}
+
+static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp, __le16 flow_handle)
+{
+	return 0;
+}
+
+static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
+				    __le16 ref_flow_handle, __le16 *flow_handle)
+{
+	return 0;
+}
+
+static int bnxt_tc_put_l2_node(struct bnxt *bp,
+			       struct bnxt_tc_flow_node *flow_node)
+{
+	struct bnxt_tc_l2_node *l2_node = flow_node->l2_node;
+	struct bnxt_tc_info *tc_info = &bp->tc_info;
+	int rc;
+
+	/* remove flow_node from the L2 shared flow list */
+	list_del(&flow_node->l2_list_node);
+	if (--l2_node->refcount == 0) {
+		rc =  rhashtable_remove_fast(&tc_info->l2_table, &l2_node->node,
+					     tc_info->l2_ht_params);
+		if (rc)
+			netdev_err(bp->dev,
+				   "Error: %s: rhashtable_remove_fast: %d",
+				   __func__, rc);
+		kfree_rcu(l2_node, rcu);
+	}
+	return 0;
+}
+
+static struct bnxt_tc_l2_node *
+bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table,
+		    struct rhashtable_params ht_params,
+		    struct bnxt_tc_l2_key *l2_key)
+{
+	struct bnxt_tc_l2_node *l2_node;
+	int rc;
+
+	l2_node = rhashtable_lookup_fast(l2_table, l2_key, ht_params);
+	if (!l2_node) {
+		l2_node = kzalloc(sizeof(*l2_node), GFP_KERNEL);
+		if (!l2_node) {
+			rc = -ENOMEM;
+			return NULL;
+		}
+
+		l2_node->key = *l2_key;
+		rc = rhashtable_insert_fast(l2_table, &l2_node->node,
+					    ht_params);
+		if (rc) {
+			kfree(l2_node);
+			netdev_err(bp->dev,
+				   "Error: %s: rhashtable_insert_fast: %d",
+				   __func__, rc);
+			return NULL;
+		}
+		INIT_LIST_HEAD(&l2_node->common_l2_flows);
+	}
+	return l2_node;
+}
+
+/* Get the ref_flow_handle for a flow by checking if there are any other
+ * flows that share the same L2 key as this flow.
+ */
+static int
+bnxt_tc_get_ref_flow_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
+			    struct bnxt_tc_flow_node *flow_node,
+			    __le16 *ref_flow_handle)
+{
+	struct bnxt_tc_info *tc_info = &bp->tc_info;
+	struct bnxt_tc_flow_node *ref_flow_node;
+	struct bnxt_tc_l2_node *l2_node;
+
+	l2_node = bnxt_tc_get_l2_node(bp, &tc_info->l2_table,
+				      tc_info->l2_ht_params,
+				      &flow->l2_key);
+	if (!l2_node)
+		return -1;
+
+	/* If any other flow is using this l2_node, use it's flow_handle
+	 * as the ref_flow_handle
+	 */
+	if (l2_node->refcount > 0) {
+		ref_flow_node = list_first_entry(&l2_node->common_l2_flows,
+						 struct bnxt_tc_flow_node,
+						 l2_list_node);
+		*ref_flow_handle = ref_flow_node->flow_handle;
+	} else {
+		*ref_flow_handle = cpu_to_le16(0xffff);
+	}
+
+	/* Insert the l2_node into the flow_node so that subsequent flows
+	 * with a matching l2 key can use the flow_handle of this flow
+	 * as their ref_flow_handle
+	 */
+	flow_node->l2_node = l2_node;
+	list_add(&flow_node->l2_list_node, &l2_node->common_l2_flows);
+	l2_node->refcount++;
+	return 0;
+}
+
+/* After the flow parsing is done, this routine is used for checking
+ * if there are any aspects of the flow that prevent it from being
+ * offloaded.
+ */
+static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow)
+{
+	/* If L4 ports are specified then ip_proto must be TCP or UDP */
+	if ((flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) &&
+	    (flow->l4_key.ip_proto != IPPROTO_TCP &&
+	     flow->l4_key.ip_proto != IPPROTO_UDP)) {
+		netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports",
+			    flow->l4_key.ip_proto);
+		return false;
+	}
+
+	return true;
+}
+
+static int __bnxt_tc_del_flow(struct bnxt *bp,
+			      struct bnxt_tc_flow_node *flow_node)
+{
+	struct bnxt_tc_info *tc_info = &bp->tc_info;
+	int rc;
+
+	/* send HWRM cmd to free the flow-id */
+	bnxt_hwrm_cfa_flow_free(bp, flow_node->flow_handle);
+
+	mutex_lock(&tc_info->lock);
+
+	/* release reference to l2 node */
+	bnxt_tc_put_l2_node(bp, flow_node);
+
+	mutex_unlock(&tc_info->lock);
+
+	rc = rhashtable_remove_fast(&tc_info->flow_table, &flow_node->node,
+				    tc_info->flow_ht_params);
+	if (rc)
+		netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d",
+			   __func__, rc);
+
+	kfree_rcu(flow_node, rcu);
+	return 0;
+}
+
+/* Add a new flow or replace an existing flow.
+ * Notes on locking:
+ * There are essentially two critical sections here.
+ * 1. while adding a new flow
+ *    a) lookup l2-key
+ *    b) issue HWRM cmd and get flow_handle
+ *    c) link l2-key with flow
+ * 2. while deleting a flow
+ *    a) unlinking l2-key from flow
+ * A lock is needed to protect these two critical sections.
+ *
+ * The hash-tables are already protected by the rhashtable API.
+ */
+static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
+			    struct tc_cls_flower_offload *tc_flow_cmd)
+{
+	struct bnxt_tc_flow_node *new_node, *old_node;
+	struct bnxt_tc_info *tc_info = &bp->tc_info;
+	struct bnxt_tc_flow *flow;
+	__le16 ref_flow_handle;
+	int rc;
+
+	/* allocate memory for the new flow and it's node */
+	new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+	if (!new_node) {
+		rc = -ENOMEM;
+		goto done;
+	}
+	new_node->cookie = tc_flow_cmd->cookie;
+	flow = &new_node->flow;
+
+	rc = bnxt_tc_parse_flow(bp, tc_flow_cmd, flow);
+	if (rc)
+		goto free_node;
+	flow->src_fid = src_fid;
+
+	if (!bnxt_tc_can_offload(bp, flow)) {
+		rc = -ENOSPC;
+		goto free_node;
+	}
+
+	/* If a flow exists with the same cookie, delete it */
+	old_node = rhashtable_lookup_fast(&tc_info->flow_table,
+					  &tc_flow_cmd->cookie,
+					  tc_info->flow_ht_params);
+	if (old_node)
+		__bnxt_tc_del_flow(bp, old_node);
+
+	/* Check if the L2 part of the flow has been offloaded already.
+	 * If so, bump up it's refcnt and get it's reference handle.
+	 */
+	mutex_lock(&tc_info->lock);
+	rc = bnxt_tc_get_ref_flow_handle(bp, flow, new_node, &ref_flow_handle);
+	if (rc)
+		goto unlock;
+
+	/* send HWRM cmd to alloc the flow */
+	rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle,
+				      &new_node->flow_handle);
+	if (rc)
+		goto put_l2;
+
+	/* add new flow to flow-table */
+	rc = rhashtable_insert_fast(&tc_info->flow_table, &new_node->node,
+				    tc_info->flow_ht_params);
+	if (rc)
+		goto hwrm_flow_free;
+
+	mutex_unlock(&tc_info->lock);
+	return 0;
+
+hwrm_flow_free:
+	bnxt_hwrm_cfa_flow_free(bp, new_node->flow_handle);
+put_l2:
+	bnxt_tc_put_l2_node(bp, new_node);
+unlock:
+	mutex_unlock(&tc_info->lock);
+free_node:
+	kfree(new_node);
+done:
+	netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d",
+		   __func__, tc_flow_cmd->cookie, rc);
+	return rc;
+}
+
+static int bnxt_tc_del_flow(struct bnxt *bp,
+			    struct tc_cls_flower_offload *tc_flow_cmd)
+{
+	struct bnxt_tc_info *tc_info = &bp->tc_info;
+	struct bnxt_tc_flow_node *flow_node;
+
+	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
+					   &tc_flow_cmd->cookie,
+					   tc_info->flow_ht_params);
+	if (!flow_node) {
+		netdev_info(bp->dev, "ERROR: no flow_node for cookie %lx",
+			    tc_flow_cmd->cookie);
+		return -EINVAL;
+	}
+
+	return __bnxt_tc_del_flow(bp, flow_node);
+}
+
+static int bnxt_tc_get_flow_stats(struct bnxt *bp,
+				  struct tc_cls_flower_offload *tc_flow_cmd)
+{
+	return 0;
+}
+
+int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
+			 struct tc_cls_flower_offload *cls_flower)
+{
+	int rc = 0;
+
+	switch (cls_flower->command) {
+	case TC_CLSFLOWER_REPLACE:
+		rc = bnxt_tc_add_flow(bp, src_fid, cls_flower);
+		break;
+
+	case TC_CLSFLOWER_DESTROY:
+		rc = bnxt_tc_del_flow(bp, cls_flower);
+		break;
+
+	case TC_CLSFLOWER_STATS:
+		rc = bnxt_tc_get_flow_stats(bp, cls_flower);
+		break;
+	}
+	return rc;
+}
+
+static const struct rhashtable_params bnxt_tc_flow_ht_params = {
+	.head_offset = offsetof(struct bnxt_tc_flow_node, node),
+	.key_offset = offsetof(struct bnxt_tc_flow_node, cookie),
+	.key_len = sizeof(((struct bnxt_tc_flow_node *)0)->cookie),
+	.automatic_shrinking = true
+};
+
+static const struct rhashtable_params bnxt_tc_l2_ht_params = {
+	.head_offset = offsetof(struct bnxt_tc_l2_node, node),
+	.key_offset = offsetof(struct bnxt_tc_l2_node, key),
+	.key_len = BNXT_TC_L2_KEY_LEN,
+	.automatic_shrinking = true
+};
+
+/* convert counter width in bits to a mask */
+#define mask(width)		((u64)~0 >> (64 - (width)))
+
+int bnxt_init_tc(struct bnxt *bp)
+{
+	struct bnxt_tc_info *tc_info = &bp->tc_info;
+	int rc;
+
+	if (bp->hwrm_spec_code < 0x10800) {
+		netdev_warn(bp->dev,
+			    "Firmware does not support TC flower offload.\n");
+		return -ENOTSUPP;
+	}
+	mutex_init(&tc_info->lock);
+
+	/* Counter widths are programmed by FW */
+	tc_info->bytes_mask = mask(36);
+	tc_info->packets_mask = mask(28);
+
+	tc_info->flow_ht_params = bnxt_tc_flow_ht_params;
+	rc = rhashtable_init(&tc_info->flow_table, &tc_info->flow_ht_params);
+	if (rc)
+		return rc;
+
+	tc_info->l2_ht_params = bnxt_tc_l2_ht_params;
+	rc = rhashtable_init(&tc_info->l2_table, &tc_info->l2_ht_params);
+	if (rc)
+		goto destroy_flow_table;
+
+	tc_info->enabled = true;
+	bp->dev->hw_features |= NETIF_F_HW_TC;
+	bp->dev->features |= NETIF_F_HW_TC;
+	return 0;
+
+destroy_flow_table:
+	rhashtable_destroy(&tc_info->flow_table);
+	return rc;
+}
+
+void bnxt_shutdown_tc(struct bnxt *bp)
+{
+	struct bnxt_tc_info *tc_info = &bp->tc_info;
+
+	if (!tc_info->enabled)
+		return;
+
+	rhashtable_destroy(&tc_info->flow_table);
+	rhashtable_destroy(&tc_info->l2_table);
+}
+
+#else
+#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
new file mode 100644
index 0000000..6c4c1ed
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
@@ -0,0 +1,158 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2017 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef BNXT_TC_H
+#define BNXT_TC_H
+
+#ifdef CONFIG_BNXT_FLOWER_OFFLOAD
+
+/* Structs used for storing the filter/actions of the TC cmd.
+ */
+struct bnxt_tc_l2_key {
+	u8		dmac[ETH_ALEN];
+	u8		smac[ETH_ALEN];
+	__be16		inner_vlan_tpid;
+	__be16		inner_vlan_tci;
+	__be16		ether_type;
+	u8		num_vlans;
+};
+
+struct bnxt_tc_l3_key {
+	union {
+		struct {
+			struct in_addr daddr;
+			struct in_addr saddr;
+		} ipv4;
+		struct {
+			struct in6_addr daddr;
+			struct in6_addr saddr;
+		} ipv6;
+	};
+};
+
+struct bnxt_tc_l4_key {
+	u8  ip_proto;
+	union {
+		struct {
+			__be16 sport;
+			__be16 dport;
+		} ports;
+		struct {
+			u8 type;
+			u8 code;
+		} icmp;
+	};
+};
+
+struct bnxt_tc_actions {
+	u32				flags;
+#define BNXT_TC_ACTION_FLAG_FWD			BIT(0)
+#define BNXT_TC_ACTION_FLAG_FWD_VXLAN		BIT(1)
+#define BNXT_TC_ACTION_FLAG_PUSH_VLAN		BIT(3)
+#define BNXT_TC_ACTION_FLAG_POP_VLAN		BIT(4)
+#define BNXT_TC_ACTION_FLAG_DROP		BIT(5)
+
+	u16				dst_fid;
+	struct net_device		*dst_dev;
+	__be16				push_vlan_tpid;
+	__be16				push_vlan_tci;
+};
+
+struct bnxt_tc_flow_stats {
+	u64		packets;
+	u64		bytes;
+};
+
+struct bnxt_tc_flow {
+	u32				flags;
+#define BNXT_TC_FLOW_FLAGS_ETH_ADDRS		BIT(1)
+#define BNXT_TC_FLOW_FLAGS_IPV4_ADDRS		BIT(2)
+#define BNXT_TC_FLOW_FLAGS_IPV6_ADDRS		BIT(3)
+#define BNXT_TC_FLOW_FLAGS_PORTS		BIT(4)
+#define BNXT_TC_FLOW_FLAGS_ICMP			BIT(5)
+
+	/* flow applicable to pkts ingressing on this fid */
+	u16				src_fid;
+	struct bnxt_tc_l2_key		l2_key;
+	struct bnxt_tc_l2_key		l2_mask;
+	struct bnxt_tc_l3_key		l3_key;
+	struct bnxt_tc_l3_key		l3_mask;
+	struct bnxt_tc_l4_key		l4_key;
+	struct bnxt_tc_l4_key		l4_mask;
+
+	struct bnxt_tc_actions		actions;
+
+	/* updated stats accounting for hw-counter wrap-around */
+	struct bnxt_tc_flow_stats	stats;
+	/* previous snap-shot of stats */
+	struct bnxt_tc_flow_stats	prev_stats;
+	unsigned long			lastused; /* jiffies */
+};
+
+/* L2 hash table
+ * This data-struct is used for L2-flow table.
+ * The L2 part of a flow is stored in a hash table.
+ * A flow that shares the same L2 key/mask with an
+ * already existing flow must refer to it's flow handle.
+ */
+struct bnxt_tc_l2_node {
+	/* hash key: first 16b of key */
+#define BNXT_TC_L2_KEY_LEN			16
+	struct bnxt_tc_l2_key	key;
+	struct rhash_head	node;
+
+	/* a linked list of flows that share the same l2 key */
+	struct list_head	common_l2_flows;
+
+	/* number of flows sharing the l2 key */
+	u16			refcount;
+
+	struct rcu_head		rcu;
+};
+
+struct bnxt_tc_flow_node {
+	/* hash key: provided by TC */
+	unsigned long			cookie;
+	struct rhash_head		node;
+
+	struct bnxt_tc_flow		flow;
+
+	__le16				flow_handle;
+
+	/* L2 node in l2 hashtable that shares flow's l2 key */
+	struct bnxt_tc_l2_node		*l2_node;
+	/* for the shared_flows list maintained in l2_node */
+	struct list_head		l2_list_node;
+
+	struct rcu_head			rcu;
+};
+
+int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
+			 struct tc_cls_flower_offload *cls_flower);
+int bnxt_init_tc(struct bnxt *bp);
+void bnxt_shutdown_tc(struct bnxt *bp);
+
+#else /* CONFIG_BNXT_FLOWER_OFFLOAD */
+
+static inline int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
+				       struct tc_cls_flower_offload *cls_flower)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int bnxt_init_tc(struct bnxt *bp)
+{
+	return 0;
+}
+
+static inline void bnxt_shutdown_tc(struct bnxt *bp)
+{
+}
+#endif /* CONFIG_BNXT_FLOWER_OFFLOAD */
+#endif /* BNXT_TC_H */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index c365d3c..e75db04 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -11,10 +11,12 @@
 #include <linux/etherdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/jhash.h>
+#include <net/pkt_cls.h>
 
 #include "bnxt_hsi.h"
 #include "bnxt.h"
 #include "bnxt_vfr.h"
+#include "bnxt_tc.h"
 
 #ifdef CONFIG_BNXT_SRIOV
 
@@ -113,6 +115,21 @@ static netdev_tx_t bnxt_vf_rep_xmit(struct sk_buff *skb,
 	stats->tx_bytes = vf_rep->tx_stats.bytes;
 }
 
+static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+				void *type_data)
+{
+	struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+	struct bnxt *bp = vf_rep->bp;
+	int vf_fid = bp->pf.vf[vf_rep->vf_idx].fw_fid;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return bnxt_tc_setup_flower(bp, vf_fid, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code)
 {
 	u16 vf_idx;
@@ -182,6 +199,7 @@ static int bnxt_vf_rep_port_attr_get(struct net_device *dev,
 	.ndo_stop		= bnxt_vf_rep_close,
 	.ndo_start_xmit		= bnxt_vf_rep_xmit,
 	.ndo_get_stats64	= bnxt_vf_rep_get_stats64,
+	.ndo_setup_tc		= bnxt_vf_rep_setup_tc,
 	.ndo_get_phys_port_name = bnxt_vf_rep_get_phys_port_name
 };
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
index 3e997c9..d8b5f89 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
@@ -45,6 +45,14 @@ static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl)
 void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb);
 struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code);
 
+static inline u16 bnxt_vf_rep_get_fid(struct net_device *dev)
+{
+	struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+	struct bnxt *bp = vf_rep->bp;
+
+	return bp->pf.vf[vf_rep->vf_idx].fw_fid;
+}
+
 #else
 
 static inline int bnxt_dl_register(struct bnxt *bp)
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 10/11] bnxt_en: add TC flower offload flow_alloc/free FW cmds
From: Michael Chan @ 2017-08-28 17:40 UTC (permalink / raw)
  To: davem; +Cc: netdev, Sathya Perla
In-Reply-To: <1503942035-24924-1-git-send-email-michael.chan@broadcom.com>

From: Sathya Perla <sathya.perla@broadcom.com>

This patch adds the hwrm_cfa_flow_alloc/free() routines
that are needed to issue the FW cmds needed for TC flower offload.

Signed-off-by: Sathya Perla <sathya.perla@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 141 ++++++++++++++++++++++++++-
 1 file changed, 139 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index a10df27..5fa0835 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -259,13 +259,150 @@ static int bnxt_tc_parse_flow(struct bnxt *bp,
 
 static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp, __le16 flow_handle)
 {
-	return 0;
+	struct hwrm_cfa_flow_free_input req = { 0 };
+	int rc;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_FREE, -1, -1);
+	req.flow_handle = flow_handle;
+
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
+		netdev_info(bp->dev, "Error: %s: flow_handle=0x%x rc=%d",
+			    __func__, flow_handle, rc);
+	return rc;
+}
+
+static int ipv6_mask_len(struct in6_addr *mask)
+{
+	int mask_len = 0, i;
+
+	for (i = 0; i < 4; i++)
+		mask_len += inet_mask_len(mask->s6_addr32[i]);
+
+	return mask_len;
+}
+
+static bool is_wildcard(void *mask, int len)
+{
+	const u8 *p = mask;
+	int i;
+
+	for (i = 0; i < len; i++) {
+		if (p[i] != 0)
+			return false;
+	}
+	return true;
 }
 
 static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
 				    __le16 ref_flow_handle, __le16 *flow_handle)
 {
-	return 0;
+	struct hwrm_cfa_flow_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+	struct bnxt_tc_actions *actions = &flow->actions;
+	struct bnxt_tc_l3_key *l3_mask = &flow->l3_mask;
+	struct bnxt_tc_l3_key *l3_key = &flow->l3_key;
+	struct hwrm_cfa_flow_alloc_input req = { 0 };
+	u16 flow_flags = 0, action_flags = 0;
+	int rc;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_ALLOC, -1, -1);
+
+	req.src_fid = cpu_to_le16(flow->src_fid);
+	req.ref_flow_handle = ref_flow_handle;
+	req.ethertype = flow->l2_key.ether_type;
+	req.ip_proto = flow->l4_key.ip_proto;
+
+	if (flow->flags & BNXT_TC_FLOW_FLAGS_ETH_ADDRS) {
+		memcpy(req.dmac, flow->l2_key.dmac, ETH_ALEN);
+		memcpy(req.smac, flow->l2_key.smac, ETH_ALEN);
+	}
+
+	if (flow->l2_key.num_vlans > 0) {
+		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_ONE;
+		/* FW expects the inner_vlan_tci value to be set
+		 * in outer_vlan_tci when num_vlans is 1 (which is
+		 * always the case in TC.)
+		 */
+		req.outer_vlan_tci = flow->l2_key.inner_vlan_tci;
+	}
+
+	/* If all IP and L4 fields are wildcarded then this is an L2 flow */
+	if (is_wildcard(&l3_mask, sizeof(l3_mask)) &&
+	    is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) {
+		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2;
+	} else {
+		flow_flags |= flow->l2_key.ether_type == htons(ETH_P_IP) ?
+				CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV4 :
+				CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6;
+
+		if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV4_ADDRS) {
+			req.ip_dst[0] = l3_key->ipv4.daddr.s_addr;
+			req.ip_dst_mask_len =
+				inet_mask_len(l3_mask->ipv4.daddr.s_addr);
+			req.ip_src[0] = l3_key->ipv4.saddr.s_addr;
+			req.ip_src_mask_len =
+				inet_mask_len(l3_mask->ipv4.saddr.s_addr);
+		} else if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV6_ADDRS) {
+			memcpy(req.ip_dst, l3_key->ipv6.daddr.s6_addr32,
+			       sizeof(req.ip_dst));
+			req.ip_dst_mask_len =
+					ipv6_mask_len(&l3_mask->ipv6.daddr);
+			memcpy(req.ip_src, l3_key->ipv6.saddr.s6_addr32,
+			       sizeof(req.ip_src));
+			req.ip_src_mask_len =
+					ipv6_mask_len(&l3_mask->ipv6.saddr);
+		}
+	}
+
+	if (flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) {
+		req.l4_src_port = flow->l4_key.ports.sport;
+		req.l4_src_port_mask = flow->l4_mask.ports.sport;
+		req.l4_dst_port = flow->l4_key.ports.dport;
+		req.l4_dst_port_mask = flow->l4_mask.ports.dport;
+	} else if (flow->flags & BNXT_TC_FLOW_FLAGS_ICMP) {
+		/* l4 ports serve as type/code when ip_proto is ICMP */
+		req.l4_src_port = htons(flow->l4_key.icmp.type);
+		req.l4_src_port_mask = htons(flow->l4_mask.icmp.type);
+		req.l4_dst_port = htons(flow->l4_key.icmp.code);
+		req.l4_dst_port_mask = htons(flow->l4_mask.icmp.code);
+	}
+	req.flags = cpu_to_le16(flow_flags);
+
+	if (actions->flags & BNXT_TC_ACTION_FLAG_DROP) {
+		action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP;
+	} else {
+		if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
+			action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD;
+			req.dst_fid = cpu_to_le16(actions->dst_fid);
+		}
+		if (actions->flags & BNXT_TC_ACTION_FLAG_PUSH_VLAN) {
+			action_flags |=
+			    CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
+			req.l2_rewrite_vlan_tpid = actions->push_vlan_tpid;
+			req.l2_rewrite_vlan_tci = actions->push_vlan_tci;
+			memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
+			memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
+		}
+		if (actions->flags & BNXT_TC_ACTION_FLAG_POP_VLAN) {
+			action_flags |=
+			    CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
+			/* Rewrite config with tpid = 0 implies vlan pop */
+			req.l2_rewrite_vlan_tpid = 0;
+			memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
+			memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
+		}
+	}
+	req.action_flags = cpu_to_le16(action_flags);
+
+	mutex_lock(&bp->hwrm_cmd_lock);
+
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc)
+		*flow_handle = resp->flow_handle;
+
+	mutex_unlock(&bp->hwrm_cmd_lock);
+
+	return rc;
 }
 
 static int bnxt_tc_put_l2_node(struct bnxt *bp,
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 11/11] bnxt_en: add code to query TC flower offload stats
From: Michael Chan @ 2017-08-28 17:40 UTC (permalink / raw)
  To: davem; +Cc: netdev, Sathya Perla
In-Reply-To: <1503942035-24924-1-git-send-email-michael.chan@broadcom.com>

From: Sathya Perla <sathya.perla@broadcom.com>

This patch adds code to implement TC_CLSFLOWER_STATS TC-cmd and the
required FW code to query the stats from the HW.

Signed-off-by: Sathya Perla <sathya.perla@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 95 ++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 5fa0835..ccd699f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -405,6 +405,81 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
 	return rc;
 }
 
+/* Add val to accum while handling a possible wraparound
+ * of val. Eventhough val is of type u64, its actual width
+ * is denoted by mask and will wrap-around beyond that width.
+ */
+static void accumulate_val(u64 *accum, u64 val, u64 mask)
+{
+#define low_bits(x, mask)		((x) & (mask))
+#define high_bits(x, mask)		((x) & ~(mask))
+	bool wrapped = val < low_bits(*accum, mask);
+
+	*accum = high_bits(*accum, mask) + val;
+	if (wrapped)
+		*accum += (mask + 1);
+}
+
+/* The HW counters' width is much less than 64bits.
+ * Handle possible wrap-around while updating the stat counters
+ */
+static void bnxt_flow_stats_fix_wraparound(struct bnxt_tc_info *tc_info,
+					   struct bnxt_tc_flow_stats *stats,
+					   struct bnxt_tc_flow_stats *hw_stats)
+{
+	accumulate_val(&stats->bytes, hw_stats->bytes, tc_info->bytes_mask);
+	accumulate_val(&stats->packets, hw_stats->packets,
+		       tc_info->packets_mask);
+}
+
+/* Fix possible wraparound of the stats queried from HW, calculate
+ * the delta from prev_stats, and also update the prev_stats.
+ * The HW flow stats are fetched under the hwrm_cmd_lock mutex.
+ * This routine is best called while under the mutex so that the
+ * stats processing happens atomically.
+ */
+static void bnxt_flow_stats_calc(struct bnxt_tc_info *tc_info,
+				 struct bnxt_tc_flow *flow,
+				 struct bnxt_tc_flow_stats *stats)
+{
+	struct bnxt_tc_flow_stats *acc_stats, *prev_stats;
+
+	acc_stats = &flow->stats;
+	bnxt_flow_stats_fix_wraparound(tc_info, acc_stats, stats);
+
+	prev_stats = &flow->prev_stats;
+	stats->bytes = acc_stats->bytes - prev_stats->bytes;
+	stats->packets = acc_stats->packets - prev_stats->packets;
+	*prev_stats = *acc_stats;
+}
+
+static int bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp,
+					__le16 flow_handle,
+					struct bnxt_tc_flow *flow,
+					struct bnxt_tc_flow_stats *stats)
+{
+	struct hwrm_cfa_flow_stats_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_cfa_flow_stats_input req = { 0 };
+	int rc;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1);
+	req.num_flows = cpu_to_le16(1);
+	req.flow_handle_0 = flow_handle;
+
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc) {
+		stats->packets = le64_to_cpu(resp->packet_0);
+		stats->bytes = le64_to_cpu(resp->byte_0);
+		bnxt_flow_stats_calc(&bp->tc_info, flow, stats);
+	} else {
+		netdev_info(bp->dev, "error rc=%d", rc);
+	}
+
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return rc;
+}
+
 static int bnxt_tc_put_l2_node(struct bnxt *bp,
 			       struct bnxt_tc_flow_node *flow_node)
 {
@@ -647,6 +722,26 @@ static int bnxt_tc_del_flow(struct bnxt *bp,
 static int bnxt_tc_get_flow_stats(struct bnxt *bp,
 				  struct tc_cls_flower_offload *tc_flow_cmd)
 {
+	struct bnxt_tc_info *tc_info = &bp->tc_info;
+	struct bnxt_tc_flow_node *flow_node;
+	struct bnxt_tc_flow_stats stats;
+	int rc;
+
+	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
+					   &tc_flow_cmd->cookie,
+					   tc_info->flow_ht_params);
+	if (!flow_node) {
+		netdev_info(bp->dev, "Error: no flow_node for cookie %lx",
+			    tc_flow_cmd->cookie);
+		return -1;
+	}
+
+	rc = bnxt_hwrm_cfa_flow_stats_get(bp, flow_node->flow_handle,
+					  &flow_node->flow, &stats);
+	if (rc)
+		return rc;
+
+	tcf_exts_stats_update(tc_flow_cmd->exts, stats.bytes, stats.packets, 0);
 	return 0;
 }
 
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH 2/2] drivers: net: xgene: Clean up all outstanding tx descriptors
From: Iyappan Subramanian @ 2017-08-28 17:48 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: David Miller, netdev, linux-arm-kernel@lists.infradead.org,
	Dean Nelson, Quan Nguyen, patches
In-Reply-To: <20170825231034.GC4207@lunn.ch>

Hi Andrew,

On Fri, Aug 25, 2017 at 4:10 PM, Andrew Lunn <andrew@lunn.ch> wrote:
> On Fri, Aug 25, 2017 at 03:23:30PM -0700, Iyappan Subramanian wrote:
>> When xgene_enet is rmmod'd and there are still outstanding tx descriptors
>> that have been setup but have not completed, it is possible on the next
>> modprobe of the driver to receive the oldest of such tx descriptors. This
>> results in a kernel NULL pointer dereference.
>>
>> This patch attempts to clean up (by tearing down) all outstanding tx
>> descriptors when the xgene_enet driver is being rmmod'd.
>>
>> Given that, on the next modprobe it should be safe to ignore any such tx
>> descriptors received that map to a NULL skb pointer.
>
> This does not sound correct. Before the module is allowed to be
> removed, everything needs to be finished. You need to wait for all the
> tx descriptors to be returned before unloading. How can you free the
> memory for the descriptor if it is still in use? How can you free the
> skbuf the descriptor points to, if it is still in use...

Thanks for pointing out the issue.  It is an error, we will fix the issue.

Since the two patches are unrelated, I'm going to post them separately.

Thanks,
Iyappan

>
>       Andrew

^ permalink raw reply

* Re: mlxsw and rtnl lock
From: David Ahern @ 2017-08-28 18:00 UTC (permalink / raw)
  To: Ido Schimmel; +Cc: Jiri Pirko, netdev@vger.kernel.org, mlxsw
In-Reply-To: <20170826170418.GA22324@shredder>

On 8/26/17 11:04 AM, Ido Schimmel wrote:
> Regarding the silent abort, that's intentional. You can look at the same
> code in v4.9 - when the chain was still blocking - and you'll see that
> we didn't propagate the error even then. This was discussed in the past
> and the conclusion was that user doesn't expect to operation to fail. If
> hardware resources are exceeded, we let the kernel take care of the
> forwarding instead.
> 

In addition to Roopa's comments... The silent abort is not a good user
experience. Right now it's add a network address or route, cross fingers
and hope it does not overflow some limit (nexthop, ecmp, neighbor,
prefix, etc) that triggers the offload abort.

The mlxsw driver queries for some limits (e.g., max rifs) but I don't
see any query related to current usage, and there is no API to pass any
of that data to user space so user space has no programmatic way to
handle this. I realize you are aware of this limitation. The point is to
emphasize the need to resolve this.

^ permalink raw reply

* Re: [PATCH net-next v7 01/10] selftest: Enhance kselftest_harness.h with a step mechanism
From: Shuah Khan @ 2017-08-28 18:01 UTC (permalink / raw)
  To: Alexei Starovoitov, Mickaël Salaün
  Cc: linux-kernel, Alexei Starovoitov, Andy Lutomirski,
	Arnaldo Carvalho de Melo, Casey Schaufler, Daniel Borkmann,
	David Drysdale, David S . Miller, Eric W . Biederman,
	James Morris, Jann Horn, Jonathan Corbet, Matthew Garrett,
	Michael Kerrisk, Kees Cook, Paul Moore, Sargun Dhillon,
	Serge E . Hallyn, Tejun Heo, Thomas Graf, Will Drewry
In-Reply-To: <20170826010711.dksmgg4mcni5q2xd@ast-mbp>

On 08/25/2017 07:07 PM, Alexei Starovoitov wrote:
> On Fri, Aug 25, 2017 at 09:58:33AM +0200, Mickaël Salaün wrote:
>>
>>
>> On 24/08/2017 04:31, Alexei Starovoitov wrote:
>>> On Mon, Aug 21, 2017 at 02:09:24AM +0200, Mickaël Salaün wrote:
>>>> This step mechanism may be useful to return an information about the
>>>> error without being able to write to TH_LOG_STREAM.
>>>>
>>>> Set _metadata->no_print to true to print this counter.
>>>>
>>>> Signed-off-by: Mickaël Salaün <mic@digikod.net>
>>>> Cc: Andy Lutomirski <luto@amacapital.net>
>>>> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
>>>> Cc: Kees Cook <keescook@chromium.org>
>>>> Cc: Shuah Khan <shuah@kernel.org>
>>>> Cc: Will Drewry <wad@chromium.org>
>>>> Link: https://lkml.kernel.org/r/CAGXu5j+D-FP8Kt9unNOqKrQJP4DYTpmgkJxWykZyrYiVPz3Y3Q@mail.gmail.com
>>>> ---
>>>>
>>>> This patch is intended to the kselftest tree:
>>>> https://lkml.kernel.org/r/20170806232337.4191-1-mic@digikod.net
>>>>
>>>> Changes since v6:
>>>> * add the step counter in assert/expect macros and use _metadata to
>>>>   enable the counter (suggested by Kees Cook)
>>>> ---
>>>>  tools/testing/selftests/kselftest_harness.h   | 31 ++++++++++++++++++++++-----
>>>>  tools/testing/selftests/seccomp/seccomp_bpf.c |  2 +-
>>>>  2 files changed, 27 insertions(+), 6 deletions(-)
>>>
>>> is there a dependency on this in patches 2+ ?
>>> if not, I would send this patch to selftests right away.
>>>
>>>
>>
>> The Landlock tests [patch 9/10] rely on it for now.
>>
>> I sent it three weeks ago:
>> https://lkml.kernel.org/r/20170806232337.4191-1-mic@digikod.net
>>
>> Anyway, until this patch is merged in the kselftest tree and then
>> available to net-next, I'll have to keep it here.
> 
> Shuah,
> could you please pick up this patch into your tree?
> 

Thanks. Applied to linux-kselftest next for 4.14-rc1.

-- Shuah

^ permalink raw reply

* Re: [PATCH net-next 4/8] net: ethernet: add the Alpine Ethernet driver
From: Andrew Lunn @ 2017-08-28 18:09 UTC (permalink / raw)
  To: Chocron, Jonathan
  Cc: Antoine Tenart, netdev@vger.kernel.org, davem@davemloft.net,
	linux-arm-kernel@lists.infradead.org,
	thomas.petazzoni@free-electrons.com, arnd@arndb.de
In-Reply-To: <1503841641816.62526@amazon.com>

On Sun, Aug 27, 2017 at 01:47:19PM +0000, Chocron, Jonathan wrote:
> This is a fixed version of my previous response (using proper indentation and leaving only the specific questions responded to).

Wow, this is old.  3 Feb 2017. I had to go dig into the archive to
refresh my memory.

> > > +/* MDIO */
> > > +#define AL_ETH_MDIO_C45_DEV_MASK     0x1f0000
> > > +#define AL_ETH_MDIO_C45_DEV_SHIFT    16
> > > +#define AL_ETH_MDIO_C45_REG_MASK     0xffff
> > > +
> > > +static int al_mdio_read(struct mii_bus *bp, int mii_id, int reg)
> > > +{
> > > +     struct al_eth_adapter *adapter = bp->priv;
> > > +     u16 value = 0;
> > > +     int rc;
> > > +     int timeout = MDIO_TIMEOUT_MSEC;
> > > +
> > > +     while (timeout > 0) {
> > > +             if (reg & MII_ADDR_C45) {
> > > +                     netdev_dbg(adapter->netdev, "[c45]: dev %x reg %x val %x\n",
> > > +                                ((reg & AL_ETH_MDIO_C45_DEV_MASK) >> AL_ETH_MDIO_C45_DEV_SHIFT),
> > > +                                (reg & AL_ETH_MDIO_C45_REG_MASK), value);
> > > +                     rc = al_eth_mdio_read(&adapter->hw_adapter, adapter->phy_addr,
> > > +                             ((reg & AL_ETH_MDIO_C45_DEV_MASK) >> AL_ETH_MDIO_C45_DEV_SHIFT),
> > > +                             (reg & AL_ETH_MDIO_C45_REG_MASK), &value);
> > > +             } else {
> > > +                     rc = al_eth_mdio_read(&adapter->hw_adapter, adapter->phy_addr,
> > > +                                           MDIO_DEVAD_NONE, reg, &value);
> > > +             }
> > > +
> > > +             if (rc == 0)
> > > +                     return value;
> > > +
> > > +             netdev_dbg(adapter->netdev,
> > > +                        "mdio read failed. try again in 10 msec\n");
> > > +
> > > +             timeout -= 10;
> > > +             msleep(10);
> > > +     }
> > 
> > This is rather unusual, retrying MDIO operations. Are you working
> > around a hardware bug? I suspect this also opens up race conditions,
> > in particular with PHY interrupts, which can be clear on read.
> 
> The MDIO bus is shared between the ethernet units. There is a HW
> lock used to arbitrate between different interfaces trying to access
> the bus, therefore there is a retry loop. The reg isn't accessed
> before obtaining the lock, so there shouldn't be any clear on read
> issues.
> 
> > > +/* al_eth_mdiobus_setup - initialize mdiobus and register to kernel */
> > > +static int al_eth_mdiobus_setup(struct al_eth_adapter *adapter)
> > > +{
> > > +     struct phy_device *phydev;
> > > +     int i;
> > > +     int ret = 0;
> > > +
> > > +     adapter->mdio_bus = mdiobus_alloc();
> > > +     if (!adapter->mdio_bus)
> > > +             return -ENOMEM;
> > > +
> > > +     adapter->mdio_bus->name     = "al mdio bus";
> > > +     snprintf(adapter->mdio_bus->id, MII_BUS_ID_SIZE, "%x",
> > > +              (adapter->pdev->bus->number << 8) | adapter->pdev->devfn);
> > > +     adapter->mdio_bus->priv     = adapter;
> > > +     adapter->mdio_bus->parent   = &adapter->pdev->dev;
> > > +     adapter->mdio_bus->read     = &al_mdio_read;
> > > +     adapter->mdio_bus->write    = &al_mdio_write;
> > > +     adapter->mdio_bus->phy_mask = ~BIT(adapter->phy_addr);
> >
> > Why do this?
> 
> Since the MDIO bus is shared, we want each interface to probe only for the PHY associated with it.

So i think this is the core of the problem. You have one physical MDIO
bus, yet you register it twice with the MDIO framework.

How about you only register it once? A lot of the complexity then goes
away. The mutex in the mdio core per bus means you don't need your
hardware locking. All that code goes away. All the retry code goes
away. Life is simple.

	Andrew

^ permalink raw reply

* Re: [net-next PATCH 0/9] sockmap UAPI updates and fixes
From: David Miller @ 2017-08-28 18:13 UTC (permalink / raw)
  To: john.fastabend; +Cc: ast, daniel, netdev
In-Reply-To: <20170828140850.14143.83953.stgit@john-Precision-Tower-5810>

From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 28 Aug 2017 07:09:45 -0700

> This series updates sockmap UAPI, adds additional test cases and
> provides a couple fixes.
> 
> First the UAPI changes. The original API added two sockmap specific
> API artifacts (a) a new map_flags field with a sockmap specific update
> command and (b) a new sockmap specific attach field in the attach data
> structure. After this series instead of attaching programs with a
> single command now two commands are used to attach programs to maps
> individually. This allows us to add new programs easily in the future
> and avoids any specific sockmap data structure additions. The
> map_flags field is also removed and instead we allow socks to be
> added to multiple maps that may or may not have programs attached.
> This allows users to decide if a sock should run a SK_SKB program type
> on receive based on the map it is attached to. This is a nice
> improvement. See patches for specific details.
> 
> More test cases were added to test above changes and also stress test
> the interface.
> 
> Finally two fixes/improvements were made. First a missing rcu
> section was added. Second now sockmap can build without KCM being
> used to trigger 'y' on CONFIG_STREAM_PARSER by selecting a new
> BPF config option.

Series applied, thanks John.

^ permalink raw reply

* [PATCH] ipv6: sr: fix get_srh() to comply with IPv6 standard "RFC 8200"
From: Ahmed Abdelsalam @ 2017-08-28 18:20 UTC (permalink / raw)
  To: davem; +Cc: yoshfuji, netdev, amsalam20

IPv6 packet may carry more than one extension header, and IPv6 nodes must
acceptand attempt to process extension headers in any order and occurring
any number of times in the same packet. Hence, there should be no
assumption that Segment Routing extension header is to appear immediately
after the IPv6 header.

Moreover, section 4.1 of RFC 8200 gives a recommendation on the order of
appearance of those extension headers within an IPv6 packet. According to
this recommendation, Segment Routing extension header should appear after
Hop-by-Hop and Destination Options headers (if they present).

This patch fixes the get_srh(), so it gets the segment routing header
regardless of its position in the chain of the extension headers in IPv6
packet, and makes sure that the IPv6 routing extension header is of
Type 4.

Signed-off-by: Ahmed Abdelsalam <amsalam20@gmail.com>
---
 net/ipv6/seg6_local.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 9c1a885..ae97fd9 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -62,17 +62,19 @@ static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
 static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
 {
 	struct ipv6_sr_hdr *srh;
-	struct ipv6hdr *hdr;
-	int len;
+	int len, srhoff;

-	hdr = ipv6_hdr(skb);
-	if (hdr->nexthdr != IPPROTO_ROUTING)
+	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
 		return NULL;

-	srh = (struct ipv6_sr_hdr *)(hdr + 1);
-	len = (srh->hdrlen + 1) << 3;
+	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);

-	if (!pskb_may_pull(skb, sizeof(*hdr) + len))
+	/* make sure it's a Segment Routing header (Routing Type 4) */
+	if (srh->type != IPV6_SRCRT_TYPE_4)
+		return NULL;
+
+	len = (srh->hdrlen + 1) << 3;
+	if (!pskb_may_pull(skb, srhoff + len))
 		return NULL;

 	if (!seg6_validate_srh(srh, len))
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH] net: missing call of trace_napi_poll in busy_poll_stop
From: David Miller @ 2017-08-28 18:23 UTC (permalink / raw)
  To: brouer; +Cc: netdev
In-Reply-To: <150366627224.16965.12554507587831320156.stgit@firesoul>

From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Fri, 25 Aug 2017 15:04:32 +0200

> Noticed that busy_poll_stop() also invoke the drivers napi->poll()
> function pointer, but didn't have an associated call to trace_napi_poll()
> like all other call sites.
> 
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH v2] cdc_ncm: flag the u-blox TOBY-L4 as wwan
From: David Miller @ 2017-08-28 18:25 UTC (permalink / raw)
  To: aleksander
  Cc: oliver, stefano.godeas, marco.demarco, linux-usb, netdev,
	linux-kernel, gregkh
In-Reply-To: <20170825133916.13948-1-aleksander@aleksander.es>

From: Aleksander Morgado <aleksander@aleksander.es>
Date: Fri, 25 Aug 2017 15:39:16 +0200

> The u-blox TOBY-L4 is a LTE Advanced (Cat 6) module with HSPA+ and 2G
> fallback.
> 
> Unlike the TOBY-L2, this module has one single USB layout and exposes
> several TTYs for control and a NCM interface for data. Connecting this
> module may be done just by activating the desired PDP context with
> 'AT+CGACT=1,<cid>' and then running DHCP on the NCM interface.
> 
> Signed-off-by: Aleksander Morgado <aleksander@aleksander.es>

Applied.

^ permalink raw reply

* Re: [PATCH net v2 1/4] net: mvpp2: fix the mac address used when using PPv2.2
From: David Miller @ 2017-08-28 18:25 UTC (permalink / raw)
  To: antoine.tenart
  Cc: thomas.petazzoni, andrew, gregory.clement, nadavh, linux,
	linux-kernel, mw, stefanc, netdev
In-Reply-To: <20170825141420.14027-2-antoine.tenart@free-electrons.com>

From: Antoine Tenart <antoine.tenart@free-electrons.com>
Date: Fri, 25 Aug 2017 16:14:17 +0200

> The mac address is only retrieved from h/w when using PPv2.1. Otherwise
> the variable holding it is still checked and used if it contains a valid
> value. As the variable isn't initialized to an invalid mac address
> value, we end up with random mac addresses which can be the same for all
> the ports handled by this PPv2 driver.
> 
> Fixes this by initializing the h/w mac address variable to {0}, which is
> an invalid mac address value. This way the random assignation fallback
> is called and all ports end up with their own addresses.
> 
> Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
> Fixes: 2697582144dd ("net: mvpp2: handle misc PPv2.1/PPv2.2 differences")

Applied and queued up for -stable, thanks.

^ permalink raw reply

* Re: [PATCH net] l2tp: initialise session's refcount before making it reachable
From: David Miller @ 2017-08-28 18:29 UTC (permalink / raw)
  To: g.nault; +Cc: netdev, jchapman
In-Reply-To: <f37a31f7fbfa4c02c4a263672f86eac4e80be272.1503670839.git.g.nault@alphalink.fr>

From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 25 Aug 2017 16:22:17 +0200

> Sessions must be fully initialised before calling
> l2tp_session_add_to_tunnel(). Otherwise, there's a short time frame
> where partially initialised sessions can be accessed by external users.
> 
> Fixes: dbdbc73b4478 ("l2tp: fix duplicate session creation")
> Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>

Applied and queued up for -stable, thanks.

^ permalink raw reply

* [PATCH] packet: Don't write vnet header beyond end of buffer
From: Benjamin Poirier @ 2017-08-28 18:29 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, linux-kernel, Willem de Bruijn

... which may happen with certain values of tp_reserve and maclen.

Fixes: 58d19b19cd99 ("packet: vnet_hdr support for tpacket_rcv")
Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
Cc: Willem de Bruijn <willemb@google.com>
---
 net/packet/af_packet.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 008a45ca3112..1c61af9af67d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2191,6 +2191,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	struct timespec ts;
 	__u32 ts_status;
 	bool is_drop_n_account = false;
+	bool do_vnet = false;
 
 	/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
 	 * We may add members to them until current aligned size without forcing
@@ -2241,8 +2242,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		netoff = TPACKET_ALIGN(po->tp_hdrlen +
 				       (maclen < 16 ? 16 : maclen)) +
 				       po->tp_reserve;
-		if (po->has_vnet_hdr)
+		if (po->has_vnet_hdr) {
 			netoff += sizeof(struct virtio_net_hdr);
+			do_vnet = true;
+		}
 		macoff = netoff - maclen;
 	}
 	if (po->tp_version <= TPACKET_V2) {
@@ -2259,8 +2262,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 					skb_set_owner_r(copy_skb, sk);
 			}
 			snaplen = po->rx_ring.frame_size - macoff;
-			if ((int)snaplen < 0)
+			if ((int)snaplen < 0) {
 				snaplen = 0;
+				do_vnet = false;
+			}
 		}
 	} else if (unlikely(macoff + snaplen >
 			    GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
@@ -2273,6 +2278,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		if (unlikely((int)snaplen < 0)) {
 			snaplen = 0;
 			macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
+			do_vnet = false;
 		}
 	}
 	spin_lock(&sk->sk_receive_queue.lock);
@@ -2298,7 +2304,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	}
 	spin_unlock(&sk->sk_receive_queue.lock);
 
-	if (po->has_vnet_hdr) {
+	if (do_vnet) {
 		if (virtio_net_hdr_from_skb(skb, h.raw + macoff -
 					    sizeof(struct virtio_net_hdr),
 					    vio_le(), true)) {
-- 
2.14.1

^ permalink raw reply related

* Re: [ethtool] ethtool: Remove UDP Fragmentation Offload use from ethtool
From: John W. Linville @ 2017-08-28 18:22 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Tariq Toukan, netdev, Eran Ben Elisha, Shaker Daibes
In-Reply-To: <1503932411.11498.67.camel@edumazet-glaptop3.roam.corp.google.com>

On Mon, Aug 28, 2017 at 08:00:11AM -0700, Eric Dumazet wrote:
> On Mon, 2017-08-28 at 15:38 +0300, Tariq Toukan wrote:
> > From: Shaker Daibes <shakerd@mellanox.com>
> > 
> > UFO was removed in kernel, here we remove it in ethtool app.
> > 
> > Fixes the following issue:
> > Features for ens8:
> > Cannot get device udp-fragmentation-offload settings: Operation not supported
> > 
> > Tested with "make check"
> > 
> > Signed-off-by: Shaker Daibes <shakerd@mellanox.com>
> > Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
> > ---
> 
> 
> Hi guys
> 
> I would rather remove the warning, but leave the ability to switch UFO
> on machines running old kernel but a recent ethtool.
> 
> ethtool does not need to be downgraded every time we boot an old
> kernel ;)

No, definitely not.
 
> Thanks !

Tariq, will you be reworking this as Eric suggests?

John
-- 
John W. Linville		Someday the world will need a hero, and you
linville@tuxdriver.com			might be all we have.  Be ready.

^ permalink raw reply

* Re: [PATCH 2/7] bridge: make ebt_table const
From: David Miller @ 2017-08-28 18:30 UTC (permalink / raw)
  To: bhumirks
  Cc: julia.lawall, marcel, gustavo, johan.hedberg, pablo, kadlec, fw,
	stephen, alex.aring, stefan, kuznet, yoshfuji, santosh.shilimkar,
	trond.myklebust, anna.schumaker, bfields, jlayton,
	linux-bluetooth, netdev, linux-kernel, netfilter-devel, coreteam,
	bridge, linux-wpan, linux-rdma, rds-devel, linux-nfs
In-Reply-To: <1503670907-23221-3-git-send-email-bhumirks@gmail.com>

From: Bhumika Goyal <bhumirks@gmail.com>
Date: Fri, 25 Aug 2017 19:51:42 +0530

> Make this const as it is only passed to a const argument of the function
> ebt_register_table.
> 
> Signed-off-by: Bhumika Goyal <bhumirks@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH 4/7] ipv4: make net_protocol const
From: David Miller @ 2017-08-28 18:30 UTC (permalink / raw)
  To: bhumirks
  Cc: julia.lawall, marcel, gustavo, johan.hedberg, pablo, kadlec, fw,
	stephen, alex.aring, stefan, kuznet, yoshfuji, santosh.shilimkar,
	trond.myklebust, anna.schumaker, bfields, jlayton,
	linux-bluetooth, netdev, linux-kernel, netfilter-devel, coreteam,
	bridge, linux-wpan, linux-rdma, rds-devel, linux-nfs
In-Reply-To: <1503670907-23221-5-git-send-email-bhumirks@gmail.com>

From: Bhumika Goyal <bhumirks@gmail.com>
Date: Fri, 25 Aug 2017 19:51:44 +0530

> Make these const as they are only passed to a const argument of the
> function inet_add_protocol.
> 
> Signed-off-by: Bhumika Goyal <bhumirks@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH 5/7] RDS: make rhashtable_params const
From: David Miller @ 2017-08-28 18:30 UTC (permalink / raw)
  To: bhumirks
  Cc: julia.lawall, marcel, gustavo, johan.hedberg, pablo, kadlec, fw,
	stephen, alex.aring, stefan, kuznet, yoshfuji, santosh.shilimkar,
	trond.myklebust, anna.schumaker, bfields, jlayton,
	linux-bluetooth, netdev, linux-kernel, netfilter-devel, coreteam,
	bridge, linux-wpan, linux-rdma, rds-devel, linux-nfs
In-Reply-To: <1503670907-23221-6-git-send-email-bhumirks@gmail.com>

From: Bhumika Goyal <bhumirks@gmail.com>
Date: Fri, 25 Aug 2017 19:51:45 +0530

> Make this const as it is either used during a copy operation or passed
> to a const argument of the function rhltable_init
> 
> Signed-off-by: Bhumika Goyal <bhumirks@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH net 0/5] l2tp: fix some l2tp_tunnel_find() issues in l2tp_netlink
From: David Miller @ 2017-08-28 18:35 UTC (permalink / raw)
  To: g.nault; +Cc: netdev, jchapman
In-Reply-To: <cover.1503671776.git.g.nault@alphalink.fr>

From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 25 Aug 2017 16:51:39 +0200

> Since l2tp_tunnel_find() doesn't take a reference on the tunnel it
> returns, its users are almost guaranteed to be racy.
> 
> This series defines l2tp_tunnel_get() which can be used as a safe
> replacement, and converts some of l2tp_tunnel_find() users in the
> l2tp_netlink module.
> 
> Other users often combine this issue with other more or less subtle
> races. They will be fixed incrementally in followup series.

Series applied, thank you.

^ permalink raw reply

* Re: [PATCH] packet: Don't write vnet header beyond end of buffer
From: Willem de Bruijn @ 2017-08-28 18:39 UTC (permalink / raw)
  To: Benjamin Poirier
  Cc: David S. Miller, Network Development, LKML, Willem de Bruijn
In-Reply-To: <20170828182941.10677-1-bpoirier@suse.com>

On Mon, Aug 28, 2017 at 2:29 PM, Benjamin Poirier <bpoirier@suse.com> wrote:
> ... which may happen with certain values of tp_reserve and maclen.
>
> Fixes: 58d19b19cd99 ("packet: vnet_hdr support for tpacket_rcv")
> Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
> Cc: Willem de Bruijn <willemb@google.com>

Acked-by: Willem de Bruijn <willemb@google.com>

Thanks for fixing this, Benjamin.

^ permalink raw reply

* Re: [PATCH] ipv6: sr: fix get_srh() to comply with IPv6 standard "RFC 8200"
From: David Lebrun @ 2017-08-28 18:48 UTC (permalink / raw)
  To: Ahmed Abdelsalam, davem; +Cc: yoshfuji, netdev
In-Reply-To: <1503944442-8994-1-git-send-email-amsalam20@gmail.com>

[-- Attachment #1.1: Type: text/plain, Size: 716 bytes --]

On 08/28/2017 07:20 PM, Ahmed Abdelsalam wrote:
> This patch fixes the get_srh(), so it gets the segment routing header
> regardless of its position in the chain of the extension headers in IPv6
> packet, and makes sure that the IPv6 routing extension header is of
> Type 4.

Ahmed,

You need to initialize srhoff to 0, otherwise ipv6_find_hdr() will crash
the kernel by dereferencing an uninitialized pointer.

Please test your patches before submitting them.

Furthermore, your pskb_may_pull() check should happen right after the
call to ipv6_find_hdr, with srhoff + sizeof(*srh) as argument. Once you
have checked the SRH type, you can then do another pskb_may_pull with
srhoff + len.

David

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply

* [PATCH net v2 0/6] net:ethernet:aquantia: Atlantic driver Update 2017-08-23
From: Pavel Belous @ 2017-08-28 18:52 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, David Arcari, Igor Russkikh, Nadezhda Krupnina,
	Simon Edelhaus, Pavel Belous

From: Pavel Belous <pavel.belous@aquantia.com>

This series contains updates for aQuantia Atlantic driver.

It has bugfixes and some improvements.

Changes in v2:
 - "MCP state change" fix removed (will be sent as
    a separate fix after further investigation.)

Igor Russkikh (1):
  net:ethernet:aquantia: Fix for multicast filter handling.

Pavel Belous (5):
  net:ethernet:aquantia: Extra spinlocks removed.
  net:ethernet:aquantia: Fix for number of RSS queues.
  net:ethernet:aquantia: Workaround for HW checksum bug.
  net:ethernet:aquantia: Fix for incorrect speed index.
  net:ethernet:aquantia: Show info message if bad firmware version
    detected.

 drivers/net/ethernet/aquantia/atlantic/aq_hw.h     |  3 +-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    | 92 +++++++++++-----------
 drivers/net/ethernet/aquantia/atlantic/aq_ring.c   |  1 -
 drivers/net/ethernet/aquantia/atlantic/aq_utils.h  |  1 -
 drivers/net/ethernet/aquantia/atlantic/aq_vec.c    | 11 +--
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c  |  6 ++
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  |  6 ++
 .../aquantia/atlantic/hw_atl/hw_atl_utils.c        | 10 ++-
 .../aquantia/atlantic/hw_atl/hw_atl_utils.h        |  3 +-
 9 files changed, 68 insertions(+), 65 deletions(-)

-- 
2.7.4

^ permalink raw reply

* [PATCH net v2 1/6] net:ethernet:aquantia: Extra spinlocks removed.
From: Pavel Belous @ 2017-08-28 18:52 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, David Arcari, Igor Russkikh, Nadezhda Krupnina,
	Simon Edelhaus, Pavel Belous, Pavel Belous
In-Reply-To: <cover.1503945861.git.pavel.belous@aquantia.com>

From: Pavel Belous <pavel.belous@aquantia.com>

This patch removes datapath spinlocks which does not perform any
useful work.

Fixes: 6e70637f9f1e ("net: ethernet: aquantia: Add ring support code")
Signed-off-by: Pavel Belous <Pavel.Belous@aquantia.com>
---
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c   | 42 +++++++----------------
 drivers/net/ethernet/aquantia/atlantic/aq_ring.c  |  1 -
 drivers/net/ethernet/aquantia/atlantic/aq_utils.h |  1 -
 drivers/net/ethernet/aquantia/atlantic/aq_vec.c   | 11 ++----
 4 files changed, 14 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 9ee1c50..08b7275 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -597,14 +597,11 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self,
 }
 
 int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
-__releases(&ring->lock)
-__acquires(&ring->lock)
 {
 	struct aq_ring_s *ring = NULL;
 	unsigned int frags = 0U;
 	unsigned int vec = skb->queue_mapping % self->aq_nic_cfg.vecs;
 	unsigned int tc = 0U;
-	unsigned int trys = AQ_CFG_LOCK_TRYS;
 	int err = NETDEV_TX_OK;
 	bool is_nic_in_bad_state;
 
@@ -628,36 +625,21 @@ __acquires(&ring->lock)
 		goto err_exit;
 	}
 
-	do {
-		if (spin_trylock(&ring->header.lock)) {
-			frags = aq_nic_map_skb(self, skb, ring);
-
-			if (likely(frags)) {
-				err = self->aq_hw_ops.hw_ring_tx_xmit(
-								self->aq_hw,
-								ring, frags);
-				if (err >= 0) {
-					if (aq_ring_avail_dx(ring) <
-					    AQ_CFG_SKB_FRAGS_MAX + 1)
-						aq_nic_ndev_queue_stop(
-								self,
-								ring->idx);
-
-					++ring->stats.tx.packets;
-					ring->stats.tx.bytes += skb->len;
-				}
-			} else {
-				err = NETDEV_TX_BUSY;
-			}
+	frags = aq_nic_map_skb(self, skb, ring);
 
-			spin_unlock(&ring->header.lock);
-			break;
-		}
-	} while (--trys);
+	if (likely(frags)) {
+		err = self->aq_hw_ops.hw_ring_tx_xmit(self->aq_hw,
+						      ring,
+						      frags);
+		if (err >= 0) {
+			if (aq_ring_avail_dx(ring) < AQ_CFG_SKB_FRAGS_MAX + 1)
+				aq_nic_ndev_queue_stop(self, ring->idx);
 
-	if (!trys) {
+			++ring->stats.tx.packets;
+			ring->stats.tx.bytes += skb->len;
+		}
+	} else {
 		err = NETDEV_TX_BUSY;
-		goto err_exit;
 	}
 
 err_exit:
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 9a08179..ec5579f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -101,7 +101,6 @@ int aq_ring_init(struct aq_ring_s *self)
 	self->hw_head = 0;
 	self->sw_head = 0;
 	self->sw_tail = 0;
-	spin_lock_init(&self->header.lock);
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_utils.h b/drivers/net/ethernet/aquantia/atlantic/aq_utils.h
index f6012b3..e12bcdf 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_utils.h
@@ -17,7 +17,6 @@
 #define AQ_DIMOF(_ARY_)  ARRAY_SIZE(_ARY_)
 
 struct aq_obj_s {
-	spinlock_t lock; /* spinlock for nic/rings processing */
 	atomic_t flags;
 };
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
index ad5b4d4d..fee446a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
@@ -34,8 +34,6 @@ struct aq_vec_s {
 #define AQ_VEC_RX_ID 1
 
 static int aq_vec_poll(struct napi_struct *napi, int budget)
-__releases(&self->lock)
-__acquires(&self->lock)
 {
 	struct aq_vec_s *self = container_of(napi, struct aq_vec_s, napi);
 	struct aq_ring_s *ring = NULL;
@@ -47,7 +45,7 @@ __acquires(&self->lock)
 
 	if (!self) {
 		err = -EINVAL;
-	} else if (spin_trylock(&self->header.lock)) {
+	} else {
 		for (i = 0U, ring = self->ring[0];
 			self->tx_rings > i; ++i, ring = self->ring[i]) {
 			if (self->aq_hw_ops->hw_ring_tx_head_update) {
@@ -105,11 +103,8 @@ __acquires(&self->lock)
 			self->aq_hw_ops->hw_irq_enable(self->aq_hw,
 					1U << self->aq_ring_param.vec_idx);
 		}
-
-err_exit:
-		spin_unlock(&self->header.lock);
 	}
-
+err_exit:
 	return work_done;
 }
 
@@ -185,8 +180,6 @@ int aq_vec_init(struct aq_vec_s *self, struct aq_hw_ops *aq_hw_ops,
 	self->aq_hw_ops = aq_hw_ops;
 	self->aq_hw = aq_hw;
 
-	spin_lock_init(&self->header.lock);
-
 	for (i = 0U, ring = self->ring[0];
 		self->tx_rings > i; ++i, ring = self->ring[i]) {
 		err = aq_ring_init(&ring[AQ_VEC_TX_ID]);
-- 
2.7.4

^ permalink raw reply related

* [PATCH net v2 2/6] net:ethernet:aquantia: Fix for number of RSS queues.
From: Pavel Belous @ 2017-08-28 18:52 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, David Arcari, Igor Russkikh, Nadezhda Krupnina,
	Simon Edelhaus, Pavel Belous, Pavel Belous
In-Reply-To: <cover.1503945861.git.pavel.belous@aquantia.com>

From: Pavel Belous <pavel.belous@aquantia.com>

The number of RSS queues should be not more than numbers of CPU.
Its does not make sense to increase perfomance, and also cause problems on
some motherboards.

Fixes: 94f6c9e4cdf6 ("net: ethernet: aquantia: Support for NIC-specific code")
Signed-off-by: Pavel Belous <Pavel.Belous@aquantia.com>
---
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 08b7275..d6d8e70 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -103,6 +103,8 @@ int aq_nic_cfg_start(struct aq_nic_s *self)
 	else
 		cfg->vecs = 1U;
 
+	cfg->num_rss_queues = min(cfg->vecs, AQ_CFG_NUM_RSS_QUEUES_DEF);
+
 	cfg->irq_type = aq_pci_func_get_irq_type(self->aq_pci_func);
 
 	if ((cfg->irq_type == AQ_HW_IRQ_LEGACY) ||
-- 
2.7.4

^ permalink raw reply related

* [PATCH net v2 3/6] net:ethernet:aquantia: Workaround for HW checksum bug.
From: Pavel Belous @ 2017-08-28 18:52 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, David Arcari, Igor Russkikh, Nadezhda Krupnina,
	Simon Edelhaus, Pavel Belous, Pavel Belous
In-Reply-To: <cover.1503945861.git.pavel.belous@aquantia.com>

From: Pavel Belous <pavel.belous@aquantia.com>

The hardware has the HW Checksum Offload bug when small
TCP patckets (with length <= 60 bytes) has wrong "checksum valid" bit.

The solution is - ignore checksum valid bit for small packets
(with length <= 60 bytes) and mark this as CHECKSUM_NONE to allow
network stack recalculate checksum itself.

Fixes: ccf9a5ed14be ("net: ethernet: aquantia: Atlantic A0 and B0 specific functions.")
Signed-off-by: Pavel Belous <Pavel.Belous@aquantia.com>
---
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 6 ++++++
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index faeb493..c5a02df 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -629,6 +629,12 @@ static int hw_atl_a0_hw_ring_rx_receive(struct aq_hw_s *self,
 				buff->is_udp_cso = (is_err & 0x10U) ? 0 : 1;
 			else if (0x0U == (pkt_type & 0x1CU))
 				buff->is_tcp_cso = (is_err & 0x10U) ? 0 : 1;
+
+			/* Checksum offload workaround for small packets */
+			if (rxd_wb->pkt_len <= 60) {
+				buff->is_ip_cso = 0U;
+				buff->is_cso_err = 0U;
+			}
 		}
 
 		is_err &= ~0x18U;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 1bceb73..21784cc 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -645,6 +645,12 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self,
 				buff->is_udp_cso = buff->is_cso_err ? 0U : 1U;
 			else if (0x0U == (pkt_type & 0x1CU))
 				buff->is_tcp_cso = buff->is_cso_err ? 0U : 1U;
+
+			/* Checksum offload workaround for small packets */
+			if (rxd_wb->pkt_len <= 60) {
+				buff->is_ip_cso = 0U;
+				buff->is_cso_err = 0U;
+			}
 		}
 
 		is_err &= ~0x18U;
-- 
2.7.4

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox