Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 08/10] nfp: flower: offload pre-tunnel rules
From: John Hurley @ 2019-08-04 15:09 UTC (permalink / raw)
  To: netdev; +Cc: davem, simon.horman, jakub.kicinski, oss-drivers, John Hurley
In-Reply-To: <1564931351-1036-1-git-send-email-john.hurley@netronome.com>

Pre-tunnel rules are TC flower and OvS rules that forward a packet to the
tunnel end point where it can then pass through the network stack and be
decapsulated. These are required if the tunnel end point is, say, an OvS
internal port.

Currently, firmware determines that a packet is in a tunnel and decaps it
if it has a known destination IP and MAC address. However, this bypasses
the flower pre-tunnel rule and so does not update the stats. Further to
this it ignores VLANs that may exist outside of the tunnel header.

Offload pre-tunnel rules to the NFP. This embeds the pre-tunnel rule into
the tunnel decap process based on (firmware) mac index and VLAN. This
means that decap can be carried out correctly with VLANs and that stats
can be updated for all kernel rules correctly.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/flower/cmsg.h   |  1 +
 drivers/net/ethernet/netronome/nfp/flower/main.c   |  1 +
 drivers/net/ethernet/netronome/nfp/flower/main.h   |  3 +
 .../ethernet/netronome/nfp/flower/tunnel_conf.c    | 79 +++++++++++++++++++++-
 4 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index caf3029..7eb2ec8 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -484,6 +484,7 @@ enum nfp_flower_cmsg_type_port {
 	NFP_FLOWER_CMSG_TYPE_QOS_MOD =		18,
 	NFP_FLOWER_CMSG_TYPE_QOS_DEL =		19,
 	NFP_FLOWER_CMSG_TYPE_QOS_STATS =	20,
+	NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE =	21,
 	NFP_FLOWER_CMSG_TYPE_MAX =		32,
 };
 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index eb84613..7a20447 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -781,6 +781,7 @@ static int nfp_flower_init(struct nfp_app *app)
 
 	INIT_LIST_HEAD(&app_priv->indr_block_cb_priv);
 	INIT_LIST_HEAD(&app_priv->non_repr_priv);
+	app_priv->pre_tun_rule_cnt = 0;
 
 	return 0;
 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index c3aa415..5d302d7 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -163,6 +163,7 @@ struct nfp_fl_internal_ports {
  * @qos_stats_work:	Workqueue for qos stats processing
  * @qos_rate_limiters:	Current active qos rate limiters
  * @qos_stats_lock:	Lock on qos stats updates
+ * @pre_tun_rule_cnt:	Number of pre-tunnel rules offloaded
  */
 struct nfp_flower_priv {
 	struct nfp_app *app;
@@ -194,6 +195,7 @@ struct nfp_flower_priv {
 	struct delayed_work qos_stats_work;
 	unsigned int qos_rate_limiters;
 	spinlock_t qos_stats_lock; /* Protect the qos stats */
+	int pre_tun_rule_cnt;
 };
 
 /**
@@ -284,6 +286,7 @@ struct nfp_fl_payload {
 	struct {
 		struct net_device *dev;
 		__be16 vlan_tci;
+		__be16 port_idx;
 	} pre_tun_rule;
 };
 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index ef59481..b9dbfb7 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -15,6 +15,23 @@
 
 #define NFP_FL_MAX_ROUTES               32
 
+#define NFP_TUN_PRE_TUN_RULE_LIMIT	32
+#define NFP_TUN_PRE_TUN_RULE_DEL	0x1
+
+/**
+ * struct nfp_tun_pre_run_rule - rule matched before decap
+ * @flags:		options for the rule offset
+ * @port_idx:		index of destination MAC address for the rule
+ * @vlan_tci:		VLAN info associated with MAC
+ * @host_ctx_id:	stats context of rule to update
+ */
+struct nfp_tun_pre_tun_rule {
+	__be32 flags;
+	__be16 port_idx;
+	__be16 vlan_tci;
+	__be32 host_ctx_id;
+};
+
 /**
  * struct nfp_tun_active_tuns - periodic message of active tunnels
  * @seq:		sequence number of the message
@@ -835,13 +852,71 @@ int nfp_tunnel_mac_event_handler(struct nfp_app *app,
 int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app,
 				 struct nfp_fl_payload *flow)
 {
-	return -EOPNOTSUPP;
+	struct nfp_flower_priv *app_priv = app->priv;
+	struct nfp_tun_offloaded_mac *mac_entry;
+	struct nfp_tun_pre_tun_rule payload;
+	struct net_device *internal_dev;
+	int err;
+
+	if (app_priv->pre_tun_rule_cnt == NFP_TUN_PRE_TUN_RULE_LIMIT)
+		return -ENOSPC;
+
+	memset(&payload, 0, sizeof(struct nfp_tun_pre_tun_rule));
+
+	internal_dev = flow->pre_tun_rule.dev;
+	payload.vlan_tci = flow->pre_tun_rule.vlan_tci;
+	payload.host_ctx_id = flow->meta.host_ctx_id;
+
+	/* Lookup MAC index for the pre-tunnel rule egress device.
+	 * Note that because the device is always an internal port, it will
+	 * have a constant global index so does not need to be tracked.
+	 */
+	mac_entry = nfp_tunnel_lookup_offloaded_macs(app,
+						     internal_dev->dev_addr);
+	if (!mac_entry)
+		return -ENOENT;
+
+	payload.port_idx = cpu_to_be16(mac_entry->index);
+
+	/* Copy mac id and vlan to flow - dev may not exist at delete time. */
+	flow->pre_tun_rule.vlan_tci = payload.vlan_tci;
+	flow->pre_tun_rule.port_idx = payload.port_idx;
+
+	err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE,
+				       sizeof(struct nfp_tun_pre_tun_rule),
+				       (unsigned char *)&payload, GFP_KERNEL);
+	if (err)
+		return err;
+
+	app_priv->pre_tun_rule_cnt++;
+
+	return 0;
 }
 
 int nfp_flower_xmit_pre_tun_del_flow(struct nfp_app *app,
 				     struct nfp_fl_payload *flow)
 {
-	return -EOPNOTSUPP;
+	struct nfp_flower_priv *app_priv = app->priv;
+	struct nfp_tun_pre_tun_rule payload;
+	u32 tmp_flags = 0;
+	int err;
+
+	memset(&payload, 0, sizeof(struct nfp_tun_pre_tun_rule));
+
+	tmp_flags |= NFP_TUN_PRE_TUN_RULE_DEL;
+	payload.flags = cpu_to_be32(tmp_flags);
+	payload.vlan_tci = flow->pre_tun_rule.vlan_tci;
+	payload.port_idx = flow->pre_tun_rule.port_idx;
+
+	err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE,
+				       sizeof(struct nfp_tun_pre_tun_rule),
+				       (unsigned char *)&payload, GFP_KERNEL);
+	if (err)
+		return err;
+
+	app_priv->pre_tun_rule_cnt--;
+
+	return 0;
 }
 
 int nfp_tunnel_config_start(struct nfp_app *app)
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next 05/10] nfp: flower: push vlan after tunnel in merge
From: John Hurley @ 2019-08-04 15:09 UTC (permalink / raw)
  To: netdev; +Cc: davem, simon.horman, jakub.kicinski, oss-drivers, John Hurley
In-Reply-To: <1564931351-1036-1-git-send-email-john.hurley@netronome.com>

NFP allows the merging of 2 flows together into a single offloaded flow.
In the kernel datapath the packet must match 1 flow, impliment its
actions, recirculate, match the 2nd flow and also impliment its actions.
Merging creates a single flow with all actions from the 2 original flows.

Firmware impliments a tunnel header push as the packet is about to egress
the card. Therefore, if the first merge rule candiate pushes a tunnel,
then the second rule can only have an egress action for a valid merge to
occur (or else the action ordering will be incorrect). This prevents the
pushing of a tunnel header followed by the pushing of a vlan header.

In order to support this behaviour, firmware allows VLAN information to
be encoded in the tunnel push action. If this is non zero then the fw will
push a VLAN after the tunnel header push meaning that 2 such flows with
these actions can be merged (with action order being maintained).

Support tunnel in VLAN pushes by encoding VLAN information in the tunnel
push action of any merge flow requiring this.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/flower/cmsg.h   |  3 +-
 .../net/ethernet/netronome/nfp/flower/offload.c    | 60 ++++++++++++++++++++--
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 3324394..caf3029 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -220,7 +220,8 @@ struct nfp_fl_set_ipv4_tun {
 	__be16 tun_flags;
 	u8 ttl;
 	u8 tos;
-	__be32 extra;
+	__be16 outer_vlan_tpid;
+	__be16 outer_vlan_tci;
 	u8 tun_len;
 	u8 res2;
 	__be16 tun_proto;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index e209f15..607426c 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -732,28 +732,62 @@ nfp_flower_copy_pre_actions(char *act_dst, char *act_src, int len,
 	return act_off;
 }
 
-static int nfp_fl_verify_post_tun_acts(char *acts, int len)
+static int
+nfp_fl_verify_post_tun_acts(char *acts, int len, struct nfp_fl_push_vlan **vlan)
 {
 	struct nfp_fl_act_head *a;
 	unsigned int act_off = 0;
 
 	while (act_off < len) {
 		a = (struct nfp_fl_act_head *)&acts[act_off];
-		if (a->jump_id != NFP_FL_ACTION_OPCODE_OUTPUT)
+
+		if (a->jump_id == NFP_FL_ACTION_OPCODE_PUSH_VLAN && !act_off)
+			*vlan = (struct nfp_fl_push_vlan *)a;
+		else if (a->jump_id != NFP_FL_ACTION_OPCODE_OUTPUT)
 			return -EOPNOTSUPP;
 
 		act_off += a->len_lw << NFP_FL_LW_SIZ;
 	}
 
+	/* Ensure any VLAN push also has an egress action. */
+	if (*vlan && act_off <= sizeof(struct nfp_fl_push_vlan))
+		return -EOPNOTSUPP;
+
 	return 0;
 }
 
 static int
+nfp_fl_push_vlan_after_tun(char *acts, int len, struct nfp_fl_push_vlan *vlan)
+{
+	struct nfp_fl_set_ipv4_tun *tun;
+	struct nfp_fl_act_head *a;
+	unsigned int act_off = 0;
+
+	while (act_off < len) {
+		a = (struct nfp_fl_act_head *)&acts[act_off];
+
+		if (a->jump_id == NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL) {
+			tun = (struct nfp_fl_set_ipv4_tun *)a;
+			tun->outer_vlan_tpid = vlan->vlan_tpid;
+			tun->outer_vlan_tci = vlan->vlan_tci;
+
+			return 0;
+		}
+
+		act_off += a->len_lw << NFP_FL_LW_SIZ;
+	}
+
+	/* Return error if no tunnel action is found. */
+	return -EOPNOTSUPP;
+}
+
+static int
 nfp_flower_merge_action(struct nfp_fl_payload *sub_flow1,
 			struct nfp_fl_payload *sub_flow2,
 			struct nfp_fl_payload *merge_flow)
 {
 	unsigned int sub1_act_len, sub2_act_len, pre_off1, pre_off2;
+	struct nfp_fl_push_vlan *post_tun_push_vlan = NULL;
 	bool tunnel_act = false;
 	char *merge_act;
 	int err;
@@ -790,18 +824,36 @@ nfp_flower_merge_action(struct nfp_fl_payload *sub_flow1,
 	sub2_act_len -= pre_off2;
 
 	/* FW does a tunnel push when egressing, therefore, if sub_flow 1 pushes
-	 * a tunnel, sub_flow 2 can only have output actions for a valid merge.
+	 * a tunnel, there are restrictions on what sub_flow 2 actions lead to a
+	 * valid merge.
 	 */
 	if (tunnel_act) {
 		char *post_tun_acts = &sub_flow2->action_data[pre_off2];
 
-		err = nfp_fl_verify_post_tun_acts(post_tun_acts, sub2_act_len);
+		err = nfp_fl_verify_post_tun_acts(post_tun_acts, sub2_act_len,
+						  &post_tun_push_vlan);
 		if (err)
 			return err;
+
+		if (post_tun_push_vlan) {
+			pre_off2 += sizeof(*post_tun_push_vlan);
+			sub2_act_len -= sizeof(*post_tun_push_vlan);
+		}
 	}
 
 	/* Copy remaining actions from sub_flows 1 and 2. */
 	memcpy(merge_act, sub_flow1->action_data + pre_off1, sub1_act_len);
+
+	if (post_tun_push_vlan) {
+		/* Update tunnel action in merge to include VLAN push. */
+		err = nfp_fl_push_vlan_after_tun(merge_act, sub1_act_len,
+						 post_tun_push_vlan);
+		if (err)
+			return err;
+
+		merge_flow->meta.act_len -= sizeof(*post_tun_push_vlan);
+	}
+
 	merge_act += sub1_act_len;
 	memcpy(merge_act, sub_flow2->action_data + pre_off2, sub2_act_len);
 
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next 04/10] net: sched: add ingress mirred action to hardware IR
From: John Hurley @ 2019-08-04 15:09 UTC (permalink / raw)
  To: netdev; +Cc: davem, simon.horman, jakub.kicinski, oss-drivers, John Hurley
In-Reply-To: <1564931351-1036-1-git-send-email-john.hurley@netronome.com>

TC mirred actions (redirect and mirred) can send to egress or ingress of a
device. Currently only egress is used for hw offload rules.

Modify the intermediate representation for hw offload to include mirred
actions that go to ingress. This gives drivers access to such rules and
can decide whether or not to offload them.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 include/net/flow_offload.h | 2 ++
 net/sched/cls_api.c        | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 04c29f5..d3b12bc 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -117,6 +117,8 @@ enum flow_action_id {
 	FLOW_ACTION_GOTO,
 	FLOW_ACTION_REDIRECT,
 	FLOW_ACTION_MIRRED,
+	FLOW_ACTION_REDIRECT_INGRESS,
+	FLOW_ACTION_MIRRED_INGRESS,
 	FLOW_ACTION_VLAN_PUSH,
 	FLOW_ACTION_VLAN_POP,
 	FLOW_ACTION_VLAN_MANGLE,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ae73d37..9d85d32 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3205,6 +3205,12 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 		} else if (is_tcf_mirred_egress_mirror(act)) {
 			entry->id = FLOW_ACTION_MIRRED;
 			entry->dev = tcf_mirred_dev(act);
+		} else if (is_tcf_mirred_ingress_redirect(act)) {
+			entry->id = FLOW_ACTION_REDIRECT_INGRESS;
+			entry->dev = tcf_mirred_dev(act);
+		} else if (is_tcf_mirred_ingress_mirror(act)) {
+			entry->id = FLOW_ACTION_MIRRED_INGRESS;
+			entry->dev = tcf_mirred_dev(act);
 		} else if (is_tcf_vlan(act)) {
 			switch (tcf_vlan_action(act)) {
 			case TCA_VLAN_ACT_PUSH:
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next 02/10] net: sched: add skbedit of ptype action to hardware IR
From: John Hurley @ 2019-08-04 15:09 UTC (permalink / raw)
  To: netdev; +Cc: davem, simon.horman, jakub.kicinski, oss-drivers, John Hurley
In-Reply-To: <1564931351-1036-1-git-send-email-john.hurley@netronome.com>

TC rules can impliment skbedit actions. Currently actions that modify the
skb mark are passed to offloading drivers via the hardware intermediate
representation in the flow_offload API.

Extend this to include skbedit actions that modify the packet type of the
skb. Such actions may be used to set the ptype to HOST when redirecting a
packet to ingress.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 include/net/flow_offload.h | 2 ++
 net/sched/cls_api.c        | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 00b9aab..04c29f5 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -126,6 +126,7 @@ enum flow_action_id {
 	FLOW_ACTION_ADD,
 	FLOW_ACTION_CSUM,
 	FLOW_ACTION_MARK,
+	FLOW_ACTION_PTYPE,
 	FLOW_ACTION_WAKE,
 	FLOW_ACTION_QUEUE,
 	FLOW_ACTION_SAMPLE,
@@ -168,6 +169,7 @@ struct flow_action_entry {
 		const struct ip_tunnel_info *tunnel;	/* FLOW_ACTION_TUNNEL_ENCAP */
 		u32			csum_flags;	/* FLOW_ACTION_CSUM */
 		u32			mark;		/* FLOW_ACTION_MARK */
+		u16                     ptype;          /* FLOW_ACTION_PTYPE */
 		struct {				/* FLOW_ACTION_QUEUE */
 			u32		ctx;
 			u32		index;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 3565d9a..ae73d37 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3294,6 +3294,9 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 			default:
 				goto err_out;
 			}
+		} else if (is_tcf_skbedit_ptype(act)) {
+			entry->id = FLOW_ACTION_PTYPE;
+			entry->ptype = tcf_skbedit_ptype(act);
 		} else {
 			goto err_out;
 		}
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next 01/10] net: tc_act: add skbedit_ptype helper functions
From: John Hurley @ 2019-08-04 15:09 UTC (permalink / raw)
  To: netdev; +Cc: davem, simon.horman, jakub.kicinski, oss-drivers, John Hurley
In-Reply-To: <1564931351-1036-1-git-send-email-john.hurley@netronome.com>

The tc_act header file contains an inline function that checks if an
action is changing the skb mark of a packet and a further function to
extract the mark.

Add similar functions to check for and get skbedit actions that modify
the packet type of the skb.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 include/net/tc_act/tc_skbedit.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 4c04e29..b22a1f6 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -54,4 +54,31 @@ static inline u32 tcf_skbedit_mark(const struct tc_action *a)
 	return mark;
 }
 
+/* Return true iff action is ptype */
+static inline bool is_tcf_skbedit_ptype(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	u32 flags;
+
+	if (a->ops && a->ops->id == TCA_ID_SKBEDIT) {
+		rcu_read_lock();
+		flags = rcu_dereference(to_skbedit(a)->params)->flags;
+		rcu_read_unlock();
+		return flags == SKBEDIT_F_PTYPE;
+	}
+#endif
+	return false;
+}
+
+static inline u32 tcf_skbedit_ptype(const struct tc_action *a)
+{
+	u16 ptype;
+
+	rcu_read_lock();
+	ptype = rcu_dereference(to_skbedit(a)->params)->ptype;
+	rcu_read_unlock();
+
+	return ptype;
+}
+
 #endif /* __NET_TC_SKBEDIT_H */
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next 00/10] Support tunnels over VLAN in NFP
From: John Hurley @ 2019-08-04 15:09 UTC (permalink / raw)
  To: netdev; +Cc: davem, simon.horman, jakub.kicinski, oss-drivers, John Hurley

This patchset deals with tunnel encap and decap when the end-point IP
address is on an internal port (for example and OvS VLAN port). Tunnel
encap without VLAN is already supported in the NFP driver. This patchset
extends that to include a push VLAN along with tunnel header push.

Patches 1-4 extend the flow_offload IR API to include actions that use
skbedit to set the ptype of an SKB and that send a packet to port ingress
from the act_mirred module. Such actions are used in flower rules that
forward tunnel packets to internal ports where they can be decapsulated.
OvS and its TC API is an example of a user-space app that produces such
rules.

Patch 5 modifies the encap offload code to allow the pushing of a VLAN
header after a tunnel header push.

Patches 6-10 deal with tunnel decap when the end-point is on an internal
port. They detect 'pre-tunnel rules' which do not deal with tunnels
themselves but, rather, forward packets to internal ports where they
can be decapped if required. Such rules are offloaded to a table in HW
along with an indication of whether packets need to be passed to this
table of not (based on their destination MAC address). Matching against
this table prior to decapsulation in HW allows the correct parsing and
handling of outer VLANs on tunnelled packets and the correct updating of
stats for said 'pre-tunnel' rules.

John Hurley (10):
  net: tc_act: add skbedit_ptype helper functions
  net: sched: add skbedit of ptype action to hardware IR
  net: tc_act: add helpers to detect ingress mirred actions
  net: sched: add ingress mirred action to hardware IR
  nfp: flower: push vlan after tunnel in merge
  nfp: flower: detect potential pre-tunnel rules
  nfp: flower: verify pre-tunnel rules
  nfp: flower: offload pre-tunnel rules
  nfp: flower: remove offloaded MACs when reprs are applied to OvS
    bridges
  nfp: flower: encode mac indexes with pre-tunnel rule check

 drivers/net/ethernet/netronome/nfp/flower/action.c |  40 ++++-
 drivers/net/ethernet/netronome/nfp/flower/cmsg.h   |   4 +-
 drivers/net/ethernet/netronome/nfp/flower/main.c   |   1 +
 drivers/net/ethernet/netronome/nfp/flower/main.h   |  19 ++
 .../net/ethernet/netronome/nfp/flower/offload.c    | 186 ++++++++++++++++++-
 .../ethernet/netronome/nfp/flower/tunnel_conf.c    | 200 +++++++++++++++++++--
 include/net/flow_offload.h                         |   4 +
 include/net/tc_act/tc_mirred.h                     |  18 ++
 include/net/tc_act/tc_skbedit.h                    |  27 +++
 net/sched/cls_api.c                                |   9 +
 10 files changed, 476 insertions(+), 32 deletions(-)

-- 
2.7.4


^ permalink raw reply

* Re: [PATCH net] net: dsa: mv88e6xxx: drop adjust_link to enabled phylink
From: Andrew Lunn @ 2019-08-04 15:10 UTC (permalink / raw)
  To: Hubert Feurstein
  Cc: netdev, linux-kernel, Vivien Didelot, Florian Fainelli,
	David S. Miller
In-Reply-To: <20190731154239.19270-1-h.feurstein@gmail.com>

On Wed, Jul 31, 2019 at 05:42:39PM +0200, Hubert Feurstein wrote:
> We have to drop the adjust_link callback in order to finally migrate to
> phylink.
> 
> Otherwise we get the following warning during startup:
>   "mv88e6xxx 2188000.ethernet-1:10: Using legacy PHYLIB callbacks. Please
>    migrate to PHYLINK!"

Hi Hubert

Do we need the same patch for the b53 driver?

   Andrew

^ permalink raw reply

* Re: [PATCH net] net: dsa: mv88e6xxx: drop adjust_link to enabled phylink
From: Andrew Lunn @ 2019-08-04 15:04 UTC (permalink / raw)
  To: Hubert Feurstein
  Cc: netdev, linux-kernel, Vivien Didelot, Florian Fainelli,
	David S. Miller
In-Reply-To: <20190731154239.19270-1-h.feurstein@gmail.com>

On Wed, Jul 31, 2019 at 05:42:39PM +0200, Hubert Feurstein wrote:
> We have to drop the adjust_link callback in order to finally migrate to
> phylink.
> 
> Otherwise we get the following warning during startup:
>   "mv88e6xxx 2188000.ethernet-1:10: Using legacy PHYLIB callbacks. Please
>    migrate to PHYLINK!"
> 
> The warning is generated in the function dsa_port_link_register_of in
> dsa/port.c:
> 
>   int dsa_port_link_register_of(struct dsa_port *dp)
>   {
>   	struct dsa_switch *ds = dp->ds;
> 
>   	if (!ds->ops->adjust_link)
>   		return dsa_port_phylink_register(dp);
> 
>   	dev_warn(ds->dev,
>   		 "Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n");
>   	[...]
>   }
> 
> Signed-off-by: Hubert Feurstein <h.feurstein@gmail.com>

Reviewed-by: Andrew Lunn <andrew@lunn.ch>

    Andrew

^ permalink raw reply

* Re: [PATCH 0/3] Use refcount_t for refcount
From: Chuhong Yuan @ 2019-08-04 14:58 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Saeed Mahameed, David S . Miller, Doug Ledford, Jason Gunthorpe,
	linux-rdma, Netdev, LKML
In-Reply-To: <20190804124820.GH4832@mtr-leonro.mtl.com>

On Sun, Aug 4, 2019 at 8:48 PM Leon Romanovsky <leon@kernel.org> wrote:
>
> On Fri, Aug 02, 2019 at 08:10:35PM +0800, Chuhong Yuan wrote:
> > Reference counters are preferred to use refcount_t instead of
> > atomic_t.
> > This is because the implementation of refcount_t can prevent
> > overflows and detect possible use-after-free.
> >
> > First convert the refcount field to refcount_t in mlx5/driver.h.
> > Then convert the uses to refcount_() APIs.
>
> You can't do it, because you need to ensure that driver compiles and
> works between patches. By converting driver.h alone to refcount_t, you
> simply broke mlx5 driver.
>

It is my fault... I am not clear how to send patches which cross
several subsystems, so I sent them in series.
Maybe I should merge these patches together?


> NAK, to be clear.
>
> And please don't sent series of patches as standalone patches.
>

Due to the reason mentioned above, I sent them seperately.

> Thanks,

^ permalink raw reply

* Re: [RFC PATCH 1/2] dt-bindings: net: macb: Add new property for PS SGMII only
From: Andrew Lunn @ 2019-08-04 14:56 UTC (permalink / raw)
  To: Harini Katakam
  Cc: nicolas.ferre, davem, claudiu.beznea, robh+dt, mark.rutland,
	netdev, linux-kernel, michal.simek, harinikatakamlinux,
	devicetree
In-Reply-To: <1564566033-676-2-git-send-email-harini.katakam@xilinx.com>

On Wed, Jul 31, 2019 at 03:10:32PM +0530, Harini Katakam wrote:
> Add a new property to indicate when PS SGMII is used with NO
> external PHY on board.

Hi Harini

What exactly is you use case? Are you connecting to a Ethernet switch?
To an SFP cage with a copper module?

   Andrew

^ permalink raw reply

* Re: [PATCH net-next v3] net: phy: broadcom: add 1000Base-X support for BCM54616S
From: Andrew Lunn @ 2019-08-04 14:51 UTC (permalink / raw)
  To: Tao Ren
  Cc: Vladimir Oltean, Florian Fainelli, Heiner Kallweit,
	David S . Miller, Arun Parameswaran, Justin Chen, netdev, lkml,
	openbmc@lists.ozlabs.org
In-Reply-To: <53e18a01-3d08-3023-374f-2c712c4ee9ea@fb.com>

> > The patchset looks better now. But is it ok, I wonder, to keep
> > PHY_BCM_FLAGS_MODE_1000BX in phydev->dev_flags, considering that
> > phy_attach_direct is overwriting it?
> 

> I checked ftgmac100 driver (used on my machine) and it calls
> phy_connect_direct which passes phydev->dev_flags when calling
> phy_attach_direct: that explains why the flag is not cleared in my
> case.

Yes, that is the way it is intended to be used. The MAC driver can
pass flags to the PHY. It is a fragile API, since the MAC needs to
know what PHY is being used, since the flags are driver specific.

One option would be to modify the assignment in phy_attach_direct() to
OR in the flags passed to it with flags which are already in
phydev->dev_flags.

	Andrew

^ permalink raw reply

* Re: [PATCH v2] net/mlx5e: Use refcount_t for refcount
From: Chuhong Yuan @ 2019-08-04 14:44 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Saeed Mahameed, David S . Miller, Netdev, linux-rdma, LKML
In-Reply-To: <20190804125858.GJ4832@mtr-leonro.mtl.com>

On Sun, Aug 4, 2019 at 8:59 PM Leon Romanovsky <leon@kernel.org> wrote:
>
> On Sat, Aug 03, 2019 at 12:48:28AM +0800, Chuhong Yuan wrote:
> > refcount_t is better for reference counters since its
> > implementation can prevent overflows.
> > So convert atomic_t ref counters to refcount_t.
>
> I'm not thrilled to see those automatic conversion patches, especially
> for flows which can't overflow. There is nothing wrong in using atomic_t
> type of variable, do you have in mind flow which will cause to overflow?
>
> Thanks

I have to say that these patches are not done automatically...
Only the detection of problems is done by a script.
All conversions are done manually.

I am not sure whether the flow can cause an overflow.
But I think it is hard to ensure that a data path is impossible
to have problems in any cases including being attacked.

So I think it is better to do this minor revision to prevent
potential risk, just like we have done in mlx5/core/cq.c.

Regards,
Chuhong

> >
> > Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
> > ---
> > Changes in v2:
> >   - Add #include.
> >
> >  drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c | 9 +++++----
> >  1 file changed, 5 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
> > index b9d4f4e19ff9..148b55c3db7a 100644
> > --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
> > +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
> > @@ -32,6 +32,7 @@
> >
> >  #include <linux/kernel.h>
> >  #include <linux/module.h>
> > +#include <linux/refcount.h>
> >  #include <linux/mlx5/driver.h>
> >  #include <net/vxlan.h>
> >  #include "mlx5_core.h"
> > @@ -48,7 +49,7 @@ struct mlx5_vxlan {
> >
> >  struct mlx5_vxlan_port {
> >       struct hlist_node hlist;
> > -     atomic_t refcount;
> > +     refcount_t refcount;
> >       u16 udp_port;
> >  };
> >
> > @@ -113,7 +114,7 @@ int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
> >
> >       vxlanp = mlx5_vxlan_lookup_port(vxlan, port);
> >       if (vxlanp) {
> > -             atomic_inc(&vxlanp->refcount);
> > +             refcount_inc(&vxlanp->refcount);
> >               return 0;
> >       }
> >
> > @@ -137,7 +138,7 @@ int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
> >       }
> >
> >       vxlanp->udp_port = port;
> > -     atomic_set(&vxlanp->refcount, 1);
> > +     refcount_set(&vxlanp->refcount, 1);
> >
> >       spin_lock_bh(&vxlan->lock);
> >       hash_add(vxlan->htable, &vxlanp->hlist, port);
> > @@ -170,7 +171,7 @@ int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port)
> >               goto out_unlock;
> >       }
> >
> > -     if (atomic_dec_and_test(&vxlanp->refcount)) {
> > +     if (refcount_dec_and_test(&vxlanp->refcount)) {
> >               hash_del(&vxlanp->hlist);
> >               remove = true;
> >       }
> > --
> > 2.20.1
> >

^ permalink raw reply

* [PATCH net-next v6 4/6] flow_offload: move tc indirect block to flow offload
From: wenxu @ 2019-08-04 13:23 UTC (permalink / raw)
  To: jakub.kicinski, jiri; +Cc: netfilter-devel, netdev
In-Reply-To: <1564925041-23530-1-git-send-email-wenxu@ucloud.cn>

From: wenxu <wenxu@ucloud.cn>

move tc indirect block to flow_offload and rename
it to flow indirect block.The nf_tables can use the
indr block architecture.

Signed-off-by: wenxu <wenxu@ucloud.cn>
---
v6: add a block_get_and_ing_cmd callback

 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  10 +-
 .../net/ethernet/netronome/nfp/flower/offload.c    |  11 +-
 include/net/flow_offload.h                         |  29 +++
 include/net/pkt_cls.h                              |  35 ---
 include/net/sch_generic.h                          |   3 -
 net/core/flow_offload.c                            | 215 ++++++++++++++++++
 net/sched/cls_api.c                                | 240 +++------------------
 7 files changed, 280 insertions(+), 263 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 6edf0ae..a820915 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -781,9 +781,9 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
 {
 	int err;
 
-	err = __tc_indr_block_cb_register(netdev, rpriv,
-					  mlx5e_rep_indr_setup_tc_cb,
-					  rpriv);
+	err = __flow_indr_block_cb_register(netdev, rpriv,
+					    mlx5e_rep_indr_setup_tc_cb,
+					    rpriv);
 	if (err) {
 		struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
 
@@ -796,8 +796,8 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
 static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
 					    struct net_device *netdev)
 {
-	__tc_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb,
-				      rpriv);
+	__flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb,
+					rpriv);
 }
 
 static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index e209f15..7b490db 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -1479,16 +1479,17 @@ int nfp_flower_reg_indir_block_handler(struct nfp_app *app,
 		return NOTIFY_OK;
 
 	if (event == NETDEV_REGISTER) {
-		err = __tc_indr_block_cb_register(netdev, app,
-						  nfp_flower_indr_setup_tc_cb,
-						  app);
+		err = __flow_indr_block_cb_register(netdev, app,
+						    nfp_flower_indr_setup_tc_cb,
+						    app);
 		if (err)
 			nfp_flower_cmsg_warn(app,
 					     "Indirect block reg failed - %s\n",
 					     netdev->name);
 	} else if (event == NETDEV_UNREGISTER) {
-		__tc_indr_block_cb_unregister(netdev,
-					      nfp_flower_indr_setup_tc_cb, app);
+		__flow_indr_block_cb_unregister(netdev,
+						nfp_flower_indr_setup_tc_cb,
+						app);
 	}
 
 	return NOTIFY_OK;
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 00b9aab..8f1a7b8 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -4,6 +4,7 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <net/flow_dissector.h>
+#include <linux/rhashtable.h>
 
 struct flow_match {
 	struct flow_dissector	*dissector;
@@ -366,4 +367,32 @@ static inline void flow_block_init(struct flow_block *flow_block)
 	INIT_LIST_HEAD(&flow_block->cb_list);
 }
 
+typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
+				      enum tc_setup_type type, void *type_data);
+
+typedef void flow_indr_block_ing_cmd_t(struct net_device *dev,
+					flow_indr_block_bind_cb_t *cb,
+					void *cb_priv,
+					enum flow_block_command command);
+
+int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				  flow_indr_block_bind_cb_t *cb,
+				  void *cb_ident);
+
+void __flow_indr_block_cb_unregister(struct net_device *dev,
+				     flow_indr_block_bind_cb_t *cb,
+				     void *cb_ident);
+
+int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				flow_indr_block_bind_cb_t *cb, void *cb_ident);
+
+void flow_indr_block_cb_unregister(struct net_device *dev,
+				   flow_indr_block_bind_cb_t *cb,
+				   void *cb_ident);
+
+void flow_indr_block_call(struct net_device *dev,
+			  flow_indr_block_ing_cmd_t *cb,
+			  struct flow_block_offload *bo,
+			  enum flow_block_command command);
+
 #endif /* _NET_FLOW_OFFLOAD_H */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e429809..0790a4e 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -70,15 +70,6 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
 	return block->q;
 }
 
-int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-				tc_indr_block_bind_cb_t *cb, void *cb_ident);
-int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-			      tc_indr_block_bind_cb_t *cb, void *cb_ident);
-void __tc_indr_block_cb_unregister(struct net_device *dev,
-				   tc_indr_block_bind_cb_t *cb, void *cb_ident);
-void tc_indr_block_cb_unregister(struct net_device *dev,
-				 tc_indr_block_bind_cb_t *cb, void *cb_ident);
-
 int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 		 struct tcf_result *res, bool compat_mode);
 
@@ -137,32 +128,6 @@ void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb,
 {
 }
 
-static inline
-int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-				tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	return 0;
-}
-
-static inline
-int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-			      tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	return 0;
-}
-
-static inline
-void __tc_indr_block_cb_unregister(struct net_device *dev,
-				   tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-}
-
-static inline
-void tc_indr_block_cb_unregister(struct net_device *dev,
-				 tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-}
-
 static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 			       struct tcf_result *res, bool compat_mode)
 {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 6b6b012..d9f359a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -23,9 +23,6 @@
 struct module;
 struct bpf_flow_keys;
 
-typedef int tc_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
-				    enum tc_setup_type type, void *type_data);
-
 struct qdisc_rate_table {
 	struct tc_ratespec rate;
 	u32		data[256];
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index d63b970..4cc18e4 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -2,6 +2,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <net/flow_offload.h>
+#include <linux/rtnetlink.h>
 
 struct flow_rule *flow_rule_alloc(unsigned int num_actions)
 {
@@ -280,3 +281,217 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
 	}
 }
 EXPORT_SYMBOL(flow_block_cb_setup_simple);
+
+static struct rhashtable indr_setup_block_ht;
+
+struct flow_indr_block_cb {
+	struct list_head list;
+	void *cb_priv;
+	flow_indr_block_bind_cb_t *cb;
+	void *cb_ident;
+};
+
+struct flow_indr_block_dev {
+	struct rhash_head ht_node;
+	struct net_device *dev;
+	unsigned int refcnt;
+	flow_indr_block_ing_cmd_t  *block_ing_cmd_cb;
+	struct list_head cb_list;
+};
+
+static const struct rhashtable_params flow_indr_setup_block_ht_params = {
+	.key_offset	= offsetof(struct flow_indr_block_dev, dev),
+	.head_offset	= offsetof(struct flow_indr_block_dev, ht_node),
+	.key_len	= sizeof(struct net_device *),
+};
+
+static struct flow_indr_block_dev *
+flow_indr_block_dev_lookup(struct net_device *dev)
+{
+	return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
+				      flow_indr_setup_block_ht_params);
+}
+
+static struct flow_indr_block_dev *
+flow_indr_block_dev_get(struct net_device *dev)
+{
+	struct flow_indr_block_dev *indr_dev;
+
+	indr_dev = flow_indr_block_dev_lookup(dev);
+	if (indr_dev)
+		goto inc_ref;
+
+	indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
+	if (!indr_dev)
+		return NULL;
+
+	INIT_LIST_HEAD(&indr_dev->cb_list);
+	indr_dev->dev = dev;
+	if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+				   flow_indr_setup_block_ht_params)) {
+		kfree(indr_dev);
+		return NULL;
+	}
+
+inc_ref:
+	indr_dev->refcnt++;
+	return indr_dev;
+}
+
+static void flow_indr_block_dev_put(struct flow_indr_block_dev *indr_dev)
+{
+	if (--indr_dev->refcnt)
+		return;
+
+	rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+			       flow_indr_setup_block_ht_params);
+	kfree(indr_dev);
+}
+
+static struct flow_indr_block_cb *
+flow_indr_block_cb_lookup(struct flow_indr_block_dev *indr_dev,
+			  flow_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+
+	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+		if (indr_block_cb->cb == cb &&
+		    indr_block_cb->cb_ident == cb_ident)
+			return indr_block_cb;
+	return NULL;
+}
+
+static struct flow_indr_block_cb *
+flow_indr_block_cb_add(struct flow_indr_block_dev *indr_dev, void *cb_priv,
+		       flow_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+
+	indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+	if (indr_block_cb)
+		return ERR_PTR(-EEXIST);
+
+	indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
+	if (!indr_block_cb)
+		return ERR_PTR(-ENOMEM);
+
+	indr_block_cb->cb_priv = cb_priv;
+	indr_block_cb->cb = cb;
+	indr_block_cb->cb_ident = cb_ident;
+	list_add(&indr_block_cb->list, &indr_dev->cb_list);
+
+	return indr_block_cb;
+}
+
+static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
+{
+	list_del(&indr_block_cb->list);
+	kfree(indr_block_cb);
+}
+
+int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				  flow_indr_block_bind_cb_t *cb,
+				  void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+	struct flow_indr_block_dev *indr_dev;
+	int err;
+
+	indr_dev = flow_indr_block_dev_get(dev);
+	if (!indr_dev)
+		return -ENOMEM;
+
+	indr_block_cb = flow_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
+	err = PTR_ERR_OR_ZERO(indr_block_cb);
+	if (err)
+		goto err_dev_put;
+
+	if (indr_dev->block_ing_cmd_cb)
+		indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb,
+					   indr_block_cb->cb_priv,
+					   FLOW_BLOCK_BIND);
+
+	return 0;
+
+err_dev_put:
+	flow_indr_block_dev_put(indr_dev);
+	return err;
+}
+EXPORT_SYMBOL_GPL(__flow_indr_block_cb_register);
+
+int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				flow_indr_block_bind_cb_t *cb,
+				void *cb_ident)
+{
+	int err;
+
+	rtnl_lock();
+	err = __flow_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
+	rtnl_unlock();
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_cb_register);
+
+void __flow_indr_block_cb_unregister(struct net_device *dev,
+				     flow_indr_block_bind_cb_t *cb,
+				     void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+	struct flow_indr_block_dev *indr_dev;
+
+	indr_dev = flow_indr_block_dev_lookup(dev);
+	if (!indr_dev)
+		return;
+
+	indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+	if (!indr_block_cb)
+		return;
+
+	if (indr_dev->block_ing_cmd_cb)
+		indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb,
+					   indr_block_cb->cb_priv,
+					   FLOW_BLOCK_UNBIND);
+
+	flow_indr_block_cb_del(indr_block_cb);
+	flow_indr_block_dev_put(indr_dev);
+}
+EXPORT_SYMBOL_GPL(__flow_indr_block_cb_unregister);
+
+void flow_indr_block_cb_unregister(struct net_device *dev,
+				   flow_indr_block_bind_cb_t *cb,
+				   void *cb_ident)
+{
+	rtnl_lock();
+	__flow_indr_block_cb_unregister(dev, cb, cb_ident);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_cb_unregister);
+
+void flow_indr_block_call(struct net_device *dev,
+			  flow_indr_block_ing_cmd_t cb,
+			  struct flow_block_offload *bo,
+			  enum flow_block_command command)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+	struct flow_indr_block_dev *indr_dev;
+
+	indr_dev = flow_indr_block_dev_lookup(dev);
+	if (!indr_dev)
+		return;
+
+	indr_dev->block_ing_cmd_cb = command == FLOW_BLOCK_BIND
+				     ? cb : NULL;
+
+	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
+				  bo);
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_call);
+
+static int __init init_flow_indr_rhashtable(void)
+{
+	return rhashtable_init(&indr_setup_block_ht,
+			       &flow_indr_setup_block_ht_params);
+}
+subsys_initcall(init_flow_indr_rhashtable);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ebbc1e0..939a5c0 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -37,6 +37,7 @@
 #include <net/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_ct.h>
 #include <net/tc_act/tc_mpls.h>
+#include <net/flow_offload.h>
 
 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
 
@@ -545,139 +546,12 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
 	}
 }
 
-static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
-{
-	const struct Qdisc_class_ops *cops;
-	struct Qdisc *qdisc;
-
-	if (!dev_ingress_queue(dev))
-		return NULL;
-
-	qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
-	if (!qdisc)
-		return NULL;
-
-	cops = qdisc->ops->cl_ops;
-	if (!cops)
-		return NULL;
-
-	if (!cops->tcf_block)
-		return NULL;
-
-	return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
-}
-
-static struct rhashtable indr_setup_block_ht;
-
-struct tc_indr_block_dev {
-	struct rhash_head ht_node;
-	struct net_device *dev;
-	unsigned int refcnt;
-	struct list_head cb_list;
-};
-
-struct tc_indr_block_cb {
-	struct list_head list;
-	void *cb_priv;
-	tc_indr_block_bind_cb_t *cb;
-	void *cb_ident;
-};
-
-static const struct rhashtable_params tc_indr_setup_block_ht_params = {
-	.key_offset	= offsetof(struct tc_indr_block_dev, dev),
-	.head_offset	= offsetof(struct tc_indr_block_dev, ht_node),
-	.key_len	= sizeof(struct net_device *),
-};
-
-static struct tc_indr_block_dev *
-tc_indr_block_dev_lookup(struct net_device *dev)
-{
-	return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
-				      tc_indr_setup_block_ht_params);
-}
-
-static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
-{
-	struct tc_indr_block_dev *indr_dev;
-
-	indr_dev = tc_indr_block_dev_lookup(dev);
-	if (indr_dev)
-		goto inc_ref;
-
-	indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
-	if (!indr_dev)
-		return NULL;
-
-	INIT_LIST_HEAD(&indr_dev->cb_list);
-	indr_dev->dev = dev;
-	if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
-				   tc_indr_setup_block_ht_params)) {
-		kfree(indr_dev);
-		return NULL;
-	}
-
-inc_ref:
-	indr_dev->refcnt++;
-	return indr_dev;
-}
-
-static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev)
-{
-	if (--indr_dev->refcnt)
-		return;
-
-	rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
-			       tc_indr_setup_block_ht_params);
-	kfree(indr_dev);
-}
-
-static struct tc_indr_block_cb *
-tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev,
-			tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	struct tc_indr_block_cb *indr_block_cb;
-
-	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
-		if (indr_block_cb->cb == cb &&
-		    indr_block_cb->cb_ident == cb_ident)
-			return indr_block_cb;
-	return NULL;
-}
-
-static struct tc_indr_block_cb *
-tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv,
-		     tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	struct tc_indr_block_cb *indr_block_cb;
-
-	indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
-	if (indr_block_cb)
-		return ERR_PTR(-EEXIST);
-
-	indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
-	if (!indr_block_cb)
-		return ERR_PTR(-ENOMEM);
-
-	indr_block_cb->cb_priv = cb_priv;
-	indr_block_cb->cb = cb;
-	indr_block_cb->cb_ident = cb_ident;
-	list_add(&indr_block_cb->list, &indr_dev->cb_list);
-
-	return indr_block_cb;
-}
-
-static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
-{
-	list_del(&indr_block_cb->list);
-	kfree(indr_block_cb);
-}
-
 static int tcf_block_setup(struct tcf_block *block,
 			   struct flow_block_offload *bo);
 
 static void tc_indr_block_ing_cmd(struct net_device *dev,
 				  struct tcf_block *block,
-				  tc_indr_block_bind_cb_t *cb,
+				  flow_indr_block_bind_cb_t *cb,
 				  void *cb_priv,
 				  enum flow_block_command command)
 {
@@ -699,97 +573,40 @@ static void tc_indr_block_ing_cmd(struct net_device *dev,
 	tcf_block_setup(block, &bo);
 }
 
-int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-				tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	struct tc_indr_block_cb *indr_block_cb;
-	struct tc_indr_block_dev *indr_dev;
-	struct tcf_block *block;
-	int err;
-
-	indr_dev = tc_indr_block_dev_get(dev);
-	if (!indr_dev)
-		return -ENOMEM;
-
-	indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
-	err = PTR_ERR_OR_ZERO(indr_block_cb);
-	if (err)
-		goto err_dev_put;
-
-	block = tc_dev_ingress_block(dev);
-	tc_indr_block_ing_cmd(dev, block, indr_block_cb->cb,
-			      indr_block_cb->cb_priv, FLOW_BLOCK_BIND);
-	return 0;
-
-err_dev_put:
-	tc_indr_block_dev_put(indr_dev);
-	return err;
-}
-EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register);
-
-int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-			      tc_indr_block_bind_cb_t *cb, void *cb_ident)
+static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
 {
-	int err;
-
-	rtnl_lock();
-	err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
-	rtnl_unlock();
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(tc_indr_block_cb_register);
+	const struct Qdisc_class_ops *cops;
+	struct Qdisc *qdisc;
 
-void __tc_indr_block_cb_unregister(struct net_device *dev,
-				   tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	struct tc_indr_block_cb *indr_block_cb;
-	struct tc_indr_block_dev *indr_dev;
-	struct tcf_block *block;
+	if (!dev_ingress_queue(dev))
+		return NULL;
 
-	indr_dev = tc_indr_block_dev_lookup(dev);
-	if (!indr_dev)
-		return;
+	qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
+	if (!qdisc)
+		return NULL;
 
-	indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
-	if (!indr_block_cb)
-		return;
+	cops = qdisc->ops->cl_ops;
+	if (!cops)
+		return NULL;
 
-	/* Send unbind message if required to free any block cbs. */
-	block = tc_dev_ingress_block(dev);
-	tc_indr_block_ing_cmd(dev, block, indr_block_cb->cb,
-			      indr_block_cb->cb_priv, FLOW_BLOCK_UNBIND);
-	tc_indr_block_cb_del(indr_block_cb);
-	tc_indr_block_dev_put(indr_dev);
-}
-EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister);
+	if (!cops->tcf_block)
+		return NULL;
 
-void tc_indr_block_cb_unregister(struct net_device *dev,
-				 tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	rtnl_lock();
-	__tc_indr_block_cb_unregister(dev, cb, cb_ident);
-	rtnl_unlock();
+	return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
 }
-EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);
 
-static void flow_indr_block_call(struct net_device *dev,
-				 struct flow_block_offload *bo,
-				 enum flow_block_command command)
+static void tc_indr_block_get_and_ing_cmd(struct net_device *dev,
+					  flow_indr_block_bind_cb_t *cb,
+					  void *cb_priv,
+					  enum flow_block_command command)
 {
-	struct tc_indr_block_cb *indr_block_cb;
-	struct tc_indr_block_dev *indr_dev;
-
-	indr_dev = tc_indr_block_dev_lookup(dev);
-	if (!indr_dev)
-		return;
+	struct tcf_block *block = tc_dev_ingress_block(dev);
 
-	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
-		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
-				  bo);
+	tc_indr_block_ing_cmd(dev, block, cb, cb_priv, command);
 }
 
-static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
+static void tc_indr_block_call(struct tcf_block *block,
+			       struct net_device *dev,
 			       struct tcf_block_ext_info *ei,
 			       enum flow_block_command command,
 			       struct netlink_ext_ack *extack)
@@ -804,7 +621,7 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
 	};
 	INIT_LIST_HEAD(&bo.cb_list);
 
-	flow_indr_block_call(dev, &bo, command);
+	flow_indr_block_call(dev, tc_indr_block_get_and_ing_cmd, &bo, command);
 	tcf_block_setup(block, &bo);
 }
 
@@ -3369,11 +3186,6 @@ static int __init tc_filter_init(void)
 	if (err)
 		goto err_register_pernet_subsys;
 
-	err = rhashtable_init(&indr_setup_block_ht,
-			      &tc_indr_setup_block_ht_params);
-	if (err)
-		goto err_rhash_setup_block_ht;
-
 	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
 		      RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
@@ -3387,8 +3199,6 @@ static int __init tc_filter_init(void)
 
 	return 0;
 
-err_rhash_setup_block_ht:
-	unregister_pernet_subsys(&tcf_net_ops);
 err_register_pernet_subsys:
 	destroy_workqueue(tc_filter_wq);
 	return err;
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net-next v6 6/6] netfilter: nf_tables_offload: support indr block call
From: wenxu @ 2019-08-04 13:24 UTC (permalink / raw)
  To: jakub.kicinski, jiri; +Cc: netfilter-devel, netdev
In-Reply-To: <1564925041-23530-1-git-send-email-wenxu@ucloud.cn>

From: wenxu <wenxu@ucloud.cn>

nftable support indr-block call. It makes nftable an offload vlan
and tunnel device.

nft add table netdev firewall
nft add chain netdev firewall aclout { type filter hook ingress offload device mlx_pf0vf0 priority - 300 \; }
nft add rule netdev firewall aclout ip daddr 10.0.0.1 fwd to vlan0
nft add chain netdev firewall aclin { type filter hook ingress device vlan0 priority - 300 \; }
nft add rule netdev firewall aclin ip daddr 10.0.0.7 fwd to mlx_pf0vf0

Signed-off-by: wenxu <wenxu@ucloud.cn>
---
v6: support the new callback list

 include/net/netfilter/nf_tables_offload.h |   4 +
 net/netfilter/nf_tables_api.c             |   7 ++
 net/netfilter/nf_tables_offload.c         | 148 +++++++++++++++++++++++++-----
 3 files changed, 135 insertions(+), 24 deletions(-)

diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index 3196663..bffd51a 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -63,6 +63,10 @@ struct nft_flow_rule {
 struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule);
 void nft_flow_rule_destroy(struct nft_flow_rule *flow);
 int nft_flow_rule_offload_commit(struct net *net);
+void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
+				    flow_indr_block_bind_cb_t *cb,
+				    void *cb_priv,
+				    enum flow_block_command command);
 
 #define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg)		\
 	(__reg)->base_offset	=					\
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 605a7cf..fe3b7b0 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7593,6 +7593,11 @@ static void __net_exit nf_tables_exit_net(struct net *net)
 	.exit	= nf_tables_exit_net,
 };
 
+static struct flow_indr_block_ing_entry block_ing_entry = {
+	.cb = nft_indr_block_get_and_ing_cmd,
+	.list = LIST_HEAD_INIT(block_ing_entry.list),
+};
+
 static int __init nf_tables_module_init(void)
 {
 	int err;
@@ -7624,6 +7629,7 @@ static int __init nf_tables_module_init(void)
 		goto err5;
 
 	nft_chain_route_init();
+	flow_indr_add_block_ing_cb(&block_ing_entry);
 	return err;
 err5:
 	rhltable_destroy(&nft_objname_ht);
@@ -7640,6 +7646,7 @@ static int __init nf_tables_module_init(void)
 
 static void __exit nf_tables_module_exit(void)
 {
+	flow_indr_del_block_ing_cb(&block_ing_entry);
 	nfnetlink_subsys_unregister(&nf_tables_subsys);
 	unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
 	nft_chain_filter_fini();
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 64f5fd5..d3c4c9c 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -171,24 +171,110 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo,
 	return 0;
 }
 
+static int nft_block_setup(struct nft_base_chain *basechain,
+			   struct flow_block_offload *bo,
+			   enum flow_block_command cmd)
+{
+	int err;
+
+	switch (cmd) {
+	case FLOW_BLOCK_BIND:
+		err = nft_flow_offload_bind(bo, basechain);
+		break;
+	case FLOW_BLOCK_UNBIND:
+		err = nft_flow_offload_unbind(bo, basechain);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+static int nft_block_offload_cmd(struct nft_base_chain *chain,
+				 struct net_device *dev,
+				 enum flow_block_command cmd)
+{
+	struct netlink_ext_ack extack = {};
+	struct flow_block_offload bo = {};
+	int err;
+
+	bo.net = dev_net(dev);
+	bo.block = &chain->flow_block;
+	bo.command = cmd;
+	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	bo.extack = &extack;
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+	if (err < 0)
+		return err;
+
+	return nft_block_setup(chain, &bo, cmd);
+}
+
+static void nft_indr_block_ing_cmd(struct net_device *dev,
+				   struct nft_base_chain *chain,
+				   flow_indr_block_bind_cb_t *cb,
+				   void *cb_priv,
+				   enum flow_block_command cmd)
+{
+	struct netlink_ext_ack extack = {};
+	struct flow_block_offload bo = {};
+
+	if (!chain)
+		return;
+
+	bo.net = dev_net(dev);
+	bo.block = &chain->flow_block;
+	bo.command = cmd;
+	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	bo.extack = &extack;
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
+
+	nft_block_setup(chain, &bo, cmd);
+}
+
+static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
+				      struct net_device *dev,
+				      enum flow_block_command cmd)
+{
+	struct flow_block_offload bo = {};
+	struct netlink_ext_ack extack = {};
+
+	bo.net = dev_net(dev);
+	bo.block = &chain->flow_block;
+	bo.command = cmd;
+	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	bo.extack = &extack;
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	flow_indr_block_call(dev, &bo, cmd);
+
+	if (list_empty(&bo.cb_list))
+		return -EOPNOTSUPP;
+
+	return nft_block_setup(chain, &bo, cmd);
+}
+
 #define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
 
 static int nft_flow_offload_chain(struct nft_trans *trans,
 				  enum flow_block_command cmd)
 {
 	struct nft_chain *chain = trans->ctx.chain;
-	struct netlink_ext_ack extack = {};
-	struct flow_block_offload bo = {};
 	struct nft_base_chain *basechain;
 	struct net_device *dev;
-	int err;
 
 	if (!nft_is_base_chain(chain))
 		return -EOPNOTSUPP;
 
 	basechain = nft_base_chain(chain);
 	dev = basechain->ops.dev;
-	if (!dev || !dev->netdev_ops->ndo_setup_tc)
+	if (!dev)
 		return -EOPNOTSUPP;
 
 	/* Only default policy to accept is supported for now. */
@@ -197,26 +283,10 @@ static int nft_flow_offload_chain(struct nft_trans *trans,
 	    nft_trans_chain_policy(trans) != NF_ACCEPT)
 		return -EOPNOTSUPP;
 
-	bo.command = cmd;
-	bo.block = &basechain->flow_block;
-	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
-	bo.extack = &extack;
-	INIT_LIST_HEAD(&bo.cb_list);
-
-	err = dev->netdev_ops->ndo_setup_tc(dev, FLOW_SETUP_BLOCK, &bo);
-	if (err < 0)
-		return err;
-
-	switch (cmd) {
-	case FLOW_BLOCK_BIND:
-		err = nft_flow_offload_bind(&bo, basechain);
-		break;
-	case FLOW_BLOCK_UNBIND:
-		err = nft_flow_offload_unbind(&bo, basechain);
-		break;
-	}
-
-	return err;
+	if (dev->netdev_ops->ndo_setup_tc)
+		return nft_block_offload_cmd(basechain, dev, cmd);
+	else
+		return nft_indr_block_offload_cmd(basechain, dev, cmd);
 }
 
 int nft_flow_rule_offload_commit(struct net *net)
@@ -266,3 +336,33 @@ int nft_flow_rule_offload_commit(struct net *net)
 
 	return err;
 }
+
+void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
+				    flow_indr_block_bind_cb_t *cb,
+				    void *cb_priv,
+				    enum flow_block_command command)
+{
+	struct net *net = dev_net(dev);
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+
+	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+		if (table->family != NFPROTO_NETDEV)
+			continue;
+
+		list_for_each_entry_rcu(chain, &table->chains, list) {
+			if (nft_is_base_chain(chain)) {
+				struct nft_base_chain *basechain;
+
+				basechain = nft_base_chain(chain);
+				if (!strncmp(basechain->dev_name, dev->name,
+					     IFNAMSIZ)) {
+					nft_indr_block_ing_cmd(dev, basechain,
+							       cb, cb_priv,
+							       command);
+					return;
+				}
+			}
+		}
+	}
+}
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net-next v6 5/6] flow_offload: support get multi-subsystem block
From: wenxu @ 2019-08-04 13:24 UTC (permalink / raw)
  To: jakub.kicinski, jiri; +Cc: netfilter-devel, netdev
In-Reply-To: <1564925041-23530-1-git-send-email-wenxu@ucloud.cn>

From: wenxu <wenxu@ucloud.cn>

It provide a callback list to find the blocks of tc
and nft subsystems

Signed-off-by: wenxu <wenxu@ucloud.cn>
---
v6: new patch

 include/net/flow_offload.h | 10 +++++++++-
 net/core/flow_offload.c    | 47 +++++++++++++++++++++++++++++++++-------------
 net/sched/cls_api.c        |  9 ++++++++-
 3 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 8f1a7b8..6022dd0 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -375,6 +375,15 @@ typedef void flow_indr_block_ing_cmd_t(struct net_device *dev,
 					void *cb_priv,
 					enum flow_block_command command);
 
+struct flow_indr_block_ing_entry {
+	flow_indr_block_ing_cmd_t *cb;
+	struct list_head	list;
+};
+
+void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry);
+
+void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry);
+
 int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 				  flow_indr_block_bind_cb_t *cb,
 				  void *cb_ident);
@@ -391,7 +400,6 @@ void flow_indr_block_cb_unregister(struct net_device *dev,
 				   void *cb_ident);
 
 void flow_indr_block_call(struct net_device *dev,
-			  flow_indr_block_ing_cmd_t *cb,
 			  struct flow_block_offload *bo,
 			  enum flow_block_command command);
 
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 4cc18e4..0e84537 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -282,6 +282,8 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
 }
 EXPORT_SYMBOL(flow_block_cb_setup_simple);
 
+static LIST_HEAD(block_ing_cb_list);
+
 static struct rhashtable indr_setup_block_ht;
 
 struct flow_indr_block_cb {
@@ -295,7 +297,6 @@ struct flow_indr_block_dev {
 	struct rhash_head ht_node;
 	struct net_device *dev;
 	unsigned int refcnt;
-	flow_indr_block_ing_cmd_t  *block_ing_cmd_cb;
 	struct list_head cb_list;
 };
 
@@ -389,6 +390,22 @@ static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
 	kfree(indr_block_cb);
 }
 
+static void flow_block_ing_cmd(struct net_device *dev,
+			       flow_indr_block_bind_cb_t *cb,
+			       void *cb_priv,
+			       enum flow_block_command command)
+{
+	struct flow_indr_block_ing_entry *entry;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(entry, &block_ing_cb_list, list) {
+		entry->cb(dev, cb, cb_priv, command);
+	}
+
+	rcu_read_unlock();
+}
+
 int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 				  flow_indr_block_bind_cb_t *cb,
 				  void *cb_ident)
@@ -406,10 +423,8 @@ int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 	if (err)
 		goto err_dev_put;
 
-	if (indr_dev->block_ing_cmd_cb)
-		indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb,
-					   indr_block_cb->cb_priv,
-					   FLOW_BLOCK_BIND);
+	flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
+			   FLOW_BLOCK_BIND);
 
 	return 0;
 
@@ -448,10 +463,8 @@ void __flow_indr_block_cb_unregister(struct net_device *dev,
 	if (!indr_block_cb)
 		return;
 
-	if (indr_dev->block_ing_cmd_cb)
-		indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb,
-					   indr_block_cb->cb_priv,
-					   FLOW_BLOCK_UNBIND);
+	flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
+			   FLOW_BLOCK_UNBIND);
 
 	flow_indr_block_cb_del(indr_block_cb);
 	flow_indr_block_dev_put(indr_dev);
@@ -469,7 +482,6 @@ void flow_indr_block_cb_unregister(struct net_device *dev,
 EXPORT_SYMBOL_GPL(flow_indr_block_cb_unregister);
 
 void flow_indr_block_call(struct net_device *dev,
-			  flow_indr_block_ing_cmd_t cb,
 			  struct flow_block_offload *bo,
 			  enum flow_block_command command)
 {
@@ -480,15 +492,24 @@ void flow_indr_block_call(struct net_device *dev,
 	if (!indr_dev)
 		return;
 
-	indr_dev->block_ing_cmd_cb = command == FLOW_BLOCK_BIND
-				     ? cb : NULL;
-
 	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
 		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
 				  bo);
 }
 EXPORT_SYMBOL_GPL(flow_indr_block_call);
 
+void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry)
+{
+	list_add_tail_rcu(&entry->list, &block_ing_cb_list);
+}
+EXPORT_SYMBOL_GPL(flow_indr_add_block_ing_cb);
+
+void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry)
+{
+	list_del_rcu(&entry->list);
+}
+EXPORT_SYMBOL_GPL(flow_indr_del_block_ing_cb);
+
 static int __init init_flow_indr_rhashtable(void)
 {
 	return rhashtable_init(&indr_setup_block_ht,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 939a5c0..f53478f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -621,7 +621,7 @@ static void tc_indr_block_call(struct tcf_block *block,
 	};
 	INIT_LIST_HEAD(&bo.cb_list);
 
-	flow_indr_block_call(dev, tc_indr_block_get_and_ing_cmd, &bo, command);
+	flow_indr_block_call(dev, &bo, command);
 	tcf_block_setup(block, &bo);
 }
 
@@ -3174,6 +3174,11 @@ static void __net_exit tcf_net_exit(struct net *net)
 	.size = sizeof(struct tcf_net),
 };
 
+static struct flow_indr_block_ing_entry block_ing_entry = {
+	.cb = tc_indr_block_get_and_ing_cmd,
+	.list = LIST_HEAD_INIT(block_ing_entry.list),
+};
+
 static int __init tc_filter_init(void)
 {
 	int err;
@@ -3186,6 +3191,8 @@ static int __init tc_filter_init(void)
 	if (err)
 		goto err_register_pernet_subsys;
 
+	flow_indr_add_block_ing_cb(&block_ing_entry);
+
 	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
 		      RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net-next v6 1/6] cls_api: modify the tc_indr_block_ing_cmd parameters.
From: wenxu @ 2019-08-04 13:23 UTC (permalink / raw)
  To: jakub.kicinski, jiri; +Cc: netfilter-devel, netdev
In-Reply-To: <1564925041-23530-1-git-send-email-wenxu@ucloud.cn>

From: wenxu <wenxu@ucloud.cn>

This patch make tc_indr_block_ing_cmd can't access struct
tc_indr_block_dev and tc_indr_block_cb.

Signed-off-by: wenxu <wenxu@ucloud.cn>
---
v6: no change

 net/sched/cls_api.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 3565d9a..2e3b58d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -677,26 +677,28 @@ static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
 static int tcf_block_setup(struct tcf_block *block,
 			   struct flow_block_offload *bo);
 
-static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
-				  struct tc_indr_block_cb *indr_block_cb,
+static void tc_indr_block_ing_cmd(struct net_device *dev,
+				  struct tcf_block *block,
+				  tc_indr_block_bind_cb_t *cb,
+				  void *cb_priv,
 				  enum flow_block_command command)
 {
 	struct flow_block_offload bo = {
 		.command	= command,
 		.binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
-		.net		= dev_net(indr_dev->dev),
-		.block_shared	= tcf_block_non_null_shared(indr_dev->block),
+		.net		= dev_net(dev),
+		.block_shared	= tcf_block_non_null_shared(block),
 	};
 	INIT_LIST_HEAD(&bo.cb_list);
 
-	if (!indr_dev->block)
+	if (!block)
 		return;
 
-	bo.block = &indr_dev->block->flow_block;
+	bo.block = &block->flow_block;
 
-	indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
-			  &bo);
-	tcf_block_setup(indr_dev->block, &bo);
+	cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
+
+	tcf_block_setup(block, &bo);
 }
 
 int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
@@ -715,7 +717,8 @@ int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 	if (err)
 		goto err_dev_put;
 
-	tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_BIND);
+	tc_indr_block_ing_cmd(dev, indr_dev->block, cb, cb_priv,
+			      FLOW_BLOCK_BIND);
 	return 0;
 
 err_dev_put:
@@ -752,7 +755,8 @@ void __tc_indr_block_cb_unregister(struct net_device *dev,
 		return;
 
 	/* Send unbind message if required to free any block cbs. */
-	tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_UNBIND);
+	tc_indr_block_ing_cmd(dev, indr_dev->block, cb, indr_block_cb->cb_priv,
+			      FLOW_BLOCK_UNBIND);
 	tc_indr_block_cb_del(indr_block_cb);
 	tc_indr_block_dev_put(indr_dev);
 }
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net-next v6 2/6] cls_api: remove the tcf_block cache
From: wenxu @ 2019-08-04 13:23 UTC (permalink / raw)
  To: jakub.kicinski, jiri; +Cc: netfilter-devel, netdev
In-Reply-To: <1564925041-23530-1-git-send-email-wenxu@ucloud.cn>

From: wenxu <wenxu@ucloud.cn>

Remove the tcf_block in the tc_indr_block_dev for muti-subsystem
support.

Signed-off-by: wenxu <wenxu@ucloud.cn>
---
v6: new patch

 net/sched/cls_api.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 2e3b58d..654da8c 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -574,7 +574,6 @@ struct tc_indr_block_dev {
 	struct net_device *dev;
 	unsigned int refcnt;
 	struct list_head cb_list;
-	struct tcf_block *block;
 };
 
 struct tc_indr_block_cb {
@@ -611,7 +610,6 @@ static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
 
 	INIT_LIST_HEAD(&indr_dev->cb_list);
 	indr_dev->dev = dev;
-	indr_dev->block = tc_dev_ingress_block(dev);
 	if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
 				   tc_indr_setup_block_ht_params)) {
 		kfree(indr_dev);
@@ -706,6 +704,7 @@ int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 {
 	struct tc_indr_block_cb *indr_block_cb;
 	struct tc_indr_block_dev *indr_dev;
+	struct tcf_block *block;
 	int err;
 
 	indr_dev = tc_indr_block_dev_get(dev);
@@ -717,8 +716,9 @@ int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 	if (err)
 		goto err_dev_put;
 
-	tc_indr_block_ing_cmd(dev, indr_dev->block, cb, cb_priv,
-			      FLOW_BLOCK_BIND);
+	block = tc_dev_ingress_block(dev);
+	tc_indr_block_ing_cmd(dev, block, indr_block_cb->cb,
+			      indr_block_cb->cb_priv, FLOW_BLOCK_BIND);
 	return 0;
 
 err_dev_put:
@@ -745,6 +745,7 @@ void __tc_indr_block_cb_unregister(struct net_device *dev,
 {
 	struct tc_indr_block_cb *indr_block_cb;
 	struct tc_indr_block_dev *indr_dev;
+	struct tcf_block *block;
 
 	indr_dev = tc_indr_block_dev_lookup(dev);
 	if (!indr_dev)
@@ -755,8 +756,9 @@ void __tc_indr_block_cb_unregister(struct net_device *dev,
 		return;
 
 	/* Send unbind message if required to free any block cbs. */
-	tc_indr_block_ing_cmd(dev, indr_dev->block, cb, indr_block_cb->cb_priv,
-			      FLOW_BLOCK_UNBIND);
+	block = tc_dev_ingress_block(dev);
+	tc_indr_block_ing_cmd(dev, block, indr_block_cb->cb,
+			      indr_block_cb->cb_priv, FLOW_BLOCK_UNBIND);
 	tc_indr_block_cb_del(indr_block_cb);
 	tc_indr_block_dev_put(indr_dev);
 }
@@ -792,8 +794,6 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
 	if (!indr_dev)
 		return;
 
-	indr_dev->block = command == FLOW_BLOCK_BIND ? block : NULL;
-
 	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
 		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
 				  &bo);
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net-next v6 3/6] cls_api: add flow_indr_block_call function
From: wenxu @ 2019-08-04 13:23 UTC (permalink / raw)
  To: jakub.kicinski, jiri; +Cc: netfilter-devel, netdev
In-Reply-To: <1564925041-23530-1-git-send-email-wenxu@ucloud.cn>

From: wenxu <wenxu@ucloud.cn>

This patch make indr_block_call don't access struct tc_indr_block_cb
and tc_indr_block_dev directly

Signed-off-by: wenxu <wenxu@ucloud.cn>
---
v6: no change

 net/sched/cls_api.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 654da8c..ebbc1e0 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -773,13 +773,27 @@ void tc_indr_block_cb_unregister(struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);
 
+static void flow_indr_block_call(struct net_device *dev,
+				 struct flow_block_offload *bo,
+				 enum flow_block_command command)
+{
+	struct tc_indr_block_cb *indr_block_cb;
+	struct tc_indr_block_dev *indr_dev;
+
+	indr_dev = tc_indr_block_dev_lookup(dev);
+	if (!indr_dev)
+		return;
+
+	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
+				  bo);
+}
+
 static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
 			       struct tcf_block_ext_info *ei,
 			       enum flow_block_command command,
 			       struct netlink_ext_ack *extack)
 {
-	struct tc_indr_block_cb *indr_block_cb;
-	struct tc_indr_block_dev *indr_dev;
 	struct flow_block_offload bo = {
 		.command	= command,
 		.binder_type	= ei->binder_type,
@@ -790,14 +804,7 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
 	};
 	INIT_LIST_HEAD(&bo.cb_list);
 
-	indr_dev = tc_indr_block_dev_lookup(dev);
-	if (!indr_dev)
-		return;
-
-	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
-		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
-				  &bo);
-
+	flow_indr_block_call(dev, &bo, command);
 	tcf_block_setup(block, &bo);
 }
 
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net-next v6 0/6] flow_offload: add indr-block in nf_table_offload
From: wenxu @ 2019-08-04 13:23 UTC (permalink / raw)
  To: jakub.kicinski, jiri; +Cc: netfilter-devel, netdev

From: wenxu <wenxu@ucloud.cn>

This series patch make nftables offload support the vlan and
tunnel device offload through indr-block architecture.

The first four patches mv tc indr block to flow offload and
rename to flow-indr-block.
Because the new flow-indr-block can't get the tcf_block
directly. The fifth patch provide a callback list to get 
flow_block of each subsystem immediately when the device
register and contain a block.
The last patch make nf_tables_offload support flow-indr-block.

wenxu (6):
  cls_api: modify the tc_indr_block_ing_cmd parameters.
  cls_api: remove the tcf_block cache
  cls_api: add flow_indr_block_call function
  flow_offload: move tc indirect block to flow offload
  flow_offload: support get multi-subsystem block
  netfilter: nf_tables_offload: support indr block call

 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  10 +-
 .../net/ethernet/netronome/nfp/flower/offload.c    |  11 +-
 include/net/flow_offload.h                         |  37 +++
 include/net/netfilter/nf_tables_offload.h          |   4 +
 include/net/pkt_cls.h                              |  35 ---
 include/net/sch_generic.h                          |   3 -
 net/core/flow_offload.c                            | 236 +++++++++++++++++++
 net/netfilter/nf_tables_api.c                      |   7 +
 net/netfilter/nf_tables_offload.c                  | 148 ++++++++++--
 net/sched/cls_api.c                                | 254 ++++-----------------
 10 files changed, 460 insertions(+), 285 deletions(-)

-- 
1.8.3.1


^ permalink raw reply

* Re: [PATCH v2] net/mlx5e: Use refcount_t for refcount
From: Leon Romanovsky @ 2019-08-04 12:58 UTC (permalink / raw)
  To: Chuhong Yuan
  Cc: Saeed Mahameed, David S . Miller, netdev, linux-rdma,
	linux-kernel
In-Reply-To: <20190802164828.20243-1-hslester96@gmail.com>

On Sat, Aug 03, 2019 at 12:48:28AM +0800, Chuhong Yuan wrote:
> refcount_t is better for reference counters since its
> implementation can prevent overflows.
> So convert atomic_t ref counters to refcount_t.

I'm not thrilled to see those automatic conversion patches, especially
for flows which can't overflow. There is nothing wrong in using atomic_t
type of variable, do you have in mind flow which will cause to overflow?

Thanks
>
> Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
> ---
> Changes in v2:
>   - Add #include.
>
>  drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c | 9 +++++----
>  1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
> index b9d4f4e19ff9..148b55c3db7a 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
> @@ -32,6 +32,7 @@
>
>  #include <linux/kernel.h>
>  #include <linux/module.h>
> +#include <linux/refcount.h>
>  #include <linux/mlx5/driver.h>
>  #include <net/vxlan.h>
>  #include "mlx5_core.h"
> @@ -48,7 +49,7 @@ struct mlx5_vxlan {
>
>  struct mlx5_vxlan_port {
>  	struct hlist_node hlist;
> -	atomic_t refcount;
> +	refcount_t refcount;
>  	u16 udp_port;
>  };
>
> @@ -113,7 +114,7 @@ int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
>
>  	vxlanp = mlx5_vxlan_lookup_port(vxlan, port);
>  	if (vxlanp) {
> -		atomic_inc(&vxlanp->refcount);
> +		refcount_inc(&vxlanp->refcount);
>  		return 0;
>  	}
>
> @@ -137,7 +138,7 @@ int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
>  	}
>
>  	vxlanp->udp_port = port;
> -	atomic_set(&vxlanp->refcount, 1);
> +	refcount_set(&vxlanp->refcount, 1);
>
>  	spin_lock_bh(&vxlan->lock);
>  	hash_add(vxlan->htable, &vxlanp->hlist, port);
> @@ -170,7 +171,7 @@ int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port)
>  		goto out_unlock;
>  	}
>
> -	if (atomic_dec_and_test(&vxlanp->refcount)) {
> +	if (refcount_dec_and_test(&vxlanp->refcount)) {
>  		hash_del(&vxlanp->hlist);
>  		remove = true;
>  	}
> --
> 2.20.1
>

^ permalink raw reply

* Re: [PATCH v2 0/3] Use refcount_t for refcount
From: Leon Romanovsky @ 2019-08-04 12:52 UTC (permalink / raw)
  To: Chuhong Yuan
  Cc: Saeed Mahameed, David S . Miller, Doug Ledford, Jason Gunthorpe,
	netdev, linux-rdma, linux-kernel
In-Reply-To: <20190802172334.8305-1-hslester96@gmail.com>

On Sat, Aug 03, 2019 at 01:23:34AM +0800, Chuhong Yuan wrote:
> Reference counters are preferred to use refcount_t instead of
> atomic_t.
> This is because the implementation of refcount_t can prevent
> overflows and detect possible use-after-free.
>
> First convert the refcount field to refcount_t in mlx5/driver.h.
> Then convert the uses to refcount_() APIs.
>
> Changelog:
>
> v1 -> v2:
>   - Add #include in include/linux/mlx5/driver.h.

The same NAK as for version v0.

Thanks

^ permalink raw reply

* Re: [PATCH 0/3] Use refcount_t for refcount
From: Leon Romanovsky @ 2019-08-04 12:48 UTC (permalink / raw)
  To: Chuhong Yuan
  Cc: Saeed Mahameed, David S . Miller, Doug Ledford, Jason Gunthorpe,
	linux-rdma, netdev, linux-kernel
In-Reply-To: <20190802121035.1315-1-hslester96@gmail.com>

On Fri, Aug 02, 2019 at 08:10:35PM +0800, Chuhong Yuan wrote:
> Reference counters are preferred to use refcount_t instead of
> atomic_t.
> This is because the implementation of refcount_t can prevent
> overflows and detect possible use-after-free.
>
> First convert the refcount field to refcount_t in mlx5/driver.h.
> Then convert the uses to refcount_() APIs.

You can't do it, because you need to ensure that driver compiles and
works between patches. By converting driver.h alone to refcount_t, you
simply broke mlx5 driver.

NAK, to be clear.

And please don't sent series of patches as standalone patches.

Thanks,

^ permalink raw reply

* [net-next 1/8] fm10k: remove unnecessary variable initializer
From: Jeff Kirsher @ 2019-08-04 11:59 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190804115926.31944-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

The err variable in the fm10k_tlv_attr_parse function is initialized
with zero. However, the function never reads err without first assigning
it from a function call. Remove this unnecessary initialization.

This was detected by cppcheck and resolves the following warning
produced by that tool:

[fm10k_tlv.c:498]: (style) Variable 'err' is assigned a value that is
never used.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_tlv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
index 2a7a40bf2b1c..f4c42a40f934 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
@@ -472,7 +472,7 @@ static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results,
 				const struct fm10k_tlv_attr *tlv_attr)
 {
 	u32 i, attr_id, offset = 0;
-	s32 err = 0;
+	s32 err;
 	u16 len;
 
 	/* verify pointers are not NULL */
-- 
2.21.0


^ permalink raw reply related

* [net-next 3/8] fm10k: remove needless initialization of size local variable
From: Jeff Kirsher @ 2019-08-04 11:59 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190804115926.31944-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

The local variable 'size' in fm10k_dfwd_add_station is initialized, but
is always re-assigned immediately before use. Remove this unnecessary
initialization.

This was detected by cppcheck and resolves the following warning
produced by that tool:

[fm10k_netdev.c:1466]: (style) Variable 'size' is assigned a value that is never used.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 4704395c0f66..d3e85480f46d 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1463,7 +1463,7 @@ static void *fm10k_dfwd_add_station(struct net_device *dev,
 	struct fm10k_l2_accel *old_l2_accel = NULL;
 	struct fm10k_dglort_cfg dglort = { 0 };
 	struct fm10k_hw *hw = &interface->hw;
-	int size = 0, i;
+	int size, i;
 	u16 vid, glort;
 
 	/* The hardware supported by fm10k only filters on the destination MAC
-- 
2.21.0


^ permalink raw reply related

* [net-next 6/8] fm10k: mark unused parameters with __always_unused
From: Jeff Kirsher @ 2019-08-04 11:59 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190804115926.31944-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

Several functions in the fm10k driver have specific function templates,
as they are used as function pointers. The parameters in these functions
are not always used. Explicitly mark unused parameters with the
__always_unused macro, so that the compiler will not warn about them
when building with the -Wunused-parameter warning enabled.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_mbx.c  |  5 ++--
 drivers/net/ethernet/intel/fm10k/fm10k_pf.c   | 10 ++++----
 drivers/net/ethernet/intel/fm10k/fm10k_tlv.c  |  7 +++---
 drivers/net/ethernet/intel/fm10k/fm10k_type.h |  2 +-
 drivers/net/ethernet/intel/fm10k/fm10k_vf.c   | 25 +++++++++++--------
 5 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
index aece335b41f8..75e51f91036c 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #include "fm10k_common.h"
 
@@ -2134,7 +2134,8 @@ static s32 fm10k_sm_mbx_process(struct fm10k_hw *hw,
  *  DWORDs, not bytes.  Any invalid values will cause the mailbox to return
  *  error.
  **/
-s32 fm10k_sm_mbx_init(struct fm10k_hw *hw, struct fm10k_mbx_info *mbx,
+s32 fm10k_sm_mbx_init(struct fm10k_hw __always_unused *hw,
+		      struct fm10k_mbx_info *mbx,
 		      const struct fm10k_msg_data *msg_data)
 {
 	mbx->mbx_reg = FM10K_GMBX;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index e85b2f2eef05..095c5b0e4096 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #include "fm10k_pf.h"
 #include "fm10k_vf.h"
@@ -1152,7 +1152,7 @@ static void fm10k_iov_update_stats_pf(struct fm10k_hw *hw,
  *  assumption is that in this case it is acceptable to just directly
  *  hand off the message from the VF to the underlying shared code.
  **/
-s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results,
+s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 __always_unused **results,
 			  struct fm10k_mbx_info *mbx)
 {
 	struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx;
@@ -1641,7 +1641,7 @@ const struct fm10k_tlv_attr fm10k_lport_map_msg_attr[] = {
  *  switch API.
  **/
 s32 fm10k_msg_lport_map_pf(struct fm10k_hw *hw, u32 **results,
-			   struct fm10k_mbx_info *mbx)
+			   struct fm10k_mbx_info __always_unused *mbx)
 {
 	u16 glort, mask;
 	u32 dglort_map;
@@ -1684,7 +1684,7 @@ const struct fm10k_tlv_attr fm10k_update_pvid_msg_attr[] = {
  *  This handler configures the default VLAN for the PF
  **/
 static s32 fm10k_msg_update_pvid_pf(struct fm10k_hw *hw, u32 **results,
-				    struct fm10k_mbx_info *mbx)
+				    struct fm10k_mbx_info __always_unused *mbx)
 {
 	u16 glort, pvid;
 	u32 pvid_update;
@@ -1745,7 +1745,7 @@ const struct fm10k_tlv_attr fm10k_err_msg_attr[] = {
  *  messages that the PF has sent.
  **/
 s32 fm10k_msg_err_pf(struct fm10k_hw *hw, u32 **results,
-		     struct fm10k_mbx_info *mbx)
+		     struct fm10k_mbx_info __always_unused *mbx)
 {
 	struct fm10k_swapi_error err_msg;
 	s32 err;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
index f4c42a40f934..21eff0895a7a 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #include "fm10k_tlv.h"
 
@@ -587,8 +587,9 @@ s32 fm10k_tlv_msg_parse(struct fm10k_hw *hw, u32 *msg,
  *  a minimum it just indicates that the message requested was
  *  unimplemented.
  **/
-s32 fm10k_tlv_msg_error(struct fm10k_hw *hw, u32 **results,
-			struct fm10k_mbx_info *mbx)
+s32 fm10k_tlv_msg_error(struct fm10k_hw __always_unused *hw,
+			u32 __always_unused **results,
+			struct fm10k_mbx_info __always_unused *mbx)
 {
 	return FM10K_NOT_IMPLEMENTED;
 }
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
index 9fb9fca375e3..15ac1c7885bc 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #ifndef _FM10K_TYPE_H_
 #define _FM10K_TYPE_H_
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
index a8519c1f0406..dc8ccd378ec9 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #include "fm10k_vf.h"
 
@@ -198,7 +198,7 @@ static s32 fm10k_update_vlan_vf(struct fm10k_hw *hw, u32 vid, u8 vsi, bool set)
  *  This function should determine the MAC address for the VF
  **/
 s32 fm10k_msg_mac_vlan_vf(struct fm10k_hw *hw, u32 **results,
-			  struct fm10k_mbx_info *mbx)
+			  struct fm10k_mbx_info __always_unused *mbx)
 {
 	u8 perm_addr[ETH_ALEN];
 	u16 vid;
@@ -267,8 +267,10 @@ static s32 fm10k_read_mac_addr_vf(struct fm10k_hw *hw)
  *  This function is used to add or remove unicast MAC addresses for
  *  the VF.
  **/
-static s32 fm10k_update_uc_addr_vf(struct fm10k_hw *hw, u16 glort,
-				   const u8 *mac, u16 vid, bool add, u8 flags)
+static s32 fm10k_update_uc_addr_vf(struct fm10k_hw *hw,
+				   u16 __always_unused glort,
+				   const u8 *mac, u16 vid, bool add,
+				   u8 __always_unused flags)
 {
 	struct fm10k_mbx_info *mbx = &hw->mbx;
 	u32 msg[7];
@@ -309,7 +311,8 @@ static s32 fm10k_update_uc_addr_vf(struct fm10k_hw *hw, u16 glort,
  *  This function is used to add or remove multicast MAC addresses for
  *  the VF.
  **/
-static s32 fm10k_update_mc_addr_vf(struct fm10k_hw *hw, u16 glort,
+static s32 fm10k_update_mc_addr_vf(struct fm10k_hw *hw,
+				   u16 __always_unused glort,
 				   const u8 *mac, u16 vid, bool add)
 {
 	struct fm10k_mbx_info *mbx = &hw->mbx;
@@ -373,7 +376,7 @@ const struct fm10k_tlv_attr fm10k_lport_state_msg_attr[] = {
  *  are ready to bring up the interface.
  **/
 s32 fm10k_msg_lport_state_vf(struct fm10k_hw *hw, u32 **results,
-			     struct fm10k_mbx_info *mbx)
+			     struct fm10k_mbx_info __always_unused *mbx)
 {
 	hw->mac.dglort_map = !results[FM10K_LPORT_STATE_MSG_READY] ?
 			     FM10K_DGLORTMAP_NONE : FM10K_DGLORTMAP_ZERO;
@@ -392,8 +395,9 @@ s32 fm10k_msg_lport_state_vf(struct fm10k_hw *hw, u32 **results,
  *  enabled we can add filters, if it is disabled all filters for this
  *  logical port are flushed.
  **/
-static s32 fm10k_update_lport_state_vf(struct fm10k_hw *hw, u16 glort,
-				       u16 count, bool enable)
+static s32 fm10k_update_lport_state_vf(struct fm10k_hw *hw,
+				       u16 __always_unused glort,
+				       u16 __always_unused count, bool enable)
 {
 	struct fm10k_mbx_info *mbx = &hw->mbx;
 	u32 msg[2];
@@ -420,7 +424,8 @@ static s32 fm10k_update_lport_state_vf(struct fm10k_hw *hw, u16 glort,
  *  so that it can enable either multicast, multicast promiscuous, or
  *  promiscuous mode of operation.
  **/
-static s32 fm10k_update_xcast_mode_vf(struct fm10k_hw *hw, u16 glort, u8 mode)
+static s32 fm10k_update_xcast_mode_vf(struct fm10k_hw *hw,
+				      u16 __always_unused glort, u8 mode)
 {
 	struct fm10k_mbx_info *mbx = &hw->mbx;
 	u32 msg[3];
@@ -475,7 +480,7 @@ static void fm10k_rebind_hw_stats_vf(struct fm10k_hw *hw,
  *  that information to then populate a DGLORTMAP/DEC entry and the queues
  *  to which it has been assigned.
  **/
-static s32 fm10k_configure_dglort_map_vf(struct fm10k_hw *hw,
+static s32 fm10k_configure_dglort_map_vf(struct fm10k_hw __always_unused *hw,
 					 struct fm10k_dglort_cfg *dglort)
 {
 	/* verify the dglort pointer */
-- 
2.21.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox