All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Or Gerlitz <ogerlitz@mellanox.com>,
	Paul Blakey <paulb@mellanox.com>,
	Saeed Mahameed <saeedm@mellanox.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 4.13 25/44] net/mlx5e: Properly deal with encap flows add/del under neigh update
Date: Thu, 16 Nov 2017 18:42:49 +0100	[thread overview]
Message-ID: <20171116172824.774237901@linuxfoundation.org> (raw)
In-Reply-To: <20171116172823.336649076@linuxfoundation.org>

4.13-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Or Gerlitz <ogerlitz@mellanox.com>


[ Upstream commit 3c37745ec614ff048d5dce38f976804b05d307ee ]

Currently, the encap action offload is handled in the actions parse
function and not in mlx5e_tc_add_fdb_flow() where we deal with all
the other aspects of offloading actions (vlan, modify header) and
the rule itself.

When the neigh update code (mlx5e_tc_encap_flows_add()) recreates the
encap entry and offloads the related flows, we wrongly call again into
mlx5e_tc_add_fdb_flow(), this for itself would cause us to handle
again the offloading of vlans and header re-write which puts things
in non consistent state and step on freed memory (e.g the modify
header parse buffer which is already freed).

Since on error, mlx5e_tc_add_fdb_flow() detaches and may release the
encap entry, it causes a corruption at the neigh update code which goes
over the list of flows associated with this encap entry, or double free
when the tc flow is later deleted by user-space.

When neigh update (mlx5e_tc_encap_flows_del()) unoffloads the flows related
to an encap entry which is now invalid, we do a partial repeat of the eswitch
flow removal code which is wrong too.

To fix things up we do the following:

(1) handle the encap action offload in the eswitch flow add function
    mlx5e_tc_add_fdb_flow() as done for the other actions and the rule itself.

(2) modify the neigh update code (mlx5e_tc_encap_flows_add/del) to only
    deal with the encap entry and rules delete/add and not with any of
    the other offloaded actions.

Fixes: 232c001398ae ('net/mlx5e: Add support to neighbour update flow')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c |   89 ++++++++++++++----------
 1 file changed, 54 insertions(+), 35 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -78,9 +78,11 @@ struct mlx5e_tc_flow {
 };
 
 struct mlx5e_tc_flow_parse_attr {
+	struct ip_tunnel_info tun_info;
 	struct mlx5_flow_spec spec;
 	int num_mod_hdr_actions;
 	void *mod_hdr_actions;
+	int mirred_ifindex;
 };
 
 enum {
@@ -322,6 +324,12 @@ static void mlx5e_tc_del_nic_flow(struct
 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 			       struct mlx5e_tc_flow *flow);
 
+static int mlx5e_attach_encap(struct mlx5e_priv *priv,
+			      struct ip_tunnel_info *tun_info,
+			      struct net_device *mirred_dev,
+			      struct net_device **encap_dev,
+			      struct mlx5e_tc_flow *flow);
+
 static struct mlx5_flow_handle *
 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 		      struct mlx5e_tc_flow_parse_attr *parse_attr,
@@ -329,9 +337,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
-	struct mlx5_flow_handle *rule;
+	struct net_device *out_dev, *encap_dev = NULL;
+	struct mlx5_flow_handle *rule = NULL;
+	struct mlx5e_rep_priv *rpriv;
+	struct mlx5e_priv *out_priv;
 	int err;
 
+	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
+		out_dev = __dev_get_by_index(dev_net(priv->netdev),
+					     attr->parse_attr->mirred_ifindex);
+		err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
+					 out_dev, &encap_dev, flow);
+		if (err) {
+			rule = ERR_PTR(err);
+			if (err != -EAGAIN)
+				goto err_attach_encap;
+		}
+		out_priv = netdev_priv(encap_dev);
+		rpriv = out_priv->ppriv;
+		attr->out_rep = rpriv->rep;
+	}
+
 	err = mlx5_eswitch_add_vlan_action(esw, attr);
 	if (err) {
 		rule = ERR_PTR(err);
@@ -347,10 +373,14 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv
 		}
 	}
 
-	rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
-	if (IS_ERR(rule))
-		goto err_add_rule;
-
+	/* we get here if (1) there's no error (rule being null) or when
+	 * (2) there's an encap action and we're on -EAGAIN (no valid neigh)
+	 */
+	if (rule != ERR_PTR(-EAGAIN)) {
+		rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
+		if (IS_ERR(rule))
+			goto err_add_rule;
+	}
 	return rule;
 
 err_add_rule:
@@ -361,6 +391,7 @@ err_mod_hdr:
 err_add_vlan:
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
 		mlx5e_detach_encap(priv, flow);
+err_attach_encap:
 	return rule;
 }
 
@@ -389,6 +420,8 @@ static void mlx5e_tc_del_fdb_flow(struct
 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
 			      struct mlx5e_encap_entry *e)
 {
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_esw_flow_attr *esw_attr;
 	struct mlx5e_tc_flow *flow;
 	int err;
 
@@ -404,10 +437,9 @@ void mlx5e_tc_encap_flows_add(struct mlx
 	mlx5e_rep_queue_neigh_stats_work(priv);
 
 	list_for_each_entry(flow, &e->flows, encap) {
-		flow->esw_attr->encap_id = e->encap_id;
-		flow->rule = mlx5e_tc_add_fdb_flow(priv,
-						   flow->esw_attr->parse_attr,
-						   flow);
+		esw_attr = flow->esw_attr;
+		esw_attr->encap_id = e->encap_id;
+		flow->rule = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
 		if (IS_ERR(flow->rule)) {
 			err = PTR_ERR(flow->rule);
 			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
@@ -421,15 +453,13 @@ void mlx5e_tc_encap_flows_add(struct mlx
 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
 			      struct mlx5e_encap_entry *e)
 {
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5e_tc_flow *flow;
-	struct mlx5_fc *counter;
 
 	list_for_each_entry(flow, &e->flows, encap) {
 		if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
 			flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
-			counter = mlx5_flow_rule_counter(flow->rule);
-			mlx5_del_flow_rules(flow->rule);
-			mlx5_fc_destroy(priv->mdev, counter);
+			mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr);
 		}
 	}
 
@@ -1871,7 +1901,7 @@ static int parse_tc_fdb_actions(struct m
 
 		if (is_tcf_mirred_egress_redirect(a)) {
 			int ifindex = tcf_mirred_ifindex(a);
-			struct net_device *out_dev, *encap_dev = NULL;
+			struct net_device *out_dev;
 			struct mlx5e_priv *out_priv;
 
 			out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
@@ -1884,17 +1914,13 @@ static int parse_tc_fdb_actions(struct m
 				rpriv = out_priv->ppriv;
 				attr->out_rep = rpriv->rep;
 			} else if (encap) {
-				err = mlx5e_attach_encap(priv, info,
-							 out_dev, &encap_dev, flow);
-				if (err && err != -EAGAIN)
-					return err;
+				parse_attr->mirred_ifindex = ifindex;
+				parse_attr->tun_info = *info;
+				attr->parse_attr = parse_attr;
 				attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
 					MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
 					MLX5_FLOW_CONTEXT_ACTION_COUNT;
-				out_priv = netdev_priv(encap_dev);
-				rpriv = out_priv->ppriv;
-				attr->out_rep = rpriv->rep;
-				attr->parse_attr = parse_attr;
+				/* attr->out_rep is resolved when we handle encap */
 			} else {
 				pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
 				       priv->netdev->name, out_dev->name);
@@ -1972,7 +1998,7 @@ int mlx5e_configure_flower(struct mlx5e_
 	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
 		err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow);
 		if (err < 0)
-			goto err_handle_encap_flow;
+			goto err_free;
 		flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
 	} else {
 		err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
@@ -1983,10 +2009,13 @@ int mlx5e_configure_flower(struct mlx5e_
 
 	if (IS_ERR(flow->rule)) {
 		err = PTR_ERR(flow->rule);
-		goto err_free;
+		if (err != -EAGAIN)
+			goto err_free;
 	}
 
-	flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+	if (err != -EAGAIN)
+		flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+
 	err = rhashtable_insert_fast(&tc->ht, &flow->node,
 				     tc->ht_params);
 	if (err)
@@ -2000,16 +2029,6 @@ int mlx5e_configure_flower(struct mlx5e_
 err_del_rule:
 	mlx5e_tc_del_flow(priv, flow);
 
-err_handle_encap_flow:
-	if (err == -EAGAIN) {
-		err = rhashtable_insert_fast(&tc->ht, &flow->node,
-					     tc->ht_params);
-		if (err)
-			mlx5e_tc_del_flow(priv, flow);
-		else
-			return 0;
-	}
-
 err_free:
 	kvfree(parse_attr);
 	kfree(flow);

  parent reply	other threads:[~2017-11-16 17:51 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-16 17:42 [PATCH 4.13 00/44] 4.13.14-stable review Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 01/44] ppp: fix race in ppp device destruction Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 02/44] gso: fix payload length when gso_size is zero Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 03/44] ipv4: Fix traffic triggered IPsec connections Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 04/44] ipv6: " Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 05/44] netlink: do not set cb_running if dumps start() errs Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 06/44] net: call cgroup_sk_alloc() earlier in sk_clone_lock() Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 07/44] macsec: fix memory leaks when skb_to_sgvec fails Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 08/44] l2tp: check ps->sock before running pppol2tp_session_ioctl() Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 09/44] tun: call dev_get_valid_name() before register_netdevice() Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 10/44] netlink: fix netlink_ack() extack race Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 11/44] sctp: add the missing sock_owned_by_user check in sctp_icmp_redirect Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 12/44] tcp/dccp: fix ireq->opt races Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 13/44] packet: avoid panic in packet_getsockopt() Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 14/44] geneve: Fix function matching VNI and tunnel ID on big-endian Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 15/44] net: bridge: fix returning of vlan range op errors Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 16/44] soreuseport: fix initialization race Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 17/44] ipv6: flowlabel: do not leave opt->tot_len with garbage Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 18/44] sctp: full support for ipv6 ip_nonlocal_bind & IP_FREEBIND Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 19/44] tcp/dccp: fix lockdep splat in inet_csk_route_req() Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 21/44] net: dsa: check master device before put Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 22/44] net/unix: dont show information about sockets from other namespaces Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 23/44] tap: double-free in error path in tap_open() Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 24/44] net/mlx5: Fix health work queue spin lock to IRQ safe Greg Kroah-Hartman
2017-11-16 17:42 ` Greg Kroah-Hartman [this message]
2017-11-16 17:42 ` [PATCH 4.13 26/44] ipip: only increase err_count for some certain type icmp in ipip_err Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 27/44] ip6_gre: only increase err_count for some certain type icmpv6 in ip6gre_err Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 28/44] ip6_gre: update dst pmtu if dev mtu has been updated by toobig in __gre6_xmit Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 29/44] tcp: refresh tp timestamp before tcp_mtu_probe() Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 30/44] tap: reference to KVA of an unloaded module causes kernel panic Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 31/44] tun: allow positive return values on dev_get_valid_name() call Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 32/44] sctp: reset owner sk for data chunks on out queues when migrating a sock Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 33/44] net_sched: avoid matching qdisc with zero handle Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 34/44] l2tp: hold tunnel in pppol2tp_connect() Greg Kroah-Hartman
2017-11-16 17:42 ` [PATCH 4.13 35/44] tun/tap: sanitize TUNSETSNDBUF input Greg Kroah-Hartman
2017-11-16 17:43 ` [PATCH 4.13 36/44] ipv6: addrconf: increment ifp refcount before ipv6_del_addr() Greg Kroah-Hartman
2017-11-16 17:43 ` [PATCH 4.13 37/44] tcp: fix tcp_mtu_probe() vs highest_sack Greg Kroah-Hartman
2017-11-16 17:43 ` [PATCH 4.13 38/44] mac80211: accept key reinstall without changing anything Greg Kroah-Hartman
2017-11-16 17:43 ` [PATCH 4.13 39/44] mac80211: use constant time comparison with keys Greg Kroah-Hartman
2017-11-16 17:43 ` [PATCH 4.13 40/44] mac80211: dont compare TKIP TX MIC key in reinstall prevention Greg Kroah-Hartman
2017-11-16 17:43 ` [PATCH 4.13 41/44] usb: usbtest: fix NULL pointer dereference Greg Kroah-Hartman
2017-11-16 17:43 ` [PATCH 4.13 42/44] Input: ims-psu - check if CDC union descriptor is sane Greg Kroah-Hartman
2017-11-16 17:43 ` [PATCH 4.13 44/44] dmaengine: dmatest: warn user when dma test times out Greg Kroah-Hartman
2017-11-16 22:45 ` [PATCH 4.13 00/44] 4.13.14-stable review Shuah Khan
2017-11-17  8:10   ` Greg Kroah-Hartman
2017-11-17  2:03 ` Guenter Roeck
2017-11-17  8:11   ` Greg Kroah-Hartman
     [not found] ` <5a0e2714.c5badf0a.3983c.7eaa@mx.google.com>
2017-11-17  8:14   ` Greg Kroah-Hartman
2017-11-18  0:41     ` Kevin Hilman
  -- strict thread matches above, loose matches on Subject: below --
2017-11-16 17:43 [4.13,43/44] EDAC, sb_edac: Dont create a second memory controller if HA1 is not present Greg Kroah-Hartman
2017-11-16 17:43 ` [PATCH 4.13 43/44] " Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171116172824.774237901@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ogerlitz@mellanox.com \
    --cc=paulb@mellanox.com \
    --cc=saeedm@mellanox.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.