netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: John Fastabend <john.fastabend@gmail.com>
To: xiyou.wangcong@gmail.com, jhs@mojatatu.com, eric.dumazet@gmail.com
Cc: netdev@vger.kernel.org, paulmck@linux.vnet.ibm.com, brouer@redhat.com
Subject: [net-next PATCH v2 12/15] net: sched: make tc_action safe to walk under RCU
Date: Sun, 24 Aug 2014 17:53:04 -0700	[thread overview]
Message-ID: <20140825005303.2180.60358.stgit@nitbit.x32> (raw)
In-Reply-To: <20140825004659.2180.35028.stgit@nitbit.x32>

This patch makes tc_actions safe to walk from classifiers under RCU.
Notice that each action act() callback defined in each act_*.c
already does its own locking. This is needed even in the current
infrastructure for the case where users add/remove actions via
'tc actions' and reference them via index from the classifiers.

There are a couple interesting pieces here (i.e. need careful review)
In tcf_exts_exec() the call to tcf_action_exec follows a list_empty
check. However although this is occurring under RCU there is no
guarantee that the list is not empty when tcf_action_exec is called.
This patch fixes up return values from tcf_action_exec() so that
packets wont be dropped on the floor when this occurs. Hopefully
its rare and by using RCU we sort of make this assumption.

Second there is a suspect usage of list_splice_init_rcu() in the
tcf_exts_change() routine. Notice how it is used twice in succession
and the second init works on the src tcf_exts. There is probably a
better way to accomplish that.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 include/net/act_api.h |    1 +
 include/net/pkt_cls.h |   10 +++++++++-
 net/sched/act_api.c   |   14 +++++++-------
 net/sched/cls_api.c   |   17 ++++++++++-------
 4 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 3ee4c92..ddd0c5a 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -79,6 +79,7 @@ struct tc_action {
 	__u32			type; /* for backward compat(TCA_OLD_COMPAT) */
 	__u32			order;
 	struct list_head	list;
+	struct rcu_head		rcu;
 };
 
 struct tc_action_ops {
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 6da46dc..58c99fc 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -76,7 +76,7 @@ static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	exts->type = 0;
-	INIT_LIST_HEAD(&exts->actions);
+	INIT_LIST_HEAD_RCU(&exts->actions);
 #endif
 	exts->action = action;
 	exts->police = police;
@@ -88,6 +88,8 @@ static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police)
  *
  * Returns 1 if a predicative extension is present, i.e. an extension which
  * might cause further actions and thus overrule the regular tcf_result.
+ *
+ * This check is only valid if done under RTNL.
  */
 static inline int
 tcf_exts_is_predicative(struct tcf_exts *exts)
@@ -128,6 +130,12 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
 	       struct tcf_result *res)
 {
 #ifdef CONFIG_NET_CLS_ACT
+	/* This check is racy but this is OK, if the list is emptied
+	 * before walking the chain of actions the return value has
+	 * been updated to return zero. This way packets will not be
+	 * dropped when action list deletion occurs after the empty
+	 * check but before execution
+	 */
 	if (!list_empty(&exts->actions))
 		return tcf_action_exec(skb, &exts->actions, res);
 #endif
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 648778a..ae32b5b 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -381,14 +381,14 @@ int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
 		    struct tcf_result *res)
 {
 	const struct tc_action *a;
-	int ret = -1;
+	int ret = 0;
 
 	if (skb->tc_verd & TC_NCLS) {
 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
 		ret = TC_ACT_OK;
 		goto exec_done;
 	}
-	list_for_each_entry(a, actions, list) {
+	list_for_each_entry_rcu(a, actions, list) {
 repeat:
 		ret = a->ops->act(skb, a, res);
 		if (TC_MUNGED & skb->tc_verd) {
@@ -417,8 +417,8 @@ int tcf_action_destroy(struct list_head *actions, int bind)
 			module_put(a->ops->owner);
 		else if (ret < 0)
 			return ret;
-		list_del(&a->list);
-		kfree(a);
+		list_del_rcu(&a->list);
+		kfree_rcu(a, rcu);
 	}
 	return ret;
 }
@@ -584,7 +584,7 @@ int tcf_action_init(struct net *net, struct nlattr *nla,
 			goto err;
 		}
 		act->order = i;
-		list_add_tail(&act->list, actions);
+		list_add_tail_rcu(&act->list, actions);
 	}
 	return 0;
 
@@ -746,8 +746,8 @@ static void cleanup_a(struct list_head *actions)
 	struct tc_action *a, *tmp;
 
 	list_for_each_entry_safe(a, tmp, actions, list) {
-		list_del(&a->list);
-		kfree(a);
+		list_del_rcu(&a->list);
+		kfree_rcu(a, rcu);
 	}
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e3d3c48..cfb22aa 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -500,7 +500,7 @@ void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	tcf_action_destroy(&exts->actions, TCA_ACT_UNBIND);
-	INIT_LIST_HEAD(&exts->actions);
+	INIT_LIST_HEAD_RCU(&exts->actions);
 #endif
 }
 EXPORT_SYMBOL(tcf_exts_destroy);
@@ -512,7 +512,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 	{
 		struct tc_action *act;
 
-		INIT_LIST_HEAD(&exts->actions);
+		INIT_LIST_HEAD_RCU(&exts->actions);
 		if (exts->police && tb[exts->police]) {
 			act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
 						"police", ovr,
@@ -521,7 +521,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 				return PTR_ERR(act);
 
 			act->type = exts->type = TCA_OLD_COMPAT;
-			list_add(&act->list, &exts->actions);
+			list_add_rcu(&act->list, &exts->actions);
 		} else if (exts->action && tb[exts->action]) {
 			int err;
 			err = tcf_action_init(net, tb[exts->action], rate_tlv,
@@ -541,15 +541,18 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 }
 EXPORT_SYMBOL(tcf_exts_validate);
 
+/* It is not safe to use src->actions after this due to _init_rcu usage
+ * INIT_LIST_HEAD_RCU() is called on src->actions
+ */
 void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
 		     struct tcf_exts *src)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	LIST_HEAD(tmp);
-	tcf_tree_lock(tp);
-	list_splice_init(&dst->actions, &tmp);
-	list_splice(&src->actions, &dst->actions);
-	tcf_tree_unlock(tp);
+	list_splice_init_rcu(&dst->actions, &tmp, synchronize_rcu);
+	list_splice_init_rcu(&src->actions,
+			     &dst->actions,
+			     synchronize_rcu);
 	tcf_action_destroy(&tmp, TCA_ACT_UNBIND);
 #endif
 }

  parent reply	other threads:[~2014-08-25  0:53 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-25  0:47 [net-next PATCH v2 00/15] net/sched: use rcu filters John Fastabend
2014-08-25  0:48 ` [net-next PATCH v2 01/15] net: qdisc: use rcu prefix and silence sparse warnings John Fastabend
2014-08-25  0:48 ` [net-next PATCH v2 02/15] net: rcu-ify tcf_proto John Fastabend
2014-08-25  5:31   ` David Miller
2014-09-02  1:39     ` John Fastabend
2014-09-02 20:52       ` David Miller
2014-09-03  6:23         ` John Fastabend
2014-08-25  0:49 ` [net-next PATCH v2 03/15] net: sched: cls_basic use RCU John Fastabend
2014-08-25  0:49 ` [net-next PATCH v2 04/15] net: sched: cls_cgroup " John Fastabend
2014-08-25  0:49 ` [net-next PATCH v2 05/15] net: sched: cls_flow " John Fastabend
2014-08-25  0:50 ` [net-next PATCH v2 06/15] net: sched: fw " John Fastabend
2014-08-25  0:50 ` [net-next PATCH v2 07/15] net: sched: RCU cls_route John Fastabend
2014-08-25  0:51 ` [net-next PATCH v2 08/15] net: sched: RCU cls_tcindex John Fastabend
2014-08-25  0:51 ` [net-next PATCH v2 09/15] net: sched: make cls_u32 lockless John Fastabend
2014-08-25  0:52 ` [net-next PATCH v2 10/15] net: sched: rcu'ify cls_rsvp John Fastabend
2014-08-25  0:52 ` [net-next PATCH v2 11/15] net: sched: rcu'ify cls_bpf John Fastabend
2014-08-25  0:53 ` John Fastabend [this message]
2014-08-25  0:53 ` [net-next PATCH v2 13/15] net: sched: make bstats per cpu and estimator RCU safe John Fastabend
2014-08-25  0:53 ` [net-next PATCH v2 14/15] net: sched: make qstats per cpu John Fastabend
2014-08-25  0:54 ` [net-next PATCH v2 15/15] net: sched: drop ingress qdisc lock John Fastabend

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140825005303.2180.60358.stgit@nitbit.x32 \
    --to=john.fastabend@gmail.com \
    --cc=brouer@redhat.com \
    --cc=eric.dumazet@gmail.com \
    --cc=jhs@mojatatu.com \
    --cc=netdev@vger.kernel.org \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).