From: John Fastabend <john.fastabend@gmail.com>
To: xiyou.wangcong@gmail.com, davem@davemloft.net,
eric.dumazet@gmail.com, jhs@mojatatu.com
Cc: netdev@vger.kernel.org, paulmck@linux.vnet.ibm.com, brouer@redhat.com
Subject: [net-next PATCH v3 12/15] net: sched: make tc_action safe to walk under RCU
Date: Mon, 08 Sep 2014 22:59:44 -0700 [thread overview]
Message-ID: <20140909055942.2071.4667.stgit@nitbit.x32> (raw)
In-Reply-To: <20140909055221.2071.99671.stgit@nitbit.x32>
This patch makes tc_actions safe to walk from classifiers under RCU.
Notice that each action act() callback defined in each act_*.c
already does its own locking. This is needed even in the current
infrastructure for the case where users add/remove actions via
'tc actions' and reference them via index from the classifiers.
There are a couple of interesting pieces here (i.e. they need careful review).
First, in tcf_exts_exec() the call to tcf_action_exec() follows a list_empty()
check. However, although this occurs under RCU, there is no
guarantee that the list is not empty when tcf_action_exec() is called.
This patch fixes up the return values from tcf_action_exec() so that
packets won't be dropped on the floor when this occurs. Hopefully
it's rare, and by using RCU we sort of make this assumption.
Second, there is a suspect usage of list_splice_init_rcu() in the
tcf_exts_change() routine. Notice how it is used twice in succession,
and the second init works on the src tcf_exts. There is probably a
better way to accomplish that.
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
include/net/act_api.h | 1 +
include/net/pkt_cls.h | 10 +++++++++-
net/sched/act_api.c | 14 +++++++-------
net/sched/cls_api.c | 17 ++++++++++-------
4 files changed, 27 insertions(+), 15 deletions(-)
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 3ee4c92..ddd0c5a 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -79,6 +79,7 @@ struct tc_action {
__u32 type; /* for backward compat(TCA_OLD_COMPAT) */
__u32 order;
struct list_head list;
+ struct rcu_head rcu;
};
struct tc_action_ops {
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 6da46dc..58c99fc 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -76,7 +76,7 @@ static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police)
{
#ifdef CONFIG_NET_CLS_ACT
exts->type = 0;
- INIT_LIST_HEAD(&exts->actions);
+ INIT_LIST_HEAD_RCU(&exts->actions);
#endif
exts->action = action;
exts->police = police;
@@ -88,6 +88,8 @@ static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police)
*
* Returns 1 if a predicative extension is present, i.e. an extension which
* might cause further actions and thus overrule the regular tcf_result.
+ *
+ * This check is only valid if done under RTNL.
*/
static inline int
tcf_exts_is_predicative(struct tcf_exts *exts)
@@ -128,6 +130,12 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
struct tcf_result *res)
{
#ifdef CONFIG_NET_CLS_ACT
+ /* This check is racy but this is OK, if the list is emptied
+ * before walking the chain of actions the return value has
+ * been updated to return zero. This way packets will not be
+ * dropped when action list deletion occurs after the empty
+ * check but before execution
+ */
if (!list_empty(&exts->actions))
return tcf_action_exec(skb, &exts->actions, res);
#endif
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 648778a..ae32b5b 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -381,14 +381,14 @@ int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
struct tcf_result *res)
{
const struct tc_action *a;
- int ret = -1;
+ int ret = 0;
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
ret = TC_ACT_OK;
goto exec_done;
}
- list_for_each_entry(a, actions, list) {
+ list_for_each_entry_rcu(a, actions, list) {
repeat:
ret = a->ops->act(skb, a, res);
if (TC_MUNGED & skb->tc_verd) {
@@ -417,8 +417,8 @@ int tcf_action_destroy(struct list_head *actions, int bind)
module_put(a->ops->owner);
else if (ret < 0)
return ret;
- list_del(&a->list);
- kfree(a);
+ list_del_rcu(&a->list);
+ kfree_rcu(a, rcu);
}
return ret;
}
@@ -584,7 +584,7 @@ int tcf_action_init(struct net *net, struct nlattr *nla,
goto err;
}
act->order = i;
- list_add_tail(&act->list, actions);
+ list_add_tail_rcu(&act->list, actions);
}
return 0;
@@ -746,8 +746,8 @@ static void cleanup_a(struct list_head *actions)
struct tc_action *a, *tmp;
list_for_each_entry_safe(a, tmp, actions, list) {
- list_del(&a->list);
- kfree(a);
+ list_del_rcu(&a->list);
+ kfree_rcu(a, rcu);
}
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e3d3c48..cfb22aa 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -500,7 +500,7 @@ void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
tcf_action_destroy(&exts->actions, TCA_ACT_UNBIND);
- INIT_LIST_HEAD(&exts->actions);
+ INIT_LIST_HEAD_RCU(&exts->actions);
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);
@@ -512,7 +512,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
{
struct tc_action *act;
- INIT_LIST_HEAD(&exts->actions);
+ INIT_LIST_HEAD_RCU(&exts->actions);
if (exts->police && tb[exts->police]) {
act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
"police", ovr,
@@ -521,7 +521,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
return PTR_ERR(act);
act->type = exts->type = TCA_OLD_COMPAT;
- list_add(&act->list, &exts->actions);
+ list_add_rcu(&act->list, &exts->actions);
} else if (exts->action && tb[exts->action]) {
int err;
err = tcf_action_init(net, tb[exts->action], rate_tlv,
@@ -541,15 +541,18 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
}
EXPORT_SYMBOL(tcf_exts_validate);
+/* It is not safe to use src->actions after this due to _init_rcu usage
+ * INIT_LIST_HEAD_RCU() is called on src->actions
+ */
void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
LIST_HEAD(tmp);
- tcf_tree_lock(tp);
- list_splice_init(&dst->actions, &tmp);
- list_splice(&src->actions, &dst->actions);
- tcf_tree_unlock(tp);
+ list_splice_init_rcu(&dst->actions, &tmp, synchronize_rcu);
+ list_splice_init_rcu(&src->actions,
+ &dst->actions,
+ synchronize_rcu);
tcf_action_destroy(&tmp, TCA_ACT_UNBIND);
#endif
}
next prev parent reply other threads:[~2014-09-09 6:00 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-09-09 5:53 [net-next PATCH v3 00/15] net/sched use rcu filters John Fastabend
2014-09-09 5:54 ` [net-next PATCH v3 01/15] net: qdisc: use rcu prefix and silence sparse warnings John Fastabend
2014-09-09 12:05 ` Eric Dumazet
2014-09-09 5:54 ` [net-next PATCH v3 02/15] net: rcu-ify tcf_proto John Fastabend
2014-09-09 12:20 ` Eric Dumazet
2014-09-09 15:15 ` John Fastabend
2014-09-09 15:20 ` Eric Dumazet
2014-09-09 5:55 ` [net-next PATCH v3 03/15] net: sched: cls_basic use RCU John Fastabend
2014-09-09 12:29 ` Eric Dumazet
2014-09-09 5:55 ` [net-next PATCH v3 04/15] net: sched: cls_cgroup " John Fastabend
2014-09-09 12:34 ` Eric Dumazet
2014-09-09 5:56 ` [net-next PATCH v3 05/15] net: sched: cls_flow " John Fastabend
2014-09-09 12:41 ` Eric Dumazet
2014-09-09 16:09 ` John Fastabend
2014-09-09 16:20 ` Eric Dumazet
2014-09-09 5:56 ` [net-next PATCH v3 06/15] net: sched: fw " John Fastabend
2014-09-09 12:48 ` Eric Dumazet
2014-09-09 5:57 ` [net-next PATCH v3 07/15] net: sched: RCU cls_route John Fastabend
2014-09-09 12:59 ` Eric Dumazet
2014-09-09 16:24 ` John Fastabend
2014-09-09 5:57 ` [net-next PATCH v3 08/15] net: sched: RCU cls_tcindex John Fastabend
2014-09-09 5:58 ` [net-next PATCH v3 09/15] net: sched: make cls_u32 lockless John Fastabend
2014-09-09 13:20 ` Eric Dumazet
2014-09-09 16:35 ` John Fastabend
2014-09-09 18:02 ` Cong Wang
2014-09-10 4:53 ` John Fastabend
2014-09-09 5:58 ` [net-next PATCH v3 10/15] net: sched: rcu'ify cls_rsvp John Fastabend
2014-09-09 13:27 ` Eric Dumazet
2014-09-09 16:46 ` John Fastabend
2014-09-09 5:59 ` [net-next PATCH v3 11/15] net: sched: rcu'ify cls_bpf John Fastabend
2014-09-09 5:59 ` John Fastabend [this message]
2014-09-09 6:00 ` [net-next PATCH v3 13/15] net: sched: make bstats per cpu and estimator RCU safe John Fastabend
2014-09-09 6:00 ` [net-next PATCH v3 14/15] net: sched: make qstats per cpu John Fastabend
2014-09-09 6:01 ` [net-next PATCH v3 15/15] net: sched: drop ingress qdisc lock John Fastabend
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140909055942.2071.4667.stgit@nitbit.x32 \
--to=john.fastabend@gmail.com \
--cc=brouer@redhat.com \
--cc=davem@davemloft.net \
--cc=eric.dumazet@gmail.com \
--cc=jhs@mojatatu.com \
--cc=netdev@vger.kernel.org \
--cc=paulmck@linux.vnet.ibm.com \
--cc=xiyou.wangcong@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.