From: John Fastabend <john.fastabend@gmail.com>
To: xiyou.wangcong@gmail.com, davem@davemloft.net, eric.dumazet@gmail.com
Cc: netdev@vger.kernel.org, jhs@mojatatu.com
Subject: [net-next PATCH 2/2] net: sched: cls_u32 changes to knode must appear atomic to readers
Date: Wed, 17 Sep 2014 12:12:03 -0700 [thread overview]
Message-ID: <20140917191202.20529.87231.stgit@nitbit.x32> (raw)
In-Reply-To: <20140917191131.20529.91136.stgit@nitbit.x32>
Changes to the cls_u32 classifier must appear atomic to the
readers. Before this patch if a change is requested for both
the exts and ifindex, first the ifindex is updated then the
exts with tcf_exts_change(). This opens a small window where
a reader can have a exts chain with an incorrect ifindex. This
violates the the RCU semantics.
Here we resolve this by always passing u32_set_parms() a copy
of the tc_u_knode to work on and then inserting it into the hash
table after the updates have been successfully applied.
Tested with the following short script:
#tc filter add dev p3p2 parent 8001:0 protocol ip prio 99 handle 1: \
u32 divisor 256
#tc filter add dev p3p2 parent 8001:0 protocol ip prio 99 \
u32 link 1: hashkey mask ffffff00 at 12 \
match ip src 192.168.8.0/2
#tc filter add dev p3p2 parent 8001:0 protocol ip prio 102 \
handle 1::10 u32 classid 1:2 ht 1: \
match ip src 192.168.8.0/8 match ip tos 0x0a 1e
#tc filter change dev p3p2 parent 8001:0 protocol ip prio 102 \
handle 1::10 u32 classid 1:2 ht 1: \
match ip src 1.1.0.0/8 match ip tos 0x0b 1e
CC: Eric Dumazet <edumazet@google.com>
CC: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
net/sched/cls_u32.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 107 insertions(+), 7 deletions(-)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index e76d50b..7ddc896 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -354,27 +354,36 @@ static int u32_init(struct tcf_proto *tp)
return 0;
}
-static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
+static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool pf)
{
tcf_unbind_filter(tp, &n->res);
tcf_exts_destroy(tp, &n->exts);
if (n->ht_down)
n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_PERF
- free_percpu(n->pf);
+ if (pf)
+ free_percpu(n->pf);
#endif
#ifdef CONFIG_CLS_U32_MARK
- free_percpu(n->pcpu_success);
+ if (pf)
+ free_percpu(n->pcpu_success);
#endif
kfree(n);
return 0;
}
+static void u32_delete_key_rcu_pf(struct rcu_head *rcu)
+{
+ struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
+
+ u32_destroy_key(key->tp, key, false);
+}
+
static void u32_delete_key_rcu(struct rcu_head *rcu)
{
struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
- u32_destroy_key(key->tp, key);
+ u32_destroy_key(key->tp, key, true);
}
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
@@ -584,6 +593,82 @@ errout:
return err;
}
+static void u32_replace_knode(struct tcf_proto *tp,
+ struct tc_u_common *tp_c,
+ struct tc_u_knode *n)
+{
+ struct tc_u_knode __rcu **ins;
+ struct tc_u_knode *pins;
+ struct tc_u_hnode *ht;
+
+ if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
+ ht = rtnl_dereference(tp->root);
+ else
+ ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));
+
+ ins = &ht->ht[TC_U32_HASH(n->handle)];
+
+ /* The node must always exist for it to be replaced if this is not the
+ * case then something went very wrong elsewhere.
+ */
+ for (pins = rtnl_dereference(*ins); ;
+ ins = &pins->next, pins = rtnl_dereference(*ins))
+ if (pins->handle == n->handle)
+ break;
+
+ RCU_INIT_POINTER(n->next, pins->next);
+ rcu_assign_pointer(*ins, n);
+}
+
+static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
+ struct tc_u_knode *n)
+{
+ struct tc_u_knode *new;
+ struct tc_u32_sel *s = &n->sel;
+
+ new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
+ GFP_KERNEL);
+
+ if (!new)
+ return NULL;
+
+ RCU_INIT_POINTER(new->next, n->next);
+ new->handle = n->handle;
+ RCU_INIT_POINTER(new->ht_up, n->ht_up);
+
+#ifdef CONFIG_NET_CLS_IND
+ new->ifindex = n->ifindex;
+#endif
+ new->fshift = n->fshift;
+ new->res = n->res;
+ RCU_INIT_POINTER(new->ht_down, n->ht_down);
+
+ /* bump reference count as long as we hold pointer to structure */
+ if (new->ht_down)
+ new->ht_down->refcnt++;
+
+#ifdef CONFIG_CLS_U32_PERF
+ /* Statistics may be incremented by readers during update
+ * so we must keep them in tact. When the node is later destroyed
+ * a special destroy call must be made to not free the pf memory.
+ */
+ new->pf = n->pf;
+#endif
+
+#ifdef CONFIG_CLS_U32_MARK
+ new->val = n->val;
+ new->mask = n->mask;
+ /* Similarly success statistics must be moved as pointers */
+ new->pcpu_success = n->pcpu_success;
+#endif
+ new->tp = tp;
+ memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
+
+ tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE);
+
+ return new;
+}
+
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca,
@@ -610,12 +695,27 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
n = (struct tc_u_knode *)*arg;
if (n) {
+ struct tc_u_knode *new;
+
if (TC_U32_KEY(n->handle) == 0)
return -EINVAL;
- return u32_set_parms(net, tp, base,
- rtnl_dereference(n->ht_up), n, tb,
- tca[TCA_RATE], ovr);
+ new = u32_init_knode(tp, n);
+ if (!new)
+ return -ENOMEM;
+
+ err = u32_set_parms(net, tp, base,
+ rtnl_dereference(n->ht_up), new, tb,
+ tca[TCA_RATE], ovr);
+
+ if (err) {
+ u32_destroy_key(tp, new, false);
+ return err;
+ }
+
+ u32_replace_knode(tp, tp_c, new);
+ call_rcu(&n->rcu, u32_delete_key_rcu_pf);
+ return 0;
}
if (tb[TCA_U32_DIVISOR]) {
next prev parent reply other threads:[~2014-09-17 19:12 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-09-17 19:11 [net-next PATCH 1/2] net: cls_u32: fix missed pcpu_success free_percpu John Fastabend
2014-09-17 19:12 ` John Fastabend [this message]
2014-09-17 21:11 ` [net-next PATCH 2/2] net: sched: cls_u32 changes to knode must appear atomic to readers Cong Wang
2014-09-18 0:06 ` John Fastabend
2014-09-18 16:28 ` Cong Wang
2014-09-18 16:39 ` John Fastabend
2014-09-18 11:38 ` Jamal Hadi Salim
2014-09-20 4:55 ` John Fastabend
2014-09-18 1:17 ` [net-next PATCH 1/2] net: cls_u32: fix missed pcpu_success free_percpu John Fastabend
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140917191202.20529.87231.stgit@nitbit.x32 \
--to=john.fastabend@gmail.com \
--cc=davem@davemloft.net \
--cc=eric.dumazet@gmail.com \
--cc=jhs@mojatatu.com \
--cc=netdev@vger.kernel.org \
--cc=xiyou.wangcong@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).