* [net-next PATCH 1/2] net: cls_u32: fix missed pcpu_success free_percpu
@ 2014-09-17 19:11 John Fastabend
2014-09-17 19:12 ` [net-next PATCH 2/2] net: sched: cls_u32 changes to knode must appear atomic to readers John Fastabend
2014-09-18 1:17 ` [net-next PATCH 1/2] net: cls_u32: fix missed pcpu_success free_percpu John Fastabend
0 siblings, 2 replies; 9+ messages in thread
From: John Fastabend @ 2014-09-17 19:11 UTC (permalink / raw)
To: xiyou.wangcong, davem, eric.dumazet; +Cc: netdev, jhs
This fixes a missed free_percpu in the unwind code path and when
keys are destroyed.
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
net/sched/cls_u32.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 730edb2..e76d50b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -363,6 +363,9 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
#ifdef CONFIG_CLS_U32_PERF
free_percpu(n->pf);
#endif
+#ifdef CONFIG_CLS_U32_MARK
+ free_percpu(n->pcpu_success);
+#endif
kfree(n);
return 0;
}
@@ -723,6 +726,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
#ifdef CONFIG_CLS_U32_PERF
free_percpu(n->pf);
#endif
+#ifdef CONFIG_CLS_U32_MARK
+ free_percpu(n->pcpu_success);
+#endif
kfree(n);
return err;
}
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [net-next PATCH 2/2] net: sched: cls_u32 changes to knode must appear atomic to readers
2014-09-17 19:11 [net-next PATCH 1/2] net: cls_u32: fix missed pcpu_success free_percpu John Fastabend
@ 2014-09-17 19:12 ` John Fastabend
2014-09-17 21:11 ` Cong Wang
2014-09-18 11:38 ` Jamal Hadi Salim
2014-09-18 1:17 ` [net-next PATCH 1/2] net: cls_u32: fix missed pcpu_success free_percpu John Fastabend
1 sibling, 2 replies; 9+ messages in thread
From: John Fastabend @ 2014-09-17 19:12 UTC (permalink / raw)
To: xiyou.wangcong, davem, eric.dumazet; +Cc: netdev, jhs
Changes to the cls_u32 classifier must appear atomic to the
readers. Before this patch if a change is requested for both
the exts and ifindex, first the ifindex is updated then the
exts with tcf_exts_change(). This opens a small window where
a reader can have a exts chain with an incorrect ifindex. This
violates the the RCU semantics.
Here we resolve this by always passing u32_set_parms() a copy
of the tc_u_knode to work on and then inserting it into the hash
table after the updates have been successfully applied.
Tested with the following short script:
#tc filter add dev p3p2 parent 8001:0 protocol ip prio 99 handle 1: \
u32 divisor 256
#tc filter add dev p3p2 parent 8001:0 protocol ip prio 99 \
u32 link 1: hashkey mask ffffff00 at 12 \
match ip src 192.168.8.0/2
#tc filter add dev p3p2 parent 8001:0 protocol ip prio 102 \
handle 1::10 u32 classid 1:2 ht 1: \
match ip src 192.168.8.0/8 match ip tos 0x0a 1e
#tc filter change dev p3p2 parent 8001:0 protocol ip prio 102 \
handle 1::10 u32 classid 1:2 ht 1: \
match ip src 1.1.0.0/8 match ip tos 0x0b 1e
CC: Eric Dumazet <edumazet@google.com>
CC: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
net/sched/cls_u32.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 107 insertions(+), 7 deletions(-)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index e76d50b..7ddc896 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -354,27 +354,36 @@ static int u32_init(struct tcf_proto *tp)
return 0;
}
-static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
+static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool pf)
{
tcf_unbind_filter(tp, &n->res);
tcf_exts_destroy(tp, &n->exts);
if (n->ht_down)
n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_PERF
- free_percpu(n->pf);
+ if (pf)
+ free_percpu(n->pf);
#endif
#ifdef CONFIG_CLS_U32_MARK
- free_percpu(n->pcpu_success);
+ if (pf)
+ free_percpu(n->pcpu_success);
#endif
kfree(n);
return 0;
}
+static void u32_delete_key_rcu_pf(struct rcu_head *rcu)
+{
+ struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
+
+ u32_destroy_key(key->tp, key, false);
+}
+
static void u32_delete_key_rcu(struct rcu_head *rcu)
{
struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
- u32_destroy_key(key->tp, key);
+ u32_destroy_key(key->tp, key, true);
}
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
@@ -584,6 +593,82 @@ errout:
return err;
}
+static void u32_replace_knode(struct tcf_proto *tp,
+ struct tc_u_common *tp_c,
+ struct tc_u_knode *n)
+{
+ struct tc_u_knode __rcu **ins;
+ struct tc_u_knode *pins;
+ struct tc_u_hnode *ht;
+
+ if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
+ ht = rtnl_dereference(tp->root);
+ else
+ ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));
+
+ ins = &ht->ht[TC_U32_HASH(n->handle)];
+
+ /* The node must always exist for it to be replaced if this is not the
+ * case then something went very wrong elsewhere.
+ */
+ for (pins = rtnl_dereference(*ins); ;
+ ins = &pins->next, pins = rtnl_dereference(*ins))
+ if (pins->handle == n->handle)
+ break;
+
+ RCU_INIT_POINTER(n->next, pins->next);
+ rcu_assign_pointer(*ins, n);
+}
+
+static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
+ struct tc_u_knode *n)
+{
+ struct tc_u_knode *new;
+ struct tc_u32_sel *s = &n->sel;
+
+ new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
+ GFP_KERNEL);
+
+ if (!new)
+ return NULL;
+
+ RCU_INIT_POINTER(new->next, n->next);
+ new->handle = n->handle;
+ RCU_INIT_POINTER(new->ht_up, n->ht_up);
+
+#ifdef CONFIG_NET_CLS_IND
+ new->ifindex = n->ifindex;
+#endif
+ new->fshift = n->fshift;
+ new->res = n->res;
+ RCU_INIT_POINTER(new->ht_down, n->ht_down);
+
+ /* bump reference count as long as we hold pointer to structure */
+ if (new->ht_down)
+ new->ht_down->refcnt++;
+
+#ifdef CONFIG_CLS_U32_PERF
+ /* Statistics may be incremented by readers during update
+ * so we must keep them in tact. When the node is later destroyed
+ * a special destroy call must be made to not free the pf memory.
+ */
+ new->pf = n->pf;
+#endif
+
+#ifdef CONFIG_CLS_U32_MARK
+ new->val = n->val;
+ new->mask = n->mask;
+ /* Similarly success statistics must be moved as pointers */
+ new->pcpu_success = n->pcpu_success;
+#endif
+ new->tp = tp;
+ memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
+
+ tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE);
+
+ return new;
+}
+
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca,
@@ -610,12 +695,27 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
n = (struct tc_u_knode *)*arg;
if (n) {
+ struct tc_u_knode *new;
+
if (TC_U32_KEY(n->handle) == 0)
return -EINVAL;
- return u32_set_parms(net, tp, base,
- rtnl_dereference(n->ht_up), n, tb,
- tca[TCA_RATE], ovr);
+ new = u32_init_knode(tp, n);
+ if (!new)
+ return -ENOMEM;
+
+ err = u32_set_parms(net, tp, base,
+ rtnl_dereference(n->ht_up), new, tb,
+ tca[TCA_RATE], ovr);
+
+ if (err) {
+ u32_destroy_key(tp, new, false);
+ return err;
+ }
+
+ u32_replace_knode(tp, tp_c, new);
+ call_rcu(&n->rcu, u32_delete_key_rcu_pf);
+ return 0;
}
if (tb[TCA_U32_DIVISOR]) {
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [net-next PATCH 2/2] net: sched: cls_u32 changes to knode must appear atomic to readers
2014-09-17 19:12 ` [net-next PATCH 2/2] net: sched: cls_u32 changes to knode must appear atomic to readers John Fastabend
@ 2014-09-17 21:11 ` Cong Wang
2014-09-18 0:06 ` John Fastabend
2014-09-18 11:38 ` Jamal Hadi Salim
1 sibling, 1 reply; 9+ messages in thread
From: Cong Wang @ 2014-09-17 21:11 UTC (permalink / raw)
To: John Fastabend
Cc: Cong Wang, David Miller, Eric Dumazet, netdev, Jamal Hadi Salim
On Wed, Sep 17, 2014 at 12:12 PM, John Fastabend
<john.fastabend@gmail.com> wrote:
>
> -static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
> +static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool pf)
> {
> tcf_unbind_filter(tp, &n->res);
> tcf_exts_destroy(tp, &n->exts);
> if (n->ht_down)
> n->ht_down->refcnt--;
> #ifdef CONFIG_CLS_U32_PERF
> - free_percpu(n->pf);
> + if (pf)
Nit: 'free_pf' is a better name than just 'pf'.
> + free_percpu(n->pf);
> #endif
> #ifdef CONFIG_CLS_U32_MARK
> - free_percpu(n->pcpu_success);
> + if (pf)
> + free_percpu(n->pcpu_success);
> #endif
> kfree(n);
> return 0;
> }
>
> +static void u32_delete_key_rcu_pf(struct rcu_head *rcu)
> +{
> + struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
> +
> + u32_destroy_key(key->tp, key, false);
> +}
I think you need a comment here to explain why you free it partially
on purpose, it is not that clear, at least I spent some time to figure
it out when I read your cls_tcindex patch.
Thanks!
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [net-next PATCH 2/2] net: sched: cls_u32 changes to knode must appear atomic to readers
2014-09-17 21:11 ` Cong Wang
@ 2014-09-18 0:06 ` John Fastabend
2014-09-18 16:28 ` Cong Wang
0 siblings, 1 reply; 9+ messages in thread
From: John Fastabend @ 2014-09-18 0:06 UTC (permalink / raw)
To: Cong Wang; +Cc: Cong Wang, David Miller, Eric Dumazet, netdev, Jamal Hadi Salim
On 09/17/2014 02:11 PM, Cong Wang wrote:
> On Wed, Sep 17, 2014 at 12:12 PM, John Fastabend
> <john.fastabend@gmail.com> wrote:
>>
>> -static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
>> +static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool pf)
>> {
>> tcf_unbind_filter(tp, &n->res);
>> tcf_exts_destroy(tp, &n->exts);
>> if (n->ht_down)
>> n->ht_down->refcnt--;
>> #ifdef CONFIG_CLS_U32_PERF
>> - free_percpu(n->pf);
>> + if (pf)
>
> Nit: 'free_pf' is a better name than just 'pf'.
>
agreed I'll update it.
>> + free_percpu(n->pf);
>> #endif
>> #ifdef CONFIG_CLS_U32_MARK
>> - free_percpu(n->pcpu_success);
>> + if (pf)
>> + free_percpu(n->pcpu_success);
>> #endif
>> kfree(n);
>> return 0;
>> }
>>
>> +static void u32_delete_key_rcu_pf(struct rcu_head *rcu)
>> +{
>> + struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
>> +
>> + u32_destroy_key(key->tp, key, false);
>> +}
>
> I think you need a comment here to explain why you free it partially
> on purpose, it is not that clear, at least I spent some time to figure
> it out when I read your cls_tcindex patch.
>
> Thanks!
>
Sure how about this,
/* u32_delete_key_rcu_pf should be called when free'ing a copied
* version of a tc_u_knode obtained from u32_init_knode(). When
* copies are obtained from u32_init_knode() the statistics are
* shared between the old and new copies to allow readers to
* continue to update the statistics during the copy. To support
* this the u32_delete_key_rcu_pf variant does not free the percpu
* statistics.
*/
--
John Fastabend Intel Corporation
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [net-next PATCH 1/2] net: cls_u32: fix missed pcpu_success free_percpu
2014-09-17 19:11 [net-next PATCH 1/2] net: cls_u32: fix missed pcpu_success free_percpu John Fastabend
2014-09-17 19:12 ` [net-next PATCH 2/2] net: sched: cls_u32 changes to knode must appear atomic to readers John Fastabend
@ 2014-09-18 1:17 ` John Fastabend
1 sibling, 0 replies; 9+ messages in thread
From: John Fastabend @ 2014-09-18 1:17 UTC (permalink / raw)
To: xiyou.wangcong, davem, eric.dumazet; +Cc: netdev, jhs
On 09/17/2014 12:11 PM, John Fastabend wrote:
> This fixes a missed free_percpu in the unwind code path and when
> keys are destroyed.
>
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> ---
> net/sched/cls_u32.c | 6 ++++++
> 1 file changed, 6 insertions(+)
>
This patch still misses free'ing pcpu_success in an error path
so I'll address it when I send a v2 to address comments on the
second patch
--
John Fastabend Intel Corporation
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [net-next PATCH 2/2] net: sched: cls_u32 changes to knode must appear atomic to readers
2014-09-17 19:12 ` [net-next PATCH 2/2] net: sched: cls_u32 changes to knode must appear atomic to readers John Fastabend
2014-09-17 21:11 ` Cong Wang
@ 2014-09-18 11:38 ` Jamal Hadi Salim
2014-09-20 4:55 ` John Fastabend
1 sibling, 1 reply; 9+ messages in thread
From: Jamal Hadi Salim @ 2014-09-18 11:38 UTC (permalink / raw)
To: John Fastabend, xiyou.wangcong, davem, eric.dumazet; +Cc: netdev
On 09/17/14 15:12, John Fastabend wrote:
> Changes to the cls_u32 classifier must appear atomic to the
> readers. Before this patch if a change is requested for both
> the exts and ifindex, first the ifindex is updated then the
> exts with tcf_exts_change(). This opens a small window where
> a reader can have a exts chain with an incorrect ifindex. This
> violates the the RCU semantics.
>
> Here we resolve this by always passing u32_set_parms() a copy
> of the tc_u_knode to work on and then inserting it into the hash
> table after the updates have been successfully applied.
>
> Tested with the following short script:
>
>
> #tc filter add dev p3p2 parent 8001:0 protocol ip prio 99 handle 1: \
> u32 divisor 256
>
> #tc filter add dev p3p2 parent 8001:0 protocol ip prio 99 \
> u32 link 1: hashkey mask ffffff00 at 12 \
> match ip src 192.168.8.0/2
>
> #tc filter add dev p3p2 parent 8001:0 protocol ip prio 102 \
> handle 1::10 u32 classid 1:2 ht 1: \
> match ip src 192.168.8.0/8 match ip tos 0x0a 1e
>
> #tc filter change dev p3p2 parent 8001:0 protocol ip prio 102 \
> handle 1::10 u32 classid 1:2 ht 1: \
> match ip src 1.1.0.0/8 match ip tos 0x0b 1e
>
> CC: Eric Dumazet <edumazet@google.com>
> CC: Jamal Hadi Salim <jhs@mojatatu.com>
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Looks good to me.
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
cheers,
jamal
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [net-next PATCH 2/2] net: sched: cls_u32 changes to knode must appear atomic to readers
2014-09-18 0:06 ` John Fastabend
@ 2014-09-18 16:28 ` Cong Wang
2014-09-18 16:39 ` John Fastabend
0 siblings, 1 reply; 9+ messages in thread
From: Cong Wang @ 2014-09-18 16:28 UTC (permalink / raw)
To: John Fastabend
Cc: Cong Wang, David Miller, Eric Dumazet, netdev, Jamal Hadi Salim
On Wed, Sep 17, 2014 at 5:06 PM, John Fastabend
<john.fastabend@gmail.com> wrote:
>
> Sure how about this,
>
> /* u32_delete_key_rcu_pf should be called when free'ing a copied
> * version of a tc_u_knode obtained from u32_init_knode(). When
> * copies are obtained from u32_init_knode() the statistics are
> * shared between the old and new copies to allow readers to
> * continue to update the statistics during the copy. To support
> * this the u32_delete_key_rcu_pf variant does not free the percpu
> * statistics.
> */
>
Looks good, but why you pick the name u32_delete_key_rcu_pf()?
It sounds like you will free pf, but actually you will skip freeing pf. :)
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [net-next PATCH 2/2] net: sched: cls_u32 changes to knode must appear atomic to readers
2014-09-18 16:28 ` Cong Wang
@ 2014-09-18 16:39 ` John Fastabend
0 siblings, 0 replies; 9+ messages in thread
From: John Fastabend @ 2014-09-18 16:39 UTC (permalink / raw)
To: Cong Wang; +Cc: Cong Wang, David Miller, Eric Dumazet, netdev, Jamal Hadi Salim
On 09/18/2014 09:28 AM, Cong Wang wrote:
> On Wed, Sep 17, 2014 at 5:06 PM, John Fastabend
> <john.fastabend@gmail.com> wrote:
>>
>> Sure how about this,
>>
>> /* u32_delete_key_rcu_pf should be called when free'ing a copied
>> * version of a tc_u_knode obtained from u32_init_knode(). When
>> * copies are obtained from u32_init_knode() the statistics are
>> * shared between the old and new copies to allow readers to
>> * continue to update the statistics during the copy. To support
>> * this the u32_delete_key_rcu_pf variant does not free the percpu
>> * statistics.
>> */
>>
>
> Looks good, but why you pick the name u32_delete_key_rcu_pf()?
> It sounds like you will free pf, but actually you will skip freeing pf. :)
>
hmm it was just the first thing that came to mind.
I'll rename the two calls to make it clear when reading the code.
Thanks,
John
--
John Fastabend Intel Corporation
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [net-next PATCH 2/2] net: sched: cls_u32 changes to knode must appear atomic to readers
2014-09-18 11:38 ` Jamal Hadi Salim
@ 2014-09-20 4:55 ` John Fastabend
0 siblings, 0 replies; 9+ messages in thread
From: John Fastabend @ 2014-09-20 4:55 UTC (permalink / raw)
To: Jamal Hadi Salim; +Cc: xiyou.wangcong, davem, eric.dumazet, netdev
On 09/18/2014 04:38 AM, Jamal Hadi Salim wrote:
> On 09/17/14 15:12, John Fastabend wrote:
>> Changes to the cls_u32 classifier must appear atomic to the
>> readers. Before this patch if a change is requested for both
>> the exts and ifindex, first the ifindex is updated then the
>> exts with tcf_exts_change(). This opens a small window where
>> a reader can have a exts chain with an incorrect ifindex. This
>> violates the the RCU semantics.
>>
>> Here we resolve this by always passing u32_set_parms() a copy
>> of the tc_u_knode to work on and then inserting it into the hash
>> table after the updates have been successfully applied.
>>
>> Tested with the following short script:
>>
>
>>
>> #tc filter add dev p3p2 parent 8001:0 protocol ip prio 99 handle 1: \
>> u32 divisor 256
>>
>> #tc filter add dev p3p2 parent 8001:0 protocol ip prio 99 \
>> u32 link 1: hashkey mask ffffff00 at 12 \
>> match ip src 192.168.8.0/2
>>
>> #tc filter add dev p3p2 parent 8001:0 protocol ip prio 102 \
>> handle 1::10 u32 classid 1:2 ht 1: \
>> match ip src 192.168.8.0/8 match ip tos 0x0a 1e
>>
>> #tc filter change dev p3p2 parent 8001:0 protocol ip prio 102 \
>> handle 1::10 u32 classid 1:2 ht 1: \
>> match ip src 1.1.0.0/8 match ip tos 0x0b 1e
>>
>> CC: Eric Dumazet <edumazet@google.com>
>> CC: Jamal Hadi Salim <jhs@mojatatu.com>
>> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
>
>
> Looks good to me.
> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
>
> cheers,
> jamal
Thanks for looking it over! I made v2 though to address a comment
that my variable/function names could be better and added a comment
around the perhaps tricky cases where it is safe to free the percpu
variables.
Because I did touch the patch and make some changes I dropped your
ACK. I always thought it was a bit of bad form to carry ack's around
after modifying the code without an explicit approval. Please add it
back though if you want.
Thanks,
John
--
John Fastabend Intel Corporation
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2014-09-20 4:55 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-09-17 19:11 [net-next PATCH 1/2] net: cls_u32: fix missed pcpu_success free_percpu John Fastabend
2014-09-17 19:12 ` [net-next PATCH 2/2] net: sched: cls_u32 changes to knode must appear atomic to readers John Fastabend
2014-09-17 21:11 ` Cong Wang
2014-09-18 0:06 ` John Fastabend
2014-09-18 16:28 ` Cong Wang
2014-09-18 16:39 ` John Fastabend
2014-09-18 11:38 ` Jamal Hadi Salim
2014-09-20 4:55 ` John Fastabend
2014-09-18 1:17 ` [net-next PATCH 1/2] net: cls_u32: fix missed pcpu_success free_percpu John Fastabend
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).