* [PATCH nf-next 1/3] netfilter: restart nf ct cleanup if hash resize happen
2017-05-21 5:39 [PATCH nf-next 0/3] netfilter: handle hash resize situation in nf_ct_iterate_cleanup Liping Zhang
@ 2017-05-21 5:39 ` Liping Zhang
2017-05-21 8:09 ` Florian Westphal
2017-05-21 5:39 ` [PATCH nf-next 2/3] netfilter: nf_ct_helper: use nf_ct_iterate_cleanup to unlink helper objs Liping Zhang
` (2 subsequent siblings)
3 siblings, 1 reply; 11+ messages in thread
From: Liping Zhang @ 2017-05-21 5:39 UTC (permalink / raw)
To: pablo; +Cc: netfilter-devel, Liping Zhang
From: Liping Zhang <zlpnobody@gmail.com>
Similar to commit 474803d37e7f ("netfilter: cttimeout: unlink timeout
obj again when hash resize happen"), when hash resize happen, we should
try to do cleanup work from the 0#bucket again, so we will never miss
the conntrack entries which we are intrested in. This is important for
the module removal.
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
---
net/netfilter/nf_conntrack_core.c | 35 +++++++++++++++++++++++------------
1 file changed, 23 insertions(+), 12 deletions(-)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e847dba..dec4c2a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1587,7 +1587,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
/* Bring out ya dead! */
static struct nf_conn *
get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
- void *data, unsigned int *bucket)
+ void *data, unsigned int *bucket, unsigned int *hsize)
{
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
@@ -1595,20 +1595,28 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
int cpu;
spinlock_t *lockp;
- for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
+ for (; *bucket < *hsize; (*bucket)++) {
lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
local_bh_disable();
nf_conntrack_lock(lockp);
- if (*bucket < nf_conntrack_htable_size) {
- hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
- if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
- continue;
- ct = nf_ct_tuplehash_to_ctrack(h);
- if (net_eq(nf_ct_net(ct), net) &&
- iter(ct, data))
- goto found;
- }
+
+ /* nf conntrack hash resize happened. */
+ if (*hsize != nf_conntrack_htable_size) {
+ *hsize = nf_conntrack_htable_size;
+ *bucket = 0;
+ goto cont;
+ }
+
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket],
+ hnnode) {
+ if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+ continue;
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (net_eq(nf_ct_net(ct), net) &&
+ iter(ct, data))
+ goto found;
}
+cont:
spin_unlock(lockp);
local_bh_enable();
cond_resched();
@@ -1640,13 +1648,16 @@ void nf_ct_iterate_cleanup(struct net *net,
{
struct nf_conn *ct;
unsigned int bucket = 0;
+ unsigned int hsize;
might_sleep();
if (atomic_read(&net->ct.count) == 0)
return;
- while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
+ hsize = nf_conntrack_htable_size;
+ while ((ct = get_next_corpse(net, iter, data, &bucket,
+ &hsize)) != NULL) {
/* Time to push up daises... */
nf_ct_delete(ct, portid, report);
--
2.5.5
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH nf-next 2/3] netfilter: nf_ct_helper: use nf_ct_iterate_cleanup to unlink helper objs
2017-05-21 5:39 [PATCH nf-next 0/3] netfilter: handle hash resize situation in nf_ct_iterate_cleanup Liping Zhang
2017-05-21 5:39 ` [PATCH nf-next 1/3] netfilter: restart nf ct cleanup if hash resize happen Liping Zhang
@ 2017-05-21 5:39 ` Liping Zhang
2017-05-21 8:15 ` Florian Westphal
2017-05-21 5:39 ` [PATCH nf-next 3/3] netfilter: cttimeout: use nf_ct_iterate_cleanup to unlink timeout objs Liping Zhang
2017-05-21 8:07 ` [PATCH nf-next 0/3] netfilter: handle hash resize situation in nf_ct_iterate_cleanup Florian Westphal
3 siblings, 1 reply; 11+ messages in thread
From: Liping Zhang @ 2017-05-21 5:39 UTC (permalink / raw)
To: pablo; +Cc: netfilter-devel, Liping Zhang
From: Liping Zhang <zlpnobody@gmail.com>
When we unlink the helper objects, we will iterate the nf_conntrack_hash,
iterate the unconfirmed list, handle the hash resize situation, etc.
Actually this logic is same as the nf_ct_iterate_cleanup, so we can use it
to remove these copy & paste codes.
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
---
net/netfilter/nf_conntrack_helper.c | 46 ++++---------------------------------
1 file changed, 4 insertions(+), 42 deletions(-)
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 3a60efa..32cd36d 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -274,16 +274,16 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
/* appropriate ct lock protecting must be taken by caller */
-static inline int unhelp(struct nf_conntrack_tuple_hash *i,
- const struct nf_conntrack_helper *me)
+static int unhelp(struct nf_conn *ct, void *me)
{
- struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
struct nf_conn_help *help = nfct_help(ct);
if (help && rcu_dereference_raw(help->helper) == me) {
nf_conntrack_event(IPCT_HELPER, ct);
RCU_INIT_POINTER(help->helper, NULL);
}
+
+ /* We are not intended to delete this conntrack. */
return 0;
}
@@ -425,32 +425,10 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
-static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
- struct net *net)
-{
- struct nf_conntrack_tuple_hash *h;
- const struct hlist_nulls_node *nn;
- int cpu;
-
- /* Get rid of expecteds, set helpers to NULL. */
- for_each_possible_cpu(cpu) {
- struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
-
- spin_lock_bh(&pcpu->lock);
- hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
- unhelp(h, me);
- spin_unlock_bh(&pcpu->lock);
- }
-}
-
void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
{
- struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_expect *exp;
const struct hlist_node *next;
- const struct hlist_nulls_node *nn;
- unsigned int last_hsize;
- spinlock_t *lock;
struct net *net;
unsigned int i;
@@ -481,24 +459,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
rtnl_lock();
for_each_net(net)
- __nf_conntrack_helper_unregister(me, net);
+ nf_ct_iterate_cleanup(net, unhelp, me, 0, 0);
rtnl_unlock();
-
- local_bh_disable();
-restart:
- last_hsize = nf_conntrack_htable_size;
- for (i = 0; i < last_hsize; i++) {
- lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS];
- nf_conntrack_lock(lock);
- if (last_hsize != nf_conntrack_htable_size) {
- spin_unlock(lock);
- goto restart;
- }
- hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
- unhelp(h, me);
- spin_unlock(lock);
- }
- local_bh_enable();
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
--
2.5.5
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH nf-next 2/3] netfilter: nf_ct_helper: use nf_ct_iterate_cleanup to unlink helper objs
2017-05-21 5:39 ` [PATCH nf-next 2/3] netfilter: nf_ct_helper: use nf_ct_iterate_cleanup to unlink helper objs Liping Zhang
@ 2017-05-21 8:15 ` Florian Westphal
2017-05-21 10:17 ` Liping Zhang
0 siblings, 1 reply; 11+ messages in thread
From: Florian Westphal @ 2017-05-21 8:15 UTC (permalink / raw)
To: Liping Zhang; +Cc: pablo, netfilter-devel, Liping Zhang
Liping Zhang <zlpnobody@163.com> wrote:
> From: Liping Zhang <zlpnobody@gmail.com>
>
> When we unlink the helper objects, we will iterate the nf_conntrack_hash,
> iterate the unconfirmed list, handle the hash resize situation, etc.
>
> Actually this logic is same as the nf_ct_iterate_cleanup, so we can use it
> to remove these copy & paste codes.
Agree, this is a good idea. However:
> --- a/net/netfilter/nf_conntrack_helper.c
> +++ b/net/netfilter/nf_conntrack_helper.c
> @@ -274,16 +274,16 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
> EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
>
> /* appropriate ct lock protecting must be taken by caller */
> -static inline int unhelp(struct nf_conntrack_tuple_hash *i,
> - const struct nf_conntrack_helper *me)
> +static int unhelp(struct nf_conn *ct, void *me)
> {
> - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
> struct nf_conn_help *help = nfct_help(ct);
>
> if (help && rcu_dereference_raw(help->helper) == me) {
> nf_conntrack_event(IPCT_HELPER, ct);
> RCU_INIT_POINTER(help->helper, NULL);
> }
this is broken for unconfirmed conntracks, as
other cpu can reallocate the extension area.
For the module removal case, we have no choice but to toss the
unconfirmed conntracks.
Same for patch #3.
I plan to submit my patches soon, perhaps its best if I only
submit the first couple of patches so you can rebase on top of that?
Alternatively, I'm fine if your patches go in first, I can also
just rebase on top.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH nf-next 2/3] netfilter: nf_ct_helper: use nf_ct_iterate_cleanup to unlink helper objs
2017-05-21 8:15 ` Florian Westphal
@ 2017-05-21 10:17 ` Liping Zhang
2017-05-21 10:31 ` Florian Westphal
0 siblings, 1 reply; 11+ messages in thread
From: Liping Zhang @ 2017-05-21 10:17 UTC (permalink / raw)
To: Florian Westphal
Cc: Liping Zhang, Pablo Neira Ayuso, Netfilter Developer Mailing List
Hi Florian,
2017-05-21 16:15 GMT+08:00 Florian Westphal <fw@strlen.de>:
[...]
> this is broken for unconfirmed conntracks, as
> other cpu can reallocate the extension area.
Right, I missed this point, thanks for your reminder.
> For the module removal case, we have no choice but to toss the
> unconfirmed conntracks.
>
> Same for patch #3.
>
> I plan to submit my patches soon, perhaps its best if I only
> submit the first couple of patches so you can rebase on top of that?
I read your nfct_iterate_cleanup_15 patch series just now.
Your patch set did more jobs, also including all the jobs which
my patch set did. :)
I think it's better to do these things together, so I'm fine if you
can mark my patch set as Superseded. :)
> Alternatively, I'm fine if your patches go in first, I can also
> just rebase on top.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH nf-next 2/3] netfilter: nf_ct_helper: use nf_ct_iterate_cleanup to unlink helper objs
2017-05-21 10:17 ` Liping Zhang
@ 2017-05-21 10:31 ` Florian Westphal
2017-05-21 11:05 ` Liping Zhang
0 siblings, 1 reply; 11+ messages in thread
From: Florian Westphal @ 2017-05-21 10:31 UTC (permalink / raw)
To: Liping Zhang
Cc: Florian Westphal, Liping Zhang, Pablo Neira Ayuso,
Netfilter Developer Mailing List
Liping Zhang <zlpnobody@gmail.com> wrote:
> Hi Florian,
>
> 2017-05-21 16:15 GMT+08:00 Florian Westphal <fw@strlen.de>:
> [...]
> > this is broken for unconfirmed conntracks, as
> > other cpu can reallocate the extension area.
>
> Right, I missed this point, thanks for your reminder.
>
> > For the module removal case, we have no choice but to toss the
> > unconfirmed conntracks.
> >
> > Same for patch #3.
> >
> > I plan to submit my patches soon, perhaps its best if I only
> > submit the first couple of patches so you can rebase on top of that?
>
> I read your nfct_iterate_cleanup_15 patch series just now.
> Your patch set did more jobs, also including all the jobs which
> my patch set did. :)
>
> I think it's better to do these things together, so I'm fine if you
> can mark my patch set as Superseded. :)
What about this: I will submit first half of my patches, then you can
rebase your two patches on top and send them, then I can rebase again
the rest. What do you think?
BTW, I found another bug just now, but I don't have time to address it
right now:
nf_nat_proto_clean() does:
ct->status &= ~IPS_NAT_DONE_MASK;
Thats also broken(racy). We have to audit all the non-atomic writes of
ct->status and change them to set/clear_bit()...
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH nf-next 2/3] netfilter: nf_ct_helper: use nf_ct_iterate_cleanup to unlink helper objs
2017-05-21 10:31 ` Florian Westphal
@ 2017-05-21 11:05 ` Liping Zhang
2017-05-21 11:10 ` Florian Westphal
0 siblings, 1 reply; 11+ messages in thread
From: Liping Zhang @ 2017-05-21 11:05 UTC (permalink / raw)
To: Florian Westphal
Cc: Liping Zhang, Pablo Neira Ayuso, Netfilter Developer Mailing List
Hi Florian,
2017-05-21 18:31 GMT+08:00 Florian Westphal <fw@strlen.de>:
> Liping Zhang <zlpnobody@gmail.com> wrote:
>> Hi Florian,
>>
>> 2017-05-21 16:15 GMT+08:00 Florian Westphal <fw@strlen.de>:
>> [...]
>> > this is broken for unconfirmed conntracks, as
>> > other cpu can reallocate the extension area.
>>
>> Right, I missed this point, thanks for your reminder.
>>
>> > For the module removal case, we have no choice but to toss the
>> > unconfirmed conntracks.
>> >
>> > Same for patch #3.
>> >
>> > I plan to submit my patches soon, perhaps its best if I only
>> > submit the first couple of patches so you can rebase on top of that?
>>
>> I read your nfct_iterate_cleanup_15 patch series just now.
>> Your patch set did more jobs, also including all the jobs which
>> my patch set did. :)
>>
>> I think it's better to do these things together, so I'm fine if you
>> can mark my patch set as Superseded. :)
>
> What about this: I will submit first half of my patches, then you can
> rebase your two patches on top and send them, then I can rebase again
> the rest. What do you think?
OK. Fine with me.
> BTW, I found another bug just now, but I don't have time to address it
> right now:
>
> nf_nat_proto_clean() does:
>
> ct->status &= ~IPS_NAT_DONE_MASK;
Yes, here we should use clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
(For IPS_DST_NAT_DONE, we don't care about it, so we can
leave it unchanged.)
> Thats also broken(racy). We have to audit all the non-atomic writes of
> ct->status and change them to set/clear_bit()...
I audited the related codes just now, this seems to be the last
ct->status writer which use non-atomic bit operation(of course,
except these unconfirmed ct->status writer).
I will have a further and closer check. If you are not opposed to,
I can send a related patch to fix this. :)
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH nf-next 2/3] netfilter: nf_ct_helper: use nf_ct_iterate_cleanup to unlink helper objs
2017-05-21 11:05 ` Liping Zhang
@ 2017-05-21 11:10 ` Florian Westphal
0 siblings, 0 replies; 11+ messages in thread
From: Florian Westphal @ 2017-05-21 11:10 UTC (permalink / raw)
To: Liping Zhang
Cc: Florian Westphal, Liping Zhang, Pablo Neira Ayuso,
Netfilter Developer Mailing List
Liping Zhang <zlpnobody@gmail.com> wrote:
> Yes, here we should use clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
> (For IPS_DST_NAT_DONE, we don't care about it, so we can
> leave it unchanged.)
Oh, right.
> > Thats also broken(racy). We have to audit all the non-atomic writes of
> > ct->status and change them to set/clear_bit()...
>
> I audited the related codes just now, this seems to be the last
> ct->status writer which use non-atomic bit operation(of course,
> except these unconfirmed ct->status writer).
>
> I will have a further and closer check. If you are not opposed to,
> I can send a related patch to fix this. :)
That would be great, thanks Liping!
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH nf-next 3/3] netfilter: cttimeout: use nf_ct_iterate_cleanup to unlink timeout objs
2017-05-21 5:39 [PATCH nf-next 0/3] netfilter: handle hash resize situation in nf_ct_iterate_cleanup Liping Zhang
2017-05-21 5:39 ` [PATCH nf-next 1/3] netfilter: restart nf ct cleanup if hash resize happen Liping Zhang
2017-05-21 5:39 ` [PATCH nf-next 2/3] netfilter: nf_ct_helper: use nf_ct_iterate_cleanup to unlink helper objs Liping Zhang
@ 2017-05-21 5:39 ` Liping Zhang
2017-05-21 8:07 ` [PATCH nf-next 0/3] netfilter: handle hash resize situation in nf_ct_iterate_cleanup Florian Westphal
3 siblings, 0 replies; 11+ messages in thread
From: Liping Zhang @ 2017-05-21 5:39 UTC (permalink / raw)
To: pablo; +Cc: netfilter-devel, Liping Zhang
From: Liping Zhang <zlpnobody@gmail.com>
Similar to nf_conntrack_helper, we can use nf_ct_iterare_cleanup to
remove these copy & paste codes.
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
---
net/netfilter/nfnetlink_cttimeout.c | 39 +++++--------------------------------
1 file changed, 5 insertions(+), 34 deletions(-)
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index a3e7bb5..af59461 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -287,49 +287,20 @@ static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
return ret;
}
-static void untimeout(struct nf_conntrack_tuple_hash *i,
- struct ctnl_timeout *timeout)
+static int untimeout(struct nf_conn *ct, void *timeout)
{
- struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct);
if (timeout_ext && (!timeout || timeout_ext->timeout == timeout))
RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+
+ /* We are not intended to delete this conntrack. */
+ return 0;
}
static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
{
- struct nf_conntrack_tuple_hash *h;
- const struct hlist_nulls_node *nn;
- unsigned int last_hsize;
- spinlock_t *lock;
- int i, cpu;
-
- for_each_possible_cpu(cpu) {
- struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
-
- spin_lock_bh(&pcpu->lock);
- hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
- untimeout(h, timeout);
- spin_unlock_bh(&pcpu->lock);
- }
-
- local_bh_disable();
-restart:
- last_hsize = nf_conntrack_htable_size;
- for (i = 0; i < last_hsize; i++) {
- lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS];
- nf_conntrack_lock(lock);
- if (last_hsize != nf_conntrack_htable_size) {
- spin_unlock(lock);
- goto restart;
- }
-
- hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
- untimeout(h, timeout);
- spin_unlock(lock);
- }
- local_bh_enable();
+ nf_ct_iterate_cleanup(net, untimeout, timeout, 0, 0);
}
/* try to delete object, fail if it is still in use. */
--
2.5.5
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH nf-next 0/3] netfilter: handle hash resize situation in nf_ct_iterate_cleanup
2017-05-21 5:39 [PATCH nf-next 0/3] netfilter: handle hash resize situation in nf_ct_iterate_cleanup Liping Zhang
` (2 preceding siblings ...)
2017-05-21 5:39 ` [PATCH nf-next 3/3] netfilter: cttimeout: use nf_ct_iterate_cleanup to unlink timeout objs Liping Zhang
@ 2017-05-21 8:07 ` Florian Westphal
3 siblings, 0 replies; 11+ messages in thread
From: Florian Westphal @ 2017-05-21 8:07 UTC (permalink / raw)
To: Liping Zhang; +Cc: pablo, netfilter-devel, Liping Zhang
Liping Zhang <zlpnobody@163.com> wrote:
> From: Liping Zhang <zlpnobody@gmail.com>
>
> First, when we do nf ct cleanup, we should also handle the hash resize
> situation, so we will not miss the related conntracks, this is important
> for module removal.
>
> After we accomplish this, we can use nf_ct_iterate_cleanup to remove these
> copy & paste codes, which are used to unlink cthelper objects and
> cttimeout objects.
Agreed.
I have similar patches as part of a larger batch, see
https://git.breakpoint.cc/cgit/fw/nf-next.git/log/?h=nfct_iterate_cleanup_15
More comments as replies to the patches.
^ permalink raw reply [flat|nested] 11+ messages in thread