From: Florian Westphal <fw@strlen.de>
To: Giuseppe Scrivano <gscrivan@redhat.com>
Cc: Florian Westphal <fw@strlen.de>, netfilter-devel@vger.kernel.org
Subject: Re: [RFC PATCH] netfilter: call synchronize_net only once from nf_register_net_hooks
Date: Wed, 22 Nov 2017 12:56:59 +0100 [thread overview]
Message-ID: <20171122115659.GH24866@breakpoint.cc> (raw)
In-Reply-To: <87a7zea73n.fsf@redhat.com>
Giuseppe Scrivano <gscrivan@redhat.com> wrote:
> Florian Westphal <fw@strlen.de> writes:
>
> > Giuseppe Scrivano <gscrivan@redhat.com> wrote:
> >> SELinux, if enabled, registers for each new network namespace 6
> >> netfilter hooks. Avoid to use synchronize_net for each new hook, but do
> >> it once after all the hooks are added. The net benefit on an SMP
> >> machine with two cores is that creating a new network namespace takes
> >> -40% of the original time.
> >
> > but this needs more work.
> >
> >> Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
> >> ---
> >> net/netfilter/core.c | 15 ++++++++++++---
> >> 1 file changed, 12 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/net/netfilter/core.c b/net/netfilter/core.c
> >> index 52cd2901a097..beeb0b36f429 100644
> >> --- a/net/netfilter/core.c
> >> +++ b/net/netfilter/core.c
> >> @@ -252,7 +252,7 @@ static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const
> >> return NULL;
> >> }
> >>
> >> -int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
> >> +static int __nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
> >
> > Change this to return struct nf_hook_entries *
>
> thanks for the quick review. Are you fine if I change it to:
>
> static int __nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg, struct nf_hook_entries **old)
You can use ERR_PTR/PTR_ERR instead.
However, I suggest you try to go with call_rcu to get rid of all of the
synchronize_net() calls. I don't even see why its still needed for
nfqueue case provided we invoke the nfqueue drop handler first.
PoC example, untested:
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -77,6 +77,11 @@ struct nf_hook_entry {
void *priv;
};
+struct nf_hook_entries_rcu_head {
+ struct rcu_head head;
+ void *allocation;
+};
+
struct nf_hook_entries {
u16 num_hook_entries;
/* padding */
@@ -87,10 +92,13 @@ struct nf_hook_entries {
* This is not part of struct nf_hook_entry since its only
* needed in slow path (hook register/unregister).
*
- * const struct nf_hook_ops *orig_ops[]
+ * const struct nf_hook_ops *orig_ops[];
+ * struct nf_hook_entries_rcu_head;
*/
};
+void nf_hook_entries_free_rcu(struct nf_hook_entries *e);
+
static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct nf_hook_entries *e)
{
unsigned int n = e->num_hook_entries;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -69,22 +69,61 @@ static DEFINE_MUTEX(nf_hook_mutex);
#define nf_entry_dereference(e) \
rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
+static struct nf_hook_entries_rcu_head *
+nf_hook_entries_rcu_head_get(struct nf_hook_entries *e)
+{
+ unsigned int num = e->num_hook_entries;
+ struct nf_hook_entries_rcu_head *head;
+ static struct nf_hook_ops **ops;
+
+ ops = nf_hook_entries_get_hook_ops(e);
+ head = (void *)&ops[num];
+
+ return head;
+}
+
static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
{
struct nf_hook_entries *e;
size_t alloc = sizeof(*e) +
sizeof(struct nf_hook_entry) * num +
- sizeof(struct nf_hook_ops *) * num;
+ sizeof(struct nf_hook_ops *) * num +
+ sizeof(struct nf_hook_entries_rcu_head);
if (num == 0)
return NULL;
e = kvzalloc(alloc, GFP_KERNEL);
- if (e)
+ if (e) {
+ struct nf_hook_entries_rcu_head *head;
+
e->num_hook_entries = num;
+
+ head = nf_hook_entries_rcu_head_get(e);
+ head->allocation = e;
+ }
return e;
}
+void __nf_hook_entries_free(struct rcu_head *h)
+{
+ struct nf_hook_entries_rcu_head *head;
+
+ head = container_of(h, struct nf_hook_entries_rcu_head, head);
+ kvfree(head->allocation);
+}
+
+void nf_hook_entries_free_rcu(struct nf_hook_entries *e)
+{
+ struct nf_hook_entries_rcu_head *head;
+
+ head = nf_hook_entries_rcu_head_get(e);
+ if (WARN_ON(e != head->allocation))
+ return;
+
+ call_rcu(&head->head, __nf_hook_entries_free);
+}
+
static unsigned int accept_all(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -291,9 +330,9 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
#ifdef HAVE_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
- synchronize_net();
BUG_ON(p == new_hooks);
- kvfree(p);
+ if (p)
+ nf_hook_entries_free_rcu(p);
return 0;
}
EXPORT_SYMBOL(nf_register_net_hook);
@@ -341,7 +380,6 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
struct nf_hook_entries __rcu **pp;
struct nf_hook_entries *p;
- unsigned int nfq;
pp = nf_hook_entry_head(net, reg);
if (!pp)
@@ -362,13 +400,8 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
if (!p)
return;
- synchronize_net();
-
- /* other cpu might still process nfqueue verdict that used reg */
- nfq = nf_queue_nf_hook_drop(net);
- if (nfq)
- synchronize_net();
- kvfree(p);
+ nf_queue_nf_hook_drop(net);
+ nf_hook_entries_free_rcu(p);
}
EXPORT_SYMBOL(nf_unregister_net_hook);
@@ -395,63 +428,10 @@ EXPORT_SYMBOL(nf_register_net_hooks);
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
unsigned int hookcount)
{
- struct nf_hook_entries *to_free[16], *p;
- struct nf_hook_entries __rcu **pp;
- unsigned int i, j, n;
-
- mutex_lock(&nf_hook_mutex);
- for (i = 0; i < hookcount; i++) {
- pp = nf_hook_entry_head(net, ®[i]);
- if (!pp)
- continue;
-
- p = nf_entry_dereference(*pp);
- if (WARN_ON_ONCE(!p))
- continue;
- __nf_unregister_net_hook(p, ®[i]);
- }
- mutex_unlock(&nf_hook_mutex);
-
- do {
- n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
-
- mutex_lock(&nf_hook_mutex);
-
- for (i = 0, j = 0; i < hookcount && j < n; i++) {
- pp = nf_hook_entry_head(net, ®[i]);
- if (!pp)
- continue;
-
- p = nf_entry_dereference(*pp);
- if (!p)
- continue;
-
- to_free[j] = __nf_hook_entries_try_shrink(pp);
- if (to_free[j])
- ++j;
- }
-
- mutex_unlock(&nf_hook_mutex);
-
- if (j) {
- unsigned int nfq;
-
- synchronize_net();
-
- /* need 2nd synchronize_net() if nfqueue is used, skb
- * can get reinjected right before nf_queue_hook_drop()
- */
- nfq = nf_queue_nf_hook_drop(net);
- if (nfq)
- synchronize_net();
-
- for (i = 0; i < j; i++)
- kvfree(to_free[i]);
- }
+ unsigned int i;
- reg += n;
- hookcount -= n;
- } while (hookcount > 0);
+ for (i = 0; i < hookcount; i++)
+ nf_unregister_net_hook(net, ®[i]);
}
EXPORT_SYMBOL(nf_unregister_net_hooks);
next prev parent reply other threads:[~2017-11-22 11:57 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-11-22 10:40 [RFC PATCH] netfilter: call synchronize_net only once from nf_register_net_hooks Giuseppe Scrivano
2017-11-22 10:41 ` Florian Westphal
2017-11-22 11:06 ` Florian Westphal
2017-11-22 11:10 ` Florian Westphal
2017-11-22 11:25 ` Giuseppe Scrivano
2017-11-22 11:56 ` Florian Westphal [this message]
2017-11-22 14:30 ` Giuseppe Scrivano
2017-11-22 14:54 ` Florian Westphal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171122115659.GH24866@breakpoint.cc \
--to=fw@strlen.de \
--cc=gscrivan@redhat.com \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).