From: Bart De Schuymer <bdschuym@pandora.be>
To: "David S. Miller" <davem@davemloft.net>
Cc: Martin Josefsson <gandalf@wlug.westbo.se>,
shemminger@osdl.org, dwmw2@infradead.org, ak@suse.de,
snort2004@mail.ru, bridge@osdl.org, netdev@oss.sgi.com,
rusty@rustcorp.com.au, netfilter-devel@lists.netfilter.org
Subject: Re: [PATCH/RFC] Reduce call chain length in netfilter (was: Re: do_IRQ: stack overflow: 872..)
Date: Wed, 26 Jan 2005 10:08:29 +0100 [thread overview]
Message-ID: <1106730510.4041.4.camel@localhost.localdomain> (raw)
In-Reply-To: <20050125220558.6e824f8a.davem@davemloft.net>
Op di, 25-01-2005 te 22:05 -0800, schreef David S. Miller:
> On Sun, 23 Jan 2005 17:08:29 +0100
> Martin Josefsson <gandalf@wlug.westbo.se> wrote:
>
> > I'm now running a kernel with this patch and everything seems to still
> > be working.
> > So unless someone else has something to comment I think this should be
> > applied.
> > The decrease in call-depth is important.
>
> I would like to see at least one ACK from the netfilter
> folks. Bart or Rusty, could you forward the patch to
> netfilter-devel for review?
AFAIK Martin is in the netfilter core team. Anyway, I just included
netfilter-devel.
Does anyone have objections to this patch, which reduces the netfilter
call chain length?
> I have some other ideas about how bridging might be able
> to save some call chain depth... but I need to think about
> it some more before proposing or even trying to implement.
> (basically something akin to how we do route level packet
> output, via dst_output(), but instead we're doing this
> at ->hard_start_xmit() time)
I'm all ears :)
--- linux-2.6.11-rc1/include/linux/netfilter.h.old 2005-01-23 13:31:58.895886808 +0100
+++ linux-2.6.11-rc1/include/linux/netfilter.h 2005-01-23 13:32:02.853285192 +0100
@@ -18,7 +18,8 @@
#define NF_STOLEN 2
#define NF_QUEUE 3
#define NF_REPEAT 4
-#define NF_MAX_VERDICT NF_REPEAT
+#define NF_STOP 5
+#define NF_MAX_VERDICT NF_STOP
/* Generic cache responses from hook functions.
<= 0x2000 is used for protocol-flags. */
@@ -138,23 +139,34 @@ void nf_log_packet(int pf,
/* This is gross, but inline doesn't cut it for avoiding the function
call in fast path: gcc doesn't inline (needs value tracking?). --RR */
#ifdef CONFIG_NETFILTER_DEBUG
-#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
- nf_hook_slow((pf), (hook), (skb), (indev), (outdev), (okfn), INT_MIN)
-#define NF_HOOK_THRESH nf_hook_slow
+#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
+({int __ret = 0; \
+if (!nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, INT_MIN, &__ret)) \
+ __ret = (okfn)(skb); \
+__ret;})
+#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \
+({int __ret = 0; \
+if (!nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, thresh, &__ret)) \
+ __ret = (okfn)(skb); \
+__ret;})
#else
-#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
-(list_empty(&nf_hooks[(pf)][(hook)]) \
- ? (okfn)(skb) \
- : nf_hook_slow((pf), (hook), (skb), (indev), (outdev), (okfn), INT_MIN))
-#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \
-(list_empty(&nf_hooks[(pf)][(hook)]) \
- ? (okfn)(skb) \
- : nf_hook_slow((pf), (hook), (skb), (indev), (outdev), (okfn), (thresh)))
+#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
+({int __ret = 0; \
+if (list_empty(&nf_hooks[pf][hook]) || \
+ !nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, INT_MIN, &__ret)) \
+ __ret = (okfn)(skb); \
+__ret;})
+#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \
+({int __ret = 0; \
+if (list_empty(&nf_hooks[pf][hook]) || \
+ !nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, thresh, &__ret)) \
+ __ret = (okfn)(skb); \
+__ret;})
#endif
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
struct net_device *indev, struct net_device *outdev,
- int (*okfn)(struct sk_buff *), int thresh);
+ int (*okfn)(struct sk_buff *), int thresh, int *ret);
/* Call setsockopt() */
int nf_setsockopt(struct sock *sk, int pf, int optval, char __user *opt,
--- linux-2.6.11-rc1/net/core/netfilter.c.old 2005-01-23 13:31:48.980394192 +0100
+++ linux-2.6.11-rc1/net/core/netfilter.c 2005-01-23 13:32:02.856284736 +0100
@@ -349,6 +349,8 @@ static unsigned int nf_iterate(struct li
int (*okfn)(struct sk_buff *),
int hook_thresh)
{
+ unsigned int verdict;
+
/*
* The caller must not block between calls to this
* function because of risk of continuing from deleted element.
@@ -361,28 +363,18 @@ static unsigned int nf_iterate(struct li
/* Optimization: we don't need to hold module
reference here, since function can't sleep. --RR */
- switch (elem->hook(hook, skb, indev, outdev, okfn)) {
- case NF_QUEUE:
- return NF_QUEUE;
-
- case NF_STOLEN:
- return NF_STOLEN;
-
- case NF_DROP:
- return NF_DROP;
-
- case NF_REPEAT:
- *i = (*i)->prev;
- break;
-
+ verdict = elem->hook(hook, skb, indev, outdev, okfn);
+ if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
- case NF_ACCEPT:
- break;
-
- default:
- NFDEBUG("Evil return from %p(%u).\n",
- elem->hook, hook);
+ if (unlikely(verdict > NF_MAX_VERDICT)) {
+ NFDEBUG("Evil return from %p(%u).\n",
+ elem->hook, hook);
+ continue;
+ }
#endif
+ if (verdict != NF_REPEAT)
+ return verdict;
+ *i = (*i)->prev;
}
}
return NF_ACCEPT;
@@ -494,50 +486,47 @@ static int nf_queue(struct sk_buff *skb,
return 1;
}
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
+/* Returns 0 if okfn() needs to be executed by the caller, -EPERM otherwise.
+ * Assumes *ret==0 when called. On return, *ret!=0 when verdict==NF_DROP */
+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sk_buff *),
- int hook_thresh)
+ int hook_thresh, int *ret)
{
struct list_head *elem;
unsigned int verdict;
- int ret = 0;
+ int ret2 = 0;
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();
#ifdef CONFIG_NETFILTER_DEBUG
- if (skb->nf_debug & (1 << hook)) {
+ if (unlikely((*pskb)->nf_debug & (1 << hook))) {
printk("nf_hook: hook %i already set.\n", hook);
- nf_dump_skb(pf, skb);
+ nf_dump_skb(pf, *pskb);
}
- skb->nf_debug |= (1 << hook);
+ (*pskb)->nf_debug |= (1 << hook);
#endif
elem = &nf_hooks[pf][hook];
next_hook:
- verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
+ verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
outdev, &elem, okfn, hook_thresh);
- if (verdict == NF_QUEUE) {
+ if (verdict == NF_ACCEPT || verdict == NF_STOP)
+ goto unlock;
+ else if (verdict == NF_DROP) {
+ kfree_skb(*pskb);
+ *ret = -EPERM;
+ } else if (verdict == NF_QUEUE) {
NFDEBUG("nf_hook: Verdict = QUEUE.\n");
- if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn))
+ if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn))
goto next_hook;
}
-
- switch (verdict) {
- case NF_ACCEPT:
- ret = okfn(skb);
- break;
-
- case NF_DROP:
- kfree_skb(skb);
- ret = -EPERM;
- break;
- }
-
+ ret2 = -EPERM;
+unlock:
rcu_read_unlock();
- return ret;
+ return ret2;
}
void nf_reinject(struct sk_buff *skb, struct nf_info *info,
--- linux-2.6.11-rc1/net/bridge/br_netfilter.c.old 2005-01-23 13:31:39.080899144 +0100
+++ linux-2.6.11-rc1/net/bridge/br_netfilter.c 2005-01-23 13:32:02.861283976 +0100
@@ -829,8 +829,7 @@ static unsigned int ip_sabotage_in(unsig
{
if ((*pskb)->nf_bridge &&
!((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
- okfn(*pskb);
- return NF_STOLEN;
+ return NF_STOP;
}
return NF_ACCEPT;
@@ -888,8 +887,7 @@ static unsigned int ip_sabotage_out(unsi
if (out->priv_flags & IFF_802_1Q_VLAN)
nf_bridge->netoutdev = (struct net_device *)out;
#endif
- okfn(skb);
- return NF_STOLEN;
+ return NF_STOP;
}
return NF_ACCEPT;
next prev parent reply other threads:[~2005-01-26 9:08 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <1131604877.20041218092730@mail.ru.suse.lists.linux.kernel>
2004-12-18 7:50 ` do_IRQ: stack overflow: 872 Andi Kleen
2004-12-18 11:12 ` Bart De Schuymer
2004-12-18 11:14 ` Andi Kleen
2004-12-18 11:51 ` Bart De Schuymer
2004-12-18 13:53 ` Andi Kleen
2004-12-18 16:07 ` Re[2]: " Crazy AMD K7
2004-12-18 16:46 ` Bart De Schuymer
2005-01-07 17:05 ` David Woodhouse
2005-01-07 18:00 ` Stephen Hemminger
2005-01-07 18:06 ` David Woodhouse
2005-01-07 21:27 ` Bart De Schuymer
2005-01-18 21:57 ` David S. Miller
2005-01-22 22:30 ` [PATCH/RFC] Reduce call chain length in netfilter (was: Re: do_IRQ: stack overflow: 872..) Bart De Schuymer
2005-01-22 23:22 ` Martin Josefsson
2005-01-23 12:40 ` Bart De Schuymer
2005-01-23 16:08 ` Martin Josefsson
2005-01-26 6:05 ` David S. Miller
2005-01-26 9:08 ` Bart De Schuymer [this message]
2005-01-26 23:49 ` [PATCH/RFC] Reduce call chain length in netfilter Patrick McHardy
2005-01-27 7:18 ` David S. Miller
2005-01-27 17:50 ` Patrick McHardy
2005-01-27 19:47 ` David S. Miller
2005-01-27 21:16 ` Bart De Schuymer
2005-01-27 22:48 ` Patrick McHardy
2005-01-27 23:24 ` David S. Miller
2005-01-28 0:08 ` Patrick McHardy
2005-01-28 0:29 ` Rusty Russell
2005-01-28 1:10 ` David S. Miller
2005-01-28 1:32 ` Rusty Russell
2005-01-28 1:35 ` Patrick McHardy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1106730510.4041.4.camel@localhost.localdomain \
--to=bdschuym@pandora.be \
--cc=ak@suse.de \
--cc=bridge@osdl.org \
--cc=davem@davemloft.net \
--cc=dwmw2@infradead.org \
--cc=gandalf@wlug.westbo.se \
--cc=netdev@oss.sgi.com \
--cc=netfilter-devel@lists.netfilter.org \
--cc=rusty@rustcorp.com.au \
--cc=shemminger@osdl.org \
--cc=snort2004@mail.ru \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).