From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org
Subject: [PATCH 35/51] netfilter: xt_NFQUEUE: introduce CPU fanout
Date: Sat, 6 Apr 2013 14:17:34 +0200 [thread overview]
Message-ID: <1365250670-14993-36-git-send-email-pablo@netfilter.org> (raw)
In-Reply-To: <1365250670-14993-1-git-send-email-pablo@netfilter.org>
From: "holger@eitzenberger.org" <holger@eitzenberger.org>
Current NFQUEUE target uses a hash, computed over source and
destination address (and other parameters), for steering the packet
to the actual NFQUEUE. This, however, ignores the fact that the
packet is eventually handled by a particular CPU on user request.
If, e.g.,
1) IRQ affinity is already used to handle packets on a particular CPU
(in both the single-queue and the multi-queue case)
and/or
2) RPS is used to steer packets to a specific softirq
the target easily chooses an NFQUEUE which is not handled by a process
pinned to the same CPU.
The idea is therefore to use the CPU index for determining the
NFQUEUE handling the packet.
E.g. when having a system with 4 CPUs, 4 MQ queues and 4 NFQUEUEs, it
looks like this:
  +-----+  +-----+  +-----+  +-----+
  |NFQ#0|  |NFQ#1|  |NFQ#2|  |NFQ#3|
  +-----+  +-----+  +-----+  +-----+
     ^        ^        ^        ^
     |        |NFQUEUE |        |
     +        +        +        +
  +-----+  +-----+  +-----+  +-----+
  |rx-0 |  |rx-1 |  |rx-2 |  |rx-3 |
  +-----+  +-----+  +-----+  +-----+
The NFQUEUEs do not necessarily have to start with number 0; setups with
fewer NFQUEUEs than packet-handling CPUs are not a problem either.
This patch extends the NFQUEUE target to accept a new
NFQ_FLAG_CPU_FANOUT flag. If this is specified the target uses the
CPU index for determining the NFQUEUE being used. I have to introduce
rev3 for this. The 'flags' are folded into _v2 'bypass'.
By changing the way the queue is assigned, I'm able to improve the
performance if the processes reading from the NFQUEUEs are pinned
correctly.
Signed-off-by: Holger Eitzenberger <holger@eitzenberger.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/uapi/linux/netfilter/xt_NFQUEUE.h | 9 +++++++
net/netfilter/xt_NFQUEUE.c | 41 +++++++++++++++++++++++++++--
2 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/netfilter/xt_NFQUEUE.h b/include/uapi/linux/netfilter/xt_NFQUEUE.h
index 9eafdbb..8bb5fe6 100644
--- a/include/uapi/linux/netfilter/xt_NFQUEUE.h
+++ b/include/uapi/linux/netfilter/xt_NFQUEUE.h
@@ -26,4 +26,13 @@ struct xt_NFQ_info_v2 {
__u16 bypass;
};
+struct xt_NFQ_info_v3 {
+ __u16 queuenum;
+ __u16 queues_total;
+ __u16 flags;
+#define NFQ_FLAG_BYPASS 0x01 /* for compatibility with v2 */
+#define NFQ_FLAG_CPU_FANOUT 0x02 /* use current CPU (no hashing) */
+#define NFQ_FLAG_MASK 0x03
+};
+
#endif /* _XT_NFQ_TARGET_H */
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 817f9e9..a287ef2 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -108,7 +108,7 @@ nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
static int nfqueue_tg_check(const struct xt_tgchk_param *par)
{
- const struct xt_NFQ_info_v2 *info = par->targinfo;
+ const struct xt_NFQ_info_v3 *info = par->targinfo;
u32 maxid;
if (unlikely(!rnd_inited)) {
@@ -125,11 +125,39 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par)
info->queues_total, maxid);
return -ERANGE;
}
- if (par->target->revision == 2 && info->bypass > 1)
+ if (par->target->revision == 2 && info->flags > 1)
+ return -EINVAL;
+ if (par->target->revision == 3 && info->flags & ~NFQ_FLAG_MASK)
return -EINVAL;
+
return 0;
}
+static unsigned int
+nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_NFQ_info_v3 *info = par->targinfo;
+ u32 queue = info->queuenum;
+
+ if (info->queues_total > 1) {
+ if (info->flags & NFQ_FLAG_CPU_FANOUT) {
+ int cpu = smp_processor_id();
+
+ queue = info->queuenum + cpu % info->queues_total;
+ } else {
+ if (par->family == NFPROTO_IPV4)
+ queue = (((u64) hash_v4(skb) * info->queues_total) >>
+ 32) + queue;
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ else if (par->family == NFPROTO_IPV6)
+ queue = (((u64) hash_v6(skb) * info->queues_total) >>
+ 32) + queue;
+#endif
+ }
+ }
+ return NF_QUEUE_NR(queue);
+}
+
static struct xt_target nfqueue_tg_reg[] __read_mostly = {
{
.name = "NFQUEUE",
@@ -156,6 +184,15 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
.targetsize = sizeof(struct xt_NFQ_info_v2),
.me = THIS_MODULE,
},
+ {
+ .name = "NFQUEUE",
+ .revision = 3,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = nfqueue_tg_check,
+ .target = nfqueue_tg_v3,
+ .targetsize = sizeof(struct xt_NFQ_info_v3),
+ .me = THIS_MODULE,
+ },
};
static int __init nfqueue_tg_init(void)
--
1.7.10.4
next prev parent reply other threads:[~2013-04-06 12:18 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-04-06 12:16 [PATCH 00/51] netfilter updates for net-next Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 01/51] ipvs: avoid routing by TOS for real server Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 02/51] ipvs: prefer NETDEV_DOWN event to free cached dsts Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 03/51] ipvs: convert the IP_VS_XMIT macros to functions Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 04/51] ipvs: rename functions related to dst_cache reset Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 05/51] ipvs: no need to reroute anymore on DNAT over loopback Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 06/51] ipvs: do not use skb_share_check Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 07/51] ipvs: consolidate all dst checks on transmit in one place Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 08/51] ipvs: optimize dst usage for real server Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 09/51] ipvs: convert app locks Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 10/51] ipvs: remove rs_lock by using RCU Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 11/51] ipvs: convert locks used in persistence engines Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 12/51] ipvs: convert connection locking Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 13/51] ipvs: reorder keys in connection structure Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 14/51] ipvs: avoid kmem_cache_zalloc in ip_vs_conn_new Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 15/51] ipvs: change ip_vs_sched_lock to mutex Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 16/51] ipvs: preparations for using rcu in schedulers Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 17/51] ipvs: add ip_vs_dest_hold and ip_vs_dest_put Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 18/51] ipvs: convert dh scheduler to rcu Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 19/51] ipvs: convert lblc " Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 20/51] ipvs: convert lblcr " Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 21/51] ipvs: convert lc " Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 22/51] ipvs: convert nq " Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 23/51] ipvs: convert rr " Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 24/51] ipvs: convert sed " Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 25/51] ipvs: convert sh " Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 26/51] ipvs: convert wlc " Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 27/51] ipvs: convert wrr " Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 28/51] ipvs: reorganize dest trash Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 29/51] ipvs: do not expect result from done_service Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 30/51] ipvs: convert sched_lock to spin lock Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 31/51] ipvs: convert dests to rcu Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 32/51] ipvs: convert services " Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 33/51] ipvs: do not disable bh for long time Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 34/51] netfilter: use IS_ENABLE to replace if defined in TRACE target Pablo Neira Ayuso
2013-04-06 12:17 ` Pablo Neira Ayuso [this message]
2013-04-06 12:17 ` [PATCH 36/51] netfilter: xt_NFQUEUE: coalesce IPv4 and IPv6 hashing Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 37/51] netfilter: fix struct ip6t_frag field description Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 38/51] netfilter: make /proc/net/netfilter pernet Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 39/51] netfilter: nf_log: prepare net namespace support for loggers Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 40/51] netfilter: ebt_log: add net namespace support for ebt_log Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 41/51] netfilter: xt_LOG: add net namespace support for xt_LOG Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 42/51] netfilter: ebt_ulog: add net namespace support for ebt_ulog Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 43/51] netfilter: ipt_ULOG: add net namespace support for ipt_ULOG Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 44/51] netfilter: nfnetlink_log: add net namespace support for nfnetlink_log Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 45/51] netfilter: enable per netns support for nf_loggers Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 46/51] netfilter: nfnetlink_queue: add net namespace support for nfnetlink_queue Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 47/51] netfilter: remove unneeded variable proc_net_netfilter Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 48/51] netfilter: implement RFC3168 5.3 (ecn protection) for ipv6 fragmentation handling Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 49/51] netfilter: ipv4: propagate routing errors from ip_route_me_harder() Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 50/51] netfilter: ipv6: propagate routing errors from ip6_route_me_harder() Pablo Neira Ayuso
2013-04-06 12:17 ` [PATCH 51/51] netfilter: nat: propagate errors from xfrm_me_harder() Pablo Neira Ayuso
2013-04-06 13:14 ` [PATCH 00/51] netfilter updates for net-next Julian Anastasov
2013-04-06 13:52 ` Pablo Neira Ayuso
2013-04-07 16:27 ` David Miller
2013-04-08 16:06 ` Pablo Neira Ayuso
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1365250670-14993-36-git-send-email-pablo@netfilter.org \
--to=pablo@netfilter.org \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).