netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Karl Hiramoto <karl@hiramoto.org>
To: netfilter-devel@vger.kernel.org
Cc: Karl Hiramoto <karl@hiramoto.org>
Subject: [RFC 4/4] nfnetlink_queue: allow part of a connection to bypass the queue
Date: Sat, 24 Jul 2010 17:44:45 +0200	[thread overview]
Message-ID: <1279986285-11665-5-git-send-email-karl@hiramoto.org> (raw)
In-Reply-To: <1279986285-11665-1-git-send-email-karl@hiramoto.org>

If userspace already knows that the next X bytes of a connection should be
ACCEPTed, let nfnetlink_queue accept those packets in the kernel instead of
queueing them to userspace.

Signed-off-by: Karl Hiramoto <karl@hiramoto.org>
---
 include/linux/netfilter/nfnetlink_queue.h |    1 +
 net/netfilter/nfnetlink_queue.c           |  112 ++++++++++++++++++++++++++++-
 2 files changed, 112 insertions(+), 1 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h
index 2455fe5..f3dd99b 100644
--- a/include/linux/netfilter/nfnetlink_queue.h
+++ b/include/linux/netfilter/nfnetlink_queue.h
@@ -41,6 +41,7 @@ enum nfqnl_attr_type {
 	NFQA_IFINDEX_PHYSOUTDEV,	/* __u32 ifindex */
 	NFQA_HWADDR,			/* nfqnl_msg_packet_hw */
 	NFQA_PAYLOAD,			/* opaque data payload */
+	NFQA_ACCEPT_CONNBYTES, /* Accept the next N bytes, bypassing queue  */
 
 	__NFQA_MAX
 };
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 12e1ab3..b5f20c8 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -30,6 +30,10 @@
 #include <linux/list.h>
 #include <net/sock.h>
 #include <net/netfilter/nf_queue.h>
+#if defined(CONFIG_NF_QUEUE_CONNBYTES_BYPASS)
+#include <net/netfilter/nf_conntrack_queue.h>
+#include <linux/tcp.h>
+#endif
 
 #include <asm/atomic.h>
 
@@ -385,6 +389,67 @@ nla_put_failure:
 	return NULL;
 }
 
+#if defined(CONFIG_NF_QUEUE_CONNBYTES_BYPASS)
+/* TCP header of skb for L3 family prot; assumes skb->data is the IP header. */
+static struct tcphdr *__get_tcp_hdr(const struct sk_buff *skb, uint8_t prot)
+{
+	if (prot == PF_INET)	/* IPv4 header length is ihl * 4 bytes */
+		return (struct tcphdr *)(skb->data + (ip_hdr(skb)->ihl * 4));
+	if (prot == PF_INET6)	/* constant 40: extension headers not skipped */
+		return (struct tcphdr *)(skb->data + 40);
+
+	BUG();			/* any other family is unsupported */
+	return NULL;
+}
+
+/* Let a packet skip the userspace queue while its flow has a bypass armed.
+ * Returns 0 if the entry was reinjected as NF_ACCEPT, else -ENOENT. */
+static int check_entry_reinject(struct nf_queue_entry *entry)
+{
+	struct nf_conntrack_queue *ct_queue;
+	enum ip_conntrack_info ctinfo;
+	enum ip_conntrack_dir dir;
+	const struct tcphdr *tcphdr;
+	struct nf_conn *ct = nf_ct_get(entry->skb, &ctinfo);
+	bool bypass = false;
+
+	if (!ct || !nf_ct_is_confirmed(ct))
+		return -ENOENT;
+
+	ct_queue = nf_ct_ext_find(ct, NF_CT_EXT_QUEUE);
+	if (!ct_queue)
+		return -ENOENT;
+
+	spin_lock_bh(&ct_queue->lock);
+	pr_debug("%s:%d has ctinfo=%u\n", __func__, __LINE__, ctinfo);
+	dir = CTINFO2DIR(ctinfo);
+	if (ct_queue->dir[dir].connbytes) {
+		if (nf_ct_protonum(ct) == IPPROTO_TCP) {
+			tcphdr = __get_tcp_hdr(entry->skb, nf_ct_l3num(ct));
+			/* Signed difference keeps the test wrap-safe. */
+			if ((s32)(ntohl(tcphdr->seq) -
+				  ct_queue->dir[dir].tcp_seq) < 0) {
+				bypass = true;
+			} else {	/* window passed: disarm the gate too */
+				ct_queue->dir[dir].tcp_seq = 0;
+				ct_queue->dir[dir].connbytes = 0;
+			}
+		} else if (ct_queue->dir[dir].connbytes >= entry->skb->len) {
+			ct_queue->dir[dir].connbytes -= entry->skb->len;
+			bypass = true;	/* budget covers this packet */
+		} else {
+			ct_queue->dir[dir].connbytes = 0; /* clear */
+		}
+	}
+	spin_unlock_bh(&ct_queue->lock);
+
+	if (!bypass)
+		return -ENOENT;
+	nf_reinject(entry, NF_ACCEPT);	/* reinject only after dropping the lock */
+	return 0;
+}
+#endif
+
 static int
 nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 {
@@ -400,6 +465,11 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 	if (queue->copy_mode == NFQNL_COPY_NONE)
 		goto err_out;
 
+#if defined(CONFIG_NF_QUEUE_CONNBYTES_BYPASS)
+	if (!check_entry_reinject(entry))
+		return 0; /* bypass without enqueueing packet */
+#endif
+
 	nskb = nfqnl_build_packet_message(queue, entry);
 	if (nskb == NULL)
 		goto err_out;
@@ -604,6 +674,40 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
 	[NFQA_PAYLOAD]		= { .type = NLA_UNSPEC },
 };
 
+
+#if defined(CONFIG_NF_QUEUE_CONNBYTES_BYPASS)
+/* Arm the bypass for the next connbytes bytes in this packet's direction
+ * (queue is unused).  Returns 0, or -ENOENT without conntrack/queue ext. */
+static int recv_connbytes(u32 connbytes, struct nfqnl_instance *queue,
+	struct nf_queue_entry *entry)
+{
+	struct nf_conntrack_queue *ct_queue;
+	enum ip_conntrack_info ctinfo;
+	enum ip_conntrack_dir dir;
+	const struct tcphdr *tcphdr;
+	struct nf_conn *ct = nf_ct_get(entry->skb, &ctinfo);
+
+	if (!ct) {
+		pr_err("nf_queue: no conntrack info\n");
+		return -ENOENT;
+	}
+	dir = CTINFO2DIR(ctinfo);
+	ct_queue = nf_ct_ext_find(ct, NF_CT_EXT_QUEUE);
+	if (!ct_queue) {
+		pr_err("nf_queue: no conntrack queue ext\n");
+		return -ENOENT;
+	}
+	spin_lock_bh(&ct_queue->lock);	/* same lock as check_entry_reinject() */
+	ct_queue->dir[dir].connbytes = connbytes; /* its gate for all protocols */
+	if (nf_ct_protonum(ct) == IPPROTO_TCP) {	/* absolute window end */
+		tcphdr = __get_tcp_hdr(entry->skb, nf_ct_l3num(ct));
+		ct_queue->dir[dir].tcp_seq = ntohl(tcphdr->seq) + connbytes;
+	}
+	spin_unlock_bh(&ct_queue->lock);
+	return 0;
+}
+#endif
+
 static int
 nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 		   const struct nlmsghdr *nlh,
@@ -611,7 +715,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 {
 	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
 	u_int16_t queue_num = ntohs(nfmsg->res_id);
-
 	struct nfqnl_msg_verdict_hdr *vhdr;
 	struct nfqnl_instance *queue;
 	unsigned int verdict;
@@ -659,6 +762,13 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 	if (nfqa[NFQA_MARK])
 		entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
 
+#if defined(CONFIG_NF_QUEUE_CONNBYTES_BYPASS)
+	if (nfqa[NFQA_ACCEPT_CONNBYTES])
+		recv_connbytes(
+			ntohl(nla_get_be32(nfqa[NFQA_ACCEPT_CONNBYTES])),
+			queue, entry);
+#endif
+
 	nf_reinject(entry, verdict);
 	return 0;
 
-- 
1.7.1


  parent reply	other threads:[~2010-07-24 15:46 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-07-24 15:44 [RFC 0/4] nfnetlink_queue bypass queue to userspace X bytes of connection Karl Hiramoto
2010-07-24 15:44 ` [RFC 1/4] netfilter/Kconfig: NF_QUEUE_CONNBYTES_BYPASS Karl Hiramoto
2010-07-24 15:44 ` [RFC 2/4] nf_conntrack_queue: define struct that will be stored in nf_ct_extend Karl Hiramoto
2010-07-24 15:44 ` [RFC 3/4] nf_conntrack: add nf_queue extension Karl Hiramoto
2010-07-24 15:44 ` Karl Hiramoto [this message]
2010-07-24 18:26 ` [RFC 0/4] nfnetlink_queue bypass queue to userspace X bytes of connection Pablo Neira Ayuso
2010-07-25  6:55   ` Karl Hiramoto
2010-07-25 10:42     ` Pablo Neira Ayuso
2010-07-26  6:50       ` Karl Hiramoto
     [not found]       ` <4C4D2C33.6050901@hiramoto.org>
2010-07-26 17:35         ` Pablo Neira Ayuso

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1279986285-11665-5-git-send-email-karl@hiramoto.org \
    --to=karl@hiramoto.org \
    --cc=netfilter-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).