netlink socket filtering

All of lore.kernel.org
 help / color / mirror / Atom feed

* netlink socket filtering
@ 2008-03-02 13:36 Patrick McHardy
  2008-03-05 13:20 ` Pablo Neira Ayuso
  0 siblings, 1 reply; 6+ messages in thread
From: Patrick McHardy @ 2008-03-02 13:36 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Netfilter Development Mailinglist

[-- Attachment #1: Type: text/plain, Size: 981 bytes --]

Out of interest in how feasible it would be to do ctnetlink
message filtering using socket filters, I've hacked together
these two patches for the kernel and libnl to filter on
the TCP_CONNTRACK_ESTABLISHED state.

The filtering works well, but it brought up a question that
I think also affects the patches you've posted earlier.
You mentioned that for synchronization you want to filter
on ESTABLISHED states. Since BPF only gets the final message
it can't filter on the previous conntrack state when
transitioning, but only on the current state. This means
that a filter on TCP_CONNTRACK_ESTABLISHED won't let
a message for a transition from TCP_CONNTRACK_ESTABLISHED
to TCP_CONNTRACK_CLOSED pass.

Your patches add a new table, at which point the conntrack
will also already have performed the transition, and filtering
using state matches will also only see the new state. So I'm
wondering, what are the exact filtering needs for replication
and would something like this work?



[-- Attachment #2: libnl-skfilter.diff --]
[-- Type: text/x-patch, Size: 3083 bytes --]

diff --git a/src/nf-monitor.c b/src/nf-monitor.c
index 2bc58c9..8614924 100644
--- a/src/nf-monitor.c
+++ b/src/nf-monitor.c
@@ -13,6 +13,9 @@
 
 #include "utils.h"
 #include <netlink/netfilter/nfnl.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+#include <linux/netfilter/nf_conntrack_tcp.h>
+#include <linux/filter.h>
 
 static void obj_input(struct nl_object *obj, void *arg)
 {
@@ -34,6 +37,116 @@ static int event_input(struct nl_msg *msg, void *arg)
 	return NL_STOP;
 }
 
+#define SKF_AD_NLATTR	12
+
+#define FILTER_ACCEPT	0xFFFE
+#define FILTER_REJECT	0xFFFF
+
+static int sk_set_filter(int fd)
+{
+	struct sock_filter filter[] = {
+		{
+			/* A = sizeof(struct nlmsghdr) + sizeof(struct nfgenmsg) */
+			.code	= BPF_LD|BPF_IMM,
+			.k	= sizeof(struct nlmsghdr) + sizeof(struct nfgenmsg),
+		},
+		{
+			/* X = CTA_PROTOINFO */
+			.code	= BPF_LDX|BPF_IMM,
+			.k	= CTA_PROTOINFO,
+		},
+		{
+			/* A = netlink attribute offset */
+			.code	= BPF_LD|BPF_B|BPF_ABS,
+			.k	= SKF_AD_OFF + SKF_AD_NLATTR,
+		},
+		{
+			/* Reject if not found (A == 0) */
+			.code	= BPF_JMP|BPF_JEQ|BPF_K,
+			.k	= 0,
+			.jt	= 20 - 3 - 1,
+		},
+
+		{
+			/* A += sizeof(struct nlattr) */
+			.code	= BPF_ALU|BPF_ADD|BPF_K,
+			.k	= sizeof(struct nlattr),
+		},
+		{
+			/* X = CTA_PROTOINFO_TCP */
+			.code	= BPF_LDX|BPF_IMM,
+			.k	= CTA_PROTOINFO_TCP,
+		},
+		{
+			/* A = netlink attribute offset */
+			.code	= BPF_LD|BPF_B|BPF_ABS,
+			.k	= SKF_AD_OFF + SKF_AD_NLATTR,
+		},
+		{
+			/* Reject if not found (A == 0) */
+			.code	= BPF_JMP|BPF_JEQ|BPF_K,
+			.k	= 0,
+			.jt	= 20 - 7 - 1,
+		},
+
+		{
+			/* A += sizeof(struct nlattr) */
+			.code	= BPF_ALU|BPF_ADD|BPF_K,
+			.k	= sizeof(struct nlattr),
+		},
+		{
+			/* X = CTA_PROTOINFO_TCP_STATE */
+			.code	= BPF_LDX|BPF_IMM,
+			.k	= CTA_PROTOINFO_TCP_STATE,
+		},
+		{
+			/* A = netlink attribute offset */
+			.code	= BPF_LD|BPF_B|BPF_ABS,
+			.k	= SKF_AD_OFF + SKF_AD_NLATTR,
+		},
+		{
+			/* Reject if not found (A == 0) */
+			.code	= BPF_JMP|BPF_JEQ|BPF_K,
+			.k	= 0,
+			.jt	= 20 - 11 - 1,
+		},
+
+		{
+			/* X = A */
+			.code	= BPF_MISC|BPF_TAX,
+		},
+		{
+			/* A = skb->data[X + k] */
+			.code	= BPF_LD|BPF_B|BPF_IND,
+			.k	= sizeof(struct nlattr),
+		},
+		{
+			/* Reject if A != TCA_CONNTRACK_ESTABLISHED */
+			.code	= BPF_JMP|BPF_JEQ|BPF_K,
+			.k	= TCP_CONNTRACK_ESTABLISHED,
+			.jf	= 20 - 14 - 1,
+		},
+
+		{
+			/* Accept */
+			.code	= BPF_RET|BPF_K,
+			.k	= 1,
+		},
+		[20]	= {
+			/* Reject */
+			.code	= BPF_RET|BPF_K,
+			.k	= 0,
+		},
+	};
+	struct sock_fprog fprog = {
+		.len		= sizeof(filter) / sizeof(filter[0]),
+		.filter		= filter,
+	};
+
+	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
+			  &fprog, sizeof(fprog));
+}
+
 int main(int argc, char *argv[])
 {
 	struct nl_handle *nlh;
@@ -92,6 +205,11 @@ int main(int argc, char *argv[])
 			fprintf(stderr, "Warning: Unknown group: %s\n", argv[idx]);
 	}
 
+	if (sk_set_filter(nl_socket_get_fd(nlh)) < 0) {
+		perror("setsockopt(SO_ATTACH_FILTER)");
+		goto errout;
+	}
+
 	while (1) {
 		fd_set rfds;
 		int fd, retval;

[-- Attachment #3: linux-skfilter.diff --]
[-- Type: text/x-patch, Size: 1908 bytes --]

diff --git a/include/linux/filter.h b/include/linux/filter.h
index ddfa037..0e39016 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -136,7 +136,8 @@ static inline unsigned int sk_filter_len(struct sk_filter *fp)
 #define SKF_AD_PROTOCOL 0
 #define SKF_AD_PKTTYPE 	4
 #define SKF_AD_IFINDEX 	8
-#define SKF_AD_MAX 	12
+#define SKF_AD_NLATTR	12
+#define SKF_AD_MAX 	16
 #define SKF_NET_OFF   (-0x100000)
 #define SKF_LL_OFF    (-0x200000)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index e0a0694..20ed056 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -27,6 +27,7 @@
 #include <linux/if_packet.h>
 #include <net/ip.h>
 #include <net/protocol.h>
+#include <net/netlink.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <linux/errno.h>
@@ -268,6 +269,22 @@ load_b:
 		case SKF_AD_IFINDEX:
 			A = skb->dev->ifindex;
 			continue;
+		case SKF_AD_NLATTR: {
+			struct nlattr *nla;
+
+			if (skb_is_nonlinear(skb))
+				return 0;
+			if (A > skb->len - sizeof(struct nlattr))
+				return 0;
+
+			nla = nla_find((struct nlattr *)&skb->data[A],
+				       skb->len - A, X);
+			if (nla)
+				A = (void *)nla - (void *)skb->data;
+			else
+				A = 0;
+			continue;
+		}
 		default:
 			return 0;
 		}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 524e826..6f68f2b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -919,6 +919,17 @@ static inline int netlink_broadcast_deliver(struct sock *sk,
 					    struct sk_buff *skb)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
+	struct sk_filter *filter;
+	unsigned int len = skb->len;
+
+	rcu_read_lock_bh();
+	filter = rcu_dereference(sk->sk_filter);
+	if (filter)
+		len = sk_run_filter(skb, filter->insns, filter->len);
+	rcu_read_unlock_bh();
+
+	if (len == 0)
+		return 0;
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
 	    !test_bit(0, &nlk->state)) {

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: netlink socket filtering
  2008-03-02 13:36 netlink socket filtering Patrick McHardy
@ 2008-03-05 13:20 ` Pablo Neira Ayuso
  2008-03-05 13:22   ` Pablo Neira Ayuso
  2008-03-10 16:59   ` Patrick McHardy
  0 siblings, 2 replies; 6+ messages in thread
From: Pablo Neira Ayuso @ 2008-03-05 13:20 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Netfilter Development Mailinglist

Patrick McHardy wrote:
> Out of interest how feasible it would be to do ctnetlink
> message filtering using socket filters I've hacked together
> these two patches for the kernel and libnl to filter on
> the TCP_CONNTRACK_ESTABLISHED state.
> 
> The filtering works well, but it brought up a question that
> I think also affects the patches you've posted earlier.
> You mentioned that for synchronization you want to filter
> on ESTABLISHED states. Since BPF only gets the final message
> it can't filter on the previous conntrack state when
> transitioning, but only on the current state. This means
> that a filter on TCP_CONNTRACK_ESTABLISHED won't let
> a message for a transition from TCP_CONNTRACK_ESTABLISHED
> to TCP_CONNTRACK_CLOSED pass.
> 
> Your patches add a new table, at which point the conntrack
> will also already have performed the transistion and filtering
> using state matches will also only see the new state. So I'm
> wondering, what are the exact filtering needs for replication
> and would something like this work?

I mainly need conntrack event filtering capabilities by:

* protocol states, so that one can replicate TCP Established and 
whatever state in the connection closure (or even the destroy event), I 
don't need state transitions.
* source address and destination, so that the administrator can 
replicate traffic for certain parts of the networks, eg. 192.168.0.0/24

I like this BSF-based solution; however, would it be flexible enough
for my needs? Another question that comes to my mind: isn't this
filtering coming too late? I mean, we have to invest time to build the
netlink message and then decide if we want to replicate it or not.

-- 
"Los honestos son inadaptados sociales" -- Les Luthiers

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: netlink socket filtering
  2008-03-05 13:20 ` Pablo Neira Ayuso
@ 2008-03-05 13:22   ` Pablo Neira Ayuso
  2008-03-10 16:59   ` Patrick McHardy
  1 sibling, 0 replies; 6+ messages in thread
From: Pablo Neira Ayuso @ 2008-03-05 13:22 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Netfilter Development Mailinglist

Pablo Neira Ayuso wrote:
> I mainly need conntrack event filtering capabilities by:
> 
> * protocol states, so that one can replicate TCP Established and 
> whatever state in the connection closure (or even the destroy event), I 
> don't need state transitions.
> * source address and destination, so that the administrator can 
> replicate traffic for certain parts of the networks, eg. 192.168.0.0/24

Well, also other descriptors such as the layer 4 protocol number, etc... 
I mean, similar descriptors to filter that are available in iptables.

-- 
"Los honestos son inadaptados sociales" -- Les Luthiers

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: netlink socket filtering
  2008-03-05 13:20 ` Pablo Neira Ayuso
  2008-03-05 13:22   ` Pablo Neira Ayuso
@ 2008-03-10 16:59   ` Patrick McHardy
  2008-03-16 11:58     ` Pablo Neira Ayuso
  1 sibling, 1 reply; 6+ messages in thread
From: Patrick McHardy @ 2008-03-10 16:59 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Netfilter Development Mailinglist

Pablo Neira Ayuso wrote:
> Patrick McHardy wrote:
>> Your patches add a new table, at which point the conntrack
>> will also already have performed the transistion and filtering
>> using state matches will also only see the new state. So I'm
>> wondering, what are the exact filtering needs for replication
>> and would something like this work?
> 
> I mainly need conntrack event filtering capabilities by:
> 
> * protocol states, so that one can replicate TCP Established and 
> whatever state in the connection closure (or even the destroy event), I 
> don't need state transitions.

OK, so that should work.

> * source address and destination, so that the administrator can 
> replicate traffic for certain parts of the networks, eg. 192.168.0.0/24

That also works using BPF.

> I link this BSF-based solution, however, would they be flexible enough 
> for my needs? Another question that comes to my mind, isn't this 
> filtering coming to late? I mean, we have to invest time to build the 
> netlink message and then decide if we want to replicate it or not.

It's quite flexible, but you're right that it only takes place
after the message has already been constructed. The advantage
over selective unicast delivery is that if messages are consumed
by multiple receivers we only need to construct them once.
The downside is that messages that will get filtered on all
sockets are constructed completely unnecessarily.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: netlink socket filtering
  2008-03-10 16:59   ` Patrick McHardy
@ 2008-03-16 11:58     ` Pablo Neira Ayuso
  2008-03-17 14:51       ` Patrick McHardy
  0 siblings, 1 reply; 6+ messages in thread
From: Pablo Neira Ayuso @ 2008-03-16 11:58 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Netfilter Development Mailinglist

Patrick McHardy wrote:
> Pablo Neira Ayuso wrote:
>> I link this BSF-based solution, however, would they be flexible enough
>> for my needs? Another question that comes to my mind, isn't this
>> filtering coming to late? I mean, we have to invest time to build the
>> netlink message and then decide if we want to replicate it or not.
> 
> Its quite flexible, but you're right that it only takes place
> after the message has already been constructed. The advantage
> over selective unicast delivery is that if messages are consumed
> by multiple receivers we only need to construct them once.

On most systems the number of listeners would usually be 2: ulogd and
conntrack-daemon. I remember that someone said during the workshop that
building netlink messages is resource consuming.

> The downside is that messages that will get filtered on all
> sockets are constructed completely unnecessary.

More concerns, if we go BSF, I'll have to implement some kind of
"compiler" to translate user options from conntrackd.conf to BSF code.
Using iptables for this seems to be more user-friendly?

I have a patch here that I'll send you as I have some spare time. It
introduces a nfevent field in the skbuff by using a free 2-byte hole in
it. Thus, I only have to insert one hook for the 'events' table.

-- 
"Los honestos son inadaptados sociales" -- Les Luthiers

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: netlink socket filtering
  2008-03-16 11:58     ` Pablo Neira Ayuso
@ 2008-03-17 14:51       ` Patrick McHardy
  0 siblings, 0 replies; 6+ messages in thread
From: Patrick McHardy @ 2008-03-17 14:51 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Netfilter Development Mailinglist

Pablo Neira Ayuso wrote:
> Patrick McHardy wrote:
>> Pablo Neira Ayuso wrote:
>>> I link this BSF-based solution, however, would they be flexible enough
>>> for my needs? Another question that comes to my mind, isn't this
>>> filtering coming to late? I mean, we have to invest time to build the
>>> netlink message and then decide if we want to replicate it or not.
>> Its quite flexible, but you're right that it only takes place
>> after the message has already been constructed. The advantage
>> over selective unicast delivery is that if messages are consumed
>> by multiple receivers we only need to construct them once.
> 
> On most system the number of listener would be usually 2: ulogd and
> conntrack-daemon. I remember that someone told during the workshop that
> building netlink messages is resource consuming.

Yes, the question is how many messages will be filtered.
Since with unicasting and two listeners we potentially
have to construct messages twice, with anything > 50%
delivery rate the socket filtering should be more efficient
(not counting filtering overhead itself).

>> The downside is that messages that will get filtered on all
>> sockets are constructed completely unnecessary.
> 
> More concerns, if we go BSF, I'll have to implement some kind of
> "compiler" to translate user options from conntrackd.conf to BSF code.
> Using iptables for this seems to be more user-friendly?

That should be fairly simple since you're usually only
looking at addresses, ports and protocol states. For
a start this could be hardcoded in a few templates.

> I have a patch here that I'll send you as I have some spare time. It
> introduces a nfevent field in the skbuff by using a 2 bytes free hole in
> it. Thus, I only have to insert one hook for the 'events' table.

I wasn't aware that we still have holes in the skb. Anyway,
adding new skb members is a hard sell for something that
really only a very small subset of users needs.


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2008-03-17 14:59 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-03-02 13:36 netlink socket filtering Patrick McHardy
2008-03-05 13:20 ` Pablo Neira Ayuso
2008-03-05 13:22   ` Pablo Neira Ayuso
2008-03-10 16:59   ` Patrick McHardy
2008-03-16 11:58     ` Pablo Neira Ayuso
2008-03-17 14:51       ` Patrick McHardy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.