netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH nf-next] netfilter: xt_bpf: support ebpf
@ 2016-12-05 20:28 Willem de Bruijn
  2016-12-05 21:06 ` Eric Dumazet
  2016-12-05 22:55 ` Daniel Borkmann
  0 siblings, 2 replies; 16+ messages in thread
From: Willem de Bruijn @ 2016-12-05 20:28 UTC (permalink / raw)
  To: netfilter-devel; +Cc: pablo, netdev, daniel, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

Add support for attaching an eBPF object by file descriptor.

The iptables binary can be called with a path to an elf object or a
pinned bpf object. Also pass the mode and path to the kernel to be
able to return it later for iptables dump and save.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 include/uapi/linux/netfilter/xt_bpf.h | 21 ++++++++
 net/netfilter/xt_bpf.c                | 96 +++++++++++++++++++++++++++++------
 2 files changed, 101 insertions(+), 16 deletions(-)

diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h
index 1fad2c2..652d2b6 100644
--- a/include/uapi/linux/netfilter/xt_bpf.h
+++ b/include/uapi/linux/netfilter/xt_bpf.h
@@ -2,9 +2,11 @@
 #define _XT_BPF_H
 
 #include <linux/filter.h>
+#include <linux/limits.h>
 #include <linux/types.h>
 
 #define XT_BPF_MAX_NUM_INSTR	64
+#define XT_BPF_MAX_NUM_INSTR_V1	(PATH_MAX / sizeof(struct sock_filter))
 
 struct bpf_prog;
 
@@ -16,4 +18,23 @@ struct xt_bpf_info {
 	struct bpf_prog *filter __attribute__((aligned(8)));
 };
 
+enum xt_bpf_modes {
+	XT_BPF_MODE_BYTECODE,
+	XT_BPF_MODE_FD_PINNED,
+	XT_BPF_MODE_FD_ELF,
+};
+
+struct xt_bpf_info_v1 {
+	__u16 mode;
+	__u16 bpf_program_num_elem;
+	__s32 fd;
+	union {
+		struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR_V1];
+		char path[PATH_MAX];
+	};
+
+	/* only used in the kernel */
+	struct bpf_prog *filter __attribute__((aligned(8)));
+};
+
 #endif /*_XT_BPF_H */
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index dffee9d47..2dedaa2 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/filter.h>
+#include <linux/bpf.h>
 
 #include <linux/netfilter/xt_bpf.h>
 #include <linux/netfilter/x_tables.h>
@@ -20,15 +21,15 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_bpf");
 MODULE_ALIAS("ip6t_bpf");
 
-static int bpf_mt_check(const struct xt_mtchk_param *par)
+static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
+				   struct bpf_prog **ret)
 {
-	struct xt_bpf_info *info = par->matchinfo;
 	struct sock_fprog_kern program;
 
-	program.len = info->bpf_program_num_elem;
-	program.filter = info->bpf_program;
+	program.len = len;
+	program.filter = insns;
 
-	if (bpf_prog_create(&info->filter, &program)) {
+	if (bpf_prog_create(ret, &program)) {
 		pr_info("bpf: check failed: parse error\n");
 		return -EINVAL;
 	}
@@ -36,6 +37,42 @@ static int bpf_mt_check(const struct xt_mtchk_param *par)
 	return 0;
 }
 
+static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
+{
+	struct bpf_prog *prog;
+
+	prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	*ret = prog;
+	return 0;
+}
+
+static int bpf_mt_check(const struct xt_mtchk_param *par)
+{
+	struct xt_bpf_info *info = par->matchinfo;
+
+	return __bpf_mt_check_bytecode(info->bpf_program,
+				       info->bpf_program_num_elem,
+				       &info->filter);
+}
+
+static int bpf_mt_check_v1(const struct xt_mtchk_param *par)
+{
+	struct xt_bpf_info_v1 *info = par->matchinfo;
+
+	if (info->mode == XT_BPF_MODE_BYTECODE)
+		return __bpf_mt_check_bytecode(info->bpf_program,
+					       info->bpf_program_num_elem,
+					       &info->filter);
+	else if (info->mode == XT_BPF_MODE_FD_PINNED ||
+		 info->mode == XT_BPF_MODE_FD_ELF)
+		return __bpf_mt_check_fd(info->fd, &info->filter);
+	else
+		return -EINVAL;
+}
+
 static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_bpf_info *info = par->matchinfo;
@@ -43,31 +80,58 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	return BPF_PROG_RUN(info->filter, skb);
 }
 
+static bool bpf_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_bpf_info_v1 *info = par->matchinfo;
+
+	return !!bpf_prog_run_save_cb(info->filter, (struct sk_buff *) skb);
+}
+
 static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
 {
 	const struct xt_bpf_info *info = par->matchinfo;
+
+	bpf_prog_destroy(info->filter);
+}
+
+static void bpf_mt_destroy_v1(const struct xt_mtdtor_param *par)
+{
+	const struct xt_bpf_info_v1 *info = par->matchinfo;
+
 	bpf_prog_destroy(info->filter);
 }
 
-static struct xt_match bpf_mt_reg __read_mostly = {
-	.name		= "bpf",
-	.revision	= 0,
-	.family		= NFPROTO_UNSPEC,
-	.checkentry	= bpf_mt_check,
-	.match		= bpf_mt,
-	.destroy	= bpf_mt_destroy,
-	.matchsize	= sizeof(struct xt_bpf_info),
-	.me		= THIS_MODULE,
+static struct xt_match bpf_mt_reg[] __read_mostly = {
+	{
+		.name		= "bpf",
+		.revision	= 0,
+		.family		= NFPROTO_UNSPEC,
+		.checkentry	= bpf_mt_check,
+		.match		= bpf_mt,
+		.destroy	= bpf_mt_destroy,
+		.matchsize	= sizeof(struct xt_bpf_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "bpf",
+		.revision	= 1,
+		.family		= NFPROTO_UNSPEC,
+		.checkentry	= bpf_mt_check_v1,
+		.match		= bpf_mt_v1,
+		.destroy	= bpf_mt_destroy_v1,
+		.matchsize	= sizeof(struct xt_bpf_info_v1),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init bpf_mt_init(void)
 {
-	return xt_register_match(&bpf_mt_reg);
+	return xt_register_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg));
 }
 
 static void __exit bpf_mt_exit(void)
 {
-	xt_unregister_match(&bpf_mt_reg);
+	xt_unregister_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg));
 }
 
 module_init(bpf_mt_init);
-- 
2.8.0.rc3.226.g39d4020


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH nf-next] netfilter: xt_bpf: support ebpf
  2016-12-05 20:28 [PATCH nf-next] netfilter: xt_bpf: support ebpf Willem de Bruijn
@ 2016-12-05 21:06 ` Eric Dumazet
  2016-12-05 21:30   ` Florian Westphal
  2016-12-05 22:55 ` Daniel Borkmann
  1 sibling, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2016-12-05 21:06 UTC (permalink / raw)
  To: Willem de Bruijn; +Cc: netfilter-devel, pablo, netdev, daniel, Willem de Bruijn

On Mon, 2016-12-05 at 15:28 -0500, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
> 
> Add support for attaching an eBPF object by file descriptor.
> 
> The iptables binary can be called with a path to an elf object or a
> pinned bpf object. Also pass the mode and path to the kernel to be
> able to return it later for iptables dump and save.
> 
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> ---

Assuming there is no simple way to get variable matchsize in iptables,
this looks good to me, thanks.

Reviewed-by: Eric Dumazet <edumazet@google.com>





^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH nf-next] netfilter: xt_bpf: support ebpf
  2016-12-05 21:06 ` Eric Dumazet
@ 2016-12-05 21:30   ` Florian Westphal
  2016-12-05 22:34     ` Pablo Neira Ayuso
  0 siblings, 1 reply; 16+ messages in thread
From: Florian Westphal @ 2016-12-05 21:30 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Willem de Bruijn, netfilter-devel, pablo, netdev, daniel,
	Willem de Bruijn

Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Mon, 2016-12-05 at 15:28 -0500, Willem de Bruijn wrote:
> > From: Willem de Bruijn <willemb@google.com>
> > 
> > Add support for attaching an eBPF object by file descriptor.
> > 
> > The iptables binary can be called with a path to an elf object or a
> > pinned bpf object. Also pass the mode and path to the kernel to be
> > able to return it later for iptables dump and save.
> > 
> > Signed-off-by: Willem de Bruijn <willemb@google.com>
> > ---
> 
> Assuming there is no simple way to get variable matchsize in iptables,
> this looks good to me, thanks.

It should be possible by setting kernel .matchsize to ~0 which
suppresses strict size enforcement.

Its currently only used by ebt_among, but this should work for any xtables
module.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH nf-next] netfilter: xt_bpf: support ebpf
  2016-12-05 21:30   ` Florian Westphal
@ 2016-12-05 22:34     ` Pablo Neira Ayuso
  2016-12-05 22:40       ` Florian Westphal
  2016-12-05 23:00       ` Pablo Neira Ayuso
  0 siblings, 2 replies; 16+ messages in thread
From: Pablo Neira Ayuso @ 2016-12-05 22:34 UTC (permalink / raw)
  To: Florian Westphal
  Cc: Eric Dumazet, Willem de Bruijn, netfilter-devel, netdev, daniel,
	Willem de Bruijn

On Mon, Dec 05, 2016 at 10:30:01PM +0100, Florian Westphal wrote:
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > On Mon, 2016-12-05 at 15:28 -0500, Willem de Bruijn wrote:
> > > From: Willem de Bruijn <willemb@google.com>
> > > 
> > > Add support for attaching an eBPF object by file descriptor.
> > > 
> > > The iptables binary can be called with a path to an elf object or a
> > > pinned bpf object. Also pass the mode and path to the kernel to be
> > > able to return it later for iptables dump and save.
> > > 
> > > Signed-off-by: Willem de Bruijn <willemb@google.com>
> > > ---
> > 
> > Assuming there is no simple way to get variable matchsize in iptables,
> > this looks good to me, thanks.
> 
> It should be possible by setting kernel .matchsize to ~0 which
> suppresses strict size enforcement.
> 
> Its currently only used by ebt_among, but this should work for any xtables
> module.

This is likely going to trigger a large rewrite of the core userspace
iptables codebase, and likely going to pull part of the mess we have
in ebtables into iptables. So I'd prefer not to follow this path.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH nf-next] netfilter: xt_bpf: support ebpf
  2016-12-05 22:34     ` Pablo Neira Ayuso
@ 2016-12-05 22:40       ` Florian Westphal
  2016-12-05 22:59         ` Eric Dumazet
  2016-12-05 23:00       ` Pablo Neira Ayuso
  1 sibling, 1 reply; 16+ messages in thread
From: Florian Westphal @ 2016-12-05 22:40 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: Florian Westphal, Eric Dumazet, Willem de Bruijn, netfilter-devel,
	netdev, daniel, Willem de Bruijn

Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Mon, Dec 05, 2016 at 10:30:01PM +0100, Florian Westphal wrote:
> > Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > > On Mon, 2016-12-05 at 15:28 -0500, Willem de Bruijn wrote:
> > > > From: Willem de Bruijn <willemb@google.com>
> > > > 
> > > > Add support for attaching an eBPF object by file descriptor.
> > > > 
> > > > The iptables binary can be called with a path to an elf object or a
> > > > pinned bpf object. Also pass the mode and path to the kernel to be
> > > > able to return it later for iptables dump and save.
> > > > 
> > > > Signed-off-by: Willem de Bruijn <willemb@google.com>
> > > > ---
> > > 
> > > Assuming there is no simple way to get variable matchsize in iptables,
> > > this looks good to me, thanks.
> > 
> > It should be possible by setting kernel .matchsize to ~0 which
> > suppresses strict size enforcement.
> > 
> > Its currently only used by ebt_among, but this should work for any xtables
> > module.
> 
> This is likely going to trigger a large rewrite of the core userspace
> iptables codebase, and likely going to pull part of the mess we have
> in ebtables into iptables. So I'd prefer not to follow this path.

Fair enough, I have no objections to the patch.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH nf-next] netfilter: xt_bpf: support ebpf
  2016-12-05 20:28 [PATCH nf-next] netfilter: xt_bpf: support ebpf Willem de Bruijn
  2016-12-05 21:06 ` Eric Dumazet
@ 2016-12-05 22:55 ` Daniel Borkmann
  2016-12-05 23:01   ` Willem de Bruijn
  1 sibling, 1 reply; 16+ messages in thread
From: Daniel Borkmann @ 2016-12-05 22:55 UTC (permalink / raw)
  To: Willem de Bruijn, netfilter-devel; +Cc: pablo, netdev, Willem de Bruijn

Hi Willem,

On 12/05/2016 09:28 PM, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
>
> Add support for attaching an eBPF object by file descriptor.
>
> The iptables binary can be called with a path to an elf object or a
> pinned bpf object. Also pass the mode and path to the kernel to be
> able to return it later for iptables dump and save.
>
> Signed-off-by: Willem de Bruijn <willemb@google.com>

just out of pure curiosity, use case is for android guys wrt
accounting, or anything specific that cls_bpf on tc ingress +
egress cannot do already?

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH nf-next] netfilter: xt_bpf: support ebpf
  2016-12-05 22:40       ` Florian Westphal
@ 2016-12-05 22:59         ` Eric Dumazet
  2016-12-05 23:05           ` Pablo Neira Ayuso
  0 siblings, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2016-12-05 22:59 UTC (permalink / raw)
  To: Florian Westphal
  Cc: Pablo Neira Ayuso, Willem de Bruijn, netfilter-devel, netdev,
	daniel, Willem de Bruijn

On Mon, 2016-12-05 at 23:40 +0100, Florian Westphal wrote:

> Fair enough, I have no objections to the patch.

An additional question is about PATH_MAX :

Is it guaranteed to stay at 4096 forever ?

To be safe, maybe we should use a constant of our own.



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH nf-next] netfilter: xt_bpf: support ebpf
  2016-12-05 22:34     ` Pablo Neira Ayuso
  2016-12-05 22:40       ` Florian Westphal
@ 2016-12-05 23:00       ` Pablo Neira Ayuso
  2016-12-05 23:06         ` Willem de Bruijn
  1 sibling, 1 reply; 16+ messages in thread
From: Pablo Neira Ayuso @ 2016-12-05 23:00 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: netfilter-devel, netdev, daniel, Willem de Bruijn,
	Florian Westphal, Eric Dumazet

On Mon, Dec 05, 2016 at 11:34:15PM +0100, Pablo Neira Ayuso wrote:
> On Mon, Dec 05, 2016 at 10:30:01PM +0100, Florian Westphal wrote:
> > Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > > On Mon, 2016-12-05 at 15:28 -0500, Willem de Bruijn wrote:
> > > > From: Willem de Bruijn <willemb@google.com>
> > > > 
> > > > Add support for attaching an eBPF object by file descriptor.
> > > > 
> > > > The iptables binary can be called with a path to an elf object or a
> > > > pinned bpf object. Also pass the mode and path to the kernel to be
> > > > able to return it later for iptables dump and save.
> > > > 
> > > > Signed-off-by: Willem de Bruijn <willemb@google.com>
> > > > ---
> > > 
> > > Assuming there is no simple way to get variable matchsize in iptables,
> > > this looks good to me, thanks.
> > 
> > It should be possible by setting kernel .matchsize to ~0 which
> > suppresses strict size enforcement.
> > 
> > Its currently only used by ebt_among, but this should work for any xtables
> > module.
> 
> This is likely going to trigger a large rewrite of the core userspace
> iptables codebase, and likely going to pull part of the mess we have
> in ebtables into iptables. So I'd prefer not to follow this path.

So this variable path is there to annotate what userspace claims that
is the file that contains the bpf blob that was loaded, actually this
is irrelevant to the kernel, so this is just there to dump it back
when iptables-save it is called. Just a side note, one could set
anything there from userspace, point somewhere else actually...

Well anyway, going back to the path problem to keep it simple: Why
don't just trim this down to something smaller, are you really
expecting to reach PATH_MAX in your usecase?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH nf-next] netfilter: xt_bpf: support ebpf
  2016-12-05 22:55 ` Daniel Borkmann
@ 2016-12-05 23:01   ` Willem de Bruijn
  0 siblings, 0 replies; 16+ messages in thread
From: Willem de Bruijn @ 2016-12-05 23:01 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: netfilter-devel, pablo, Network Development, Willem de Bruijn

On Mon, Dec 5, 2016 at 5:55 PM, Daniel Borkmann <daniel@iogearbox.net> wrote:
> Hi Willem,
>
> On 12/05/2016 09:28 PM, Willem de Bruijn wrote:
>>
>> From: Willem de Bruijn <willemb@google.com>
>>
>> Add support for attaching an eBPF object by file descriptor.
>>
>> The iptables binary can be called with a path to an elf object or a
>> pinned bpf object. Also pass the mode and path to the kernel to be
>> able to return it later for iptables dump and save.
>>
>> Signed-off-by: Willem de Bruijn <willemb@google.com>
>
>
> just out of pure curiosity, use case is for android guys wrt
> accounting, or anything specific that cls_bpf on tc ingress +
> egress cannot do already?

That is the immediate motivation, yes.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH nf-next] netfilter: xt_bpf: support ebpf
  2016-12-05 22:59         ` Eric Dumazet
@ 2016-12-05 23:05           ` Pablo Neira Ayuso
  0 siblings, 0 replies; 16+ messages in thread
From: Pablo Neira Ayuso @ 2016-12-05 23:05 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Florian Westphal, Willem de Bruijn, netfilter-devel, netdev,
	daniel, Willem de Bruijn

On Mon, Dec 05, 2016 at 02:59:09PM -0800, Eric Dumazet wrote:
> On Mon, 2016-12-05 at 23:40 +0100, Florian Westphal wrote:
> 
> > Fair enough, I have no objections to the patch.
> 
> An additional question is about PATH_MAX :
> 
> Is it guaranteed to stay at 4096 forever ?
> 
> To be safe, maybe we should use a constant of our own.

Right, this reminds me we have to fix something else.

So constant of our own plus something smaller, if possible, would be
good to go. Thanks.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH nf-next] netfilter: xt_bpf: support ebpf
  2016-12-05 23:00       ` Pablo Neira Ayuso
@ 2016-12-05 23:06         ` Willem de Bruijn
  2016-12-05 23:22           ` Pablo Neira Ayuso
  0 siblings, 1 reply; 16+ messages in thread
From: Willem de Bruijn @ 2016-12-05 23:06 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: netfilter-devel, Network Development, Daniel Borkmann,
	Willem de Bruijn, Florian Westphal, Eric Dumazet

On Mon, Dec 5, 2016 at 6:00 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Mon, Dec 05, 2016 at 11:34:15PM +0100, Pablo Neira Ayuso wrote:
>> On Mon, Dec 05, 2016 at 10:30:01PM +0100, Florian Westphal wrote:
>> > Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> > > On Mon, 2016-12-05 at 15:28 -0500, Willem de Bruijn wrote:
>> > > > From: Willem de Bruijn <willemb@google.com>
>> > > >
>> > > > Add support for attaching an eBPF object by file descriptor.
>> > > >
>> > > > The iptables binary can be called with a path to an elf object or a
>> > > > pinned bpf object. Also pass the mode and path to the kernel to be
>> > > > able to return it later for iptables dump and save.
>> > > >
>> > > > Signed-off-by: Willem de Bruijn <willemb@google.com>
>> > > > ---
>> > >
>> > > Assuming there is no simple way to get variable matchsize in iptables,
>> > > this looks good to me, thanks.
>> >
>> > It should be possible by setting kernel .matchsize to ~0 which
>> > suppresses strict size enforcement.
>> >
>> > Its currently only used by ebt_among, but this should work for any xtables
>> > module.
>>
>> This is likely going to trigger a large rewrite of the core userspace
>> iptables codebase, and likely going to pull part of the mess we have
>> in ebtables into iptables. So I'd prefer not to follow this path.
>
> So this variable path is there to annotate what userspace claims that
> is the file that contains the bpf blob that was loaded, actually this
> is irrelevant to the kernel, so this is just there to dump it back
> when iptables-save it is called. Just a side note, one could set
> anything there from userspace, point somewhere else actually...
>
> Well anyway, going back to the path problem to keep it simple: Why
> don't just trim this down to something smaller, are you really
> expecting to reach PATH_MAX in your usecase?

Not often. Module-specific limitations that differ from global
definitions are just a pain when they bite. This module also has an
arbitrary low limit on the length of the cBPF program passed, for
instance.

Eric also suggests a private variable to avoid being subject to
changes to PATH_MAX. Then we can indeed also choose an arbitrary lower
length than current PATH_MAX.

FWIW, there is a workaround for users with deeply nested paths: the
path passed does not have to be absolute. It is literally what is
passed on the command line to iptables right now, including relative
addresses.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH nf-next] netfilter: xt_bpf: support ebpf
  2016-12-05 23:06         ` Willem de Bruijn
@ 2016-12-05 23:22           ` Pablo Neira Ayuso
  2016-12-05 23:29             ` Willem de Bruijn
  0 siblings, 1 reply; 16+ messages in thread
From: Pablo Neira Ayuso @ 2016-12-05 23:22 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: netfilter-devel, Network Development, Daniel Borkmann,
	Willem de Bruijn, Florian Westphal, Eric Dumazet

On Mon, Dec 05, 2016 at 06:06:05PM -0500, Willem de Bruijn wrote:
[...]
> Eric also suggests a private variable to avoid being subject to
> changes to PATH_MAX. Then we can indeed also choose an arbitrary lower
> length than current PATH_MAX.

Good.

> FWIW, there is a workaround for users with deeply nested paths: the
> path passed does not have to be absolute. It is literally what is
> passed on the command line to iptables right now, including relative
> addresses.

If iptables userspace always expects to have the bpf file repository
in some given location (suggesting to have a directory that we specify
at ./configure time, similar to what we do with connlabel.conf), then
I think we can rely on relative paths. Would this be flexible enough
for your usecase?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH nf-next] netfilter: xt_bpf: support ebpf
  2016-12-05 23:22           ` Pablo Neira Ayuso
@ 2016-12-05 23:29             ` Willem de Bruijn
  2016-12-05 23:51               ` Willem de Bruijn
  0 siblings, 1 reply; 16+ messages in thread
From: Willem de Bruijn @ 2016-12-05 23:29 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: Willem de Bruijn, netfilter-devel, Network Development,
	Daniel Borkmann, Florian Westphal, Eric Dumazet

On Mon, Dec 5, 2016 at 6:22 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Mon, Dec 05, 2016 at 06:06:05PM -0500, Willem de Bruijn wrote:
> [...]
>> Eric also suggests a private variable to avoid being subject to
>> changes to PATH_MAX. Then we can indeed also choose an arbitrary lower
>> length than current PATH_MAX.
>
> Good.
>
>> FWIW, there is a workaround for users with deeply nested paths: the
>> path passed does not have to be absolute. It is literally what is
>> passed on the command line to iptables right now, including relative
>> addresses.
>
> If iptables userspace always expects to have the bpf file repository
> in some given location (suggesting to have a directory that we specify
> at ./configure time, similar to what we do with connlabel.conf), then
> I think we can rely on relative paths. Would this be flexible enough
> for your usecase?

As long as it accepts relative paths, I think it will always work.
Worst case, a user has to cd. No need for hardcoding the bpf mount
point at compile time.

I have the matching iptables patch for pinned objects, btw. Not for
elf objects, which requires linking to libelf and parsing the object,
which is more work (and perhaps best punted on by expanding libbpf in
bcc to include this functionality. it already exists under samples/bpf
and iproute2).

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH nf-next] netfilter: xt_bpf: support ebpf
  2016-12-05 23:29             ` Willem de Bruijn
@ 2016-12-05 23:51               ` Willem de Bruijn
  2016-12-06  0:20                 ` Florian Westphal
  0 siblings, 1 reply; 16+ messages in thread
From: Willem de Bruijn @ 2016-12-05 23:51 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Pablo Neira Ayuso, netfilter-devel, Network Development,
	Daniel Borkmann, Florian Westphal, Eric Dumazet

On Mon, Dec 5, 2016 at 6:29 PM, Willem de Bruijn <willemb@google.com> wrote:
> On Mon, Dec 5, 2016 at 6:22 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>> On Mon, Dec 05, 2016 at 06:06:05PM -0500, Willem de Bruijn wrote:
>> [...]
>>> Eric also suggests a private variable to avoid being subject to
>>> changes to PATH_MAX. Then we can indeed also choose an arbitrary lower
>>> length than current PATH_MAX.
>>
>> Good.
>>
>>> FWIW, there is a workaround for users with deeply nested paths: the
>>> path passed does not have to be absolute. It is literally what is
>>> passed on the command line to iptables right now, including relative
>>> addresses.
>>
>> If iptables userspace always expects to have the bpf file repository
>> in some given location (suggesting to have a directory that we specify
>> at ./configure time, similar to what we do with connlabel.conf), then
>> I think we can rely on relative paths. Would this be flexible enough
>> for your usecase?
>
> As long as it accepts relative paths, I think it will always work.
> Worst case, a user has to cd. No need for hardcoding the bpf mount
> point at compile time.
>
> I have the matching iptables patch for pinned objects, btw. Not for
> elf objects, which requires linking to libelf and parsing the object,
> which is more work (and perhaps best punted on by expanding libbpf in
> bcc to include this functionality. it already exists under samples/bpf
> and iproute2).

While we're discussing the patch, another question, about revisions: I
tested both modified and original iptables binaries on both standard
and modified kernels. It all works as expected, except for the case
where both binaries are used on a single kernel. For instance:

  iptables -A OUTPUT -m bpf --bytecode "`./nfbpf_compile RAW 'udp port
8000'`" -j LOG
  ./iptables.new -L

Here the new binary will interpret the object as xt_bpf_match_v1, but
iptables has inserted xt_bpf_match. The same problem happens the other
way around. A new binary can be made robust to detect old structs, but
not the other way around. Specific to bpf, the existing xt_bpf code
has an unfortunate bug that it always prints at least one line of
code, even if ->bpf_program_num_elems == 0.

I notice that other extensions also do not necessarily only extend
struct vN in vN+1. Is the above a known issue?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH nf-next] netfilter: xt_bpf: support ebpf
  2016-12-05 23:51               ` Willem de Bruijn
@ 2016-12-06  0:20                 ` Florian Westphal
  2016-12-06 22:44                   ` Willem de Bruijn
  0 siblings, 1 reply; 16+ messages in thread
From: Florian Westphal @ 2016-12-06  0:20 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Willem de Bruijn, Pablo Neira Ayuso, netfilter-devel,
	Network Development, Daniel Borkmann, Florian Westphal,
	Eric Dumazet

Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote:
> While we're discussing the patch, another question, about revisions: I
> tested both modified and original iptables binaries on both standard
> and modified kernels. It all works as expected, except for the case
> where both binaries are used on a single kernel. For instance:
> 
>   iptables -A OUTPUT -m bpf --bytecode "`./nfbpf_compile RAW 'udp port
> 8000'`" -j LOG
>   ./iptables.new -L
> 
> Here the new binary will interpret the object as xt_bpf_match_v1, but
> iptables has inserted xt_bpf_match. The same problem happens the other
> way around. A new binary can be made robust to detect old structs, but
> not the other way around. Specific to bpf, the existing xt_bpf code
> has an unfortunate bug that it always prints at least one line of
> code, even if ->bpf_program_num_elems == 0.
> 
> I notice that other extensions also do not necessarily only extend
> struct vN in vN+1. Is the above a known issue?

Yes, I guess noone ever bothered to fix this.

The kernel blob should contain the match/target revision number,
so userspace can in fact see that 'this is bpf v42', but iirc
the netfilter userspace just loads the highest userspace revision
supported by the kernel (which is then different for the 2 iptables
binaries).

But we *could* display message like 'kernel uses revision 2 but I can
only find 0 and 1' or fall back to the lower supported revision without
guess-the-struct-by-size games.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH nf-next] netfilter: xt_bpf: support ebpf
  2016-12-06  0:20                 ` Florian Westphal
@ 2016-12-06 22:44                   ` Willem de Bruijn
  0 siblings, 0 replies; 16+ messages in thread
From: Willem de Bruijn @ 2016-12-06 22:44 UTC (permalink / raw)
  To: Florian Westphal
  Cc: Willem de Bruijn, Pablo Neira Ayuso, netfilter-devel,
	Network Development, Daniel Borkmann, Eric Dumazet

On Mon, Dec 5, 2016 at 7:20 PM, Florian Westphal <fw@strlen.de> wrote:
> Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote:
>> While we're discussing the patch, another question, about revisions: I
>> tested both modified and original iptables binaries on both standard
>> and modified kernels. It all works as expected, except for the case
>> where both binaries are used on a single kernel. For instance:
>>
>>   iptables -A OUTPUT -m bpf --bytecode "`./nfbpf_compile RAW 'udp port
>> 8000'`" -j LOG
>>   ./iptables.new -L
>>
>> Here the new binary will interpret the object as xt_bpf_match_v1, but
>> iptables has inserted xt_bpf_match. The same problem happens the other
>> way around. A new binary can be made robust to detect old structs, but
>> not the other way around. Specific to bpf, the existing xt_bpf code
>> has an unfortunate bug that it always prints at least one line of
>> code, even if ->bpf_program_num_elems == 0.
>>
>> I notice that other extensions also do not necessarily only extend
>> struct vN in vN+1. Is the above a known issue?
>
> Yes, I guess noone ever bothered to fix this.
>
> The kernel blob should contain the match/target revision number,
> so userspace can in fact see that 'this is bpf v42', but iirc
> the netfilter userspace just loads the highest userspace revision
> supported by the kernel (which is then different for the 2 iptables
> binaries).

We can fall back on not parsing contents on mismatch:

diff --git a/iptables/iptables.c b/iptables/iptables.c
index 540d111..ada7c94 100644
--- a/iptables/iptables.c
+++ b/iptables/iptables.c
@@ -504,7 +504,8 @@ print_match(const struct xt_entry_match *m,
                xtables_find_match(m->u.user.name, XTF_TRY_LOAD, NULL);

        if (match) {
-               if (match->print)
+               if (match->print &&
+                   m->u.user.revision == match->revision)
                        match->print(ip, m, numeric);
                else
                        printf("%s ", match->name);

> But we *could* display message like 'kernel uses revision 2 but I can
> only find 0 and 1' or fall back to the lower supported revision without
> guess-the-struct-by-size games.

That's a good idea. A special case printf() with a notice, then.

^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2016-12-06 22:44 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-12-05 20:28 [PATCH nf-next] netfilter: xt_bpf: support ebpf Willem de Bruijn
2016-12-05 21:06 ` Eric Dumazet
2016-12-05 21:30   ` Florian Westphal
2016-12-05 22:34     ` Pablo Neira Ayuso
2016-12-05 22:40       ` Florian Westphal
2016-12-05 22:59         ` Eric Dumazet
2016-12-05 23:05           ` Pablo Neira Ayuso
2016-12-05 23:00       ` Pablo Neira Ayuso
2016-12-05 23:06         ` Willem de Bruijn
2016-12-05 23:22           ` Pablo Neira Ayuso
2016-12-05 23:29             ` Willem de Bruijn
2016-12-05 23:51               ` Willem de Bruijn
2016-12-06  0:20                 ` Florian Westphal
2016-12-06 22:44                   ` Willem de Bruijn
2016-12-05 22:55 ` Daniel Borkmann
2016-12-05 23:01   ` Willem de Bruijn

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).