* [RFC PATCH] netfilter: nf_tables: add new write expression
@ 2014-02-15 13:17 Nikolay Aleksandrov
2014-02-15 13:19 ` Nikolay Aleksandrov
` (2 more replies)
0 siblings, 3 replies; 12+ messages in thread
From: Nikolay Aleksandrov @ 2014-02-15 13:17 UTC (permalink / raw)
To: netfilter-devel; +Cc: pablo, kaber
The new "write" expression can be used to manipulate packet data.
The parameters that it has are source register (source for the bytes
which are to be written), offset in the packet and length to write.
It uses a select_ops method to choose between fast ops in cases where the
length is 1, 2 or 4 bytes and slow ops (i.e. using memcpy) in all other
cases.
Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
---
I needed a way (other than passing the packets to user-space) to alter
the ToS field via nftables, so I decided to make it a bit more general. I
use it with the immediate expression to load the new ToS and then write it.
If you find this useful I can post the libnftnl patch as well.
Right now, as you can see, it continues even if the "write" wasn't successful,
which should probably be changed to NFT_BREAK for that case.
This patch applies to Dave's net-next tree.
include/uapi/linux/netfilter/nf_tables.h | 16 ++++
net/netfilter/Kconfig | 7 ++
net/netfilter/Makefile | 1 +
net/netfilter/nft_write.c | 160 +++++++++++++++++++++++++++++++
4 files changed, 184 insertions(+)
create mode 100644 net/netfilter/nft_write.c
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 83c985a..ac8b37b 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -745,4 +745,20 @@ enum nft_nat_attributes {
};
#define NFTA_NAT_MAX (__NFTA_NAT_MAX - 1)
+/**
+ * enum nft_write_attributes - nf_tables write expression netlink attributes
+ *
+ * @NFTA_WRITE_SREG: source register (source for the bytes to be written)
+ * @NFTA_WRITE_OFFSET: offset into packet to write
+ * @NFTA_WRITE_WLEN: number of bytes to write
+ */
+enum nft_write_attributes {
+ NFTA_WRITE_UNSPEC,
+ NFTA_WRITE_SREG,
+ NFTA_WRITE_OFFSET,
+ NFTA_WRITE_WLEN,
+ __NFTA_WRITE_MAX
+};
+#define NFTA_WRITE_MAX (__NFTA_WRITE_MAX - 1)
+
#endif /* _LINUX_NF_TABLES_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e9410d1..54dbff6 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -534,6 +534,13 @@ config NFT_COMPAT
x_tables match/target extensions over the nf_tables
framework.
+config NFT_WRITE
+ depends on NF_TABLES
+ tristate "Netfilter nf_tables write module"
+ help
+ This option adds the "write" expression that you can use to
+ manipulate packet data.
+
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index bffdad7..9fbedb8 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -78,6 +78,7 @@ obj-$(CONFIG_NFT_CT) += nft_ct.o
obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
obj-$(CONFIG_NFT_NAT) += nft_nat.o
obj-$(CONFIG_NFT_QUEUE) += nft_queue.o
+obj-$(CONFIG_NFT_WRITE) += nft_write.o
obj-$(CONFIG_NFT_REJECT) += nft_reject.o
obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o
diff --git a/net/netfilter/nft_write.c b/net/netfilter/nft_write.c
new file mode 100644
index 0000000..2fd5608
--- /dev/null
+++ b/net/netfilter/nft_write.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2014 Nikolay Aleksandrov <nikolay@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_write_expr {
+ enum nft_registers sreg:8;
+ u32 offset;
+ u8 wlen;
+};
+
+static void nft_write_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_write_expr *priv = nft_expr_priv(expr);
+ struct nft_data *src = &data[priv->sreg];
+
+ data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ if (!skb_make_writable(pkt->skb, priv->offset+priv->wlen))
+ return;
+ pr_debug("Writing at %u : 0x%x len %u\n",
+ priv->offset, src->data[0], priv->wlen);
+ memcpy(pkt->skb->data + priv->offset, src, priv->wlen);
+}
+
+static void nft_write_fast_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_write_expr *priv = nft_expr_priv(expr);
+ struct nft_data *src = &data[priv->sreg];
+ unsigned char *ptr;
+
+ data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ if (!skb_make_writable(pkt->skb, priv->offset+priv->wlen))
+ return;
+ ptr = pkt->skb->data + priv->offset;
+ pr_debug("Writing at %u : 0x%x len %u\n",
+ priv->offset, src->data[0], priv->wlen);
+ if (priv->wlen == 4)
+ *(u32 *)ptr = *(u32 *)src->data;
+ else if (priv->wlen == 2)
+ *(u16 *)ptr = *(u16 *)src->data;
+ else
+ *(u8 *)ptr = *(u8 *)src->data;
+}
+
+
+static const struct nla_policy nft_write_policy[NFTA_WRITE_MAX + 1] = {
+ [NFTA_WRITE_SREG] = { .type = NLA_U32 },
+ [NFTA_WRITE_OFFSET] = { .type = NLA_U32 },
+ [NFTA_WRITE_WLEN] = { .type = NLA_U8 },
+};
+
+static int nft_write_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_write_expr *priv = nft_expr_priv(expr);
+
+ priv->sreg = ntohl(nla_get_be32(tb[NFTA_WRITE_SREG]));
+ priv->wlen = nla_get_u8(tb[NFTA_WRITE_WLEN]);
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_WRITE_OFFSET]));
+
+ return 0;
+}
+
+static int nft_write_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_write_expr *priv = nft_expr_priv(expr);
+
+ nla_put_be32(skb, NFTA_WRITE_SREG, htonl(priv->sreg));
+ nla_put_be32(skb, NFTA_WRITE_OFFSET, htonl(priv->offset));
+ nla_put_u8(skb, NFTA_WRITE_WLEN, priv->wlen);
+
+ return 0;
+}
+
+static struct nft_expr_type nft_write_type;
+static const struct nft_expr_ops nft_write_ops = {
+ .type = &nft_write_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_write_expr)),
+ .eval = nft_write_eval,
+ .init = nft_write_init,
+ .dump = nft_write_dump,
+};
+
+static const struct nft_expr_ops nft_write_fast_ops = {
+ .type = &nft_write_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_write_expr)),
+ .eval = nft_write_fast_eval,
+ .init = nft_write_init,
+ .dump = nft_write_dump,
+};
+
+static const struct nft_expr_ops *
+nft_write_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ enum nft_registers sreg;
+ u32 offset;
+ u8 wlen;
+ int err;
+
+ if (tb[NFTA_WRITE_OFFSET] == NULL ||
+ tb[NFTA_WRITE_WLEN] == NULL ||
+ tb[NFTA_WRITE_SREG] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ wlen = nla_get_u8(tb[NFTA_WRITE_WLEN]);
+ if (wlen == 0 || wlen > FIELD_SIZEOF(struct nft_data, data))
+ return ERR_PTR(-EINVAL);
+ sreg = ntohl(nla_get_be32(tb[NFTA_WRITE_SREG]));
+ err = nft_validate_input_register(sreg);
+ if (err < 0)
+ return ERR_PTR(err);
+ offset = ntohl(nla_get_u32(tb[NFTA_WRITE_OFFSET]));
+
+ if (wlen != 3 && wlen <= 4 && IS_ALIGNED(offset, wlen))
+ return &nft_write_fast_ops;
+ else
+ return &nft_write_ops;
+}
+
+static struct nft_expr_type nft_write_type __read_mostly = {
+ .name = "write",
+ .select_ops = &nft_write_select_ops,
+ .policy = nft_write_policy,
+ .maxattr = NFTA_WRITE_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_write_module_init(void)
+{
+ return nft_register_expr(&nft_write_type);
+}
+
+static void __exit nft_write_module_exit(void)
+{
+ nft_unregister_expr(&nft_write_type);
+}
+
+module_init(nft_write_module_init);
+module_exit(nft_write_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Nikolay Aleksandrov <nikolay@redhat.com>");
+MODULE_ALIAS_NFT_EXPR("write");
--
1.8.4.2
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [RFC PATCH] netfilter: nf_tables: add new write expression
2014-02-15 13:17 [RFC PATCH] netfilter: nf_tables: add new write expression Nikolay Aleksandrov
@ 2014-02-15 13:19 ` Nikolay Aleksandrov
2014-02-15 13:30 ` Patrick McHardy
2014-02-15 13:36 ` Florian Westphal
2 siblings, 0 replies; 12+ messages in thread
From: Nikolay Aleksandrov @ 2014-02-15 13:19 UTC (permalink / raw)
To: netfilter-devel; +Cc: pablo, kaber
On 02/15/2014 02:17 PM, Nikolay Aleksandrov wrote:
> The new "write" expression can be used to manipulate packet data.
> The parameters that it has are source register (source for the bytes
> which are to be written), offset in the packet and length to write.
> It uses a select_ops method to choose between fast ops in the cases
> length is 1,2 or 4 bytes and slow ops (i.e. using memcpy) in other
> cases.
>
> Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
> ---
> I needed a way (other than passing the packets to user-space) to alter
> the ToS field via nftables, so I decided to make it a bit more general. I
> use it with the immediate expression to load the new ToS and then write it.
> If you find this useful I can post the libnftnl patch as well.
> Right now as you can see it continues even if the "write" wasn't successful
> which should be probably changed to NFT_BREAK for that case.
> This patch applies to Dave's net-next tree.
>
And I'll fix the direct skb->data references and some other small issues,
thus the RFC :-)
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] netfilter: nf_tables: add new write expression
2014-02-15 13:17 [RFC PATCH] netfilter: nf_tables: add new write expression Nikolay Aleksandrov
2014-02-15 13:19 ` Nikolay Aleksandrov
@ 2014-02-15 13:30 ` Patrick McHardy
2014-02-15 13:35 ` Nikolay Aleksandrov
2014-02-15 13:36 ` Florian Westphal
2 siblings, 1 reply; 12+ messages in thread
From: Patrick McHardy @ 2014-02-15 13:30 UTC (permalink / raw)
To: Nikolay Aleksandrov, netfilter-devel; +Cc: pablo
On 15. Februar 2014 13:17:22 GMT+00:00, Nikolay Aleksandrov <nikolay@redhat.com> wrote:
>The new "write" expression can be used to manipulate packet data.
>The parameters that it has are source register (source for the bytes
>which are to be written), offset in the packet and length to write.
>It uses a select_ops method to choose between fast ops in the cases
>length is 1,2 or 4 bytes and slow ops (i.e. using memcpy) in other
>cases.
>
>Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
>---
>I needed a way (other than passing the packets to user-space) to alter
>the ToS field via nftables, so I decided to make it a bit more general.
>I
>use it with the immediate expression to load the new ToS and then write
>it.
>If you find this useful I can post the libnftnl patch as well.
>Right now as you can see it continues even if the "write" wasn't
>successful
>which should be probably changed to NFT_BREAK for that case.
Yes.
>This patch applies to Dave's net-next tree.
I think this is a useful addition. However, I would prefer to put this into the payload expression and select the proper ops based on the presence of sreg/dreg.
>+++ b/net/netfilter/nft_write.c
>@@ -0,0 +1,160 @@
>+/*
>+ * Copyright (c) 2014 Nikolay Aleksandrov <nikolay@redhat.com>
>+ *
>+ * This program is free software; you can redistribute it and/or
>modify
>+ * it under the terms of the GNU General Public License version 2 as
>+ * published by the Free Software Foundation.
>+ */
>+
>+#include <linux/kernel.h>
>+#include <linux/init.h>
>+#include <linux/module.h>
>+#include <linux/netlink.h>
>+#include <linux/netfilter.h>
>+#include <linux/netfilter/nf_tables.h>
>+#include <net/netfilter/nf_tables.h>
>+
>+struct nft_write_expr {
>+ enum nft_registers sreg:8;
>+ u32 offset;
>+ u8 wlen;
Probably obsolete when combining this with payload, but you can save memory by moving wlen next to sreg.
>+};
>+
>+static void nft_write_eval(const struct nft_expr *expr,
>+ struct nft_data data[NFT_REG_MAX + 1],
>+ const struct nft_pktinfo *pkt)
>+{
>+ struct nft_write_expr *priv = nft_expr_priv(expr);
>+ struct nft_data *src = &data[priv->sreg];
>+
>+ data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
Not necessary, NFT_CONTINUE is the default.
>+ if (!skb_make_writable(pkt->skb, priv->offset+priv->wlen))
>+ return;
>+ pr_debug("Writing at %u : 0x%x len %u\n",
>+ priv->offset, src->data[0], priv->wlen);
>+ memcpy(pkt->skb->data + priv->offset, src, priv->wlen);
The offset should be relative to a payload base.
>+}
>+
>+static void nft_write_fast_eval(const struct nft_expr *expr,
>+ struct nft_data data[NFT_REG_MAX + 1],
>+ const struct nft_pktinfo *pkt)
>+{
>+ struct nft_write_expr *priv = nft_expr_priv(expr);
>+ struct nft_data *src = &data[priv->sreg];
>+ unsigned char *ptr;
>+
>+ data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
>+ if (!skb_make_writable(pkt->skb, priv->offset+priv->wlen))
>+ return;
>+ ptr = pkt->skb->data + priv->offset;
>+ pr_debug("Writing at %u : 0x%x len %u\n",
>+ priv->offset, src->data[0], priv->wlen);
>+ if (priv->wlen == 4)
>+ *(u32 *)ptr = *(u32 *)src->data;
>+ else if (priv->wlen == 2)
>+ *(u16 *)ptr = *(u16 *)src->data;
>+ else
>+ *(u8 *)ptr = *(u8 *)src->data;
>+}
I'm not sure we need this. The payload fast types exist in order to inline the very common operation. I wouldn't expect it to be much of a gain in this case.
>+
>+
>+static const struct nla_policy nft_write_policy[NFTA_WRITE_MAX + 1] =
>{
>+ [NFTA_WRITE_SREG] = { .type = NLA_U32 },
>+ [NFTA_WRITE_OFFSET] = { .type = NLA_U32 },
>+ [NFTA_WRITE_WLEN] = { .type = NLA_U8 },
>+};
>+
>+static int nft_write_init(const struct nft_ctx *ctx,
>+ const struct nft_expr *expr,
>+ const struct nlattr * const tb[])
>+{
>+ struct nft_write_expr *priv = nft_expr_priv(expr);
>+
>+ priv->sreg = ntohl(nla_get_be32(tb[NFTA_WRITE_SREG]));
>+ priv->wlen = nla_get_u8(tb[NFTA_WRITE_WLEN]);
>+ priv->offset = ntohl(nla_get_be32(tb[NFTA_WRITE_OFFSET]));
>+
>+ return 0;
>+}
>+
>+static int nft_write_dump(struct sk_buff *skb, const struct nft_expr
>*expr)
>+{
>+ const struct nft_write_expr *priv = nft_expr_priv(expr);
>+
>+ nla_put_be32(skb, NFTA_WRITE_SREG, htonl(priv->sreg));
>+ nla_put_be32(skb, NFTA_WRITE_OFFSET, htonl(priv->offset));
>+ nla_put_u8(skb, NFTA_WRITE_WLEN, priv->wlen);
>+
>+ return 0;
>+}
>+
>+static struct nft_expr_type nft_write_type;
>+static const struct nft_expr_ops nft_write_ops = {
>+ .type = &nft_write_type,
>+ .size = NFT_EXPR_SIZE(sizeof(struct nft_write_expr)),
>+ .eval = nft_write_eval,
>+ .init = nft_write_init,
>+ .dump = nft_write_dump,
>+};
>+
>+static const struct nft_expr_ops nft_write_fast_ops = {
>+ .type = &nft_write_type,
>+ .size = NFT_EXPR_SIZE(sizeof(struct nft_write_expr)),
>+ .eval = nft_write_fast_eval,
>+ .init = nft_write_init,
>+ .dump = nft_write_dump,
>+};
>+
>+static const struct nft_expr_ops *
>+nft_write_select_ops(const struct nft_ctx *ctx,
>+ const struct nlattr * const tb[])
>+{
>+ enum nft_registers sreg;
>+ u32 offset;
>+ u8 wlen;
>+ int err;
>+
>+ if (tb[NFTA_WRITE_OFFSET] == NULL ||
>+ tb[NFTA_WRITE_WLEN] == NULL ||
>+ tb[NFTA_WRITE_SREG] == NULL)
>+ return ERR_PTR(-EINVAL);
>+
>+ wlen = nla_get_u8(tb[NFTA_WRITE_WLEN]);
>+ if (wlen == 0 || wlen > FIELD_SIZEOF(struct nft_data, data))
>+ return ERR_PTR(-EINVAL);
>+ sreg = ntohl(nla_get_be32(tb[NFTA_WRITE_SREG]));
>+ err = nft_validate_input_register(sreg);
>+ if (err < 0)
>+ return ERR_PTR(err);
>+ offset = ntohl(nla_get_u32(tb[NFTA_WRITE_OFFSET]));
>+
>+ if (wlen != 3 && wlen <= 4 && IS_ALIGNED(offset, wlen))
>+ return &nft_write_fast_ops;
>+ else
>+ return &nft_write_ops;
>+}
>+
>+static struct nft_expr_type nft_write_type __read_mostly = {
>+ .name = "write",
>+ .select_ops = &nft_write_select_ops,
>+ .policy = nft_write_policy,
>+ .maxattr = NFTA_WRITE_MAX,
>+ .owner = THIS_MODULE,
>+};
>+
>+static int __init nft_write_module_init(void)
>+{
>+ return nft_register_expr(&nft_write_type);
>+}
>+
>+static void __exit nft_write_module_exit(void)
>+{
>+ nft_unregister_expr(&nft_write_type);
>+}
>+
>+module_init(nft_write_module_init);
>+module_exit(nft_write_module_exit);
>+
>+MODULE_LICENSE("GPL");
>+MODULE_AUTHOR("Nikolay Aleksandrov <nikolay@redhat.com>");
>+MODULE_ALIAS_NFT_EXPR("write");
>--
>1.8.4.2
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [RFC PATCH] netfilter: nf_tables: add new write expression
2014-02-15 13:30 ` Patrick McHardy
@ 2014-02-15 13:35 ` Nikolay Aleksandrov
2014-02-16 10:09 ` Pablo Neira Ayuso
0 siblings, 1 reply; 12+ messages in thread
From: Nikolay Aleksandrov @ 2014-02-15 13:35 UTC (permalink / raw)
To: Patrick McHardy, netfilter-devel; +Cc: pablo
On 02/15/2014 02:30 PM, Patrick McHardy wrote:
> On 15. Februar 2014 13:17:22 GMT+00:00, Nikolay Aleksandrov <nikolay@redhat.com> wrote:
>> The new "write" expression can be used to manipulate packet data.
>> The parameters that it has are source register (source for the bytes
>> which are to be written), offset in the packet and length to write.
>> It uses a select_ops method to choose between fast ops in the cases
>> length is 1,2 or 4 bytes and slow ops (i.e. using memcpy) in other
>> cases.
>>
>> Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
>> ---
>> I needed a way (other than passing the packets to user-space) to alter
>> the ToS field via nftables, so I decided to make it a bit more general.
>> I
>> use it with the immediate expression to load the new ToS and then write
>> it.
>> If you find this useful I can post the libnftnl patch as well.
>> Right now as you can see it continues even if the "write" wasn't
>> successful
>> which should be probably changed to NFT_BREAK for that case.
>
> Yes.
>
>> This patch applies to Dave's net-next tree.
>
> I think this is a useful addition. However I prefer to put thus into the payload expression and select the proper ops based on the presence of sreg/dreg.
>
Okay, makes sense. I'll re-write it in such form taking into consideration
the other comments and will re-post after some testing.
Thank you for the feedback,
Nik
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] netfilter: nf_tables: add new write expression
2014-02-15 13:35 ` Nikolay Aleksandrov
@ 2014-02-16 10:09 ` Pablo Neira Ayuso
2014-02-16 10:36 ` Patrick McHardy
0 siblings, 1 reply; 12+ messages in thread
From: Pablo Neira Ayuso @ 2014-02-16 10:09 UTC (permalink / raw)
To: Nikolay Aleksandrov; +Cc: Patrick McHardy, netfilter-devel
On Sat, Feb 15, 2014 at 02:35:54PM +0100, Nikolay Aleksandrov wrote:
> On 02/15/2014 02:30 PM, Patrick McHardy wrote:
> > On 15. Februar 2014 13:17:22 GMT+00:00, Nikolay Aleksandrov <nikolay@redhat.com> wrote:
> >> The new "write" expression can be used to manipulate packet data.
> >> The parameters that it has are source register (source for the bytes
> >> which are to be written), offset in the packet and length to write.
> >> It uses a select_ops method to choose between fast ops in the cases
> >> length is 1,2 or 4 bytes and slow ops (i.e. using memcpy) in other
> >> cases.
> >>
> >> Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
> >> ---
> >> I needed a way (other than passing the packets to user-space) to alter
> >> the ToS field via nftables, so I decided to make it a bit more general.
> >> I
> >> use it with the immediate expression to load the new ToS and then write
> >> it.
> >> If you find this useful I can post the libnftnl patch as well.
> >> Right now as you can see it continues even if the "write" wasn't
> >> successful
> >> which should be probably changed to NFT_BREAK for that case.
> >
> > Yes.
> >
> >> This patch applies to Dave's net-next tree.
> >
> > I think this is a useful addition. However I prefer to put thus
> > into the payload expression and select the proper ops based on the
> > presence of sreg/dreg.
> >
> Okay, makes sense. I'll re-write it in such form taking into consideration
> the other comments and will re-post after some testing.
You can use this patch as reference to make it:
commit e035b77ac7be430a5fef8c9c23f60b6b50ec81c5
Author: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Date: Thu Dec 26 16:38:01 2013 +0100
netfilter: nf_tables: nft_meta module get/set ops
That patch is similar to what you propose, but it sets the meta fields
of a packet.
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [RFC PATCH] netfilter: nf_tables: add new write expression
2014-02-16 10:09 ` Pablo Neira Ayuso
@ 2014-02-16 10:36 ` Patrick McHardy
2014-02-16 10:49 ` Nikolay Aleksandrov
0 siblings, 1 reply; 12+ messages in thread
From: Patrick McHardy @ 2014-02-16 10:36 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: Nikolay Aleksandrov, netfilter-devel
On Sun, Feb 16, 2014 at 11:09:53AM +0100, Pablo Neira Ayuso wrote:
> On Sat, Feb 15, 2014 at 02:35:54PM +0100, Nikolay Aleksandrov wrote:
> > On 02/15/2014 02:30 PM, Patrick McHardy wrote:
> > > On 15. Februar 2014 13:17:22 GMT+00:00, Nikolay Aleksandrov <nikolay@redhat.com> wrote:
> > >> The new "write" expression can be used to manipulate packet data.
> > >> The parameters that it has are source register (source for the bytes
> > >> which are to be written), offset in the packet and length to write.
> > >> It uses a select_ops method to choose between fast ops in the cases
> > >> length is 1,2 or 4 bytes and slow ops (i.e. using memcpy) in other
> > >> cases.
> > >>
> > >> Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
> > >> ---
> > >> I needed a way (other than passing the packets to user-space) to alter
> > >> the ToS field via nftables, so I decided to make it a bit more general.
> > >> I
> > >> use it with the immediate expression to load the new ToS and then write
> > >> it.
> > >> If you find this useful I can post the libnftnl patch as well.
> > >> Right now as you can see it continues even if the "write" wasn't
> > >> successful
> > >> which should be probably changed to NFT_BREAK for that case.
> > >
> > > Yes.
> > >
> > >> This patch applies to Dave's net-next tree.
> > >
> > > I think this is a useful addition. However I prefer to put thus
> > > into the payload expression and select the proper ops based on the
> > > presence of sreg/dreg.
> > >
> > Okay, makes sense. I'll re-write it in such form taking into consideration
> > the other comments and will re-post after some testing.
>
> You can use this patch as reference to make it:
>
> commit e035b77ac7be430a5fef8c9c23f60b6b50ec81c5
> Author: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
> Date: Thu Dec 26 16:38:01 2013 +0100
>
> netfilter: nf_tables: nft_meta module get/set ops
>
> That patch is similar to what you propose, but it sets the meta fields
> of a packet.
Actually I'd propose two different init functions, that's just not pretty.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] netfilter: nf_tables: add new write expression
2014-02-16 10:36 ` Patrick McHardy
@ 2014-02-16 10:49 ` Nikolay Aleksandrov
2014-02-16 11:00 ` Patrick McHardy
0 siblings, 1 reply; 12+ messages in thread
From: Nikolay Aleksandrov @ 2014-02-16 10:49 UTC (permalink / raw)
To: Patrick McHardy, Pablo Neira Ayuso; +Cc: netfilter-devel
On 02/16/2014 11:36 AM, Patrick McHardy wrote:
> On Sun, Feb 16, 2014 at 11:09:53AM +0100, Pablo Neira Ayuso wrote:
>> On Sat, Feb 15, 2014 at 02:35:54PM +0100, Nikolay Aleksandrov wrote:
>>> On 02/15/2014 02:30 PM, Patrick McHardy wrote:
>>>> On 15. Februar 2014 13:17:22 GMT+00:00, Nikolay Aleksandrov <nikolay@redhat.com> wrote:
>>>>> The new "write" expression can be used to manipulate packet data.
>>>>> The parameters that it has are source register (source for the bytes
>>>>> which are to be written), offset in the packet and length to write.
>>>>> It uses a select_ops method to choose between fast ops in the cases
>>>>> length is 1,2 or 4 bytes and slow ops (i.e. using memcpy) in other
>>>>> cases.
>>>>>
>>>>> Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
>>>>> ---
>>>>> I needed a way (other than passing the packets to user-space) to alter
>>>>> the ToS field via nftables, so I decided to make it a bit more general.
>>>>> I
>>>>> use it with the immediate expression to load the new ToS and then write
>>>>> it.
>>>>> If you find this useful I can post the libnftnl patch as well.
>>>>> Right now as you can see it continues even if the "write" wasn't
>>>>> successful
>>>>> which should be probably changed to NFT_BREAK for that case.
>>>>
>>>> Yes.
>>>>
>>>>> This patch applies to Dave's net-next tree.
>>>>
>>>> I think this is a useful addition. However I prefer to put thus
>>>> into the payload expression and select the proper ops based on the
>>>> presence of sreg/dreg.
>>>>
>>> Okay, makes sense. I'll re-write it in such form taking into consideration
>>> the other comments and will re-post after some testing.
>>
>> You can use this patch as reference to make it:
>>
>> commit e035b77ac7be430a5fef8c9c23f60b6b50ec81c5
>> Author: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
>> Date: Thu Dec 26 16:38:01 2013 +0100
>>
>> netfilter: nf_tables: nft_meta module get/set ops
>>
>> That patch is similar to what you propose, but it sets the meta fields
>> of a packet.
>
> Actually I'd propose two different init functions, that's just not pretty.
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
Hm, okay.
How about something else, since I wanted to make use of the inlined payload
fast op, couldn't I just break the dreg/sreg in separate variables and
based on whether sreg is set act in the fast op (i.e. get/set based on
that)? That way we can save some code duplication and keep the ops as
they are. (That'll work for the slow op as well, actually.)
Also, there's a small problem for payload because the code in the
select_ops function:
if (len <= 4 && IS_ALIGNED(offset, len) && base !=
NFT_PAYLOAD_LL_HEADER)
return &nft_payload_fast_ops;
else
return &nft_payload_ops;
Has a problem when the offset ends in 101b and length of 3 is used, then
the fast ops get selected but since that case isn't handled there, we'll
only load 1 byte from the offset, which is fine for loading since we can
just switch to 4 bytes and mask out later the unneeded byte when comparing
for example, but for writing it's a problem since someone might actually
want to write out 3 bytes. Of course one can always add 2 expressions (1
byte + 2 byte write) :-)
Nik
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [RFC PATCH] netfilter: nf_tables: add new write expression
2014-02-16 10:49 ` Nikolay Aleksandrov
@ 2014-02-16 11:00 ` Patrick McHardy
2014-02-16 11:05 ` Nikolay Aleksandrov
0 siblings, 1 reply; 12+ messages in thread
From: Patrick McHardy @ 2014-02-16 11:00 UTC (permalink / raw)
To: Nikolay Aleksandrov; +Cc: Pablo Neira Ayuso, netfilter-devel
On Sun, Feb 16, 2014 at 11:49:12AM +0100, Nikolay Aleksandrov wrote:
> On 02/16/2014 11:36 AM, Patrick McHardy wrote:
> >>
> >> netfilter: nf_tables: nft_meta module get/set ops
> >>
> >> That patch is similar to what you propose, but it sets the meta fields
> >> of a packet.
> >
> > Actually I'd propose two different init functions, that's just not pretty.
> >
> Hm, okay.
> How about something else, since I wanted to make use of the inlined payload
> fast op, couldn't I just break the dreg/sreg in separate variables and
> based on whether sreg is set act in the fast op (i.e. get/set based on
> that) ? That way we can save some code duplication and keep the ops as
> they're. (That'll work for the slow op as well actually)
I don't agree to adding a set fast op. The get fast op is meant to be
small since it's the most common case and is inlined into the main loop.
Anything added there needs a *really* good justification. Modifying
packet data isn't a very common operation and should be kept separate.
Outside of the main loop, there's no need for a fast op as well since
memcpy *is* fast and any optimized implementation will already do the
same thing you do.
> Also, there's a small problem for payload because the code in the
> select_ops function:
> if (len <= 4 && IS_ALIGNED(offset, len) && base !=
> NFT_PAYLOAD_LL_HEADER)
> return &nft_payload_fast_ops;
> else
> return &nft_payload_ops;
>
> Has a problem when the offset ends in 101b and length of 3 is used, then
> the fast ops get selected but since that case isn't handled there, we'll
> only load 1 byte from the offset, which is fine for loading since we can
> just switch to 4 bytes and mask out later the unneeded byte when comparing
> for example, but for writing it's a problem since someone might actually
> want to write out 3 bytes. Of course one can always add 2 expressions (1
> byte + 2 byte write) :-)
Good catch, we should make sure the length is a power of two since the
fast version is only intended for well-aligned loads.
Would you care to send a patch?
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] netfilter: nf_tables: add new write expression
2014-02-16 11:00 ` Patrick McHardy
@ 2014-02-16 11:05 ` Nikolay Aleksandrov
0 siblings, 0 replies; 12+ messages in thread
From: Nikolay Aleksandrov @ 2014-02-16 11:05 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Pablo Neira Ayuso, netfilter-devel
On 02/16/2014 12:00 PM, Patrick McHardy wrote:
> On Sun, Feb 16, 2014 at 11:49:12AM +0100, Nikolay Aleksandrov wrote:
>> On 02/16/2014 11:36 AM, Patrick McHardy wrote:
>>>>
>>>> netfilter: nf_tables: nft_meta module get/set ops
>>>>
>>>> That patch is similar to what you propose, but it sets the meta fields
>>>> of a packet.
>>>
>>> Actually I'd propose two different init functions, that's just not pretty.
>>>
>> Hm, okay.
>> How about something else, since I wanted to make use of the inlined payload
>> fast op, couldn't I just break the dreg/sreg in separate variables and
>> based on whether sreg is set act in the fast op (i.e. get/set based on
>> that) ? That way we can save some code duplication and keep the ops as
>> they're. (That'll work for the slow op as well actually)
>
> I don't agree to adding a set fast op. The get fast op is meant to be
> small since its the most common case and is inlined into the main loop.
> Anything added there needs a *really* good justification. Modifying
> packet data isn't a very common operation and should be kept seperate.
>
> Outside of the main loop, there's no need for a fast op as well since
> memcpy *is* fast and any optimized implementation will already do the
> same thing you do.
>
Right, okay going the standard way then :-)
>> Also, there's a small problem for payload because the code in the
>> select_ops function:
>> if (len <= 4 && IS_ALIGNED(offset, len) && base !=
>> NFT_PAYLOAD_LL_HEADER)
>> return &nft_payload_fast_ops;
>> else
>> return &nft_payload_ops;
>>
>> Has a problem when the offset ends in 101b and length of 3 is used, then
>> the fast ops get selected but since that case isn't handled there, we'll
>> only load 1 byte from the offset, which is fine for loading since we can
>> just switch to 4 bytes and mask out later the unneeded byte when comparing
>> for example, but for writing it's a problem since someone might actually
>> want to write out 3 bytes. Of course one can always add 2 expressions (1
>> byte + 2 byte write) :-)
>
> Good catch, we should make sure the offset is a power of two since the
> fast version is only intended for well aligned loads.
>
> Would you care to send a patch?
Sure, I'll take care of it.
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] netfilter: nf_tables: add new write expression
2014-02-15 13:17 [RFC PATCH] netfilter: nf_tables: add new write expression Nikolay Aleksandrov
2014-02-15 13:19 ` Nikolay Aleksandrov
2014-02-15 13:30 ` Patrick McHardy
@ 2014-02-15 13:36 ` Florian Westphal
2014-02-15 13:38 ` Nikolay Aleksandrov
2014-02-15 13:43 ` Patrick McHardy
2 siblings, 2 replies; 12+ messages in thread
From: Florian Westphal @ 2014-02-15 13:36 UTC (permalink / raw)
To: Nikolay Aleksandrov; +Cc: netfilter-devel, pablo, kaber
Nikolay Aleksandrov <nikolay@redhat.com> wrote:
> The new "write" expression can be used to manipulate packet data.
Thanks. This is a very useful feature (also needed for TCPMSS mangling,
for instance).
General question: How are/should packet checksums be handled after packet
content was altered?
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] netfilter: nf_tables: add new write expression
2014-02-15 13:36 ` Florian Westphal
@ 2014-02-15 13:38 ` Nikolay Aleksandrov
2014-02-15 13:43 ` Patrick McHardy
1 sibling, 0 replies; 12+ messages in thread
From: Nikolay Aleksandrov @ 2014-02-15 13:38 UTC (permalink / raw)
To: Florian Westphal; +Cc: netfilter-devel, pablo, kaber
On 02/15/2014 02:36 PM, Florian Westphal wrote:
> Nikolay Aleksandrov <nikolay@redhat.com> wrote:
>> The new "write" expression can be used to manipulate packet data.
>
> Thanks. This is very useful feature (needed also for TCPMSS mangling
> for instance).
>
> General question: How are/should packet checksums be handled after packet
> content was altered?
>
hehe very good question, right now I do it with a hacked version of this
patch, but I think we can add some more generic checksum op where you can
choose which checksum to recompute. I've also thought about making the
destination (where to write the checksum) variable for some corner cases,
but this is a stretch.
Cheers,
Nik
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] netfilter: nf_tables: add new write expression
2014-02-15 13:36 ` Florian Westphal
2014-02-15 13:38 ` Nikolay Aleksandrov
@ 2014-02-15 13:43 ` Patrick McHardy
1 sibling, 0 replies; 12+ messages in thread
From: Patrick McHardy @ 2014-02-15 13:43 UTC (permalink / raw)
To: Florian Westphal, Nikolay Aleksandrov; +Cc: netfilter-devel, pablo
On 15. Februar 2014 13:36:13 GMT+00:00, Florian Westphal <fw@strlen.de> wrote:
>Nikolay Aleksandrov <nikolay@redhat.com> wrote:
>> The new "write" expression can be used to manipulate packet data.
>
>Thanks. This is very useful feature (needed also for TCPMSS mangling
>for instance).
>
>General question: How are/should packet checksums be handled after
>packet
>content was altered?
Good question indeed. I guess we need to make this dependent on the payload base and protocols involved. Luckily, in most cases we can rely on checksum offload to take care of this.
Maybe we can have userspace specify the necessary parameters such as the checksum position and whether a pseudo header is used.
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2014-02-16 11:05 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-02-15 13:17 [RFC PATCH] netfilter: nf_tables: add new write expression Nikolay Aleksandrov
2014-02-15 13:19 ` Nikolay Aleksandrov
2014-02-15 13:30 ` Patrick McHardy
2014-02-15 13:35 ` Nikolay Aleksandrov
2014-02-16 10:09 ` Pablo Neira Ayuso
2014-02-16 10:36 ` Patrick McHardy
2014-02-16 10:49 ` Nikolay Aleksandrov
2014-02-16 11:00 ` Patrick McHardy
2014-02-16 11:05 ` Nikolay Aleksandrov
2014-02-15 13:36 ` Florian Westphal
2014-02-15 13:38 ` Nikolay Aleksandrov
2014-02-15 13:43 ` Patrick McHardy
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.