netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC 1/2] netfilter: nf_tables: move set netlink messages into the batch
@ 2014-03-25 20:39 Pablo Neira Ayuso
  2014-03-25 20:39 ` [PATCH RFC nft 2/2] src: add set netlink message to " Pablo Neira Ayuso
  2014-03-26 11:25 ` [PATCH RFC 1/2] netfilter: nf_tables: move set netlink messages into " Patrick McHardy
  0 siblings, 2 replies; 8+ messages in thread
From: Pablo Neira Ayuso @ 2014-03-25 20:39 UTC (permalink / raw)
  To: netfilter-devel; +Cc: kaber

This patch reworks the nf_tables API so set updates are moved into
the same batch that contains rule updates. This speeds up rule-set
updates since we skip a dialog of four messages between kernel and
user-space (two in each direction). The procedure changes from:

 1) create the set and send netlink message to the kernel
 2) process the response from the kernel that contains the allocated name.
 3) add the set elements and send netlink message to the kernel.
 4) process the response from the kernel (to check for errors).

To:

 1) add the set to the batch.
 2) add the set elements to the batch.
 3) add the rule that points to the set.
 4) send batch to the kernel.

The idea is to allocate an internal set ID to the batch that can be
used when adding set elements and rules that refer to the set in the
batch.

Backward compatibility has only been retained in userspace; this
means that new nft versions can talk to the kernel in both the new
and the old fashion.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
I'm considering preparing a follow up patch to add tables and chains
to the big batch message as well.

 include/net/netfilter/nf_tables.h        |    2 +
 include/net/netns/nftables.h             |    2 +
 include/uapi/linux/netfilter/nf_tables.h |    6 ++
 net/netfilter/nf_tables_api.c            |  124 ++++++++++++++++++++++++++----
 net/netfilter/nft_lookup.c               |   14 +++-
 5 files changed, 130 insertions(+), 18 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e6bc14d..b749e4d 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -222,6 +222,8 @@ static inline void *nft_set_priv(const struct nft_set *set)
 
 struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
 				     const struct nlattr *nla);
+struct nft_set *nf_tables_set_lookup2(const struct net *net,
+				      const struct nlattr *nla);
 
 /**
  *	struct nft_set_binding - nf_tables set binding
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 26a394c..742fe57 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -8,6 +8,8 @@ struct nft_af_info;
 struct netns_nftables {
 	struct list_head	af_info;
 	struct list_head	commit_list;
+	struct list_head	newset_list;
+	struct list_head	delset_list;
 	struct nft_af_info	*ipv4;
 	struct nft_af_info	*ipv6;
 	struct nft_af_info	*inet;
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index c88ccbf..3776beb 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -221,6 +221,7 @@ enum nft_set_flags {
  * @NFTA_SET_KEY_LEN: key data length (NLA_U32)
  * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32)
  * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32)
+ * @NFTA_SET_ID: set ID (NLA_U64)
  */
 enum nft_set_attributes {
 	NFTA_SET_UNSPEC,
@@ -231,6 +232,7 @@ enum nft_set_attributes {
 	NFTA_SET_KEY_LEN,
 	NFTA_SET_DATA_TYPE,
 	NFTA_SET_DATA_LEN,
+	NFTA_SET_ID,
 	__NFTA_SET_MAX
 };
 #define NFTA_SET_MAX		(__NFTA_SET_MAX - 1)
@@ -266,12 +268,14 @@ enum nft_set_elem_attributes {
  * @NFTA_SET_ELEM_LIST_TABLE: table of the set to be changed (NLA_STRING)
  * @NFTA_SET_ELEM_LIST_SET: name of the set to be changed (NLA_STRING)
  * @NFTA_SET_ELEM_LIST_ELEMENTS: list of set elements (NLA_NESTED: nft_set_elem_attributes)
+ * @NFTA_SET_ELEM_LIST_SET_ID: set ID (NLA_U64)
  */
 enum nft_set_elem_list_attributes {
 	NFTA_SET_ELEM_LIST_UNSPEC,
 	NFTA_SET_ELEM_LIST_TABLE,
 	NFTA_SET_ELEM_LIST_SET,
 	NFTA_SET_ELEM_LIST_ELEMENTS,
+	NFTA_SET_ELEM_LIST_SET_ID,
 	__NFTA_SET_ELEM_LIST_MAX
 };
 #define NFTA_SET_ELEM_LIST_MAX	(__NFTA_SET_ELEM_LIST_MAX - 1)
@@ -457,12 +461,14 @@ enum nft_cmp_attributes {
  * @NFTA_LOOKUP_SET: name of the set where to look for (NLA_STRING)
  * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers)
  * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_LOOKUP_SET_ID: set ID (NLA_U64)
  */
 enum nft_lookup_attributes {
 	NFTA_LOOKUP_UNSPEC,
 	NFTA_LOOKUP_SET,
 	NFTA_LOOKUP_SREG,
 	NFTA_LOOKUP_DREG,
+	NFTA_LOOKUP_SET_ID,
 	__NFTA_LOOKUP_MAX
 };
 #define NFTA_LOOKUP_MAX		(__NFTA_LOOKUP_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 43ae487..ae05c24 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1807,10 +1807,37 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
 	return err;
 }
 
+struct nft_set_trans {
+	struct list_head	list;
+	struct nft_set		*set;
+	struct nft_ctx		ctx;
+	u64			id;
+};
+
+static void nf_tables_set_destroy(const struct nft_ctx *ctx,
+				  struct nft_set *set);
+static int nf_tables_set_notify(const struct nft_ctx *ctx,
+				const struct nft_set *set,
+				int event);
+static void nft_set_destroy(struct nft_set *set);
+
+#define __NFT_SET_INACTIVE	(1 << 15)	/* internal set flag */
+
 static int nf_tables_commit(struct sk_buff *skb)
 {
 	struct net *net = sock_net(skb->sk);
 	struct nft_rule_trans *rupd, *tmp;
+	struct nft_set_trans *st, *stmp;
+
+	/* New sets come in first place before you can bind them to rules */
+	list_for_each_entry_safe(st, stmp, &net->nft.newset_list, list) {
+		list_del(&st->list);
+		st->set->flags &= ~__NFT_SET_INACTIVE;
+		list_add_tail(&st->set->list,
+			      (struct list_head *)&st->ctx.table->sets);
+		nf_tables_set_notify(&st->ctx, st->set, NFT_MSG_NEWSET);
+		kfree(st);
+	}
 
 	/* Bump generation counter, invalidate any dump in progress */
 	net->nft.genctr++;
@@ -1857,6 +1884,14 @@ static int nf_tables_commit(struct sk_buff *skb)
 		kfree(rupd);
 	}
 
+	/* We can delete sets that are not bound to any rules anymore */
+	list_for_each_entry_safe(st, stmp, &net->nft.delset_list, list) {
+		list_del(&st->set->list);
+		nf_tables_set_destroy(&st->ctx, st->set);
+		list_del(&st->list);
+		kfree(st);
+	}
+
 	return 0;
 }
 
@@ -1864,6 +1899,7 @@ static int nf_tables_abort(struct sk_buff *skb)
 {
 	struct net *net = sock_net(skb->sk);
 	struct nft_rule_trans *rupd, *tmp;
+	struct nft_set_trans *st, *stmp;
 
 	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
 		if (!nft_rule_is_active_next(net, rupd->rule)) {
@@ -1885,6 +1921,17 @@ static int nf_tables_abort(struct sk_buff *skb)
 		list_del(&rupd->list);
 		kfree(rupd);
 	}
+	/* release newly created sets in this batch that are inactive */
+	list_for_each_entry_safe(st, stmp, &net->nft.newset_list, list) {
+		list_del(&st->list);
+		nft_set_destroy(st->set);
+		kfree(st);
+	}
+	/* abort scheduled removal of sets */
+	list_for_each_entry_safe(st, stmp, &net->nft.delset_list, list) {
+		list_del(&st->list);
+		kfree(st);
+	}
 
 	return 0;
 }
@@ -1953,6 +2000,7 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
 	[NFTA_SET_KEY_LEN]		= { .type = NLA_U32 },
 	[NFTA_SET_DATA_TYPE]		= { .type = NLA_U32 },
 	[NFTA_SET_DATA_LEN]		= { .type = NLA_U32 },
+	[NFTA_SET_ID]			= { .type = NLA_U64 },
 };
 
 static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
@@ -1999,6 +2047,19 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
 	return ERR_PTR(-ENOENT);
 }
 
+struct nft_set *nf_tables_set_lookup2(const struct net *net,
+				      const struct nlattr *nla)
+{
+	struct nft_set_trans *st;
+	u64 id = be64_to_cpu(nla_get_be64(nla));
+
+	list_for_each_entry(st, &net->nft.newset_list, list) {
+		if (id == st->id)
+			return st->set;
+	}
+	return ERR_PTR(-ENOENT);
+}
+
 static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
 				    const char *name)
 {
@@ -2305,6 +2366,25 @@ err:
 	return err;
 }
 
+static int nf_tables_set_trans_add(struct list_head *list, struct nft_ctx *ctx,
+				   struct nft_set *set)
+{
+	struct nft_set_trans *strans;
+
+	strans = kmalloc(sizeof(struct nft_set_trans), GFP_ATOMIC);
+	if (strans == NULL)
+		return -ENOMEM;
+
+	strans->set = set;
+	strans->ctx = *ctx;
+	if (ctx->nla[NFTA_SET_ID])
+		strans->id = be64_to_cpu(nla_get_be64(ctx->nla[NFTA_SET_ID]));
+
+	set->flags |= __NFT_SET_INACTIVE;
+	list_add_tail(&strans->list, list);
+	return 0;
+}
+
 static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 			    const struct nlmsghdr *nlh,
 			    const struct nlattr * const nla[])
@@ -2429,8 +2509,10 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 	if (err < 0)
 		goto err2;
 
-	list_add_tail(&set->list, &table->sets);
-	nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET);
+	err = nf_tables_set_trans_add(&net->nft.newset_list, &ctx, set);
+	if (err < 0)
+		goto err2;
+
 	return 0;
 
 err2:
@@ -2440,16 +2522,20 @@ err1:
 	return err;
 }
 
-static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+static void nft_set_destroy(struct nft_set *set)
 {
-	list_del(&set->list);
-	nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
-
 	set->ops->destroy(set);
 	module_put(set->ops->owner);
 	kfree(set);
 }
 
+static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+{
+	list_del(&set->list);
+	nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
+	nft_set_destroy(set);
+}
+
 static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
 			    const struct nlmsghdr *nlh,
 			    const struct nlattr * const nla[])
@@ -2474,8 +2560,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
 	if (!list_empty(&set->bindings))
 		return -EBUSY;
 
-	nf_tables_set_destroy(&ctx, set);
-	return 0;
+	return nf_tables_set_trans_add(&ctx.net->nft.delset_list, &ctx, set);
 }
 
 static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
@@ -2534,7 +2619,8 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
 {
 	list_del(&binding->list);
 
-	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
+	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
+	    !(set->flags & __NFT_SET_INACTIVE))
 		nf_tables_set_destroy(ctx, set);
 }
 
@@ -2552,6 +2638,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
 	[NFTA_SET_ELEM_LIST_TABLE]	= { .type = NLA_STRING },
 	[NFTA_SET_ELEM_LIST_SET]	= { .type = NLA_STRING },
 	[NFTA_SET_ELEM_LIST_ELEMENTS]	= { .type = NLA_NESTED },
+	[NFTA_SET_ELEM_LIST_SET_ID]	= { .type = NLA_U64 },
 };
 
 static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
@@ -2815,6 +2902,7 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
 				const struct nlmsghdr *nlh,
 				const struct nlattr * const nla[])
 {
+	struct net *net = sock_net(skb->sk);
 	const struct nlattr *attr;
 	struct nft_set *set;
 	struct nft_ctx ctx;
@@ -2824,7 +2912,13 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+	if (nla[NFTA_SET_ELEM_LIST_SET_ID]) {
+		set = nf_tables_set_lookup2(net,
+					    nla[NFTA_SET_ELEM_LIST_SET_ID]);
+	} else {
+		set = nf_tables_set_lookup(ctx.table,
+					   nla[NFTA_SET_ELEM_LIST_SET]);
+	}
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
@@ -2953,7 +3047,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_rule_policy,
 	},
 	[NFT_MSG_NEWSET] = {
-		.call		= nf_tables_newset,
+		.call_batch	= nf_tables_newset,
 		.attr_count	= NFTA_SET_MAX,
 		.policy		= nft_set_policy,
 	},
@@ -2963,12 +3057,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_set_policy,
 	},
 	[NFT_MSG_DELSET] = {
-		.call		= nf_tables_delset,
+		.call_batch	= nf_tables_delset,
 		.attr_count	= NFTA_SET_MAX,
 		.policy		= nft_set_policy,
 	},
 	[NFT_MSG_NEWSETELEM] = {
-		.call		= nf_tables_newsetelem,
+		.call_batch	= nf_tables_newsetelem,
 		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
 		.policy		= nft_set_elem_list_policy,
 	},
@@ -2978,7 +3072,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_set_elem_list_policy,
 	},
 	[NFT_MSG_DELSETELEM] = {
-		.call		= nf_tables_delsetelem,
+		.call_batch	= nf_tables_delsetelem,
 		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
 		.policy		= nft_set_elem_list_policy,
 	},
@@ -3371,6 +3465,8 @@ static int nf_tables_init_net(struct net *net)
 {
 	INIT_LIST_HEAD(&net->nft.af_info);
 	INIT_LIST_HEAD(&net->nft.commit_list);
+	INIT_LIST_HEAD(&net->nft.newset_list);
+	INIT_LIST_HEAD(&net->nft.delset_list);
 	return 0;
 }
 
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 7fd2bea..28d28ea 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -51,13 +51,19 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
 	struct nft_set *set;
 	int err;
 
-	if (tb[NFTA_LOOKUP_SET] == NULL ||
+	if ((tb[NFTA_LOOKUP_SET] == NULL && tb[NFTA_LOOKUP_SET_ID] == NULL) ||
 	    tb[NFTA_LOOKUP_SREG] == NULL)
 		return -EINVAL;
 
-	set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
-	if (IS_ERR(set))
-		return PTR_ERR(set);
+	if (tb[NFTA_LOOKUP_SET_ID]) {
+		set = nf_tables_set_lookup2(ctx->net, tb[NFTA_LOOKUP_SET_ID]);
+		if (IS_ERR(set))
+			return PTR_ERR(set);
+	} else {
+		set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
+		if (IS_ERR(set))
+			return PTR_ERR(set);
+	}
 
 	priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG]));
 	err = nft_validate_input_register(priv->sreg);
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH RFC nft 2/2] src: add set netlink message to the batch
  2014-03-25 20:39 [PATCH RFC 1/2] netfilter: nf_tables: move set netlink messages into the batch Pablo Neira Ayuso
@ 2014-03-25 20:39 ` Pablo Neira Ayuso
  2014-03-26 11:25 ` [PATCH RFC 1/2] netfilter: nf_tables: move set netlink messages into " Patrick McHardy
  1 sibling, 0 replies; 8+ messages in thread
From: Pablo Neira Ayuso @ 2014-03-25 20:39 UTC (permalink / raw)
  To: netfilter-devel; +Cc: kaber

This patch moves the netlink set messages to the batch that contains
the rules. This helps to speed up rule-set restoration time by
changing the mode of operation from:

 1) create the set message and send it to the kernel.
 2) process the response message that contains the allocated name
    from the kernel.
 3) add the set elements and send it to the kernel.
 4) process the response message from the kernel indicating the
    result.

To:

 1) add the set to the batch.
 2) add the set elements to the batch.
 3) add the rule that points to the set.
 4) send batch to the kernel.

To achieve this, an internal set ID which is unique to the batch
is allocated as suggested by Patrick.

To retain backward compatibility, nft initially checks whether the
kernel supports sets in batches. If it does not, nft falls back to
the previous (slower) mode of operation.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/mnl.h           |   12 ++++
 include/netlink.h       |    4 ++
 include/rule.h          |    2 +
 src/main.c              |    2 +
 src/mnl.c               |  130 +++++++++++++++++++++++++++++++++++
 src/netlink.c           |  173 ++++++++++++++++++++++++++++++++++++++++++++---
 src/netlink_linearize.c |    8 +++
 7 files changed, 323 insertions(+), 8 deletions(-)

diff --git a/include/mnl.h b/include/mnl.h
index f4de27d..d62c146 100644
--- a/include/mnl.h
+++ b/include/mnl.h
@@ -67,4 +67,16 @@ int mnl_nft_setelem_get(struct mnl_socket *nf_sock, struct nft_set *nls);
 
 struct nft_ruleset *mnl_nft_ruleset_dump(struct mnl_socket *nf_sock,
 					 uint32_t family);
+
+int mnl_nft_set_batch_add(struct mnl_socket *nf_sock, struct nft_set *nls,
+			  unsigned int flags);
+int mnl_nft_set_batch_del(struct mnl_socket *nf_sock, struct nft_set *nls,
+			  unsigned int flags);
+int mnl_nft_setelem_batch_add(struct mnl_socket *nf_sock, struct nft_set *nls,
+			      unsigned int flags);
+int mnl_nft_setelem_batch_del(struct mnl_socket *nf_sock, struct nft_set *nls,
+			      unsigned int flags);
+
+bool mnl_set_batch_supported(struct mnl_socket *nf_sock);
+
 #endif /* _NFTABLES_MNL_H_ */
diff --git a/include/netlink.h b/include/netlink.h
index 4e3f8aa..4f6bd5a 100644
--- a/include/netlink.h
+++ b/include/netlink.h
@@ -30,6 +30,7 @@ struct netlink_ctx {
 	struct set		*set;
 	const void		*data;
 	uint32_t		seqnum;
+	bool			set_batch_supported;
 };
 
 extern struct nft_table *alloc_nft_table(const struct handle *h);
@@ -142,4 +143,7 @@ extern int netlink_io_error(struct netlink_ctx *ctx,
 extern struct nft_ruleset *netlink_dump_ruleset(struct netlink_ctx *ctx,
 						const struct handle *h,
 						const struct location *loc);
+
+bool netlink_set_batch_supported(void);
+
 #endif /* NFTABLES_NETLINK_H */
diff --git a/include/rule.h b/include/rule.h
index ecf801f..226353d 100644
--- a/include/rule.h
+++ b/include/rule.h
@@ -12,6 +12,7 @@
  * @table:	table name
  * @chain:	chain name (chains and rules only)
  * @set:	set name (sets only)
+ * @set_id:	set ID (sets only)
  * @handle:	rule handle (rules only)
  * @position:	rule position (rules only)
  * @comment:	human-readable comment (rules only)
@@ -21,6 +22,7 @@ struct handle {
 	const char		*table;
 	const char		*chain;
 	const char		*set;
+	uint64_t		set_id;
 	uint64_t		handle;
 	uint64_t		position;
 	const char		*comment;
diff --git a/src/main.c b/src/main.c
index 9d50577..355f606 100644
--- a/src/main.c
+++ b/src/main.c
@@ -170,6 +170,7 @@ static int nft_netlink(struct parser_state *state, struct list_head *msgs)
 	struct mnl_err *err, *tmp;
 	LIST_HEAD(err_list);
 	uint32_t batch_seqnum;
+	bool set_batch_supported = netlink_set_batch_supported();
 	int ret = 0;
 
 	batch_seqnum = mnl_batch_begin();
@@ -177,6 +178,7 @@ static int nft_netlink(struct parser_state *state, struct list_head *msgs)
 		memset(&ctx, 0, sizeof(ctx));
 		ctx.msgs = msgs;
 		ctx.seqnum = cmd->seqnum = mnl_seqnum_alloc();
+		ctx.set_batch_supported = set_batch_supported;
 		init_list_head(&ctx.list);
 		ret = do_command(&ctx, cmd);
 		if (ret < 0)
diff --git a/src/mnl.c b/src/mnl.c
index e825fb0..c5d9b27 100644
--- a/src/mnl.c
+++ b/src/mnl.c
@@ -622,6 +622,38 @@ int mnl_nft_set_delete(struct mnl_socket *nf_sock, struct nft_set *nls,
 	return mnl_talk(nf_sock, nlh, nlh->nlmsg_len, NULL, NULL);
 }
 
+int mnl_nft_set_batch_add(struct mnl_socket *nf_sock, struct nft_set *nls,
+			  unsigned int flags)
+{
+	struct nlmsghdr *nlh;
+
+	nlh = nft_set_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch),
+			NFT_MSG_NEWSET,
+			nft_set_attr_get_u32(nls, NFT_SET_ATTR_FAMILY),
+			NLM_F_CREATE | flags, seq);
+	nft_set_nlmsg_build_payload(nlh, nls);
+	if (!mnl_nlmsg_batch_next(batch))
+		mnl_batch_page_add();
+
+	return 0;
+}
+
+int mnl_nft_set_batch_del(struct mnl_socket *nf_sock, struct nft_set *nls,
+			  unsigned int flags)
+{
+	struct nlmsghdr *nlh;
+
+	nlh = nft_set_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch),
+			NFT_MSG_DELSET,
+			nft_set_attr_get_u32(nls, NFT_SET_ATTR_FAMILY),
+			flags, seq);
+	nft_set_nlmsg_build_payload(nlh, nls);
+	if (!mnl_nlmsg_batch_next(batch))
+		mnl_batch_page_add();
+
+	return 0;
+}
+
 static int set_cb(const struct nlmsghdr *nlh, void *data)
 {
 	struct nft_set_list *nls_list = data;
@@ -734,6 +766,38 @@ static int set_elem_cb(const struct nlmsghdr *nlh, void *data)
 	return MNL_CB_OK;
 }
 
+int mnl_nft_setelem_batch_add(struct mnl_socket *nf_sock, struct nft_set *nls,
+			      unsigned int flags)
+{
+	struct nlmsghdr *nlh;
+
+	nlh = nft_set_elem_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch),
+			NFT_MSG_NEWSETELEM,
+			nft_set_attr_get_u32(nls, NFT_SET_ATTR_FAMILY),
+			NLM_F_CREATE | flags, seq);
+	nft_set_elems_nlmsg_build_payload(nlh, nls);
+	if (!mnl_nlmsg_batch_next(batch))
+		mnl_batch_page_add();
+
+	return 0;
+}
+
+int mnl_nft_setelem_batch_del(struct mnl_socket *nf_sock, struct nft_set *nls,
+			      unsigned int flags)
+{
+	struct nlmsghdr *nlh;
+
+	nlh = nft_set_elem_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch),
+			NFT_MSG_DELSETELEM,
+			nft_set_attr_get_u32(nls, NFT_SET_ATTR_FAMILY),
+			0, seq);
+	nft_set_elems_nlmsg_build_payload(nlh, nls);
+	if (!mnl_nlmsg_batch_next(batch))
+		mnl_batch_page_add();
+
+	return 0;
+}
+
 int mnl_nft_setelem_get(struct mnl_socket *nf_sock, struct nft_set *nls)
 {
 	char buf[MNL_SOCKET_BUFFER_SIZE];
@@ -805,3 +869,69 @@ out:
 	nft_ruleset_free(rs);
 	return NULL;
 }
+
+static void nft_mnl_batch_put(char *buf, uint16_t type, uint32_t seq)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfg;
+
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = type;
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+	nlh->nlmsg_seq = seq;
+
+	nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+	nfg->nfgen_family = AF_INET;
+	nfg->version = NFNETLINK_V0;
+	nfg->res_id = NFNL_SUBSYS_NFTABLES;
+}
+
+bool mnl_set_batch_supported(struct mnl_socket *nf_sock)
+{
+	struct mnl_nlmsg_batch *b;
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh;
+	int ret;
+
+	b = mnl_nlmsg_batch_start(buf, sizeof(buf));
+
+	nft_mnl_batch_put(mnl_nlmsg_batch_current(b), NFNL_MSG_BATCH_BEGIN,
+			  seq++);
+	mnl_nlmsg_batch_next(b);
+
+	nlh = nft_set_nlmsg_build_hdr(mnl_nlmsg_batch_current(b),
+				NFT_MSG_NEWSET, AF_INET,
+				NLM_F_ACK, seq++);
+	mnl_nlmsg_batch_next(b);
+
+	nft_mnl_batch_put(mnl_nlmsg_batch_current(b), NFNL_MSG_BATCH_END,
+			  seq++);
+	mnl_nlmsg_batch_next(b);
+
+	ret = mnl_socket_sendto(nf_sock, mnl_nlmsg_batch_head(b),
+				mnl_nlmsg_batch_size(b));
+	if (ret < 0)
+		goto err;
+
+	mnl_nlmsg_batch_stop(b);
+
+	ret = mnl_socket_recvfrom(nf_sock, buf, sizeof(buf));
+	while (ret > 0) {
+		ret = mnl_cb_run(buf, ret, 0, mnl_socket_get_portid(nf_sock),
+				 NULL, NULL);
+		if (ret <= 0)
+			break;
+
+		ret = mnl_socket_recvfrom(nf_sock, buf, sizeof(buf));
+	}
+
+	/* We're sending an incomplete message to see if the kernel supports
+	 * set messages in batches. EINVAL means that we sent an incomplete
+	 * message with missing attributes. The kernel just ignores messages
+	 * that we cannot include in the batch.
+	 */
+	return (ret == -1 && errno == EINVAL) ? true : false;
+err:
+	mnl_nlmsg_batch_stop(b);
+	return ret;
+}
diff --git a/src/netlink.c b/src/netlink.c
index daac64c..6ddf274 100644
--- a/src/netlink.c
+++ b/src/netlink.c
@@ -149,6 +149,8 @@ struct nft_set *alloc_nft_set(const struct handle *h)
 	nft_set_attr_set_str(nls, NFT_SET_ATTR_TABLE, h->table);
 	if (h->set != NULL)
 		nft_set_attr_set_str(nls, NFT_SET_ATTR_NAME, h->set);
+	if (h->set_id)
+		nft_set_attr_set_u64(nls, NFT_SET_ATTR_ID, h->set_id);
 
 	return nls;
 }
@@ -755,8 +757,8 @@ void netlink_dump_set(struct nft_set *nls)
 #endif
 }
 
-int netlink_add_set(struct netlink_ctx *ctx, const struct handle *h,
-		    struct set *set)
+static int netlink_add_set_compat(struct netlink_ctx *ctx,
+				  const struct handle *h, struct set *set)
 {
 	struct nft_set *nls;
 	int err;
@@ -787,8 +789,60 @@ int netlink_add_set(struct netlink_ctx *ctx, const struct handle *h,
 	return err;
 }
 
-int netlink_delete_set(struct netlink_ctx *ctx, const struct handle *h,
-		       const struct location *loc)
+/* internal set ID used in the batch */
+static uint64_t set_id;
+
+static int netlink_add_set_batch(struct netlink_ctx *ctx,
+				 const struct handle *h, struct set *set)
+{
+	struct nft_set *nls;
+	int err;
+
+	nls = alloc_nft_set(h);
+	nft_set_attr_set_u32(nls, NFT_SET_ATTR_FLAGS, set->flags);
+	nft_set_attr_set_u32(nls, NFT_SET_ATTR_KEY_TYPE,
+			     dtype_map_to_kernel(set->keytype));
+	nft_set_attr_set_u32(nls, NFT_SET_ATTR_KEY_LEN,
+			     set->keylen / BITS_PER_BYTE);
+	if (set->flags & NFT_SET_MAP) {
+		nft_set_attr_set_u32(nls, NFT_SET_ATTR_DATA_TYPE,
+				     dtype_map_to_kernel(set->datatype));
+		nft_set_attr_set_u32(nls, NFT_SET_ATTR_DATA_LEN,
+				     set->datalen / BITS_PER_BYTE);
+	}
+	netlink_dump_set(nls);
+
+	if (set->flags & SET_F_ANONYMOUS) {
+		set->handle.set_id = ++set_id;
+		nft_set_attr_set_u64(nls, NFT_SET_ATTR_ID, set->handle.set_id);
+	}
+
+	err = mnl_nft_set_batch_add(nf_sock, nls, NLM_F_EXCL);
+	if (err < 0) {
+		netlink_io_error(ctx, &set->location, "Could not add set: %s",
+				 strerror(errno));
+	}
+	nft_set_free(nls);
+
+	return err;
+}
+
+int netlink_add_set(struct netlink_ctx *ctx, const struct handle *h,
+		    struct set *set)
+{
+	int ret;
+
+	if (ctx->set_batch_supported)
+		ret = netlink_add_set_batch(ctx, h, set);
+	else
+		ret = netlink_add_set_compat(ctx, h, set);
+
+	return ret;
+}
+
+static int netlink_del_set_compat(struct netlink_ctx *ctx,
+				  const struct handle *h,
+				  const struct location *loc)
 {
 	struct nft_set *nls;
 	int err;
@@ -803,6 +857,36 @@ int netlink_delete_set(struct netlink_ctx *ctx, const struct handle *h,
 	return err;
 }
 
+static int netlink_del_set_batch(struct netlink_ctx *ctx,
+				 const struct handle *h,
+				 const struct location *loc)
+{
+	struct nft_set *nls;
+	int err;
+
+	nls = alloc_nft_set(h);
+	err = mnl_nft_set_batch_del(nf_sock, nls, 0);
+	nft_set_free(nls);
+
+	if (err < 0)
+		netlink_io_error(ctx, loc, "Could not delete set: %s",
+				 strerror(errno));
+	return err;
+}
+
+int netlink_delete_set(struct netlink_ctx *ctx, const struct handle *h,
+		       const struct location *loc)
+{
+	int ret;
+
+	if (ctx->set_batch_supported)
+		ret = netlink_del_set_batch(ctx, h, loc);
+	else
+		ret = netlink_del_set_compat(ctx, h, loc);
+
+	return ret;
+}
+
 static int list_set_cb(struct nft_set *nls, void *arg)
 {
 	struct netlink_ctx *ctx = arg;
@@ -916,8 +1000,29 @@ static void alloc_setelem_cache(const struct expr *set, struct nft_set *nls)
 	}
 }
 
-int netlink_add_setelems(struct netlink_ctx *ctx, const struct handle *h,
-			 const struct expr *expr)
+static int netlink_add_setelems_batch(struct netlink_ctx *ctx,
+				      const struct handle *h,
+				      const struct expr *expr)
+{
+	struct nft_set *nls;
+	int err;
+
+	nls = alloc_nft_set(h);
+	alloc_setelem_cache(expr, nls);
+	netlink_dump_set(nls);
+
+	err = mnl_nft_setelem_batch_add(nf_sock, nls, 0);
+	nft_set_free(nls);
+	if (err < 0)
+		netlink_io_error(ctx, &expr->location,
+				 "Could not add set elements: %s",
+				 strerror(errno));
+	return err;
+}
+
+static int netlink_add_setelems_compat(struct netlink_ctx *ctx,
+				       const struct handle *h,
+				       const struct expr *expr)
 {
 	struct nft_set *nls;
 	int err;
@@ -935,8 +1040,42 @@ int netlink_add_setelems(struct netlink_ctx *ctx, const struct handle *h,
 	return err;
 }
 
-int netlink_delete_setelems(struct netlink_ctx *ctx, const struct handle *h,
-			    const struct expr *expr)
+int netlink_add_setelems(struct netlink_ctx *ctx, const struct handle *h,
+			 const struct expr *expr)
+{
+	int ret;
+
+	if (ctx->set_batch_supported)
+		ret = netlink_add_setelems_batch(ctx, h, expr);
+	else
+		ret = netlink_add_setelems_compat(ctx, h, expr);
+
+	return ret;
+}
+
+static int netlink_del_setelems_batch(struct netlink_ctx *ctx,
+				      const struct handle *h,
+				      const struct expr *expr)
+{
+	struct nft_set *nls;
+	int err;
+
+	nls = alloc_nft_set(h);
+	alloc_setelem_cache(expr, nls);
+	netlink_dump_set(nls);
+
+	err = mnl_nft_setelem_batch_del(nf_sock, nls, 0);
+	nft_set_free(nls);
+	if (err < 0)
+		netlink_io_error(ctx, &expr->location,
+				 "Could not delete set elements: %s",
+				 strerror(errno));
+	return err;
+}
+
+static int netlink_del_setelems_compat(struct netlink_ctx *ctx,
+				       const struct handle *h,
+				       const struct expr *expr)
 {
 	struct nft_set *nls;
 	int err;
@@ -954,6 +1093,19 @@ int netlink_delete_setelems(struct netlink_ctx *ctx, const struct handle *h,
 	return err;
 }
 
+int netlink_delete_setelems(struct netlink_ctx *ctx, const struct handle *h,
+			    const struct expr *expr)
+{
+	int ret;
+
+	if (ctx->set_batch_supported)
+		ret = netlink_del_setelems_batch(ctx, h, expr);
+	else
+		ret = netlink_del_setelems_compat(ctx, h, expr);
+
+	return ret;
+}
+
 static int list_setelem_cb(struct nft_set_elem *nlse, void *arg)
 {
 	struct nft_data_delinearize nld;
@@ -1050,3 +1202,8 @@ struct nft_ruleset *netlink_dump_ruleset(struct netlink_ctx *ctx,
 
 	return rs;
 }
+
+bool netlink_set_batch_supported(void)
+{
+	return mnl_set_batch_supported(nf_sock);
+}
diff --git a/src/netlink_linearize.c b/src/netlink_linearize.c
index e80646b..df8da77 100644
--- a/src/netlink_linearize.c
+++ b/src/netlink_linearize.c
@@ -129,6 +129,10 @@ static void netlink_gen_map(struct netlink_linearize_ctx *ctx,
 	nft_rule_expr_set_u32(nle, NFT_EXPR_LOOKUP_DREG, dreg);
 	nft_rule_expr_set_str(nle, NFT_EXPR_LOOKUP_SET,
 			      expr->mappings->set->handle.set);
+	if (expr->mappings->set->handle.set_id) {
+		nft_rule_expr_set_u64(nle, NFT_EXPR_LOOKUP_SET_ID,
+				      expr->mappings->set->handle.set_id);
+	}
 
 	if (dreg == NFT_REG_VERDICT)
 		release_register(ctx);
@@ -153,6 +157,10 @@ static void netlink_gen_lookup(struct netlink_linearize_ctx *ctx,
 	nft_rule_expr_set_u32(nle, NFT_EXPR_LOOKUP_SREG, sreg);
 	nft_rule_expr_set_str(nle, NFT_EXPR_LOOKUP_SET,
 			      expr->right->set->handle.set);
+	if (expr->right->set->handle.set_id) {
+		nft_rule_expr_set_u64(nle, NFT_EXPR_LOOKUP_SET_ID,
+				      expr->right->set->handle.set_id);
+	}
 
 	release_register(ctx);
 	nft_rule_add_expr(ctx->nlr, nle);
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC 1/2] netfilter: nf_tables: move set netlink messages into the batch
  2014-03-25 20:39 [PATCH RFC 1/2] netfilter: nf_tables: move set netlink messages into the batch Pablo Neira Ayuso
  2014-03-25 20:39 ` [PATCH RFC nft 2/2] src: add set netlink message to " Pablo Neira Ayuso
@ 2014-03-26 11:25 ` Patrick McHardy
  2014-03-26 13:03   ` Pablo Neira Ayuso
  1 sibling, 1 reply; 8+ messages in thread
From: Patrick McHardy @ 2014-03-26 11:25 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

On Tue, Mar 25, 2014 at 09:39:41PM +0100, Pablo Neira Ayuso wrote:
> diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
> index e6bc14d..b749e4d 100644
> --- a/include/net/netfilter/nf_tables.h
> +++ b/include/net/netfilter/nf_tables.h
> @@ -222,6 +222,8 @@ static inline void *nft_set_priv(const struct nft_set *set)
>  
>  struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
>  				     const struct nlattr *nla);
> +struct nft_set *nf_tables_set_lookup2(const struct net *net,
> +				      const struct nlattr *nla);

nf_tables_set_lookup_byid?

> diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
> index c88ccbf..3776beb 100644
> --- a/include/uapi/linux/netfilter/nf_tables.h
> +++ b/include/uapi/linux/netfilter/nf_tables.h
> @@ -221,6 +221,7 @@ enum nft_set_flags {
>   * @NFTA_SET_KEY_LEN: key data length (NLA_U32)
>   * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32)
>   * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32)
> + * @NFTA_SET_ID: set ID (NLA_U64)

I think a U32 should be perfectly fine. These are not permanent IDs but just to
identify new sets contained within a batch, so we can always start at 0.

> +static int nf_tables_set_trans_add(struct list_head *list, struct nft_ctx *ctx,
> +				   struct nft_set *set)
> +{
> +	struct nft_set_trans *strans;
> +
> +	strans = kmalloc(sizeof(struct nft_set_trans), GFP_ATOMIC);

Do we need GFP_ATOMIC?

> +	if (strans == NULL)
> +		return -ENOMEM;
> +
> +	strans->set = set;
> +	strans->ctx = *ctx;
> +	if (ctx->nla[NFTA_SET_ID])
> +		strans->id = be64_to_cpu(nla_get_be64(ctx->nla[NFTA_SET_ID]));
> +
> +	set->flags |= __NFT_SET_INACTIVE;
> +	list_add_tail(&strans->list, list);
> +	return 0;
> +}
> +
>  static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
>  			    const struct nlmsghdr *nlh,
>  			    const struct nlattr * const nla[])
>  static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
> @@ -2534,7 +2619,8 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
>  {
>  	list_del(&binding->list);
>  
> -	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
> +	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
> +	    !(set->flags & __NFT_SET_INACTIVE))

Why are we not destroying anonymous inactive sets when unbinding? This means we're
either aborting or replaying the entire transaction, so it seems we should remove
them, no?

>  		nf_tables_set_destroy(ctx, set);
>  }
>  

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC 1/2] netfilter: nf_tables: move set netlink messages into the batch
  2014-03-26 11:25 ` [PATCH RFC 1/2] netfilter: nf_tables: move set netlink messages into " Patrick McHardy
@ 2014-03-26 13:03   ` Pablo Neira Ayuso
  2014-03-26 13:18     ` Patrick McHardy
  0 siblings, 1 reply; 8+ messages in thread
From: Pablo Neira Ayuso @ 2014-03-26 13:03 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netfilter-devel

On Wed, Mar 26, 2014 at 11:25:54AM +0000, Patrick McHardy wrote:
> On Tue, Mar 25, 2014 at 09:39:41PM +0100, Pablo Neira Ayuso wrote:
> > diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
> > index e6bc14d..b749e4d 100644
> > --- a/include/net/netfilter/nf_tables.h
> > +++ b/include/net/netfilter/nf_tables.h
> > @@ -222,6 +222,8 @@ static inline void *nft_set_priv(const struct nft_set *set)
> >  
> >  struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
> >  				     const struct nlattr *nla);
> > +struct nft_set *nf_tables_set_lookup2(const struct net *net,
> > +				      const struct nlattr *nla);
> 
> nf_tables_set_lookup_byid?

That looks better.

> > diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
> > index c88ccbf..3776beb 100644
> > --- a/include/uapi/linux/netfilter/nf_tables.h
> > +++ b/include/uapi/linux/netfilter/nf_tables.h
> > @@ -221,6 +221,7 @@ enum nft_set_flags {
> >   * @NFTA_SET_KEY_LEN: key data length (NLA_U32)
> >   * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32)
> >   * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32)
> > + * @NFTA_SET_ID: set ID (NLA_U64)
> 
> I think a U32 should be perfectly fine. These are not permanent IDs but just to
> identify new sets contained within a batch, so we can always start at 0.

Yes, I was considering using this also to output the set identifier
for anonymous sets, but the dynamically allocated name should be fine
so I'll stick to u32.

Going back to the idea of using u64 to allocate the set/map names, I
think we have to extend the set name, currently IFNAMSIZ is limited
when mapping the name to set%lld, assuming a large u64 number.

> > +static int nf_tables_set_trans_add(struct list_head *list, struct nft_ctx *ctx,
> > +				   struct nft_set *set)
> > +{
> > +	struct nft_set_trans *strans;
> > +
> > +	strans = kmalloc(sizeof(struct nft_set_trans), GFP_ATOMIC);
> 
> Do we need GFP_ATOMIC?

not really, will swap that to GFP_KERNEL.

> > +	if (strans == NULL)
> > +		return -ENOMEM;
> > +
> > +	strans->set = set;
> > +	strans->ctx = *ctx;
> > +	if (ctx->nla[NFTA_SET_ID])
> > +		strans->id = be64_to_cpu(nla_get_be64(ctx->nla[NFTA_SET_ID]));
> > +
> > +	set->flags |= __NFT_SET_INACTIVE;
> > +	list_add_tail(&strans->list, list);
> > +	return 0;
> > +}
> > +
> >  static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
> >  			    const struct nlmsghdr *nlh,
> >  			    const struct nlattr * const nla[])
> >  static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
> > @@ -2534,7 +2619,8 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
> >  {
> >  	list_del(&binding->list);
> >  
> > -	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
> > +	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
> > +	    !(set->flags & __NFT_SET_INACTIVE))
> 
> Why are we not destroying anonymous inactive sets when unbinding? This means we're
> either aborting or replaying the entire transaction, so it seems we should remove
> them, no?

The set may be already bound to the rule, we skip this here, so the
set is released from the transaction. This is just to avoid a double
free case.

> >  		nf_tables_set_destroy(ctx, set);
> >  }
> >  

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC 1/2] netfilter: nf_tables: move set netlink messages into the batch
  2014-03-26 13:03   ` Pablo Neira Ayuso
@ 2014-03-26 13:18     ` Patrick McHardy
  2014-03-26 13:40       ` Pablo Neira Ayuso
  0 siblings, 1 reply; 8+ messages in thread
From: Patrick McHardy @ 2014-03-26 13:18 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

On Wed, Mar 26, 2014 at 02:03:38PM +0100, Pablo Neira Ayuso wrote:
> On Wed, Mar 26, 2014 at 11:25:54AM +0000, Patrick McHardy wrote:
> > On Tue, Mar 25, 2014 at 09:39:41PM +0100, Pablo Neira Ayuso wrote:
> 
> > > diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
> > > index c88ccbf..3776beb 100644
> > > --- a/include/uapi/linux/netfilter/nf_tables.h
> > > +++ b/include/uapi/linux/netfilter/nf_tables.h
> > > @@ -221,6 +221,7 @@ enum nft_set_flags {
> > >   * @NFTA_SET_KEY_LEN: key data length (NLA_U32)
> > >   * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32)
> > >   * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32)
> > > + * @NFTA_SET_ID: set ID (NLA_U64)
> > 
> > I think a U32 should be perfectly fine. These are not permanent IDs but just to
> > identify new sets contained within a batch, so we can always start at 0.
> 
> Yes, I was considering using this also to output the set identifier
> for anonymous sets, but the dynamically allocated name should be fine
> so I'll stick to u32.

I think these are two different things. The set identifier is so far actually
a transaction identifier, mixing these will most likely get messy.

> Going back to the idea of using u64 to allocate the set/map names, I
> think we have to extend the set name, currently IFNAMSIZ is limited
> when mapping the name to set%lld, assuming a large u64 number.

The problem is more that this ID is chosen by userspace, so it's not suitable
for anonymous sets. I'd rather get rid of the names for anonymous sets
completely. For dumps we can just as well use a dynamic numeric identifier
to associate lookup expressions with sets. We don't really need a permanent
identifier.

> > > @@ -2534,7 +2619,8 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
> > >  {
> > >  	list_del(&binding->list);
> > >  
> > > -	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
> > > +	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
> > > +	    !(set->flags & __NFT_SET_INACTIVE))
> > 
> > Why are we not destroying anonymous inactive sets when unbinding? This means we're
> > either aborting or replaying the entire transaction, so it seems we should remove
> > them, no?
> 
> The set may be already bound to the rule, we skip this here, so the
> set is released from the transaction. This is just to avoid a double
> free case.

I have to read the patch again, I was under the impression that a bound set
has already been committed and we therefore won't fail anymore. Probably
misunderstood something.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC 1/2] netfilter: nf_tables: move set netlink messages into the batch
  2014-03-26 13:18     ` Patrick McHardy
@ 2014-03-26 13:40       ` Pablo Neira Ayuso
  2014-03-26 14:00         ` Patrick McHardy
  0 siblings, 1 reply; 8+ messages in thread
From: Pablo Neira Ayuso @ 2014-03-26 13:40 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netfilter-devel

On Wed, Mar 26, 2014 at 01:18:57PM +0000, Patrick McHardy wrote:
> On Wed, Mar 26, 2014 at 02:03:38PM +0100, Pablo Neira Ayuso wrote:
> > On Wed, Mar 26, 2014 at 11:25:54AM +0000, Patrick McHardy wrote:
> > > On Tue, Mar 25, 2014 at 09:39:41PM +0100, Pablo Neira Ayuso wrote:
> > 
> > > > diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
> > > > index c88ccbf..3776beb 100644
> > > > --- a/include/uapi/linux/netfilter/nf_tables.h
> > > > +++ b/include/uapi/linux/netfilter/nf_tables.h
> > > > @@ -221,6 +221,7 @@ enum nft_set_flags {
> > > >   * @NFTA_SET_KEY_LEN: key data length (NLA_U32)
> > > >   * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32)
> > > >   * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32)
> > > > + * @NFTA_SET_ID: set ID (NLA_U64)
> > > 
> > > I think a U32 should be perfectly fine. These are not permanent IDs but just to
> > > identify new sets contained within a batch, so we can always start at 0.
> > 
> > Yes, I was considering using this also to output the set identifier
> > for anonymous sets, but the dynamically allocated name should be fine
> > so I'll stick to u32.
> 
> I think these are two different things. The set identifier is so far actually
> a transaction identifier, mixing these will most likely get messy.
> 
> > Going back to the idea of using u64 to allocate the set/map names, I
> > think we have to extend the set name, currently IFNAMSIZ is limited
> > when mapping the name to set%lld, assuming a large u64 number.
> 
> The problem is more that this ID is chosen by userspace, so it's not suitable
> for anonymous sets.

I was not considering using the ID allocated from userspace, but
reusing the NFTA_SET_ID attribute given that it is only used for
messages from userspace -> kernel, so we can reuse it when dumping the
anonymous set to userspace, but that's two different things with
different semantics, so better to have two different attributes for
each, eg. NFTA_SET_ID_ANONYMOUS?

> I'd rather get rid of the names for anonymous sets completely. For
> dumps we can just as well use a dynamic numeric identifier to
> associate lookup expressions with sets. We don't really need a
> permanent identifier.

What will that look like? I think we need some permanent way to
identify sets even if they are anonymous. A permanent ID should be
fine for that.

> > > > @@ -2534,7 +2619,8 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
> > > >  {
> > > >  	list_del(&binding->list);
> > > >  
> > > > -	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
> > > > +	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
> > > > +	    !(set->flags & __NFT_SET_INACTIVE))
> > > 
> > > Why are we not destroying anonymous inactive sets when unbinding? This means we're
> > > either aborting or replaying the entire transaction, so it seems we should remove
> > > them, no?
> > 
> > The set may be already bound to the rule, we skip this here, so the
> > set is released from the transaction. This is just to avoid a double
> > free case.
> 
> I have to read the patch again, I was under the impression that a bound set
> has already been comitted and we therefore won't fail anymore. Probably
> misunderstood something.

Think of the abort step. We already have some rules bound to the set
(not yet committed), but we failed to load some rule, we need to
release those rules and their set bindings. In that case, the rule
destroy path skips releasing the set, so we just let the abort routine
iterate over the list of new sets and release them.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC 1/2] netfilter: nf_tables: move set netlink messages into the batch
  2014-03-26 13:40       ` Pablo Neira Ayuso
@ 2014-03-26 14:00         ` Patrick McHardy
  2014-03-26 15:53           ` Pablo Neira Ayuso
  0 siblings, 1 reply; 8+ messages in thread
From: Patrick McHardy @ 2014-03-26 14:00 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

On Wed, Mar 26, 2014 at 02:40:22PM +0100, Pablo Neira Ayuso wrote:
> On Wed, Mar 26, 2014 at 01:18:57PM +0000, Patrick McHardy wrote:
> > On Wed, Mar 26, 2014 at 02:03:38PM +0100, Pablo Neira Ayuso wrote:
> > > On Wed, Mar 26, 2014 at 11:25:54AM +0000, Patrick McHardy wrote:
> > > > On Tue, Mar 25, 2014 at 09:39:41PM +0100, Pablo Neira Ayuso wrote:
> > > 
> > > > I think a U32 should be perfectly fine. These are not permanent IDs but just to
> > > > identify new sets contained within a batch, so we can always start at 0.
> > > 
> > > Yes, I was considering using this also to output the set identifier
> > > for anonymous sets, but the dynamically allocated name should be fine
> > > so I'll stick to u32.
> > 
> > I think these are two different things. The set identifier is so far actually
> > a transaction identifier, mixing these will most likely get messy.
> > 
> > > Going back to the idea of using u64 to allocate the set/map names, I
> > > think we have to extend the set name, currently IFNAMSIZ is limited
> > > when mapping the name to set%lld, assuming a large u64 number.
> > 
> > The problem is more that this ID is chosen by userspace, so it's not suitable
> > for anonymous sets.
> 
> I was not considering to use the ID allocated from userspace, but
> reusing the NFTA_SET_ID attribute given that it is only used for
> messages from userspace -> kernel, so we can reuse it when dumping the
> anonymous set to userspace, but that's two different things with
> different semantics, so better to have two different attributes for
> each, eg. NFTA_SET_ID_ANONYMOUS?

Actually that one would probably be fine. In both cases it would be used
in a very similar fashion, a temporary identifier or transaction ID.

> > I'd rather get rid of the names for anonymous sets completely. For
> > dumps we can just as well use a dynamic numeric identifier to
> > associate lookup expressions with sets. We don't really need a
> > permanent identifier.
> 
> How will that look like? I think we need some permanent way to
> identify sets even if they are anonymous. A permanent ID should be
> fine for that.

Well it depends. Right now we have anonymous sets, meaning an ID is allocated,
but they're only used in combination with constant sets, which means they
can not be changed so we don't need a permanent ID. If we were to use
anonymous in combination with non-const then yes, we need a permanent ID.

In either case the numeric ID is fine, the question is simply whether it
needs to be a permanent ID or whether a temporary one is enough.

> > > > > @@ -2534,7 +2619,8 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
> > > > >  {
> > > > >  	list_del(&binding->list);
> > > > >  
> > > > > -	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
> > > > > +	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
> > > > > +	    !(set->flags & __NFT_SET_INACTIVE))
> > > > 
> > > > Why are we not destroying anonymous inactive sets when unbinding? This means we're
> > > > either aborting or replaying the entire transaction, so it seems we should remove
> > > > them, no?
> > > 
> > > The set may be already bound to the rule, we skip this here, so the
> > > set is released from the transaction. This is just to avoid a double
> > > free case.
> > 
> > I have to read the patch again, I was under the impression that a bound set
> > has already been comitted and we therefore won't fail anymore. Probably
> > misunderstood something.
> 
> Think of the abort step. We already have some rules bound to the set
> (not yet committed), but we failed to load some rule, we need to
> release those rules and their set bindings. In that case, the rule
> destroy path skips releasing the set, so we just let the abort routine
> to iterate over the list of new sets to releasing.

So we do bind to uncommitted sets? That's the part I was missing.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC 1/2] netfilter: nf_tables: move set netlink messages into the batch
  2014-03-26 14:00         ` Patrick McHardy
@ 2014-03-26 15:53           ` Pablo Neira Ayuso
  0 siblings, 0 replies; 8+ messages in thread
From: Pablo Neira Ayuso @ 2014-03-26 15:53 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netfilter-devel

On Wed, Mar 26, 2014 at 02:00:21PM +0000, Patrick McHardy wrote:
> On Wed, Mar 26, 2014 at 02:40:22PM +0100, Pablo Neira Ayuso wrote:
> > On Wed, Mar 26, 2014 at 01:18:57PM +0000, Patrick McHardy wrote:
> > > On Wed, Mar 26, 2014 at 02:03:38PM +0100, Pablo Neira Ayuso wrote:
> > > > On Wed, Mar 26, 2014 at 11:25:54AM +0000, Patrick McHardy wrote:
> > > > > On Tue, Mar 25, 2014 at 09:39:41PM +0100, Pablo Neira Ayuso wrote:
> > > > 
> > > > > I think a U32 should be perfectly fine. These are not permanent IDs but just to
> > > > > identify new sets contained within a batch, so we can always start at 0.
> > > > 
> > > > Yes, I was considering using this also to output the set identifier
> > > > for anonymous sets, but the dynamically allocated name should be fine
> > > > so I'll stick to u32.
> > > 
> > > I think these are two different things. The set identifier is so far actually
> > > a transaction identifier, mixing these will most likely get messy.
> > > 
> > > > Going back to the idea of using u64 to allocate the set/map names, I
> > > > think we have to extend the set name, currently IFNAMSIZ is limited
> > > > when mapping the name to set%lld, assuming a large u64 number.
> > > 
> > > The problem is more that this ID is chosen by userspace, so it's not suitable
> > > for anonymous sets.
> > 
> > I was not considering to use the ID allocated from userspace, but
> > reusing the NFTA_SET_ID attribute given that it is only used for
> > messages from userspace -> kernel, so we can reuse it when dumping the
> > anonymous set to userspace, but that's two different things with
> > different semantics, so better to have two different attributes for
> > each, eg. NFTA_SET_ID_ANONYMOUS?
> 
> Actually that one would probably be fine. In both cases it would be used
> in a very similar fashion, a temporary identifier or transaction ID.

Ok, then I'm going to leave the u64 NFTA_SET_ID there so we can reuse
it later on for the dumping. Let me know if I misinterpreted your
comment.

> > > I'd rather get rid of the names for anonymous sets completely. For
> > > dumps we can just as well use a dynamic numeric identifier to
> > > associate lookup expressions with sets. We don't really need a
> > > permanent identifier.
> > 
> > How will that look like? I think we need some permanent way to
> > identify sets even if they are anonymous. A permanent ID should be
> > fine for that.
> 
> Well it depends. Right now we have anonymous sets, meaning an ID is allocated,
> but they're only used in combination with constant sets, which means they
> can not be changed so we don't need a permanent ID. If we were to use
> anonymous in combination with non-const then yes, we need a permanent ID.

I see, for const + anonymous it doesn't make much sense as userspace
can just barely fetch them, but not update them.

> In either case the numeric ID is fine, the question is simply whether it
> needs to be a permanent ID or whether a temoprary one is enough.

Hm, perhaps we may add statistics per element in the future or any
other feature that alters some internal state of the element. The user
can fetch the set ID from the rule and then keep using it to retrieve
that internal state using the permanent ID.

> > > > > > @@ -2534,7 +2619,8 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
> > > > > >  {
> > > > > >  	list_del(&binding->list);
> > > > > >  
> > > > > > -	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
> > > > > > +	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
> > > > > > +	    !(set->flags & __NFT_SET_INACTIVE))
> > > > > 
> > > > > Why are we not destroying anonymous inactive sets when unbinding? This means we're
> > > > > either aborting or replaying the entire transaction, so it seems we should remove
> > > > > them, no?
> > > > 
> > > > The set may be already bound to the rule, we skip this here, so the
> > > > set is released from the transaction. This is just to avoid a double
> > > > free case.
> > > 
> > > I have to read the patch again, I was under the impression that a bound set
> > > has already been comitted and we therefore won't fail anymore. Probably
> > > misunderstood something.
> > 
> > Think of the abort step. We already have some rules bound to the set
> > (not yet committed), but we failed to load some rule, we need to
> > release those rules and their set bindings. In that case, the rule
> > destroy path skips releasing the set, so we just let the abort routine
> > to iterate over the list of new sets to releasing.
> 
> So we do bind to uncommitted sets? That's the part I was missing.

Right. That simplifies the error handling as no errors can currently
happen in the nf_tables_commit() step, so we don't need to undo
changes that were already applied because of errors which would be
tricky.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2014-03-26 15:53 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-03-25 20:39 [PATCH RFC 1/2] netfilter: nf_tables: move set netlink messages into the batch Pablo Neira Ayuso
2014-03-25 20:39 ` [PATCH RFC nft 2/2] src: add set netlink message to " Pablo Neira Ayuso
2014-03-26 11:25 ` [PATCH RFC 1/2] netfilter: nf_tables: move set netlink messages into " Patrick McHardy
2014-03-26 13:03   ` Pablo Neira Ayuso
2014-03-26 13:18     ` Patrick McHardy
2014-03-26 13:40       ` Pablo Neira Ayuso
2014-03-26 14:00         ` Patrick McHardy
2014-03-26 15:53           ` Pablo Neira Ayuso

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).