* [PATCH 1/2] netfilter: nf_tables: partially rework commit and abort operation @ 2013-02-28 23:08 pablo 2013-02-28 23:08 ` [PATCH 2/2] netfilter: nf_tables: don't skip inactive rules and dump generation mask pablo ` (2 more replies) 0 siblings, 3 replies; 12+ messages in thread From: pablo @ 2013-02-28 23:08 UTC (permalink / raw) To: netfilter-devel; +Cc: kaber, tomasz.bursztyka From: Pablo Neira Ayuso <pablo@netfilter.org> This patch partially reworks the commit operation (added in 1577831) to get rid of the extra struct list_head per rule as discussed with Patrick McHardy. With this patch, a temporary object is allocated to store the rule update information. The commit and abort loops have been also simplified from ideas extracted after discusion with Tomasz Bursztyka. Basically, there is a single list per net namespace that contains pending rule updates. The dirty list is now owned by the netlink socket portid that adds the first rule that waits to be committed. If another socket is open to add a rule-set in an atomic fashion, it will hit -EBUSY. Note that pending updates, if not committed, are destroyed on socket removal. Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- include/net/netfilter/nf_tables.h | 21 ++++- include/net/netns/nftables.h | 2 + net/netfilter/nf_tables_api.c | 185 +++++++++++++++++++++++++------------ 3 files changed, 145 insertions(+), 63 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3782777..69cb5ea 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -321,7 +321,6 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) * struct nft_rule - nf_tables rule * * @list: used internally - * @dirty_list: this rule needs an update after new generation * @rcu_head: used internally for rcu * @handle: rule handle * @genmask: generation mask @@ -330,7 +329,6 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) */ struct nft_rule { struct list_head list; - struct list_head dirty_list; struct rcu_head rcu_head; u64 handle:46, genmask:2, @@ -339,6 +337,23 @@ struct nft_rule { __attribute__((aligned(__alignof__(struct nft_expr)))); }; +/** + * struct nft_rule_update - nf_tables rule update + * + * @list: used internally + * @rule: rule that needs to be updated + * @chain: chain that this rule belongs to + * @table: table for which this chain applies + * @net: the net namespace that this rule updates belongs to + */ +struct nft_rule_update { + struct list_head list; + struct nft_rule *rule; + const struct nft_chain *chain; + const struct nft_table *table; + struct net *net; +}; + static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) { return (struct nft_expr *)&rule->data[0]; @@ -372,7 +387,6 @@ enum nft_chain_flags { * struct nft_chain - nf_tables chain * * @rules: list of rules in the chain - * @dirty_rules: rules that need an update after next generation * @list: used internally * @rcu_head: used internally * @net: net namespace that this chain belongs to @@ -385,7 +399,6 @@ enum nft_chain_flags { */ struct nft_chain { struct list_head rules; - struct list_head dirty_rules; struct list_head list; struct rcu_head rcu_head; struct net *net; diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index fe919c7..97992fb 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -10,6 +10,8 @@ struct netns_nftables { struct nft_af_info *ipv4; struct nft_af_info *ipv6; struct nft_af_info *bridge; + u32 pid_owner; + struct list_head dirty_rules; u8 gencursor:1, genctr:7; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 40d94e2..beace28 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -961,7 +961,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, } INIT_LIST_HEAD(&chain->rules); - INIT_LIST_HEAD(&chain->dirty_rules); chain->handle = nf_tables_alloc_handle(table); chain->net = net; chain->table = table; @@ -1510,6 +1509,29 @@ static void nf_tables_rule_destroy(struct nft_rule *rule) static struct nft_expr_info *info; +static int nf_tables_dirty_add(struct nft_rule *rule, const struct nft_ctx *ctx) +{ + struct nft_rule_update *rupd; + + /* Another socket owns the dirty list? */ + if (!ctx->net->nft.pid_owner) + ctx->net->nft.pid_owner = ctx->nlh->nlmsg_pid; + else if (ctx->net->nft.pid_owner != ctx->nlh->nlmsg_pid) + return -EBUSY; + + rupd = kmalloc(sizeof(struct nft_rule_update), GFP_KERNEL); + if (rupd == NULL) + return -ENOMEM; + + rupd->chain = ctx->chain; + rupd->table = ctx->table; + rupd->rule = rule; + rupd->net = ctx->net; + list_add(&rupd->list, &ctx->net->nft.dirty_rules); + + return 0; +} + static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1619,9 +1641,13 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, else list_add_rcu(&rule->list, &chain->rules); - if (flags & NFT_RULE_F_COMMIT) - list_add(&rule->dirty_list, &chain->dirty_rules); - else { + if (flags & NFT_RULE_F_COMMIT) { + err = nf_tables_dirty_add(rule, &ctx); + if (err < 0) { + list_del_rcu(&rule->list); + goto err2; + } + } else { nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), @@ -1639,14 +1665,19 @@ err1: return err; } -static void +static int nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule, u32 flags) { + int ret = 0; + if (flags & NFT_RULE_F_COMMIT) { - struct nft_chain *chain = (struct nft_chain *)ctx->chain; + /* Will be deleted already in the next generation */ + if (!nft_rule_is_active_next(ctx->net, rule)) + return -EBUSY; - nft_rule_disactivate_next(ctx->net, rule); - list_add(&rule->dirty_list, &chain->dirty_rules); + ret = nf_tables_dirty_add(rule, ctx); + if (ret >= 0) + nft_rule_disactivate_next(ctx->net, rule); } else { list_del_rcu(&rule->list); nf_tables_rule_notify(ctx->skb, ctx->nlh, ctx->table, @@ -1654,6 +1685,7 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule, u32 flags) 0, ctx->afi->family); nf_tables_rule_destroy(rule); } + return ret; } static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, @@ -1666,7 +1698,7 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *tmp; - int family = nfmsg->nfgen_family; + int family = nfmsg->nfgen_family, err; struct nft_ctx ctx; u32 flags = 0; @@ -1696,7 +1728,9 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(rule)) return PTR_ERR(rule); - nf_tables_delrule_one(&ctx, rule, flags); + err = nf_tables_delrule_one(&ctx, rule, flags); + if (err < 0) + return err; } else { /* Remove all rules in this chain */ list_for_each_entry_safe(rule, tmp, &chain->rules, list) @@ -1713,12 +1747,14 @@ static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; struct net *net = sock_net(skb->sk); - struct nft_table *table; - struct nft_chain *chain; - struct nft_rule *rule, *tmp; + struct nft_rule_update *rupd, *tmp; int family = nfmsg->nfgen_family; bool create; + /* Are you the owner of the dirty list? */ + if (nlh->nlmsg_pid != net->nft.pid_owner) + return -EBUSY; + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); @@ -1736,40 +1772,60 @@ static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb, */ synchronize_rcu(); - list_for_each_entry(table, &afi->tables, list) { - list_for_each_entry(chain, &table->chains, list) { - list_for_each_entry_safe(rule, tmp, &chain->dirty_rules, dirty_list) { - /* Delete this rule from the dirty list */ - list_del(&rule->dirty_list); - - /* This rule was inactive in the past and just - * became active. Clear the next bit of the - * genmask since its meaning has changed, now - * it is the future. - */ - if (nft_rule_is_active(net, rule)) { - nft_rule_clear(net, rule); - nf_tables_rule_notify(skb, nlh, table, - chain, rule, - NFT_MSG_NEWRULE, - 0, - nfmsg->nfgen_family); - continue; - } + list_for_each_entry_safe(rupd, tmp, &net->nft.dirty_rules, list) { + /* Delete this rule from the dirty list */ + list_del(&rupd->list); - /* This rule is in the past, get rid of it */ - list_del_rcu(&rule->list); - nf_tables_rule_notify(skb, nlh, table, chain, - rule, NFT_MSG_DELRULE, 0, - family); - nf_tables_rule_destroy(rule); - } + /* This rule was inactive in the past and just became active. + * Clear the next bit of the genmask since its meaning has + * changed, now it is the future. + */ + if (nft_rule_is_active(net, rupd->rule)) { + nft_rule_clear(net, rupd->rule); + nf_tables_rule_notify(skb, nlh, rupd->table, + rupd->chain, rupd->rule, + NFT_MSG_NEWRULE, 0, + nfmsg->nfgen_family); + kfree(rupd); + continue; } + + /* This rule is in the past, get rid of it */ + list_del_rcu(&rupd->rule->list); + nf_tables_rule_notify(skb, nlh, rupd->table, rupd->chain, + rupd->rule, NFT_MSG_DELRULE, 0, family); + nf_tables_rule_destroy(rupd->rule); + kfree(rupd); } + /* Clear owner of this dirty list */ + net->nft.pid_owner = 0; return 0; } +static void nf_tables_abort_all(struct net *net) +{ + struct nft_rule_update *rupd, *tmp; + + list_for_each_entry_safe(rupd, tmp, &net->nft.dirty_rules, list) { + /* Delete all rules from the dirty list */ + list_del(&rupd->list); + + if (!nft_rule_is_active_next(rupd->net, rupd->rule)) { + nft_rule_clear(rupd->net, rupd->rule); + kfree(rupd); + return; + } + + /* This rule is inactive, get rid of it */ + list_del_rcu(&rupd->rule->list); + nf_tables_rule_destroy(rupd->rule); + kfree(rupd); + } + /* Clear the owner of the dirty list */ + net->nft.pid_owner = 0; +} + static int nf_tables_abort(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1777,37 +1833,44 @@ static int nf_tables_abort(struct sock *nlsk, struct sk_buff *skb, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; struct net *net = sock_net(skb->sk); - struct nft_table *table; - struct nft_chain *chain; - struct nft_rule *rule, *tmp; bool create; + /* Are you the owner of the dirty list? */ + if (nlh->nlmsg_pid != net->nft.pid_owner) + return -EBUSY; + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); if (IS_ERR(afi)) return PTR_ERR(afi); - list_for_each_entry(table, &afi->tables, list) { - list_for_each_entry(chain, &table->chains, list) { - list_for_each_entry_safe(rule, tmp, &chain->dirty_rules, dirty_list) { - /* Delete all rules from the dirty list */ - list_del(&rule->dirty_list); - - if (!nft_rule_is_active_next(net, rule)) { - nft_rule_clear(net, rule); - continue; - } + nf_tables_abort_all(net); - /* This rule is inactive, get rid of it */ - list_del_rcu(&rule->list); - nf_tables_rule_destroy(rule); - } - } - } return 0; } +static int +nf_tables_rcv_nl_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct netlink_notify *n = ptr; + + if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER) + return NOTIFY_DONE; + + if (n->portid != n->net->nft.pid_owner) + return NOTIFY_DONE; + + nf_tables_abort_all(n->net); + + return NOTIFY_DONE; +} + +static struct notifier_block nf_tables_notifier = { + .notifier_call = nf_tables_rcv_nl_event, +}; + /* * Sets */ @@ -3156,6 +3219,7 @@ EXPORT_SYMBOL_GPL(nft_data_dump); static int nf_tables_init_net(struct net *net) { INIT_LIST_HEAD(&net->nft.af_info); + INIT_LIST_HEAD(&net->nft.dirty_rules); return 0; } @@ -3182,6 +3246,8 @@ static int __init nf_tables_module_init(void) if (err < 0) goto err3; + netlink_register_notifier(&nf_tables_notifier); + pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n"); return register_pernet_subsys(&nf_tables_net_ops); err3: @@ -3195,6 +3261,7 @@ err1: static void __exit nf_tables_module_exit(void) { unregister_pernet_subsys(&nf_tables_net_ops); + netlink_unregister_notifier(&nf_tables_notifier); nfnetlink_subsys_unregister(&nf_tables_subsys); nf_tables_core_module_exit(); kfree(info); -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 2/2] netfilter: nf_tables: don't skip inactive rules and dump generation mask 2013-02-28 23:08 [PATCH 1/2] netfilter: nf_tables: partially rework commit and abort operation pablo @ 2013-02-28 23:08 ` pablo 2013-03-04 12:22 ` [PATCH 1/2] netfilter: nf_tables: partially rework commit and abort operation Tomasz Bursztyka 2013-03-26 10:19 ` [RFC] Atomic rule manipulation part of transactions Tomasz Bursztyka 2 siblings, 0 replies; 12+ messages in thread From: pablo @ 2013-02-28 23:08 UTC (permalink / raw) To: netfilter-devel; +Cc: kaber, tomasz.bursztyka From: Pablo Neira Ayuso <pablo@netfilter.org> The user-space program gets the full rule-set, including active and inactive ones plus the generation mask to know what the current state of the rule is. Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- include/uapi/linux/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 76b215f..66d5bde 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -101,6 +101,7 @@ enum nft_rule_attributes { NFTA_RULE_EXPRESSIONS, NFTA_RULE_FLAGS, NFTA_RULE_COMPAT, + NFTA_RULE_GENMASK, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index beace28..fd2c8b6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1287,6 +1287,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, goto nla_put_failure; if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle))) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_RULE_GENMASK, htonl(rule->genmask))) + return -1; list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS); if (list == NULL) @@ -1401,8 +1403,6 @@ static int nf_tables_dump_rules(struct sk_buff *skb, list_for_each_entry(table, &afi->tables, list) { list_for_each_entry(chain, &table->chains, list) { list_for_each_entry(rule, &chain->rules, list) { - if (!nft_rule_is_active(net, rule)) - goto cont; if (idx < s_idx) goto cont; if (idx > s_idx) -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 1/2] netfilter: nf_tables: partially rework commit and abort operation 2013-02-28 23:08 [PATCH 1/2] netfilter: nf_tables: partially rework commit and abort operation pablo 2013-02-28 23:08 ` [PATCH 2/2] netfilter: nf_tables: don't skip inactive rules and dump generation mask pablo @ 2013-03-04 12:22 ` Tomasz Bursztyka 2013-03-26 10:19 ` [RFC] Atomic rule manipulation part of transactions Tomasz Bursztyka 2 siblings, 0 replies; 12+ messages in thread From: Tomasz Bursztyka @ 2013-03-04 12:22 UTC (permalink / raw) To: pablo; +Cc: netfilter-devel, kaber Hi Pablo, I would have some comments again, but mostly on the features it exposes. I am not fully sure we want to dump non-active rules for instance. And why limiting the transaction access to one netlink connection at a time? (though for commit operation it's fully relevant of course) Things like that. Could we see that in detail at the NFWS? Tomasz ^ permalink raw reply [flat|nested] 12+ messages in thread
* [RFC] Atomic rule manipulation part of transactions 2013-02-28 23:08 [PATCH 1/2] netfilter: nf_tables: partially rework commit and abort operation pablo 2013-02-28 23:08 ` [PATCH 2/2] netfilter: nf_tables: don't skip inactive rules and dump generation mask pablo 2013-03-04 12:22 ` [PATCH 1/2] netfilter: nf_tables: partially rework commit and abort operation Tomasz Bursztyka @ 2013-03-26 10:19 ` Tomasz Bursztyka 2013-03-26 10:19 ` [PATCH] nf_tables: Transaction API proposal Tomasz Bursztyka 2 siblings, 1 reply; 12+ messages in thread From: Tomasz Bursztyka @ 2013-03-26 10:19 UTC (permalink / raw) To: netfilter-devel; +Cc: kaber, pablo, Tomasz Bursztyka Hi, I took the patch "[PATCH 1/2] netfilter: nf_tables: partially rework commit and abort operation" of Pablo and quickly did the changes on top of it. Basically if you need to do atomic rule manipulation, it will go in a transaction (like on a database). And it's enabled per-nfnetlink connection. Any connection should be able to do such manipulation. For that, I used the struct sock { sk_user_data } attribute... Afaik, nothing is using it on nfnetlink, so it looks safe to use it. But if it should not be used, due to some reasons, let's find another way. It remove the rule flags as well. It always sounded weird to add such flags, and the commit flag was just semantically wrong. Besides that, I have a question about style issue: what naming rule is applied if the functions is static and not exposed anywhere? For instance static function exposed as struct nfnl_callback callbacks are always following nf_tables_ prefix But what about other functions? I have seen some with __nf_tables, nft_ or nf_tables_ ... It's a proposal, not a patch since it's made on top of previous patch proposal. Please review, Tomasz Bursztyka (1): nf_tables: Transaction API proposal include/net/netfilter/nf_tables.h | 9 ++ include/uapi/linux/netfilter/nf_tables.h | 11 +- net/netfilter/nf_tables_api.c | 170 +++++++++++++++++-------------- 3 files changed, 106 insertions(+), 84 deletions(-) -- 1.8.1.5 ^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH] nf_tables: Transaction API proposal 2013-03-26 10:19 ` [RFC] Atomic rule manipulation part of transactions Tomasz Bursztyka @ 2013-03-26 10:19 ` Tomasz Bursztyka 2013-03-27 16:35 ` Pablo Neira Ayuso 0 siblings, 1 reply; 12+ messages in thread From: Tomasz Bursztyka @ 2013-03-26 10:19 UTC (permalink / raw) To: netfilter-devel; +Cc: kaber, pablo, Tomasz Bursztyka It reworks the current atomic API: - proposes START/COMMIT/ABORT transaction messages - removes rule flags: rule manipalution is part of a transaction once one has been started - make use of nfnl socket user data: enables transaction per-nfnetlink connection --- include/net/netfilter/nf_tables.h | 9 ++ include/uapi/linux/netfilter/nf_tables.h | 11 +- net/netfilter/nf_tables_api.c | 170 +++++++++++++++++-------------- 3 files changed, 106 insertions(+), 84 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 69cb5ea..490acb0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -354,6 +354,15 @@ struct nft_rule_update { struct net *net; }; +/** + * struct nft_transaction - nf_tables transaction + * + * @updates: list of rule updates for the current transaction + */ +struct nft_transaction { + struct list_head updates; +}; + static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) { return (struct nft_expr *)&rule->data[0]; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 76b215f..8f8d6a6 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -37,8 +37,9 @@ enum nf_tables_msg_types { NFT_MSG_NEWSETELEM, NFT_MSG_GETSETELEM, NFT_MSG_DELSETELEM, - NFT_MSG_COMMIT, - NFT_MSG_ABORT, + NFT_MSG_START_TRANSACTION, + NFT_MSG_COMMIT_TRANSACTION, + NFT_MSG_ABORT_TRANSACTION, NFT_MSG_MAX, }; @@ -88,18 +89,12 @@ enum nft_chain_attributes { }; #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) -enum { - NFT_RULE_F_COMMIT = (1 << 0), - NFT_RULE_F_MASK = NFT_RULE_F_COMMIT, -}; - enum nft_rule_attributes { NFTA_RULE_UNSPEC, NFTA_RULE_TABLE, NFTA_RULE_CHAIN, NFTA_RULE_HANDLE, NFTA_RULE_EXPRESSIONS, - NFTA_RULE_FLAGS, NFTA_RULE_COMPAT, __NFTA_RULE_MAX }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d2da5df..7d2cbd5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1264,7 +1264,6 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, - [NFTA_RULE_FLAGS] = { .type = NLA_U32 }, [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, }; @@ -1518,16 +1517,12 @@ static void nf_tables_rule_destroy(struct nft_rule *rule) static struct nft_expr_info *info; -static int nf_tables_dirty_add(struct nft_rule *rule, const struct nft_ctx *ctx) +static int nf_tables_transaction_add(const struct nft_ctx *ctx, + struct nft_transaction *transaction, + struct nft_rule *rule) { struct nft_rule_update *rupd; - /* Another socket owns the dirty list? */ - if (!ctx->net->nft.pid_owner) - ctx->net->nft.pid_owner = ctx->nlh->nlmsg_pid; - else if (ctx->net->nft.pid_owner != ctx->nlh->nlmsg_pid) - return -EBUSY; - rupd = kmalloc(sizeof(struct nft_rule_update), GFP_KERNEL); if (rupd == NULL) return -ENOMEM; @@ -1536,7 +1531,7 @@ static int nf_tables_dirty_add(struct nft_rule *rule, const struct nft_ctx *ctx) rupd->table = ctx->table; rupd->rule = rule; rupd->net = ctx->net; - list_add(&rupd->list, &ctx->net->nft.dirty_rules); + list_add(&rupd->list, &transaction->updates); return 0; } @@ -1547,6 +1542,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; + struct nft_transaction *transaction = nlsk->sk_user_data; struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_chain *chain; @@ -1557,7 +1553,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, unsigned int size, i, n; int err, rem; bool create; - u32 flags = 0; u64 handle; create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; @@ -1616,15 +1611,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (rule == NULL) goto err1; - if (nla[NFTA_RULE_FLAGS]) { - flags = ntohl(nla_get_be32(nla[NFTA_RULE_FLAGS])); - if (flags & ~NFT_RULE_F_MASK) - return -EINVAL; - - if (flags & NFT_RULE_F_COMMIT) - nft_rule_activate_next(net, rule); - } - rule->handle = handle; rule->dlen = size; @@ -1637,8 +1623,11 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, expr = nft_expr_next(expr); } + if (transaction != NULL) + nft_rule_activate_next(net, rule); + if (nlh->nlmsg_flags & NLM_F_REPLACE) { - if (flags & NFT_RULE_F_COMMIT) { + if (transaction != NULL) { nft_rule_disactivate_next(net, old_rule); list_add_tail_rcu(&rule->list, &chain->rules); } else { @@ -1650,8 +1639,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, else list_add_rcu(&rule->list, &chain->rules); - if (flags & NFT_RULE_F_COMMIT) { - err = nf_tables_dirty_add(rule, &ctx); + if (transaction != NULL) { + err = nf_tables_transaction_add(&ctx, transaction, rule); if (err < 0) { list_del_rcu(&rule->list); goto err2; @@ -1659,7 +1648,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, } else { nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, - nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), + nlh->nlmsg_flags & + (NLM_F_APPEND | NLM_F_REPLACE), nfmsg->nfgen_family); } return 0; @@ -1674,17 +1664,18 @@ err1: return err; } -static int -nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule, u32 flags) +static int nf_tables_delrule_one(struct nft_ctx *ctx, + struct nft_transaction *transaction, + struct nft_rule *rule) { int ret = 0; - if (flags & NFT_RULE_F_COMMIT) { + if (transaction != NULL) { /* Will be deleted already in the next generation */ if (!nft_rule_is_active_next(ctx->net, rule)) return -EBUSY; - ret = nf_tables_dirty_add(rule, ctx); + ret = nf_tables_transaction_add(ctx, transaction, rule); if (ret >= 0) nft_rule_disactivate_next(ctx->net, rule); } else { @@ -1703,13 +1694,13 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; + struct nft_transaction *transaction = nlsk->sk_user_data; struct net *net = sock_net(skb->sk); const struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *tmp; int family = nfmsg->nfgen_family, err; struct nft_ctx ctx; - u32 flags = 0; afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) @@ -1725,44 +1716,59 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); - if (nla[NFTA_RULE_FLAGS]) { - flags = ntohl(nla_get_be32(nla[NFTA_RULE_FLAGS])); - - if (flags & ~NFT_RULE_F_MASK) - return -EINVAL; - } - if (nla[NFTA_RULE_HANDLE]) { rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) return PTR_ERR(rule); - err = nf_tables_delrule_one(&ctx, rule, flags); + err = nf_tables_delrule_one(&ctx, transaction, rule); if (err < 0) return err; } else { /* Remove all rules in this chain */ list_for_each_entry_safe(rule, tmp, &chain->rules, list) - nf_tables_delrule_one(&ctx, rule, flags); + nf_tables_delrule_one(&ctx, transaction, rule); } return 0; } -static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) +static int nf_tables_start_transaction(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + struct nft_transaction *transaction; + + if (nlsk->sk_user_data != NULL) + return -EALREADY; + + transaction = kmalloc(sizeof(struct nft_transaction), GFP_KERNEL); + if (transaction == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&transaction->updates); + nlsk->sk_user_data = transaction; + + return 0; +} + +static int nf_tables_commit_transaction(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; + struct nft_transaction *transaction = nlsk->sk_user_data; struct net *net = sock_net(skb->sk); struct nft_rule_update *rupd, *tmp; int family = nfmsg->nfgen_family; bool create; - /* Are you the owner of the dirty list? */ - if (nlh->nlmsg_pid != net->nft.pid_owner) - return -EBUSY; + if (transaction == NULL) + return -EINVAL; + + if (list_empty(&transaction->updates)) + goto done; create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; @@ -1781,7 +1787,7 @@ static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb, */ synchronize_rcu(); - list_for_each_entry_safe(rupd, tmp, &net->nft.dirty_rules, list) { + list_for_each_entry_safe(rupd, tmp, &transaction->updates, list) { /* Delete this rule from the dirty list */ list_del(&rupd->list); @@ -1806,47 +1812,47 @@ static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb, nf_tables_rule_destroy(rupd->rule); kfree(rupd); } - /* Clear owner of this dirty list */ - net->nft.pid_owner = 0; + +done: + kfree(transaction); + nlsk->sk_user_data = NULL; return 0; } -static void nf_tables_abort_all(struct net *net) +static void nf_tables_abort_all(struct nft_transaction *transaction) { struct nft_rule_update *rupd, *tmp; - list_for_each_entry_safe(rupd, tmp, &net->nft.dirty_rules, list) { - /* Delete all rules from the dirty list */ + list_for_each_entry_safe(rupd, tmp, &transaction->updates, list) { + /* Delete all rules from the update list */ list_del(&rupd->list); - if (!nft_rule_is_active_next(rupd->net, rupd->rule)) { + if (nft_rule_is_active_next(rupd->net, rupd->rule)) { + list_del_rcu(&rupd->rule->list); + nf_tables_rule_destroy(rupd->rule); + } else nft_rule_clear(rupd->net, rupd->rule); - kfree(rupd); - return; - } - /* This rule is inactive, get rid of it */ - list_del_rcu(&rupd->rule->list); - nf_tables_rule_destroy(rupd->rule); kfree(rupd); } - /* Clear the owner of the dirty list */ - net->nft.pid_owner = 0; } -static int nf_tables_abort(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) +static int nf_tables_abort_transaction(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; + struct nft_transaction *transaction = nlsk->sk_user_data; struct net *net = sock_net(skb->sk); bool create; - /* Are you the owner of the dirty list? */ - if (nlh->nlmsg_pid != net->nft.pid_owner) - return -EBUSY; + if (transaction == NULL) + return -EINVAL; + + if (list_empty(&transaction->updates)) + goto done; create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; @@ -1854,24 +1860,31 @@ static int nf_tables_abort(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - nf_tables_abort_all(net); + nf_tables_abort_all(transaction); + +done: + kfree(transaction); + nlsk->sk_user_data = NULL; return 0; } -static int -nf_tables_rcv_nl_event(struct notifier_block *this, - unsigned long event, void *ptr) +static int nf_tables_rcv_nl_event(struct notifier_block *this, + unsigned long event, void *ptr) { struct netlink_notify *n = ptr; + struct nft_transaction *transaction; if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER) return NOTIFY_DONE; - if (n->portid != n->net->nft.pid_owner) - return NOTIFY_DONE; - - nf_tables_abort_all(n->net); + transaction = n->net->nfnl->sk_user_data; + if (transaction != NULL) { + if (!list_empty(&transaction->updates)) + nf_tables_abort_all(transaction); + kfree(transaction); + n->net->nfnl->sk_user_data = NULL; + } return NOTIFY_DONE; } @@ -2840,13 +2853,18 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, - [NFT_MSG_COMMIT] = { - .call = nf_tables_commit, + [NFT_MSG_START_TRANSACTION] = { + .call = nf_tables_start_transaction, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_rule_policy, + }, + [NFT_MSG_COMMIT_TRANSACTION] = { + .call = nf_tables_commit_transaction, .attr_count = NFTA_TABLE_MAX, .policy = nft_rule_policy, }, - [NFT_MSG_ABORT] = { - .call = nf_tables_abort, + [NFT_MSG_ABORT_TRANSACTION] = { + .call = nf_tables_abort_transaction, .attr_count = NFTA_TABLE_MAX, .policy = nft_rule_policy, }, -- 1.8.1.5 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] nf_tables: Transaction API proposal 2013-03-26 10:19 ` [PATCH] nf_tables: Transaction API proposal Tomasz Bursztyka @ 2013-03-27 16:35 ` Pablo Neira Ayuso 2013-03-27 16:42 ` Pablo Neira Ayuso 2013-03-28 8:01 ` Tomasz Bursztyka 0 siblings, 2 replies; 12+ messages in thread From: Pablo Neira Ayuso @ 2013-03-27 16:35 UTC (permalink / raw) To: Tomasz Bursztyka; +Cc: netfilter-devel, kaber Hi Tomasz, On Tue, Mar 26, 2013 at 12:19:04PM +0200, Tomasz Bursztyka wrote: > It reworks the current atomic API: > - proposes START/COMMIT/ABORT transaction messages > - removes rule flags: rule manipalution is part of a transaction once > one has been started > - make use of nfnl socket user data: enables transaction per-nfnetlink > connection > > --- > include/net/netfilter/nf_tables.h | 9 ++ > include/uapi/linux/netfilter/nf_tables.h | 11 +- > net/netfilter/nf_tables_api.c | 170 +++++++++++++++++-------------- > 3 files changed, 106 insertions(+), 84 deletions(-) > > diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h > index 69cb5ea..490acb0 100644 > --- a/include/net/netfilter/nf_tables.h > +++ b/include/net/netfilter/nf_tables.h > @@ -354,6 +354,15 @@ struct nft_rule_update { > struct net *net; > }; > > +/** > + * struct nft_transaction - nf_tables transaction > + * > + * @updates: list of rule updates for the current transaction > + */ > +struct nft_transaction { > + struct list_head updates; > +}; > + > static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) > { > return (struct nft_expr *)&rule->data[0]; > diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h > index 76b215f..8f8d6a6 100644 > --- a/include/uapi/linux/netfilter/nf_tables.h > +++ b/include/uapi/linux/netfilter/nf_tables.h > @@ -37,8 +37,9 @@ enum nf_tables_msg_types { > NFT_MSG_NEWSETELEM, > NFT_MSG_GETSETELEM, > NFT_MSG_DELSETELEM, > - NFT_MSG_COMMIT, > - NFT_MSG_ABORT, > + NFT_MSG_START_TRANSACTION, > + NFT_MSG_COMMIT_TRANSACTION, > + NFT_MSG_ABORT_TRANSACTION, No need to rename this and use long names, I would leave them as: NFT_MSG_BEGIN NFT_MSG_COMMIT NFT_MSG_ABORT > NFT_MSG_MAX, > }; > > @@ -88,18 +89,12 @@ enum nft_chain_attributes { > }; > #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) > > -enum { > - NFT_RULE_F_COMMIT = (1 << 0), > - NFT_RULE_F_MASK = NFT_RULE_F_COMMIT, > -}; I like the idea of removing the COMMIT flag by the explicit NFT_MSG_BEGIN. > - > enum nft_rule_attributes { > NFTA_RULE_UNSPEC, > NFTA_RULE_TABLE, > NFTA_RULE_CHAIN, > NFTA_RULE_HANDLE, > NFTA_RULE_EXPRESSIONS, > - NFTA_RULE_FLAGS, > NFTA_RULE_COMPAT, > __NFTA_RULE_MAX > }; > diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c > index d2da5df..7d2cbd5 100644 > --- a/net/netfilter/nf_tables_api.c > +++ b/net/netfilter/nf_tables_api.c > @@ -1264,7 +1264,6 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { > .len = NFT_CHAIN_MAXNAMELEN - 1 }, > [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, > [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, > - [NFTA_RULE_FLAGS] = { .type = NLA_U32 }, > [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, > }; > > @@ -1518,16 +1517,12 @@ static void nf_tables_rule_destroy(struct nft_rule *rule) > > static struct nft_expr_info *info; > > -static int nf_tables_dirty_add(struct nft_rule *rule, const struct nft_ctx *ctx) > +static int nf_tables_transaction_add(const struct nft_ctx *ctx, > + struct nft_transaction *transaction, > + struct nft_rule *rule) > { > struct nft_rule_update *rupd; > > - /* Another socket owns the dirty list? */ > - if (!ctx->net->nft.pid_owner) > - ctx->net->nft.pid_owner = ctx->nlh->nlmsg_pid; > - else if (ctx->net->nft.pid_owner != ctx->nlh->nlmsg_pid) > - return -EBUSY; We still need that there is a single owner at time. Otherwise two ongoing transactions may overlap. > rupd = kmalloc(sizeof(struct nft_rule_update), GFP_KERNEL); > if (rupd == NULL) > return -ENOMEM; > @@ -1536,7 +1531,7 @@ static int nf_tables_dirty_add(struct nft_rule *rule, const struct nft_ctx *ctx) > rupd->table = ctx->table; > rupd->rule = rule; > rupd->net = ctx->net; > - list_add(&rupd->list, &ctx->net->nft.dirty_rules); > + list_add(&rupd->list, &transaction->updates); > > return 0; > } > @@ -1547,6 +1542,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, > { > const struct nfgenmsg *nfmsg = nlmsg_data(nlh); > const struct nft_af_info *afi; > + struct nft_transaction *transaction = nlsk->sk_user_data; > struct net *net = sock_net(skb->sk); > struct nft_table *table; > struct nft_chain *chain; > @@ -1557,7 +1553,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, > unsigned int size, i, n; > int err, rem; > bool create; > - u32 flags = 0; > u64 handle; > > create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; > @@ -1616,15 +1611,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, > if (rule == NULL) > goto err1; > > - if (nla[NFTA_RULE_FLAGS]) { > - flags = ntohl(nla_get_be32(nla[NFTA_RULE_FLAGS])); > - if (flags & ~NFT_RULE_F_MASK) > - return -EINVAL; > - > - if (flags & NFT_RULE_F_COMMIT) > - nft_rule_activate_next(net, rule); > - } > - > rule->handle = handle; > rule->dlen = size; > > @@ -1637,8 +1623,11 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, > expr = nft_expr_next(expr); > } > > + if (transaction != NULL) > + nft_rule_activate_next(net, rule); > + > if (nlh->nlmsg_flags & NLM_F_REPLACE) { > - if (flags & NFT_RULE_F_COMMIT) { > + if (transaction != NULL) { > nft_rule_disactivate_next(net, old_rule); > list_add_tail_rcu(&rule->list, &chain->rules); > } else { > @@ -1650,8 +1639,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, > else > list_add_rcu(&rule->list, &chain->rules); > > - if (flags & NFT_RULE_F_COMMIT) { > - err = nf_tables_dirty_add(rule, &ctx); > + if (transaction != NULL) { > + err = nf_tables_transaction_add(&ctx, transaction, rule); > if (err < 0) { > list_del_rcu(&rule->list); > goto err2; > @@ -1659,7 +1648,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, > } else { > nf_tables_rule_notify(skb, nlh, table, chain, rule, > NFT_MSG_NEWRULE, > - nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), > + nlh->nlmsg_flags & > + (NLM_F_APPEND | NLM_F_REPLACE), > nfmsg->nfgen_family); > } > return 0; > @@ -1674,17 +1664,18 @@ err1: > return err; > } > > -static int > -nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule, u32 flags) > +static int nf_tables_delrule_one(struct nft_ctx *ctx, > + struct nft_transaction *transaction, > + struct nft_rule *rule) > { > int ret = 0; > > - if (flags & NFT_RULE_F_COMMIT) { > + if (transaction != NULL) { > /* Will be deleted already in the next generation */ > if (!nft_rule_is_active_next(ctx->net, rule)) > return -EBUSY; > > - ret = nf_tables_dirty_add(rule, ctx); > + ret = nf_tables_transaction_add(ctx, transaction, rule); > if (ret >= 0) > nft_rule_disactivate_next(ctx->net, rule); > } else { > @@ -1703,13 +1694,13 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, > { > const struct nfgenmsg *nfmsg = nlmsg_data(nlh); > const struct nft_af_info *afi; > + struct nft_transaction *transaction = nlsk->sk_user_data; > struct net *net = sock_net(skb->sk); > const struct nft_table *table; > struct nft_chain *chain; > struct nft_rule *rule, *tmp; > int family = nfmsg->nfgen_family, err; > struct nft_ctx ctx; > - u32 flags = 0; > > afi = nf_tables_afinfo_lookup(net, family, false); > if (IS_ERR(afi)) > @@ -1725,44 +1716,59 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, > > nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); > > - if (nla[NFTA_RULE_FLAGS]) { > - flags = ntohl(nla_get_be32(nla[NFTA_RULE_FLAGS])); > - > - if (flags & ~NFT_RULE_F_MASK) > - return -EINVAL; > - } > - > if (nla[NFTA_RULE_HANDLE]) { > rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); > if (IS_ERR(rule)) > return PTR_ERR(rule); > > - err = nf_tables_delrule_one(&ctx, rule, flags); > + err = nf_tables_delrule_one(&ctx, transaction, rule); > if (err < 0) > return err; > } else { > /* Remove all rules in this chain */ > list_for_each_entry_safe(rule, tmp, &chain->rules, list) > - nf_tables_delrule_one(&ctx, rule, flags); > + nf_tables_delrule_one(&ctx, transaction, rule); > } > > return 0; > } > > -static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb, > - const struct nlmsghdr *nlh, > - const struct nlattr * const nla[]) > +static int nf_tables_start_transaction(struct sock *nlsk, struct sk_buff *skb, > + const struct nlmsghdr *nlh, > + const struct nlattr * const nla[]) > +{ > + struct nft_transaction *transaction; > + > + if (nlsk->sk_user_data != NULL) > + return -EALREADY; > + > + transaction = kmalloc(sizeof(struct nft_transaction), GFP_KERNEL); > + if (transaction == NULL) > + return -ENOMEM; > + > + INIT_LIST_HEAD(&transaction->updates); > + nlsk->sk_user_data = transaction; This is global to all other subsystems sharing the nfnetlink bus. This patch will be smaller if you reuse the existing per-net list that is used for rule updates. > + > + return 0; > +} > + > +static int nf_tables_commit_transaction(struct sock *nlsk, struct sk_buff *skb, > + const struct nlmsghdr *nlh, > + const struct nlattr * const nla[]) > { > const struct nfgenmsg *nfmsg = nlmsg_data(nlh); > const struct nft_af_info *afi; > + struct nft_transaction *transaction = nlsk->sk_user_data; > struct net *net = sock_net(skb->sk); > struct nft_rule_update *rupd, *tmp; > int family = nfmsg->nfgen_family; > bool create; > > - /* Are you the owner of the dirty list? */ > - if (nlh->nlmsg_pid != net->nft.pid_owner) > - return -EBUSY; > + if (transaction == NULL) > + return -EINVAL; > + > + if (list_empty(&transaction->updates)) > + goto done; > > create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; > > @@ -1781,7 +1787,7 @@ static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb, > */ > synchronize_rcu(); > > - list_for_each_entry_safe(rupd, tmp, &net->nft.dirty_rules, list) { > + list_for_each_entry_safe(rupd, tmp, &transaction->updates, list) { > /* Delete this rule from the dirty list */ > list_del(&rupd->list); > > @@ -1806,47 +1812,47 @@ static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb, > nf_tables_rule_destroy(rupd->rule); > kfree(rupd); > } > - /* Clear owner of this dirty list */ > - net->nft.pid_owner = 0; > + > +done: > + kfree(transaction); > + nlsk->sk_user_data = NULL; > > return 0; > } > > -static void nf_tables_abort_all(struct net *net) > +static void nf_tables_abort_all(struct nft_transaction *transaction) > { > struct nft_rule_update *rupd, *tmp; > > - list_for_each_entry_safe(rupd, tmp, &net->nft.dirty_rules, list) { > - /* Delete all rules from the dirty list */ > + list_for_each_entry_safe(rupd, tmp, &transaction->updates, list) { > + /* Delete all rules from the update list */ > list_del(&rupd->list); > > - if (!nft_rule_is_active_next(rupd->net, rupd->rule)) { > + if (nft_rule_is_active_next(rupd->net, rupd->rule)) { > + list_del_rcu(&rupd->rule->list); > + nf_tables_rule_destroy(rupd->rule); > + } else > nft_rule_clear(rupd->net, rupd->rule); > - kfree(rupd); > - return; > - } > > - /* This rule is inactive, get rid of it */ > - list_del_rcu(&rupd->rule->list); > - nf_tables_rule_destroy(rupd->rule); > kfree(rupd); > } > - /* Clear the owner of the dirty list */ > - net->nft.pid_owner = 0; > } > > -static int nf_tables_abort(struct sock *nlsk, struct sk_buff *skb, > - const struct nlmsghdr *nlh, > - const struct nlattr * const nla[]) > +static int nf_tables_abort_transaction(struct sock *nlsk, struct sk_buff *skb, > + const struct nlmsghdr *nlh, > + const struct nlattr * const nla[]) > { > const struct nfgenmsg *nfmsg = nlmsg_data(nlh); > const struct nft_af_info *afi; > + struct nft_transaction *transaction = nlsk->sk_user_data; > struct net *net = sock_net(skb->sk); > bool create; > > - /* Are you the owner of the dirty list? */ > - if (nlh->nlmsg_pid != net->nft.pid_owner) > - return -EBUSY; > + if (transaction == NULL) > + return -EINVAL; > + > + if (list_empty(&transaction->updates)) > + goto done; > > create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; > > @@ -1854,24 +1860,31 @@ static int nf_tables_abort(struct sock *nlsk, struct sk_buff *skb, > if (IS_ERR(afi)) > return PTR_ERR(afi); > > - nf_tables_abort_all(net); > + nf_tables_abort_all(transaction); > + > +done: > + kfree(transaction); > + nlsk->sk_user_data = NULL; > > return 0; > } > > -static int > -nf_tables_rcv_nl_event(struct notifier_block *this, > - unsigned long event, void *ptr) > +static int nf_tables_rcv_nl_event(struct notifier_block *this, > + unsigned long event, void *ptr) > { > struct netlink_notify *n = ptr; > + struct nft_transaction *transaction; > > if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER) > return NOTIFY_DONE; > > - if (n->portid != n->net->nft.pid_owner) > - return NOTIFY_DONE; > - > - nf_tables_abort_all(n->net); > + transaction = n->net->nfnl->sk_user_data; > + if (transaction != NULL) { > + if (!list_empty(&transaction->updates)) > + nf_tables_abort_all(transaction); > + kfree(transaction); > + n->net->nfnl->sk_user_data = NULL; > + } > > return NOTIFY_DONE; > } > @@ -2840,13 +2853,18 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { > .attr_count = NFTA_SET_ELEM_LIST_MAX, > .policy = nft_set_elem_list_policy, > }, > - [NFT_MSG_COMMIT] = { > - .call = nf_tables_commit, > + [NFT_MSG_START_TRANSACTION] = { > + .call = nf_tables_start_transaction, > + .attr_count = NFTA_TABLE_MAX, > + .policy = nft_rule_policy, > + }, > + [NFT_MSG_COMMIT_TRANSACTION] = { > + .call = nf_tables_commit_transaction, > .attr_count = NFTA_TABLE_MAX, > .policy = nft_rule_policy, > }, > - [NFT_MSG_ABORT] = { > - .call = nf_tables_abort, > + [NFT_MSG_ABORT_TRANSACTION] = { > + .call = nf_tables_abort_transaction, > .attr_count = NFTA_TABLE_MAX, > .policy = nft_rule_policy, > }, > -- > 1.8.1.5 > > -- > To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] nf_tables: Transaction API proposal 2013-03-27 16:35 ` Pablo Neira Ayuso @ 2013-03-27 16:42 ` Pablo Neira Ayuso 2013-03-28 8:01 ` Tomasz Bursztyka 1 sibling, 0 replies; 12+ messages in thread From: Pablo Neira Ayuso @ 2013-03-27 16:42 UTC (permalink / raw) To: Tomasz Bursztyka; +Cc: netfilter-devel, kaber One more thing: On Wed, Mar 27, 2013 at 05:35:50PM +0100, Pablo Neira Ayuso wrote: [...] > > @@ -1650,8 +1639,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, > > else > > list_add_rcu(&rule->list, &chain->rules); > > > > - if (flags & NFT_RULE_F_COMMIT) { > > - err = nf_tables_dirty_add(rule, &ctx); > > + if (transaction != NULL) { > > + err = nf_tables_transaction_add(&ctx, transaction, rule); > > if (err < 0) { > > list_del_rcu(&rule->list); > > goto err2; We can still support incremental updates without transactions (ie. adding/delete one single rule). However, if a non-transactional rule update happens while there is an ongoing transaction, we'll have to reject it with -EBUSY. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] nf_tables: Transaction API proposal 2013-03-27 16:35 ` Pablo Neira Ayuso 2013-03-27 16:42 ` Pablo Neira Ayuso @ 2013-03-28 8:01 ` Tomasz Bursztyka 2013-03-28 10:04 ` Pablo Neira Ayuso 1 sibling, 1 reply; 12+ messages in thread From: Tomasz Bursztyka @ 2013-03-28 8:01 UTC (permalink / raw) To: Pablo Neira Ayuso; +Cc: netfilter-devel, kaber Hi Pablo, >> --- a/include/uapi/linux/netfilter/nf_tables.h >> +++ b/include/uapi/linux/netfilter/nf_tables.h >> @@ -37,8 +37,9 @@ enum nf_tables_msg_types { >> NFT_MSG_NEWSETELEM, >> NFT_MSG_GETSETELEM, >> NFT_MSG_DELSETELEM, >> - NFT_MSG_COMMIT, >> - NFT_MSG_ABORT, >> + NFT_MSG_START_TRANSACTION, >> + NFT_MSG_COMMIT_TRANSACTION, >> + NFT_MSG_ABORT_TRANSACTION, > No need to rename this and use long names, I would leave them as: > > NFT_MSG_BEGIN > NFT_MSG_COMMIT > NFT_MSG_ABORT I did that change to get a fully explicit message name, as all the other ones are actually. Why not shortening to NFT_MSG_BEGIN_TRANS then? or something like that. > >> NFT_MSG_MAX, >> }; >> >> @@ -88,18 +89,12 @@ enum nft_chain_attributes { >> }; >> #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) >> >> -enum { >> - NFT_RULE_F_COMMIT = (1 << 0), >> - NFT_RULE_F_MASK = NFT_RULE_F_COMMIT, >> -}; > I like the idea of removing the COMMIT flag by the explicit > NFT_MSG_BEGIN. > >> -static int nf_tables_dirty_add(struct nft_rule *rule, const struct nft_ctx *ctx) >> +static int nf_tables_transaction_add(const struct nft_ctx *ctx, >> + struct nft_transaction *transaction, >> + struct nft_rule *rule) >> { >> struct nft_rule_update *rupd; >> >> - /* Another socket owns the dirty list? */ >> - if (!ctx->net->nft.pid_owner) >> - ctx->net->nft.pid_owner = ctx->nlh->nlmsg_pid; >> - else if (ctx->net->nft.pid_owner != ctx->nlh->nlmsg_pid) >> - return -EBUSY; > We still need that there is a single owner at time. Otherwise two > ongoing transactions may overlap. One of the point of this RFC is to propose a way to enable transaction per-client. It's actually not nice to enable only one transaction at a time, what do we do if the owner never commits? That's why I thought I could store client's transaction somewhere. But my proposal is bogus anyway as you noticed below, about sk_user_data. > >> +static int nf_tables_start_transaction(struct sock *nlsk, struct sk_buff *skb, >> + const struct nlmsghdr *nlh, >> + const struct nlattr * const nla[]) >> +{ >> + struct nft_transaction *transaction; >> + >> + if (nlsk->sk_user_data != NULL) >> + return -EALREADY; >> + >> + transaction = kmalloc(sizeof(struct nft_transaction), GFP_KERNEL); >> + if (transaction == NULL) >> + return -ENOMEM; >> + >> + INIT_LIST_HEAD(&transaction->updates); >> + nlsk->sk_user_data = transaction; > This is global to all other subsystems sharing the nfnetlink bus. This > patch will be smaller if you reuse the existing per-net list that is > used for rule updates. Ok I was suspecting something like that about this socket. I first thought it was tight to the client. We have to figure out something else then, having a list of pid_owner + transaction list. We could also limit this list to very few amount of owners, let's say 5? Of course this would lead to lookup in this list every time a request is made, to know whether or not the pid_owner has started a transaction or not. I will prepare another RFC Tomasz ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] nf_tables: Transaction API proposal 2013-03-28 8:01 ` Tomasz Bursztyka @ 2013-03-28 10:04 ` Pablo Neira Ayuso 2013-03-28 13:52 ` [RFC v2] " Tomasz Bursztyka 0 siblings, 1 reply; 12+ messages in thread From: Pablo Neira Ayuso @ 2013-03-28 10:04 UTC (permalink / raw) To: Tomasz Bursztyka; +Cc: netfilter-devel, kaber Hi Tomasz, On Thu, Mar 28, 2013 at 10:01:42AM +0200, Tomasz Bursztyka wrote: > Hi Pablo, > > >>--- a/include/uapi/linux/netfilter/nf_tables.h > >>+++ b/include/uapi/linux/netfilter/nf_tables.h > >>@@ -37,8 +37,9 @@ enum nf_tables_msg_types { > >> NFT_MSG_NEWSETELEM, > >> NFT_MSG_GETSETELEM, > >> NFT_MSG_DELSETELEM, > >>- NFT_MSG_COMMIT, > >>- NFT_MSG_ABORT, > >>+ NFT_MSG_START_TRANSACTION, > >>+ NFT_MSG_COMMIT_TRANSACTION, > >>+ NFT_MSG_ABORT_TRANSACTION, > >No need to rename this and use long names, I would leave them as: > > > >NFT_MSG_BEGIN > >NFT_MSG_COMMIT > >NFT_MSG_ABORT > > I did that change to get a fully explicit message name, as all the > other ones are actually. > Why not shortening to NFT_MSG_BEGIN_TRANS then? or something like that. I was just trying to reduce the length of the patch, we can save a couple of renames, but I leave it up to you. > >> NFT_MSG_MAX, > >> }; > >>@@ -88,18 +89,12 @@ enum nft_chain_attributes { > >> }; > >> #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) > >>-enum { > >>- NFT_RULE_F_COMMIT = (1 << 0), > >>- NFT_RULE_F_MASK = NFT_RULE_F_COMMIT, > >>-}; > >I like the idea of removing the COMMIT flag by the explicit > >NFT_MSG_BEGIN. > > > >>-static int nf_tables_dirty_add(struct nft_rule *rule, const struct nft_ctx *ctx) > >>+static int nf_tables_transaction_add(const struct nft_ctx *ctx, > >>+ struct nft_transaction *transaction, > >>+ struct nft_rule *rule) > >> { > >> struct nft_rule_update *rupd; > >>- /* Another socket owns the dirty list? */ > >>- if (!ctx->net->nft.pid_owner) > >>- ctx->net->nft.pid_owner = ctx->nlh->nlmsg_pid; > >>- else if (ctx->net->nft.pid_owner != ctx->nlh->nlmsg_pid) > >>- return -EBUSY; > >We still need that there is a single owner at time. Otherwise two > >ongoing transactions may overlap. > > One of the point of this RFC is to propose a way to enable > transaction per-client. I cannot come up with a way to resolve the case in which several commit overlaps, as they may leave the rule-set in inconsistent state temporarily. Using iptables as reference, it returns an error if you try to commit while another commit is happening. > It's actually not nice to enable only one transaction at a time, > what do we do if the owner never commits? Currently, the transaction is aborted if the process dies or it finishes without committing. If you don't commit, I think the behaviour is similar to databases, the client blocks others. We can document this behaviour. Alternatively, we also have batch helpers in libmnl: http://git.netfilter.org/libmnl/tree/src/nlmsg.c#n387 We can batch several rule updates into one single netlink datagram that is sent to the kernel. The batch needs to be started BEGIN and finished by COMMIT. We may also decide to restrict the BEGIN and COMMIT to batches. Regards. ^ permalink raw reply [flat|nested] 12+ messages in thread
* [RFC v2] nf_tables: Transaction API proposal 2013-03-28 10:04 ` Pablo Neira Ayuso @ 2013-03-28 13:52 ` Tomasz Bursztyka 2013-03-28 17:02 ` Pablo Neira Ayuso 0 siblings, 1 reply; 12+ messages in thread From: Tomasz Bursztyka @ 2013-03-28 13:52 UTC (permalink / raw) To: netfilter-devel; +Cc: kaber, pablo, Tomasz Bursztyka It reworks the current atomic API: - propose START/COMMIT/ABORT transaction messages - remove rule flags: rule manipalution is part of a transaction once one has been started - Enable 1 transaction per-client - 1 commit at a time (mutex based) --- Hi, Ok I tried now to fix a bit better the proposal. The nft message first are more like the other ones: -> not too long, and the meaningful part is concatenated. @Pablo: I took your idea of "BEGIN" instead of "START", it sounds better. I still believe we should enable 1 transaction per-client so now instead of the bogus sk_user_data pointer, it stores the client transaction in a local list. I did not yet put a limit on how many clients can start a transaction actually, it has to be added. It's actually cleaner that way. I also refactored a bit the nf_tables_newrule(), it's cleaner and optimized. (no list_del_rcu(rule) in case of not added properly to the transaction) About the commit, you are right Pablo: only one at a time, this is mandatory. I missed the point on my first proposal. To do so I introduced a mutex. If it's already locked it return -EAGAIN, so it's up to the client to retry. I guess this is anyway not going to happen very often. I don't know if we could get something better here, at least now no client can lock up the other indefinitely, it enables per-client transaction... Please review, include/net/netfilter/nf_tables.h | 11 ++ include/uapi/linux/netfilter/nf_tables.h | 11 +- net/netfilter/nf_tables_api.c | 216 +++++++++++++++++++------------ 3 files changed, 150 insertions(+), 88 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 69cb5ea..028f832 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -354,6 +354,17 @@ struct nft_rule_update { struct net *net; }; +/** + * struct nft_transaction - nf_tables transaction + * + * @updates: list of rule updates for the current transaction + */ +struct nft_transaction { + struct list_head list; + struct list_head updates; + u32 pid_owner; +}; + static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) { return (struct nft_expr *)&rule->data[0]; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 76b215f..5f3251b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -37,8 +37,9 @@ enum nf_tables_msg_types { NFT_MSG_NEWSETELEM, NFT_MSG_GETSETELEM, NFT_MSG_DELSETELEM, - NFT_MSG_COMMIT, - NFT_MSG_ABORT, + NFT_MSG_BEGINTRANSACT, + NFT_MSG_COMMITTRANSACT, + NFT_MSG_ABORTTRANSACT, NFT_MSG_MAX, }; @@ -88,18 +89,12 @@ enum nft_chain_attributes { }; #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) -enum { - NFT_RULE_F_COMMIT = (1 << 0), - NFT_RULE_F_MASK = NFT_RULE_F_COMMIT, -}; - enum nft_rule_attributes { NFTA_RULE_UNSPEC, NFTA_RULE_TABLE, NFTA_RULE_CHAIN, NFTA_RULE_HANDLE, NFTA_RULE_EXPRESSIONS, - NFTA_RULE_FLAGS, NFTA_RULE_COMPAT, __NFTA_RULE_MAX }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d2da5df..0bc6318 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/list.h> +#include <linux/mutex.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/netfilter.h> @@ -22,6 +23,8 @@ #include <net/sock.h> static LIST_HEAD(nf_tables_expressions); +static LIST_HEAD(nf_tables_transactions); +static DEFINE_MUTEX(nf_tables_commit_lock); /** * nft_register_afinfo - register nf_tables address family info @@ -1264,7 +1267,6 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, - [NFTA_RULE_FLAGS] = { .type = NLA_U32 }, [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, }; @@ -1518,15 +1520,35 @@ static void nf_tables_rule_destroy(struct nft_rule *rule) static struct nft_expr_info *info; -static int nf_tables_dirty_add(struct nft_rule *rule, const struct nft_ctx *ctx) +static struct nft_transaction *nf_tables_transaction_lookup(u32 pid_owner) { - struct nft_rule_update *rupd; + struct nft_transaction *transaction; - /* Another socket owns the dirty list? */ - if (!ctx->net->nft.pid_owner) - ctx->net->nft.pid_owner = ctx->nlh->nlmsg_pid; - else if (ctx->net->nft.pid_owner != ctx->nlh->nlmsg_pid) - return -EBUSY; + if (list_empty(&nf_tables_transactions)) + return NULL; + + list_for_each_entry(transaction, &nf_tables_transactions, list) { + if (pid_owner == transaction->pid_owner) + return transaction; + } + + return NULL; +} + +static void nf_tables_transaction_remove(struct nft_transaction *transaction) +{ + nfnl_lock(); + list_del(&transaction->list); + nfnl_unlock(); + + kfree(transaction); +} + +static int nf_tables_transaction_add(const struct nft_ctx *ctx, + struct nft_transaction *transaction, + struct nft_rule *rule) +{ + struct nft_rule_update *rupd; rupd = kmalloc(sizeof(struct nft_rule_update), GFP_KERNEL); if (rupd == NULL) @@ -1536,7 +1558,7 @@ static int nf_tables_dirty_add(struct nft_rule *rule, const struct nft_ctx *ctx) rupd->table = ctx->table; rupd->rule = rule; rupd->net = ctx->net; - list_add(&rupd->list, &ctx->net->nft.dirty_rules); + list_add(&rupd->list, &transaction->updates); return 0; } @@ -1547,6 +1569,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; + struct nft_transaction *transaction; struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_chain *chain; @@ -1557,7 +1580,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, unsigned int size, i, n; int err, rem; bool create; - u32 flags = 0; u64 handle; create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; @@ -1616,15 +1638,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (rule == NULL) goto err1; - if (nla[NFTA_RULE_FLAGS]) { - flags = ntohl(nla_get_be32(nla[NFTA_RULE_FLAGS])); - if (flags & ~NFT_RULE_F_MASK) - return -EINVAL; - - if (flags & NFT_RULE_F_COMMIT) - nft_rule_activate_next(net, rule); - } - rule->handle = handle; rule->dlen = size; @@ -1637,8 +1650,16 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, expr = nft_expr_next(expr); } + transaction = nf_tables_transaction_lookup(nlh->nlmsg_pid); + if (transaction != NULL) { + err = nf_tables_transaction_add(&ctx, transaction, rule); + if (err < 0) + goto err2; + nft_rule_activate_next(net, rule); + } + if (nlh->nlmsg_flags & NLM_F_REPLACE) { - if (flags & NFT_RULE_F_COMMIT) { + if (transaction != NULL) { nft_rule_disactivate_next(net, old_rule); list_add_tail_rcu(&rule->list, &chain->rules); } else { @@ -1650,16 +1671,11 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, else list_add_rcu(&rule->list, &chain->rules); - if (flags & NFT_RULE_F_COMMIT) { - err = nf_tables_dirty_add(rule, &ctx); - if (err < 0) { - list_del_rcu(&rule->list); - goto err2; - } - } else { + if (transaction == NULL) { nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, - nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), + nlh->nlmsg_flags & + (NLM_F_APPEND | NLM_F_REPLACE), nfmsg->nfgen_family); } return 0; @@ -1674,17 +1690,18 @@ err1: return err; } -static int -nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule, u32 flags) +static int nf_tables_delrule_one(struct nft_ctx *ctx, + struct nft_transaction *transaction, + struct nft_rule *rule) { int ret = 0; - if (flags & NFT_RULE_F_COMMIT) { + if (transaction != NULL) { /* Will be deleted already in the next generation */ if (!nft_rule_is_active_next(ctx->net, rule)) return -EBUSY; - ret = nf_tables_dirty_add(rule, ctx); + ret = nf_tables_transaction_add(ctx, transaction, rule); if (ret >= 0) nft_rule_disactivate_next(ctx->net, rule); } else { @@ -1703,13 +1720,13 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; + struct nft_transaction *transaction; struct net *net = sock_net(skb->sk); const struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *tmp; int family = nfmsg->nfgen_family, err; struct nft_ctx ctx; - u32 flags = 0; afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) @@ -1725,44 +1742,68 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); - if (nla[NFTA_RULE_FLAGS]) { - flags = ntohl(nla_get_be32(nla[NFTA_RULE_FLAGS])); - - if (flags & ~NFT_RULE_F_MASK) - return -EINVAL; - } + transaction = nf_tables_transaction_lookup(nlh->nlmsg_pid); if (nla[NFTA_RULE_HANDLE]) { rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) return PTR_ERR(rule); - err = nf_tables_delrule_one(&ctx, rule, flags); + err = nf_tables_delrule_one(&ctx, transaction, rule); if (err < 0) return err; } else { /* Remove all rules in this chain */ list_for_each_entry_safe(rule, tmp, &chain->rules, list) - nf_tables_delrule_one(&ctx, rule, flags); + nf_tables_delrule_one(&ctx, transaction, rule); } return 0; } -static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) +static int nf_tables_begin_transaction(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + struct nft_transaction *transaction; + + transaction = nf_tables_transaction_lookup(nlh->nlmsg_pid); + if (transaction != NULL) + return -EALREADY; + + transaction = kmalloc(sizeof(struct nft_transaction), GFP_KERNEL); + if (transaction == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&transaction->list); + INIT_LIST_HEAD(&transaction->updates); + transaction->pid_owner = nlh->nlmsg_pid; + + nfnl_lock(); + list_add_tail(&transaction->list, &nf_tables_transactions); + nfnl_unlock(); + + return 0; +} + +static int nf_tables_commit_transaction(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; + struct nft_transaction *transaction; struct net *net = sock_net(skb->sk); struct nft_rule_update *rupd, *tmp; int family = nfmsg->nfgen_family; bool create; - /* Are you the owner of the dirty list? */ - if (nlh->nlmsg_pid != net->nft.pid_owner) - return -EBUSY; + transaction = nf_tables_transaction_lookup(nlh->nlmsg_pid); + if (transaction == NULL) + return -EINVAL; + + if (list_empty(&transaction->updates)) + goto done; create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; @@ -1770,6 +1811,10 @@ static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); + /* Check if a commit is on-going */ + if (mutex_trylock(&nf_tables_commit_lock)) + return -EAGAIN; + /* Bump generation counter, invalidate any dump in progress */ net->nft.genctr++; @@ -1781,7 +1826,7 @@ static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb, */ synchronize_rcu(); - list_for_each_entry_safe(rupd, tmp, &net->nft.dirty_rules, list) { + list_for_each_entry_safe(rupd, tmp, &transaction->updates, list) { /* Delete this rule from the dirty list */ list_del(&rupd->list); @@ -1806,47 +1851,49 @@ static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb, nf_tables_rule_destroy(rupd->rule); kfree(rupd); } - /* Clear owner of this dirty list */ - net->nft.pid_owner = 0; + /* Release the commit lock */ + mutex_unlock(&nf_tables_commit_lock); + +done: + nf_tables_transaction_remove(transaction); return 0; } -static void nf_tables_abort_all(struct net *net) +static void nf_tables_abort_all(struct nft_transaction *transaction) { struct nft_rule_update *rupd, *tmp; - list_for_each_entry_safe(rupd, tmp, &net->nft.dirty_rules, list) { - /* Delete all rules from the dirty list */ + list_for_each_entry_safe(rupd, tmp, &transaction->updates, list) { + /* Delete all rules from the update list */ list_del(&rupd->list); - if (!nft_rule_is_active_next(rupd->net, rupd->rule)) { + if (nft_rule_is_active_next(rupd->net, rupd->rule)) { + list_del_rcu(&rupd->rule->list); + nf_tables_rule_destroy(rupd->rule); + } else nft_rule_clear(rupd->net, rupd->rule); - kfree(rupd); - return; - } - /* This rule is inactive, get rid of it */ - list_del_rcu(&rupd->rule->list); - nf_tables_rule_destroy(rupd->rule); kfree(rupd); } - /* Clear the owner of the dirty list */ - net->nft.pid_owner = 0; } -static int nf_tables_abort(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) +static int nf_tables_abort_transaction(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; + struct nft_transaction *transaction; struct net *net = sock_net(skb->sk); bool create; - /* Are you the owner of the dirty list? */ - if (nlh->nlmsg_pid != net->nft.pid_owner) - return -EBUSY; + transaction = nf_tables_transaction_lookup(nlh->nlmsg_pid); + if (transaction == NULL) + return -EINVAL; + + if (list_empty(&transaction->updates)) + goto done; create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; @@ -1854,24 +1901,28 @@ static int nf_tables_abort(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - nf_tables_abort_all(net); + nf_tables_abort_all(transaction); +done: + nf_tables_transaction_remove(transaction); return 0; } -static int -nf_tables_rcv_nl_event(struct notifier_block *this, - unsigned long event, void *ptr) +static int nf_tables_rcv_nl_event(struct notifier_block *this, + unsigned long event, void *ptr) { struct netlink_notify *n = ptr; + struct nft_transaction *transaction; if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER) return NOTIFY_DONE; - if (n->portid != n->net->nft.pid_owner) - return NOTIFY_DONE; - - nf_tables_abort_all(n->net); + transaction = nf_tables_transaction_lookup(n->net->nft.pid_owner); + if (transaction != NULL) { + if (!list_empty(&transaction->updates)) + nf_tables_abort_all(transaction); + nf_tables_transaction_remove(transaction); + } return NOTIFY_DONE; } @@ -2840,13 +2891,18 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, - [NFT_MSG_COMMIT] = { - .call = nf_tables_commit, + [NFT_MSG_BEGINTRANSACT] = { + .call = nf_tables_begin_transaction, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_rule_policy, + }, + [NFT_MSG_COMMITTRANSACT] = { + .call = nf_tables_commit_transaction, .attr_count = NFTA_TABLE_MAX, .policy = nft_rule_policy, }, - [NFT_MSG_ABORT] = { - .call = nf_tables_abort, + [NFT_MSG_ABORTTRANSACT] = { + .call = nf_tables_abort_transaction, .attr_count = NFTA_TABLE_MAX, .policy = nft_rule_policy, }, -- 1.8.1.5 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [RFC v2] nf_tables: Transaction API proposal 2013-03-28 13:52 ` [RFC v2] " Tomasz Bursztyka @ 2013-03-28 17:02 ` Pablo Neira Ayuso 2013-04-02 8:26 ` Tomasz Bursztyka 0 siblings, 1 reply; 12+ messages in thread From: Pablo Neira Ayuso @ 2013-03-28 17:02 UTC (permalink / raw) To: Tomasz Bursztyka; +Cc: netfilter-devel, kaber [-- Attachment #1: Type: text/plain, Size: 13369 bytes --] On Thu, Mar 28, 2013 at 03:52:53PM +0200, Tomasz Bursztyka wrote: > It reworks the current atomic API: > - propose START/COMMIT/ABORT transaction messages > - remove rule flags: rule manipalution is part of a transaction once > one has been started > - Enable 1 transaction per-client > - 1 commit at a time (mutex based) > --- > > Hi, > > Ok I tried now to fix a bit better the proposal. > The nft message first are more like the other ones: > -> not too long, and the meaningful part is concatenated. > > @Pablo: I took your idea of "BEGIN" instead of "START", it sounds better. > > I still believe we should enable 1 transaction per-client so now instead > of the bogus sk_user_data pointer, it stores the client transaction in a local > list. I did not yet put a limit on how many clients can start a transaction actually, > it has to be added. > > It's actually cleaner that way. > > I also refactored a bit the nf_tables_newrule(), it's cleaner and optimized. > (no list_del_rcu(rule) in case of not added properly to the transaction) > > About the commit, you are right Pablo: only one at a time, this is mandatory. > I missed the point on my first proposal. > > To do so I introduced a mutex. If it's already locked it return -EAGAIN, so it's up > to the client to retry. I guess this is anyway not going to happen very often. > I don't know if we could get something better here, at least now no client can > lock up the other indefinitely, it enables per-client transaction... Races may still occur if we try to support simultaneous transactions. client 1 start transaction client 2 add rule X1, table Y1, chain Z1 start transaction ... more rule updates delete all rules in table Y1, chain Z1 ... more rule updates commit commit client 2 will see rules in table Y1, chain Z1 after its commit but will not know why. However, if it hits -EBUSY because another client was performing a transaction, it can retry a fresh update with the current rule-set, not based on the stale one. Regarding your patch: > Please review, > > include/net/netfilter/nf_tables.h | 11 ++ > include/uapi/linux/netfilter/nf_tables.h | 11 +- > net/netfilter/nf_tables_api.c | 216 +++++++++++++++++++------------ > 3 files changed, 150 insertions(+), 88 deletions(-) > > diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h > index 69cb5ea..028f832 100644 > --- a/include/net/netfilter/nf_tables.h > +++ b/include/net/netfilter/nf_tables.h > @@ -354,6 +354,17 @@ struct nft_rule_update { > struct net *net; > }; > > +/** > + * struct nft_transaction - nf_tables transaction > + * > + * @updates: list of rule updates for the current transaction > + */ > +struct nft_transaction { > + struct list_head list; > + struct list_head updates; > + u32 pid_owner; > +}; > + > static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) > { > return (struct nft_expr *)&rule->data[0]; > diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h > index 76b215f..5f3251b 100644 > --- a/include/uapi/linux/netfilter/nf_tables.h > +++ b/include/uapi/linux/netfilter/nf_tables.h > @@ -37,8 +37,9 @@ enum nf_tables_msg_types { > NFT_MSG_NEWSETELEM, > NFT_MSG_GETSETELEM, > NFT_MSG_DELSETELEM, > - NFT_MSG_COMMIT, > - NFT_MSG_ABORT, > + NFT_MSG_BEGINTRANSACT, > + NFT_MSG_COMMITTRANSACT, > + NFT_MSG_ABORTTRANSACT, > NFT_MSG_MAX, > }; > > @@ -88,18 +89,12 @@ enum nft_chain_attributes { > }; > #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) > > -enum { > - NFT_RULE_F_COMMIT = (1 << 0), > - NFT_RULE_F_MASK = NFT_RULE_F_COMMIT, > -}; > - > enum nft_rule_attributes { > NFTA_RULE_UNSPEC, > NFTA_RULE_TABLE, > NFTA_RULE_CHAIN, > NFTA_RULE_HANDLE, > NFTA_RULE_EXPRESSIONS, > - NFTA_RULE_FLAGS, > NFTA_RULE_COMPAT, > __NFTA_RULE_MAX > }; > diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c > index d2da5df..0bc6318 100644 > --- a/net/netfilter/nf_tables_api.c > +++ b/net/netfilter/nf_tables_api.c > @@ -11,6 +11,7 @@ > #include <linux/module.h> > #include <linux/init.h> > #include <linux/list.h> > +#include <linux/mutex.h> > #include <linux/skbuff.h> > #include <linux/netlink.h> > #include <linux/netfilter.h> > @@ -22,6 +23,8 @@ > #include <net/sock.h> > > static LIST_HEAD(nf_tables_expressions); > +static LIST_HEAD(nf_tables_transactions); > +static DEFINE_MUTEX(nf_tables_commit_lock); > > /** > * nft_register_afinfo - register nf_tables address family info > @@ -1264,7 +1267,6 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { > .len = NFT_CHAIN_MAXNAMELEN - 1 }, > [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, > [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, > - [NFTA_RULE_FLAGS] = { .type = NLA_U32 }, > [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, > }; > > @@ -1518,15 +1520,35 @@ static void nf_tables_rule_destroy(struct nft_rule *rule) > > static struct nft_expr_info *info; > > -static int nf_tables_dirty_add(struct nft_rule *rule, const struct nft_ctx *ctx) > +static struct nft_transaction *nf_tables_transaction_lookup(u32 pid_owner) > { > - struct nft_rule_update *rupd; > + struct nft_transaction *transaction; > > - /* Another socket owns the dirty list? */ > - if (!ctx->net->nft.pid_owner) > - ctx->net->nft.pid_owner = ctx->nlh->nlmsg_pid; > - else if (ctx->net->nft.pid_owner != ctx->nlh->nlmsg_pid) > - return -EBUSY; > + if (list_empty(&nf_tables_transactions)) > + return NULL; > + > + list_for_each_entry(transaction, &nf_tables_transactions, list) { > + if (pid_owner == transaction->pid_owner) > + return transaction; > + } > + > + return NULL; > +} > + > +static void nf_tables_transaction_remove(struct nft_transaction *transaction) > +{ > + nfnl_lock(); Won't work. nfnl_lock is already held when calling this function. Note that all nfnetlink functions are currently protected by nfnl_lock. > + list_del(&transaction->list); > + nfnl_unlock(); > + > + kfree(transaction); > +} > + > +static int nf_tables_transaction_add(const struct nft_ctx *ctx, > + struct nft_transaction *transaction, > + struct nft_rule *rule) > +{ > + struct nft_rule_update *rupd; > > rupd = kmalloc(sizeof(struct nft_rule_update), GFP_KERNEL); > if (rupd == NULL) > @@ -1536,7 +1558,7 @@ static int nf_tables_dirty_add(struct nft_rule *rule, const struct nft_ctx *ctx) > rupd->table = ctx->table; > rupd->rule = rule; > rupd->net = ctx->net; > - list_add(&rupd->list, &ctx->net->nft.dirty_rules); > + list_add(&rupd->list, &transaction->updates); > > return 0; > } > @@ -1547,6 +1569,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, > { > const struct nfgenmsg *nfmsg = nlmsg_data(nlh); > const struct nft_af_info *afi; > + struct nft_transaction *transaction; > struct net *net = sock_net(skb->sk); > struct nft_table *table; > struct nft_chain *chain; > @@ -1557,7 +1580,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, > unsigned int size, i, n; > int err, rem; > bool create; > - u32 flags = 0; > u64 handle; > > create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; > @@ -1616,15 +1638,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, > if (rule == NULL) > goto err1; > > - if (nla[NFTA_RULE_FLAGS]) { > - flags = ntohl(nla_get_be32(nla[NFTA_RULE_FLAGS])); > - if (flags & ~NFT_RULE_F_MASK) > - return -EINVAL; > - > - if (flags & NFT_RULE_F_COMMIT) > - nft_rule_activate_next(net, rule); > - } > - > rule->handle = handle; > rule->dlen = size; > > @@ -1637,8 +1650,16 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, > expr = nft_expr_next(expr); > } > > + transaction = nf_tables_transaction_lookup(nlh->nlmsg_pid); > + if (transaction != NULL) { > + err = nf_tables_transaction_add(&ctx, transaction, rule); > + if (err < 0) > + goto err2; > + nft_rule_activate_next(net, rule); > + } > + > if (nlh->nlmsg_flags & NLM_F_REPLACE) { > - if (flags & NFT_RULE_F_COMMIT) { > + if (transaction != NULL) { > nft_rule_disactivate_next(net, old_rule); > list_add_tail_rcu(&rule->list, &chain->rules); > } else { > @@ -1650,16 +1671,11 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, > else > list_add_rcu(&rule->list, &chain->rules); > > - if (flags & NFT_RULE_F_COMMIT) { > - err = nf_tables_dirty_add(rule, &ctx); > - if (err < 0) { > - list_del_rcu(&rule->list); > - goto err2; > - } > - } else { > + if (transaction == NULL) { > nf_tables_rule_notify(skb, nlh, table, chain, rule, > NFT_MSG_NEWRULE, > - nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), > + nlh->nlmsg_flags & > + (NLM_F_APPEND | NLM_F_REPLACE), > nfmsg->nfgen_family); > } > return 0; > @@ -1674,17 +1690,18 @@ err1: > return err; > } > > -static int > -nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule, u32 flags) > +static int nf_tables_delrule_one(struct nft_ctx *ctx, > + struct nft_transaction *transaction, > + struct nft_rule *rule) > { > int ret = 0; > > - if (flags & NFT_RULE_F_COMMIT) { > + if (transaction != NULL) { > /* Will be deleted already in the next generation */ > if (!nft_rule_is_active_next(ctx->net, rule)) > return -EBUSY; > > - ret = nf_tables_dirty_add(rule, ctx); > + ret = nf_tables_transaction_add(ctx, transaction, rule); > if (ret >= 0) > nft_rule_disactivate_next(ctx->net, rule); > } else { > @@ -1703,13 +1720,13 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, > { > const struct nfgenmsg *nfmsg = nlmsg_data(nlh); > const struct nft_af_info *afi; > + struct nft_transaction *transaction; > struct net *net = sock_net(skb->sk); > const struct nft_table *table; > struct nft_chain *chain; > struct nft_rule *rule, *tmp; > int family = nfmsg->nfgen_family, err; > struct nft_ctx ctx; > - u32 flags = 0; > > afi = nf_tables_afinfo_lookup(net, family, false); > if (IS_ERR(afi)) > @@ -1725,44 +1742,68 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, > > nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); > > - if (nla[NFTA_RULE_FLAGS]) { > - flags = ntohl(nla_get_be32(nla[NFTA_RULE_FLAGS])); > - > - if (flags & ~NFT_RULE_F_MASK) > - return -EINVAL; > - } > + transaction = nf_tables_transaction_lookup(nlh->nlmsg_pid); > > if (nla[NFTA_RULE_HANDLE]) { > rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); > if (IS_ERR(rule)) > return PTR_ERR(rule); > > - err = nf_tables_delrule_one(&ctx, rule, flags); > + err = nf_tables_delrule_one(&ctx, transaction, rule); > if (err < 0) > return err; > } else { > /* Remove all rules in this chain */ > list_for_each_entry_safe(rule, tmp, &chain->rules, list) > - nf_tables_delrule_one(&ctx, rule, flags); > + nf_tables_delrule_one(&ctx, transaction, rule); > } > > return 0; > } > > -static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb, > - const struct nlmsghdr *nlh, > - const struct nlattr * const nla[]) > +static int nf_tables_begin_transaction(struct sock *nlsk, struct sk_buff *skb, > + const struct nlmsghdr *nlh, > + const struct nlattr * const nla[]) > +{ > + struct nft_transaction *transaction; > + > + transaction = nf_tables_transaction_lookup(nlh->nlmsg_pid); > + if (transaction != NULL) > + return -EALREADY; > + > + transaction = kmalloc(sizeof(struct nft_transaction), GFP_KERNEL); > + if (transaction == NULL) > + return -ENOMEM; > + > + INIT_LIST_HEAD(&transaction->list); > + INIT_LIST_HEAD(&transaction->updates); > + transaction->pid_owner = nlh->nlmsg_pid; > + > + nfnl_lock(); Same thing as above. > + list_add_tail(&transaction->list, &nf_tables_transactions); > + nfnl_unlock(); > + > + return 0; > +} > + > +static int nf_tables_commit_transaction(struct sock *nlsk, struct sk_buff *skb, > + const struct nlmsghdr *nlh, > + const struct nlattr * const nla[]) > { > const struct nfgenmsg *nfmsg = nlmsg_data(nlh); > const struct nft_af_info *afi; > + struct nft_transaction *transaction; > struct net *net = sock_net(skb->sk); > struct nft_rule_update *rupd, *tmp; > int family = nfmsg->nfgen_family; > bool create; > > - /* Are you the owner of the dirty list? */ > - if (nlh->nlmsg_pid != net->nft.pid_owner) > - return -EBUSY; > + transaction = nf_tables_transaction_lookup(nlh->nlmsg_pid); > + if (transaction == NULL) > + return -EINVAL; > + > + if (list_empty(&transaction->updates)) > + goto done; > > create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; > > @@ -1770,6 +1811,10 @@ static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb, > if (IS_ERR(afi)) > return PTR_ERR(afi); > > + /* Check if a commit is on-going */ > + if (mutex_trylock(&nf_tables_commit_lock)) > + return -EAGAIN; You should use -EBUSY. -EAGAIN is used internally by nfnetlink to retry on module autoload. Note that this is just delaying the moment at which the client hits an error. I'm attaching a patch that removes the commit flag and add the explicit begin operation. [-- Attachment #2: 0001-netfilter-nf_tables-add-explicit-begin-operation-for.patch --] [-- Type: text/x-diff, Size: 6207 bytes --] >From 5723a577c90810435937173f256dbccd015e3b34 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso <pablo@netfilter.org> Date: Thu, 28 Mar 2013 17:50:04 +0100 Subject: [PATCH] netfilter: nf_tables: add explicit begin operation for transactions This patch removes the NFT_RULE_F_COMMIT flag and it adds an explicit begin operation to start transactions, as suggested by Tomasz. You hit -EBUSY if another process started a transaction before you try to perform an incremental (ie. one single rule) or transactional update. Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- include/uapi/linux/netfilter/nf_tables.h | 7 +--- net/netfilter/nf_tables_api.c | 59 +++++++++++++++++------------- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 76b215f..1461a42 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -37,6 +37,7 @@ enum nf_tables_msg_types { NFT_MSG_NEWSETELEM, NFT_MSG_GETSETELEM, NFT_MSG_DELSETELEM, + NFT_MSG_BEGIN, NFT_MSG_COMMIT, NFT_MSG_ABORT, NFT_MSG_MAX, @@ -88,18 +89,12 @@ enum nft_chain_attributes { }; #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) -enum { - NFT_RULE_F_COMMIT = (1 << 0), - NFT_RULE_F_MASK = NFT_RULE_F_COMMIT, -}; - enum nft_rule_attributes { NFTA_RULE_UNSPEC, NFTA_RULE_TABLE, NFTA_RULE_CHAIN, NFTA_RULE_HANDLE, NFTA_RULE_EXPRESSIONS, - NFTA_RULE_FLAGS, NFTA_RULE_COMPAT, __NFTA_RULE_MAX }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 57d28cb..57cfc74 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1264,7 +1264,6 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, - [NFTA_RULE_FLAGS] = { .type = NLA_U32 }, [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, }; @@ -1523,12 +1522,6 @@ static int nf_tables_dirty_add(struct nft_rule *rule, const struct nft_ctx *ctx) { struct nft_rule_update *rupd; - /* Another socket owns the dirty list? */ - if (!ctx->net->nft.pid_owner) - ctx->net->nft.pid_owner = ctx->nlh->nlmsg_pid; - else if (ctx->net->nft.pid_owner != ctx->nlh->nlmsg_pid) - return -EBUSY; - rupd = kmalloc(sizeof(struct nft_rule_update), GFP_KERNEL); if (rupd == NULL) return -ENOMEM; @@ -1558,9 +1551,12 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, unsigned int size, i, n; int err, rem; bool create; - u32 flags = 0; u64 handle; + /* A transaction is happening, tell this process that it should retry */ + if (net->nft.pid_owner && net->nft.pid_owner != nlh->nlmsg_pid) + return -EBUSY; + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); @@ -1617,14 +1613,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (rule == NULL) goto err1; - if (nla[NFTA_RULE_FLAGS]) { - flags = ntohl(nla_get_be32(nla[NFTA_RULE_FLAGS])); - if (flags & ~NFT_RULE_F_MASK) - return -EINVAL; - - if (flags & NFT_RULE_F_COMMIT) - nft_rule_activate_next(net, rule); - } + if (net->nft.pid_owner) + nft_rule_activate_next(net, rule); rule->handle = handle; rule->dlen = size; @@ -1639,7 +1629,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - if (flags & NFT_RULE_F_COMMIT) { + if (net->nft.pid_owner) { nft_rule_disactivate_next(net, old_rule); list_add_tail_rcu(&rule->list, &chain->rules); } else { @@ -1651,7 +1641,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, else list_add_rcu(&rule->list, &chain->rules); - if (flags & NFT_RULE_F_COMMIT) { + if (net->nft.pid_owner) { err = nf_tables_dirty_add(rule, &ctx); if (err < 0) { list_del_rcu(&rule->list); @@ -1680,7 +1670,7 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule, u32 flags) { int ret = 0; - if (flags & NFT_RULE_F_COMMIT) { + if (ctx->net->nft.pid_owner) { /* Will be deleted already in the next generation */ if (!nft_rule_is_active_next(ctx->net, rule)) return -EBUSY; @@ -1712,6 +1702,10 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, struct nft_ctx ctx; u32 flags = 0; + /* A transaction is happening, tell this process that it should retry */ + if (net->nft.pid_owner && net->nft.pid_owner != nlh->nlmsg_pid) + return -EBUSY; + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1726,13 +1720,6 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); - if (nla[NFTA_RULE_FLAGS]) { - flags = ntohl(nla_get_be32(nla[NFTA_RULE_FLAGS])); - - if (flags & ~NFT_RULE_F_MASK) - return -EINVAL; - } - if (nla[NFTA_RULE_HANDLE]) { rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) @@ -1750,6 +1737,21 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, return 0; } +static int nf_tables_begin(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + struct net *net = sock_net(skb->sk); + + /* Check if another process is performing a transaction */ + if (!net->nft.pid_owner) + net->nft.pid_owner = nlh->nlmsg_pid; + else if (net->nft.pid_owner != nlh->nlmsg_pid) + return -EBUSY; + + return 0; +} + static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -2840,6 +2842,11 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, + [NFT_MSG_BEGIN] = { + .call = nf_tables_begin, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_rule_policy, + }, [NFT_MSG_COMMIT] = { .call = nf_tables_commit, .attr_count = NFTA_TABLE_MAX, -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [RFC v2] nf_tables: Transaction API proposal 2013-03-28 17:02 ` Pablo Neira Ayuso @ 2013-04-02 8:26 ` Tomasz Bursztyka 0 siblings, 0 replies; 12+ messages in thread From: Tomasz Bursztyka @ 2013-04-02 8:26 UTC (permalink / raw) To: Pablo Neira Ayuso; +Cc: netfilter-devel, kaber Hi Pablo, >> About the commit, you are right Pablo: only one at a time, this is mandatory. >> I missed the point on my first proposal. >> >> To do so I introduced a mutex. If it's already locked it return -EAGAIN, so it's up >> to the client to retry. I guess this is anyway not going to happen very often. >> I don't know if we could get something better here, at least now no client can >> lock up the other indefinitely, it enables per-client transaction... > Races may still occur if we try to support simultaneous transactions. > > client 1 > start transaction client 2 > add rule X1, table Y1, chain Z1 start transaction > ... more rule updates delete all rules in table Y1, chain Z1 > ... more rule updates commit 1 line is missing notifications (other rules deleted) notifications (ok chain Z1 has no rules) > commit and: notifications (own rules added) notifications (hum, new rules added. ok) > client 2 will see rules in table Y1, chain Z1 after its commit but > will not know why. However, if it hits -EBUSY because another client > was performing a transaction, it can retry a fresh update with the > current rule-set, not based on the stale one. Your example is wrong: it is valid, as it would be also in a unique transaction based approach as your are proposing: client 2 - start / delete all rules / commit client 2 - notifications: see no rules client 1 - start / add rules / commit client 2 - notifications: see rules again. => exact same result, which is a legitimate one in both case. As long as transaction n cannot mess up with any other transaction m, there is definitely no race condition between transactions. It would be a race condition in your example if client 2 can delete the rule updates of client 1. But this must not be the case, and that's also why we should not dump the non-active rules to anybody BUT to the owner. --> As long as a rule is not active: it does not exist but in a transaction. With transaction you never know the result as long as it did not end up to a commit, so there is no point to tell everybody about it before. I still think it's really wrong to propose unique transaction approach: if the client is bogus (and they will! ^^) and never commit/abort its transaction, it locks all others. Anyway, there are issues in my proposal I agree: it's an RFC not a patch. For instance: In your proposal, as long a the transaction is "going on" no other manipulation can be done to the rule base. It's wrong because it locks all other clients (even on untouched tables/chains by the transaction) but it "fixes" the case where non-transaction based manipulation could be done: these are locked as well. And it does that to all manipulations, whatever table/chain is in the game. --> in my RFC, getting non-transaction based manipulation could lead to troubles: what if a non-atomic manipulation removes all rules and the chain itself, where a transaction was working? (yes that one is a valid race condition ;-) ) Here we could fix it, if there is a transaction going on in that specific chain, either by killing the entire transaction and notifying its owner (I would prefer that one), or like you are doing: return ebusy. But I don't like the idea of a transaction being able to lock anyone (I repeat myself, sorry). To me, non-transaction (so non-atomic) based manipulation should always have the highest priority versus any transactions, unless a commit is going on of course. We have to maintain this difference of transaction and non-transaction based manipulation. We have to think properly about that: how we store the transaction in an efficient way so it would not be a performance issue to look-up for a table/chain in transaction list. Things like that. I am sure we can get something still simple and reliable from API point of view but way more flexible than your approach. And with nobody being able to lock this subsystem. We are not in a hurry anyway. > + > +static void nf_tables_transaction_remove(struct nft_transaction *transaction) > +{ > + nfnl_lock(); > Won't work. nfnl_lock is already held when calling this function. Note > that all nfnetlink functions are currently protected by nfnl_lock. Indeed, did that patch too quickly. Loosely looked at nf_tables_expressions. > + /* Check if a commit is on-going */ > + if (mutex_trylock(&nf_tables_commit_lock)) > + return -EAGAIN; > You should use -EBUSY. -EAGAIN is used internally by nfnetlink to > retry on module autoload. Ok good to know. Br, Tomasz ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2013-04-02 8:26 UTC | newest] Thread overview: 12+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2013-02-28 23:08 [PATCH 1/2] netfilter: nf_tables: partially rework commit and abort operation pablo 2013-02-28 23:08 ` [PATCH 2/2] netfilter: nf_tables: don't skip inactive rules and dump generation mask pablo 2013-03-04 12:22 ` [PATCH 1/2] netfilter: nf_tables: partially rework commit and abort operation Tomasz Bursztyka 2013-03-26 10:19 ` [RFC] Atomic rule manipulation part of transactions Tomasz Bursztyka 2013-03-26 10:19 ` [PATCH] nf_tables: Transaction API proposal Tomasz Bursztyka 2013-03-27 16:35 ` Pablo Neira Ayuso 2013-03-27 16:42 ` Pablo Neira Ayuso 2013-03-28 8:01 ` Tomasz Bursztyka 2013-03-28 10:04 ` Pablo Neira Ayuso 2013-03-28 13:52 ` [RFC v2] " Tomasz Bursztyka 2013-03-28 17:02 ` Pablo Neira Ayuso 2013-04-02 8:26 ` Tomasz Bursztyka
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).