From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Patrick McHardy <kaber@trash.net>
Cc: netfilter-devel@vger.kernel.org, kadlec@blackhole.kfki.hu,
jengelh@medozas.de, thomas.jarosch@intra2net.com
Subject: Re: [PATCH 1/2] netfilter: add extended accounting infrastructure over nfnetlink
Date: Wed, 14 Dec 2011 14:18:02 +0100 [thread overview]
Message-ID: <20111214131802.GB2749@1984> (raw)
In-Reply-To: <4EE88729.7010507@trash.net>
On Wed, Dec 14, 2011 at 12:23:21PM +0100, Patrick McHardy wrote:
[...]
> >+nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
> >+ const struct nlmsghdr *nlh, const struct nlattr * const tb[])
> >+{
> >+ int ret;
> >+ struct nf_acct *acct, *matching = NULL;
> >+ char *acct_name;
> >+
> >+ if (!tb[NFACCT_NAME])
> >+ return -EINVAL;
> >+
> >+ acct_name = nla_data(tb[NFACCT_NAME]);
> >+
> >+ rcu_read_lock();
> >+ list_for_each_entry(acct,&nfnl_acct_list, head) {
>
> I don't really get the locking concept. All netlink operations happen under
> the nfnl mutex, so using RCU for the lookup shouldn't be necessary
> (applied to all netlink operations).
If you add an iptables rule with NFACCT, you have to iterate over the
list (without the mutex). It is very unlikely, but we may delete an
accounting object via nfnetlink at the same time as the rule that
refers to it is being added.
> >+ if (strncmp(acct->name, acct_name, NFACCT_NAME_MAX) != 0)
> >+ continue;
> >+
> >+ if (nlh->nlmsg_flags& NLM_F_EXCL) {
> >+ rcu_read_unlock();
> >+ ret = -EEXIST;
> >+ goto err;
> >+ }
> >+ matching = acct;
>
> break?
Indeed.
> >+ }
> >+ rcu_read_unlock();
> >+
> >+ acct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL);
> >+ if (acct == NULL) {
> >+ ret = -ENOMEM;
> >+ goto err;
> >+ }
> >+ spin_lock_init(&acct->lock);
> >+ strncpy(acct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
> >+ if (tb[NFACCT_BYTES])
> >+ acct->bytes = be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES]));
> >+ if (tb[NFACCT_PKTS])
> >+ acct->pkts = be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS]));
> >+
> >+ atomic_inc(&acct->refcnt);
> >+
> >+ /* We are protected by nfnl mutex. */
> >+ if (matching) {
>
> This seems to be a replace operation, so I think you should
> require NLM_F_REPLACE. Also it seems you could just
> reinitialize the existing counter instead of unconditionally
> allocating a new one.
I think it's easier to return -EBUSY as you suggested.
> >+ list_del_rcu(&matching->head);
> >+ if (atomic_dec_and_test(&matching->refcnt))
> >+ call_rcu(&matching->rcu_head, nfnl_acct_free_rcu);
> >+
> >+ }
> >+ list_add_tail_rcu(&acct->head,&nfnl_acct_list);
> >+ return 0;
> >+err:
> >+ return ret;
> >+}
> >+
> >+static int
> >+nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
> >+ int event, struct nf_acct *acct)
> >+{
> >+ struct nlmsghdr *nlh;
> >+ struct nfgenmsg *nfmsg;
> >+ unsigned int flags = pid ? NLM_F_MULTI : 0;
> >+
> >+ event |= NFNL_SUBSYS_ACCT<< 8;
> >+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
> >+ if (nlh == NULL)
> >+ goto nlmsg_failure;
> >+
> >+ nfmsg = nlmsg_data(nlh);
> >+ nfmsg->nfgen_family = AF_UNSPEC;
> >+ nfmsg->version = NFNETLINK_V0;
> >+ nfmsg->res_id = 0;
> >+
> >+ NLA_PUT_STRING(skb, NFACCT_NAME, acct->name);
> >+ NLA_PUT_BE64(skb, NFACCT_PKTS, cpu_to_be64(acct->pkts));
> >+ NLA_PUT_BE64(skb, NFACCT_BYTES, cpu_to_be64(acct->bytes));
> >+
> >+ nlmsg_end(skb, nlh);
> >+ return skb->len;
> >+
> >+nlmsg_failure:
> >+nla_put_failure:
> >+ nlmsg_cancel(skb, nlh);
> >+ return -1;
> >+}
> >+
> >+static int
> >+nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
> >+{
> >+ struct nf_acct *cur, *last;
> >+
> >+ if (cb->args[2])
> >+ return 0;
> >+
> >+ last = (struct nf_acct *)cb->args[1];
> >+ if (cb->args[1])
> >+ cb->args[1] = 0;
> >+
> >+ rcu_read_lock();
> >+ list_for_each_entry(cur,&nfnl_acct_list, head) {
> >+ if (last&& cur != last)
> >+ continue;
> >+
> >+ if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid,
> >+ cb->nlh->nlmsg_seq,
> >+ NFNL_MSG_ACCT_NEW, cur)< 0) {
> >+ cb->args[1] = (unsigned long)cur;
> >+ break;
> >+ }
> >+
> >+ if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
> >+ NFNL_MSG_ACCT_GET_CTRZERO) {
> >+ spin_lock_bh(&cur->lock);
> >+ cur->pkts = 0;
> >+ cur->bytes = 0;
> >+ spin_unlock_bh(&cur->lock);
> >+ }
> >+ }
> >+ if (!cb->args[1])
> >+ cb->args[2] = 1;
> >+ rcu_read_unlock();
> >+ return skb->len;
> >+}
> >+
> >+static int
> >+nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
> >+ const struct nlmsghdr *nlh, const struct nlattr * const tb[])
> >+{
> >+ int ret = 0;
> >+ struct nf_acct *cur;
> >+ char *acct_name;
> >+
> >+ if (nlh->nlmsg_flags& NLM_F_DUMP) {
> >+ return netlink_dump_start(nfnl, skb, nlh, nfnl_acct_dump,
> >+ NULL, 0);
> >+ }
> >+
> >+ if (!tb[NFACCT_NAME])
> >+ return -EINVAL;
> >+ acct_name = nla_data(tb[NFACCT_NAME]);
> >+
> >+ rcu_read_lock();
> >+ list_for_each_entry(cur,&nfnl_acct_list, head) {
> >+ struct sk_buff *skb2;
> >+
> >+ if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
> >+ continue;
> >+
> >+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
> >+ if (skb2 == NULL)
> >+ break;
> >+
> >+ ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid,
> >+ nlh->nlmsg_seq,
> >+ NFNL_MSG_ACCT_NEW, cur);
> >+ if (ret<= 0)
> >+ kfree_skb(skb2);
> >+
> >+ if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
> >+ NFNL_MSG_ACCT_GET_CTRZERO) {
> >+ spin_lock_bh(&cur->lock);
> >+ cur->pkts = 0;
> >+ cur->bytes = 0;
> >+ spin_unlock_bh(&cur->lock);
> >+ }
> >+ break;
> >+ }
> >+ rcu_read_unlock();
> >+ return ret;
> >+}
> >+
> >+static int
> >+nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
> >+ const struct nlmsghdr *nlh, const struct nlattr * const tb[])
> >+{
> >+ char *acct_name;
> >+ struct nf_acct *cur;
> >+ int ret = -ENOENT;
> >+
> >+ if (!tb[NFACCT_NAME]) {
> >+ rcu_read_lock();
> >+ list_for_each_entry(cur,&nfnl_acct_list, head) {
> >+ /* We are protected by nfnl mutex. */
> >+ list_del_rcu(&cur->head);
> >+ if (atomic_dec_and_test(&cur->refcnt))
> >+ call_rcu(&cur->rcu_head, nfnl_acct_free_rcu);
>
> I think it's strange to keep the object around after deletion if
> it is still in use. In case it is still in use, I'd return -EBUSY.
-EBUSY sounds fine to me.
> >+ }
> >+ rcu_read_lock();
> >+ return 0;
> >+ }
> >+ acct_name = nla_data(tb[NFACCT_NAME]);
> >+
> >+ rcu_read_lock();
> >+ list_for_each_entry(cur,&nfnl_acct_list, head) {
> >+ if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
> >+ continue;
> >+
> >+ /* We are protected by nfnl mutex. */
> >+ list_del_rcu(&cur->head);
> >+ if (atomic_dec_and_test(&cur->refcnt))
> >+ call_rcu(&cur->rcu_head, nfnl_acct_free_rcu);
> >+ ret = 0;
> >+ break;
> >+ }
> >+ rcu_read_lock();
> >+ return ret;
> >+}
> >+
> >+static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
> >+ [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
> >+ [NFACCT_BYTES] = { .type = NLA_U64 },
> >+ [NFACCT_PKTS] = { .type = NLA_U64 },
> >+};
> >+
> >+static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
> >+ [NFNL_MSG_ACCT_NEW] = { .call = nfnl_acct_new,
> >+ .attr_count = NFACCT_MAX,
> >+ .policy = nfnl_acct_policy },
> >+ [NFNL_MSG_ACCT_GET] = { .call = nfnl_acct_get,
> >+ .attr_count = NFACCT_MAX,
> >+ .policy = nfnl_acct_policy },
> >+ [NFNL_MSG_ACCT_GET_CTRZERO] = { .call = nfnl_acct_get,
> >+ .attr_count = NFACCT_MAX,
> >+ .policy = nfnl_acct_policy },
> >+ [NFNL_MSG_ACCT_DEL] = { .call = nfnl_acct_del,
> >+ .attr_count = NFACCT_MAX,
> >+ .policy = nfnl_acct_policy },
> >+};
> >+
> >+static const struct nfnetlink_subsystem nfnl_acct_subsys = {
> >+ .name = "acct",
> >+ .subsys_id = NFNL_SUBSYS_ACCT,
> >+ .cb_count = NFNL_MSG_ACCT_MAX,
> >+ .cb = nfnl_acct_cb,
> >+};
> >+
> >+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
> >+
> >+struct nf_acct *nfnl_acct_find_get(const char *acct_name)
> >+{
> >+ struct nf_acct *cur, *acct = NULL;
> >+
> >+ rcu_read_lock();
> >+ list_for_each_entry(cur,&nfnl_acct_list, head) {
> >+ if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
> >+ continue;
> >+
> >+ acct = cur;
> >+ atomic_inc(&acct->refcnt);
>
> This probably needs atomic_inc_not_zero() since the
> lookup might race with deletion.
I'll fix this.
> >+ break;
> >+ }
> >+ rcu_read_unlock();
> >+ return acct;
> >+}
> >+EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
> >+
> >+void nfnl_acct_put(struct nf_acct *acct)
> >+{
> >+ if (atomic_dec_and_test(&acct->refcnt))
> >+ call_rcu(&acct->rcu_head, nfnl_acct_free_rcu);
> >+}
> >+EXPORT_SYMBOL_GPL(nfnl_acct_put);
> >+
> >+void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
> >+{
> >+ spin_lock_bh(&nfacct->lock);
> >+ nfacct->pkts++;
> >+ nfacct->bytes += skb->len;
> >+ spin_unlock_bh(&nfacct->lock);
> >+}
> >+EXPORT_SYMBOL_GPL(nfnl_acct_update);
> >+
> >+static int __init nfnl_acct_init(void)
> >+{
> >+ int ret;
> >+
> >+ pr_info("nfnl_acct: registering with nfnetlink.\n");
> >+ ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
> >+ if (ret< 0) {
> >+ pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
> >+ goto err_out;
> >+ }
> >+ return 0;
> >+err_out:
> >+ return ret;
> >+}
> >+
> >+static void __exit nfnl_acct_exit(void)
> >+{
> >+ struct nf_acct *cur, *tmp;
> >+
> >+ pr_info("nfnl_acct: unregistering from nfnetlink.\n");
> >+ nfnetlink_subsys_unregister(&nfnl_acct_subsys);
> >+
> >+ /* if we can remove this module, it means that it has no clients. */
> >+ list_for_each_entry_safe(cur, tmp,&nfnl_acct_list, head) {
> >+ list_del_rcu(&cur->head);
> >+ if (atomic_dec_and_test(&cur->refcnt))
> >+ kfree(cur);
>
> What happens if it is non-zero? The iptables target should
> take a module reference as long as it's using objects that
> this module is responsible for managing.
I'll fix this as well.
Thanks for the review!
next prev parent reply other threads:[~2011-12-14 13:18 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-12-14 11:00 [PATCH 0/2] [RFC] Extended accounting infrastructure for iptables pablo
2011-12-14 11:00 ` [PATCH 1/2] netfilter: add extended accounting infrastructure over nfnetlink pablo
2011-12-14 11:16 ` Eric Dumazet
2011-12-14 12:41 ` Pablo Neira Ayuso
2011-12-14 13:18 ` Eric Dumazet
2011-12-14 13:45 ` Eric Dumazet
2011-12-18 0:21 ` Pablo Neira Ayuso
2011-12-14 11:23 ` Patrick McHardy
2011-12-14 13:18 ` Pablo Neira Ayuso [this message]
2011-12-14 16:31 ` Patrick McHardy
2011-12-15 12:20 ` Pablo Neira Ayuso
2011-12-14 13:23 ` Changli Gao
2011-12-14 13:43 ` Jan Engelhardt
2011-12-14 16:50 ` Pablo Neira Ayuso
2011-12-14 18:30 ` Jozsef Kadlecsik
2011-12-14 23:06 ` Maciej Żenczykowski
2011-12-15 12:26 ` Pablo Neira Ayuso
2011-12-15 12:32 ` Jan Engelhardt
2011-12-14 13:49 ` Anand Raj Manickam
2011-12-14 13:54 ` Eric Dumazet
2011-12-14 11:00 ` [PATCH 2/2] netfilter: xtables: add NFACCT target to support extended accounting pablo
2011-12-14 13:12 ` [PATCH 0/2] [RFC] Extended accounting infrastructure for iptables Changli Gao
2011-12-14 13:30 ` Pablo Neira Ayuso
2011-12-14 13:37 ` Anand Raj Manickam
2011-12-14 14:52 ` Changli Gao
2011-12-14 15:59 ` Jan Engelhardt
2011-12-15 20:23 ` Ferenc Wagner
2011-12-15 21:01 ` Jan Engelhardt
2011-12-16 15:25 ` Ferenc Wagner
2011-12-17 18:05 ` Pablo Neira Ayuso
2011-12-16 13:08 ` Pablo Neira Ayuso
2011-12-14 19:29 ` Pete Holland
2011-12-15 13:22 ` Pablo Neira Ayuso
-- strict thread matches above, loose matches on Subject: below --
2011-12-23 13:42 [PATCH 0/2] nfacct infrastructure (version 2) pablo
2011-12-23 13:42 ` [PATCH 1/2] netfilter: add extended accounting infrastructure over nfnetlink pablo
2011-12-23 14:10 ` Eric Dumazet
2011-12-23 14:12 ` Eric Dumazet
2011-12-24 0:24 ` Pablo Neira Ayuso
2011-12-24 0:23 ` Pablo Neira Ayuso
2011-12-23 14:54 ` Changli Gao
2011-12-24 0:55 ` Pablo Neira Ayuso
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20111214131802.GB2749@1984 \
--to=pablo@netfilter.org \
--cc=jengelh@medozas.de \
--cc=kaber@trash.net \
--cc=kadlec@blackhole.kfki.hu \
--cc=netfilter-devel@vger.kernel.org \
--cc=thomas.jarosch@intra2net.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).