netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Patrick McHardy <kaber@trash.net>
To: pablo@netfilter.org
Cc: netfilter-devel@vger.kernel.org, kadlec@blackhole.kfki.hu,
	jengelh@medozas.de, thomas.jarosch@intra2net.com
Subject: Re: [PATCH 1/2] netfilter: add extended accounting infrastructure over nfnetlink
Date: Wed, 14 Dec 2011 12:23:21 +0100	[thread overview]
Message-ID: <4EE88729.7010507@trash.net> (raw)
In-Reply-To: <1323860443-7129-2-git-send-email-pablo@netfilter.org>

On 12/14/2011 12:00 PM, pablo@netfilter.org wrote:
>
> In order to manipulate create accounting objects, you require the
> new libnetfilter_acct library. It contains several examples of use:
>
> libnetfilter_acct/examples# ./nfacct-add http-traffic
> libnetfilter_acct/examples# ./nfacct-get
> http-traffic = { pkts = 000000000000,   bytes = 000000000000 };
>
> Then, you can use one of this accounting objects in several iptables
> rules using the new NFACCT target (which comes in a follow-up patch):
>
>   # iptables -I INPUT -p tcp --sport 80 -j NFACCT --nfacct-name http-traffic
>   # iptables -I OUTPUT -p tcp --dport 80 -j NFACCT --nfacct-name http-traffic
>
> The idea is simple: if one packet matches the rule, the NFACCT target
> updates the counters.

I like the idea.
>
> diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
> new file mode 100644
> index 0000000..3ec407f
> --- /dev/null
> +++ b/net/netfilter/nfnetlink_acct.c
> @@ -0,0 +1,352 @@
> +/*
> + * (C) 2011 Pablo Neira Ayuso<pablo@netfilter.org>
> + * (C) 2011 Intra2net AG<http://www.intra2net.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation (or any later at your option).
> + */
> +#include<linux/init.h>
> +#include<linux/module.h>
> +#include<linux/kernel.h>
> +#include<linux/skbuff.h>
> +#include<linux/netlink.h>
> +#include<linux/rculist.h>
> +#include<linux/slab.h>
> +#include<linux/types.h>
> +#include<linux/errno.h>
> +#include<net/netlink.h>
> +#include<net/sock.h>
> +#include<asm/atomic.h>
> +
> +#include<linux/netfilter.h>
> +#include<linux/netfilter/nfnetlink.h>
> +#include<linux/netfilter/nfnetlink_acct.h>
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Pablo Neira Ayuso<pablo@netfilter.org>");
> +MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
> +
> +static LIST_HEAD(nfnl_acct_list);
> +
> +struct nf_acct {
> +	struct rcu_head		rcu_head;
> +	struct list_head	head;
> +	spinlock_t		lock;	/* to update the counters. */
> +	atomic_t		refcnt;
> +
> +	char			name[NFACCT_NAME_MAX];
> +	__u64			pkts;
> +	__u64			bytes;
> +};
> +
> +static void nfnl_acct_free_rcu(struct rcu_head *rcu_head)
> +{
> +	struct nf_acct *acct = container_of(rcu_head, struct nf_acct, rcu_head);
> +
> +	kfree(acct);
> +}
> +
> +static int
> +nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
> +	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
> +{
> +	int ret;
> +	struct nf_acct *acct, *matching = NULL;
> +	char *acct_name;
> +
> +	if (!tb[NFACCT_NAME])
> +		return -EINVAL;
> +
> +	acct_name = nla_data(tb[NFACCT_NAME]);
> +
> +	rcu_read_lock();
> +	list_for_each_entry(acct,&nfnl_acct_list, head) {

I don't really get the locking concept. All netlink operations happen under
the nfnl mutex, so using RCU for the lookup shouldn't be necessary
(applied to all netlink operations).

> +		if (strncmp(acct->name, acct_name, NFACCT_NAME_MAX) != 0)
> +			continue;
> +
> +                if (nlh->nlmsg_flags&  NLM_F_EXCL) {
> +			rcu_read_unlock();
> +			ret = -EEXIST;
> +			goto err;
> +		}
> +		matching = acct;

break?
> +        }
> +	rcu_read_unlock();
> +
> +	acct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL);
> +	if (acct == NULL) {
> +		ret = -ENOMEM;
> +		goto err;
> +	}
> +	spin_lock_init(&acct->lock);
> +	strncpy(acct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
> +	if (tb[NFACCT_BYTES])
> +		acct->bytes = be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES]));
> +	if (tb[NFACCT_PKTS])
> +		acct->pkts = be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS]));
> +
> +	atomic_inc(&acct->refcnt);
> +
> +	/* We are protected by nfnl mutex. */
> +	if (matching) {

This seems to be a replace operation, so I think you should
require NLM_F_REPLACE. Also it seems you could just
reinitialize the existing counter instead of unconditionally
allocating a new one.

> +		list_del_rcu(&matching->head);
> +		if (atomic_dec_and_test(&matching->refcnt))
> +			call_rcu(&matching->rcu_head, nfnl_acct_free_rcu);
> +
> +	}
> +	list_add_tail_rcu(&acct->head,&nfnl_acct_list);
> +	return 0;
> +err:
> +	return ret;
> +}
> +
> +static int
> +nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
> +		   int event, struct nf_acct *acct)
> +{
> +	struct nlmsghdr *nlh;
> +	struct nfgenmsg *nfmsg;
> +	unsigned int flags = pid ? NLM_F_MULTI : 0;
> +
> +	event |= NFNL_SUBSYS_ACCT<<  8;
> +	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
> +	if (nlh == NULL)
> +		goto nlmsg_failure;
> +
> +	nfmsg = nlmsg_data(nlh);
> +	nfmsg->nfgen_family = AF_UNSPEC;
> +	nfmsg->version = NFNETLINK_V0;
> +	nfmsg->res_id = 0;
> +
> +	NLA_PUT_STRING(skb, NFACCT_NAME, acct->name);
> +	NLA_PUT_BE64(skb, NFACCT_PKTS, cpu_to_be64(acct->pkts));
> +	NLA_PUT_BE64(skb, NFACCT_BYTES, cpu_to_be64(acct->bytes));
> +
> +	nlmsg_end(skb, nlh);
> +	return skb->len;
> +
> +nlmsg_failure:
> +nla_put_failure:
> +	nlmsg_cancel(skb, nlh);
> +	return -1;
> +}
> +
> +static int
> +nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
> +{
> +	struct nf_acct *cur, *last;
> +
> +	if (cb->args[2])
> +		return 0;
> +
> +	last = (struct nf_acct *)cb->args[1];
> +	if (cb->args[1])
> +		cb->args[1] = 0;
> +
> +	rcu_read_lock();
> +	list_for_each_entry(cur,&nfnl_acct_list, head) {
> +		if (last&&  cur != last)
> +			continue;
> +
> +		if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid,
> +				       cb->nlh->nlmsg_seq,
> +				       NFNL_MSG_ACCT_NEW, cur)<  0) {
> +			cb->args[1] = (unsigned long)cur;
> +			break;
> +		}
> +
> +		if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
> +						NFNL_MSG_ACCT_GET_CTRZERO) {
> +			spin_lock_bh(&cur->lock);
> +			cur->pkts = 0;
> +			cur->bytes = 0;
> +			spin_unlock_bh(&cur->lock);
> +		}
> +	}
> +	if (!cb->args[1])
> +		cb->args[2] = 1;
> +	rcu_read_unlock();
> +	return skb->len;
> +}
> +
> +static int
> +nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
> +	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
> +{
> +	int ret = 0;
> +	struct nf_acct *cur;
> +	char *acct_name;
> +
> +	if (nlh->nlmsg_flags&  NLM_F_DUMP) {
> +		return netlink_dump_start(nfnl, skb, nlh, nfnl_acct_dump,
> +					  NULL, 0);
> +	}
> +
> +	if (!tb[NFACCT_NAME])
> +		return -EINVAL;
> +	acct_name = nla_data(tb[NFACCT_NAME]);
> +
> +	rcu_read_lock();
> +	list_for_each_entry(cur,&nfnl_acct_list, head) {
> +		struct sk_buff *skb2;
> +
> +		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
> +			continue;
> +
> +		skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
> +		if (skb2 == NULL)
> +			break;
> +
> +		ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid,
> +					 nlh->nlmsg_seq,
> +					 NFNL_MSG_ACCT_NEW, cur);
> +		if (ret<= 0)
> +			kfree_skb(skb2);
> +
> +		if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
> +						NFNL_MSG_ACCT_GET_CTRZERO) {
> +			spin_lock_bh(&cur->lock);
> +			cur->pkts = 0;
> +			cur->bytes = 0;
> +			spin_unlock_bh(&cur->lock);
> +		}
> +		break;
> +	}
> +	rcu_read_unlock();
> +	return ret;
> +}
> +
> +static int
> +nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
> +	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
> +{
> +	char *acct_name;
> +	struct nf_acct *cur;
> +	int ret = -ENOENT;
> +
> +	if (!tb[NFACCT_NAME]) {
> +		rcu_read_lock();
> +		list_for_each_entry(cur,&nfnl_acct_list, head) {
> +			/* We are protected by nfnl mutex. */
> +			list_del_rcu(&cur->head);
> +			if (atomic_dec_and_test(&cur->refcnt))
> +				call_rcu(&cur->rcu_head, nfnl_acct_free_rcu);

I think its strange to keep the object around after deletion if
it is still in use. In case it is still in use, I'd return -EBUSY.

> +		}
> +		rcu_read_lock();
> +		return 0;
> +	}
> +	acct_name = nla_data(tb[NFACCT_NAME]);
> +
> +	rcu_read_lock();
> +	list_for_each_entry(cur,&nfnl_acct_list, head) {
> +		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
> +			continue;
> +
> +		/* We are protected by nfnl mutex. */
> +		list_del_rcu(&cur->head);
> +		if (atomic_dec_and_test(&cur->refcnt))
> +			call_rcu(&cur->rcu_head, nfnl_acct_free_rcu);
> +		ret = 0;
> +		break;
> +	}
> +	rcu_read_lock();
> +	return ret;
> +}
> +
> +static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
> +	[NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
> +	[NFACCT_BYTES] = { .type = NLA_U64 },
> +	[NFACCT_PKTS] = { .type = NLA_U64 },
> +};
> +
> +static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
> +	[NFNL_MSG_ACCT_NEW]		= { .call = nfnl_acct_new,
> +					    .attr_count = NFACCT_MAX,
> +					    .policy = nfnl_acct_policy },
> +	[NFNL_MSG_ACCT_GET] 		= { .call = nfnl_acct_get,
> +					    .attr_count = NFACCT_MAX,
> +					    .policy = nfnl_acct_policy },
> +	[NFNL_MSG_ACCT_GET_CTRZERO] 	= { .call = nfnl_acct_get,
> +					    .attr_count = NFACCT_MAX,
> +					    .policy = nfnl_acct_policy },
> +	[NFNL_MSG_ACCT_DEL]		= { .call = nfnl_acct_del,
> +					    .attr_count = NFACCT_MAX,
> +					    .policy = nfnl_acct_policy },
> +};
> +
> +static const struct nfnetlink_subsystem nfnl_acct_subsys = {
> +	.name				= "acct",
> +	.subsys_id			= NFNL_SUBSYS_ACCT,
> +	.cb_count			= NFNL_MSG_ACCT_MAX,
> +	.cb				= nfnl_acct_cb,
> +};
> +
> +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
> +
> +struct nf_acct *nfnl_acct_find_get(const char *acct_name)
> +{
> +	struct nf_acct *cur, *acct = NULL;
> +
> +	rcu_read_lock();
> +	list_for_each_entry(cur,&nfnl_acct_list, head) {
> +		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
> +			continue;
> +
> +		acct = cur;
> +		atomic_inc(&acct->refcnt);

This probably needs atomic_inc_not_zero() since the
lookup might race with deletion.

> +		break;
> +	}
> +	rcu_read_unlock();
> +	return acct;
> +}
> +EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
> +
> +void nfnl_acct_put(struct nf_acct *acct)
> +{
> +	if (atomic_dec_and_test(&acct->refcnt))
> +		call_rcu(&acct->rcu_head, nfnl_acct_free_rcu);
> +}
> +EXPORT_SYMBOL_GPL(nfnl_acct_put);
> +
> +void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
> +{
> +	spin_lock_bh(&nfacct->lock);
> +	nfacct->pkts++;
> +	nfacct->bytes += skb->len;
> +	spin_unlock_bh(&nfacct->lock);
> +}
> +EXPORT_SYMBOL_GPL(nfnl_acct_update);
> +
> +static int __init nfnl_acct_init(void)
> +{
> +	int ret;
> +
> +	pr_info("nfnl_acct: registering with nfnetlink.\n");
> +	ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
> +	if (ret<  0) {
> +		pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
> +		goto err_out;
> +	}
> +	return 0;
> +err_out:
> +	return ret;
> +}
> +
> +static void __exit nfnl_acct_exit(void)
> +{
> +	struct nf_acct *cur, *tmp;
> +
> +	pr_info("nfnl_acct: unregistering from nfnetlink.\n");
> +	nfnetlink_subsys_unregister(&nfnl_acct_subsys);
> +
> +	/* if we can remove this module, it means that it has no clients. */
> +	list_for_each_entry_safe(cur, tmp,&nfnl_acct_list, head) {
> +		list_del_rcu(&cur->head);
> +		if (atomic_dec_and_test(&cur->refcnt))
> +			kfree(cur);

What happens if it is non-zero? The iptables target should
take a module reference as long as its using objects that
this module is responsible for managing.

> +	}
> +}
> +
> +module_init(nfnl_acct_init);
> +module_exit(nfnl_acct_exit);


  parent reply	other threads:[~2011-12-14 11:23 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-12-14 11:00 [PATCH 0/2] [RFC] Extended accounting infrastructure for iptables pablo
2011-12-14 11:00 ` [PATCH 1/2] netfilter: add extended accounting infrastructure over nfnetlink pablo
2011-12-14 11:16   ` Eric Dumazet
2011-12-14 12:41     ` Pablo Neira Ayuso
2011-12-14 13:18       ` Eric Dumazet
2011-12-14 13:45         ` Eric Dumazet
2011-12-18  0:21           ` Pablo Neira Ayuso
2011-12-14 11:23   ` Patrick McHardy [this message]
2011-12-14 13:18     ` Pablo Neira Ayuso
2011-12-14 16:31       ` Patrick McHardy
2011-12-15 12:20         ` Pablo Neira Ayuso
2011-12-14 13:23   ` Changli Gao
2011-12-14 13:43   ` Jan Engelhardt
2011-12-14 16:50     ` Pablo Neira Ayuso
2011-12-14 18:30       ` Jozsef Kadlecsik
2011-12-14 23:06         ` Maciej Żenczykowski
2011-12-15 12:26         ` Pablo Neira Ayuso
2011-12-15 12:32           ` Jan Engelhardt
2011-12-14 13:49   ` Anand Raj Manickam
2011-12-14 13:54     ` Eric Dumazet
2011-12-14 11:00 ` [PATCH 2/2] netfilter: xtables: add NFACCT target to support extended accounting pablo
2011-12-14 13:12 ` [PATCH 0/2] [RFC] Extended accounting infrastructure for iptables Changli Gao
2011-12-14 13:30   ` Pablo Neira Ayuso
2011-12-14 13:37     ` Anand Raj Manickam
2011-12-14 14:52     ` Changli Gao
2011-12-14 15:59       ` Jan Engelhardt
2011-12-15 20:23         ` Ferenc Wagner
2011-12-15 21:01           ` Jan Engelhardt
2011-12-16 15:25             ` Ferenc Wagner
2011-12-17 18:05               ` Pablo Neira Ayuso
2011-12-16 13:08           ` Pablo Neira Ayuso
2011-12-14 19:29 ` Pete Holland
2011-12-15 13:22   ` Pablo Neira Ayuso
  -- strict thread matches above, loose matches on Subject: below --
2011-12-23 13:42 [PATCH 0/2] nfacct infrastructure (version 2) pablo
2011-12-23 13:42 ` [PATCH 1/2] netfilter: add extended accounting infrastructure over nfnetlink pablo
2011-12-23 14:10   ` Eric Dumazet
2011-12-23 14:12     ` Eric Dumazet
2011-12-24  0:24       ` Pablo Neira Ayuso
2011-12-24  0:23     ` Pablo Neira Ayuso
2011-12-23 14:54   ` Changli Gao
2011-12-24  0:55     ` Pablo Neira Ayuso

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4EE88729.7010507@trash.net \
    --to=kaber@trash.net \
    --cc=jengelh@medozas.de \
    --cc=kadlec@blackhole.kfki.hu \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=pablo@netfilter.org \
    --cc=thomas.jarosch@intra2net.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).