All of lore.kernel.org
 help / color / mirror / Atom feed
From: Patrick McHardy <kaber@trash.net>
To: pablo@netfilter.org
Cc: netfilter-devel@vger.kernel.org, kadlec@blackhole.kfki.hu,
	jengelh@medozas.de, thomas.jarosch@intra2net.com
Subject: Re: [PATCH 1/2] netfilter: add extended accounting infrastructure over nfnetlink
Date: Wed, 14 Dec 2011 12:23:21 +0100	[thread overview]
Message-ID: <4EE88729.7010507@trash.net> (raw)
In-Reply-To: <1323860443-7129-2-git-send-email-pablo@netfilter.org>

On 12/14/2011 12:00 PM, pablo@netfilter.org wrote:
>
> In order to manipulate create accounting objects, you require the
> new libnetfilter_acct library. It contains several examples of use:
>
> libnetfilter_acct/examples# ./nfacct-add http-traffic
> libnetfilter_acct/examples# ./nfacct-get
> http-traffic = { pkts = 000000000000,   bytes = 000000000000 };
>
> Then, you can use one of this accounting objects in several iptables
> rules using the new NFACCT target (which comes in a follow-up patch):
>
>   # iptables -I INPUT -p tcp --sport 80 -j NFACCT --nfacct-name http-traffic
>   # iptables -I OUTPUT -p tcp --dport 80 -j NFACCT --nfacct-name http-traffic
>
> The idea is simple: if one packet matches the rule, the NFACCT target
> updates the counters.

I like the idea.
>
> diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
> new file mode 100644
> index 0000000..3ec407f
> --- /dev/null
> +++ b/net/netfilter/nfnetlink_acct.c
> @@ -0,0 +1,352 @@
> +/*
> + * (C) 2011 Pablo Neira Ayuso<pablo@netfilter.org>
> + * (C) 2011 Intra2net AG<http://www.intra2net.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation (or any later at your option).
> + */
> +#include<linux/init.h>
> +#include<linux/module.h>
> +#include<linux/kernel.h>
> +#include<linux/skbuff.h>
> +#include<linux/netlink.h>
> +#include<linux/rculist.h>
> +#include<linux/slab.h>
> +#include<linux/types.h>
> +#include<linux/errno.h>
> +#include<net/netlink.h>
> +#include<net/sock.h>
> +#include<asm/atomic.h>
> +
> +#include<linux/netfilter.h>
> +#include<linux/netfilter/nfnetlink.h>
> +#include<linux/netfilter/nfnetlink_acct.h>
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Pablo Neira Ayuso<pablo@netfilter.org>");
> +MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
> +
> +static LIST_HEAD(nfnl_acct_list);
> +
> +struct nf_acct {
> +	struct rcu_head		rcu_head;
> +	struct list_head	head;
> +	spinlock_t		lock;	/* to update the counters. */
> +	atomic_t		refcnt;
> +
> +	char			name[NFACCT_NAME_MAX];
> +	__u64			pkts;
> +	__u64			bytes;
> +};
> +
> +static void nfnl_acct_free_rcu(struct rcu_head *rcu_head)
> +{
> +	struct nf_acct *acct = container_of(rcu_head, struct nf_acct, rcu_head);
> +
> +	kfree(acct);
> +}
> +
> +static int
> +nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
> +	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
> +{
> +	int ret;
> +	struct nf_acct *acct, *matching = NULL;
> +	char *acct_name;
> +
> +	if (!tb[NFACCT_NAME])
> +		return -EINVAL;
> +
> +	acct_name = nla_data(tb[NFACCT_NAME]);
> +
> +	rcu_read_lock();
> +	list_for_each_entry(acct,&nfnl_acct_list, head) {

I don't really get the locking concept. All netlink operations happen under
the nfnl mutex, so using RCU for the lookup shouldn't be necessary
(applied to all netlink operations).

> +		if (strncmp(acct->name, acct_name, NFACCT_NAME_MAX) != 0)
> +			continue;
> +
> +                if (nlh->nlmsg_flags&  NLM_F_EXCL) {
> +			rcu_read_unlock();
> +			ret = -EEXIST;
> +			goto err;
> +		}
> +		matching = acct;

break?
> +        }
> +	rcu_read_unlock();
> +
> +	acct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL);
> +	if (acct == NULL) {
> +		ret = -ENOMEM;
> +		goto err;
> +	}
> +	spin_lock_init(&acct->lock);
> +	strncpy(acct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
> +	if (tb[NFACCT_BYTES])
> +		acct->bytes = be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES]));
> +	if (tb[NFACCT_PKTS])
> +		acct->pkts = be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS]));
> +
> +	atomic_inc(&acct->refcnt);
> +
> +	/* We are protected by nfnl mutex. */
> +	if (matching) {

This seems to be a replace operation, so I think you should
require NLM_F_REPLACE. Also it seems you could just
reinitialize the existing counter instead of unconditionally
allocating a new one.

> +		list_del_rcu(&matching->head);
> +		if (atomic_dec_and_test(&matching->refcnt))
> +			call_rcu(&matching->rcu_head, nfnl_acct_free_rcu);
> +
> +	}
> +	list_add_tail_rcu(&acct->head,&nfnl_acct_list);
> +	return 0;
> +err:
> +	return ret;
> +}
> +
> +static int
> +nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
> +		   int event, struct nf_acct *acct)
> +{
> +	struct nlmsghdr *nlh;
> +	struct nfgenmsg *nfmsg;
> +	unsigned int flags = pid ? NLM_F_MULTI : 0;
> +
> +	event |= NFNL_SUBSYS_ACCT<<  8;
> +	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
> +	if (nlh == NULL)
> +		goto nlmsg_failure;
> +
> +	nfmsg = nlmsg_data(nlh);
> +	nfmsg->nfgen_family = AF_UNSPEC;
> +	nfmsg->version = NFNETLINK_V0;
> +	nfmsg->res_id = 0;
> +
> +	NLA_PUT_STRING(skb, NFACCT_NAME, acct->name);
> +	NLA_PUT_BE64(skb, NFACCT_PKTS, cpu_to_be64(acct->pkts));
> +	NLA_PUT_BE64(skb, NFACCT_BYTES, cpu_to_be64(acct->bytes));
> +
> +	nlmsg_end(skb, nlh);
> +	return skb->len;
> +
> +nlmsg_failure:
> +nla_put_failure:
> +	nlmsg_cancel(skb, nlh);
> +	return -1;
> +}
> +
> +static int
> +nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
> +{
> +	struct nf_acct *cur, *last;
> +
> +	if (cb->args[2])
> +		return 0;
> +
> +	last = (struct nf_acct *)cb->args[1];
> +	if (cb->args[1])
> +		cb->args[1] = 0;
> +
> +	rcu_read_lock();
> +	list_for_each_entry(cur,&nfnl_acct_list, head) {
> +		if (last&&  cur != last)
> +			continue;
> +
> +		if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid,
> +				       cb->nlh->nlmsg_seq,
> +				       NFNL_MSG_ACCT_NEW, cur)<  0) {
> +			cb->args[1] = (unsigned long)cur;
> +			break;
> +		}
> +
> +		if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
> +						NFNL_MSG_ACCT_GET_CTRZERO) {
> +			spin_lock_bh(&cur->lock);
> +			cur->pkts = 0;
> +			cur->bytes = 0;
> +			spin_unlock_bh(&cur->lock);
> +		}
> +	}
> +	if (!cb->args[1])
> +		cb->args[2] = 1;
> +	rcu_read_unlock();
> +	return skb->len;
> +}
> +
> +static int
> +nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
> +	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
> +{
> +	int ret = 0;
> +	struct nf_acct *cur;
> +	char *acct_name;
> +
> +	if (nlh->nlmsg_flags&  NLM_F_DUMP) {
> +		return netlink_dump_start(nfnl, skb, nlh, nfnl_acct_dump,
> +					  NULL, 0);
> +	}
> +
> +	if (!tb[NFACCT_NAME])
> +		return -EINVAL;
> +	acct_name = nla_data(tb[NFACCT_NAME]);
> +
> +	rcu_read_lock();
> +	list_for_each_entry(cur,&nfnl_acct_list, head) {
> +		struct sk_buff *skb2;
> +
> +		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
> +			continue;
> +
> +		skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
> +		if (skb2 == NULL)
> +			break;
> +
> +		ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid,
> +					 nlh->nlmsg_seq,
> +					 NFNL_MSG_ACCT_NEW, cur);
> +		if (ret<= 0)
> +			kfree_skb(skb2);
> +
> +		if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
> +						NFNL_MSG_ACCT_GET_CTRZERO) {
> +			spin_lock_bh(&cur->lock);
> +			cur->pkts = 0;
> +			cur->bytes = 0;
> +			spin_unlock_bh(&cur->lock);
> +		}
> +		break;
> +	}
> +	rcu_read_unlock();
> +	return ret;
> +}
> +
> +static int
> +nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
> +	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
> +{
> +	char *acct_name;
> +	struct nf_acct *cur;
> +	int ret = -ENOENT;
> +
> +	if (!tb[NFACCT_NAME]) {
> +		rcu_read_lock();
> +		list_for_each_entry(cur,&nfnl_acct_list, head) {
> +			/* We are protected by nfnl mutex. */
> +			list_del_rcu(&cur->head);
> +			if (atomic_dec_and_test(&cur->refcnt))
> +				call_rcu(&cur->rcu_head, nfnl_acct_free_rcu);

I think its strange to keep the object around after deletion if
it is still in use. In case it is still in use, I'd return -EBUSY.

> +		}
> +		rcu_read_lock();
> +		return 0;
> +	}
> +	acct_name = nla_data(tb[NFACCT_NAME]);
> +
> +	rcu_read_lock();
> +	list_for_each_entry(cur,&nfnl_acct_list, head) {
> +		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
> +			continue;
> +
> +		/* We are protected by nfnl mutex. */
> +		list_del_rcu(&cur->head);
> +		if (atomic_dec_and_test(&cur->refcnt))
> +			call_rcu(&cur->rcu_head, nfnl_acct_free_rcu);
> +		ret = 0;
> +		break;
> +	}
> +	rcu_read_lock();
> +	return ret;
> +}
> +
> +static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
> +	[NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
> +	[NFACCT_BYTES] = { .type = NLA_U64 },
> +	[NFACCT_PKTS] = { .type = NLA_U64 },
> +};
> +
> +static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
> +	[NFNL_MSG_ACCT_NEW]		= { .call = nfnl_acct_new,
> +					    .attr_count = NFACCT_MAX,
> +					    .policy = nfnl_acct_policy },
> +	[NFNL_MSG_ACCT_GET] 		= { .call = nfnl_acct_get,
> +					    .attr_count = NFACCT_MAX,
> +					    .policy = nfnl_acct_policy },
> +	[NFNL_MSG_ACCT_GET_CTRZERO] 	= { .call = nfnl_acct_get,
> +					    .attr_count = NFACCT_MAX,
> +					    .policy = nfnl_acct_policy },
> +	[NFNL_MSG_ACCT_DEL]		= { .call = nfnl_acct_del,
> +					    .attr_count = NFACCT_MAX,
> +					    .policy = nfnl_acct_policy },
> +};
> +
> +static const struct nfnetlink_subsystem nfnl_acct_subsys = {
> +	.name				= "acct",
> +	.subsys_id			= NFNL_SUBSYS_ACCT,
> +	.cb_count			= NFNL_MSG_ACCT_MAX,
> +	.cb				= nfnl_acct_cb,
> +};
> +
> +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
> +
> +struct nf_acct *nfnl_acct_find_get(const char *acct_name)
> +{
> +	struct nf_acct *cur, *acct = NULL;
> +
> +	rcu_read_lock();
> +	list_for_each_entry(cur,&nfnl_acct_list, head) {
> +		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
> +			continue;
> +
> +		acct = cur;
> +		atomic_inc(&acct->refcnt);

This probably needs atomic_inc_not_zero() since the
lookup might race with deletion.

> +		break;
> +	}
> +	rcu_read_unlock();
> +	return acct;
> +}
> +EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
> +
> +void nfnl_acct_put(struct nf_acct *acct)
> +{
> +	if (atomic_dec_and_test(&acct->refcnt))
> +		call_rcu(&acct->rcu_head, nfnl_acct_free_rcu);
> +}
> +EXPORT_SYMBOL_GPL(nfnl_acct_put);
> +
> +void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
> +{
> +	spin_lock_bh(&nfacct->lock);
> +	nfacct->pkts++;
> +	nfacct->bytes += skb->len;
> +	spin_unlock_bh(&nfacct->lock);
> +}
> +EXPORT_SYMBOL_GPL(nfnl_acct_update);
> +
> +static int __init nfnl_acct_init(void)
> +{
> +	int ret;
> +
> +	pr_info("nfnl_acct: registering with nfnetlink.\n");
> +	ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
> +	if (ret<  0) {
> +		pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
> +		goto err_out;
> +	}
> +	return 0;
> +err_out:
> +	return ret;
> +}
> +
> +static void __exit nfnl_acct_exit(void)
> +{
> +	struct nf_acct *cur, *tmp;
> +
> +	pr_info("nfnl_acct: unregistering from nfnetlink.\n");
> +	nfnetlink_subsys_unregister(&nfnl_acct_subsys);
> +
> +	/* if we can remove this module, it means that it has no clients. */
> +	list_for_each_entry_safe(cur, tmp,&nfnl_acct_list, head) {
> +		list_del_rcu(&cur->head);
> +		if (atomic_dec_and_test(&cur->refcnt))
> +			kfree(cur);

What happens if it is non-zero? The iptables target should
take a module reference as long as its using objects that
this module is responsible for managing.

> +	}
> +}
> +
> +module_init(nfnl_acct_init);
> +module_exit(nfnl_acct_exit);


  parent reply	other threads:[~2011-12-14 11:23 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-12-14 11:00 [PATCH 0/2] [RFC] Extended accounting infrastructure for iptables pablo
2011-12-14 11:00 ` [PATCH 1/2] netfilter: add extended accounting infrastructure over nfnetlink pablo
2011-12-14 11:16   ` Eric Dumazet
2011-12-14 12:41     ` Pablo Neira Ayuso
2011-12-14 13:18       ` Eric Dumazet
2011-12-14 13:45         ` Eric Dumazet
2011-12-18  0:21           ` Pablo Neira Ayuso
2011-12-14 11:23   ` Patrick McHardy [this message]
2011-12-14 13:18     ` Pablo Neira Ayuso
2011-12-14 16:31       ` Patrick McHardy
2011-12-15 12:20         ` Pablo Neira Ayuso
2011-12-14 13:23   ` Changli Gao
2011-12-14 13:43   ` Jan Engelhardt
2011-12-14 16:50     ` Pablo Neira Ayuso
2011-12-14 18:30       ` Jozsef Kadlecsik
2011-12-14 23:06         ` Maciej Żenczykowski
2011-12-15 12:26         ` Pablo Neira Ayuso
2011-12-15 12:32           ` Jan Engelhardt
2011-12-14 13:49   ` Anand Raj Manickam
2011-12-14 13:54     ` Eric Dumazet
2011-12-14 11:00 ` [PATCH 2/2] netfilter: xtables: add NFACCT target to support extended accounting pablo
2011-12-14 13:12 ` [PATCH 0/2] [RFC] Extended accounting infrastructure for iptables Changli Gao
2011-12-14 13:30   ` Pablo Neira Ayuso
2011-12-14 13:37     ` Anand Raj Manickam
2011-12-14 14:52     ` Changli Gao
2011-12-14 15:59       ` Jan Engelhardt
2011-12-15 20:23         ` Ferenc Wagner
2011-12-15 21:01           ` Jan Engelhardt
2011-12-16 15:25             ` Ferenc Wagner
2011-12-17 18:05               ` Pablo Neira Ayuso
2011-12-16 13:08           ` Pablo Neira Ayuso
2011-12-14 19:29 ` Pete Holland
2011-12-15 13:22   ` Pablo Neira Ayuso
  -- strict thread matches above, loose matches on Subject: below --
2011-12-23 13:42 [PATCH 0/2] nfacct infrastructure (version 2) pablo
2011-12-23 13:42 ` [PATCH 1/2] netfilter: add extended accounting infrastructure over nfnetlink pablo
2011-12-23 14:10   ` Eric Dumazet
2011-12-23 14:12     ` Eric Dumazet
2011-12-24  0:24       ` Pablo Neira Ayuso
2011-12-24  0:23     ` Pablo Neira Ayuso
2011-12-23 14:54   ` Changli Gao
2011-12-24  0:55     ` Pablo Neira Ayuso

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4EE88729.7010507@trash.net \
    --to=kaber@trash.net \
    --cc=jengelh@medozas.de \
    --cc=kadlec@blackhole.kfki.hu \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=pablo@netfilter.org \
    --cc=thomas.jarosch@intra2net.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.