Re: [2.6.16 PATCH] Filessytem Events Reporter V2

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
To: Yi Yang <yang.y.yi@gmail.com>
Cc: LKML <linux-kernel@vger.kernel.org>,
	Andrew Morton <akpm@osdl.org>, Matt Helsley <matthltc@us.ibm.com>
Subject: Re: [2.6.16 PATCH] Filessytem Events Reporter V2
Date: Fri, 7 Apr 2006 10:24:30 +0400	[thread overview]
Message-ID: <20060407062428.GA31351@2ka.mipt.ru> (raw)
In-Reply-To: <4433C456.7010708@gmail.com>

On Wed, Apr 05, 2006 at 09:21:26PM +0800, Yi Yang (yang.y.yi@gmail.com) wrote:
> Compared with Filesystem Events Reporter v1, the following changes are done:
>   - Use workqueue/keventd instead of kfseventd. 
>   - fsevent_mask can be set by sysctl and proc
>     interface.
>   - Add missed spinlock
>   - Ensure fsevent sequence number is uniqe
> 
> This patch implements a filsystem events report facitily, Filesystem Events
> Reporter, the user can monitor filesystem activities via it, currently, it
>  can monitor access, attribute change, open, create, modify, delete,
>  move and close of any file or directory as well as mount/umount.

Comments below.

...
> +
> +static int fsevent_ack(enum fsevent_type type, pid_t pid, u32 seq)
> +{
> +	struct fsevent *event;
> +	unsigned int size;
> +	struct sk_buff * skb = NULL;
> +	struct nlmsghdr * nlhdr = NULL;
> +
> +	size = NLMSG_SPACE(sizeof(struct fsevent));
> +	                                                                                                                                       
> +	skb = alloc_skb(size, GFP_KERNEL);
> +	if (!skb)
> +	        return -ENOMEM;
> +	                                                                                                                                       
> +	nlhdr = NLMSG_PUT(skb, 0, seq, NLMSG_DONE, size - sizeof(*nlhdr));
> +	event = NLMSG_DATA(nlhdr);
> +
> +	ktime_get_ts(&event->timestamp);
> +	event->cpu = -1;
> +	event->type = type; 
> +	event->pid = 0;
> +	event->uid = 0;
> +	event->gid = 0;
> +	event->len = 0;
> +	event->pname_len = 0;
> +	event->fname_len = 0;
> +	event->new_fname_len = 0;
> +	event->err = 0;
> +	                                                                                                                                       
> +	NETLINK_CB(skb).dst_group = 0;
> +	NETLINK_CB(skb).dst_pid = pid;
> +	NETLINK_CB(skb).pid = 0;
> +
> +	return (netlink_unicast(fsevent_sock, skb, pid, MSG_DONTWAIT));

netlink_unicast() uses boolean value but ont MSG_* flags for nonblocking, 
so this should be netlink_unicast(fsevent_sock, skb, pid, 0);

> +nlmsg_failure:
> +	kfree_skb(skb);
> +	return -1;
> +}

...

> +static void fsevent_recv(struct sock *sk, int len)
> +{
> +	struct sk_buff *skb = NULL;
> +	struct nlmsghdr *nlhdr = NULL;
> +	struct fsevent_filter * filter = NULL;
> +	pid_t pid;
> +
> +	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
> +		skb_get(skb);
> +		if (skb->len >= FSEVENT_FILTER_MSGSIZE) {
> +			nlhdr = (struct nlmsghdr *)skb->data;
> +			filter = NLMSG_DATA(nlhdr);
> +			pid = NETLINK_CREDS(skb)->pid;
> +			if (find_fsevent_listener(pid) == NULL)
> +				atomic_inc(&fsevent_listener_num);
> +			set_fsevent_filter(filter, pid);

What is the logic behind this steps?
If there are no listeners you increment it's number no matter if it will
or not be added in set_fsevent_filter().

> +		}
> +		kfree_skb(skb);
> +	}
> +}
> +
> +#define DEFINE_FILTER_MATCH_FUNC(filtertype, key) 			\
> +	static int match_##filtertype(listener * p,			\
> +				struct fsevent * event,			\
> +				struct sk_buff * skb)			\
> +	{								\
> +		int ret = 0;						\
> +		filtertype * xfilter = NULL;				\
> +		struct sk_buff * skb2 = NULL;				\
> +		struct list_head *  head = &(p->key##_filter_list_head);  \
> +		list_for_each_entry(xfilter, head, list) {		\
> +			if (xfilter->key != event->key)			\
> +				continue;				\
> +			ret = filter_fsevent(xfilter->mask, event->type); \
> +			if ( ret != 0)					\
> +				return -1;				\
> +			skb2 = skb_clone(skb, GFP_KERNEL);		\
> +       			if (skb2 == NULL)				\
> +				return -ENOMEM;				\
> +			NETLINK_CB(skb2).dst_group = 0;			\
> +			NETLINK_CB(skb2).dst_pid = p->pid;		\
> +			NETLINK_CB(skb2).pid = 0;			\
> +			return (netlink_unicast(fsevent_sock, skb2,	\
> +					p->pid, MSG_DONTWAIT));		\

The same issue about nonblocking sending.

> +		}							\
> +		return -ENODEV;						\
> +	}								\
> +
> +DEFINE_FILTER_MATCH_FUNC(pid_filter, pid)
> +
> +DEFINE_FILTER_MATCH_FUNC(uid_filter, uid)
> +
> +DEFINE_FILTER_MATCH_FUNC(gid_filter, gid)
> +
> +#define MATCH_XID(key, listenerp, event, skb) 			\
> +	ret = match_##key##_filter(listenerp, event, skb); 	\
> +	if (ret == 0) {					 	\
> +		kfree_skb(skb);				 	\
> +	        continue;				 	\
> +	}						 	\
> +	do {} while (0)					 	\
> +
> +static int fsevent_send_to_process(struct sk_buff * skb)
> +{
> +	listener * p  = NULL, * q = NULL;
> +	struct fsevent * event = NULL;
> +	struct sk_buff * skb2 = NULL;
> +	int ret = 0;
> +
> +	event = (struct fsevent *)(skb->data + sizeof(struct nlmsghdr));
> +	spin_lock(&listener_list_lock);
> +	list_for_each_entry_safe(p, q, &listener_list_head, list) {
> +		MATCH_XID(pid, p, event, skb);
> +		MATCH_XID(uid, p, event, skb);
> +		MATCH_XID(gid, p, event, skb);
> +
> +		if (filter_fsevent(p->mask, event->type) == 0) {
> +			 skb2 = skb_clone(skb, GFP_KERNEL);
> +	                 if (skb2 == NULL)
> +	                 	return -ENOMEM;
> +	                 NETLINK_CB(skb2).dst_group = 0;
> +	                 NETLINK_CB(skb2).dst_pid = p->pid;
> +	                 NETLINK_CB(skb2).pid = 0;
> +	                 ret = netlink_unicast(fsevent_sock, skb2,
> +	                                p->pid, MSG_DONTWAIT);
> +			if (ret == -ECONNREFUSED) {
> +				atomic_dec(&fsevent_listener_num);
> +				cleanup_dead_listener(p);
> +			}
> +		}
> +	}
> +	spin_unlock(&listener_list_lock);
> +	return ret;
> +}
> +
> +static void fsevent_commit(void * unused)
> +{
> +	struct sk_buff * skb = NULL;
> +		
> +	while((skb = skb_dequeue(&get_cpu_var(fsevent_send_queue)))
> +		!= NULL) {
> +		fsevent_send_to_process(skb);
> +		put_cpu_var(fsevent_send_queue);
> +	}
> +}

Really strange mix of per-cpu variables for optimized performance and
global spin locking.
Consider using RCU for list of listeners.

You use unicast delivery for netlink messages. 
According to my investigation [1], it's performance is better only when
there is only one listener (or maybe two in some cases), but then it is
noticebly slower than broadcasting.

1. http://marc.theaimsgroup.com/?l=linux-netdev&m=114424884216006&w=2

> +static struct ctl_table fsevent_mask_sysctl[] = {
> +	{
> +		.ctl_name = FSEVENT_MASK_CTL_NAME,
> +		.procname = "fsevent_mask",
> +		.data = &fsevents_mask,
> +		.maxlen = sizeof(u32),
> +		.mode = 0644,
> +		.proc_handler = &proc_dointvec,
> +	},
> +	{ .ctl_name = 0 }
> +};
> +
> +static struct ctl_table fs_root_sysctl[] = {
> +	{
> +		.ctl_name = CTL_FS,
> +		.procname = "fs",
> +		.mode = 0555,
> +		.child = fsevent_mask_sysctl,
> +	},
> +	{ .ctl_name = 0 }
> +};
> +
> +static int __init fsevent_init(void)
> +{
> +	int cpu;
> +	struct sk_buff_head * listptr;
> +	struct work_struct * workptr;
> +
> +	fsevent_sock = netlink_kernel_create(NETLINK_FSEVENT, 0,
> +					 fsevent_recv, THIS_MODULE);
> +	if (!fsevent_sock)
> +		return -EIO;
> +	for_each_cpu(cpu) {
> +		listptr = &per_cpu(fsevent_send_queue, cpu);
> +		skb_queue_head_init(listptr);
> +		workptr = &per_cpu(fsevent_work, cpu);
> +		INIT_WORK(workptr, fsevent_commit, NULL);
> +	}

Btw, you need some rebalancing of the per-cpu queues, probably in
keventd, since CPUs can go offline and your messages will stuck foreve
there.

> +	if (register_sysctl_table(fs_root_sysctl, 0) == NULL)
> +                return -ENOMEM;
> +
> +	return 0;
> +}
> +
> +static void __exit fsevent_exit(void)
> +{
> +	listener * p = NULL, * q = NULL;
> +	sock_release(fsevent_sock->sk_socket);
> +	spin_lock(&listener_list_lock);
> +	list_for_each_entry_safe(p, q, &listener_list_head, list) {
> +		cleanup_dead_listener(p);
> +	}
> +	spin_unlock(&listener_list_lock);

Broken. Your work can be pending on this stage, but you already removed
and freed resources.

> +}
> +
> +module_init(fsevent_init);
> +module_exit(fsevent_exit);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Yi Yang <yang.y.yi@gmail.com>");
> +MODULE_DESCRIPTION("File System Events Reporter");
> 

-- 
	Evgeniy Polyakov

next prev parent reply	other threads:[~2006-04-07  6:25 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-04-05 13:21 [2.6.16 PATCH] Filessytem Events Reporter V2 Yi Yang
2006-04-05 17:12 ` Carlos Silva
2006-04-05 23:45   ` Yi Yang
2006-04-07  6:24 ` Evgeniy Polyakov [this message]
2006-04-07  8:13   ` Yi Yang
2006-04-07  9:47     ` Evgeniy Polyakov
2006-04-07 10:03       ` Yi Yang
2006-04-07 10:26         ` Evgeniy Polyakov
2006-04-07 15:14           ` Yi Yang
2006-04-07 19:47             ` Evgeniy Polyakov
2006-04-07 20:27               ` Evgeniy Polyakov
  -- strict thread matches above, loose matches on Subject: below --
2006-04-05 14:13 Michael Guo
2006-04-05 23:44 ` Yi Yang
2006-04-06 14:34 Michael Guo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060407062428.GA31351@2ka.mipt.ru \
    --to=johnpol@2ka.mipt.ru \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matthltc@us.ibm.com \
    --cc=yang.y.yi@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.