Re: [2.6.16 PATCH] Filesystem Events Reporter v1

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Yi Yang <yang.y.yi@gmail.com>
To: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Cc: LKML <linux-kernel@vger.kernel.org>,
	Andrew Morton <akpm@osdl.org>, Matt Helsley <matthltc@us.ibm.com>
Subject: Re: [2.6.16 PATCH] Filesystem Events Reporter v1
Date: Wed, 05 Apr 2006 10:06:28 +0800	[thread overview]
Message-ID: <44332624.1070002@gmail.com> (raw)
In-Reply-To: <20060404061145.GC21358@2ka.mipt.ru>

Evgeniy Polyakov wrote:
> On Mon, Apr 03, 2006 at 10:28:10PM +0800, Yi Yang (yang.y.yi@gmail.com) wrote:
>   
>> This patch has some very big changes with comparison to Filesystem Events Connector v4, 
>> the following features are added newly:
>>
>> 	- Use netlink instead of connector. 
>> 	- Every application using fsevent can set its own fsevent filter list 
>> 	  without effect in other applications using fsevent, there are three
>> 	  filter lists, they are pid filter list, uid filetr list and gid 
>> 	  filter list, respectively, moreover, there is a fsevent mask used to
>> 	  control those fsevents which fail to match  three filter lists, an 
>> 	  application using fsevent can listen those fsevents it want to 
>> 	  monitor and ignore those fsevents it doesn't interest in by set 
>> 	  series of filters, there is a fsevent mask used to take effects on
>> 	  all the applications using fsevent, it can be set by sysctl and proc
>> 	  interface(not implemented).
>> 	- Remove lock race overhead and file system code path delay completely
>> 	  by a per cpu fsevent queue and kthread per cpu, fsevent_send just 
>> 	  mallocs a skb and insert the percpu queue and wake up fseventd 
>> 	  kthread, then return immediately, fseventd per cpu is responsible for
>> 	  send fsevent to userspace application.
>>     
>
> Just FYI, this step has been already implemented in CBUS - connector based message bus.
>
>   
>>  fs/Kconfig               |   10 
>>  fs/Makefile              |    1 
>>  fs/fsevent.c             |  642 ++++++++++++++++++++++++++++++++++++++++++++++
>>  fs/namespace.c           |   12 
>>  include/linux/fsevent.h  |  132 +++++++++
>>  include/linux/fsnotify.h |   37 ++
>>  include/linux/netlink.h  |    3 
>>  7 files changed, 836 insertions(+), 1 deletion(-)
>>
>> Signed-off-by: Yi Yang <yang.y.yi@gmail.com>
>>
>> --- a/include/linux/netlink.h.orig	2006-03-31 22:58:50.000000000 +0800
>> +++ b/include/linux/netlink.h	2006-03-31 23:00:22.000000000 +0800
>> @@ -20,7 +20,8 @@
>>  #define NETLINK_IP6_FW		13
>>  #define NETLINK_DNRTMSG		14	/* DECnet routing messages */
>>  #define NETLINK_KOBJECT_UEVENT	15	/* Kernel messages to userspace */
>> -#define NETLINK_GENERIC		16
>> +#define NETLINK_FSEVENT		16	/* File system events to userspace */
>> +#define NETLINK_GENERIC		17
>>     
>
> You are not allowed to change other numbers, just add your own new number.
>
>   
>>  #define MAX_LINKS 32		
>>  
>> --- a/include/linux/fsnotify.h.orig	2006-01-03 11:21:10.000000000 +0800
>> +++ b/include/linux/fsnotify.h	2006-03-27 23:09:18.000000000 +0800
>> @@ -15,6 +15,7 @@
>>  
>>  #include <linux/dnotify.h>
>>  #include <linux/inotify.h>
>> +#include <linux/fsevent.h>
>>  
>>  /*
>>   * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir
>> @@ -45,6 +46,8 @@ static inline void fsnotify_move(struct 
>>  	if (source) {
>>  		inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL);
>>  	}
>> +	raise_fsevent_move(old_dir, old_name, new_dir, new_name,
>> +			   FSEVENT_MOVE | (isdir?FSEVENT_ISDIR:0));
>>  }
>>  
>>  /*
>> @@ -56,6 +59,8 @@ static inline void fsnotify_nameremove(s
>>  		isdir = IN_ISDIR;
>>  	dnotify_parent(dentry, DN_DELETE);
>>  	inotify_dentry_parent_queue_event(dentry, IN_DELETE|isdir, 0, dentry->d_name.name);
>> +	raise_fsevent(dentry,
>> +		      FSEVENT_DELETE | (isdir?FSEVENT_ISDIR:0));
>>  }
>>  
>>  /*
>> @@ -74,6 +79,7 @@ static inline void fsnotify_create(struc
>>  {
>>  	inode_dir_notify(inode, DN_CREATE);
>>  	inotify_inode_queue_event(inode, IN_CREATE, 0, name);
>> +	raise_fsevent_create(inode, name, FSEVENT_CREATE);
>>  }
>>  
>>  /*
>> @@ -83,6 +89,8 @@ static inline void fsnotify_mkdir(struct
>>  {
>>  	inode_dir_notify(inode, DN_CREATE);
>>  	inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, name);
>> +	raise_fsevent_create(inode, name,
>> +			     FSEVENT_CREATE | FSEVENT_ISDIR);
>>  }
>>  
>>  /*
>> @@ -99,6 +107,8 @@ static inline void fsnotify_access(struc
>>  	dnotify_parent(dentry, DN_ACCESS);
>>  	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
>>  	inotify_inode_queue_event(inode, mask, 0, NULL);
>> +	raise_fsevent(dentry, FSEVENT_ACCESS |
>> +				((S_ISDIR(inode->i_mode))?FSEVENT_ISDIR:0));
>>  }
>>  
>>  /*
>> @@ -115,6 +125,8 @@ static inline void fsnotify_modify(struc
>>  	dnotify_parent(dentry, DN_MODIFY);
>>  	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
>>  	inotify_inode_queue_event(inode, mask, 0, NULL);
>> +	raise_fsevent(dentry, FSEVENT_MODIFY |
>> +				((S_ISDIR(inode->i_mode))?FSEVENT_ISDIR:0));
>>  }
>>  
>>  /*
>> @@ -130,6 +142,9 @@ static inline void fsnotify_open(struct 
>>  
>>  	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
>>  	inotify_inode_queue_event(inode, mask, 0, NULL);	
>> +	raise_fsevent(dentry, FSEVENT_OPEN |
>> +				((S_ISDIR(inode->i_mode))?FSEVENT_ISDIR:0));
>> +
>>  }
>>  
>>  /*
>> @@ -148,6 +163,8 @@ static inline void fsnotify_close(struct
>>  
>>  	inotify_dentry_parent_queue_event(dentry, mask, 0, name);
>>  	inotify_inode_queue_event(inode, mask, 0, NULL);
>> +	raise_fsevent(dentry, FSEVENT_CLOSE |
>> +				((S_ISDIR(inode->i_mode))?FSEVENT_ISDIR:0));
>>  }
>>  
>>  /*
>> @@ -163,6 +180,8 @@ static inline void fsnotify_xattr(struct
>>  
>>  	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
>>  	inotify_inode_queue_event(inode, mask, 0, NULL);
>> +	raise_fsevent(dentry, FSEVENT_MODIFY_ATTRIB |
>> +				((S_ISDIR(inode->i_mode))?FSEVENT_ISDIR:0));
>>  }
>>  
>>  /*
>> @@ -213,6 +232,24 @@ static inline void fsnotify_change(struc
>>  		inotify_dentry_parent_queue_event(dentry, in_mask, 0,
>>  						  dentry->d_name.name);
>>  	}
>> +
>> +#ifdef CONFIG_FS_EVENTS
>> +	{
>> +	u32 fsevent_mask = 0;
>> +	if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE))
>> +		fsevent_mask |= FSEVENT_MODIFY_ATTRIB;
>> +	if ((ia_valid & ATTR_ATIME) && (ia_valid & ATTR_MTIME))
>> +		fsevent_mask |= FSEVENT_MODIFY_ATTRIB;
>> +	else if (ia_valid & ATTR_ATIME)
>> +		fsevent_mask |= FSEVENT_ACCESS;
>> +	else if (ia_valid & ATTR_MTIME)
>> +		fsevent_mask |= FSEVENT_MODIFY;
>> +	if (ia_valid & ATTR_SIZE)
>> +		fsevent_mask |= FSEVENT_MODIFY;
>> +	if (fsevent_mask)
>> +		raise_fsevent(dentry, fsevent_mask);
>> +	}
>> +#endif /* CONFIG_FS_EVENTS */
>>  }
>>  
>>  #ifdef CONFIG_INOTIFY	/* inotify helpers */
>> --- a/fs/namespace.c.orig	2006-03-25 23:13:42.000000000 +0800
>> +++ b/fs/namespace.c	2006-03-27 23:09:18.000000000 +0800
>> @@ -25,6 +25,7 @@
>>  #include <linux/mount.h>
>>  #include <asm/uaccess.h>
>>  #include <asm/unistd.h>
>> +#include <linux/fsevent.h>
>>  #include "pnode.h"
>>  
>>  extern int __init init_rootfs(void);
>> @@ -612,6 +613,13 @@ asmlinkage long sys_umount(char __user *
>>  		goto dput_and_out;
>>  
>>  	retval = do_umount(nd.mnt, flags);
>> +
>> +	if (retval == 0) {
>> +		char * tmp = getname(name);
>> +		raise_fsevent_umount(tmp);
>> +		putname(tmp);
>> +	}
>> +
>>  dput_and_out:
>>  	path_release_on_umount(&nd);
>>  out:
>> @@ -1459,6 +1467,10 @@ asmlinkage long sys_mount(char __user * 
>>  	retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
>>  			  flags, (void *)data_page);
>>  	unlock_kernel();
>> +
>> +	if (retval == 0)
>> +		raise_fsevent_mount((char *)dev_page, dir_page);
>> +
>>  	free_page(data_page);
>>  
>>  out3:
>> --- a/fs/Kconfig.orig	2006-03-31 21:23:20.000000000 +0800
>> +++ b/fs/Kconfig	2006-04-03 22:01:17.000000000 +0800
>> @@ -405,6 +405,16 @@ config INOTIFY
>>  
>>  	  If unsure, say Y.
>>  
>> +config FS_EVENTS
>> +	tristate "Report filesystem events to userspace"
>> +	---help---
>> +	  Provide a facility that reports filesystem events to userspace. The
>> +	  reported event include access, write, utime, chmod, chown, chgrp,
>> +	  close, open, create, rename, unlink, mkdir, rmdir, mount, umount.
>> +
>> +	  The user can set filesystem events filter to filter its events, so
>> +	  that he just get those events he concerns.
>> +
>>  config QUOTA
>>  	bool "Quota support"
>>  	help
>> --- a/fs/Makefile.orig	2006-03-31 21:23:33.000000000 +0800
>> +++ b/fs/Makefile	2006-03-31 21:30:58.000000000 +0800
>> @@ -13,6 +13,7 @@ obj-y :=	open.o read_write.o file_table.
>>  		ioprio.o pnode.o drop_caches.o
>>  
>>  obj-$(CONFIG_INOTIFY)		+= inotify.o
>> +obj-$(CONFIG_FS_EVENTS)		+= fsevent.o
>>  obj-$(CONFIG_EPOLL)		+= eventpoll.o
>>  obj-$(CONFIG_COMPAT)		+= compat.o compat_ioctl.o
>>  
>> --- /dev/null	2003-01-30 18:24:37.000000000 +0800
>> +++ b/include/linux/fsevent.h	2006-04-02 01:35:04.000000000 +0800
>> @@ -0,0 +1,132 @@
>> +/*
>> + * fsevent.h - filesystem events connector
>> + *
>> + * Copyright (C) 2006 Yi Yang <yang.y.yi@gmail.com>
>> + * Based on cn_proc.h by Matt Helsley, IBM Corp
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
>> + */
>> +
>> +#ifndef LINUX_FSEVENT_H
>> +#define LINUX_FSEVENT_H
>> +
>> +#include <linux/types.h>
>> +#include <linux/time.h>
>> +#include <linux/netlink.h>
>> +
>> +enum  fsevent_type {
>> +	FSEVENT_ACCESS = 	0x00000001,	/* File was accessed */
>> +	FSEVENT_MODIFY = 	0x00000002,	/* File was modified */
>> +	FSEVENT_MODIFY_ATTRIB = 0x00000004,	/* Metadata changed */
>> +	FSEVENT_CLOSE = 	0x00000008,	/* File was closed */
>> +	FSEVENT_OPEN = 		0x00000010,	/* File was opened */
>> +	FSEVENT_MOVE = 		0x00000020,	/* File was moved */
>> +	FSEVENT_CREATE = 	0x00000040,	/* File was created */
>> +	FSEVENT_DELETE =	0x00000080,	/* File was deleted */
>> +	FSEVENT_MOUNT =		0x00000100,	/* File system is mounted */
>> +	FSEVENT_UMOUNT =	0x00000200,	/* File system is umounted */
>> +
>> +	/* The following definitions are commands for event filter
>> +	 * , in acknowlege event for the corresponding command, it
>> +	 * will set to type field of struct fsevent.
>> +	*/
>> +	FSEVENT_FILTER_ALL = 	0x08000000,	/* For all events */
>> +	FSEVENT_FILTER_PID = 	0x10000000,	/* For some process ID */
>> +	FSEVENT_FILTER_UID = 	0x20000000,	/* For some user ID */
>> +	FSEVENT_FILTER_GID =	0x40000000,	/* For some group ID */
>> +
>> +	FSEVENT_ISDIR = 	0x80000000	/* It is set for a dir */
>> +};
>> +
>> +#define FSEVENT_MASK 0x800003ff
>> +
>> +typedef unsigned long fsevent_mask_t;
>> +
>> +enum filter_control {
>> +	FSEVENT_FILTER_LISTEN = 1,		/* Listen fsevents mask defines*/
>> +	FSEVENT_FILTER_IGNORE ,		/* Ignore fsevents mask defines*/
>> +	FSEVENT_FILTER_REMOVE,			/* Remove a given filter */
>> +};	
>> +
>> +struct fsevent_filter {
>> +	/* filter type, it just is one or bit-or of them
>> +	 * FSEVENT_FILTER_ALL
>> +	 * FSEVENT_FILTER_PID
>> +	 * FSEVENT_FILTER_UID
>> +	 * FSEVENT_FILTER_GID
>> +	 */
>> +	enum fsevent_type type;	/* filter type */
>> +
>> +	/* mask of file system events the user listen or ignore
>> +	 * if the user need to ignore all the events of some pid
>> +	 * , gid or uid, he(she) must set mask to FSEVENT_MASK.
>> +	 */ 
>> +	fsevent_mask_t mask;
>> +	union {
>> +		pid_t pid;
>> +		uid_t uid;
>> +		gid_t gid;
>> +	} id;
>> +
>> +	enum filter_control control;
>> +};
>> +
>> +struct fsevent {
>> +	__u32 type;
>> +	__u32 cpu;
>> +	struct timespec timestamp;
>> +	pid_t pid;
>> +	uid_t uid;
>> +	gid_t gid;
>> +	int err;
>> +	__u32 len;
>> +	__u32 pname_len;
>> +	__u32 fname_len;
>> +	__u32 new_fname_len;
>> +	char name[0];
>> +};
>> +
>> +#define FSEVENT_FILTER_MSGSIZE \
>> +	(sizeof(struct fsevent_filter) + sizeof(struct nlmsghdr))
>> +
>> +#ifdef __KERNEL__
>> +#ifdef CONFIG_FS_EVENTS
>> +extern void raise_fsevent(struct dentry * dentryp, u32 mask);
>> +extern void raise_fsevent_move(struct inode * olddir, const char * oldname, 
>> +		struct inode * newdir, const char * newname, u32 mask);
>> +extern void raise_fsevent_create(struct inode * inode, 
>> +		const char * name, u32 mask);
>> +extern void raise_fsevent_mount(const char * devname, const char * mountpoint);
>> +extern void raise_fsevent_umount(const char * mountpoint);
>> +#else
>> +static void raise_fsevent(struct dentry * dentryp,  u32 mask)
>> +{}
>> +
>> +static void raise_fsevent_move(struct inode * olddir, const char * oldname, 
>> +		struct inode * newdir, const char * newname, u32 mask)
>> +{}
>> +
>> +static void raise_fsevent_create(struct inode * inode, 
>> +		const char * name, u32 mask)
>> +{}
>> +
>> +static void raise_fsevent_mount(const char * devname, const char * mountpoint)
>> +{}
>> +
>> +static void raise_fsevent_umount(const char * mountpoint)
>> +{}
>> +#endif	/* CONFIG_FS_EVENTS */
>> +#endif	/* __KERNEL__ */
>> +#endif	/* LINUX_FSEVENT_H */
>> --- /dev/null	2003-01-30 18:24:37.000000000 +0800
>> +++ b/fs/fsevent.c	2006-04-02 23:10:45.000000000 +0800
>> @@ -0,0 +1,642 @@
>> +/*
>> + * 	fsevent.c
>> + * 
>> + * 2006 Copyright (c) Yi Yang <yang.y.yi@gmail.com>
>> + * All rights reserved.
>> + * 
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/module.h>
>> +#include <linux/skbuff.h>
>> +#include <linux/netlink.h>
>> +#include <linux/moduleparam.h>
>> +#include <linux/fsevent.h>
>> +#include <linux/skbuff.h>
>> +#include <net/sock.h>
>> +#include <linux/list.h>
>> +#include <linux/percpu.h>
>> +#include <linux/cpu.h>
>> +#include <linux/kthread.h>
>> +#include <linux/notifier.h>
>> +#include <linux/compiler.h>
>> +
>> +static DEFINE_PER_CPU(struct sk_buff_head, fsevent_send_queue);
>> +static DEFINE_PER_CPU(task_t *, kfseventd_task);
>> +
>> +typedef struct pid_filter {
>> +	pid_t pid;
>> +	u32 mask;
>> +	struct list_head list;
>> +} pid_filter;
>> +
>> +typedef struct uid_filter {
>> +	uid_t uid;
>> +	u32 mask;
>> +	struct list_head list;
>> +} uid_filter;
>> +
>> +typedef struct gid_filter {
>> +	gid_t gid;
>> +	u32 mask;
>> +	struct list_head list;
>> +} gid_filter;
>> +
>> +typedef struct fsevent_listener {
>> +	pid_t pid;
>> +	struct list_head pid_filter_list_head;
>> +	struct list_head uid_filter_list_head;
>> +	struct list_head gid_filter_list_head;
>> +	u32 mask;
>> +	struct list_head list;
>> +} listener;
>> +
>> +	
>> +/* The netlink socket. */
>> +static struct sock * fsevent_sock = NULL;
>> +static LIST_HEAD(listener_list_head);
>> +static DEFINE_SPINLOCK(listener_list_lock);
>> +
>> +static DEFINE_PER_CPU(__u32, cn_fs_event_counts) = { 0 };
>> +static int fsevent_burst_limit = 100;
>> +static int fsevent_ratelimit = 5 * HZ;
>> +static unsigned long last = 0;
>> +static int fsevent_sum = 0;
>> +static u32 fsevents_mask = FSEVENT_MASK;
>> +static atomic_t fsevent_listener_num = ATOMIC_INIT(0);
>> +
>> +static inline void get_seq(__u32 *ts, int *cpu)
>> +{
>> +	*ts = get_cpu_var(cn_fs_event_counts)++;
>> +	*cpu = smp_processor_id();
>> +	put_cpu_var(cn_fs_event_counts);
>> +}
>>     
>
> This is not enough, sequence number must be uniq for the whole system,
> not only one processor.
>
>   
I'll convert it to atomic_t type.
>> +
>> +static void append_string(char **dest, const char *src, size_t len)
>> +{
>> +	strncpy(*dest, src, len);
>> +	(*dest)[len] = '\0';
>> +	*dest += len + 1;
>> +}
>> +
>> +static inline int filter_fsevent(u32 filter_mask, u32 event_mask)
>> +{
>> +	event_mask &= FSEVENT_MASK;
>> +	event_mask &= filter_mask;
>> +	if (event_mask == 0) {
>> +		return -1;
>> +	}
>> +	return 0;
>> +}
>> +
>> +static int filter_fsevent_all(u32 * mask)
>> +{
>> +	int ret = 0;
>> +
>> +	(*mask) &= FSEVENT_MASK;
>> +
>> +	if ((((*mask) & FSEVENT_ISDIR) == FSEVENT_ISDIR)
>> +		 && ((fsevents_mask & FSEVENT_ISDIR) == 0)) {
>> +		ret = -1;
>> +		goto out;
>> +	}
>> +
>> +	(*mask) &= fsevents_mask;
>> +	if ((*mask) == 0) {
>> +		ret = -5;
>> +	}
>> +
>> +out:
>> +	return ret;
>> +}
>> +
>> +static void fsevent_send(struct sk_buff * skb)
>> +{
>> +	struct sk_buff_head * head = &get_cpu_var(fsevent_send_queue);
>> +	skb_queue_tail(head, skb);
>> +	wake_up_process(per_cpu(kfseventd_task, smp_processor_id()));
>> +	put_cpu_var(fsevent_send_queue);
>> +}
>> +
>>     
>
> This will heavily reorder messages, hope you have sequence number for
> them?
>   
I think the order of fsevents shouldn't matter. As you said, it is
more appropriate to add a sequence number to struct fsevent and let
applications take it into account.
>   
>> +int __raise_fsevent(const char * oldname, const char * newname, u32 mask)
>> +{
>> +	struct fsevent *event;
>> +	int namelen = 0;
>> +	char * nameptr = NULL;
>> +	unsigned int size;
>> +	struct nlmsghdr * nlhdr;
>> +	struct sk_buff * skb = NULL;
>> +
>> +	if (filter_fsevent_all(&mask) != 0)
>> +		return -1;
>> +
>> +	if (atomic_read(&fsevent_listener_num) <= 0)
>> +		return -1;
>> +
>> +	if (jiffies - last <= fsevent_ratelimit) {
>> +		if (fsevent_sum > fsevent_burst_limit)
>> +			return -2;
>> +		fsevent_sum++;
>> +	} else {
>> +		last = jiffies;
>> +		fsevent_sum = 0;
>> +	}
>> +
>> +	namelen = strlen(current->comm) + strlen(oldname) + 2;
>> +	if (newname)
>> +		namelen += strlen(newname) + 1;
>> +
>> +	size = NLMSG_SPACE(sizeof(struct fsevent) + namelen);
>> +	                                                                                                                                       
>> +	skb = alloc_skb(size, GFP_KERNEL);
>> +	if (!skb)
>> +	        return -ENOMEM;
>> +	                                                                                                                                       
>> +	nlhdr = NLMSG_PUT(skb, 0, 0, NLMSG_DONE, size - sizeof(*nlhdr));
>> +	event = NLMSG_DATA(nlhdr);
>> +
>> +	get_seq(&(nlhdr->nlmsg_seq), &event->cpu);
>> +	ktime_get_ts(&event->timestamp);
>>     
>
> Using timestamp is a good idea.
>
>   
>> +	event->type = mask;
>> +	event->pid = current->tgid;
>> +	event->uid = current->uid;
>> +	event->gid = current->gid;
>> +	nameptr = event->name;
>> +	event->pname_len = strlen(current->comm);
>> +	append_string(&nameptr, current->comm, event->pname_len);
>> +	event->fname_len = strlen(oldname);
>> +	append_string(&nameptr, oldname, event->fname_len);
>> +	event->len = event->pname_len +  event->fname_len + 2;
>> +	event->new_fname_len = 0;
>> +	if (newname) {
>> +		event->new_fname_len = strlen(newname);
>> +		append_string(&nameptr, newname, event->new_fname_len);
>> +		event->len += event->new_fname_len + 1;
>> +	}
>> +	fsevent_send(skb);
>> +	return 0;
>> +
>> +nlmsg_failure:
>> +	kfree_skb(skb);
>> +	return -1;
>> +}
>> +
>> +void raise_fsevent(struct dentry * dentryp, u32 mask)
>> +{
>> +	if (dentryp->d_inode && (MAJOR(dentryp->d_inode->i_rdev) == 4))
>>     
>
> Why only tty is missed?
>   
For console applications using fsevent, if they output fsevent messages
to the console, an endless loop of tty operations will occur. This would
mask some important fsevents, so I mask tty operations.
In fact, I think nobody cares about activity on ttys.
>   
>> +		return;
>> +	__raise_fsevent(dentryp->d_name.name, NULL, mask);
>> +}
>> +EXPORT_SYMBOL_GPL(raise_fsevent);
>> +
>> +void raise_fsevent_create(struct inode * inode, const char * name, u32 mask)
>> +{
>> +	__raise_fsevent(name, NULL, mask);
>> +}
>> +EXPORT_SYMBOL_GPL(raise_fsevent_create);
>> +
>> +void raise_fsevent_move(struct inode * olddir, const char * oldname, 
>> +		struct inode * newdir, const char * newname, u32 mask)
>> +{
>> +	__raise_fsevent(oldname, newname, mask);
>> +}
>> +EXPORT_SYMBOL_GPL(raise_fsevent_move);
>> +
>> +void raise_fsevent_mount(const char * devname, const char * mountpoint)
>> +{
>> +	__raise_fsevent(devname, mountpoint, FSEVENT_MOUNT);
>> +}
>> +
>> +void raise_fsevent_umount(const char * mountpoint)
>> +{
>> +	__raise_fsevent(mountpoint, NULL, FSEVENT_UMOUNT);
>> +}
>> +
>> +static int fsevent_ack(enum fsevent_type type, pid_t pid, u32 seq)
>> +{
>> +	struct fsevent *event;
>> +	unsigned int size;
>> +	struct sk_buff * skb = NULL;
>> +	struct nlmsghdr * nlhdr = NULL;
>> +
>> +	size = NLMSG_SPACE(sizeof(struct fsevent));
>> +	                                                                                                                                       
>> +	skb = alloc_skb(size, GFP_KERNEL);
>> +	if (!skb)
>> +	        return -ENOMEM;
>> +	                                                                                                                                       
>> +	nlhdr = NLMSG_PUT(skb, 0, seq, NLMSG_DONE, size - sizeof(*nlhdr));
>> +	event = NLMSG_DATA(nlhdr);
>> +
>> +	ktime_get_ts(&event->timestamp);
>> +	event->cpu = -1;
>> +	event->type = type; 
>> +	event->pid = 0;
>> +	event->uid = 0;
>> +	event->gid = 0;
>> +	event->len = 0;
>> +	event->pname_len = 0;
>> +	event->fname_len = 0;
>> +	event->new_fname_len = 0;
>> +	event->err = 0;
>> +	                                                                                                                                       
>> +	NETLINK_CB(skb).dst_group = 0;
>> +	NETLINK_CB(skb).dst_pid = pid;
>> +	NETLINK_CB(skb).pid = 0;
>> +
>> +	return (netlink_unicast(fsevent_sock, skb, pid, MSG_DONTWAIT));
>>     
>
> Actually broadcasting is much more optimized that unicast for each
> user.
>   
Because an fsevent ack is meant for just one specific process,
netlink_unicast is the only option.
>   
>> +nlmsg_failure:
>> +	kfree_skb(skb);
>> +	return -1;
>> +}
>> +
>> +static void set_fsevent_mask(u32 * to_mask, u32 from_mask, int mode)
>> +{
>> +	if (mode == FSEVENT_FILTER_IGNORE)
>> +		(*to_mask) &= ~(from_mask);
>> +	else if (mode == FSEVENT_FILTER_LISTEN)
>> +		(*to_mask) |= from_mask;
>> +}
>> +
>> +#define DEFINE_FILTER_FIND_FUNC(type, key) 				\
>> +	type * find_##type(struct list_head * head, key##_t id)		\
>> +	{								\
>> +		int alloc_flag = 1;					\
>> +		type * entry = NULL;					\
>> +									\
>> +		list_for_each_entry(entry, head, list) {		\
>> +			if (entry->key == id) {				\
>> +				alloc_flag = 0;				\
>> +				break;					\
>> +			}						\
>> +		}							\
>> +									\
>> +		if (alloc_flag == 1) {					\
>> +			entry  = (type *)kmalloc(sizeof(type), GFP_KERNEL); \
>> +			if (entry == NULL) 				\
>> +				return NULL;				\
>> +			memset(entry, 0, sizeof(type));			\
>> +			entry->key = id;				\
>> +			list_add_tail(&(entry->list), head);		\
>> +		}							\
>> +		return entry;						\
>> +	}								\
>> +
>> +DEFINE_FILTER_FIND_FUNC(pid_filter, pid)
>> +
>> +DEFINE_FILTER_FIND_FUNC(uid_filter, uid)
>> +
>> +DEFINE_FILTER_FIND_FUNC(gid_filter, gid)
>> +		
>> +DEFINE_FILTER_FIND_FUNC(listener, pid)
>> +
>> +static void set_fsevent_filter(struct fsevent_filter * filter, pid_t pid)
>> +{
>> +	enum fsevent_type type;
>> +	u32 mask = 0;
>> +	int control = 0;
>> +	listener * listenerp = NULL;
>> +	pid_filter * pfilter = NULL;
>> +	uid_filter * ufilter = NULL;
>> +	gid_filter * gfilter = NULL;
>> +
>> +
>> +	mask = filter->mask;
>> +	control = filter->control;
>> +	type = filter->type;
>> +	mask &= FSEVENT_MASK;
>> +	if (mask == 0)
>> +		goto out;
>> +
>> +	spin_lock(&listener_list_lock);
>> +	listenerp = find_listener(&listener_list_head, pid);
>> +	if (unlikely(listenerp == NULL)) {
>> +		spin_unlock(&listener_list_lock);
>> +		return;
>> +	}
>> +
>> +	if (!(listenerp->pid_filter_list_head.next)) {
>> +		INIT_LIST_HEAD(&(listenerp->pid_filter_list_head));
>> +		INIT_LIST_HEAD(&(listenerp->uid_filter_list_head));
>> +		INIT_LIST_HEAD(&(listenerp->gid_filter_list_head));
>> +	}
>> +	spin_unlock(&listener_list_lock);
>> +
>> +	if ((type & FSEVENT_FILTER_ALL) == FSEVENT_FILTER_ALL) {
>> +		if (control == FSEVENT_FILTER_REMOVE) {
>> +			atomic_dec(&fsevent_listener_num);
>> +			spin_lock(&listener_list_lock);
>> +			list_del(&(listenerp->list));
>> +			spin_unlock(&listener_list_lock);
>> +			kfree(listenerp);
>> +		} else
>> +			set_fsevent_mask(&(listenerp->mask), mask, control);
>> +	}
>> +		
>> +
>> +	if ((type & FSEVENT_FILTER_PID) == FSEVENT_FILTER_PID) {
>> +		pfilter = find_pid_filter(&(listenerp->pid_filter_list_head),
>> +				filter->id.pid);
>>     
>
> What does prevent this lists to be changed in parallel?
> Defined above functions do not hold any lock.
>   
Yes, I missed the spinlock; I'll add it.
>   
>> +		if (unlikely(pfilter == NULL))
>> +			return;
>> +
>> +		if (control == FSEVENT_FILTER_REMOVE) {
>> +			list_del(&(pfilter->list));
>> +			kfree(pfilter);
>> +		} else
>> +			set_fsevent_mask(&(pfilter->mask), mask, control);
>> +	}
>> +
>> +	if ((type & FSEVENT_FILTER_UID) == FSEVENT_FILTER_UID) {
>> +		ufilter = find_uid_filter(&(listenerp->uid_filter_list_head),
>> +						filter->id.uid);
>> +		if (unlikely(ufilter == NULL))
>> +			return;
>> +
>> +		if (control == FSEVENT_FILTER_REMOVE) {
>> +			list_del(&(ufilter->list));
>> +			kfree(ufilter);
>> +		} else
>> +			set_fsevent_mask(&(ufilter->mask), mask, control);
>> +	}
>> +
>> +	if ((type & FSEVENT_FILTER_GID) == FSEVENT_FILTER_GID) {
>> +		gfilter = find_gid_filter(&(listenerp->gid_filter_list_head),
>> +						filter->id.gid);
>> +		if (unlikely(gfilter == NULL))
>> +			return;
>> +
>> +		if (control == FSEVENT_FILTER_REMOVE) {
>> +			list_del(&(gfilter->list));
>> +			kfree(gfilter);
>> +		} else
>> +			set_fsevent_mask(&(gfilter->mask), mask, control);
>> +	}
>> +
>> +out:
>> +	fsevent_ack(type, pid, 0);
>> +}
>> +
>> +static listener * find_fsevent_listener(pid_t pid)
>> +{
>> +	listener * listenerp = NULL;
>> +	spin_lock(&listener_list_lock);
>> +	list_for_each_entry(listenerp, &listener_list_head, list) {
>> +		if (listenerp->pid == pid) {
>> +			spin_unlock(&listener_list_lock);
>> +			return listenerp;
>> +		}
>> +	}
>> +	spin_unlock(&listener_list_lock);
>> +	return NULL;
>> +}
>> +
>> +static void cleanup_dead_listener(listener * x)
>> +{
>> +	pid_filter * p = NULL, * pq = NULL;
>> +	uid_filter * u = NULL, * uq = NULL;
>> +	gid_filter * g = NULL, * gq = NULL;
>> +
>> +	if (p == NULL)
>> +		return;
>> +
>> +	list_del(&(x->list));
>> +
>> +	list_for_each_entry_safe(p, pq, &(x->pid_filter_list_head), list) {
>> +		list_del(&(p->list));
>> +		kfree(p);
>> +	}
>> +
>> +	list_for_each_entry_safe(u, uq, &(x->uid_filter_list_head), list) {
>> +		list_del(&(u->list));
>> +		kfree(u);
>> +	}
>> +
>> +	list_for_each_entry_safe(g, gq, &(x->gid_filter_list_head), list) {
>> +		list_del(&(g->list));
>> +		kfree(g);
>> +	}
>> +	
>> +	kfree(x);
>> +}
>> +
>> +static void fsevent_recv(struct sock *sk, int len)
>> +{
>> +	struct sk_buff *skb = NULL;
>> +	struct nlmsghdr *nlhdr = NULL;
>> +	struct fsevent_filter * filter = NULL;
>> +	pid_t pid;
>> +
>> +	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
>> +		skb_get(skb);
>> +		if (skb->len >= FSEVENT_FILTER_MSGSIZE) {
>> +			nlhdr = (struct nlmsghdr *)skb->data;
>> +			filter = NLMSG_DATA(nlhdr);
>> +			pid = NETLINK_CREDS(skb)->pid;
>> +			if (find_fsevent_listener(pid) == NULL)
>> +				atomic_inc(&fsevent_listener_num);
>> +			set_fsevent_filter(filter, pid);
>> +		}
>> +		kfree_skb(skb);
>> +	}
>> +}
>> +
>> +#define DEFINE_FILTER_MATCH_FUNC(filtertype, key) 			\
>> +	static int match_##filtertype(listener * p,			\
>> +				struct fsevent * event,			\
>> +				struct sk_buff * skb)			\
>> +	{								\
>> +		int ret = 0;						\
>> +		filtertype * xfilter = NULL;				\
>> +		struct sk_buff * skb2 = NULL;				\
>> +		struct list_head *  head = &(p->key##_filter_list_head);  \
>> +		list_for_each_entry(xfilter, head, list) {		\
>> +			if (xfilter->key != event->key)			\
>> +				continue;				\
>> +			ret = filter_fsevent(xfilter->mask, event->type); \
>> +			if ( ret != 0)					\
>> +				return -1;				\
>> +			skb2 = skb_clone(skb, GFP_KERNEL);		\
>> +       			if (skb2 == NULL)				\
>> +				return -ENOMEM;				\
>> +			NETLINK_CB(skb2).dst_group = 0;			\
>> +			NETLINK_CB(skb2).dst_pid = p->pid;		\
>> +			NETLINK_CB(skb2).pid = 0;			\
>> +			return (netlink_unicast(fsevent_sock, skb2,	\
>> +					p->pid, MSG_DONTWAIT));		\
>> +		}							\
>> +		return -ENODEV;						\
>> +	}								\
>> +
>> +DEFINE_FILTER_MATCH_FUNC(pid_filter, pid)
>> +
>> +DEFINE_FILTER_MATCH_FUNC(uid_filter, uid)
>> +
>> +DEFINE_FILTER_MATCH_FUNC(gid_filter, gid)
>> +
>> +#define MATCH_XID(key, listenerp, event, skb) 			\
>> +	ret = match_##key##_filter(listenerp, event, skb); 	\
>> +	if (ret == 0) {					 	\
>> +		kfree_skb(skb);				 	\
>> +	        continue;				 	\
>> +	}						 	\
>> +	do {} while (0)					 	\
>> +
>> +static int fsevent_send_to_process(struct sk_buff * skb)
>> +{
>> +	listener * p  = NULL, * q = NULL;
>> +	struct fsevent * event = NULL;
>> +	struct sk_buff * skb2 = NULL;
>> +	int ret = 0;
>> +
>> +	event = (struct fsevent *)(skb->data + sizeof(struct nlmsghdr));
>> +	spin_lock(&listener_list_lock);
>> +	list_for_each_entry_safe(p, q, &listener_list_head, list) {
>> +		MATCH_XID(pid, p, event, skb);
>> +		MATCH_XID(uid, p, event, skb);
>> +		MATCH_XID(gid, p, event, skb);
>> +
>> +		if (filter_fsevent(p->mask, event->type) == 0) {
>> +			 skb2 = skb_clone(skb, GFP_KERNEL);
>> +	                 if (skb2 == NULL)
>> +	                 	return -ENOMEM;
>> +	                 NETLINK_CB(skb2).dst_group = 0;
>> +	                 NETLINK_CB(skb2).dst_pid = p->pid;
>> +	                 NETLINK_CB(skb2).pid = 0;
>> +	                 ret = netlink_unicast(fsevent_sock, skb2,
>> +	                                p->pid, MSG_DONTWAIT);
>> +			if (ret == -ECONNREFUSED) {
>> +				atomic_dec(&fsevent_listener_num);
>> +				cleanup_dead_listener(p);
>> +			}
>> +		}
>> +	}
>> +	spin_unlock(&listener_list_lock);
>>     
>
> Systems with global locks do not scale well I think...
> Even if it only called from kfseventd thread.
> You should use per-cpu locks here as long as you use per-cpu queue.
>
>   
>> +	return ret;
>> +}
>> +
>> +static int kfseventd(void * bind_to_cpu)
>> +{
>> +	struct sk_buff * skb = NULL;
>> +	//int cpu = (unsigned long) bind_to_cpu;
>> +	current->flags |= PF_NOFREEZE;
>> +        set_current_state(TASK_INTERRUPTIBLE);
>> +
>> +	while (!kthread_should_stop()) {
>> +		
>> +		while((skb = skb_dequeue(&get_cpu_var(fsevent_send_queue)))
>> +			!= NULL) {
>> +			fsevent_send_to_process(skb);
>> +			put_cpu_var(fsevent_send_queue);
>> +		}
>> +        	set_current_state(TASK_INTERRUPTIBLE);
>> +		schedule();
>>     
>
> Scheduling after each message sent?
> Why don't you use keventd in this case?
>   
Your suggestion is great; it will reduce the amount of code.
>   
>> +		set_current_state(TASK_RUNNING);
>> +	}
>> +	set_current_state(TASK_RUNNING);
>> +	return 0;
>> +}
>> +
>> +#ifdef CONFIG_HOTPLUG_CPU
>> +static void takeover_fsevent(void * hcpu)
>> +{
>> +	struct sk_buff * skb = NULL;
>> +	int hotcpu = (unsigned long)hcpu;
>> +	struct sk_buff_head * fsevent_queue =
>> +			&per_cpu(fsevent_send_queue, hotcpu);
>> +
>> +	while((skb = skb_dequeue(fsevent_queue)) != NULL) {
>> +                fsevent_send_to_process(skb);
>> +        }
>> +}
>> +#endif
>> +	
>> +
>> +static int __devinit cpu_callback(struct notifier_block *nfb,
>> +                                  unsigned long action,
>> +                                  void *hcpu)
>> +{
>> +        int hotcpu = (unsigned long)hcpu;
>> +        struct task_struct *p;
>> +                                                                                                                                               
>> +        switch (action) {
>> +        case CPU_UP_PREPARE:
>> +                p = kthread_create(kfseventd, hcpu, "kfseventd/%d", hotcpu);
>> +                if (IS_ERR(p)) {
>> +                        printk("kfseventd for %i failed\n", hotcpu);
>> +                        return NOTIFY_BAD;
>> +                }
>> +                kthread_bind(p, hotcpu);
>> +                per_cpu(kfseventd_task, hotcpu) = p;
>> +                break;
>> +        case CPU_ONLINE:
>> +                wake_up_process(per_cpu(kfseventd_task, hotcpu));
>> +                break;
>> +#ifdef CONFIG_HOTPLUG_CPU
>> +        case CPU_UP_CANCELED:
>> +                /* Unbind so it can run.  Fall thru. */
>> +                kthread_bind(per_cpu(kfseventd, hotcpu),
>> +                             any_online_cpu(cpu_online_map));
>> +        case CPU_DEAD:
>> +                p = per_cpu(kfseventd, hotcpu);
>> +                per_cpu(kfseventd, hotcpu) = NULL;
>> +                kthread_stop(p);
>> +                takeover_fsevent(hotcpu);
>> +                break;
>> +#endif /* CONFIG_HOTPLUG_CPU */
>> +        }
>> +        return NOTIFY_OK;
>> +}
>> +
>> +static struct notifier_block __devinitdata cpu_nfb = {
>> +        .notifier_call = cpu_callback
>> +};
>> +
>> +static int __init fsevent_init(void)
>> +{
>> +	int cpu;
>> +	struct sk_buff_head * listptr;
>> +	void * hotcpu;
>> +
>> +	fsevent_sock = netlink_kernel_create(NETLINK_FSEVENT, 0,
>> +					 fsevent_recv, THIS_MODULE);
>> +	if (!fsevent_sock)
>> +		return -EIO;
>> +	for_each_cpu(cpu) {
>> +		listptr = &per_cpu(fsevent_send_queue, cpu);
>> +		skb_queue_head_init(listptr);
>> +	}
>> +
>> +	hotcpu = (void *)(long)smp_processor_id();
>> +	cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hotcpu);
>> +	cpu_callback(&cpu_nfb, CPU_ONLINE, hotcpu);
>> +	register_cpu_notifier(&cpu_nfb);
>> +
>> +	return 0;
>> +}
>> +
>> +static void __exit fsevent_exit(void)
>> +{
>> +	sock_release(fsevent_sock->sk_socket);
>>     
>
> Hmm, I doubt it is right deinitialization...
>   
You are right, I missed cleaning up the listener list.
>   
>> +}
>> +
>> +module_init(fsevent_init);
>> +module_exit(fsevent_exit);
>> +
>> +MODULE_LICENSE("GPL");
>> +MODULE_AUTHOR("Yi Yang <yang.y.yi@gmail.com>");
>> +MODULE_DESCRIPTION("File System Events Reporter");
>>
>>     
>
>

     prev parent reply	other threads:[~2006-04-05  2:05 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-04-03 14:28 [2.6.16 PATCH] Filesystem Events Reporter v1 Yi Yang
2006-04-04  6:11 ` Evgeniy Polyakov
2006-04-05  2:06   ` Yi Yang [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=44332624.1070002@gmail.com \
    --to=yang.y.yi@gmail.com \
    --cc=akpm@osdl.org \
    --cc=johnpol@2ka.mipt.ru \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matthltc@us.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox