public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Evgeniy Polyakov <zbr@ioremap.net>
To: Eric Paris <eparis@redhat.com>
Cc: linux-kernel@vger.kernel.org, a.p.zijlstra@chello.nl,
	viro@ZenIV.linux.org.uk, hch@infradead.org,
	akpm@linux-foundation.org, alan@lxorguk.ukuu.org.uk
Subject: Re: [RFC PATCH -v4 07/14] fsnotify: add in inode fsnotify markings
Date: Sat, 13 Dec 2008 06:07:01 +0300	[thread overview]
Message-ID: <20081213030701.GD807@ioremap.net> (raw)
In-Reply-To: <20081212215151.27112.71399.stgit@paris.rdu.redhat.com>

On Fri, Dec 12, 2008 at 04:51:51PM -0500, Eric Paris (eparis@redhat.com) wrote:
> +void fsnotify_put_mark(struct fsnotify_mark_entry *entry)
> +{
> +	if (atomic_dec_and_test(&entry->refcnt)) {
> +		spin_lock(&entry->lock);
> +		/* entries can only be found by the kernel by searching the
> +		 * inode->i_fsnotify_entries or the group->mark_entries lists.
> +		 * if freeme is set that means this entry is off both lists.
> +		 * if refcnt is 0 that means we are the last thing still
> +		 * looking at this entry, so its time to free.
> +		 */
> +		if (!atomic_read(&entry->refcnt) && entry->freeme) {

Why do you check refcnt again? Does it mean its life cycle does not end
when it hits zero and should be freed only under lock, which in turn
means it may be accessed under that lock and zero refcnt? Please
describe this locking in more details.

> +			spin_unlock(&entry->lock);
> +			fsnotify_destroy_mark(entry);
> +			return;
> +		}
> +		spin_unlock(&entry->lock);
> +	}
> +}
> +
> +void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
> +{
> +	struct fsnotify_mark_entry *lentry, *entry;
> +	struct inode *inode;
> +	LIST_HEAD(free_list);
> +
> +	spin_lock(&group->mark_lock);
> +	list_for_each_entry_safe(entry, lentry, &group->mark_entries, g_list) {
> +		list_del_init(&entry->g_list);
> +		list_add(&entry->free_g_list, &free_list);
> +	}
> +	spin_unlock(&group->mark_lock);
> +
> +	list_for_each_entry_safe(entry, lentry, &free_list, free_g_list) {
> +		fsnotify_get_mark(entry);
> +		spin_lock(&entry->lock);
> +		inode = entry->inode;
> +		if (!inode) {

This inode does not seem to be grabbed previously, or I missed that
part? What prevents it from being freed?

> +			entry->group = NULL;
> +			spin_unlock(&entry->lock);
> +			fsnotify_put_mark(entry);
> +			continue;
> +		}
> +		spin_lock(&inode->i_lock);
> +
> +		list_del_init(&entry->i_list);
> +		entry->inode = NULL;
> +		list_del_init(&entry->g_list);
> +		entry->group = NULL;
> +		entry->freeme = 1;
> +
> +		spin_unlock(&inode->i_lock);
> +		spin_unlock(&entry->lock);
> +
> +		fsnotify_put_mark(entry);
> +	}
> +}

> +void fsnotify_clear_marks_by_inode(struct inode *inode, unsigned int flags)
> +{
> +	struct fsnotify_mark_entry *lentry, *entry;
> +	LIST_HEAD(free_list);
> +
> +	spin_lock(&inode->i_lock);
> +	list_for_each_entry_safe(entry, lentry, &inode->i_fsnotify_mark_entries, i_list) {
> +		list_del_init(&entry->i_list);
> +		list_add(&entry->free_i_list, &free_list);
> +	}
> +	spin_unlock(&inode->i_lock);
> +
> +	/*
> +	 * at this point destroy_by_* might race.
> +	 *
> +	 * we used list_del_init() so it can be list_del_init'd again, no harm.
> +	 * we were called from an inode function so we know that other user can
> +	 * try to grab entry->inode->i_lock without a problem.
> +	 */
> +	list_for_each_entry_safe(entry, lentry, &free_list, free_i_list) {
> +		fsnotify_get_mark(entry);
> +		entry->group->ops->mark_clear_inode(entry, inode, flags);
> +		fsnotify_put_mark(entry);
> +	}

Since entry was not grabbed in the above locked list, what prevents it
to be freed before fsnotify_get_mark() executed? Could that get moved
under lock?

> +}
> +
> +/* caller must hold inode->i_lock */
> +struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group, struct inode *inode)
> +{
> +	struct fsnotify_mark_entry *entry;
> +
> +	list_for_each_entry(entry, &inode->i_fsnotify_mark_entries, i_list) {
> +		if (entry->group == group) {
> +			fsnotify_get_mark(entry);
> +			return entry;
> +		}
> +	}
> +	return NULL;
> +}
> +/*

Missed newline before comment's start :)

> + * This is a low use function called when userspace is changing what is being
> + * watched.  I don't mind doing the allocation since I'm assuming we will have
> + * more new events than we have adding to old events...
> + *
> + * add (we use |=) the mark to the in core inode mark, if you need to change
> + * rather than | some new bits you needs to fsnotify_destroy_mark_by_inode()
> + * then call this with all the right bits in the mask.
> + */
> +struct fsnotify_mark_entry *fsnotify_mark_add(struct fsnotify_group *group, struct inode *inode, __u64 mask)
> +{
> +	/* we initialize entry to shut up the compiler in case we just to out... */
> +	struct fsnotify_mark_entry *entry = NULL, *lentry;
> +
> +	/* pre allocate an entry so we can hold the lock */
> +	entry = fsnotify_alloc_mark();
> +	if (!entry)
> +		return NULL;
> +
> +	/*
> +	 * LOCKING ORDER!!!!
> +	 * entry->lock
> +	 * group->mark_lock
> +	 * inode->i_lock
> +	 */
> +	spin_lock(&group->mark_lock);
> +	spin_lock(&inode->i_lock);
> +	lentry = fsnotify_find_mark_entry(group, inode);
> +	if (lentry) {
> +		/* we didn't use the new entry, kill it */
> +		fsnotify_destroy_mark(entry);
> +		entry = lentry;
> +		entry->mask |= mask;
> +		goto out_unlock;
> +	}
> +
> +	spin_lock_init(&entry->lock);
> +	atomic_set(&entry->refcnt, 1);
> +	entry->group = group;
> +	entry->mask = mask;
> +	entry->inode = inode;

That's what I talked about previously, what if this inode will be
released after inode unlocked?

> +	entry->freeme = 0;
> +	entry->private = NULL;
> +	entry->free_private = group->ops->free_mark_priv;
> +
> +	list_add(&entry->i_list, &inode->i_fsnotify_mark_entries);
> +	list_add(&entry->g_list, &group->mark_entries);
> +
> +out_unlock:
> +	spin_unlock(&inode->i_lock);
> +	spin_unlock(&group->mark_lock);
> +	return entry;
> +}
> +
> +void fsnotify_recalc_inode_mask(struct inode *inode)
> +{
> +	unsigned long new_mask = 0;
> +	struct fsnotify_mark_entry *entry;
> +
> +	spin_lock(&inode->i_lock);
> +	list_for_each_entry(entry, &inode->i_fsnotify_mark_entries, i_list) {
> +		new_mask |= entry->mask;
> +	}
> +	inode->i_fsnotify_mask = new_mask;
> +	spin_unlock(&inode->i_lock);
> +}
> +
> +
> +__init int fsnotify_mark_init(void)
> +{
> +	fsnotify_mark_kmem_cache = kmem_cache_create("fsnotify_mark_entry", sizeof(struct fsnotify_mark_entry), 0, SLAB_PANIC, NULL);
> +
> +	return 0;
> +}
> +subsys_initcall(fsnotify_mark_init);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 4a853ef..b5a7bce 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -665,6 +665,11 @@ struct inode {
>  
>  	__u32			i_generation;
>  
> +#ifdef CONFIG_FSNOTIFY
> +	__u64			i_fsnotify_mask; /* all events this inode cares about */
> +	struct list_head	i_fsnotify_mark_entries; /* fsnotify mark entries */
> +#endif
> +
>  #ifdef CONFIG_DNOTIFY
>  	unsigned long		i_dnotify_mask; /* Directory notify events */
>  	struct dnotify_struct	*i_dnotify; /* for directory notifications */
> diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
> index b084b98..c2ed916 100644
> --- a/include/linux/fsnotify.h
> +++ b/include/linux/fsnotify.h
> @@ -99,6 +99,14 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
>  }
>  
>  /*
> + * fsnotify_inode_delete - and inode is being evicted from cache, clean up is needed
> + */
> +static inline void fsnotify_inode_delete(struct inode *inode)
> +{
> +	__fsnotify_inode_delete(inode, FSNOTIFY_INODE_DESTROY);
> +}
> +
> +/*
>   * fsnotify_inoderemove - an inode is going away
>   */
>  static inline void fsnotify_inoderemove(struct inode *inode)
> @@ -107,6 +115,7 @@ static inline void fsnotify_inoderemove(struct inode *inode)
>  	inotify_inode_is_dead(inode);
>  
>  	fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE);
> +	__fsnotify_inode_delete(inode, FSNOTIFY_LAST_DENTRY);
>  }
>  
>  /*
> diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
> index 924902e..0482a14 100644
> --- a/include/linux/fsnotify_backend.h
> +++ b/include/linux/fsnotify_backend.h
> @@ -13,6 +13,7 @@
>  #include <linux/fs.h>
>  #include <linux/list.h>
>  #include <linux/mutex.h>
> +#include <linux/spinlock.h>
>  #include <linux/wait.h>
>  
>  #include <asm/atomic.h>
> @@ -68,13 +69,21 @@
>  #define FSNOTIFY_EVENT_FILE     1
>  #define FSNOTIFY_EVENT_INODE    2
>  
> +/* these tell __fsnotify_inode_delete what kind of event this is */
> +#define FSNOTIFY_LAST_DENTRY	1
> +#define FSNOTIFY_INODE_DESTROY	2
> +
>  struct fsnotify_group;
>  struct fsnotify_event;
> +struct fsnotify_mark_entry;
>  
>  struct fsnotify_ops {
>  	int (*event_to_notif)(struct fsnotify_group *group, struct fsnotify_event *event);
> +	void (*mark_clear_inode)(struct fsnotify_mark_entry *entry, struct inode *inode, unsigned int flags);
> +	int (*should_send_event)(struct fsnotify_group *group, struct inode *inode, __u64 mask);
>  	void (*free_group_priv)(struct fsnotify_group *group);
>  	void (*free_event_priv)(struct fsnotify_group *group, struct fsnotify_event *event);
> +	void (*free_mark_priv)(struct fsnotify_mark_entry *entry);
>  };
>  
>  struct fsnotify_group {
> @@ -85,6 +94,10 @@ struct fsnotify_group {
>  
>  	const struct fsnotify_ops *ops;	/* how this group handles things */
>  
> +	/* stores all fastapth entries assoc with this group so they can be cleaned on unregister */
> +	spinlock_t mark_lock;    /* protect mark_entries list */
> +	struct list_head mark_entries; /* all inode mark entries for this group */
> +
>  	unsigned int priority;		/* order this group should receive msgs.  low first */
>  
>  	void *private;			/* private data for implementers (dnotify, inotify, fanotify) */
> @@ -94,17 +107,26 @@ struct fsnotify_group {
>  
>  /* called from the vfs to signal fs events */
>  extern void fsnotify(struct inode *to_tell, __u64 mask, void *data, int data_is);
> +extern void __fsnotify_inode_delete(struct inode *inode, int flag);
>  
>  /* called from fsnotify interfaces, such as fanotify or dnotify */
>  extern void fsnotify_recalc_global_mask(void);
> +extern void fsnotify_recalc_group_mask(struct fsnotify_group *group);
>  extern struct fsnotify_group *fsnotify_obtain_group(unsigned int priority, unsigned int group_num, __u64 mask, const struct fsnotify_ops *ops);
>  extern void fsnotify_put_group(struct fsnotify_group *group);
>  extern void fsnotify_get_group(struct fsnotify_group *group);
>  
> +extern void fsnotify_recalc_inode_mask(struct inode *inode);
> +extern struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group, struct inode *inode);
> +extern struct fsnotify_mark_entry *fsnotify_mark_add(struct fsnotify_group *group, struct inode *inode, __u64 mask);
>  #else
>  
>  static inline void fsnotify(struct inode *to_tell, __u64 mask, void *data, int data_is);
>  {}
> +
> +static inline void __fsnotify_inode_delete(struct inode *inode, int flag)
> +{}
> +
>  #endif	/* CONFIG_FSNOTIFY */
>  
>  #endif	/* __KERNEL __ */

-- 
	Evgeniy Polyakov

  reply	other threads:[~2008-12-13  3:07 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-12-12 21:51 [RFC PATCH -v4 00/14] fsnotify, dnotify, and inotify Eric Paris
2008-12-12 21:51 ` [RFC PATCH -v4 01/14] filesystem notification: create fs/notify to contain all fs notification Eric Paris
2008-12-12 21:51 ` [RFC PATCH -v4 02/14] fsnotify: pass a file instead of an inode to open, read, and write Eric Paris
2008-12-12 21:51 ` [RFC PATCH -v4 03/14] fsnotify: sys_execve and sys_uselib do not call into fsnotify Eric Paris
2008-12-12 21:51 ` [RFC PATCH -v4 04/14] fsnotify: use the new open-exec hook for inotify and dnotify Eric Paris
2008-12-13 15:29   ` Christoph Hellwig
2008-12-12 21:51 ` [RFC PATCH -v4 05/14] fsnotify: unified filesystem notification backend Eric Paris
2008-12-13  2:54   ` Evgeniy Polyakov
2008-12-13 15:01     ` Eric Paris
2008-12-12 21:51 ` [RFC PATCH -v4 06/14] fsnotify: add group priorities Eric Paris
2008-12-12 21:51 ` [RFC PATCH -v4 07/14] fsnotify: add in inode fsnotify markings Eric Paris
2008-12-13  3:07   ` Evgeniy Polyakov [this message]
2008-12-13 16:35     ` Eric Paris
2008-12-22 13:43       ` Al Viro
2008-12-22 14:45         ` Eric Paris
2008-12-12 21:51 ` [RFC PATCH -v4 08/14] fsnotify: parent event notification Eric Paris
2008-12-12 21:52 ` [RFC PATCH -v4 09/14] dnotify: reimplement dnotify using fsnotify Eric Paris
2008-12-12 21:52 ` [RFC PATCH -v4 10/14] fsnotify: generic notification queue and waitq Eric Paris
2008-12-12 21:52 ` [RFC PATCH -v4 11/14] fsnotify: include pathnames with entries when possible Eric Paris
2008-12-13  3:19   ` Evgeniy Polyakov
2008-12-13 16:42     ` Eric Paris
2008-12-12 21:52 ` [RFC PATCH -v4 12/14] fsnotify: add correlations between events Eric Paris
2008-12-18 22:28   ` C. Scott Ananian
2008-12-22  2:40     ` Eric Paris
2008-12-22  9:01       ` Evgeniy Polyakov
2008-12-22 20:06       ` C. Scott Ananian
2008-12-12 21:52 ` [RFC PATCH -v4 13/14] inotify: reimplement inotify using fsnotify Eric Paris
2008-12-13  3:22   ` Evgeniy Polyakov
2008-12-13 16:44     ` Eric Paris
2008-12-15 15:48       ` Evgeniy Polyakov
2008-12-12 21:52 ` [RFC PATCH -v4 14/14] shit on top for debugging Eric Paris
2008-12-14 22:40   ` James Morris
2008-12-14 22:47     ` Eric Paris
2008-12-18 23:36 ` [RFC PATCH -v4 00/14] fsnotify, dnotify, and inotify C. Scott Ananian
2008-12-22  3:22   ` Eric Paris
2008-12-22 10:58     ` Niraj Kumar
2008-12-22 19:59     ` C. Scott Ananian
2008-12-22 20:53       ` Eric Paris
2008-12-29 18:19         ` C. Scott Ananian
2008-12-22 21:04       ` Al Viro
2008-12-22 23:08         ` C. Scott Ananian
2008-12-22 23:20           ` Al Viro
2008-12-22 23:21           ` Christoph Hellwig
2008-12-25 18:17             ` C. Scott Ananian
2008-12-25 20:33               ` Al Viro
2008-12-26  0:58                 ` C. Scott Ananian
2008-12-26  1:44                   ` Al Viro
2008-12-27 21:23                     ` C. Scott Ananian

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20081213030701.GD807@ioremap.net \
    --to=zbr@ioremap.net \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=eparis@redhat.com \
    --cc=hch@infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=viro@ZenIV.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox