* [patch] inotify: locking
@ 2004-09-22 19:37 Robert Love
2004-09-22 19:38 ` Robert Love
2004-09-22 23:22 ` John McCutchan
0 siblings, 2 replies; 7+ messages in thread
From: Robert Love @ 2004-09-22 19:37 UTC (permalink / raw)
To: ttb; +Cc: linux-kernel
Hey, John.
I went over the locking in drivers/char/inotify.c. Looks right.
I made two major changes:
- In a couple places you used irq-safe locks, but in most places
you did not. It has to be all or nothing. We do not currently
need protection from interrupts, so I changed the few
irq-safe locks on dev->lock to normal spin_lock/spin_unlock
calls.
- dev->event_count was an atomic_t, but it was never accessed
outside of dev->lock. I also did not see why ->event_count
was atomic but not ->nr_watches. So I made event_count an
unsigned int and removed the atomic operations.
The rest of the (admittedly a bit large) patch is documenting the
locking rules. I tried to put the locking assumptions in comments at
the top of each function. I made some coding style cleanups as I went
along, too, but not too many (those come next).
I do have one remaining concern: create_watcher() is called without the
lock on dev, but it later obtains the lock, before it touches dev. So
it is safe in that regard, but what if dev is deallocated before it
grabs the lock? dev is passed in, so, for example, dev could be freed
(or otherwise manipulated) and then the dereference of dev->lock would
oops. A couple other functions do this. We probably need proper ref
counting on dev. BUT, are all of these call chains off of VFS functions
on the device? Perhaps so long as the device is open it is pinned?
Attached patch is against your latest, plus the previous postings.
Thanks,
Robert Love
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch] inotify: locking
2004-09-22 19:37 [patch] inotify: locking Robert Love
@ 2004-09-22 19:38 ` Robert Love
2004-09-23 9:09 ` Paul Jackson
2004-09-22 23:22 ` John McCutchan
1 sibling, 1 reply; 7+ messages in thread
From: Robert Love @ 2004-09-22 19:38 UTC (permalink / raw)
To: ttb; +Cc: linux-kernel
[-- Attachment #1: Type: text/plain, Size: 316 bytes --]
On Wed, 2004-09-22 at 15:37 -0400, Robert Love wrote:
> Attached patch is against your latest, plus the previous postings.
Evolution needs a heuristic to detect when I say that I attached
something when in reality I did not. I do this all the time -
sorry.
Patch attached for real this time.
Robert Love
[-- Attachment #2: inotify-locking-1.patch --]
[-- Type: text/x-patch, Size: 10579 bytes --]
drivers/char/inotify.c | 189 +++++++++++++++++++++++++++++--------------------
1 files changed, 114 insertions(+), 75 deletions(-)
--- linux-inotify/drivers/char/inotify.c 2004-09-22 15:30:58.774848096 -0400
+++ linux/drivers/char/inotify.c 2004-09-22 15:30:46.207758584 -0400
@@ -73,7 +73,7 @@
struct list_head events;
struct list_head watchers;
spinlock_t lock;
- atomic_t event_count;
+ unsigned int event_count;
int nr_watches;
};
@@ -182,22 +182,33 @@
(event_mask == IN_IGNORED) || \
(event_mask & watchers_mask))
-
-/* dev->lock == locked before calling */
-static void inotify_dev_queue_event (struct inotify_device *dev, struct inotify_watcher *watcher, int mask, const char *filename) {
+/*
+ * inotify_dev_queue_event - add a new event to the given device
+ *
+ * Caller must hold dev->lock.
+ */
+static void inotify_dev_queue_event (struct inotify_device *dev,
+ struct inotify_watcher *watcher, int mask,
+ const char *filename)
+{
struct inotify_kernel_event *kevent;
- /* The queue has already overflowed and we have already sent the Q_OVERFLOW event */
- if (atomic_read(&dev->event_count) > MAX_INOTIFY_QUEUED_EVENTS) {
- iprintk(INOTIFY_DEBUG_EVENTS, "event queue for %p overflowed\n", dev);
+ /*
+ * the queue has already overflowed and we have already sent the
+ * Q_OVERFLOW event
+ */
+ if (dev->event_count > MAX_INOTIFY_QUEUED_EVENTS) {
+ iprintk(INOTIFY_DEBUG_EVENTS,
+ "event queue for %p overflowed\n", dev);
return;
}
- /* The queue has just overflowed and we need to notify user space that it has */
- if (atomic_read(&dev->event_count) == MAX_INOTIFY_QUEUED_EVENTS) {
- atomic_inc(&dev->event_count);
+ /* the queue has just overflowed and we need to notify user space */
+ if (dev->event_count == MAX_INOTIFY_QUEUED_EVENTS) {
+ dev->event_count++;
kevent = kernel_event(-1, IN_Q_OVERFLOW, NULL);
- iprintk(INOTIFY_DEBUG_EVENTS, "sending IN_Q_OVERFLOW to %p\n", dev);
+ iprintk(INOTIFY_DEBUG_EVENTS, "sending IN_Q_OVERFLOW to %p\n",
+ dev);
goto add_event_to_queue;
}
@@ -205,13 +216,13 @@
return;
}
- atomic_inc(&dev->event_count);
+ dev->event_count++;
kevent = kernel_event (watcher->wd, mask, filename);
add_event_to_queue:
if (!kevent) {
iprintk(INOTIFY_DEBUG_EVENTS, "failed to queue event for %p -- could not allocate kevent\n", dev);
- atomic_dec(&dev->event_count);
+ dev->event_count--;
return;
}
@@ -222,66 +233,77 @@
-
-static void inotify_dev_event_dequeue (struct inotify_device *dev) {
+/*
+ * inotify_dev_event_dequeue - destroy an event on the given device
+ *
+ * Caller must hold dev->lock.
+ */
+static void inotify_dev_event_dequeue(struct inotify_device *dev)
+{
struct inotify_kernel_event *kevent;
- if (!inotify_dev_has_events (dev)) {
+ if (!inotify_dev_has_events(dev))
return;
- }
kevent = inotify_dev_get_event(dev);
list_del(&kevent->list);
- atomic_dec(&dev->event_count);
+ dev->event_count--;
delete_kernel_event (kevent);
iprintk(INOTIFY_DEBUG_EVENTS, "dequeued event on %p\n", dev);
}
+/*
+ * inotify_dev_get_wd - returns the next WD for use by the given dev
+ *
+ * Caller must hold dev->lock before calling.
+ */
static int inotify_dev_get_wd (struct inotify_device *dev)
{
int wd;
- wd = -1;
-
- if (!dev)
- return -1;
-
- if (dev->nr_watches == MAX_INOTIFY_DEV_WATCHERS) {
+ if (!dev || dev->nr_watches == MAX_INOTIFY_DEV_WATCHERS)
return -1;
- }
dev->nr_watches++;
-
wd = find_first_zero_bit (dev->bitmask, MAX_INOTIFY_DEV_WATCHERS);
-
set_bit (wd, dev->bitmask);
return wd;
}
-static int inotify_dev_put_wd (struct inotify_device *dev, int wd)
+/*
+ * inotify_dev_put_wd - release the given WD on the given device
+ *
+ * Caller must hold dev->lock.
+ */
+static int inotify_dev_put_wd(struct inotify_device *dev, int wd)
{
- if (!dev||wd < 0)
+ if (!dev || wd < 0)
return -1;
dev->nr_watches--;
-
- clear_bit (wd, dev->bitmask);
+ clear_bit(wd, dev->bitmask);
return 0;
}
-
-static struct inotify_watcher *create_watcher (struct inotify_device *dev, int mask, struct inode *inode) {
+/*
+ * create_watcher - creates a watcher on the given device.
+ *
+ * Grabs dev->lock, so the caller must not hold it.
+ */
+static struct inotify_watcher *create_watcher (struct inotify_device *dev,
+ int mask, struct inode *inode)
+{
struct inotify_watcher *watcher;
watcher = kmem_cache_alloc (watcher_cache, GFP_KERNEL);
-
if (!watcher) {
- iprintk(INOTIFY_DEBUG_ALLOC, "failed to allocate watcher (%p,%d)\n", inode, mask);
+ iprintk(INOTIFY_DEBUG_ALLOC,
+ "failed to allocate watcher (%p,%d)\n", inode, mask);
return NULL;
}
@@ -294,7 +316,7 @@
INIT_LIST_HEAD(&watcher->u_list);
spin_lock(&dev->lock);
- watcher->wd = inotify_dev_get_wd (dev);
+ watcher->wd = inotify_dev_get_wd (dev);
spin_unlock(&dev->lock);
if (watcher->wd < 0) {
@@ -309,29 +331,35 @@
return watcher;
}
-/* Must be called with dev->lock held */
-static void delete_watcher (struct inotify_device *dev, struct inotify_watcher *watcher) {
+/*
+ * delete_watcher - removes the given 'watcher' from the given 'dev'
+ *
+ * Caller must hold dev->lock.
+ */
+static void delete_watcher (struct inotify_device *dev,
+ struct inotify_watcher *watcher)
+{
inotify_dev_put_wd (dev, watcher->wd);
-
iprintk(INOTIFY_DEBUG_ALLOC, "free'd watcher %p\n", watcher);
-
kmem_cache_free (watcher_cache, watcher);
-
watcher_object_count--;
}
-
-static struct inotify_watcher *inode_find_dev (struct inode *inode, struct inotify_device *dev) {
+/*
+ * inotify_find_dev - find the watcher associated with the given inode and dev
+ *
+ * Caller must hold dev->lock.
+ */
+static struct inotify_watcher *inode_find_dev (struct inode *inode,
+ struct inotify_device *dev)
+{
struct inotify_watcher *watcher;
- watcher = NULL;
-
list_for_each_entry (watcher, &inode->watchers, i_list) {
- if (watcher->dev == dev) {
+ if (watcher->dev == dev)
return watcher;
- }
-
}
+
return NULL;
}
@@ -385,16 +413,20 @@
return error;
}
-static int inotify_dev_rm_watcher (struct inotify_device *dev, struct inotify_watcher *watcher) {
+/*
+ * inotify_dev_rm_watcher - remove the given watch from the given device
+ *
+ * Caller must hold dev->lock because we call inotify_dev_queue_event().
+ */
+static int inotify_dev_rm_watcher (struct inotify_device *dev,
+ struct inotify_watcher *watcher)
+{
int error;
error = -EINVAL;
-
if (watcher) {
inotify_dev_queue_event (dev, watcher, IN_IGNORED, NULL);
-
list_del(&watcher->d_list);
-
error = 0;
}
@@ -413,16 +445,21 @@
inode->watchers_mask = new_mask;
}
-static int inode_add_watcher (struct inode *inode, struct inotify_watcher *watcher) {
- if (!inode||!watcher||inode_find_dev (inode, watcher->dev))
+/*
+ * inode_add_watcher - add a watcher to the given inode
+ *
+ * Callers must hold dev->lock, because we call inode_find_dev().
+ */
+static int inode_add_watcher (struct inode *inode,
+ struct inotify_watcher *watcher)
+{
+ if (!inode || !watcher || inode_find_dev (inode, watcher->dev))
return -EINVAL;
list_add(&watcher->i_list, &inode->watchers);
inode->watcher_count++;
-
inode_update_watchers_mask (inode);
-
return 0;
}
@@ -440,16 +477,18 @@
/* Kernel API */
-void inotify_inode_queue_event (struct inode *inode, unsigned long mask, const char *filename) {
+void inotify_inode_queue_event (struct inode *inode, unsigned long mask,
+ const char *filename)
+{
struct inotify_watcher *watcher;
spin_lock(&inode->i_lock);
- list_for_each_entry (watcher, &inode->watchers, i_list) {
- spin_lock(&watcher->dev->lock);
- inotify_dev_queue_event (watcher->dev, watcher, mask, filename);
- spin_unlock(&watcher->dev->lock);
- }
+ list_for_each_entry(watcher, &inode->watchers, i_list) {
+ spin_lock(&watcher->dev->lock);
+ inotify_dev_queue_event(watcher->dev, watcher, mask, filename);
+ spin_unlock(&watcher->dev->lock);
+ }
spin_unlock(&inode->i_lock);
}
@@ -479,7 +518,7 @@
dev = watcher->dev;
if (event)
- inotify_dev_queue_event (dev, watcher, event, NULL);
+ inotify_dev_queue_event(dev, watcher, event, NULL);
inode_rm_watcher (inode, watcher);
inotify_dev_rm_watcher (watcher->dev, watcher);
@@ -529,7 +568,7 @@
INIT_LIST_HEAD(&umount);
spin_lock(&inode_lock);
- build_umount_list (&inode_in_use, sb, &umount);
+ build_umount_list (&inode_in_use, sb, &umount);
spin_unlock(&inode_lock);
process_umount_list (&umount);
@@ -584,7 +623,6 @@
goto out;
events = 0;
- event_count = 0;
out = 0;
err = 0;
@@ -609,12 +647,12 @@
}
}
- spin_lock_irq(&dev->lock);
+ spin_lock(&dev->lock);
add_wait_queue(&dev->wait, &wait);
repeat:
if (signal_pending(current)) {
- spin_unlock_irq (&dev->lock);
+ spin_unlock(&dev->lock);
out = -ERESTARTSYS;
set_current_state (TASK_RUNNING);
remove_wait_queue(&dev->wait, &wait);
@@ -622,9 +660,9 @@
}
set_current_state(TASK_INTERRUPTIBLE);
if (!inotify_dev_has_events (dev)) {
- spin_unlock_irq (&dev->lock);
+ spin_unlock(&dev->lock);
schedule ();
- spin_lock_irq (&dev->lock);
+ spin_lock(&dev->lock);
goto repeat;
}
@@ -646,7 +684,7 @@
inotify_dev_event_dequeue (dev);
}
- spin_unlock_irq (&dev->lock);
+ spin_unlock(&dev->lock);
/* Send the event buffer to user space */
err = copy_to_user (buf, eventbuf, events * sizeof(struct inotify_event));
@@ -693,7 +731,7 @@
init_timer(&dev->timer);
init_waitqueue_head(&dev->wait);
- atomic_set(&dev->event_count, 0);
+ dev->event_count = 0;
dev->nr_watches = 0;
dev->lock = SPIN_LOCK_UNLOCKED;
@@ -719,12 +757,14 @@
}
}
+/*
+ * inotify_release_all_events - destroy all of the events on a given device
+ */
static void inotify_release_all_events (struct inotify_device *dev)
{
spin_lock(&dev->lock);
- while (inotify_dev_has_events(dev)) {
+ while (inotify_dev_has_events(dev))
inotify_dev_event_dequeue(dev);
- }
spin_unlock(&dev->lock);
}
@@ -797,7 +837,6 @@
spin_unlock (&inode->i_lock);
spin_unlock (&dev->lock);
-
watcher = create_watcher (dev, request->mask, inode);
if (!watcher) {
@@ -870,7 +909,7 @@
spin_lock(&dev->lock);
printk (KERN_ALERT "inotify device: %p\n", dev);
- printk (KERN_ALERT "inotify event_count: %d\n", atomic_read(&dev->event_count));
+ printk (KERN_ALERT "inotify event_count: %u\n", dev->event_count);
printk (KERN_ALERT "inotify watch_count: %d\n", dev->nr_watches);
spin_unlock(&dev->lock);
^ permalink raw reply [flat|nested] 7+ messages in thread* Re: [patch] inotify: locking
2004-09-22 19:38 ` Robert Love
@ 2004-09-23 9:09 ` Paul Jackson
0 siblings, 0 replies; 7+ messages in thread
From: Paul Jackson @ 2004-09-23 9:09 UTC (permalink / raw)
To: Robert Love; +Cc: ttb, linux-kernel
> Evolution needs a heuristic to detect when I say that I attached
> something when in reality I did not.
I've mostly stopped submitting patches using my email client, and
instead submit them using a variant of a 'patch bomb' script. This has
significantly improved the clerical accuracy of my patch submissions.
The variant I'm using is in pretty good shape - you're welcome to
give it a try. See the embedded Usage string for documentation.
http://www.speakeasy.org/~pj99/sgi/sendpatchset
It handles sending one or several related patches, to a list of email
addresses. You prepare a text directive file with the addresses,
subjects and pathnames to the files containing the message contents.
Then you send it all off with a single invocation of this 'sendpatchset'
script.
--
I won't rest till it's the best ...
Programmer, Linux Scalability
Paul Jackson <pj@sgi.com> 1.650.933.1373
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch] inotify: locking
2004-09-22 19:37 [patch] inotify: locking Robert Love
2004-09-22 19:38 ` Robert Love
@ 2004-09-22 23:22 ` John McCutchan
2004-09-22 23:22 ` Robert Love
1 sibling, 1 reply; 7+ messages in thread
From: John McCutchan @ 2004-09-22 23:22 UTC (permalink / raw)
To: Robert Love; +Cc: linux-kernel
On Wed, 2004-09-22 at 15:37, Robert Love wrote:
> Hey, John.
>
> I went over the locking in drivers/char/inotify.c. Looks right.
>
> I made two major changes:
>
> - In a couple places you used irq-safe locks, but in most places
> you did not. It has to be all or nothing. We do not currently
> need protection from interrupts, so I changed the few
> irq-safe locks on dev->lock to normal spin_lock/spin_unlock
> calls.
>
> - dev->event_count was an atomic_t, but it was never accessed
> outside of dev->lock. I also did not see why ->event_count
> was atomic but not ->nr_watches. So I made event_count an
> unsigned int and removed the atomic operations.
>
Okay, this is my first kernel project so I didn't know/follow all of the
rules, I admit it is a bit of a mishmash.
> The rest of the (admittedly a bit large) patch is documenting the
> locking rules. I tried to put the locking assumptions in comments at
> the top of each function. I made some coding style cleanups as I went
> along, too, but not too many (those come next).
>
The patch and your previous patches look excellent. I have applied them
to my tree and I will be making a new release this evening.
> I do have one remaining concern: create_watcher() is called without the
> lock on dev, but it later obtains the lock, before it touches dev. So
> it is safe in that regard, but what if dev is deallocated before it
> grabs the lock? dev is passed in, so, for example, dev could be freed
> (or otherwise manipulated) and then the dereference of dev->lock would
> oops. A couple other functions do this. We probably need proper ref
> counting on dev. BUT, are all of these call chains off of VFS functions
> on the device? Perhaps so long as the device is open it is pinned?
>
Yes, AFAIK the only places where we rely on the dev not going away are
when we are handling a request from user space. As long as VFS
operations are serialized I don't think we have to worry about that.
John
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch] inotify: locking
2004-09-22 23:22 ` John McCutchan
@ 2004-09-22 23:22 ` Robert Love
0 siblings, 0 replies; 7+ messages in thread
From: Robert Love @ 2004-09-22 23:22 UTC (permalink / raw)
To: John McCutchan; +Cc: linux-kernel
On Wed, 2004-09-22 at 19:22 -0400, John McCutchan wrote:
> Okay, this is my first kernel project so I didn't know/follow all of the
> rules, I admit it is a bit of a mishmash.
Heh, none of these issues were big, and everything else was fine.
> Yes, AFAIK the only places where we rely on the dev not going away are
> when we are handling a request from user space. As long as VFS
> operations are serialized I don't think we have to worry about that.
You can see what locks and serialization the VFS uses in
Documentation/filesystems/Locking
Robert Love
^ permalink raw reply [flat|nested] 7+ messages in thread
* [RFC][PATCH] inotify 0.11.0 [WITH PATCH!]
@ 2004-09-28 22:33 John McCutchan
2004-09-30 19:01 ` [patch] inotify: locking Robert Love
0 siblings, 1 reply; 7+ messages in thread
From: John McCutchan @ 2004-09-28 22:33 UTC (permalink / raw)
To: linux-kernel, gamin-list, rml, viro, akpm, iggy
[-- Attachment #1: Type: text/plain, Size: 7759 bytes --]
Hello,
Here is release 0.11.0 of inotify. Attached is a patch to 2.6.8.1
--New in this version--
-remove timer (rml)
-fix typo (rml)
-remove check for dev->file_private (rml)
-redo find_inode (rml)
-use the bitmap functions (rml)
-modularization (rml)
-misc cleanup (rml,me)
-redo inotify_read (me)
John McCutchan
Release notes:
--Why Not dnotify and Why inotify (By Robert Love)--
Everyone seems quick to deride the blunder known as "dnotify" and
applaud a
replacement, any replacement, nay anything but that current mess, but in
the
name of fairness I present my treatise on why dnotify is what one might
call
not good:
* dnotify requires the opening of one fd per each directory that you
intend to
watch.
o The file descriptor pins the directory, disallowing the backing
device to be unmounted, which absolutely wreaks havoc with removable
media.
o Watching many directories results in many open file descriptors,
possibly hitting a per-process fd limit.
* dnotify is directory-based. You only learn about changes to
directories.
Sure, a change to a file in a directory affects the directory, but you
are
then forced to keep a cache of stat structures around to compare
things in
order to find out which file.
* dnotify's interface to user-space is awful.
o dnotify uses signals to communicate with user-space.
o Specifically, dnotify uses SIGIO.
o But then you can pick a different signal! So by "signals," I really
meant you need to use real-time signals if you want to queue the
events.
* dnotify basically ignores any problems that would arise in the VFS
from hard
links.
* Rumor is that the "d" in "dnotify" does not stand for "directory" but
for
"suck."
A suitable replacement is "inotify." And now, my tract on what inotify
brings
to the table:
* inotify's interface is a device node, not SIGIO.
o You open only a single fd, to the device node. No more pinning
directories or opening a million file descriptors.
o Usage is nice: open the device, issue simple commands via ioctl(),
and then block on the device. It returns events when, well, there are
events to be returned.
o You can select() on the device node and so it integrates with main
loops like coffee mixed with vanilla milkshake.
* inotify has an event that says "the filesystem that the item you were
watching is on was unmounted" (this is particularly cool).
* inotify can watch directories or files.
* The "i" in inotify does not stand for "suck" but for "inode" -- the
logical
choice since inotify is inode-based.
--COMPLEXITY--
I have been asked what the complexity of inotify is. Inotify has
2 path codes where complexity could be an issue:
Adding a watcher to a device
This code has to check if the inode is already being watched
by the device, this is O(1) since the maximum number of
devices is limited to 8.
Removing a watch from a device
This code has to do a search of all watches on the device to
find the watch descriptor that is being asked to remove.
This involves a linear search, but should not really be an issue
because it is limited to 8192 entries. If this does turn into
a concern, I would replace the list of watches on the device
with a sorted binary tree, so that the search could be done
very quickly.
The calls to inotify from the VFS code has a complexity of O(1) so
inotify does not affect the speed of VFS operations.
--MEMORY USAGE--
The inotify data structures are light weight:
inotify watch is 40 bytes
inotify device is 68 bytes
inotify event is 272 bytes
So assuming a device has 8192 watches, the structures are only going
to consume 320KB of memory. With a maximum number of 8 devices allowed
to exist at a time, this is still only 2.5 MB
Each device can also have 256 events queued at a time, which sums to
68KB per device. And only .5 MB if all devices are opened and have
a full event queue.
So approximately 3 MB of memory are used in the rare case of
everything open and full.
Each inotify watch pins the inode of a directory/file in memory,
the size of an inode is different per file system but let's assume
that it is 512 bytes.
So assuming the maximum number of global watches are active, this would
pin down 32 MB of inodes in the inode cache. Again not a problem
on a modern system.
On smaller systems, the maximum watches / events could be lowered
to provide a smaller foot print.
Keep in mind that this is an absolute worst case memory analysis.
In reality it will most likely cost approximately 5MB.
--HOWTO USE--
Inotify is a character device that when opened offers 2 IOCTL's.
(It actually has 4 but the other 2 are used for debugging)
INOTIFY_WATCH:
Which takes a path and event mask and returns a unique
(to the instance of the driver) integer (wd [watch descriptor]
from here on) that is a 1:1 mapping to the path passed.
What happens is inotify gets the inode (and ref's the inode)
for the path and adds a inotify_watcher structure to the inodes
list of watchers. If this instance of the driver is already
watching the path, the event mask will be updated and
the original wd will be returned.
INOTIFY_IGNORE:
Which takes an integer (that you got from INOTIFY_WATCH)
representing a wd that you are not interested in watching
anymore. This will:
send an IGNORE event to the device
remove the inotify_watcher structure from the device and
from the inode and unref the inode.
After you are watching 1 or more paths, you can read from the fd
and get events. The events are struct inotify_event. If you are
watching a directory and something happens to a file in the directory
the event will contain the filename (just the filename not the full
path).
-- EVENTS --
IN_ACCESS - Sent when file is accessed.
IN_MODIFY - Sent when file is modified.
IN_ATTRIB - Sent when file is chmod'ed.
IN_CLOSE - Sent when file is closed
IN_OPEN - Sent when file is opened.
IN_MOVED_FROM - Sent to the source folder of a move.
IN_MOVED_TO - Sent to the destination folder of a move.
IN_DELETE_SUBDIR - Sent when a sub directory is deleted. (When watching
parent)
IN_DELETE_FILE - Sent when a file is deleted. (When watching parent)
IN_CREATE_SUBDIR - Sent when a sub directory is created. (When watching
parent)
IN_CREATE_FILE - Sent when a file is created. (When watching parent)
IN_DELETE_SELF - Sent when file is deleted.
IN_UNMOUNT - Sent when the filesystem is being unmounted.
IN_Q_OVERFLOW - Sent when your event queue has overflowed.
The MOVED_FROM/MOVED_TO events are always sent in pairs.
MOVED_FROM/MOVED_TO
is also sent when a file is renamed. The cookie field in the event pairs
up MOVED_FROM/MOVED_TO events. These two events are not guaranteed to be
successive in the event stream. You must rely on the cookie to pair
them up. (Note, the cookie is not sent yet.)
If you aren't watching both the source and destination folders in a MOVE,
you will only get MOVED_TO or MOVED_FROM. In this case, MOVED_TO
is equivalent to a CREATE and MOVED_FROM is equivalent to a DELETE.
--KERNEL CHANGES--
inotify char device driver.
Adding calls to inotify_inode_queue_event and
inotify_dentry_parent_queue_event from VFS operations.
Dnotify has the same function calls. The complexity of the VFS
operations is not affected because inotify_*_queue_event is O(1).
Adding a call to inotify_super_block_umount from
generic_shutdown_superblock
inotify_super_block_umount consists of this:
find all of the inodes that are on the super block being shut down,
sends each watcher on each inode the UNMOUNT and IGNORED event
removes the watcher structures from each instance of the device driver
and each inode.
unref's the inode.
[-- Attachment #2: inotify-0.11.0.diff --]
[-- Type: text/x-patch, Size: 40270 bytes --]
--- clean/linux/drivers/char/inotify.c 1969-12-31 19:00:00.000000000 -0500
+++ linux/drivers/char/inotify.c 2004-09-28 18:06:48.000000000 -0400
@@ -0,0 +1,1017 @@
+/*
+ * Inode based directory notifications for Linux.
+ *
+ * Copyright (C) 2004 John McCutchan
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+/* TODO:
+ * unmount events don't get sent if filesystem is mounted in two places.
+ * dynamically allocate event filename
+ * watchers and events could use their own slab cache
+ * rename inotify_watcher to inotify_watch
+ * need a way to connect MOVED_TO/MOVED_FROM events in user space
+ */
+
+#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/poll.h>
+#include <linux/miscdevice.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/writeback.h>
+#include <linux/inotify.h>
+
+#define MAX_INOTIFY_DEVS 8 /* max open inotify devices */
+#define MAX_INOTIFY_DEV_WATCHERS 8192 /* max total watches */
+#define MAX_INOTIFY_QUEUED_EVENTS 256 /* max events queued on the dev*/
+
+static atomic_t watcher_count;
+static kmem_cache_t *watcher_cache;
+static kmem_cache_t *kevent_cache;
+
+/* For debugging */
+static int inotify_debug_flags;
+#define iprintk(f, str...) if (inotify_debug_flags & f) printk (KERN_ALERT str)
+
+/*
+ * struct inotify_device - represents an open instance of an inotify device
+ *
+ * For each inotify device, we need to keep track of the events queued on it,
+ * a list of the inodes that we are watching, and so on.
+ *
+ * 'bitmask' holds one bit for each possible watcher descriptor: a set bit
+ * implies that the given WD is valid, unset implies it is not.
+ *
+ * This structure is protected by 'lock'. Lock ordering:
+ * dev->lock
+ * dev->wait->lock
+ * FIXME: Define lock ordering wrt inode and dentry locking!
+ */
+struct inotify_device {
+ DECLARE_BITMAP(bitmask, MAX_INOTIFY_DEV_WATCHERS);
+ wait_queue_head_t wait;
+ struct list_head events;
+ struct list_head watchers;
+ spinlock_t lock;
+ unsigned int event_count;
+ int nr_watches;
+};
+
+struct inotify_watcher {
+ int wd; // watcher descriptor
+ unsigned long mask;
+ struct inode * inode;
+ struct inotify_device * dev;
+ struct list_head d_list; // device list
+ struct list_head i_list; // inode list
+ struct list_head u_list; // unmount list
+};
+#define inotify_watcher_d_list(pos) list_entry((pos), struct inotify_watcher, d_list)
+#define inotify_watcher_i_list(pos) list_entry((pos), struct inotify_watcher, i_list)
+#define inotify_watcher_u_list(pos) list_entry((pos), struct inotify_watcher, u_list)
+
+/*
+ * A list of these is attached to each instance of the driver
+ * when the drivers read() gets called, this list is walked and
+ * all events that can fit in the buffer get delivered
+ */
+struct inotify_kernel_event {
+ struct list_head list;
+ struct inotify_event event;
+};
+#define list_to_inotify_kernel_event(pos) list_entry((pos), struct inotify_kernel_event, list)
+
+/*
+ * find_inode - resolve a user-given path to a specific inode and iget() it
+ */
+static struct inode * find_inode(const char __user *dirname)
+{
+ struct inode *inode;
+ struct nameidata nd;
+ int error;
+
+ error = __user_walk(dirname, LOOKUP_FOLLOW, &nd);
+ if (error) {
+ iprintk(INOTIFY_DEBUG_INODE, "could not find inode\n");
+ inode = ERR_PTR(error);
+ goto out;
+ }
+
+ inode = nd.dentry->d_inode;
+ __iget(inode);
+ iprintk(INOTIFY_DEBUG_INODE, "ref'd inode\n");
+ path_release(&nd);
+out:
+ return inode;
+}
+
+static void unref_inode(struct inode *inode)
+{
+ iprintk(INOTIFY_DEBUG_INODE, "unref'd inode\n");
+ iput(inode);
+}
+
+struct inotify_kernel_event *kernel_event(int wd, int mask,
+ const char *filename)
+{
+ struct inotify_kernel_event *kevent;
+
+ kevent = kmem_cache_alloc(kevent_cache, GFP_ATOMIC);
+ if (!kevent) {
+ iprintk(INOTIFY_DEBUG_ALLOC,
+ "failed to alloc kevent (%d,%d)\n", wd, mask);
+ goto out;
+ }
+
+ iprintk(INOTIFY_DEBUG_ALLOC, "alloced kevent %p (%d,%d)\n",
+ kevent, wd, mask);
+
+ kevent->event.wd = wd;
+ kevent->event.mask = mask;
+ INIT_LIST_HEAD(&kevent->list);
+
+ if (filename) {
+ iprintk(INOTIFY_DEBUG_FILEN,
+ "filename for event was %p %s\n", filename, filename);
+ strncpy(kevent->event.filename, filename,
+ INOTIFY_FILENAME_MAX);
+ kevent->event.filename[INOTIFY_FILENAME_MAX-1] = '\0';
+ iprintk(INOTIFY_DEBUG_FILEN,
+ "filename after copying %s\n", kevent->event.filename);
+ } else {
+ iprintk(INOTIFY_DEBUG_FILEN, "no filename for event\n");
+ kevent->event.filename[0] = '\0';
+ }
+
+
+out:
+ return kevent;
+}
+
+void delete_kernel_event(struct inotify_kernel_event *kevent)
+{
+ if (!kevent)
+ return;
+
+ INIT_LIST_HEAD(&kevent->list);
+ kevent->event.wd = -1;
+ kevent->event.mask = 0;
+
+ iprintk(INOTIFY_DEBUG_ALLOC, "free'd kevent %p\n", kevent);
+ kmem_cache_free(kevent_cache, kevent);
+}
+
+#define inotify_dev_has_events(dev) (!list_empty(&dev->events))
+#define inotify_dev_get_event(dev) (list_to_inotify_kernel_event(dev->events.next))
+/* Does this events mask get sent to the watcher ? */
+#define event_and(event_mask,watchers_mask) ((event_mask == IN_UNMOUNT) || \
+ (event_mask == IN_IGNORED) || \
+ (event_mask & watchers_mask))
+
+/*
+ * inotify_dev_queue_event - add a new event to the given device
+ *
+ * Drops exact duplicates of the most recently queued event, converts
+ * queue overflow into a single IN_Q_OVERFLOW event, and otherwise
+ * appends the event and wakes any readers sleeping on dev->wait.
+ *
+ * Caller must hold dev->lock.
+ */
+static void inotify_dev_queue_event(struct inotify_device *dev,
+				    struct inotify_watcher *watcher, int mask,
+				    const char *filename)
+{
+	struct inotify_kernel_event *kevent;
+
+	/*
+	 * Check if the new event is a duplicate of the last event queued.
+	 * Events are appended with list_add_tail(), so the most recent
+	 * event sits at events.prev.  The old code compared against
+	 * events.next -- the OLDEST event -- so duplicate suppression
+	 * almost never fired once more than one event was queued.
+	 */
+	if (dev->event_count) {
+		struct inotify_kernel_event *last;
+
+		last = list_to_inotify_kernel_event(dev->events.prev);
+		if (last->event.mask == mask &&
+		    last->event.wd == watcher->wd) {
+			/* Check if the filenames match */
+			if (!filename && last->event.filename[0] == '\0')
+				return;
+			if (filename &&
+			    !strcmp(last->event.filename, filename))
+				return;
+		}
+	}
+
+	/*
+	 * the queue has already overflowed and we have already sent the
+	 * Q_OVERFLOW event
+	 */
+	if (dev->event_count > MAX_INOTIFY_QUEUED_EVENTS) {
+		iprintk(INOTIFY_DEBUG_EVENTS,
+			"event queue for %p overflowed\n", dev);
+		return;
+	}
+
+	/* the queue has just overflowed and we need to notify user space */
+	if (dev->event_count == MAX_INOTIFY_QUEUED_EVENTS) {
+		dev->event_count++;
+		kevent = kernel_event(-1, IN_Q_OVERFLOW, NULL);
+		iprintk(INOTIFY_DEBUG_EVENTS, "sending IN_Q_OVERFLOW to %p\n",
+			dev);
+		goto add_event_to_queue;
+	}
+
+	/* drop events neither the watcher nor the inode's union mask wants */
+	if (!event_and(mask, watcher->inode->watchers_mask) ||
+	    !event_and(mask, watcher->mask))
+		return;
+
+	dev->event_count++;
+	kevent = kernel_event(watcher->wd, mask, filename);
+
+add_event_to_queue:
+	if (!kevent) {
+		/* allocation failed: give back the count we just took */
+		iprintk(INOTIFY_DEBUG_EVENTS, "failed to queue event for %p"
+			" -- could not allocate kevent\n", dev);
+		dev->event_count--;
+		return;
+	}
+
+	/* queue the event and wake up anyone waiting */
+	list_add_tail(&kevent->list, &dev->events);
+	iprintk(INOTIFY_DEBUG_EVENTS,
+		"queued event %x for %p\n", kevent->event.mask, dev);
+	wake_up_interruptible(&dev->wait);
+}
+
+/*
+ * inotify_dev_event_dequeue - destroy an event on the given device
+ *
+ * Removes and frees the oldest queued event (FIFO order) and drops
+ * dev->event_count.  A no-op on an empty queue.
+ *
+ * Caller must hold dev->lock.
+ */
+static void inotify_dev_event_dequeue(struct inotify_device *dev)
+{
+	if (!inotify_dev_has_events(dev))
+		return;
+
+	{
+	struct inotify_kernel_event *kevent;
+
+	kevent = inotify_dev_get_event(dev);
+	list_del(&kevent->list);
+	dev->event_count--;
+	delete_kernel_event(kevent);
+
+	iprintk(INOTIFY_DEBUG_EVENTS, "dequeued event on %p\n", dev);
+	}
+}
+
+/*
+ * inotify_dev_get_wd - returns the next WD for use by the given dev
+ *
+ * Allocates the lowest free watch descriptor from dev->bitmask and bumps
+ * dev->nr_watches.  Returns -1 when dev is NULL or already holds
+ * MAX_INOTIFY_DEV_WATCHERS watches; that guard is what guarantees
+ * find_first_zero_bit() finds a free bit below.
+ *
+ * Caller must hold dev->lock before calling.
+ */
+static int inotify_dev_get_wd(struct inotify_device *dev)
+{
+	int wd;
+
+	if (!dev || dev->nr_watches == MAX_INOTIFY_DEV_WATCHERS)
+		return -1;
+
+	dev->nr_watches++;
+	wd = find_first_zero_bit(dev->bitmask, MAX_INOTIFY_DEV_WATCHERS);
+	set_bit(wd, dev->bitmask);
+
+	return wd;
+}
+
+/*
+ * inotify_dev_put_wd - release the given WD on the given device
+ *
+ * Returns the descriptor's bit to dev->bitmask and drops nr_watches.
+ * Returns -1 on a NULL dev or negative wd, 0 on success.  Note: wd is
+ * not validated as currently allocated; callers pass a wd they own.
+ *
+ * Caller must hold dev->lock.
+ */
+static int inotify_dev_put_wd(struct inotify_device *dev, int wd)
+{
+	if (!dev || wd < 0)
+		return -1;
+
+	dev->nr_watches--;
+	clear_bit(wd, dev->bitmask);
+
+	return 0;
+}
+
+/*
+ * create_watcher - creates a watcher on the given device.
+ *
+ * Allocates and initializes a watcher, then takes dev->lock just long
+ * enough to reserve a watch descriptor.  Returns NULL on allocation
+ * failure or when the device is already full.  The watcher is NOT yet
+ * linked onto dev->watchers or inode->watchers; the caller does that.
+ *
+ * Grabs dev->lock, so the caller must not hold it.
+ *
+ * NOTE(review): nothing here pins 'dev' between entry and the
+ * spin_lock below; presumably the open file keeps the device alive --
+ * confirm (proper refcounting on dev was raised in review).
+ */
+static struct inotify_watcher *create_watcher(struct inotify_device *dev,
+					      int mask, struct inode *inode)
+{
+	struct inotify_watcher *watcher;
+
+	watcher = kmem_cache_alloc(watcher_cache, GFP_KERNEL);
+	if (!watcher) {
+		iprintk(INOTIFY_DEBUG_ALLOC,
+			"failed to allocate watcher (%p,%d)\n", inode, mask);
+		return NULL;
+	}
+
+	watcher->wd = -1;
+	watcher->mask = mask;
+	watcher->inode = inode;
+	watcher->dev = dev;
+	INIT_LIST_HEAD(&watcher->d_list);
+	INIT_LIST_HEAD(&watcher->i_list);
+	INIT_LIST_HEAD(&watcher->u_list);
+
+	spin_lock(&dev->lock);
+	watcher->wd = inotify_dev_get_wd(dev);
+	spin_unlock(&dev->lock);
+
+	if (watcher->wd < 0) {
+		/* device full: no descriptor was reserved, just free */
+		iprintk(INOTIFY_DEBUG_ERRORS,
+			"Could not get wd for watcher %p\n", watcher);
+		iprintk(INOTIFY_DEBUG_ALLOC, "free'd watcher %p\n", watcher);
+		kmem_cache_free(watcher_cache, watcher);
+		return NULL;
+	}
+
+	return watcher;
+}
+
+/*
+ * delete_watcher - removes the given 'watcher' from the given 'dev'
+ *
+ * Returns the watch descriptor to the device and frees the watcher.
+ * The watcher must already be unlinked from dev->watchers and
+ * inode->watchers.
+ *
+ * Caller must hold dev->lock.
+ */
+static void delete_watcher(struct inotify_device *dev,
+			   struct inotify_watcher *watcher)
+{
+	inotify_dev_put_wd(dev, watcher->wd);
+	iprintk(INOTIFY_DEBUG_ALLOC, "free'd watcher %p\n", watcher);
+	kmem_cache_free(watcher_cache, watcher);
+}
+
+/*
+ * inode_find_dev - find the watcher associated with the given inode and dev
+ *
+ * (The old comment called this inotify_find_dev.)  Returns NULL when
+ * 'dev' has no watcher on 'inode'.
+ *
+ * Caller must hold dev->lock.
+ */
+static struct inotify_watcher *inode_find_dev(struct inode *inode,
+					      struct inotify_device *dev)
+{
+	struct inotify_watcher *watcher;
+
+	list_for_each_entry(watcher, &inode->watchers, i_list) {
+		if (watcher->dev == dev)
+			return watcher;
+	}
+
+	return NULL;
+}
+
+/*
+ * dev_find_wd - look up a watcher on 'dev' by its watch descriptor
+ *
+ * Returns NULL if no watcher with that wd exists.  Walks dev->watchers,
+ * so the caller should hold dev->lock -- TODO confirm; inotify_ignore()
+ * currently calls this unlocked.
+ */
+static struct inotify_watcher *dev_find_wd(struct inotify_device *dev, int wd)
+{
+	struct inotify_watcher *watcher;
+
+	list_for_each_entry(watcher, &dev->watchers, d_list) {
+		if (watcher->wd == wd)
+			return watcher;
+	}
+	return NULL;
+}
+
+/*
+ * inotify_dev_is_watching_inode - does 'dev' have a watcher on 'inode'?
+ *
+ * Returns 1/0.  Walks dev->watchers, so the caller should hold
+ * dev->lock -- TODO confirm for all call sites.
+ */
+static int inotify_dev_is_watching_inode(struct inotify_device *dev,
+					 struct inode *inode)
+{
+	struct inotify_watcher *watcher;
+
+	list_for_each_entry(watcher, &dev->watchers, d_list) {
+		if (watcher->inode == inode)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * inotify_dev_add_watcher - link 'watcher' onto dev->watchers
+ *
+ * Returns -EINVAL on NULL arguments or a duplicate wd, -ENOSPC when the
+ * device already holds MAX_INOTIFY_DEV_WATCHERS watches, 0 on success.
+ * Walks and modifies dev->watchers (via dev_find_wd), so the caller
+ * must hold dev->lock.
+ */
+static int inotify_dev_add_watcher(struct inotify_device *dev,
+				   struct inotify_watcher *watcher)
+{
+	int error;
+
+	error = 0;
+
+	if (!dev || !watcher) {
+		error = -EINVAL;
+		goto out;
+	}
+
+	if (dev_find_wd (dev, watcher->wd)) {
+		error = -EINVAL;
+		goto out;
+	}
+
+	if (dev->nr_watches == MAX_INOTIFY_DEV_WATCHERS) {
+		error = -ENOSPC;
+		goto out;
+	}
+
+	list_add(&watcher->d_list, &dev->watchers);
+out:
+	return error;
+}
+
+/*
+ * inotify_dev_rm_watcher - remove the given watch from the given device
+ *
+ * Queues IN_IGNORED to the device first, so user space learns the wd is
+ * dead, then unlinks the watcher from dev->watchers.  Returns -EINVAL
+ * on a NULL watcher.  Does not free the watcher.
+ *
+ * Caller must hold dev->lock because we call inotify_dev_queue_event().
+ */
+static int inotify_dev_rm_watcher(struct inotify_device *dev,
+				  struct inotify_watcher *watcher)
+{
+	int error;
+
+	error = -EINVAL;
+	if (watcher) {
+		inotify_dev_queue_event(dev, watcher, IN_IGNORED, NULL);
+		list_del(&watcher->d_list);
+		error = 0;
+	}
+
+	return error;
+}
+
+/*
+ * inode_update_watchers_mask - recompute inode->watchers_mask as the
+ * union of every attached watcher's interest mask.
+ *
+ * Walks inode->watchers; callers appear to hold inode->i_lock --
+ * confirm for any new call site.
+ */
+void inode_update_watchers_mask(struct inode *inode)
+{
+	struct inotify_watcher *watcher;
+	unsigned long new_mask;
+
+	new_mask = 0;
+	list_for_each_entry(watcher, &inode->watchers, i_list)
+		new_mask |= watcher->mask;
+
+	inode->watchers_mask = new_mask;
+}
+
+/*
+ * inode_add_watcher - add a watcher to the given inode
+ *
+ * Fails with -EINVAL on NULL arguments or when this device already
+ * watches the inode.  On success links the watcher, bumps
+ * inode->watcher_count and refreshes the union event mask.
+ *
+ * Callers must hold dev->lock, because we call inode_find_dev().
+ */
+static int inode_add_watcher(struct inode *inode,
+			     struct inotify_watcher *watcher)
+{
+	if (!inode || !watcher || inode_find_dev(inode, watcher->dev))
+		return -EINVAL;
+
+	list_add(&watcher->i_list, &inode->watchers);
+	inode->watcher_count++;
+	inode_update_watchers_mask(inode);
+
+	return 0;
+}
+
+/*
+ * inode_rm_watcher - unlink 'watcher' from 'inode', drop the inode's
+ * watcher count and refresh its union event mask.
+ *
+ * Returns -EINVAL on NULL arguments.  Does not free the watcher.
+ * Callers appear to hold inode->i_lock -- confirm for new call sites.
+ */
+static int inode_rm_watcher(struct inode *inode,
+			    struct inotify_watcher *watcher)
+{
+	if (!inode || !watcher)
+		return -EINVAL;
+
+	list_del(&watcher->i_list);
+	inode->watcher_count--;
+
+	inode_update_watchers_mask(inode);
+
+	return 0;
+}
+
+/* Kernel API */
+
+/*
+ * inotify_inode_queue_event - queue 'mask' (with optional 'filename')
+ * to every device watching 'inode'.
+ *
+ * Lock order established here: inode->i_lock OUTSIDE dev->lock.  Every
+ * path that takes both locks must use this order or risk an AB-BA
+ * deadlock against event delivery.
+ */
+void inotify_inode_queue_event(struct inode *inode, unsigned long mask,
+			       const char *filename)
+{
+	struct inotify_watcher *watcher;
+
+	spin_lock(&inode->i_lock);
+
+	list_for_each_entry(watcher, &inode->watchers, i_list) {
+		spin_lock(&watcher->dev->lock);
+		inotify_dev_queue_event(watcher->dev, watcher, mask, filename);
+		spin_unlock(&watcher->dev->lock);
+	}
+
+	spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
+
+/*
+ * inotify_inode_queue_event_pair - queue one event to each of two
+ * inodes' watcher lists under a single critical section.
+ *
+ * The two i_locks are taken in address order so that concurrent callers
+ * passing the inodes in opposite order cannot deadlock (the old code
+ * always locked inode1 first -- a classic AB-BA).  If a caller hands us
+ * the same inode twice, lock it only once or we self-deadlock.
+ */
+void inotify_inode_queue_event_pair(struct inode *inode1, unsigned long mask1,
+				    const char *filename1,
+				    struct inode *inode2, unsigned long mask2,
+				    const char *filename2)
+{
+	struct inotify_watcher *watcher;
+
+	if (inode1 == inode2) {
+		spin_lock(&inode1->i_lock);
+	} else if (inode1 < inode2) {
+		spin_lock(&inode1->i_lock);
+		spin_lock(&inode2->i_lock);
+	} else {
+		spin_lock(&inode2->i_lock);
+		spin_lock(&inode1->i_lock);
+	}
+
+	list_for_each_entry(watcher, &inode1->watchers, i_list) {
+		spin_lock(&watcher->dev->lock);
+		inotify_dev_queue_event(watcher->dev, watcher,
+					mask1, filename1);
+		spin_unlock(&watcher->dev->lock);
+	}
+
+	list_for_each_entry(watcher, &inode2->watchers, i_list) {
+		spin_lock(&watcher->dev->lock);
+		inotify_dev_queue_event(watcher->dev, watcher,
+					mask2, filename2);
+		spin_unlock(&watcher->dev->lock);
+	}
+
+	if (inode1 != inode2)
+		spin_unlock(&inode2->i_lock);
+	spin_unlock(&inode1->i_lock);
+}
+EXPORT_SYMBOL_GPL(inotify_inode_queue_event_pair);
+
+/*
+ * inotify_dentry_parent_queue_event - queue an event to the watchers of
+ * a dentry's parent directory.
+ *
+ * dget_parent() takes its own stable reference on the parent, so there
+ * is no need to hold dentry->d_lock across the call.  The old code
+ * pinned d_lock for the whole operation and called dput() under it,
+ * which is not allowed: dput() may sleep.
+ */
+void inotify_dentry_parent_queue_event(struct dentry *dentry,
+				       unsigned long mask,
+				       const char *filename)
+{
+	struct dentry *parent;
+
+	parent = dget_parent(dentry);
+	inotify_inode_queue_event(parent->d_inode, mask, filename);
+	dput(parent);
+}
+EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
+
+/*
+ * ignore_helper - tear one watcher down completely: optionally queue a
+ * final 'event' (IN_UNMOUNT / IN_IGNORED via inotify_dev_rm_watcher),
+ * unlink it from the inode, device and umount lists, free it, and drop
+ * the inode reference the watch held.
+ *
+ * Lock order: inode->i_lock OUTSIDE dev->lock, matching
+ * inotify_inode_queue_event().  The old code nested them the other way
+ * around, an AB-BA deadlock against the event-delivery path.
+ */
+static void ignore_helper(struct inotify_watcher *watcher, int event)
+{
+	struct inotify_device *dev;
+	struct inode *inode;
+
+	inode = watcher->inode;
+	dev = watcher->dev;
+
+	spin_lock(&inode->i_lock);
+	spin_lock(&dev->lock);
+
+	if (event)
+		inotify_dev_queue_event(dev, watcher, event, NULL);
+
+	inode_rm_watcher(inode, watcher);
+	inotify_dev_rm_watcher(dev, watcher);
+	list_del(&watcher->u_list);
+
+	/* delete_watcher() needs dev->lock held */
+	delete_watcher(dev, watcher);
+	spin_unlock(&dev->lock);
+	spin_unlock(&inode->i_lock);
+
+	unref_inode(inode);
+}
+
+/*
+ * process_umount_list - tear down every watcher collected on 'umount',
+ * delivering IN_UNMOUNT to each.  Safe-iterates because ignore_helper()
+ * unlinks and frees each watcher.
+ */
+static void process_umount_list(struct list_head *umount) {
+	struct inotify_watcher *watcher, *next;
+
+	list_for_each_entry_safe(watcher, next, umount, u_list)
+		ignore_helper(watcher, IN_UNMOUNT);
+}
+
+/*
+ * build_umount_list - collect onto 'umount', via their u_list links,
+ * every watcher attached to an inode on 'head' that belongs to super
+ * block 'sb'.
+ *
+ * Called under inode_lock (see inotify_super_block_umount), which keeps
+ * 'head' stable while we walk it; each inode's watcher list is sampled
+ * under its i_lock.
+ */
+static void build_umount_list(struct list_head *head, struct super_block *sb,
+			      struct list_head *umount)
+{
+	struct inode * inode;
+
+	list_for_each_entry(inode, head, i_list) {
+		struct inotify_watcher *watcher;
+
+		if (inode->i_sb != sb)
+			continue;
+
+		spin_lock(&inode->i_lock);
+
+		list_for_each_entry(watcher, &inode->watchers, i_list)
+			list_add(&watcher->u_list, umount);
+
+		spin_unlock(&inode->i_lock);
+	}
+}
+
+/*
+ * inotify_super_block_umount - called from the VFS at umount time:
+ * gather every watcher on the dying super block (under inode_lock),
+ * then destroy them outside inode_lock, sending IN_UNMOUNT to each.
+ */
+void inotify_super_block_umount(struct super_block *sb)
+{
+	struct list_head umount;
+
+	INIT_LIST_HEAD(&umount);
+
+	spin_lock(&inode_lock);
+	build_umount_list(&inode_in_use, sb, &umount);
+	spin_unlock(&inode_lock);
+
+	process_umount_list(&umount);
+}
+EXPORT_SYMBOL_GPL(inotify_super_block_umount);
+
+/*
+ * inotify_inode_is_dead - tear down every watcher on a dying inode.
+ *
+ * Safe-iterates because ignore_helper() unlinks each watcher from
+ * inode->watchers.  NOTE(review): the list is walked without
+ * inode->i_lock here -- confirm every VFS caller serializes against
+ * concurrent watch add/remove.
+ */
+void inotify_inode_is_dead(struct inode *inode)
+{
+	struct inotify_watcher *watcher, *next;
+
+	list_for_each_entry_safe(watcher, next, &inode->watchers, i_list)
+		ignore_helper(watcher, 0);
+}
+EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
+
+/* The driver interface is implemented below */
+
+/*
+ * inotify_poll - ->poll(): readable when at least one event is queued.
+ *
+ * inotify_dev_has_events() is checked without dev->lock; a stale answer
+ * costs at most a spurious or delayed wakeup, which the next
+ * wake_up_interruptible() from the queueing path corrects -- TODO
+ * confirm this is acceptable.
+ */
+static unsigned int inotify_poll(struct file *file, poll_table *wait)
+{
+	struct inotify_device *dev;
+
+	dev = file->private_data;
+
+	poll_wait(file, &dev->wait, wait);
+
+	if (inotify_dev_has_events(dev))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+/*
+ * inotify_read - ->read(): block until events are queued, then hand out
+ * as many whole struct inotify_event records as fit in 'buf'.
+ *
+ * Returns the byte count copied, -EINVAL for a too-small buffer,
+ * -EAGAIN (O_NONBLOCK) or -ERESTARTSYS when no events arrive, -EFAULT
+ * on a bad user buffer.
+ */
+static ssize_t inotify_read(struct file *file, __user char *buf,
+			    size_t count, loff_t *pos)
+{
+	/*
+	 * 'out' was a size_t in the old code, so -EAGAIN/-ERESTARTSYS
+	 * were returned to user space as huge positive byte counts.
+	 */
+	ssize_t out;
+	struct inotify_kernel_event *kevent;
+	struct inotify_device *dev;
+	char *obuf;
+	int err;
+	DECLARE_WAITQUEUE(wait, current);
+
+	obuf = buf;
+	dev = file->private_data;
+
+	/* We only hand out full inotify events */
+	if (count < sizeof(struct inotify_event))
+		return -EINVAL;
+
+	spin_lock(&dev->lock);
+	add_wait_queue(&dev->wait, &wait);
+	do {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (inotify_dev_has_events(dev))
+			break;
+
+		if (file->f_flags & O_NONBLOCK) {
+			out = -EAGAIN;
+			goto out;
+		}
+
+		if (signal_pending(current)) {
+			out = -ERESTARTSYS;
+			goto out;
+		}
+
+		/*
+		 * The old loop never dropped the lock or called
+		 * schedule(), so it spun forever holding dev->lock.
+		 * Sleep properly; the wakeup comes from
+		 * inotify_dev_queue_event() via dev->wait.
+		 */
+		spin_unlock(&dev->lock);
+		schedule();
+		spin_lock(&dev->lock);
+	} while (1);
+
+	remove_wait_queue(&dev->wait, &wait);
+	set_current_state(TASK_RUNNING);
+	spin_unlock(&dev->lock);
+
+	while (count >= sizeof(struct inotify_event)) {
+		spin_lock(&dev->lock);
+		if (!inotify_dev_has_events(dev)) {
+			spin_unlock(&dev->lock);
+			break;
+		}
+		kevent = inotify_dev_get_event(dev);
+		spin_unlock(&dev->lock);
+		/*
+		 * NOTE(review): the event is copied out after dropping
+		 * dev->lock, so a concurrent reader could dequeue and
+		 * free it underneath us.  Needs a single-reader rule or
+		 * a copy into a local struct under the lock -- confirm
+		 * before merging.
+		 */
+		err = copy_to_user(buf, &kevent->event,
+				   sizeof(struct inotify_event));
+		if (err)
+			return -EFAULT;
+		spin_lock(&dev->lock);
+		inotify_dev_event_dequeue(dev);
+		spin_unlock(&dev->lock);
+		count -= sizeof(struct inotify_event);
+		buf += sizeof(struct inotify_event);
+	}
+
+	return buf - obuf;
+
+out:
+	remove_wait_queue(&dev->wait, &wait);
+	set_current_state(TASK_RUNNING);
+	spin_unlock(&dev->lock);
+	return out;
+}
+
+/*
+ * inotify_open - ->open(): allocate and initialize a per-open
+ * inotify_device, subject to the MAX_INOTIFY_DEVS limit.
+ */
+static int inotify_open(struct inode *inode, struct file *file)
+{
+	struct inotify_device *dev;
+
+	/*
+	 * atomic_inc_return() makes the limit check and the increment a
+	 * single step.  The old read-then-increment pair let two racing
+	 * opens both pass the MAX_INOTIFY_DEVS check.
+	 */
+	if (atomic_inc_return(&watcher_count) > MAX_INOTIFY_DEVS) {
+		atomic_dec(&watcher_count);
+		return -ENODEV;
+	}
+
+	dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
+	if (!dev) {
+		/* the old code leaked its watcher_count slot here */
+		atomic_dec(&watcher_count);
+		return -ENOMEM;
+	}
+
+	bitmap_zero(dev->bitmask, MAX_INOTIFY_DEV_WATCHERS);
+
+	INIT_LIST_HEAD(&dev->events);
+	INIT_LIST_HEAD(&dev->watchers);
+	init_waitqueue_head(&dev->wait);
+
+	dev->event_count = 0;
+	dev->nr_watches = 0;
+	/* spin_lock_init(), not assignment of SPIN_LOCK_UNLOCKED */
+	spin_lock_init(&dev->lock);
+
+	file->private_data = dev;
+
+	printk(KERN_ALERT "inotify device opened\n");
+
+	return 0;
+}
+
+/*
+ * inotify_release_all_watchers - detach and free every watcher on 'dev'
+ *
+ * Safe-iterates because ignore_helper() unlinks each watcher from
+ * dev->watchers.  The traversal itself takes no dev->lock
+ * (ignore_helper() acquires it per watcher); presumably this is only
+ * safe because ->release() runs with no remaining users -- confirm.
+ */
+static void inotify_release_all_watchers(struct inotify_device *dev)
+{
+	struct inotify_watcher *watcher,*next;
+
+	list_for_each_entry_safe(watcher, next, &dev->watchers, d_list)
+		ignore_helper(watcher, 0);
+}
+
+/*
+ * inotify_release_all_events - destroy all of the events on a given device
+ *
+ * Drains the queue under dev->lock so every queued kevent is returned
+ * to kevent_cache before the device itself is freed.
+ */
+static void inotify_release_all_events(struct inotify_device *dev)
+{
+	spin_lock(&dev->lock);
+	while (inotify_dev_has_events(dev))
+		inotify_dev_event_dequeue(dev);
+	spin_unlock(&dev->lock);
+}
+
+
+/*
+ * inotify_release - ->release(): tear down all watchers and queued
+ * events, free the device, and return the watcher_count slot taken in
+ * inotify_open().
+ */
+static int inotify_release(struct inode *inode, struct file *file)
+{
+	struct inotify_device *dev;
+
+	dev = file->private_data;
+	inotify_release_all_watchers(dev);
+	inotify_release_all_events(dev);
+	kfree(dev);
+
+	printk(KERN_ALERT "inotify device released\n");
+
+	atomic_dec(&watcher_count);
+	return 0;
+}
+
+/*
+ * inotify_watch - handle INOTIFY_WATCH: start (or update) a watch on
+ * the path in 'request' and return the watch descriptor.
+ *
+ * Lock order: inode->i_lock OUTSIDE dev->lock, matching
+ * inotify_inode_queue_event().  (The old code nested them the other way
+ * around.)
+ */
+static int inotify_watch(struct inotify_device *dev,
+			 struct inotify_watch_request *request)
+{
+	int err;
+	struct inode *inode;
+	struct inotify_watcher *watcher;
+
+	err = 0;
+
+	/* takes a reference on the inode; every failure path below must
+	 * unref_inode() it, a successful watch keeps it */
+	inode = find_inode(request->dirname);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto exit;
+	}
+
+	if (!S_ISDIR(inode->i_mode))
+		iprintk(INOTIFY_DEBUG_ERRORS, "watching file\n");
+
+	spin_lock(&inode->i_lock);
+	spin_lock(&dev->lock);
+
+	/*
+	 * This handles the case of re-adding a directory we are already
+	 * watching, we just update the mask and return 0
+	 */
+	if (inotify_dev_is_watching_inode(dev, inode)) {
+		struct inotify_watcher *owatcher;	/* the old watcher */
+
+		iprintk(INOTIFY_DEBUG_ERRORS,
+			"adjusting event mask for inode %p\n", inode);
+
+		owatcher = inode_find_dev(inode, dev);
+		owatcher->mask = request->mask;
+		inode_update_watchers_mask(inode);
+		spin_unlock(&dev->lock);
+		spin_unlock(&inode->i_lock);
+		unref_inode(inode);
+
+		return 0;
+	}
+
+	spin_unlock(&dev->lock);
+	spin_unlock(&inode->i_lock);
+
+	/* create_watcher() takes dev->lock itself, so call it unlocked */
+	watcher = create_watcher(dev, request->mask, inode);
+	if (!watcher) {
+		unref_inode(inode);
+		return -ENOSPC;
+	}
+
+	spin_lock(&inode->i_lock);
+	spin_lock(&dev->lock);
+
+	/* We can't add anymore watchers to this device */
+	if (inotify_dev_add_watcher(dev, watcher) == -ENOSPC) {
+		iprintk(INOTIFY_DEBUG_ERRORS,
+			"can't add watcher dev is full\n");
+		delete_watcher(dev, watcher);
+		spin_unlock(&dev->lock);
+		spin_unlock(&inode->i_lock);
+
+		unref_inode(inode);
+		return -ENOSPC;
+	}
+
+	err = inode_add_watcher(inode, watcher);
+	if (err) {
+		/*
+		 * Lost a race: while the locks were dropped around
+		 * create_watcher(), another task attached a watcher for
+		 * this device to the inode.  The old code ignored this
+		 * return value, leaving the watcher on dev->watchers
+		 * but absent from inode->watchers.
+		 */
+		list_del(&watcher->d_list);
+		delete_watcher(dev, watcher);
+		spin_unlock(&dev->lock);
+		spin_unlock(&inode->i_lock);
+		unref_inode(inode);
+		return err;
+	}
+
+	/* we keep a reference on the inode */
+	err = watcher->wd;
+
+	spin_unlock(&dev->lock);
+	spin_unlock(&inode->i_lock);
+exit:
+	return err;
+}
+
+/*
+ * inotify_ignore - handle INOTIFY_IGNORE: drop the watch with
+ * descriptor 'wd' from 'dev'.  Returns -EINVAL for an unknown wd.
+ *
+ * NOTE(review): dev_find_wd() walks dev->watchers with no dev->lock
+ * here, and the watcher could be freed by a concurrent ignore/release
+ * before ignore_helper() takes its locks -- needs fixing or an
+ * explanation of what serializes ioctl callers.
+ */
+static int inotify_ignore(struct inotify_device *dev, int wd)
+{
+	struct inotify_watcher *watcher;
+
+	watcher = dev_find_wd(dev, wd);
+	if (!watcher)
+		return -EINVAL;
+	ignore_helper(watcher, 0);
+
+	return 0;
+}
+
+/*
+ * inotify_print_stats - debug aid for the INOTIFY_STATS ioctl: dump
+ * global counters and per-device counts (the latter under dev->lock)
+ * to the kernel log.
+ */
+static void inotify_print_stats(struct inotify_device *dev)
+{
+	int sizeof_inotify_watcher;
+	int sizeof_inotify_device;
+	int sizeof_inotify_kernel_event;
+
+	sizeof_inotify_watcher = sizeof (struct inotify_watcher);
+	sizeof_inotify_device = sizeof (struct inotify_device);
+	sizeof_inotify_kernel_event = sizeof (struct inotify_kernel_event);
+
+	printk(KERN_ALERT "GLOBAL INOTIFY STATS\n");
+	printk(KERN_ALERT "watcher_count = %d\n", atomic_read(&watcher_count));
+
+	printk(KERN_ALERT "sizeof(struct inotify_watcher) = %d\n",
+	       sizeof_inotify_watcher);
+	printk(KERN_ALERT "sizeof(struct inotify_device) = %d\n",
+	       sizeof_inotify_device);
+	printk(KERN_ALERT "sizeof(struct inotify_kernel_event) = %d\n",
+	       sizeof_inotify_kernel_event);
+
+	spin_lock(&dev->lock);
+
+	printk(KERN_ALERT "inotify device: %p\n", dev);
+	printk(KERN_ALERT "inotify event_count: %u\n", dev->event_count);
+	printk(KERN_ALERT "inotify watch_count: %d\n", dev->nr_watches);
+
+	spin_unlock(&dev->lock);
+}
+
+/*
+ * inotify_ioctl - dispatch the INOTIFY_* ioctls: add a watch, remove a
+ * watch, dump stats, or set the debug flag word.
+ */
+static int inotify_ioctl(struct inode *ip, struct file *fp,
+			 unsigned int cmd, unsigned long arg)
+{
+	int err;
+	struct inotify_device *dev;
+	struct inotify_watch_request request;
+	int wd;
+
+	dev = fp->private_data;
+	err = 0;
+
+	if (_IOC_TYPE(cmd) != INOTIFY_IOCTL_MAGIC)
+		return -EINVAL;
+	if (_IOC_NR(cmd) > INOTIFY_IOCTL_MAXNR)
+		return -EINVAL;
+
+	/*
+	 * NOTE(review): every INOTIFY_* command is declared _IOR in the
+	 * header, yet the handlers all copy_from_user() -- the direction
+	 * bit should almost certainly be _IOW.  The VERIFY_READ check
+	 * below happens to match what the handlers actually do only
+	 * because the declared direction is also inverted; fixing the
+	 * header changes the ioctl numbers (ABI), so both must move
+	 * together.  copy_from_user() re-validates the range anyway.
+	 */
+	if (_IOC_DIR(cmd) & _IOC_READ)
+		err = !access_ok(VERIFY_READ, (void *) arg, _IOC_SIZE(cmd));
+
+	if (err) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	if (_IOC_DIR(cmd) & _IOC_WRITE)
+		err = !access_ok(VERIFY_WRITE, (void *)arg, _IOC_SIZE(cmd));
+
+	if (err) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	/* unknown commands fall through the switch with -EINVAL */
+	err = -EINVAL;
+
+	switch (cmd) {
+	case INOTIFY_WATCH:
+		iprintk(INOTIFY_DEBUG_ERRORS, "INOTIFY_WATCH ioctl\n");
+		if (copy_from_user(&request, (void *) arg,
+				   sizeof(struct inotify_watch_request))) {
+			err = -EFAULT;
+			goto out;
+		}
+		err = inotify_watch(dev, &request);
+		break;
+	case INOTIFY_IGNORE:
+		iprintk(INOTIFY_DEBUG_ERRORS, "INOTIFY_IGNORE ioctl\n");
+		if (copy_from_user(&wd, (void *)arg, sizeof (int))) {
+			err = -EFAULT;
+			goto out;
+		}
+		err = inotify_ignore(dev, wd);
+		break;
+	case INOTIFY_STATS:
+		iprintk(INOTIFY_DEBUG_ERRORS, "INOTIFY_STATS ioctl\n");
+		inotify_print_stats(dev);
+		err = 0;
+		break;
+	case INOTIFY_SETDEBUG:
+		iprintk(INOTIFY_DEBUG_ERRORS,
+			"INOTIFY_SETDEBUG ioctl\n");
+		if (copy_from_user(&inotify_debug_flags, (void *) arg,
+				   sizeof (int))) {
+			err = -EFAULT;
+			goto out;
+		}
+		break;
+	}
+
+out:
+	return err;
+}
+
+/* VFS methods for the inotify character device; no write/llseek */
+static struct file_operations inotify_fops = {
+	.owner		= THIS_MODULE,
+	.poll		= inotify_poll,
+	.read		= inotify_read,
+	.open		= inotify_open,
+	.release	= inotify_release,
+	.ioctl		= inotify_ioctl,
+};
+
+/* registered in inotify_init(); the minor number is assigned dynamically */
+struct miscdevice inotify_device = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "inotify",
+	.fops	= &inotify_fops,
+};
+
+/*
+ * inotify_init - register the misc device and create the watcher and
+ * kevent slab caches.  SLAB_PANIC makes kmem_cache_create() panic
+ * instead of returning NULL, so its results are deliberately unchecked.
+ */
+static int __init inotify_init(void)
+{
+	int ret;
+
+	ret = misc_register(&inotify_device);
+	if (ret)
+		goto out;
+
+	inotify_debug_flags = INOTIFY_DEBUG_NONE;
+
+	watcher_cache = kmem_cache_create("watcher_cache",
+			sizeof(struct inotify_watcher), 0, SLAB_PANIC,
+			NULL, NULL);
+
+	kevent_cache = kmem_cache_create("kevent_cache",
+			sizeof(struct inotify_kernel_event), 0,
+			SLAB_PANIC, NULL, NULL);
+
+	printk(KERN_INFO "inotify init: minor=%d\n", inotify_device.minor);
+out:
+	return ret;
+}
+
+module_init(inotify_init);
--- clean/linux/include/linux/inotify.h 1969-12-31 19:00:00.000000000 -0500
+++ linux/include/linux/inotify.h 2004-09-28 16:44:34.000000000 -0400
@@ -0,0 +1,118 @@
+/*
+ * Inode based directory notification for Linux
+ *
+ * Copyright (C) 2004 John McCutchan
+ */
+
+#ifndef _LINUX_INOTIFY_H
+#define _LINUX_INOTIFY_H
+
+#include <linux/limits.h>
+
+/* this size could limit things, since technically we could need PATH_MAX */
+#define INOTIFY_FILENAME_MAX 256
+
+/*
+ * struct inotify_event - structure read from the inotify device for each event
+ *
+ * When you are watching a directory, you will receive the filename for events
+ * such as IN_CREATE, IN_DELETE, IN_OPEN, IN_CLOSE, ...
+ *
+ * Note: When reading from the device you must provide a buffer that is a
+ * multiple of sizeof(struct inotify_event)
+ */
+struct inotify_event {
+ int wd;
+ int mask;
+ int cookie;
+ char filename[INOTIFY_FILENAME_MAX];
+};
+
+/* the following are legal, implemented events */
+#define IN_ACCESS 0x00000001 /* File was accessed */
+#define IN_MODIFY 0x00000002 /* File was modified */
+#define IN_ATTRIB 0x00000004 /* File changed attributes */
+#define IN_CLOSE 0x00000008 /* File was closed */
+#define IN_OPEN 0x00000010 /* File was opened */
+#define IN_MOVED_FROM 0x00000020 /* File was moved from X */
+#define IN_MOVED_TO 0x00000040 /* File was moved to Y */
+#define IN_DELETE_SUBDIR 0x00000080 /* Subdir was deleted */
+#define IN_DELETE_FILE 0x00000100 /* Subfile was deleted */
+#define IN_CREATE_SUBDIR 0x00000200 /* Subdir was created */
+#define IN_CREATE_FILE 0x00000400 /* Subfile was created */
+#define IN_DELETE_SELF 0x00000800 /* Self was deleted */
+#define IN_UNMOUNT 0x00001000 /* Backing fs was unmounted */
+#define IN_Q_OVERFLOW 0x00002000 /* Event queued overflowed */
+#define IN_IGNORED 0x00004000 /* File was ignored */
+
+/* special flags */
+#define IN_ALL_EVENTS 0xffffffff /* All the events */
+
+/*
+ * struct inotify_watch_request - represents a watch request
+ *
+ * Pass to the inotify device via the INOTIFY_WATCH ioctl
+ */
+struct inotify_watch_request {
+ char *dirname; /* directory name */
+ unsigned long mask; /* event mask */
+};
+
+#define INOTIFY_IOCTL_MAGIC 'Q'
+#define INOTIFY_IOCTL_MAXNR 4
+
+#define INOTIFY_WATCH _IOR(INOTIFY_IOCTL_MAGIC, 1, struct inotify_watch_request)
+#define INOTIFY_IGNORE _IOR(INOTIFY_IOCTL_MAGIC, 2, int)
+#define INOTIFY_STATS _IOR(INOTIFY_IOCTL_MAGIC, 3, int)
+#define INOTIFY_SETDEBUG _IOR(INOTIFY_IOCTL_MAGIC, 4, int)
+
+#define INOTIFY_DEBUG_NONE 0x00000000
+#define INOTIFY_DEBUG_ALLOC 0x00000001
+#define INOTIFY_DEBUG_EVENTS 0x00000002
+#define INOTIFY_DEBUG_INODE 0x00000004
+#define INOTIFY_DEBUG_ERRORS 0x00000008
+#define INOTIFY_DEBUG_FILEN 0x00000010
+#define INOTIFY_DEBUG_ALL 0xffffffff
+
+#ifdef __KERNEL__
+
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/config.h>
+
+#ifdef CONFIG_INOTIFY
+
+extern void inotify_inode_queue_event(struct inode *, unsigned long,
+ const char *);
+extern void inotify_dentry_parent_queue_event(struct dentry *, unsigned long,
+ const char *);
+extern void inotify_super_block_umount(struct super_block *);
+extern void inotify_inode_is_dead(struct inode *);
+
+#else
+
+static inline void inotify_inode_queue_event(struct inode *inode,
+ unsigned long mask,
+ const char *filename)
+{
+}
+
+static inline void inotify_dentry_parent_queue_event(struct dentry *dentry,
+ unsigned long mask,
+ const char *filename)
+{
+}
+
+static inline void inotify_super_block_umount(struct super_block *sb)
+{
+}
+
+static inline void inotify_inode_is_dead(struct inode *inode)
+{
+}
+
+#endif /* CONFIG_INOTIFY */
+
+#endif /* __KERNEL __ */
+
+#endif /* _LINUX_INOTIFY_H */
--- clean/linux/include/linux/fs.h 2004-08-14 06:55:09.000000000 -0400
+++ linux/include/linux/fs.h 2004-09-28 16:44:34.000000000 -0400
@@ -458,6 +458,12 @@
unsigned long i_dnotify_mask; /* Directory notify events */
struct dnotify_struct *i_dnotify; /* for directory notifications */
+#ifdef CONFIG_INOTIFY
+ struct list_head watchers;
+ unsigned long watchers_mask;
+ int watcher_count;
+#endif
+
unsigned long i_state;
unsigned long dirtied_when; /* jiffies of first dirtying */
--- clean/linux/fs/super.c 2004-08-14 06:55:22.000000000 -0400
+++ linux/fs/super.c 2004-09-18 02:24:33.000000000 -0400
@@ -36,6 +36,7 @@
#include <linux/writeback.h> /* for the emergency remount stuff */
#include <linux/idr.h>
#include <asm/uaccess.h>
+#include <linux/inotify.h>
void get_filesystem(struct file_system_type *fs);
@@ -204,6 +205,7 @@
if (root) {
sb->s_root = NULL;
+ inotify_super_block_umount (sb);
shrink_dcache_parent(root);
shrink_dcache_anon(&sb->s_anon);
dput(root);
--- clean/linux/fs/read_write.c 2004-08-14 06:55:35.000000000 -0400
+++ linux/fs/read_write.c 2004-08-19 00:11:52.000000000 -0400
@@ -11,6 +11,7 @@
#include <linux/uio.h>
#include <linux/smp_lock.h>
#include <linux/dnotify.h>
+#include <linux/inotify.h>
#include <linux/security.h>
#include <linux/module.h>
@@ -216,8 +217,11 @@
ret = file->f_op->read(file, buf, count, pos);
else
ret = do_sync_read(file, buf, count, pos);
- if (ret > 0)
+ if (ret > 0) {
dnotify_parent(file->f_dentry, DN_ACCESS);
+ inotify_dentry_parent_queue_event(file->f_dentry, IN_ACCESS, file->f_dentry->d_name.name);
+ inotify_inode_queue_event (file->f_dentry->d_inode, IN_ACCESS, NULL);
+ }
}
}
@@ -260,8 +264,11 @@
ret = file->f_op->write(file, buf, count, pos);
else
ret = do_sync_write(file, buf, count, pos);
- if (ret > 0)
+ if (ret > 0) {
dnotify_parent(file->f_dentry, DN_MODIFY);
+ inotify_dentry_parent_queue_event(file->f_dentry, IN_MODIFY, file->f_dentry->d_name.name);
+ inotify_inode_queue_event (file->f_dentry->d_inode, IN_MODIFY, NULL);
+ }
}
}
@@ -493,9 +500,13 @@
out:
if (iov != iovstack)
kfree(iov);
- if ((ret + (type == READ)) > 0)
+ if ((ret + (type == READ)) > 0) {
dnotify_parent(file->f_dentry,
(type == READ) ? DN_ACCESS : DN_MODIFY);
+ inotify_dentry_parent_queue_event(file->f_dentry,
+ (type == READ) ? IN_ACCESS : IN_MODIFY, file->f_dentry->d_name.name);
+ inotify_inode_queue_event (file->f_dentry->d_inode, (type == READ) ? IN_ACCESS : IN_MODIFY, NULL);
+ }
return ret;
}
--- clean/linux/fs/open.c 2004-08-14 06:54:48.000000000 -0400
+++ linux/fs/open.c 2004-08-19 00:11:52.000000000 -0400
@@ -11,6 +11,7 @@
#include <linux/smp_lock.h>
#include <linux/quotaops.h>
#include <linux/dnotify.h>
+#include <linux/inotify.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/tty.h>
@@ -955,6 +956,8 @@
error = PTR_ERR(f);
if (IS_ERR(f))
goto out_error;
+ inotify_inode_queue_event (f->f_dentry->d_inode, IN_OPEN, NULL);
+ inotify_dentry_parent_queue_event (f->f_dentry, IN_OPEN, f->f_dentry->d_name.name);
fd_install(fd, f);
}
out:
@@ -1034,6 +1037,8 @@
FD_CLR(fd, files->close_on_exec);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
+ inotify_dentry_parent_queue_event (filp->f_dentry, IN_CLOSE, filp->f_dentry->d_name.name);
+ inotify_inode_queue_event (filp->f_dentry->d_inode, IN_CLOSE, NULL);
return filp_close(filp, files);
out_unlock:
--- clean/linux/fs/namei.c 2004-08-14 06:55:10.000000000 -0400
+++ linux/fs/namei.c 2004-09-24 13:10:13.000000000 -0400
@@ -22,6 +22,7 @@
#include <linux/quotaops.h>
#include <linux/pagemap.h>
#include <linux/dnotify.h>
+#include <linux/inotify.h>
#include <linux/smp_lock.h>
#include <linux/personality.h>
#include <linux/security.h>
@@ -1221,6 +1222,7 @@
error = dir->i_op->create(dir, dentry, mode, nd);
if (!error) {
inode_dir_notify(dir, DN_CREATE);
+ inotify_inode_queue_event(dir, IN_CREATE_FILE, dentry->d_name.name);
security_inode_post_create(dir, dentry, mode);
}
return error;
@@ -1535,6 +1537,7 @@
error = dir->i_op->mknod(dir, dentry, mode, dev);
if (!error) {
inode_dir_notify(dir, DN_CREATE);
+ inotify_inode_queue_event(dir, IN_CREATE_FILE, dentry->d_name.name);
security_inode_post_mknod(dir, dentry, mode, dev);
}
return error;
@@ -1608,6 +1611,7 @@
error = dir->i_op->mkdir(dir, dentry, mode);
if (!error) {
inode_dir_notify(dir, DN_CREATE);
+ inotify_inode_queue_event(dir, IN_CREATE_SUBDIR, dentry->d_name.name);
security_inode_post_mkdir(dir,dentry, mode);
}
return error;
@@ -1703,6 +1707,9 @@
up(&dentry->d_inode->i_sem);
if (!error) {
inode_dir_notify(dir, DN_DELETE);
+ inotify_inode_queue_event(dir, IN_DELETE_SUBDIR, dentry->d_name.name);
+ inotify_inode_queue_event(dentry->d_inode, IN_DELETE_SELF, NULL);
+ inotify_inode_is_dead (dentry->d_inode);
d_delete(dentry);
}
dput(dentry);
@@ -1775,8 +1782,11 @@
/* We don't d_delete() NFS sillyrenamed files--they still exist. */
if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
- d_delete(dentry);
inode_dir_notify(dir, DN_DELETE);
+ inotify_inode_queue_event(dir, IN_DELETE_FILE, dentry->d_name.name);
+ inotify_inode_queue_event(dentry->d_inode, IN_DELETE_SELF, NULL);
+ inotify_inode_is_dead (dentry->d_inode);
+ d_delete(dentry);
}
return error;
}
@@ -1853,6 +1863,7 @@
error = dir->i_op->symlink(dir, dentry, oldname);
if (!error) {
inode_dir_notify(dir, DN_CREATE);
+ inotify_inode_queue_event(dir, IN_CREATE_FILE, dentry->d_name.name);
security_inode_post_symlink(dir, dentry, oldname);
}
return error;
@@ -1926,6 +1937,7 @@
up(&old_dentry->d_inode->i_sem);
if (!error) {
inode_dir_notify(dir, DN_CREATE);
+ inotify_inode_queue_event(dir, IN_CREATE_FILE, new_dentry->d_name.name);
security_inode_post_link(old_dentry, dir, new_dentry);
}
return error;
@@ -2115,12 +2127,15 @@
else
error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
if (!error) {
- if (old_dir == new_dir)
+ if (old_dir == new_dir) {
inode_dir_notify(old_dir, DN_RENAME);
- else {
+ } else {
inode_dir_notify(old_dir, DN_DELETE);
inode_dir_notify(new_dir, DN_CREATE);
}
+
+ inotify_inode_queue_event (old_dir, IN_MOVED_FROM, old_dentry->d_name.name);
+ inotify_inode_queue_event (new_dir, IN_MOVED_TO, new_dentry->d_name.name);
}
return error;
}
--- clean/linux/fs/inode.c 2004-08-14 06:56:23.000000000 -0400
+++ linux/fs/inode.c 2004-09-28 16:44:34.000000000 -0400
@@ -114,6 +114,9 @@
if (inode) {
struct address_space * const mapping = &inode->i_data;
+#ifdef CONFIG_INOTIFY
+ INIT_LIST_HEAD(&inode->watchers);
+#endif
inode->i_sb = sb;
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0;
--- clean/linux/fs/attr.c 2004-08-14 06:54:50.000000000 -0400
+++ linux/fs/attr.c 2004-09-28 16:44:34.000000000 -0400
@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/dnotify.h>
+#include <linux/inotify.h>
#include <linux/fcntl.h>
#include <linux/quotaops.h>
#include <linux/security.h>
@@ -128,6 +129,28 @@
return dn_mask;
}
+static int setattr_mask_inotify(unsigned int ia_valid)
+{
+ unsigned long dn_mask = 0;
+
+ if (ia_valid & ATTR_UID)
+ dn_mask |= IN_ATTRIB;
+ if (ia_valid & ATTR_GID)
+ dn_mask |= IN_ATTRIB;
+ if (ia_valid & ATTR_SIZE)
+ dn_mask |= IN_MODIFY;
+ /* both times implies a utime(s) call */
+ if ((ia_valid & (ATTR_ATIME|ATTR_MTIME)) == (ATTR_ATIME|ATTR_MTIME))
+ dn_mask |= IN_ATTRIB;
+ else if (ia_valid & ATTR_ATIME)
+ dn_mask |= IN_ACCESS;
+ else if (ia_valid & ATTR_MTIME)
+ dn_mask |= IN_MODIFY;
+ if (ia_valid & ATTR_MODE)
+ dn_mask |= IN_ATTRIB;
+ return dn_mask;
+}
+
int notify_change(struct dentry * dentry, struct iattr * attr)
{
struct inode *inode = dentry->d_inode;
@@ -185,8 +208,16 @@
}
if (!error) {
unsigned long dn_mask = setattr_mask(ia_valid);
+ unsigned long in_mask = setattr_mask_inotify(ia_valid);
+
if (dn_mask)
dnotify_parent(dentry, dn_mask);
+ if (in_mask) {
+ inotify_inode_queue_event(dentry->d_inode, in_mask,
+ NULL);
+ inotify_dentry_parent_queue_event(dentry, in_mask,
+ dentry->d_name.name);
+ }
}
return error;
}
--- clean/linux/drivers/char/Makefile 2004-08-14 06:56:22.000000000 -0400
+++ linux/drivers/char/Makefile 2004-09-28 16:46:05.000000000 -0400
@@ -9,6 +9,8 @@
obj-y += mem.o random.o tty_io.o n_tty.o tty_ioctl.o pty.o misc.o
+
+obj-$(CONFIG_INOTIFY) += inotify.o
obj-$(CONFIG_VT) += vt_ioctl.o vc_screen.o consolemap.o \
consolemap_deftbl.o selection.o keyboard.o
obj-$(CONFIG_HW_CONSOLE) += vt.o defkeymap.o
--- clean/linux/drivers/char/Kconfig 2004-08-14 06:54:47.000000000 -0400
+++ linux/drivers/char/Kconfig 2004-09-28 16:44:34.000000000 -0400
@@ -62,6 +62,19 @@
depends on VT && !S390 && !UM
default y
+config INOTIFY
+ bool "Inotify file change notification support"
+ default y
+ ---help---
+ Say Y here to enable inotify support and the /dev/inotify character
+ device. Inotify is a file change notification system and a
+ replacement for dnotify. Inotify fixes numerous shortcomings in
+ dnotify and introduces several new features. It allows monitoring
+ of both files and directories via a single open fd. Multiple file
+ events are supported.
+
+ If unsure, say Y.
+
config SERIAL_NONSTANDARD
bool "Non-standard serial port support"
---help---
^ permalink raw reply [flat|nested] 7+ messages in thread* [patch] inotify: locking
2004-09-28 22:33 [RFC][PATCH] inotify 0.11.0 [WITH PATCH!] John McCutchan
@ 2004-09-30 19:01 ` Robert Love
2004-09-30 19:17 ` Robert Love
0 siblings, 1 reply; 7+ messages in thread
From: Robert Love @ 2004-09-30 19:01 UTC (permalink / raw)
To: John McCutchan; +Cc: linux-kernel, gamin-list, viro, akpm, iggy
I finally got around to reviewing the locking in inotify, again, in
response to Andrew's points.
There are still three TODO items:
- Look into replacing i_lock with i_sem.
- dev->lock nests inode->i_lock. Preferably i_lock should
remain an outermost lock. Maybe i_sem can fix this.
- inotify_inode_is_dead() should always have the inode's
i_lock locked when called. I have not yet reviewed the
VFS paths that call it to ensure this is the case.
Anyhow, this patch does the following
- More locking documentation/comments
- Respect lock ranking when locking two different
inodes' i_lock
- Don't grab dentry->d_lock and just use dget_parent(). [1]
- Respect lock ranking with dev->lock vs. inode->i_lock.
- inotify_release_all_watches() needed dev->lock.
- misc. cleanup
Patch is on top of 0.11.0.
Robert Love
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch] inotify: locking
2004-09-30 19:01 ` [patch] inotify: locking Robert Love
@ 2004-09-30 19:17 ` Robert Love
0 siblings, 0 replies; 7+ messages in thread
From: Robert Love @ 2004-09-30 19:17 UTC (permalink / raw)
To: John McCutchan; +Cc: linux-kernel, gamin-list, viro, akpm, iggy
[-- Attachment #1: Type: text/plain, Size: 387 bytes --]
On Thu, 2004-09-30 at 15:01 -0400, Robert Love wrote:
> I finally got around to reviewing the locking in inotify, again, in
> response to Andrew's points.
As I was saying. I need to walk down the hall and have the Evolution
hackers add the "Robert intended to add a patch but did not, so let's
automatically add it for him" feature.
Here it is, this time for serious.
Robert Love
[-- Attachment #2: inotify-0.11.0-rml-locking-redux-1.patch --]
[-- Type: text/x-patch, Size: 6149 bytes --]
locking cleanup for inotify, the code equivalent of fine cheese.
Signed-Off-By: Robert Love <rml@novell.com>
drivers/char/inotify.c | 86 ++++++++++++++++++++++++++++++-------------------
1 files changed, 54 insertions(+), 32 deletions(-)
diff -urN linux-inotify/drivers/char/inotify.c linux/drivers/char/inotify.c
--- linux-inotify/drivers/char/inotify.c 2004-09-30 12:09:41.317737160 -0400
+++ linux/drivers/char/inotify.c 2004-09-30 14:18:33.369286656 -0400
@@ -65,9 +65,12 @@
* implies that the given WD is valid, unset implies it is not.
*
* This structure is protected by 'lock'. Lock ordering:
+ *
+ * inode->i_lock
* dev->lock
* dev->wait->lock
- * FIXME: Define lock ordering wrt inode and dentry locking!
+ *
+ * FIXME: Look at replacing i_lock with i_sem.
*/
struct inotify_device {
DECLARE_BITMAP(bitmask, MAX_INOTIFY_DEV_WATCHERS);
@@ -522,8 +525,13 @@
{
struct inotify_watcher *watcher;
- spin_lock(&inode1->i_lock);
- spin_lock(&inode2->i_lock);
+ if (inode1 < inode2) {
+ spin_lock(&inode1->i_lock);
+ spin_lock(&inode2->i_lock);
+ } else {
+ spin_lock(&inode2->i_lock);
+ spin_lock(&inode1->i_lock);
+ }
list_for_each_entry(watcher, &inode1->watchers, i_list) {
spin_lock(&watcher->dev->lock);
@@ -550,12 +558,9 @@
{
struct dentry *parent;
- spin_lock(&dentry->d_lock);
- dget(dentry->d_parent);
- parent = dentry->d_parent;
+ parent = dget_parent(dentry);
inotify_inode_queue_event(parent->d_inode, mask, filename);
dput(parent);
- spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
@@ -564,8 +569,8 @@
struct inotify_device *dev;
struct inode *inode;
- spin_lock(&watcher->dev->lock);
spin_lock(&watcher->inode->i_lock);
+ spin_lock(&watcher->dev->lock);
inode = watcher->inode;
dev = watcher->dev;
@@ -577,24 +582,30 @@
inotify_dev_rm_watcher(watcher->dev, watcher);
list_del(&watcher->u_list);
- spin_unlock(&inode->i_lock);
delete_watcher(dev, watcher);
spin_unlock(&dev->lock);
+ spin_unlock(&inode->i_lock);
unref_inode(inode);
}
-static void process_umount_list(struct list_head *umount) {
+static void process_umount_list(struct list_head *umount)
+{
struct inotify_watcher *watcher, *next;
list_for_each_entry_safe(watcher, next, umount, u_list)
ignore_helper(watcher, IN_UNMOUNT);
}
+/*
+ * build_umount_list - build a list of watchers affected by an unmount.
+ *
+ * Caller must hold inode_lock.
+ */
static void build_umount_list(struct list_head *head, struct super_block *sb,
struct list_head *umount)
{
- struct inode * inode;
+ struct inode *inode;
list_for_each_entry(inode, head, i_list) {
struct inotify_watcher *watcher;
@@ -625,6 +636,11 @@
}
EXPORT_SYMBOL_GPL(inotify_super_block_umount);
+/*
+ * inotify_inode_is_dead - an inode has been deleted, cleanup any watches
+ *
+ * FIXME: Callers need to always hold inode->i_lock.
+ */
void inotify_inode_is_dead(struct inode *inode)
{
struct inotify_watcher *watcher, *next;
@@ -752,6 +768,11 @@
return 0;
}
+/*
+ * inotify_release_all_watchers - destroy all watchers on a given device
+ *
+ * Caller must hold dev->lock.
+ */
static void inotify_release_all_watchers(struct inotify_device *dev)
{
struct inotify_watcher *watcher,*next;
@@ -762,13 +783,13 @@
/*
* inotify_release_all_events - destroy all of the events on a given device
+ *
+ * Caller must hold dev->lock.
*/
static void inotify_release_all_events(struct inotify_device *dev)
{
- spin_lock(&dev->lock);
while (inotify_dev_has_events(dev))
inotify_dev_event_dequeue(dev);
- spin_unlock(&dev->lock);
}
@@ -777,8 +798,10 @@
struct inotify_device *dev;
dev = file->private_data;
+ spin_lock(&dev->lock);
inotify_release_all_watchers(dev);
inotify_release_all_events(dev);
+ spin_unlock(&dev->lock);
kfree(dev);
printk(KERN_ALERT "inotify device released\n");
@@ -790,22 +813,18 @@
static int inotify_watch(struct inotify_device *dev,
struct inotify_watch_request *request)
{
- int err;
struct inode *inode;
struct inotify_watcher *watcher;
- err = 0;
inode = find_inode(request->dirname);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto exit;
- }
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
if (!S_ISDIR(inode->i_mode))
iprintk(INOTIFY_DEBUG_ERRORS, "watching file\n");
- spin_lock(&dev->lock);
spin_lock(&inode->i_lock);
+ spin_lock(&dev->lock);
/*
* This handles the case of re-adding a directory we are already
@@ -820,15 +839,15 @@
owatcher = inode_find_dev(inode, dev);
owatcher->mask = request->mask;
inode_update_watchers_mask(inode);
+ spin_unlock(&dev->lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&dev->lock);
unref_inode(inode);
return 0;
}
+ spin_unlock(&dev->lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&dev->lock);
watcher = create_watcher(dev, request->mask, inode);
if (!watcher) {
@@ -836,31 +855,27 @@
return -ENOSPC;
}
+ spin_lock(&inode->i_lock);
spin_lock(&dev->lock);
- spin_lock(&inode->i_lock);
/* We can't add anymore watchers to this device */
if (inotify_dev_add_watcher(dev, watcher) == -ENOSPC) {
iprintk(INOTIFY_DEBUG_ERRORS,
"can't add watcher dev is full\n");
- spin_unlock(&inode->i_lock);
delete_watcher(dev, watcher);
spin_unlock(&dev->lock);
-
+ spin_unlock(&inode->i_lock);
unref_inode(inode);
+
return -ENOSPC;
}
inode_add_watcher(inode, watcher);
- /* we keep a reference on the inode */
- if (!err)
- err = watcher->wd;
-
- spin_unlock(&inode->i_lock);
spin_unlock(&dev->lock);
-exit:
- return err;
+ spin_unlock(&inode->i_lock);
+
+ return watcher->wd;
}
static int inotify_ignore(struct inotify_device *dev, int wd)
@@ -904,6 +919,13 @@
spin_unlock(&dev->lock);
}
+/*
+ * inotify_ioctl() - our device file's ioctl method
+ *
+ * The VFS serializes all of our calls via the BKL and we rely on that. We
+ * could, alternatively, grab dev->lock. Right now lower levels grab that
+ * where needed.
+ */
static int inotify_ioctl(struct inode *ip, struct file *fp,
unsigned int cmd, unsigned long arg)
{
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2004-09-30 19:24 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-09-22 19:37 [patch] inotify: locking Robert Love
2004-09-22 19:38 ` Robert Love
2004-09-23 9:09 ` Paul Jackson
2004-09-22 23:22 ` John McCutchan
2004-09-22 23:22 ` Robert Love
-- strict thread matches above, loose matches on Subject: below --
2004-09-28 22:33 [RFC][PATCH] inotify 0.11.0 [WITH PATCH!] John McCutchan
2004-09-30 19:01 ` [patch] inotify: locking Robert Love
2004-09-30 19:17 ` Robert Love
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox