All of lore.kernel.org

All of lore.kernel.org
 help / color / mirror / Atom feed

* Re: [RFC/PATCH] Make powerpc64 use __thread for per-cpu variables
From: Richard Henderson @ 2006-05-10 15:47 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linux-arch, linuxppc-dev, linux-kernel
In-Reply-To: <17505.26159.807484.477212@cargo.ozlabs.ibm.com>

On Wed, May 10, 2006 at 02:03:59PM +1000, Paul Mackerras wrote:
> With this patch, 64-bit powerpc uses __thread for per-cpu variables.

How do you plan to address the compiler optimizing

	__thread int foo;
	{
	  use(foo);
	  schedule();
	  use(foo);
	}

into

	{
	  int *tmp = &foo;	// tls arithmetic here
	  use(*tmp);
	  schedule();
	  use(*tmp);
	}

Across the schedule, we may have changed cpus, making the cached
address invalid.


r~

^ permalink raw reply

* status symbol
From: Gregory Hickey @ 2006-05-10 16:11 UTC (permalink / raw)
  To: nfs


[-- Attachment #1.1: Type: text/plain, Size: 0 bytes --]



[-- Attachment #1.2: Type: text/html, Size: 416 bytes --]

[-- Attachment #2: touch-and-go.gif --]
[-- Type: image/gif, Size: 9636 bytes --]

^ permalink raw reply

* [PATCH 5/5] aacraid: remove unneeded list
From: Mark Haverkamp @ 2006-05-10 16:13 UTC (permalink / raw)
  To: James Bottomley; +Cc: Mark Salyzyn, linux-scsi

Received From Mark Salyzyn

The queue tracking is just not being used, not even for debugging. Information
about outstanding commands can be acquired from the scsi structures.

Signed-off-by: Mark Haverkamp <markh@osdl.org>
---
Applies to the scsi-misc-2.6 git tree.

--- scsi-misc-aac.orig/drivers/scsi/aacraid/aacraid.h	2006-05-09 14:49:33.000000000 -0700
+++ scsi-misc-aac/drivers/scsi/aacraid/aacraid.h	2006-05-09 14:50:00.000000000 -0700
@@ -563,7 +563,6 @@
 	spinlock_t		lockdata;	/* Actual lock (used only on one side of the lock) */
 	struct list_head 	cmdq;	   	/* A queue of FIBs which need to be prcessed by the FS thread. This is */
                                 		/* only valid for command queues which receive entries from the adapter. */
-	struct list_head	pendingq;	/* A queue of outstanding fib's to the adapter. */
 	u32			numpending;	/* Number of entries on outstanding queue. */
 	struct aac_dev *	dev;		/* Back pointer to adapter structure */
 };
@@ -826,7 +825,6 @@
 	 *	The following is used to put this fib context onto the 
 	 *	Outstanding I/O queue.
 	 */
-	struct list_head	queue;
 	/*
 	 *	And for the internal issue/reply queues (we may be able
 	 *	to merge these two)
--- scsi-misc-aac.orig/drivers/scsi/aacraid/comminit.c	2006-05-09 14:46:37.000000000 -0700
+++ scsi-misc-aac/drivers/scsi/aacraid/comminit.c	2006-05-09 14:50:00.000000000 -0700
@@ -159,7 +159,6 @@
 {
 	q->numpending = 0;
 	q->dev = dev;
-	INIT_LIST_HEAD(&q->pendingq);
 	init_waitqueue_head(&q->cmdready);
 	INIT_LIST_HEAD(&q->cmdq);
 	init_waitqueue_head(&q->qfull);
--- scsi-misc-aac.orig/drivers/scsi/aacraid/commsup.c	2006-05-09 14:48:14.000000000 -0700
+++ scsi-misc-aac/drivers/scsi/aacraid/commsup.c	2006-05-09 14:50:00.000000000 -0700
@@ -472,7 +472,6 @@
 	spin_lock_irqsave(q->lock, qflags);
 	if (dev->new_comm_interface) {
 		unsigned long count = 10000000L; /* 50 seconds */
-		list_add_tail(&fibptr->queue, &q->pendingq);
 		q->numpending++;
 		spin_unlock_irqrestore(q->lock, qflags);
 		while (aac_adapter_send(fibptr) != 0) {
@@ -481,7 +480,6 @@
 					spin_unlock_irqrestore(&fibptr->event_lock, flags);
 				spin_lock_irqsave(q->lock, qflags);
 				q->numpending--;
-				list_del(&fibptr->queue);
 				spin_unlock_irqrestore(q->lock, qflags);
 				return -ETIMEDOUT;
 			}
@@ -492,7 +490,6 @@
 		unsigned long nointr = 0;
 		aac_queue_get( dev, &index, AdapNormCmdQueue, hw_fib, 1, fibptr, &nointr);
 
-		list_add_tail(&fibptr->queue, &q->pendingq);
 		q->numpending++;
 		*(q->headers.producer) = cpu_to_le32(index + 1);
 		spin_unlock_irqrestore(q->lock, qflags);
@@ -520,7 +517,6 @@
 				if (--count == 0) {
 					spin_lock_irqsave(q->lock, qflags);
 					q->numpending--;
-					list_del(&fibptr->queue);
 					spin_unlock_irqrestore(q->lock, qflags);
 					if (wait == -1) {
 	        				printk(KERN_ERR "aacraid: aac_fib_send: first asynchronous command timed out.\n"
--- scsi-misc-aac.orig/drivers/scsi/aacraid/dpcsup.c	2006-05-09 14:46:37.000000000 -0700
+++ scsi-misc-aac/drivers/scsi/aacraid/dpcsup.c	2006-05-09 14:50:00.000000000 -0700
@@ -86,7 +86,6 @@
 		 *	the fib timed out.
 		 */
 		if (!(fib->flags & FIB_CONTEXT_FLAG_TIMED_OUT)) {
-			list_del(&fib->queue);
 			dev->queues->queue[AdapNormCmdQueue].numpending--;
 		} else {
 			printk(KERN_WARNING "aacraid: FIB timeout (%x).\n", fib->flags);
@@ -284,7 +283,6 @@
 			return 0;
 		}
 
-		list_del(&fib->queue);
 		dev->queues->queue[AdapNormCmdQueue].numpending--;
 
 		if (fast) {

-- 
Mark Haverkamp <markh@osdl.org>


^ permalink raw reply

* [PATCH 4/5] aacraid: sa race condition fix
From: Mark Haverkamp @ 2006-05-10 16:13 UTC (permalink / raw)
  To: James Bottomley; +Cc: Mark Salyzyn, linux-scsi

Received From Mark Salyzyn

A race condition existed that could result in a lost completion of a
command to the ppc based cards.

Signed-off-by: Mark Haverkamp <markh@osdl.org>

---
Applies to the scsi-misc-2.6 tree.

--- scsi-misc-aac.orig/drivers/scsi/aacraid/sa.c	2006-05-09 14:48:58.000000000 -0700
+++ scsi-misc-aac/drivers/scsi/aacraid/sa.c	2006-05-09 14:49:54.000000000 -0700
@@ -66,11 +66,11 @@
 			sa_writew(dev, DoorbellClrReg_p, PrintfReady); /* clear PrintfReady */
 			sa_writew(dev, DoorbellReg_s, PrintfDone);
 		} else if (intstat & DOORBELL_1) {	// dev -> Host Normal Command Ready
-			aac_command_normal(&dev->queues->queue[HostNormCmdQueue]);
 			sa_writew(dev, DoorbellClrReg_p, DOORBELL_1);
+			aac_command_normal(&dev->queues->queue[HostNormCmdQueue]);
 		} else if (intstat & DOORBELL_2) {	// dev -> Host Normal Response Ready
-			aac_response_normal(&dev->queues->queue[HostNormRespQueue]);
 			sa_writew(dev, DoorbellClrReg_p, DOORBELL_2);
+			aac_response_normal(&dev->queues->queue[HostNormRespQueue]);
 		} else if (intstat & DOORBELL_3) {	// dev -> Host Normal Command Not Full
 			sa_writew(dev, DoorbellClrReg_p, DOORBELL_3);
 		} else if (intstat & DOORBELL_4) {	// dev -> Host Normal Response Not Full

-- 
Mark Haverkamp <markh@osdl.org>


^ permalink raw reply

* [PATCH] phy: mdiobus_register(): initialize all phy_map entries
From: Herbert Valerio Riedel @ 2006-05-10 16:12 UTC (permalink / raw)
  To: netdev; +Cc: Andy Fleming


make sure phy_map entries whose PHY address is masked are initialized
to NULL, given that other code (such as mdiobus_unregister for
instance) assumes that non-NULL phy_map entries are allocated
phy_devices

Signed-off-by: Herbert Valerio Riedel <hvr@gnu.org>
---

 drivers/net/phy/mdio_bus.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

Index: b/drivers/net/phy/mdio_bus.c
===================================================================
--- a/drivers/net/phy/mdio_bus.c	2006-05-10 17:02:12.000000000 +0200
+++ b/drivers/net/phy/mdio_bus.c	2006-05-10 17:05:27.000000000 +0200
@@ -60,8 +60,10 @@
 	for (i = 0; i < PHY_MAX_ADDR; i++) {
 		struct phy_device *phydev;
 
-		if (bus->phy_mask & (1 << i))
+		if (bus->phy_mask & (1 << i)) {
+			bus->phy_map[i] = NULL;
 			continue;
+		}
 
 		phydev = get_phy_device(bus, i);
 
-- 

^ permalink raw reply

* [PATCH 3/5] aacraid: adjustable timeouts
From: Mark Haverkamp @ 2006-05-10 16:12 UTC (permalink / raw)
  To: James Bottomley; +Cc: Mark Salyzyn, linux-scsi

Received From Mark Salyzyn

Add the ability to adjust for unusual corner case failures. Both of
these additional module parameters deal with embedded, non-intel or
complicated system scenarios.

Aif_timeout can be increased past the default 2 minute timeout to drop
application registrations when a system has an unusually high event load
resulting from continuing management requests, or simultaneous builds,
or sluggish user space as a result of system load.

Startup_timeout can be increased past the default 3 minute timeout to
drop an adapter initialization for systems that have a very large number
of targets, or slow to spin-up targets, or a complicated set of array
configurations that extend the time for the firmware to declare that it
is operational. This timeout would only have an affect on non-intel
based systems, as the (more patient) BIOS would generally be where the
startup delay would be dealt with.

Signed-off-by: Mark Haverkamp <markh@osdl.org>

---
Applies to the scsi-misc-2.6 tree.

--- scsi-misc-aac.orig/drivers/scsi/aacraid/aachba.c	2006-04-17 13:23:57.000000000 -0700
+++ scsi-misc-aac/drivers/scsi/aacraid/aachba.c	2006-04-20 12:55:54.000000000 -0700
@@ -148,6 +148,8 @@
 static int dacmode = -1;
 
 static int commit = -1;
+int startup_timeout = 180;
+int aif_timeout = 120;
 
 module_param(nondasd, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(nondasd, "Control scanning of hba for nondasd devices. 0=off, 1=on");
@@ -155,6 +157,10 @@
 MODULE_PARM_DESC(dacmode, "Control whether dma addressing is using 64 bit DAC. 0=off, 1=on");
 module_param(commit, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(commit, "Control whether a COMMIT_CONFIG is issued to the adapter for foreign arrays.\nThis is typically needed in systems that do not have a BIOS. 0=off, 1=on");
+module_param(startup_timeout, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(startup_timeout, "The duration of time in seconds to wait for adapter to have it's kernel up and\nrunning. This is typically adjusted for large systems that do not have a BIOS.");
+module_param(aif_timeout, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(aif_timeout, "The duration of time in seconds to wait for applications to pick up AIFs before\nderegistering them. This is typically adjusted for heavily burdened systems.");
 
 int numacb = -1;
 module_param(numacb, int, S_IRUGO|S_IWUSR);
--- scsi-misc-aac.orig/drivers/scsi/aacraid/commsup.c	2006-04-17 13:22:33.000000000 -0700
+++ scsi-misc-aac/drivers/scsi/aacraid/commsup.c	2006-05-09 14:48:14.000000000 -0700
@@ -1214,7 +1214,7 @@
 						 * since the last read off
 						 * the queue?
 						 */
-						if ((time_now - time_last) > 120) {
+						if ((time_now - time_last) > aif_timeout) {
 							entry = entry->next;
 							aac_close_fib_context(dev, fibctx);
 							continue;
--- scsi-misc-aac.orig/drivers/scsi/aacraid/rkt.c	2006-04-17 13:22:33.000000000 -0700
+++ scsi-misc-aac/drivers/scsi/aacraid/rkt.c	2006-05-09 14:48:27.000000000 -0700
@@ -444,14 +444,14 @@
 	 */
 	while (!(rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_UP_AND_RUNNING))
 	{
-		if(time_after(jiffies, start+180*HZ))
+		if(time_after(jiffies, start+startup_timeout*HZ))
 		{
 			status = rkt_readl(dev, MUnit.OMRx[0]);
 			printk(KERN_ERR "%s%d: adapter kernel failed to start, init status = %lx.\n", 
 					dev->name, instance, status);
 			goto error_iounmap;
 		}
-		schedule_timeout_uninterruptible(1);
+		msleep(1);
 	}
 	if (request_irq(dev->scsi_host_ptr->irq, aac_rkt_intr, SA_SHIRQ|SA_INTERRUPT, "aacraid", (void *)dev)<0) 
 	{
--- scsi-misc-aac.orig/drivers/scsi/aacraid/rx.c	2006-04-17 13:22:33.000000000 -0700
+++ scsi-misc-aac/drivers/scsi/aacraid/rx.c	2006-05-09 14:48:40.000000000 -0700
@@ -444,14 +444,14 @@
 	while ((!(rx_readl(dev, IndexRegs.Mailbox[7]) & KERNEL_UP_AND_RUNNING))
 		|| (!(rx_readl(dev, MUnit.OMRx[0]) & KERNEL_UP_AND_RUNNING)))
 	{
-		if(time_after(jiffies, start+180*HZ))
+		if(time_after(jiffies, start+startup_timeout*HZ))
 		{
 			status = rx_readl(dev, IndexRegs.Mailbox[7]);
 			printk(KERN_ERR "%s%d: adapter kernel failed to start, init status = %lx.\n", 
 					dev->name, instance, status);
 			goto error_iounmap;
 		}
-		schedule_timeout_uninterruptible(1);
+		msleep(1);
 	}
 	if (request_irq(dev->scsi_host_ptr->irq, aac_rx_intr, SA_SHIRQ|SA_INTERRUPT, "aacraid", (void *)dev)<0) 
 	{
--- scsi-misc-aac.orig/drivers/scsi/aacraid/sa.c	2006-04-17 13:22:33.000000000 -0700
+++ scsi-misc-aac/drivers/scsi/aacraid/sa.c	2006-05-09 14:48:58.000000000 -0700
@@ -318,13 +318,13 @@
 	 *	Wait for the adapter to be up and running. Wait up to 3 minutes.
 	 */
 	while (!(sa_readl(dev, Mailbox7) & KERNEL_UP_AND_RUNNING)) {
-		if (time_after(jiffies, start+180*HZ)) {
+		if (time_after(jiffies, start+startup_timeout*HZ)) {
 			status = sa_readl(dev, Mailbox7);
 			printk(KERN_WARNING "%s%d: adapter kernel failed to start, init status = %lx.\n", 
 					name, instance, status);
 			goto error_iounmap;
 		}
-		schedule_timeout_uninterruptible(1);
+		msleep(1);
 	}
 
 	if (request_irq(dev->scsi_host_ptr->irq, aac_sa_intr, SA_SHIRQ|SA_INTERRUPT, "aacraid", (void *)dev ) < 0) {
--- scsi-misc-aac.orig/drivers/scsi/aacraid/aacraid.h	2006-05-09 14:46:37.000000000 -0700
+++ scsi-misc-aac/drivers/scsi/aacraid/aacraid.h	2006-05-09 14:49:33.000000000 -0700
@@ -1815,3 +1815,5 @@
 extern int numacb;
 extern int acbsize;
 extern char aac_driver_version[];
+extern int startup_timeout;
+extern int aif_timeout;

-- 
Mark Haverkamp <markh@osdl.org>


^ permalink raw reply

* Time matching support
From: William Bohannan @ 2006-05-10 16:12 UTC (permalink / raw)
  To: fabrice, andrews, netfilter, netfilter-admin

Hello,
I am try to match by time with iptables 1.3.5 on linux kernel 2.6.15.2.
Only problem is I am try to match in the postrouting / mangle stage.  Do you
know if there is a patch or if you have had any success with this matter?

Getting the following errors:

machinenemae login: ipt_time loading
ipt_time: error, only valid for PRE_ROUTING, LOCAL_IN, FORWARD and OUTPUT)

many thanks

william

^ permalink raw reply

* [PATCH 2/5] aacraid: optimize sg alloc
From: Mark Haverkamp @ 2006-05-10 16:12 UTC (permalink / raw)
  To: James Bottomley; +Cc: Mark Salyzyn, linux-scsi

Received From Mark Salyzyn

Slight space and speed efficiency improvement.


Signed-off-by: Mark Haverkamp <markh@osdl.org>

---
Applies to the scsi-misc-2.6 tree.

--- scsi-misc-aac.orig/drivers/scsi/aacraid/aachba.c	2006-04-17 13:22:34.000000000 -0700
+++ scsi-misc-aac/drivers/scsi/aacraid/aachba.c	2006-04-17 13:23:57.000000000 -0700
@@ -2218,15 +2218,15 @@
 		}
 	}
 	else if(scsicmd->request_bufflen) {
-		dma_addr_t addr; 
-		addr = pci_map_single(dev->pdev,
+		u32 addr;
+		scsicmd->SCp.dma_handle = pci_map_single(dev->pdev,
 				scsicmd->request_buffer,
 				scsicmd->request_bufflen,
 				scsicmd->sc_data_direction);
+		addr = scsicmd->SCp.dma_handle;
 		psg->count = cpu_to_le32(1);
 		psg->sg[0].addr = cpu_to_le32(addr);
 		psg->sg[0].count = cpu_to_le32(scsicmd->request_bufflen);  
-		scsicmd->SCp.dma_handle = addr;
 		byte_count = scsicmd->request_bufflen;
 	}
 	return byte_count;

-- 
Mark Haverkamp <markh@osdl.org>


^ permalink raw reply

* [PATCH 1/5] aacraid: remove unneeded locking
From: Mark Haverkamp @ 2006-05-10 16:11 UTC (permalink / raw)
  To: James Bottomley; +Cc: Mark Salyzyn, linux-scsi

Received From Mark Salyzyn

Since new commands to the card are quiesced, respect the changes in
the SCSI error path which dropped locking around the hba reset handler
and similarly drop the lock requirement in the driver's path.

Signed-off-by: Mark Haverkamp <markh@osdl.org>

---
Applies to the scsi-misc-2.6 tree.

--- scsi-misc-aac.orig/drivers/scsi/aacraid/linit.c	2006-04-17 13:22:35.000000000 -0700
+++ scsi-misc-aac/drivers/scsi/aacraid/linit.c	2006-04-17 13:23:45.000000000 -0700
@@ -453,15 +453,10 @@
 
 	printk(KERN_ERR "%s: Host adapter reset request. SCSI hang ?\n", 
 					AAC_DRIVERNAME);
-
-
-	spin_lock_irq(host->host_lock);
-
 	aac = (struct aac_dev *)host->hostdata;
 	if (aac_adapter_check_health(aac)) {
 		printk(KERN_ERR "%s: Host adapter appears dead\n", 
 				AAC_DRIVERNAME);
-		spin_unlock_irq(host->host_lock);
 		return -ENODEV;
 	}
 	/*
@@ -487,13 +482,10 @@
 		/*
 		 * We can exit If all the commands are complete
 		 */
-		spin_unlock_irq(host->host_lock);
 		if (active == 0)
 			return SUCCESS;
 		ssleep(1);
-		spin_lock_irq(host->host_lock);
 	}
-	spin_unlock_irq(host->host_lock);
 	printk(KERN_ERR "%s: SCSI bus appears hung\n", AAC_DRIVERNAME);
 	return -ETIMEDOUT;
 }

-- 
Mark Haverkamp <markh@osdl.org>


^ permalink raw reply

* [PATCH 0/5] aacraid: Driver updates
From: Mark Haverkamp @ 2006-05-10 16:11 UTC (permalink / raw)
  To: James Bottomley; +Cc: Mark Salyzyn, linux-scsi

The following 5 patches contain various updates described in each mail.

Patches apply to the scsi-misc-2.6 tree.

 aachba.c   |   12 +++++++++---
 aacraid.h  |    4 ++--
 comminit.c |    1 -
 commsup.c  |    6 +-----
 dpcsup.c   |    2 --
 linit.c    |    8 --------
 rkt.c      |    4 ++--
 rx.c       |    4 ++--
 sa.c       |    8 ++++----
 9 files changed, 20 insertions(+), 29 deletions(-)


-- 
Mark Haverkamp <markh@osdl.org>


^ permalink raw reply

* [patch] Remove SN SAL error handling feature bit that is no longer needed
From: Russ Anderson @ 2006-05-10 16:09 UTC (permalink / raw)
  To: linux-ia64

[patch] Remove SN SAL error handling feature bit that is no longer needed

Due to improvements in linux & SAL MCA handling, the 
SAL_ERR_FEAT_MCA_SLV_TO_OS_INIT_SLV error handling features bit 
is no longer needed.  


Signed-off-by: Russ Anderson (rja@sgi.com)

---
 include/asm-ia64/sn/sn_sal.h |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: test/include/asm-ia64/sn/sn_sal.h
=================================--- test.orig/include/asm-ia64/sn/sn_sal.h	2006-05-10 09:40:49.811364437 -0500
+++ test/include/asm-ia64/sn/sn_sal.h	2006-05-10 09:44:47.407988569 -0500
@@ -346,7 +346,7 @@ ia64_sn_plat_set_error_handling_features
 	ret_stuff.v1 = 0;
 	ret_stuff.v2 = 0;
 	SAL_CALL_REENTRANT(ret_stuff, SN_SAL_SET_ERROR_HANDLING_FEATURES,
-		(SAL_ERR_FEAT_MCA_SLV_TO_OS_INIT_SLV | SAL_ERR_FEAT_LOG_SBES),
+		SAL_ERR_FEAT_LOG_SBES,
 		0, 0, 0, 0, 0, 0);
 
 	return ret_stuff.status;

^ permalink raw reply

* Re: Implementing branch attributes in git config
From: Jakub Narebski @ 2006-05-10 16:03 UTC (permalink / raw)
  To: git
In-Reply-To: <Pine.LNX.4.64.0605100823350.3718@g5.osdl.org>

Linus Torvalds wrote:

> On Wed, 10 May 2006, Martin Langhoff wrote:
>> 
>> Good one. I'm following this thread with interest, but it feels we've
>> been attacked by the 'bike shed bug' in the act of redesigning
>> Windows.ini.
[...]
>> As an end-user, I have personally stayed away from the increasingly
>> complex scheme for remotes waiting for it to settle, and stuck with
>> the old-styled .git/branches stuff which is slam-dunk simple and it
>> just works.

> And I'm personally actually pretty fed up with the .git/branches/ and
> .git/remotes/ thing, partly because I can never remember which file is
> which. I had to look at the code of git-parse-remote.sh to remind me which
> had what semantics. We could remove the old one entirely, of course (and
> no, I don't remember which is which now either), and avoid that particular
> problem, but it kind of soured me on it.
> 
> And if we truly have separate files, we should go all the way, and have
> the good old "one file, one value" rule. Which we don't, and which I think
> everybody admits would be horrible for this case for users (even if it
> might be very nice for scripting).

On one hand the remotes/ (or older branches/) is similar to refs/heads and
refs/tags that it contains shortcut names for pulling and pushing. On the
other hand configuration should belong to configuration.

Besides, AFAICT we did not have the place to have branch specific
configuration (like description, default place to pull from + marking
branch as immutable, default place to push to, etc.) and if I understand
correctly branches/ was used for something else. refs/heads/`name` stored
branches, including temporary branches which did not need configuration.

I guess that for the time being we can have remotes both in remotes/ and in
config file, plus script to freely transform between them (unless some
config would be unattainable by remotes/ file).

-- 
Jakub Narebski
Warsaw, Poland

^ permalink raw reply

* Re: Updated libata PATA patch
From: Kevin Radloff @ 2006-05-10 16:05 UTC (permalink / raw)
  To: Alan Cox; +Cc: linux-kernel
In-Reply-To: <1147270145.17886.42.camel@localhost.localdomain>

On 5/10/06, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> On Maw, 2006-05-09 at 21:48 -0400, Kevin Radloff wrote:
> > ata3: PATA max PIO0 cmd 0x3100 ctl 0x310E bmdma 0x0 irq 11
> > setup_irq: irq handler mismatch
>
> Ok so we got an interrupt this time and then when we asked for it got
> told "no". That still shouldn't crash but there are some lurking
> problems when ata_device_add fails.
>
> More interesting is how it can occur.
>
> We asked for an IRQ that was "exclusive". We got given IRQ 11 which was
> shared. A look at the pcmcia code suggests the new drivers/pcmcia code
> is broken here: It may fall back to using an interrupt line that is
> shared even when told not to.
>
> What occurs if you change
>
>         ae.irq_flags = 0;
>
> to
>
>         ae.irq_flags = SA_SHIRQ ?

Another new and exciting oops :)

pccard: PCMCIA card inserted into slot 1
cs: memory probe 0x0c0000-0x0fffff: excluding 0xc0000-0xcffff 0xdc000-0xfffff
cs: memory probe 0x50000000-0x53ffffff: excluding 0x50000000-0x53ffffff
cs: memory probe 0x60000000-0x60ffffff: clean.
cs: memory probe 0xa0000000-0xa0ffffff: clean.
cs: memory probe 0xd0200000-0xd02fffff: excluding 0xd0200000-0xd021ffff
pcmcia: registering new device pcmcia1.0
ata3: PATA max PIO0 cmd 0x3100 ctl 0x310E bmdma 0x0 irq 11
ata3: dev 0 cfg 49:0200 82:0000 83:0000 84:0000 85:0000 86:0000 87:0000 88:0000
ata3: dev 0 ATA-10, max PIO4, 2001888 sectors: LBA
ata3: dev 0 configured for PIO0
scsi2 : pata_pcmcia
  Vendor: ATA       Model: SanDisk SDCFH-10  Rev: HDX
  Type:   Direct-Access                      ANSI SCSI revision: 05
SCSI device sdb: 2001888 512-byte hdwr sectors (1025 MB)
sdb: Write Protect is off
sdb: Mode Sense: 00 3a 00 00
SCSI device sdb: drive cache: write through
SCSI device sdb: 2001888 512-byte hdwr sectors (1025 MB)
sdb: Write Protect is off
sdb: Mode Sense: 00 3a 00 00
SCSI device sdb: drive cache: write through
 sdb:<1>BUG: unable to handle kernel NULL pointer dereference at
virtual address 00000000
 printing eip:
00000000
*pde = 00000000
Oops: 0000 [#1]
PREEMPT
Modules linked in: pata_pcmcia rfcomm l2cap bluetooth appletalk psnap
llc ipt_LOG xt_limit xt_state iptable_filter ip_conntrack i915 drm
fuj02b1_acpi snd_intel8x0 snd_intel8x0m snd_ac97_codec snd_ac97_bus
snd_pcm_oss snd_mixer_oss snd_pcm snd_timer ehci_hcd pcmcia
firmware_class uhci_hcd joydev sg 8139too mii snd soundcore usbcore
sr_mod cdrom crc32 ohci1394 ieee1394 snd_page_alloc yenta_socket
rsrc_nonstatic pcmcia_core evdev psmouse
CPU:    0
EIP:    0060:[<00000000>]    Not tainted VLI
EFLAGS: 00010046   (2.6.17-rc3-ck3-ide2-fu-mw #1)
EIP is at _stext+0x4feffde0/0x3c
eax: e6c98b88   ebx: e6c98a90   ecx: 00000016   edx: f02b8fe0
esi: e6c98b88   edi: e6c98b98   ebp: e6c98b88   esp: e5954678
ds: 007b   es: 007b   ss: 0068
Process modprobe (pid: 3100, threadinfo=e5954000 task=eefd6030)
Stack: <0>b02411c5 00000000 ef620e50 b1926b00 b023ee3e e6c98a90
e6c98a90 ef620e40
       b02367f7 ef620e40 b0242f1d e6c98a90 e6c98b08 e5ed4800 b1926b00 e6c98a90
       b0243726 b02367f7 b0242f1d b02367f7 00000287 b1926b00 e6c98800 b191e02c
Call Trace:
 <b02411c5> ata_qc_issue_prot+0x5e/0x115   <b023ee3e> ata_qc_issue+0x31e/0x378
 <b02367f7> scsi_done+0x0/0x16   <b0242f1d> ata_scsi_rw_xlat+0x0/0x2b5
 <b0243726> ata_scsi_queuecmd+0x181/0x197   <b02367f7> scsi_done+0x0/0x16
 <b0242f1d> ata_scsi_rw_xlat+0x0/0x2b5   <b02367f7> scsi_done+0x0/0x16
 <b0236eb7> scsi_dispatch_cmd+0x178/0x1c8   <b023af8a>
scsi_request_fn+0x211/0x2b7
 <b01e8365> __generic_unplug_device+0x1d/0x1f   <b01e618e> elv_insert+0x97/0x130
 <b01e9647> __make_request+0x2e9/0x32c   <b01e799b>
generic_make_request+0x12d/0x13d
 <b012f661> __alloc_pages+0x4f/0x25c   <b012dcbd> mempool_alloc+0x1f/0xc6
 <b01e8f26> submit_bio+0xa3/0xa9   <b0147dcf> bio_alloc_bioset+0x9b/0xf3
 <b0145253> submit_bh+0xc5/0xe0   <b01476d2> block_read_full_page+0x22b/0x23a
 <b014a8b0> blkdev_get_block+0x0/0x33   <b012b679> add_to_page_cache+0x33/0x89
 <b012b6dc> add_to_page_cache_lru+0xd/0x20   <b012d3c2>
read_cache_page+0x7b/0x116
 <b0149c8f> blkdev_readpage+0x0/0xc   <b016f43a> msdos_partition+0x0/0x422
 <b016f293> read_dev_sector+0x22/0x78   <b016f480> msdos_partition+0x46/0x422
 <b01f1d8a> snprintf+0x1c/0x1f   <b011433c> printk+0x14/0x18
 <b016f43a> msdos_partition+0x0/0x422   <b016f1d7> rescan_partitions+0xe7/0x15d
 <b014a43d> do_open+0x200/0x2cb   <b014a55c> blkdev_get+0x54/0x5f
 <b016f3e4> register_disk+0xfb/0x14b   <b01eaa2a> add_disk+0x2b/0x36
 <b01ea3f4> exact_match+0x0/0x7   <b01ea8e9> exact_lock+0x0/0xd
 <b02462b1> sd_probe+0x2cb/0x32e   <b0233471> driver_probe_device+0x42/0x8b
 <b02334ba> __device_attach+0x0/0x5   <b0232ed7> bus_for_each_drv+0x32/0x58
 <b02334ff> device_attach+0x40/0x4f   <b02334ba> __device_attach+0x0/0x5
 <b0232dff> bus_add_device+0x27/0xcd   <b0232403> device_add+0xc4/0x122
 <b023cef2> scsi_sysfs_add_sdev+0x2a/0x1ce   <b011433c> printk+0x14/0x18
 <b023b9c1> scsi_probe_and_add_lun+0x60f/0x6f7   <b023bdf2>
scsi_alloc_target+0x20f/0x316
 <b023bfe5> __scsi_scan_target+0x84/0x4cc   <b01413a8>
cache_alloc_refill+0x2bb/0x433
 <b0170746> sysfs_new_dirent+0x19/0x5f   <b0170836> sysfs_make_dirent+0x11/0x66
 <b023c62e> scsi_scan_target+0x5f/0x73   <b02422c0> ata_scsi_scan_host+0x33/0x42
 <b0240e97> ata_device_add+0x4ea/0x599   <f02b7506>
pcmcia_init_one+0x4b4/0x512 [pata_pcmcia]
 <f017717e> pcmcia_device_probe+0x7b/0x109 [pcmcia]   <b023350e>
__driver_attach+0x0/0x59
 <b0233471> driver_probe_device+0x42/0x8b   <b0233544> __driver_attach+0x36/0x59
 <b0232f9c> bus_for_each_dev+0x33/0x55   <b02333db> driver_attach+0x11/0x13
 <b023350e> __driver_attach+0x0/0x59   <b0232cc6> bus_add_driver+0x64/0xfa
 <f0176d1f> pcmcia_register_driver+0x4a/0xab [pcmcia]   <b0128815>
sys_init_module+0x1221/0x13ae
 <b0102aeb> syscall_call+0x7/0xb
Code:  Bad EIP value.
EIP: [<00000000>] _stext+0x4feffde0/0x3c SS:ESP 0068:e5954678
 <6>note: modprobe[3100] exited with preempt_count 1

--
Kevin 'radsaq' Radloff
radsaq@gmail.com
http://thesaq.com/

^ permalink raw reply

* [PATCH 14/14] NFS: Use local caching [try #8]
From: David Howells @ 2006-05-10 16:01 UTC (permalink / raw)
  To: torvalds, akpm, steved, trond.myklebust, aviro
  Cc: linux-fsdevel, linux-cachefs, nfsv4, linux-kernel
In-Reply-To: <20060510160111.9058.55026.stgit@warthog.cambridge.redhat.com>

The attached patch makes it possible for the NFS filesystem to make use of the
network filesystem local caching service (FS-Cache).

To be able to use this, an updated mount program is required.  This can be
obtained from:

	http://people.redhat.com/steved/cachefs/util-linux/

To mount an NFS filesystem to use caching, add an "fsc" option to the mount:

	mount warthog:/ /a -o fsc

Signed-Off-By: David Howells <dhowells@redhat.com>
---

 fs/Kconfig                 |    7 +
 fs/nfs/Makefile            |    1 
 fs/nfs/client.c            |   18 +++
 fs/nfs/file.c              |   31 ++++++
 fs/nfs/inode.c             |   24 ++++
 fs/nfs/internal.h          |   10 ++
 fs/nfs/nfs-fscache.c       |  191 +++++++++++++++++++++++++++++++++++
 fs/nfs/nfs-fscache.h       |  169 +++++++++++++++++++++++++++++++
 fs/nfs/pagelist.c          |    3 -
 fs/nfs/read.c              |  241 ++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/super.c             |   21 ++++
 fs/nfs/sysctl.c            |   43 ++++++++
 fs/nfs/write.c             |   54 +++++++++-
 include/linux/nfs4_mount.h |    1 
 include/linux/nfs_fs.h     |    5 +
 include/linux/nfs_fs_sb.h  |    5 +
 include/linux/nfs_mount.h  |    1 
 17 files changed, 820 insertions(+), 5 deletions(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index 9ef9f14..683d96f 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1424,6 +1424,13 @@ config NFS_V4
 
 	  If unsure, say N.
 
+config NFS_FSCACHE
+	bool "Provide NFS client caching support (EXPERIMENTAL)"
+	depends on NFS_FS && FSCACHE && EXPERIMENTAL
+	help
+	  Say Y here if you want NFS data to be cached locally on disc through
+	  the general filesystem cache manager
+
 config NFS_DIRECTIO
 	bool "Allow direct I/O on NFS files (EXPERIMENTAL)"
 	depends on NFS_FS && EXPERIMENTAL
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index f4580b4..9334293 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -16,4 +16,5 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4x
 			   nfs4namespace.o
 nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
 nfs-$(CONFIG_SYSCTL) += sysctl.o
+nfs-$(CONFIG_NFS_FSCACHE) += nfs-fscache.o
 nfs-objs		:= $(nfs-y)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index d2bf7f5..e503a0e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -42,6 +42,7 @@ #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
 #include "iostat.h"
+#include "nfs-fscache.h"
 #include "internal.h"
 
 #define NFSDBG_FACILITY		NFSDBG_CLIENT
@@ -182,6 +183,8 @@ #endif
 
 	dprintk("--> nfs_free_client()\n");
 
+	nfs_kill_fscookie(clp);
+
 	/* -EIO all pending I/O */
 	rpc = clp->client;
 	if (!IS_ERR(rpc))
@@ -497,6 +500,9 @@ #endif
 	clp->acdirmin = data->acdirmin * HZ;
 	clp->acdirmax = data->acdirmax * HZ;
 
+	if (clp->flags & NFS_MOUNT_FSCACHE)
+		nfs_fill_fscookie(clp);
+
 	/* Start lockd here, before we might error out */
 	if (!(clp->flags & NFS_MOUNT_NONLM)) {
 		error = lockd_up();
@@ -831,6 +837,9 @@ static int nfs4_create_client(struct nfs
 	clp->retrans_timeo = timeparms.to_initval;
 	clp->retrans_count = timeparms.to_retries;
 
+	if (clp->flags & NFS4_MOUNT_FSCACHE)
+		nfs4_fill_fscookie(clp);
+
 	/* Start lockd here, before we might error out */
 	if (!(clp->flags & NFS_MOUNT_NONLM)) {
 		error = lockd_up();
@@ -1221,7 +1230,7 @@ static int nfs_client_list_show(struct s
 
 	/* display header on line 1 */
 	if (v == SEQ_START_TOKEN) {
-		seq_puts(m, "NV SERVER   PORT USE AUTH HOSTNAME\n");
+		seq_puts(m, "NV SERVER   PORT USE AUTH FSC HOSTNAME\n");
 		return 0;
 	}
 
@@ -1233,12 +1242,17 @@ static int nfs_client_list_show(struct s
 	    nfs_auth_flavours[clp->authflavour])
 		auth = nfs_auth_flavours[clp->authflavour];
 
-	seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s %s\n",
+	seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s %s %s\n",
 		   clp->nfsversion,
 		   NIPQUAD(clp->addr.sin_addr),
 		   ntohs(clp->addr.sin_port),
 		   atomic_read(&clp->usage),
 		   auth,
+#ifdef CONFIG_NFS_FSCACHE
+		   clp->fscache ? "yes" : "no ",
+#else
+		   "no ",
+#endif
 		   clp->hostname);
 
 	return 0;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8a89a70..4ce3e69 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -27,9 +27,11 @@ #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
+#include <linux/buffer_head.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
+#include "nfs-fscache.h"
 
 #include "delegation.h"
 #include "iostat.h"
@@ -253,6 +255,19 @@ nfs_file_sendfile(struct file *filp, lof
 	return res;
 }
 
+#ifdef CONFIG_NFS_FSCACHE
+static int nfs_file_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+	wait_on_page_fs_misc(page);
+	return 0;
+}
+static struct vm_operations_struct nfs_fs_vm_operations = {
+	.nopage			= filemap_nopage,
+	.populate		= filemap_populate,
+	.page_mkwrite   = nfs_file_page_mkwrite,
+};
+#endif
+
 static int
 nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
 {
@@ -266,6 +281,12 @@ nfs_file_mmap(struct file * file, struct
 	status = nfs_revalidate_file(inode, file);
 	if (!status)
 		status = generic_file_mmap(file, vma);
+
+#ifdef CONFIG_NFS_FSCACHE
+	if (NFS_I(inode)->fscache != NULL)
+		vma->vm_ops = &nfs_fs_vm_operations;
+#endif
+
 	return status;
 }
 
@@ -328,6 +349,11 @@ static int nfs_release_page(struct page 
 	return !nfs_wb_page(page->mapping->host, page);
 }
 
+/*
+ * since we use page->private for our own nefarious purposes when using fscache, we have to
+ * override extra address space ops to prevent fs/buffer.c from getting confused, even though we
+ * may not have asked its opinion
+ */
 struct address_space_operations nfs_file_aops = {
 	.readpage = nfs_readpage,
 	.readpages = nfs_readpages,
@@ -341,6 +367,11 @@ struct address_space_operations nfs_file
 #ifdef CONFIG_NFS_DIRECTIO
 	.direct_IO = nfs_direct_IO,
 #endif
+#ifdef CONFIG_NFS_FSCACHE
+	.sync_page	= block_sync_page,
+	.releasepage	= nfs_releasepage,
+	.invalidatepage	= nfs_invalidatepage,
+#endif
 };
 
 /* 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 16d6ae6..c126afb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -48,6 +48,8 @@ #include "delegation.h"
 #include "iostat.h"
 #include "internal.h"
 
+#include "nfs-fscache.h"
+
 #define NFSDBG_FACILITY		NFSDBG_VFS
 #define NFS_PARANOIA 1
 
@@ -103,6 +105,8 @@ void nfs_clear_inode(struct inode *inode
 	cred = nfsi->cache_access.cred;
 	if (cred)
 		put_rpccred(cred);
+
+	nfs_clear_fscookie(NFS_SERVER(inode), nfsi);
 	BUG_ON(atomic_read(&nfsi->data_updates) != 0);
 }
 
@@ -149,6 +153,8 @@ void nfs_zap_caches(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	nfs_zap_caches_locked(inode);
 	spin_unlock(&inode->i_lock);
+
+	nfs_zap_fscookie(NFS_SERVER(inode), NFS_I(inode));
 }
 
 static void nfs_zap_acl_cache(struct inode *inode)
@@ -227,6 +233,7 @@ nfs_fhget(struct super_block *sb, struct
 	};
 	struct inode *inode = ERR_PTR(-ENOENT);
 	unsigned long hash;
+	int maycache = 1;
 
 	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
 		goto out_no_inode;
@@ -275,6 +282,7 @@ nfs_fhget(struct super_block *sb, struct
 				else
 					inode->i_op = &nfs_mountpoint_inode_operations;
 				inode->i_fop = NULL;
+				maycache = 0;
 			}
 		} else if (S_ISLNK(inode->i_mode))
 			inode->i_op = &nfs_symlink_inode_operations;
@@ -307,6 +315,10 @@ nfs_fhget(struct super_block *sb, struct
 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
 		nfsi->cache_access.cred = NULL;
 
+		nfsi->fscache = NULL;
+		if (maycache)
+			nfs_fhget_fscookie(sb, nfsi);
+
 		unlock_new_inode(inode);
 	} else
 		nfs_refresh_inode(inode, fattr);
@@ -389,6 +401,7 @@ void nfs_setattr_update_inode(struct ino
 	if ((attr->ia_valid & ATTR_SIZE) != 0) {
 		nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
 		inode->i_size = attr->ia_size;
+		nfs_set_fscsize(NFS_SERVER(inode), NFS_I(inode), inode->i_size);
 		vmtruncate(inode, attr->ia_size);
 	}
 }
@@ -704,6 +717,8 @@ void nfs_revalidate_mapping(struct inode
 		}
 		spin_unlock(&inode->i_lock);
 
+		nfs_renew_fscookie(NFS_SERVER(inode), nfsi);
+
 		dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
 				inode->i_sb->s_id,
 				(long long)NFS_FILEID(inode));
@@ -943,11 +958,13 @@ static int nfs_update_inode(struct inode
 			if (data_stable) {
 				inode->i_size = new_isize;
 				invalid |= NFS_INO_INVALID_DATA;
+				nfs_set_fscsize(NFS_SERVER(inode), nfsi, inode->i_size);
 			}
 			invalid |= NFS_INO_INVALID_ATTR;
 		} else if (new_isize > cur_isize) {
 			inode->i_size = new_isize;
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+			nfs_set_fscsize(NFS_SERVER(inode), nfsi, inode->i_size);
 		}
 		nfsi->cache_change_attribute = jiffies;
 		dprintk("NFS: isize change on server for file %s/%ld\n",
@@ -1162,6 +1179,10 @@ static int __init init_nfs_fs(void)
 {
 	int err;
 
+	err = nfs_register_fscache();
+	if (err < 0)
+		goto out6;
+
 	err = nfs_fs_proc_init();
 	if (err)
 		goto out5;
@@ -1208,6 +1229,8 @@ out3:
 out4:
 	nfs_fs_proc_exit();
 out5:
+	nfs_unregister_fscache();
+out6:
 	return err;
 }
 
@@ -1218,6 +1241,7 @@ static void __exit exit_nfs_fs(void)
 	nfs_destroy_readpagecache();
 	nfs_destroy_inodecache();
 	nfs_destroy_nfspagecache();
+	nfs_unregister_fscache();
 #ifdef CONFIG_PROC_FS
 	rpc_proc_unregister("nfs");
 #endif
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d52a273..677b42e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -4,6 +4,16 @@
 
 #include <linux/mount.h>
 
+#define NFS_PAGE_WRITING	0
+
+#define PageNfsWriting(page)		test_bit(NFS_PAGE_WRITING, &(page)->private)
+#define SetPageNfsWriting(page)		set_bit(NFS_PAGE_WRITING, &(page)->private)
+#define ClearPageNfsWriting(page)	clear_bit(NFS_PAGE_WRITING, &(page)->private)
+
+#define PageNfsCached(page)		PagePrivate(page)
+#define SetPageNfsCached(page)		SetPagePrivate(page)
+#define ClearPageNfsCached(page)	ClearPagePrivate(page)
+
 struct nfs_string;
 struct nfs_mount_data;
 struct nfs4_mount_data;
diff --git a/fs/nfs/nfs-fscache.c b/fs/nfs/nfs-fscache.c
new file mode 100644
index 0000000..83a30cc
--- /dev/null
+++ b/fs/nfs/nfs-fscache.c
@@ -0,0 +1,191 @@
+/* nfs-fscache.c: NFS filesystem cache interface
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_fs_sb.h>
+#include <linux/in6.h>
+
+#include "nfs-fscache.h"
+#include "internal.h"
+
+/*
+ * Sysctl variables
+ */
+int nfs_fscache_to_pages;
+int nfs_fscache_from_pages;
+int nfs_fscache_uncache_page;
+int nfs_fscache_from_error;
+int nfs_fscache_to_error;
+
+#define NFSDBG_FACILITY		NFSDBG_FSCACHE
+
+static struct fscache_netfs_operations nfs_cache_ops = {
+};
+
+struct fscache_netfs nfs_cache_netfs = {
+	.name			= "nfs",
+	.version		= 0,
+	.ops			= &nfs_cache_ops,
+};
+
+static const uint8_t nfs_cache_ipv6_wrapper_for_ipv4[12] = {
+	[0 ... 9]	= 0x00,
+	[10 ... 11]	= 0xff
+};
+
+struct nfs_server_key {
+	uint16_t nfsversion;
+	uint16_t port;
+	union {
+		struct {
+			uint8_t		ipv6wrapper[12];
+			struct in_addr	addr;
+		} ipv4_addr;
+		struct in6_addr ipv6_addr;
+	};
+};
+
+static uint16_t nfs_server_get_key(const void *cookie_netfs_data,
+				   void *buffer, uint16_t bufmax)
+{
+	const struct nfs_client *clp = cookie_netfs_data;
+	struct nfs_server_key *key = buffer;
+	uint16_t len = 0;
+
+	key->nfsversion = clp->nfsversion;
+
+	switch (clp->addr.sin_family) {
+	case AF_INET:
+		key->port = clp->addr.sin_port;
+		
+		memcpy(&key->ipv4_addr.ipv6wrapper,
+		       &nfs_cache_ipv6_wrapper_for_ipv4,
+		       sizeof(key->ipv4_addr.ipv6wrapper));
+		memcpy(&key->ipv4_addr.addr,
+		       &clp->addr.sin_addr,
+		       sizeof(key->ipv4_addr.addr));
+		len = sizeof(struct nfs_server_key);
+		break;
+
+	case AF_INET6:
+		key->port = clp->addr.sin_port;
+
+		memcpy(&key->ipv6_addr,
+		       &clp->addr.sin_addr,
+		       sizeof(key->ipv6_addr));
+		len = sizeof(struct nfs_server_key);
+		break;
+
+	default:
+		len = 0;
+		printk(KERN_WARNING "NFS: Unknown network family '%d'\n",
+			clp->addr.sin_family);
+		break;
+	}
+
+	return len;
+}
+
+/*
+ * the root index for the filesystem is defined by nfsd IP address and ports
+ */
+struct fscache_cookie_def nfs_cache_server_index_def = {
+	.name		= "NFS.servers",
+	.type 		= FSCACHE_COOKIE_TYPE_INDEX,
+	.get_key	= nfs_server_get_key,
+};
+
+static uint16_t nfs_fh_get_key(const void *cookie_netfs_data,
+		void *buffer, uint16_t bufmax)
+{
+	const struct nfs_inode *nfsi = cookie_netfs_data;
+	uint16_t nsize;
+
+	/* set the file handle */
+	nsize = nfsi->fh.size;
+	memcpy(buffer, nfsi->fh.data, nsize);
+//printk("nfs_fh_get_key: nfsi 0x%p nsize %d\n", nfsi, nsize);
+	return nsize;
+}
+
+/*
+ * indication of pages that now have cache metadata retained
+ * - this function should mark the specified pages as now being cached
+ */
+static void nfs_fh_mark_pages_cached(void *cookie_netfs_data,
+				     struct address_space *mapping,
+				     struct pagevec *cached_pvec)
+{
+	struct nfs_inode *nfsi = cookie_netfs_data;
+	unsigned long loop;
+
+	dprintk("NFS: nfs_fh_mark_pages_cached: nfs_inode 0x%p pages %ld\n",
+		nfsi, cached_pvec->nr);
+
+	for (loop = 0; loop < cached_pvec->nr; loop++)
+		SetPageNfsCached(cached_pvec->pages[loop]);
+}
+
+/*
+ * indication the cookie is no longer uncached
+ * - this function is called when the backing store currently caching a cookie
+ *   is removed
+ * - the netfs should use this to clean up any markers indicating cached pages
+ * - this is mandatory for any object that may have data
+ */
+static void nfs_fh_now_uncached(void *cookie_netfs_data)
+{
+	struct nfs_inode *nfsi = cookie_netfs_data;
+	struct pagevec pvec;
+	pgoff_t first;
+	int loop, nr_pages;
+
+	pagevec_init(&pvec, 0);
+	first = 0;
+
+	dprintk("NFS: nfs_fh_now_uncached: nfs_inode 0x%p\n", nfsi);
+
+	for (;;) {
+		/* grab a bunch of pages to clean */
+		nr_pages = pagevec_lookup(&pvec,
+					  nfsi->vfs_inode.i_mapping,
+					  first,
+					  PAGEVEC_SIZE - pagevec_count(&pvec));
+		if (!nr_pages)
+			break;
+
+		for (loop = 0; loop < nr_pages; loop++)
+			ClearPageNfsCached(pvec.pages[loop]);
+
+		first = pvec.pages[nr_pages - 1]->index + 1;
+
+		pvec.nr = nr_pages;
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+}
+
+/*
+ * the primary index for each server is simply made up of a series of NFS file
+ * handles
+ */
+struct fscache_cookie_def nfs_cache_fh_index_def = {
+	.name			= "NFS.fh",
+	.type			= FSCACHE_COOKIE_TYPE_DATAFILE,
+	.get_key		= nfs_fh_get_key,
+	.mark_pages_cached	= nfs_fh_mark_pages_cached,
+	.now_uncached		= nfs_fh_now_uncached,
+};
diff --git a/fs/nfs/nfs-fscache.h b/fs/nfs/nfs-fscache.h
new file mode 100644
index 0000000..2b041a8
--- /dev/null
+++ b/fs/nfs/nfs-fscache.h
@@ -0,0 +1,169 @@
+/* nfs-fscache.h: NFS filesystem cache interface definitions
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NFS_FSCACHE_H
+#define _NFS_FSCACHE_H
+
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+
+#ifdef CONFIG_NFS_FSCACHE
+#include <linux/fscache.h>
+
+extern struct fscache_netfs nfs_cache_netfs;
+extern struct fscache_cookie_def nfs_cache_server_index_def;
+extern struct fscache_cookie_def nfs_cache_fh_index_def;
+
+extern void nfs_invalidatepage(struct page *, unsigned long);
+extern int nfs_releasepage(struct page *, gfp_t);
+extern int nfs_mkwrite(struct page *);
+
+extern int nfs_fscache_to_pages;
+extern int nfs_fscache_from_pages;
+extern int nfs_fscache_uncache_page;
+extern int nfs_fscache_from_error;
+extern int nfs_fscache_to_error;
+
+static inline
+void nfs4_fill_fscookie(struct nfs_client *clp)
+{
+	if (!(clp->flags & NFS_MOUNT_FSCACHE)) {
+		clp->fscache = NULL;
+		return;
+	}
+
+	/* create a cache index for looking up filehandles */
+	clp->fscache = fscache_acquire_cookie(nfs_cache_netfs.primary_index,
+			       &nfs_cache_server_index_def, clp);
+	if (!clp->fscache) {
+		clp->flags &= ~NFS_MOUNT_FSCACHE;
+		printk(KERN_WARNING
+		       "NFS4: No Fscache cookie. Turning Fscache off!\n");
+	} else {
+		dfprintk(FSCACHE,"NFS: nfs4 cookie (0x%p/0x%p)\n",
+			 clp, clp->fscache);
+	}
+}
+
+static inline
+void nfs_fill_fscookie(struct nfs_client *clp)
+{
+	clp->fscache = NULL;
+	if (clp->flags & NFS_MOUNT_FSCACHE) {
+		/* create a cache index for looking up filehandles */
+		clp->fscache = fscache_acquire_cookie(nfs_cache_netfs.primary_index,
+						      &nfs_cache_server_index_def, clp);
+		if (!clp->fscache) {
+			clp->flags &= ~NFS_MOUNT_FSCACHE;
+			printk(KERN_WARNING "NFS: No Fscache cookie. Turning "
+			       "Fscache off!\n");
+		}
+	}
+		
+	dfprintk(FSCACHE,"NFS: cookie (0x%p/0x%p)\n", clp, clp->fscache);
+}
+
+static inline
+void nfs_fhget_fscookie(struct super_block *sb, struct nfs_inode *nfsi)
+{
+	struct nfs_client *clp = NFS_SB(sb)->nfs_client;
+
+	nfsi->fscache = fscache_acquire_cookie(clp->fscache,
+		&nfs_cache_fh_index_def, nfsi);
+	fscache_set_i_size(nfsi->fscache, nfsi->vfs_inode.i_size);
+
+	dfprintk(FSCACHE, "NFS: fhget new cookie (0x%p/0x%p/0x%p)\n",
+		sb, nfsi, nfsi->fscache);
+}
+
+static inline
+void nfs_kill_fscookie(struct nfs_client *clp)
+{
+	dfprintk(FSCACHE,"NFS: killing cookie (0x%p/0x%p)\n",
+		clp, clp->fscache);
+
+	fscache_relinquish_cookie(clp->fscache, 0);
+	clp->fscache = NULL;
+}
+
+static inline
+void nfs_set_fscsize(struct nfs_server *server, struct nfs_inode *nfsi, loff_t i_size)
+{
+	fscache_set_i_size(nfsi->fscache, i_size);
+}
+
+static inline
+void nfs_renew_fscookie(struct nfs_server *server, struct nfs_inode *nfsi)
+{
+	struct fscache_cookie *old = nfsi->fscache;
+
+	if (nfsi->fscache) {
+		/* retire the current fscache cache and get a new one */
+		fscache_relinquish_cookie(nfsi->fscache, 1);
+
+		nfsi->fscache = fscache_acquire_cookie(
+			server->nfs_client->fscache,
+			&nfs_cache_fh_index_def,
+			nfsi);
+		fscache_set_i_size(nfsi->fscache, nfsi->vfs_inode.i_size);
+
+		dfprintk(FSCACHE,
+			 "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n",
+			 server, nfsi, old, nfsi->fscache);
+	}
+}
+
+static inline
+void nfs_clear_fscookie(struct nfs_server *server, struct nfs_inode *nfsi)
+{
+	dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n",
+			nfsi, nfsi->fscache);
+
+	fscache_relinquish_cookie(nfsi->fscache, 0);
+	nfsi->fscache = NULL;
+}
+
+static inline
+void nfs_zap_fscookie(struct nfs_server *server, struct nfs_inode *nfsi)
+{
+	dfprintk(FSCACHE,"NFS: zapping cookie (0x%p/0x%p)\n",
+		nfsi, nfsi->fscache);
+
+	fscache_relinquish_cookie(nfsi->fscache, 1);
+	nfsi->fscache = NULL;
+}
+
+static inline
+int nfs_register_fscache(void)
+{
+	return fscache_register_netfs(&nfs_cache_netfs);
+}
+
+static inline
+void nfs_unregister_fscache(void)
+{
+	fscache_unregister_netfs(&nfs_cache_netfs);
+}
+#else
+static inline void nfs_fill_fscookie(struct nfs_client *clp) {}
+static inline void nfs4_fill_fscookie(struct nfs_client *clp) {}
+static inline void nfs_kill_fscookie(struct nfs_client *clp) {}
+static inline void nfs_fhget_fscookie(struct super_block *sb, struct nfs_inode *nfsi) {}
+static inline void nfs_set_fscsize(struct nfs_server *server, struct nfs_inode *nfsi, loff_t i_size) {}
+static inline void nfs_clear_fscookie(struct nfs_server *server, struct nfs_inode *nfsi) {}
+static inline void nfs_zap_fscookie(struct nfs_server *server, struct nfs_inode *nfsi) {}
+static inline void nfs_renew_fscookie(struct nfs_server *server, struct nfs_inode *nfsi) {}
+static inline int nfs_register_netfs(void) { return 0; }
+static inline void nfs_unregister_netfs(void) {}
+
+#endif
+#endif /* _NFS_FSCACHE_H */
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 4077e42..e397fb1 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -18,6 +18,7 @@ #include <linux/nfs4.h>
 #include <linux/nfs_page.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
+#include "internal.h"
 
 #define NFS_PARANOIA 1
 
@@ -85,7 +86,7 @@ nfs_create_request(struct nfs_open_conte
 	atomic_set(&req->wb_complete, 0);
 	req->wb_index	= page->index;
 	page_cache_get(page);
-	BUG_ON(PagePrivate(page));
+	BUG_ON(PageNfsWriting(page));
 	BUG_ON(!PageLocked(page));
 	BUG_ON(page->mapping->host != inode);
 	req->wb_offset  = offset;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 58f3444..16f2348 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -27,11 +27,15 @@ #include <linux/pagemap.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
+#include <linux/nfs_mount.h>
 #include <linux/smp_lock.h>
 
+#include "nfs-fscache.h"
+
 #include <asm/system.h>
 
 #include "iostat.h"
+#include "internal.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
@@ -101,6 +105,53 @@ int nfs_return_empty_page(struct page *p
 	return 0;
 }
 
+#ifdef CONFIG_NFS_FSCACHE
+/*
+ * store a newly fetched page in fscache
+ */
+static void
+nfs_readpage_to_fscache_complete(struct page *page, void *data, int error)
+{
+	dfprintk(FSCACHE, 
+		"NFS:     readpage_to_fscache_complete (p:%p(i:%lx f:%lx)/%d)\n", 
+		page, page->index, page->flags, error);
+
+	end_page_fs_misc(page);
+}
+
+static inline void
+nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync)
+{
+	int ret;
+
+	dfprintk(FSCACHE, "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n",
+		NFS_I(inode)->fscache, page, page->index, page->flags, sync);
+
+	if (TestSetPageFsMisc(page))
+		BUG();
+	ret = fscache_write_page(NFS_I(inode)->fscache, page,
+		nfs_readpage_to_fscache_complete, NULL, GFP_KERNEL);
+	dfprintk(FSCACHE, 
+		"NFS:     readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n", 
+			page, page->index, page->flags, ret);
+	if (ret != 0) {
+		fscache_uncache_page(NFS_I(inode)->fscache, page);
+		nfs_fscache_uncache_page++;
+		ClearPageNfsCached(page);
+		end_page_fs_misc(page);
+		nfs_fscache_to_error = ret;
+	} else
+		nfs_fscache_to_pages++;
+}
+#else
+static inline void
+nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync)
+{
+	BUG();
+}
+#endif
+
+
 /*
  * Read a page synchronously.
  */
@@ -181,6 +232,14 @@ static int nfs_readpage_sync(struct nfs_
 		ClearPageError(page);
 	result = 0;
 
+#ifdef CONFIG_NFS_FSCACHE
+	if (PageNfsCached(page))
+		nfs_readpage_to_fscache(inode, page, 1);
+#endif
+	unlock_page(page);
+
+	return result;
+
 io_error:
 	unlock_page(page);
 	nfs_readdata_free(rdata);
@@ -212,6 +271,12 @@ static int nfs_readpage_async(struct nfs
 
 static void nfs_readpage_release(struct nfs_page *req)
 {
+#ifdef CONFIG_NFS_FSCACHE
+	struct inode *d_inode = req->wb_context->dentry->d_inode;
+
+	if (PageNfsCached(req->wb_page) && PageUptodate(req->wb_page))
+		nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
+#endif
 	unlock_page(req->wb_page);
 
 	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
@@ -535,6 +600,118 @@ int nfs_readpage_result(struct rpc_task 
 	return 0;
 }
 
+
+/*
+ * Read a page through the on-disc cache if possible
+ */
+#ifdef CONFIG_NFS_FSCACHE
+static void
+nfs_readpage_from_fscache_complete(struct page *page, void *data, int error)
+{
+	dfprintk(FSCACHE, 
+		"NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n",
+		page, data, error);
+
+	if (error)
+		SetPageError(page);
+	else
+		SetPageUptodate(page);
+
+	unlock_page(page);
+}
+
+static inline int
+nfs_readpage_from_fscache(struct inode *inode, struct page *page)
+{
+	int ret;
+
+	if (!NFS_I(inode)->fscache)
+		return 1;
+
+	dfprintk(FSCACHE, 
+		"NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n",
+		NFS_I(inode)->fscache, page, page->index, page->flags, inode);
+
+	ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache,
+					 page,
+					 nfs_readpage_from_fscache_complete,
+					 NULL,
+					 GFP_KERNEL);
+
+	switch (ret) {
+	case 0: /* read BIO submitted (page in fscache) */
+		dfprintk(FSCACHE, 
+			"NFS:    readpage_from_fscache: BIO submitted\n");
+		nfs_fscache_from_pages++;
+		return ret;
+
+	case -ENOBUFS: /* inode not in cache */
+	case -ENODATA: /* page not in cache */
+		dfprintk(FSCACHE, 
+			"NFS:    readpage_from_fscache error %d\n", ret);
+		return 1;
+
+	default:
+		dfprintk(FSCACHE, "NFS:    readpage_from_fscache %d\n", ret);
+		nfs_fscache_from_error = ret;
+	}
+    return ret;
+}
+
+static inline
+int nfs_getpages_from_fscache(struct inode *inode,
+	struct address_space *mapping,
+	struct list_head *pages,
+	unsigned *nr_pages)
+{
+	int ret, npages = *nr_pages;
+
+	if (!NFS_I(inode)->fscache)
+		return 1;
+
+	dfprintk(FSCACHE, 
+		"NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n",
+		NFS_I(inode)->fscache, *nr_pages, inode);
+
+	ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache,
+	  	mapping, pages, nr_pages, 
+	  	nfs_readpage_from_fscache_complete,
+	  	NULL, mapping_gfp_mask(mapping));
+
+
+	switch (ret) {
+	case 0: /* read BIO submitted (page in fscache) */
+		BUG_ON(!list_empty(pages));
+		BUG_ON(*nr_pages != 0);
+		dfprintk(FSCACHE, 
+			"NFS: nfs_getpages_from_fscache: BIO submitted\n");
+
+		nfs_fscache_from_pages += npages;
+		return ret;
+
+	case -ENOBUFS: /* inode not in cache */
+	case -ENODATA: /* page not in cache */
+		dfprintk(FSCACHE, 
+			"NFS: nfs_getpages_from_fscache: no page: %d\n", ret);
+		return 1;
+
+	default:
+		dfprintk(FSCACHE, 
+			"NFS: nfs_getpages_from_fscache: ret  %d\n", ret);
+		nfs_fscache_from_error = ret;
+	}
+
+	return ret;
+}
+#else
+static inline
+int nfs_getpages_from_fscache(struct inode *inode,
+	struct address_space *mapping,
+	struct list_head *pages,
+	unsigned *nr_pages)
+{ return 1; }
+#endif
+
 /*
  * Read a page over NFS.
  * We read the page synchronously in the following case:
@@ -571,6 +748,15 @@ int nfs_readpage(struct file *file, stru
 		ctx = get_nfs_open_context((struct nfs_open_context *)
 				file->private_data);
 	if (!IS_SYNC(inode)) {
+#ifdef CONFIG_NFS_FSCACHE
+		error = nfs_readpage_from_fscache(inode, page);
+#if 0
+		if (error < 0)
+			goto out_error;
+#endif
+		if (error == 0)
+			goto out;
+#endif
 		error = nfs_readpage_async(ctx, inode, page);
 		goto out;
 	}
@@ -601,6 +787,7 @@ readpage_async_filler(void *data, struct
 	unsigned int len;
 
 	nfs_wb_page(inode, page);
+
 	len = nfs_page_length(inode, page);
 	if (len == 0)
 		return nfs_return_empty_page(page);
@@ -633,6 +820,15 @@ int nfs_readpages(struct file *filp, str
 			nr_pages);
 	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
 
+#ifdef CONFIG_NFS_FSCACHE
+	/* attempt to read as many of the pages as possible from the cache
+	 * - this returns -ENOBUFS immediately if the cookie is negative
+	 */
+	ret = nfs_getpages_from_fscache(inode, mapping, pages, &nr_pages);
+	if (ret == 0)
+		return ret; /* all read */
+#endif
+
 	if (filp == NULL) {
 		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 		if (desc.ctx == NULL)
@@ -674,3 +870,48 @@ void __exit nfs_destroy_readpagecache(vo
 	if (kmem_cache_destroy(nfs_rdata_cachep))
 		printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
 }
+
+#ifdef CONFIG_NFS_FSCACHE
+void nfs_invalidatepage(struct page *page, unsigned long offset)
+{
+	BUG_ON(!PageLocked(page));
+
+	if (PageNfsCached(page)) {
+		struct nfs_inode *nfsi = NFS_I(page->mapping->host);
+
+		BUG_ON(nfsi->fscache == NULL);
+
+		dfprintk(FSCACHE,
+			"NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n",
+			 nfsi->fscache, page, nfsi);
+
+		if (offset == 0) {
+			BUG_ON(!PageLocked(page));
+			if (!PageWriteback(page))
+				page->mapping->a_ops->releasepage(page, 0);
+		}
+	}
+}
+
+int nfs_releasepage(struct page *page, gfp_t gfp_flags)
+{
+	struct nfs_inode *nfsi = NFS_I(page->mapping->host);
+
+	BUG_ON(nfsi->fscache == NULL);
+
+	dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
+		 nfsi->fscache, page, nfsi);
+
+	wait_on_page_fs_misc(page);
+	fscache_uncache_page(nfsi->fscache, page);
+	nfs_fscache_uncache_page++;
+	ClearPageNfsCached(page);
+	return 0;
+}
+
+int nfs_mkwrite(struct page *page)
+{
+	wait_on_page_fs_misc(page);
+	return 0;
+}
+#endif
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 9bdaf07..bb6c5a6 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -295,6 +295,7 @@ static void nfs_show_mount_options(struc
 		{ NFS_MOUNT_NOAC, ",noac", "" },
 		{ NFS_MOUNT_NONLM, ",nolock", "" },
 		{ NFS_MOUNT_NOACL, ",noacl", "" },
+		{ NFS_MOUNT_FSCACHE, ",fsc", "" },
 		{ 0, NULL, NULL }
 	};
 	const struct proc_nfs_info *nfs_infop;
@@ -512,6 +513,16 @@ #endif /* CONFIG_NFS_V3 */
 		memset(mntfh->data + mntfh->size, 0,
 		       sizeof(mntfh->data) - mntfh->size);
 
+	/* if filesystem caching isn't compiled in, then requesting its use is
+	 * invalid */
+#ifndef CONFIG_NFS_FSCACHE
+	if (data->flags & NFS_MOUNT_FSCACHE) {
+		printk(KERN_WARNING
+		       "NFS: kernel not compiled with CONFIG_NFS_FSCACHE\n");
+		return -EINVAL;
+	}
+#endif
+
 	return 0;
 }
 
@@ -821,6 +832,16 @@ static int nfs4_get_sb(struct file_syste
 		return -EINVAL;
 	}
 
+	/* if filesystem caching isn't compiled in, then requesting its use is
+	 * invalid */
+#ifndef CONFIG_NFS_FSCACHE
+	if (data->flags & NFS_MOUNT_FSCACHE) {
+		printk(KERN_WARNING
+		       "NFS: kernel not compiled with CONFIG_NFS_FSCACHE\n");
+		return -EINVAL;
+	}
+#endif
+
 	/* We now require that the mount process passes the remote address */
 	if (data->host_addrlen != sizeof(addr))
 		return -EINVAL;
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index db61e51..5f020b1 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -15,6 +15,7 @@ #include <linux/nfs_idmap.h>
 #include <linux/nfs_fs.h>
 
 #include "callback.h"
+#include "nfs-fscache.h"
 
 static const int nfs_set_port_min = 0;
 static const int nfs_set_port_max = 65535;
@@ -56,6 +57,48 @@ #endif
 		.proc_handler	= &proc_dointvec_jiffies,
 		.strategy	= &sysctl_jiffies,
 	},
+#ifdef CONFIG_NFS_FSCACHE
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "fscache_from_error",
+		.data = &nfs_fscache_from_error,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "fscache_to_error",
+		.data = &nfs_fscache_to_error,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "fscache_uncache_page",
+		.data = &nfs_fscache_uncache_page,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "fscache_to_pages",
+		.data = &nfs_fscache_to_pages,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec_minmax,
+	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "fscache_from_pages",
+		.data = &nfs_fscache_from_pages,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+	},
+#endif
 	{ .ctl_name = 0 }
 };
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6a90ccc..31a80a5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -65,6 +65,9 @@ #include <linux/smp_lock.h>
 #include "delegation.h"
 #include "iostat.h"
 
+#include "nfs-fscache.h"
+#include "internal.h"
+
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
 #define MIN_POOL_WRITE		(32)
@@ -164,6 +167,9 @@ static void nfs_grow_file(struct page *p
 		return;
 	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
 	i_size_write(inode, end);
+#ifdef FSCACHE_WRITE_SUPPORT
+	nfs_set_fscsize(NFS_SERVER(inode), NFS_I(inode), end);
+#endif
 }
 
 /* We can set the PG_uptodate flag if we see that a write request
@@ -296,6 +302,47 @@ static int wb_priority(struct writeback_
 }
 
 /*
+ * store an updated page in fscache
+ */
+#ifdef CONFIG_NFS_FSCACHE
+static void
+nfs_writepage_to_fscache_complete(struct page *page, void *data, int error)
+{
+	/* really need to synchronise the end of writeback, probably using a page flag */
+}
+static inline void
+nfs_writepage_to_fscache(struct inode *inode, struct page *page)
+{
+	int ret; 
+
+	if (!NFS_I(inode)->fscache)
+		return;
+
+	if (PageNfsCached(page)) {
+		dfprintk(FSCACHE,
+			"NFS: writepage_to_fscache (0x%p/0x%p/0x%p)\n",
+			NFS_I(inode)->fscache, page, inode);
+
+		ret = fscache_write_page(NFS_I(inode)->fscache, page,
+					 nfs_writepage_to_fscache_complete,
+					 NULL, GFP_KERNEL);
+		if (ret != 0) {
+			dfprintk(FSCACHE,
+				"NFS:    fscache_write_page error %d\n", ret);
+				fscache_uncache_page(NFS_I(inode)->fscache, 
+				page);
+		}
+	}
+}
+#else
+static inline void
+nfs_writepage_to_fscache(struct inode *inode, struct page *page)
+{
+	BUG_ON(PageNfsCached(page));
+}
+#endif
+
+/*
  * Write an mmapped page to the server.
  */
 int nfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -343,6 +390,9 @@ do_it:
 		err = -EBADF;
 		goto out;
 	}
+#ifdef FSCACHE_WRITE_SUPPORT
+	nfs_writepage_to_fscache(inode, page);
+#endif
 	lock_kernel();
 	if (!IS_SYNC(inode) && inode_referenced) {
 		err = nfs_writepage_async(ctx, inode, page, 0, offset);
@@ -425,7 +475,7 @@ static int nfs_inode_add_request(struct 
 		if (nfs_have_delegation(inode, FMODE_WRITE))
 			nfsi->change_attr++;
 	}
-	SetPagePrivate(req->wb_page);
+	SetPageNfsWriting(req->wb_page);
 	nfsi->npages++;
 	atomic_inc(&req->wb_count);
 	return 0;
@@ -442,7 +492,7 @@ static void nfs_inode_remove_request(str
 	BUG_ON (!NFS_WBACK_BUSY(req));
 
 	spin_lock(&nfsi->req_lock);
-	ClearPagePrivate(req->wb_page);
+	ClearPageNfsWriting(req->wb_page);
 	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
 	nfsi->npages--;
 	if (!nfsi->npages) {
diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h
index 26b4c83..15199cc 100644
--- a/include/linux/nfs4_mount.h
+++ b/include/linux/nfs4_mount.h
@@ -65,6 +65,7 @@ #define NFS4_MOUNT_INTR		0x0002	/* 1 */
 #define NFS4_MOUNT_NOCTO	0x0010	/* 1 */
 #define NFS4_MOUNT_NOAC		0x0020	/* 1 */
 #define NFS4_MOUNT_STRICTLOCK	0x1000	/* 1 */
+#define NFS4_MOUNT_FSCACHE	0x4000	/* 1 */
 #define NFS4_MOUNT_FLAGMASK	0xFFFF
 
 #endif
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c39538e..6050fe0 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -30,6 +30,7 @@ #include <linux/nfs_fs_sb.h>
 
 #include <linux/rwsem.h>
 #include <linux/mempool.h>
+#include <linux/fscache.h>
 
 /*
  * Enable debugging support for nfs client.
@@ -180,6 +181,9 @@ #ifdef CONFIG_NFS_V4
 	int			 delegation_state;
 	struct rw_semaphore	rwsem;
 #endif /* CONFIG_NFS_V4*/
+#ifdef CONFIG_NFS_FSCACHE
+	struct fscache_cookie	*fscache;
+#endif
 	struct inode		vfs_inode;
 };
 
@@ -579,6 +583,7 @@ #define NFSDBG_FILE		0x0040
 #define NFSDBG_ROOT		0x0080
 #define NFSDBG_CALLBACK		0x0100
 #define NFSDBG_CLIENT		0x0200
+#define NFSDBG_FSCACHE		0x0400
 #define NFSDBG_ALL		0xFFFF
 
 #ifdef __KERNEL__
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index f99dd66..e20189f 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -3,6 +3,7 @@ #define _NFS_FS_SB
 
 #include <linux/list.h>
 #include <linux/backing-dev.h>
+#include <linux/fscache.h>
 
 struct nfs_iostats;
 
@@ -86,6 +87,10 @@ #ifdef CONFIG_NFS_V4
 	 */
 	char			ip_addr[16];
 #endif
+
+#ifdef CONFIG_NFS_FSCACHE
+	struct fscache_cookie	*fscache;	/* client index cache cookie */
+#endif
 };
 
 /*
diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h
index 659c754..278bb4e 100644
--- a/include/linux/nfs_mount.h
+++ b/include/linux/nfs_mount.h
@@ -61,6 +61,7 @@ #define NFS_MOUNT_BROKEN_SUID	0x0400	/* 
 #define NFS_MOUNT_NOACL		0x0800	/* 4 */
 #define NFS_MOUNT_STRICTLOCK	0x1000	/* reserved for NFSv4 */
 #define NFS_MOUNT_SECFLAVOUR	0x2000	/* 5 */
+#define NFS_MOUNT_FSCACHE	0x4000
 #define NFS_MOUNT_FLAGMASK	0xFFFF
 
 #endif


^ permalink raw reply related

* [PATCH 11/14] FS-Cache: Make kAFS use FS-Cache [try #8]
From: David Howells @ 2006-05-10 16:01 UTC (permalink / raw)
  To: torvalds, akpm, steved, trond.myklebust, aviro
  Cc: linux-fsdevel, linux-cachefs, nfsv4, linux-kernel
In-Reply-To: <20060510160111.9058.55026.stgit@warthog.cambridge.redhat.com>

The attached patch makes the kAFS filesystem in fs/afs/ use FS-Cache, and
through it any attached caches.  The kAFS filesystem will use caching
automatically if it's available.

Signed-Off-By: David Howells <dhowells@redhat.com>
---

 fs/Kconfig         |    7 +
 fs/afs/cache.h     |   27 ------
 fs/afs/cell.c      |  109 ++++++++++++++---------
 fs/afs/cell.h      |   16 +--
 fs/afs/cmservice.c |    2 
 fs/afs/dir.c       |   10 +-
 fs/afs/file.c      |  237 +++++++++++++++++++++++++++++++++-----------------
 fs/afs/fsclient.c  |    4 +
 fs/afs/inode.c     |   43 ++++++---
 fs/afs/internal.h  |   24 ++---
 fs/afs/main.c      |   24 ++---
 fs/afs/mntpt.c     |   12 +--
 fs/afs/proc.c      |    1 
 fs/afs/server.c    |    3 -
 fs/afs/vlocation.c |  179 +++++++++++++++++++++++---------------
 fs/afs/vnode.c     |  248 +++++++++++++++++++++++++++++++++++++++++++---------
 fs/afs/vnode.h     |   10 +-
 fs/afs/volume.c    |   78 ++++++----------
 fs/afs/volume.h    |   28 +-----
 19 files changed, 654 insertions(+), 408 deletions(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index 66acf29..6c95e58 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1816,6 +1816,13 @@ # for fs/nls/Config.in
 
 	  If unsure, say N.
 
+config AFS_FSCACHE
+	bool "Provide AFS client caching support"
+	depends on AFS_FS && FSCACHE && EXPERIMENTAL
+	help
+	  Say Y here if you want AFS data to be cached locally on through the
+	  generic filesystem cache manager
+
 config RXRPC
 	tristate
 
diff --git a/fs/afs/cache.h b/fs/afs/cache.h
deleted file mode 100644
index 9eb7722..0000000
--- a/fs/afs/cache.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* cache.h: AFS local cache management interface
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _LINUX_AFS_CACHE_H
-#define _LINUX_AFS_CACHE_H
-
-#undef AFS_CACHING_SUPPORT
-
-#include <linux/mm.h>
-#ifdef AFS_CACHING_SUPPORT
-#include <linux/cachefs.h>
-#endif
-#include "types.h"
-
-#ifdef __KERNEL__
-
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_AFS_CACHE_H */
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 009a9ae..93a0846 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -31,17 +31,21 @@ static DEFINE_RWLOCK(afs_cells_lock);
 static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */
 static struct afs_cell *afs_cell_root;
 
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_cell_cache_match(void *target,
-						const void *entry);
-static void afs_cell_cache_update(void *source, void *entry);
-
-struct cachefs_index_def afs_cache_cell_index_def = {
-	.name			= "cell_ix",
-	.data_size		= sizeof(struct afs_cache_cell),
-	.keys[0]		= { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
-	.match			= afs_cell_cache_match,
-	.update			= afs_cell_cache_update,
+#ifdef CONFIG_AFS_FSCACHE
+static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
+				       void *buffer, uint16_t buflen);
+static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
+				       void *buffer, uint16_t buflen);
+static fscache_checkaux_t afs_cell_cache_check_aux(void *cookie_netfs_data,
+						   const void *buffer,
+						   uint16_t buflen);
+
+static struct fscache_cookie_def afs_cell_cache_index_def = {
+	.name		= "AFS cell",
+	.type		= FSCACHE_COOKIE_TYPE_INDEX,
+	.get_key	= afs_cell_cache_get_key,
+	.get_aux	= afs_cell_cache_get_aux,
+	.check_aux	= afs_cell_cache_check_aux,
 };
 #endif
 
@@ -115,12 +119,11 @@ int afs_cell_create(const char *name, ch
 	if (ret < 0)
 		goto error;
 
-#ifdef AFS_CACHING_SUPPORT
-	/* put it up for caching */
-	cachefs_acquire_cookie(afs_cache_netfs.primary_index,
-			       &afs_vlocation_cache_index_def,
-			       cell,
-			       &cell->cache);
+#ifdef CONFIG_AFS_FSCACHE
+	/* put it up for caching (this never returns an error) */
+	cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index,
+					     &afs_cell_cache_index_def,
+					     cell);
 #endif
 
 	/* add to the cell lists */
@@ -345,8 +348,8 @@ static void afs_cell_destroy(struct afs_
 	list_del_init(&cell->proc_link);
 	up_write(&afs_proc_cells_sem);
 
-#ifdef AFS_CACHING_SUPPORT
-	cachefs_relinquish_cookie(cell->cache, 0);
+#ifdef CONFIG_AFS_FSCACHE
+	fscache_relinquish_cookie(cell->cache, 0);
 #endif
 
 	up_write(&afs_cells_sem);
@@ -526,44 +529,62 @@ void afs_cell_purge(void)
 
 /*****************************************************************************/
 /*
- * match a cell record obtained from the cache
+ * set the key for the index entry
  */
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_cell_cache_match(void *target,
-						const void *entry)
+#ifdef CONFIG_AFS_FSCACHE
+static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
+				       void *buffer, uint16_t bufmax)
 {
-	const struct afs_cache_cell *ccell = entry;
-	struct afs_cell *cell = target;
+	const struct afs_cell *cell = cookie_netfs_data;
+	uint16_t klen;
 
-	_enter("{%s},{%s}", ccell->name, cell->name);
+	_enter("%p,%p,%u", cell, buffer, bufmax);
 
-	if (strncmp(ccell->name, cell->name, sizeof(ccell->name)) == 0) {
-		_leave(" = SUCCESS");
-		return CACHEFS_MATCH_SUCCESS;
-	}
+	klen = strlen(cell->name);
+	if (klen > bufmax)
+		return 0;
+
+	memcpy(buffer, cell->name, klen);
+	return klen;
 
-	_leave(" = FAILED");
-	return CACHEFS_MATCH_FAILED;
-} /* end afs_cell_cache_match() */
+} /* end afs_cell_cache_get_key() */
 #endif
 
 /*****************************************************************************/
 /*
- * update a cell record in the cache
+ * provide new auxilliary cache data
  */
-#ifdef AFS_CACHING_SUPPORT
-static void afs_cell_cache_update(void *source, void *entry)
+#ifdef CONFIG_AFS_FSCACHE
+static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
+				       void *buffer, uint16_t bufmax)
 {
-	struct afs_cache_cell *ccell = entry;
-	struct afs_cell *cell = source;
+	const struct afs_cell *cell = cookie_netfs_data;
+	uint16_t dlen;
 
-	_enter("%p,%p", source, entry);
+	_enter("%p,%p,%u", cell, buffer, bufmax);
 
-	strncpy(ccell->name, cell->name, sizeof(ccell->name));
+	dlen = cell->vl_naddrs * sizeof(cell->vl_addrs[0]);
+	dlen = min(dlen, bufmax);
+	dlen &= ~(sizeof(cell->vl_addrs[0]) - 1);
 
-	memcpy(ccell->vl_servers,
-	       cell->vl_addrs,
-	       min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs)));
+	memcpy(buffer, cell->vl_addrs, dlen);
+
+	return dlen;
+
+} /* end afs_cell_cache_get_aux() */
+#endif
+
+/*****************************************************************************/
+/*
+ * check that the auxilliary data indicates that the entry is still valid
+ */
+#ifdef CONFIG_AFS_FSCACHE
+static fscache_checkaux_t afs_cell_cache_check_aux(void *cookie_netfs_data,
+						   const void *buffer,
+						   uint16_t buflen)
+{
+	_leave(" = OKAY");
+	return FSCACHE_CHECKAUX_OKAY;
 
-} /* end afs_cell_cache_update() */
+} /* end afs_cell_cache_check_aux() */
 #endif
diff --git a/fs/afs/cell.h b/fs/afs/cell.h
index 4834910..d670502 100644
--- a/fs/afs/cell.h
+++ b/fs/afs/cell.h
@@ -13,7 +13,7 @@ #ifndef _LINUX_AFS_CELL_H
 #define _LINUX_AFS_CELL_H
 
 #include "types.h"
-#include "cache.h"
+#include <linux/fscache.h>
 
 #define AFS_CELL_MAX_ADDRS 15
 
@@ -21,16 +21,6 @@ extern volatile int afs_cells_being_purg
 
 /*****************************************************************************/
 /*
- * entry in the cached cell catalogue
- */
-struct afs_cache_cell
-{
-	char			name[64];	/* cell name (padded with NULs) */
-	struct in_addr		vl_servers[15];	/* cached cell VL servers */
-};
-
-/*****************************************************************************/
-/*
  * AFS cell record
  */
 struct afs_cell
@@ -39,8 +29,8 @@ struct afs_cell
 	struct list_head	link;		/* main cell list link */
 	struct list_head	proc_link;	/* /proc cell list link */
 	struct proc_dir_entry	*proc_dir;	/* /proc dir for this cell */
-#ifdef AFS_CACHING_SUPPORT
-	struct cachefs_cookie	*cache;		/* caching cookie */
+#ifdef CONFIG_AFS_FSCACHE
+	struct fscache_cookie	*cache;		/* caching cookie */
 #endif
 
 	/* server record management */
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 3d097fd..f87d5a7 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -24,7 +24,7 @@ #include "cmservice.h"
 #include "internal.h"
 
 static unsigned afscm_usage;		/* AFS cache manager usage count */
-static struct rw_semaphore afscm_sem;	/* AFS cache manager start/stop semaphore */
+static DECLARE_RWSEM(afscm_sem);	/* AFS cache manager start/stop semaphore */
 
 static int afscm_new_call(struct rxrpc_call *call);
 static void afscm_attention(struct rxrpc_call *call);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index c23de2b..b8e7c32 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -145,7 +145,7 @@ #endif
 	qty /= sizeof(union afs_dir_block);
 
 	/* check them */
-	dbuf = page_address(page);
+	dbuf = kmap_atomic(page, KM_USER0);
 	for (tmp = 0; tmp < qty; tmp++) {
 		if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) {
 			printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n",
@@ -154,10 +154,12 @@ #endif
 			goto error;
 		}
 	}
+	kunmap_atomic(dbuf, KM_USER0);
 
 	return;
 
  error:
+	kunmap_atomic(dbuf, KM_USER0);
 	SetPageError(page);
 
 } /* end afs_dir_check_page() */
@@ -168,7 +170,6 @@ #endif
  */
 static inline void afs_dir_put_page(struct page *page)
 {
-	kunmap(page);
 	page_cache_release(page);
 
 } /* end afs_dir_put_page() */
@@ -188,7 +189,6 @@ static struct page *afs_dir_get_page(str
 			       NULL);
 	if (!IS_ERR(page)) {
 		wait_on_page_locked(page);
-		kmap(page);
 		if (!PageUptodate(page))
 			goto fail;
 		afs_dir_check_page(dir, page);
@@ -356,7 +356,7 @@ static int afs_dir_iterate(struct inode 
 
 		limit = blkoff & ~(PAGE_SIZE - 1);
 
-		dbuf = page_address(page);
+		dbuf = kmap_atomic(page, KM_USER0);
 
 		/* deal with the individual blocks stashed on this page */
 		do {
@@ -365,6 +365,7 @@ static int afs_dir_iterate(struct inode 
 			ret = afs_dir_iterate_block(fpos, dblock, blkoff,
 						    cookie, filldir);
 			if (ret != 1) {
+				kunmap_atomic(dbuf, KM_USER0);
 				afs_dir_put_page(page);
 				goto out;
 			}
@@ -373,6 +374,7 @@ static int afs_dir_iterate(struct inode 
 
 		} while (*fpos < dir->i_size && blkoff < limit);
 
+		kunmap_atomic(dbuf, KM_USER0);
 		afs_dir_put_page(page);
 		ret = 0;
 	}
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 7bb7168..21e45bb 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -16,12 +16,15 @@ #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
+#include <linux/pagevec.h>
 #include <linux/buffer_head.h>
 #include "volume.h"
 #include "vnode.h"
 #include <rxrpc/call.h>
 #include "internal.h"
 
+#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
+
 #if 0
 static int afs_file_open(struct inode *inode, struct file *file);
 static int afs_file_release(struct inode *inode, struct file *file);
@@ -30,30 +33,68 @@ #endif
 static int afs_file_readpage(struct file *file, struct page *page);
 static void afs_file_invalidatepage(struct page *page, unsigned long offset);
 static int afs_file_releasepage(struct page *page, gfp_t gfp_flags);
+static int afs_file_mmap(struct file * file, struct vm_area_struct * vma);
+
+#ifdef CONFIG_AFS_FSCACHE
+static int afs_file_readpages(struct file *filp, struct address_space *mapping,
+			      struct list_head *pages, unsigned nr_pages);
+static int afs_file_page_mkwrite(struct vm_area_struct *vma, struct page *page);
+#endif
 
 struct inode_operations afs_file_inode_operations = {
 	.getattr	= afs_inode_getattr,
 };
 
+struct file_operations afs_file_file_operations = {
+	.read		= generic_file_read,
+	.mmap		= afs_file_mmap,
+};
+
 struct address_space_operations afs_fs_aops = {
 	.readpage	= afs_file_readpage,
+#ifdef CONFIG_AFS_FSCACHE
+	.readpages	= afs_file_readpages,
+#endif
 	.sync_page	= block_sync_page,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
 	.releasepage	= afs_file_releasepage,
 	.invalidatepage	= afs_file_invalidatepage,
 };
 
+static struct vm_operations_struct afs_fs_vm_operations = {
+	.nopage		= filemap_nopage,
+	.populate	= filemap_populate,
+#ifdef CONFIG_AFS_FSCACHE
+	.page_mkwrite	= afs_file_page_mkwrite,
+#endif
+};
+
+/*****************************************************************************/
+/*
+ * set up a memory mapping on an AFS file
+ * - we set our own VMA ops so that we can catch the page becoming writable for
+ *   userspace for shared-writable mmap
+ */
+static int afs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	_enter("");
+
+	file_accessed(file);
+	vma->vm_ops = &afs_fs_vm_operations;
+	return 0;
+
+} /* end afs_file_mmap() */
+
 /*****************************************************************************/
 /*
  * deal with notification that a page was read from the cache
  */
-#ifdef AFS_CACHING_SUPPORT
-static void afs_file_readpage_read_complete(void *cookie_data,
-					    struct page *page,
+#ifdef CONFIG_AFS_FSCACHE
+static void afs_file_readpage_read_complete(struct page *page,
 					    void *data,
 					    int error)
 {
-	_enter("%p,%p,%p,%d", cookie_data, page, data, error);
+	_enter("%p,%p,%d", page, data, error);
 
 	if (error)
 		SetPageError(page);
@@ -68,15 +109,16 @@ #endif
 /*
  * deal with notification that a page was written to the cache
  */
-#ifdef AFS_CACHING_SUPPORT
-static void afs_file_readpage_write_complete(void *cookie_data,
-					     struct page *page,
+#ifdef CONFIG_AFS_FSCACHE
+static void afs_file_readpage_write_complete(struct page *page,
 					     void *data,
 					     int error)
 {
-	_enter("%p,%p,%p,%d", cookie_data, page, data, error);
+	_enter("%p,%p,%d", page, data, error);
 
-	unlock_page(page);
+	/* note that the page has been written to the cache and can now be
+	 * modified */
+	end_page_fs_misc(page);
 
 } /* end afs_file_readpage_write_complete() */
 #endif
@@ -88,16 +130,13 @@ #endif
 static int afs_file_readpage(struct file *file, struct page *page)
 {
 	struct afs_rxfs_fetch_descriptor desc;
-#ifdef AFS_CACHING_SUPPORT
-	struct cachefs_page *pageio;
-#endif
 	struct afs_vnode *vnode;
 	struct inode *inode;
 	int ret;
 
 	inode = page->mapping->host;
 
-	_enter("{%lu},{%lu}", inode->i_ino, page->index);
+	_enter("{%lu},%p{%lu}", inode->i_ino, page, page->index);
 
 	vnode = AFS_FS_I(inode);
 
@@ -107,13 +146,9 @@ #endif
 	if (vnode->flags & AFS_VNODE_DELETED)
 		goto error;
 
-#ifdef AFS_CACHING_SUPPORT
-	ret = cachefs_page_get_private(page, &pageio, GFP_NOIO);
-	if (ret < 0)
-		goto error;
-
+#ifdef CONFIG_AFS_FSCACHE
 	/* is it cached? */
-	ret = cachefs_read_or_alloc_page(vnode->cache,
+	ret = fscache_read_or_alloc_page(vnode->cache,
 					 page,
 					 afs_file_readpage_read_complete,
 					 NULL,
@@ -123,18 +158,20 @@ #else
 #endif
 
 	switch (ret) {
-		/* read BIO submitted and wb-journal entry found */
-	case 1:
-		BUG(); // TODO - handle wb-journal match
-
 		/* read BIO submitted (page in cache) */
 	case 0:
 		break;
 
-		/* no page available in cache */
-	case -ENOBUFS:
+		/* page not yet cached */
 	case -ENODATA:
+		_debug("cache said ENODATA");
+		goto go_on;
+
+		/* page will not be cached */
+	case -ENOBUFS:
+		_debug("cache said ENOBUFS");
 	default:
+	go_on:
 		desc.fid	= vnode->fid;
 		desc.offset	= page->index << PAGE_CACHE_SHIFT;
 		desc.size	= min((size_t) (inode->i_size - desc.offset),
@@ -148,34 +185,40 @@ #endif
 		ret = afs_vnode_fetch_data(vnode, &desc);
 		kunmap(page);
 		if (ret < 0) {
-			if (ret==-ENOENT) {
-				_debug("got NOENT from server"
+			if (ret == -ENOENT) {
+				kdebug("got NOENT from server"
 				       " - marking file deleted and stale");
 				vnode->flags |= AFS_VNODE_DELETED;
 				ret = -ESTALE;
 			}
 
-#ifdef AFS_CACHING_SUPPORT
-			cachefs_uncache_page(vnode->cache, page);
+#ifdef CONFIG_AFS_FSCACHE
+			fscache_uncache_page(vnode->cache, page);
+			ClearPagePrivate(page);
 #endif
 			goto error;
 		}
 
 		SetPageUptodate(page);
 
-#ifdef AFS_CACHING_SUPPORT
-		if (cachefs_write_page(vnode->cache,
-				       page,
-				       afs_file_readpage_write_complete,
-				       NULL,
-				       GFP_KERNEL) != 0
-		    ) {
-			cachefs_uncache_page(vnode->cache, page);
-			unlock_page(page);
+		/* send the page to the cache */
+#ifdef CONFIG_AFS_FSCACHE
+		if (PagePrivate(page)) {
+			if (TestSetPageFsMisc(page))
+				BUG();
+			if (fscache_write_page(vnode->cache,
+					       page,
+					       afs_file_readpage_write_complete,
+					       NULL,
+					       GFP_KERNEL) != 0
+			    ) {
+				fscache_uncache_page(vnode->cache, page);
+				ClearPagePrivate(page);
+				end_page_fs_misc(page);
+			}
 		}
-#else
-		unlock_page(page);
 #endif
+		unlock_page(page);
 	}
 
 	_leave(" = 0");
@@ -192,20 +235,63 @@ #endif
 
 /*****************************************************************************/
 /*
- * get a page cookie for the specified page
+ * read a set of pages
  */
-#ifdef AFS_CACHING_SUPPORT
-int afs_cache_get_page_cookie(struct page *page,
-			      struct cachefs_page **_page_cookie)
+#ifdef CONFIG_AFS_FSCACHE
+static int afs_file_readpages(struct file *filp, struct address_space *mapping,
+			      struct list_head *pages, unsigned nr_pages)
 {
-	int ret;
+	struct afs_vnode *vnode;
+#if 0
+	struct pagevec lru_pvec;
+	unsigned page_idx;
+#endif
+	int ret = 0;
 
-	_enter("");
-	ret = cachefs_page_get_private(page,_page_cookie, GFP_NOIO);
+	_enter(",{%lu},,%d", mapping->host->i_ino, nr_pages);
 
-	_leave(" = %d", ret);
+	vnode = AFS_FS_I(mapping->host);
+	if (vnode->flags & AFS_VNODE_DELETED) {
+		_leave(" = -ESTALE");
+		return -ESTALE;
+	}
+
+	/* attempt to read as many of the pages as possible */
+	ret = fscache_read_or_alloc_pages(vnode->cache,
+					  mapping,
+					  pages,
+					  &nr_pages,
+					  afs_file_readpage_read_complete,
+					  NULL,
+					  mapping_gfp_mask(mapping));
+
+	switch (ret) {
+		/* all pages are being read from the cache */
+	case 0:
+		BUG_ON(!list_empty(pages));
+		BUG_ON(nr_pages != 0);
+		_leave(" = 0 [reading all]");
+		return 0;
+
+		/* there were pages that couldn't be read from the cache */
+	case -ENODATA:
+	case -ENOBUFS:
+		break;
+
+		/* other error */
+	default:
+		_leave(" = %d", ret);
+		return ret;
+	}
+
+	/* load the missing pages from the network */
+	ret = read_cache_pages(mapping, pages,
+			       (void *) afs_file_readpage, NULL);
+
+	_leave(" = %d [netting]", ret);
 	return ret;
-} /* end afs_cache_get_page_cookie() */
+
+} /* end afs_file_readpages() */
 #endif
 
 /*****************************************************************************/
@@ -214,35 +300,22 @@ #endif
  */
 static void afs_file_invalidatepage(struct page *page, unsigned long offset)
 {
-	int ret = 1;
-
 	_enter("{%lu},%lu", page->index, offset);
 
 	BUG_ON(!PageLocked(page));
 
 	if (PagePrivate(page)) {
-#ifdef AFS_CACHING_SUPPORT
-		struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
-		cachefs_uncache_page(vnode->cache,page);
-#endif
-
 		/* We release buffers only if the entire page is being
 		 * invalidated.
 		 * The get_block cached value has been unconditionally
 		 * invalidated, so real IO is not possible anymore.
 		 */
-		if (offset == 0) {
-			BUG_ON(!PageLocked(page));
-
-			ret = 0;
-			if (!PageWriteback(page))
-				ret = page->mapping->a_ops->releasepage(page,
-									0);
-			/* possibly should BUG_ON(!ret); - neilb */
-		}
+		if (offset == 0 && !PageWriteback(page))
+			page->mapping->a_ops->releasepage(page, 0);
 	}
 
-	_leave(" = %d", ret);
+	_leave("");
+
 } /* end afs_file_invalidatepage() */
 
 /*****************************************************************************/
@@ -251,23 +324,29 @@ #endif
  */
 static int afs_file_releasepage(struct page *page, gfp_t gfp_flags)
 {
-	struct cachefs_page *pageio;
-
 	_enter("{%lu},%x", page->index, gfp_flags);
 
-	if (PagePrivate(page)) {
-#ifdef AFS_CACHING_SUPPORT
-		struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
-		cachefs_uncache_page(vnode->cache, page);
+#ifdef CONFIG_AFS_FSCACHE
+	wait_on_page_fs_misc(page);
+	fscache_uncache_page(AFS_FS_I(page->mapping->host)->cache, page);
+	ClearPagePrivate(page);
 #endif
 
-		pageio = (struct cachefs_page *) page_private(page);
-		set_page_private(page, 0);
-		ClearPagePrivate(page);
-
-		kfree(pageio);
-	}
-
 	_leave(" = 0");
 	return 0;
+
 } /* end afs_file_releasepage() */
+
+/*****************************************************************************/
+/*
+ * wait for the disc cache to finish writing before permitting modification of
+ * our page in the page cache
+ */
+#ifdef CONFIG_AFS_FSCACHE
+static int afs_file_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+	wait_on_page_fs_misc(page);
+	return 0;
+
+} /* end afs_file_page_mkwrite() */
+#endif
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 61bc371..c88c41a 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -398,6 +398,8 @@ int afs_rxfs_fetch_file_status(struct af
 		bp++; /* spare6 */
 	}
 
+	_debug("Data Version %llx\n", vnode->status.version);
+
 	/* success */
 	ret = 0;
 
@@ -408,7 +410,7 @@ int afs_rxfs_fetch_file_status(struct af
  out_put_conn:
 	afs_server_release_callslot(server, &callslot);
  out:
-	_leave("");
+	_leave(" = %d", ret);
 	return ret;
 
  abort:
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 4ebb30a..d188380 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -65,6 +65,11 @@ static int afs_inode_map_status(struct a
 		return -EBADMSG;
 	}
 
+#ifdef CONFIG_AFS_FSCACHE
+	if (vnode->status.size != inode->i_size)
+		fscache_set_i_size(vnode->cache, vnode->status.size);
+#endif
+
 	inode->i_nlink		= vnode->status.nlink;
 	inode->i_uid		= vnode->status.owner;
 	inode->i_gid		= 0;
@@ -101,13 +106,33 @@ static int afs_inode_fetch_status(struct
 	struct afs_vnode *vnode;
 	int ret;
 
+	_enter("");
+
 	vnode = AFS_FS_I(inode);
 
 	ret = afs_vnode_fetch_status(vnode);
 
-	if (ret == 0)
+	if (ret == 0) {
+#ifdef CONFIG_AFS_FSCACHE
+		if (vnode->cache == FSCACHE_NEGATIVE_COOKIE) {
+			vnode->cache =
+				fscache_acquire_cookie(vnode->volume->cache,
+						       &afs_vnode_cache_index_def,
+						       vnode);
+			if (!vnode->cache)
+				printk("Negative\n");
+		}
+#endif
 		ret = afs_inode_map_status(vnode);
+#ifdef CONFIG_AFS_FSCACHE
+		if (ret < 0) {
+			fscache_relinquish_cookie(vnode->cache, 0);
+			vnode->cache = FSCACHE_NEGATIVE_COOKIE;
+		}
+#endif
+	}
 
+	_leave(" = %d", ret);
 	return ret;
 
 } /* end afs_inode_fetch_status() */
@@ -122,6 +147,7 @@ static int afs_iget5_test(struct inode *
 
 	return inode->i_ino == data->fid.vnode &&
 		inode->i_version == data->fid.unique;
+
 } /* end afs_iget5_test() */
 
 /*****************************************************************************/
@@ -179,20 +205,11 @@ inline int afs_iget(struct super_block *
 		return ret;
 	}
 
-#ifdef AFS_CACHING_SUPPORT
-	/* set up caching before reading the status, as fetch-status reads the
-	 * first page of symlinks to see if they're really mntpts */
-	cachefs_acquire_cookie(vnode->volume->cache,
-			       NULL,
-			       vnode,
-			       &vnode->cache);
-#endif
-
 	/* okay... it's a new inode */
 	inode->i_flags |= S_NOATIME;
 	vnode->flags |= AFS_VNODE_CHANGED;
 	ret = afs_inode_fetch_status(inode);
-	if (ret<0)
+	if (ret < 0)
 		goto bad_inode;
 
 	/* success */
@@ -278,8 +295,8 @@ void afs_clear_inode(struct inode *inode
 
 	afs_vnode_give_up_callback(vnode);
 
-#ifdef AFS_CACHING_SUPPORT
-	cachefs_relinquish_cookie(vnode->cache, 0);
+#ifdef CONFIG_AFS_FSCACHE
+	fscache_relinquish_cookie(vnode->cache, 0);
 	vnode->cache = NULL;
 #endif
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 72febdf..0bddcdf 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -16,15 +16,17 @@ #include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
+#include <linux/fscache.h>
 
 /*
  * debug tracing
  */
-#define kenter(FMT, a...)	printk("==> %s("FMT")\n",__FUNCTION__ , ## a)
-#define kleave(FMT, a...)	printk("<== %s()"FMT"\n",__FUNCTION__ , ## a)
-#define kdebug(FMT, a...)	printk(FMT"\n" , ## a)
-#define kproto(FMT, a...)	printk("### "FMT"\n" , ## a)
-#define knet(FMT, a...)		printk(FMT"\n" , ## a)
+#define __kdbg(FMT, a...)	printk("[%05d] "FMT"\n", current->pid , ## a)
+#define kenter(FMT, a...)	__kdbg("==> %s("FMT")", __FUNCTION__ , ## a)
+#define kleave(FMT, a...)	__kdbg("<== %s()"FMT, __FUNCTION__ , ## a)
+#define kdebug(FMT, a...)	__kdbg(FMT , ## a)
+#define kproto(FMT, a...)	__kdbg("### "FMT , ## a)
+#define knet(FMT, a...)		__kdbg(FMT , ## a)
 
 #ifdef __KDEBUG
 #define _enter(FMT, a...)	kenter(FMT , ## a)
@@ -56,9 +58,6 @@ static inline void afs_discard_my_signal
  */
 extern struct rw_semaphore afs_proc_cells_sem;
 extern struct list_head afs_proc_cells;
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_index_def afs_cache_cell_index_def;
-#endif
 
 /*
  * dir.c
@@ -72,11 +71,6 @@ extern const struct file_operations afs_
 extern struct address_space_operations afs_fs_aops;
 extern struct inode_operations afs_file_inode_operations;
 
-#ifdef AFS_CACHING_SUPPORT
-extern int afs_cache_get_page_cookie(struct page *page,
-				     struct cachefs_page **_page_cookie);
-#endif
-
 /*
  * inode.c
  */
@@ -97,8 +91,8 @@ #endif
 /*
  * main.c
  */
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_netfs afs_cache_netfs;
+#ifdef CONFIG_AFS_FSCACHE
+extern struct fscache_netfs afs_cache_netfs;
 #endif
 
 /*
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 913c689..5840bb2 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -1,6 +1,6 @@
 /* main.c: AFS client file system
  *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002,5 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -14,11 +14,11 @@ #include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/completion.h>
+#include <linux/fscache.h>
 #include <rxrpc/rxrpc.h>
 #include <rxrpc/transport.h>
 #include <rxrpc/call.h>
 #include <rxrpc/peer.h>
-#include "cache.h"
 #include "cell.h"
 #include "server.h"
 #include "fsclient.h"
@@ -51,12 +51,11 @@ static struct rxrpc_peer_ops afs_peer_op
 struct list_head afs_cb_hash_tbl[AFS_CB_HASH_COUNT];
 DEFINE_SPINLOCK(afs_cb_hash_lock);
 
-#ifdef AFS_CACHING_SUPPORT
-static struct cachefs_netfs_operations afs_cache_ops = {
-	.get_page_cookie	= afs_cache_get_page_cookie,
+#ifdef CONFIG_AFS_FSCACHE
+static struct fscache_netfs_operations afs_cache_ops = {
 };
 
-struct cachefs_netfs afs_cache_netfs = {
+struct fscache_netfs afs_cache_netfs = {
 	.name			= "afs",
 	.version		= 0,
 	.ops			= &afs_cache_ops,
@@ -83,10 +82,9 @@ static int __init afs_init(void)
 	if (ret < 0)
 		return ret;
 
-#ifdef AFS_CACHING_SUPPORT
+#ifdef CONFIG_AFS_FSCACHE
 	/* we want to be able to cache */
-	ret = cachefs_register_netfs(&afs_cache_netfs,
-				     &afs_cache_cell_index_def);
+	ret = fscache_register_netfs(&afs_cache_netfs);
 	if (ret < 0)
 		goto error;
 #endif
@@ -137,8 +135,8 @@ #ifdef CONFIG_KEYS_TURNED_OFF
 	afs_key_unregister();
  error_cache:
 #endif
-#ifdef AFS_CACHING_SUPPORT
-	cachefs_unregister_netfs(&afs_cache_netfs);
+#ifdef CONFIG_AFS_FSCACHE
+	fscache_unregister_netfs(&afs_cache_netfs);
  error:
 #endif
 	afs_cell_purge();
@@ -167,8 +165,8 @@ static void __exit afs_exit(void)
 #ifdef CONFIG_KEYS_TURNED_OFF
 	afs_key_unregister();
 #endif
-#ifdef AFS_CACHING_SUPPORT
-	cachefs_unregister_netfs(&afs_cache_netfs);
+#ifdef CONFIG_AFS_FSCACHE
+	fscache_unregister_netfs(&afs_cache_netfs);
 #endif
 	afs_proc_cleanup();
 
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 7b6dc03..9be8289 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -82,7 +82,7 @@ int afs_mntpt_check_symlink(struct afs_v
 
 	ret = -EIO;
 	wait_on_page_locked(page);
-	buf = kmap(page);
+	buf = kmap_atomic(page, KM_USER0);
 	if (!PageUptodate(page))
 		goto out_free;
 	if (PageError(page))
@@ -105,7 +105,7 @@ int afs_mntpt_check_symlink(struct afs_v
 	ret = 0;
 
  out_free:
-	kunmap(page);
+	kunmap_atomic(buf, KM_USER0);
 	page_cache_release(page);
  out:
 	_leave(" = %d", ret);
@@ -195,9 +195,9 @@ static struct vfsmount *afs_mntpt_do_aut
 	if (!PageUptodate(page) || PageError(page))
 		goto error;
 
-	buf = kmap(page);
+	buf = kmap_atomic(page, KM_USER0);
 	memcpy(devname, buf, size);
-	kunmap(page);
+	kunmap_atomic(buf, KM_USER0);
 	page_cache_release(page);
 	page = NULL;
 
@@ -276,12 +276,12 @@ static void *afs_mntpt_follow_link(struc
  */
 static void afs_mntpt_expiry_timed_out(struct afs_timer *timer)
 {
-	kenter("");
+//	kenter("");
 
 	mark_mounts_for_expiry(&afs_vfsmounts);
 
 	afs_kafstimod_add_timer(&afs_mntpt_expiry_timer,
 				afs_mntpt_expiry_timeout * HZ);
 
-	kleave("");
+//	kleave("");
 } /* end afs_mntpt_expiry_timed_out() */
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 101d21b..db58488 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -177,6 +177,7 @@ int afs_proc_init(void)
  */
 void afs_proc_cleanup(void)
 {
+	remove_proc_entry("rootcell", proc_afs);
 	remove_proc_entry("cells", proc_afs);
 
 	remove_proc_entry("fs/afs", NULL);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 62b093a..7103e10 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -377,7 +377,6 @@ int afs_server_request_callslot(struct a
 	else if (list_empty(&server->fs_callq)) {
 		/* no one waiting */
 		server->fs_conn_cnt[nconn]++;
-		spin_unlock(&server->fs_lock);
 	}
 	else {
 		/* someone's waiting - dequeue them and wake them up */
@@ -395,9 +394,9 @@ int afs_server_request_callslot(struct a
 		}
 		pcallslot->ready = 1;
 		wake_up_process(pcallslot->task);
-		spin_unlock(&server->fs_lock);
 	}
 
+	spin_unlock(&server->fs_lock);
 	rxrpc_put_connection(callslot->conn);
 	callslot->conn = NULL;
 
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index eced206..cfab969 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -59,17 +59,21 @@ static LIST_HEAD(afs_vlocation_update_pe
 static struct afs_vlocation *afs_vlocation_update;	/* VL currently being updated */
 static DEFINE_SPINLOCK(afs_vlocation_update_lock); /* lock guarding update queue */
 
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_vlocation_cache_match(void *target,
-						     const void *entry);
-static void afs_vlocation_cache_update(void *source, void *entry);
-
-struct cachefs_index_def afs_vlocation_cache_index_def = {
-	.name		= "vldb",
-	.data_size	= sizeof(struct afs_cache_vlocation),
-	.keys[0]	= { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
-	.match		= afs_vlocation_cache_match,
-	.update		= afs_vlocation_cache_update,
+#ifdef CONFIG_AFS_FSCACHE
+static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
+					    void *buffer, uint16_t buflen);
+static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
+					    void *buffer, uint16_t buflen);
+static fscache_checkaux_t afs_vlocation_cache_check_aux(void *cookie_netfs_data,
+							const void *buffer,
+							uint16_t buflen);
+
+static struct fscache_cookie_def afs_vlocation_cache_index_def = {
+	.name		= "AFS.vldb",
+	.type		= FSCACHE_COOKIE_TYPE_INDEX,
+	.get_key	= afs_vlocation_cache_get_key,
+	.get_aux	= afs_vlocation_cache_get_aux,
+	.check_aux	= afs_vlocation_cache_check_aux,
 };
 #endif
 
@@ -300,13 +304,12 @@ int afs_vlocation_lookup(struct afs_cell
 
 	list_add_tail(&vlocation->link, &cell->vl_list);
 
-#ifdef AFS_CACHING_SUPPORT
+#ifdef CONFIG_AFS_FSCACHE
 	/* we want to store it in the cache, plus it might already be
 	 * encached */
-	cachefs_acquire_cookie(cell->cache,
-			       &afs_volume_cache_index_def,
-			       vlocation,
-			       &vlocation->cache);
+	vlocation->cache = fscache_acquire_cookie(cell->cache,
+						  &afs_vlocation_cache_index_def,
+						  vlocation);
 
 	if (vlocation->valid)
 		goto found_in_cache;
@@ -341,7 +344,7 @@ #endif
  active:
 	active = 1;
 
-#ifdef AFS_CACHING_SUPPORT
+#ifdef CONFIG_AFS_FSCACHE
  found_in_cache:
 #endif
 	/* try to look up a cached volume in the cell VL databases by ID */
@@ -423,9 +426,9 @@ #endif
 
 	afs_kafstimod_add_timer(&vlocation->upd_timer, 10 * HZ);
 
-#ifdef AFS_CACHING_SUPPORT
+#ifdef CONFIG_AFS_FSCACHE
 	/* update volume entry in local cache */
-	cachefs_update_cookie(vlocation->cache);
+	fscache_update_cookie(vlocation->cache);
 #endif
 
 	*_vlocation = vlocation;
@@ -439,8 +442,8 @@ #endif
 		}
 		else {
 			list_del(&vlocation->link);
-#ifdef AFS_CACHING_SUPPORT
-			cachefs_relinquish_cookie(vlocation->cache, 0);
+#ifdef CONFIG_AFS_FSCACHE
+			fscache_relinquish_cookie(vlocation->cache, 0);
 #endif
 			afs_put_cell(vlocation->cell);
 			kfree(vlocation);
@@ -538,8 +541,8 @@ void afs_vlocation_do_timeout(struct afs
 	}
 
 	/* we can now destroy it properly */
-#ifdef AFS_CACHING_SUPPORT
-	cachefs_relinquish_cookie(vlocation->cache, 0);
+#ifdef CONFIG_AFS_FSCACHE
+	fscache_relinquish_cookie(vlocation->cache, 0);
 #endif
 	afs_put_cell(cell);
 
@@ -890,65 +893,103 @@ static void afs_vlocation_update_discard
 
 /*****************************************************************************/
 /*
- * match a VLDB record stored in the cache
- * - may also load target from entry
+ * set the key for the index entry
  */
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_vlocation_cache_match(void *target,
-						     const void *entry)
+#ifdef CONFIG_AFS_FSCACHE
+static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
+					    void *buffer, uint16_t bufmax)
 {
-	const struct afs_cache_vlocation *vldb = entry;
-	struct afs_vlocation *vlocation = target;
+	const struct afs_vlocation *vlocation = cookie_netfs_data;
+	uint16_t klen;
 
-	_enter("{%s},{%s}", vlocation->vldb.name, vldb->name);
+	_enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax);
 
-	if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0
-	    ) {
-		if (!vlocation->valid ||
-		    vlocation->vldb.rtime == vldb->rtime
-		    ) {
-			vlocation->vldb = *vldb;
-			vlocation->valid = 1;
-			_leave(" = SUCCESS [c->m]");
-			return CACHEFS_MATCH_SUCCESS;
-		}
-		/* need to update cache if cached info differs */
-		else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) {
-			/* delete if VIDs for this name differ */
-			if (memcmp(&vlocation->vldb.vid,
-				   &vldb->vid,
-				   sizeof(vldb->vid)) != 0) {
-				_leave(" = DELETE");
-				return CACHEFS_MATCH_SUCCESS_DELETE;
-			}
+	klen = strnlen(vlocation->vldb.name, sizeof(vlocation->vldb.name));
+	if (klen > bufmax)
+		return 0;
 
-			_leave(" = UPDATE");
-			return CACHEFS_MATCH_SUCCESS_UPDATE;
-		}
-		else {
-			_leave(" = SUCCESS");
-			return CACHEFS_MATCH_SUCCESS;
-		}
-	}
+	memcpy(buffer, vlocation->vldb.name, klen);
+
+	_leave(" = %u", klen);
+	return klen;
 
-	_leave(" = FAILED");
-	return CACHEFS_MATCH_FAILED;
-} /* end afs_vlocation_cache_match() */
+} /* end afs_vlocation_cache_get_key() */
 #endif
 
 /*****************************************************************************/
 /*
- * update a VLDB record stored in the cache
+ * provide new auxilliary cache data
  */
-#ifdef AFS_CACHING_SUPPORT
-static void afs_vlocation_cache_update(void *source, void *entry)
+#ifdef CONFIG_AFS_FSCACHE
+static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
+					    void *buffer, uint16_t bufmax)
 {
-	struct afs_cache_vlocation *vldb = entry;
-	struct afs_vlocation *vlocation = source;
+	const struct afs_vlocation *vlocation = cookie_netfs_data;
+	uint16_t dlen;
 
-	_enter("");
+	_enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax);
+
+	dlen = sizeof(struct afs_cache_vlocation);
+	dlen -= offsetof(struct afs_cache_vlocation, nservers);
+	if (dlen > bufmax)
+		return 0;
+
+	memcpy(buffer, (uint8_t *)&vlocation->vldb.nservers, dlen);
+
+	_leave(" = %u", dlen);
+	return dlen;
+
+} /* end afs_vlocation_cache_get_aux() */
+#endif
+
+/*****************************************************************************/
+/*
+ * check that the auxilliary data indicates that the entry is still valid
+ */
+#ifdef CONFIG_AFS_FSCACHE
+static fscache_checkaux_t afs_vlocation_cache_check_aux(void *cookie_netfs_data,
+							const void *buffer,
+							uint16_t buflen)
+{
+	const struct afs_cache_vlocation *cvldb;
+	struct afs_vlocation *vlocation = cookie_netfs_data;
+	uint16_t dlen;
+
+	_enter("{%s},%p,%u", vlocation->vldb.name, buffer, buflen);
+
+	/* check the size of the data is what we're expecting */
+	dlen = sizeof(struct afs_cache_vlocation);
+	dlen -= offsetof(struct afs_cache_vlocation, nservers);
+	if (dlen != buflen)
+		return FSCACHE_CHECKAUX_OBSOLETE;
+
+	cvldb = container_of(buffer, struct afs_cache_vlocation, nservers);
+
+	/* if what's on disk is more valid than what's in memory, then use the
+	 * VL record from the cache */
+	if (!vlocation->valid || vlocation->vldb.rtime == cvldb->rtime) {
+		memcpy((uint8_t *)&vlocation->vldb.nservers, buffer, dlen);
+		vlocation->valid = 1;
+		_leave(" = SUCCESS [c->m]");
+		return FSCACHE_CHECKAUX_OKAY;
+	}
+
+	/* need to update the cache if the cached info differs */
+	if (memcmp(&vlocation->vldb, buffer, dlen) != 0) {
+		/* delete if the volume IDs for this name differ */
+		if (memcmp(&vlocation->vldb.vid, &cvldb->vid,
+			   sizeof(cvldb->vid)) != 0
+		    ) {
+			_leave(" = OBSOLETE");
+			return FSCACHE_CHECKAUX_OBSOLETE;
+		}
+
+		_leave(" = UPDATE");
+		return FSCACHE_CHECKAUX_NEEDS_UPDATE;
+	}
 
-	*vldb = vlocation->vldb;
+	_leave(" = OKAY");
+	return FSCACHE_CHECKAUX_OKAY;
 
-} /* end afs_vlocation_cache_update() */
+} /* end afs_vlocation_cache_check_aux() */
 #endif
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index 9867fef..c380f66 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -29,17 +29,30 @@ struct afs_timer_ops afs_vnode_cb_timed_
 	.timed_out	= afs_vnode_cb_timed_out,
 };
 
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_vnode_cache_match(void *target,
-						 const void *entry);
-static void afs_vnode_cache_update(void *source, void *entry);
-
-struct cachefs_index_def afs_vnode_cache_index_def = {
-	.name		= "vnode",
-	.data_size	= sizeof(struct afs_cache_vnode),
-	.keys[0]	= { CACHEFS_INDEX_KEYS_BIN, 4 },
-	.match		= afs_vnode_cache_match,
-	.update		= afs_vnode_cache_update,
+#ifdef CONFIG_AFS_FSCACHE
+static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data,
+					void *buffer, uint16_t buflen);
+static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
+				     uint64_t *size);
+static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
+					void *buffer, uint16_t buflen);
+static fscache_checkaux_t afs_vnode_cache_check_aux(void *cookie_netfs_data,
+						    const void *buffer,
+						    uint16_t buflen);
+static void afs_vnode_cache_mark_pages_cached(void *cookie_netfs_data,
+					      struct address_space *mapping,
+					      struct pagevec *cached_pvec);
+static void afs_vnode_cache_now_uncached(void *cookie_netfs_data);
+
+struct fscache_cookie_def afs_vnode_cache_index_def = {
+	.name			= "AFS.vnode",
+	.type			= FSCACHE_COOKIE_TYPE_DATAFILE,
+	.get_key		= afs_vnode_cache_get_key,
+	.get_attr		= afs_vnode_cache_get_attr,
+	.get_aux		= afs_vnode_cache_get_aux,
+	.check_aux		= afs_vnode_cache_check_aux,
+	.mark_pages_cached	= afs_vnode_cache_mark_pages_cached,
+	.now_uncached		= afs_vnode_cache_now_uncached,
 };
 #endif
 
@@ -189,6 +202,8 @@ int afs_vnode_fetch_status(struct afs_vn
 
 	if (vnode->update_cnt > 0) {
 		/* someone else started a fetch */
+		_debug("conflict");
+
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		add_wait_queue(&vnode->update_waitq, &myself);
 
@@ -220,6 +235,7 @@ int afs_vnode_fetch_status(struct afs_vn
 		spin_unlock(&vnode->lock);
 		set_current_state(TASK_RUNNING);
 
+		_leave(" [conflicted, %d", !!(vnode->flags & AFS_VNODE_DELETED));
 		return vnode->flags & AFS_VNODE_DELETED ? -ENOENT : 0;
 	}
 
@@ -342,54 +358,198 @@ int afs_vnode_give_up_callback(struct af
 
 /*****************************************************************************/
 /*
- * match a vnode record stored in the cache
+ * set the key for the index entry
  */
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_vnode_cache_match(void *target,
-						 const void *entry)
+#ifdef CONFIG_AFS_FSCACHE
+static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data,
+					void *buffer, uint16_t bufmax)
 {
-	const struct afs_cache_vnode *cvnode = entry;
-	struct afs_vnode *vnode = target;
+	const struct afs_vnode *vnode = cookie_netfs_data;
+	uint16_t klen;
 
-	_enter("{%x,%x,%Lx},{%x,%x,%Lx}",
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       vnode->status.version,
-	       cvnode->vnode_id,
-	       cvnode->vnode_unique,
-	       cvnode->data_version);
-
-	if (vnode->fid.vnode != cvnode->vnode_id) {
-		_leave(" = FAILED");
-		return CACHEFS_MATCH_FAILED;
+	_enter("{%x,%x,%Lx},%p,%u",
+	       vnode->fid.vnode, vnode->fid.unique, vnode->status.version,
+	       buffer, bufmax);
+
+	klen = sizeof(vnode->fid.vnode);
+	if (klen > bufmax)
+		return 0;
+
+	memcpy(buffer, &vnode->fid.vnode, sizeof(vnode->fid.vnode));
+
+	_leave(" = %u", klen);
+	return klen;
+
+} /* end afs_vnode_cache_get_key() */
+#endif
+
+/*****************************************************************************/
+/*
+ * provide an updated file attributes
+ */
+#ifdef CONFIG_AFS_FSCACHE
+static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
+				     uint64_t *size)
+{
+	const struct afs_vnode *vnode = cookie_netfs_data;
+
+	_enter("{%x,%x,%Lx},",
+	       vnode->fid.vnode, vnode->fid.unique, vnode->status.version);
+
+	*size = i_size_read((struct inode *) &vnode->vfs_inode);
+
+} /* end afs_vnode_cache_get_attr() */
+#endif
+
+/*****************************************************************************/
+/*
+ * provide new auxilliary cache data
+ */
+#ifdef CONFIG_AFS_FSCACHE
+static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
+					void *buffer, uint16_t bufmax)
+{
+	const struct afs_vnode *vnode = cookie_netfs_data;
+	uint16_t dlen;
+
+	_enter("{%x,%x,%Lx},%p,%u",
+	       vnode->fid.vnode, vnode->fid.unique, vnode->status.version,
+	       buffer, bufmax);
+
+	dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.version);
+	if (dlen > bufmax)
+		return 0;
+
+	memcpy(buffer, &vnode->fid.unique, sizeof(vnode->fid.unique));
+	buffer += sizeof(vnode->fid.unique);
+	memcpy(buffer, &vnode->status.version, sizeof(vnode->status.version));
+
+	_leave(" = %u", dlen);
+	return dlen;
+
+} /* end afs_vnode_cache_get_aux() */
+#endif
+
+/*****************************************************************************/
+/*
+ * check that the auxilliary data indicates that the entry is still valid
+ */
+#ifdef CONFIG_AFS_FSCACHE
+static fscache_checkaux_t afs_vnode_cache_check_aux(void *cookie_netfs_data,
+						    const void *buffer,
+						    uint16_t buflen)
+{
+	struct afs_vnode *vnode = cookie_netfs_data;
+	uint16_t dlen;
+
+	_enter("{%x,%x,%Lx},%p,%u",
+	       vnode->fid.vnode, vnode->fid.unique, vnode->status.version,
+	       buffer, buflen);
+
+	/* check the size of the data is what we're expecting */
+	dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.version);
+	if (dlen != buflen) {
+		_leave(" = OBSOLETE [len %hx != %hx]", dlen, buflen);
+		return FSCACHE_CHECKAUX_OBSOLETE;
 	}
 
-	if (vnode->fid.unique != cvnode->vnode_unique ||
-	    vnode->status.version != cvnode->data_version) {
-		_leave(" = DELETE");
-		return CACHEFS_MATCH_SUCCESS_DELETE;
+	if (memcmp(buffer,
+		   &vnode->fid.unique,
+		   sizeof(vnode->fid.unique)
+		   ) != 0
+	    ) {
+		unsigned unique;
+
+		memcpy(&unique, buffer, sizeof(unique));
+
+		_leave(" = OBSOLETE [uniq %x != %x]",
+		       unique, vnode->fid.unique);
+		return FSCACHE_CHECKAUX_OBSOLETE;
+	}
+
+	if (memcmp(buffer + sizeof(vnode->fid.unique),
+		   &vnode->status.version,
+		   sizeof(vnode->status.version)
+		   ) != 0
+	    ) {
+		afs_dataversion_t version;
+
+		memcpy(&version, buffer + sizeof(vnode->fid.unique),
+		       sizeof(version));
+
+		_leave(" = OBSOLETE [vers %llx != %llx]",
+		       version, vnode->status.version);
+		return FSCACHE_CHECKAUX_OBSOLETE;
 	}
 
 	_leave(" = SUCCESS");
-	return CACHEFS_MATCH_SUCCESS;
-} /* end afs_vnode_cache_match() */
+	return FSCACHE_CHECKAUX_OKAY;
+
+} /* end afs_vnode_cache_check_aux() */
 #endif
 
 /*****************************************************************************/
 /*
- * update a vnode record stored in the cache
+ * indication of pages that now have cache metadata retained
+ * - this function should mark the specified pages as now being cached
  */
-#ifdef AFS_CACHING_SUPPORT
-static void afs_vnode_cache_update(void *source, void *entry)
+#ifdef CONFIG_AFS_FSCACHE
+static void afs_vnode_cache_mark_pages_cached(void *cookie_netfs_data,
+					      struct address_space *mapping,
+					      struct pagevec *cached_pvec)
 {
-	struct afs_cache_vnode *cvnode = entry;
-	struct afs_vnode *vnode = source;
+	unsigned long loop;
 
-	_enter("");
+	for (loop = 0; loop < cached_pvec->nr; loop++) {
+		struct page *page = cached_pvec->pages[loop];
 
-	cvnode->vnode_id	= vnode->fid.vnode;
-	cvnode->vnode_unique	= vnode->fid.unique;
-	cvnode->data_version	= vnode->status.version;
+		_debug("- mark %p{%lx}", page, page->index);
 
-} /* end afs_vnode_cache_update() */
+		SetPagePrivate(page);
+	}
+
+} /* end afs_vnode_cache_mark_pages_cached() */
 #endif
+
+/*****************************************************************************/
+/*
+ * indication the cookie is no longer uncached
+ * - this function is called when the backing store currently caching a cookie
+ *   is removed
+ * - the netfs should use this to clean up any markers indicating cached pages
+ * - this is mandatory for any object that may have data
+ */
+static void afs_vnode_cache_now_uncached(void *cookie_netfs_data)
+{
+	struct afs_vnode *vnode = cookie_netfs_data;
+	struct pagevec pvec;
+	pgoff_t first;
+	int loop, nr_pages;
+
+	_enter("{%x,%x,%Lx}",
+	       vnode->fid.vnode, vnode->fid.unique, vnode->status.version);
+
+	pagevec_init(&pvec, 0);
+	first = 0;
+
+	for (;;) {
+		/* grab a bunch of pages to clean */
+		nr_pages = pagevec_lookup(&pvec, vnode->vfs_inode.i_mapping,
+					  first,
+					  PAGEVEC_SIZE - pagevec_count(&pvec));
+		if (!nr_pages)
+			break;
+
+		for (loop = 0; loop < nr_pages; loop++)
+			ClearPagePrivate(pvec.pages[loop]);
+
+		first = pvec.pages[nr_pages - 1]->index + 1;
+
+		pvec.nr = nr_pages;
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+
+	_leave("");
+
+} /* end afs_vnode_cache_now_uncached() */
diff --git a/fs/afs/vnode.h b/fs/afs/vnode.h
index b86a971..3f0602d 100644
--- a/fs/afs/vnode.h
+++ b/fs/afs/vnode.h
@@ -13,9 +13,9 @@ #ifndef _LINUX_AFS_VNODE_H
 #define _LINUX_AFS_VNODE_H
 
 #include <linux/fs.h>
+#include <linux/fscache.h>
 #include "server.h"
 #include "kafstimod.h"
-#include "cache.h"
 
 #ifdef __KERNEL__
 
@@ -32,8 +32,8 @@ struct afs_cache_vnode
 	afs_dataversion_t	data_version;	/* data version */
 };
 
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_index_def afs_vnode_cache_index_def;
+#ifdef CONFIG_AFS_FSCACHE
+extern struct fscache_cookie_def afs_vnode_cache_index_def;
 #endif
 
 /*****************************************************************************/
@@ -47,8 +47,8 @@ struct afs_vnode
 	struct afs_volume	*volume;	/* volume on which vnode resides */
 	struct afs_fid		fid;		/* the file identifier for this inode */
 	struct afs_file_status	status;		/* AFS status info for this file */
-#ifdef AFS_CACHING_SUPPORT
-	struct cachefs_cookie	*cache;		/* caching cookie */
+#ifdef CONFIG_AFS_FSCACHE
+	struct fscache_cookie	*cache;		/* caching cookie */
 #endif
 
 	wait_queue_head_t	update_waitq;	/* status fetch waitqueue */
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 0ff4b86..0bd5578 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -15,10 +15,10 @@ #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
+#include <linux/fscache.h>
 #include "volume.h"
 #include "vnode.h"
 #include "cell.h"
-#include "cache.h"
 #include "cmservice.h"
 #include "fsclient.h"
 #include "vlclient.h"
@@ -28,18 +28,14 @@ #ifdef __KDEBUG
 static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
 #endif
 
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_volume_cache_match(void *target,
-						  const void *entry);
-static void afs_volume_cache_update(void *source, void *entry);
-
-struct cachefs_index_def afs_volume_cache_index_def = {
-	.name		= "volume",
-	.data_size	= sizeof(struct afs_cache_vhash),
-	.keys[0]	= { CACHEFS_INDEX_KEYS_BIN, 1 },
-	.keys[1]	= { CACHEFS_INDEX_KEYS_BIN, 1 },
-	.match		= afs_volume_cache_match,
-	.update		= afs_volume_cache_update,
+#ifdef CONFIG_AFS_FSCACHE
+static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
+					 void *buffer, uint16_t buflen);
+
+static struct fscache_cookie_def afs_volume_cache_index_def = {
+	.name		= "AFS.volume",
+	.type		= FSCACHE_COOKIE_TYPE_INDEX,
+	.get_key	= afs_volume_cache_get_key,
 };
 #endif
 
@@ -214,11 +210,10 @@ int afs_volume_lookup(const char *name, 
 	}
 
 	/* attach the cache and volume location */
-#ifdef AFS_CACHING_SUPPORT
-	cachefs_acquire_cookie(vlocation->cache,
-			       &afs_vnode_cache_index_def,
-			       volume,
-			       &volume->cache);
+#ifdef CONFIG_AFS_FSCACHE
+	volume->cache = fscache_acquire_cookie(vlocation->cache,
+					       &afs_volume_cache_index_def,
+					       volume);
 #endif
 
 	afs_get_vlocation(vlocation);
@@ -286,8 +281,8 @@ void afs_put_volume(struct afs_volume *v
 	up_write(&vlocation->cell->vl_sem);
 
 	/* finish cleaning up the volume */
-#ifdef AFS_CACHING_SUPPORT
-	cachefs_relinquish_cookie(volume->cache, 0);
+#ifdef CONFIG_AFS_FSCACHE
+	fscache_relinquish_cookie(volume->cache, 0);
 #endif
 	afs_put_vlocation(vlocation);
 
@@ -481,40 +476,25 @@ int afs_volume_release_fileserver(struct
 
 /*****************************************************************************/
 /*
- * match a volume hash record stored in the cache
+ * set the key for the index entry
  */
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_volume_cache_match(void *target,
-						  const void *entry)
+#ifdef CONFIG_AFS_FSCACHE
+static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
+					void *buffer, uint16_t bufmax)
 {
-	const struct afs_cache_vhash *vhash = entry;
-	struct afs_volume *volume = target;
-
-	_enter("{%u},{%u}", volume->type, vhash->vtype);
+	const struct afs_volume *volume = cookie_netfs_data;
+	uint16_t klen;
 
-	if (volume->type == vhash->vtype) {
-		_leave(" = SUCCESS");
-		return CACHEFS_MATCH_SUCCESS;
-	}
-
-	_leave(" = FAILED");
-	return CACHEFS_MATCH_FAILED;
-} /* end afs_volume_cache_match() */
-#endif
+	_enter("{%u},%p,%u", volume->type, buffer, bufmax);
 
-/*****************************************************************************/
-/*
- * update a volume hash record stored in the cache
- */
-#ifdef AFS_CACHING_SUPPORT
-static void afs_volume_cache_update(void *source, void *entry)
-{
-	struct afs_cache_vhash *vhash = entry;
-	struct afs_volume *volume = source;
+	klen = sizeof(volume->type);
+	if (klen > bufmax)
+		return 0;
 
-	_enter("");
+	memcpy(buffer, &volume->type, sizeof(volume->type));
 
-	vhash->vtype = volume->type;
+	_leave(" = %u", klen);
+	return klen;
 
-} /* end afs_volume_cache_update() */
+} /* end afs_volume_cache_get_key() */
 #endif
diff --git a/fs/afs/volume.h b/fs/afs/volume.h
index bfdcf19..fc9895a 100644
--- a/fs/afs/volume.h
+++ b/fs/afs/volume.h
@@ -12,11 +12,11 @@
 #ifndef _LINUX_AFS_VOLUME_H
 #define _LINUX_AFS_VOLUME_H
 
+#include <linux/fscache.h>
 #include "types.h"
 #include "fsclient.h"
 #include "kafstimod.h"
 #include "kafsasyncd.h"
-#include "cache.h"
 
 typedef enum {
 	AFS_VLUPD_SLEEP,		/* sleeping waiting for update timer to fire */
@@ -45,24 +45,6 @@ #define AFS_VOL_VTM_BAK	0x04 /* backup v
 	time_t			rtime;		/* last retrieval time */
 };
 
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_index_def afs_vlocation_cache_index_def;
-#endif
-
-/*****************************************************************************/
-/*
- * volume -> vnode hash table entry
- */
-struct afs_cache_vhash
-{
-	afs_voltype_t		vtype;		/* which volume variation */
-	uint8_t			hash_bucket;	/* which hash bucket this represents */
-} __attribute__((packed));
-
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_index_def afs_volume_cache_index_def;
-#endif
-
 /*****************************************************************************/
 /*
  * AFS volume location record
@@ -73,8 +55,8 @@ struct afs_vlocation
 	struct list_head	link;		/* link in cell volume location list */
 	struct afs_timer	timeout;	/* decaching timer */
 	struct afs_cell		*cell;		/* cell to which volume belongs */
-#ifdef AFS_CACHING_SUPPORT
-	struct cachefs_cookie	*cache;		/* caching cookie */
+#ifdef CONFIG_AFS_FSCACHE
+	struct fscache_cookie	*cache;		/* caching cookie */
 #endif
 	struct afs_cache_vlocation vldb;	/* volume information DB record */
 	struct afs_volume	*vols[3];	/* volume access record pointer (index by type) */
@@ -109,8 +91,8 @@ struct afs_volume
 	atomic_t		usage;
 	struct afs_cell		*cell;		/* cell to which belongs (unrefd ptr) */
 	struct afs_vlocation	*vlocation;	/* volume location */
-#ifdef AFS_CACHING_SUPPORT
-	struct cachefs_cookie	*cache;		/* caching cookie */
+#ifdef CONFIG_AFS_FSCACHE
+	struct fscache_cookie	*cache;		/* caching cookie */
 #endif
 	afs_volid_t		vid;		/* volume ID */
 	afs_voltype_t		type;		/* type of volume */


^ permalink raw reply related

* [PATCH 03/14] NFS: Abstract out namespace initialisation [try #8]
From: David Howells @ 2006-05-10 16:01 UTC (permalink / raw)
  To: torvalds, akpm, steved, trond.myklebust, aviro
  Cc: linux-fsdevel, linux-cachefs, nfsv4, linux-kernel
In-Reply-To: <20060510160111.9058.55026.stgit@warthog.cambridge.redhat.com>

The attached patch abstracts out the namespace initialisation so that temporary
namespaces can be set up elsewhere.


The following changes were made in [try #8]:

 (*) init_namespace() has been made out-of-line.


Signed-Off-By: David Howells <dhowells@redhat.com>
---

 fs/namespace.c            |   28 +++++++++++++++++++++-------
 include/linux/namespace.h |    1 +
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 84a3bec..6d2e8fb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1790,6 +1790,26 @@ out3:
 	goto out2;
 }
 
+/*
+ * initialise a namespace, rooting it at the given specified mountpoint if one
+ * is given
+ */
+void init_namespace(struct namespace *namespace, struct vfsmount *mnt)
+{
+	atomic_set(&namespace->count, 1);
+	INIT_LIST_HEAD(&namespace->list);
+	init_waitqueue_head(&namespace->poll);
+	namespace->event = 0;
+	namespace->root = mnt;
+
+	if (mnt) {
+		list_add(&mnt->mnt_list, &namespace->list);
+		mnt->mnt_namespace = namespace;
+	}
+}
+
+EXPORT_SYMBOL_GPL(init_namespace);
+
 static void __init init_mount_tree(void)
 {
 	struct vfsmount *mnt;
@@ -1802,13 +1822,7 @@ static void __init init_mount_tree(void)
 	namespace = kmalloc(sizeof(*namespace), GFP_KERNEL);
 	if (!namespace)
 		panic("Can't allocate initial namespace");
-	atomic_set(&namespace->count, 1);
-	INIT_LIST_HEAD(&namespace->list);
-	init_waitqueue_head(&namespace->poll);
-	namespace->event = 0;
-	list_add(&mnt->mnt_list, &namespace->list);
-	namespace->root = mnt;
-	mnt->mnt_namespace = namespace;
+	init_namespace(namespace, mnt);
 
 	init_task.namespace = namespace;
 	read_lock(&tasklist_lock);
diff --git a/include/linux/namespace.h b/include/linux/namespace.h
index 3abc8e3..c371a30 100644
--- a/include/linux/namespace.h
+++ b/include/linux/namespace.h
@@ -16,6 +16,7 @@ struct namespace {
 extern int copy_namespace(int, struct task_struct *);
 extern void __put_namespace(struct namespace *namespace);
 extern struct namespace *dup_namespace(struct task_struct *, struct fs_struct *);
+extern void init_namespace(struct namespace *, struct vfsmount *);
 
 static inline void put_namespace(struct namespace *namespace)
 {


^ permalink raw reply related

* Re: Alubook 5,8: No sound with 2.6.17-rc3-g5528e568-dirty
From: Johannes Berg @ 2006-05-10 16:04 UTC (permalink / raw)
  To: Wolfgang Pfeiffer; +Cc: linuxppc-dev list
In-Reply-To: <20060510154930.GD3878@localhost>

> As to the reason why I came here:
> As I can't be sure whether the issue announced in the subject line is
> really a kernel issue

Did it ever work? I don't quite know what machine a 5,8 is since I have a
5,6 only. Also, try snd-aoa.

johannes

^ permalink raw reply

* [PATCH 08/14] FS-Cache: Add notification of page becoming writable to VMA ops [try #8]
From: David Howells @ 2006-05-10 16:01 UTC (permalink / raw)
  To: torvalds, akpm, steved, trond.myklebust, aviro
  Cc: linux-fsdevel, linux-cachefs, nfsv4, linux-kernel
In-Reply-To: <20060510160111.9058.55026.stgit@warthog.cambridge.redhat.com>

The attached patch adds a new VMA operation to notify a filesystem or other
driver about the MMU generating a fault because userspace attempted to write
to a page mapped through a read-only PTE.

This facility permits the filesystem or driver to:

 (*) Implement storage allocation/reservation on attempted write, and so to
     deal with problems such as ENOSPC more gracefully (perhaps by generating
     SIGBUS).

 (*) Delay making the page writable until the contents have been written to a
     backing cache. This is useful for NFS/AFS when using FS-Cache/CacheFS.
     It permits the filesystem to have some guarantee about the state of the
     cache.

 (*) Account and limit number of dirty pages. This is one piece of the puzzle
     needed to make shared writable mapping work safely in FUSE.

Signed-Off-By: David Howells <dhowells@redhat.com>
---

 include/linux/mm.h |    4 ++
 mm/memory.c        |   99 +++++++++++++++++++++++++++++++++++++++-------------
 mm/mmap.c          |   12 +++++-
 mm/mprotect.c      |   11 +++++-
 4 files changed, 98 insertions(+), 28 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1154684..cd3c2cf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -200,6 +200,10 @@ struct vm_operations_struct {
 	void (*close)(struct vm_area_struct * area);
 	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
 	int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
+
+	/* notification that a previously read-only page is about to become
+	 * writable, if an error is returned it will cause a SIGBUS */
+	int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
 #ifdef CONFIG_NUMA
 	int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
 	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
diff --git a/mm/memory.c b/mm/memory.c
index 0ec7bc6..6c6891e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1445,25 +1445,59 @@ static int do_wp_page(struct mm_struct *
 {
 	struct page *old_page, *new_page;
 	pte_t entry;
-	int ret = VM_FAULT_MINOR;
+	int reuse, ret = VM_FAULT_MINOR;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page)
 		goto gotten;
 
-	if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
-		int reuse = can_share_swap_page(old_page);
-		unlock_page(old_page);
-		if (reuse) {
-			flush_cache_page(vma, address, pte_pfn(orig_pte));
-			entry = pte_mkyoung(orig_pte);
-			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-			ptep_set_access_flags(vma, address, page_table, entry, 1);
-			update_mmu_cache(vma, address, entry);
-			lazy_mmu_prot_update(entry);
-			ret |= VM_FAULT_WRITE;
-			goto unlock;
+	if (unlikely(vma->vm_flags & VM_SHARED)) {
+		if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
+			/*
+			 * Notify the address space that the page is about to
+			 * become writable so that it can prohibit this or wait
+			 * for the page to get into an appropriate state.
+			 *
+			 * We do this without the lock held, so that it can
+			 * sleep if it needs to.
+			 */
+			page_cache_get(old_page);
+			pte_unmap_unlock(page_table, ptl);
+
+			if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
+				goto unwritable_page;
+
+			page_cache_release(old_page);
+
+			/*
+			 * Since we dropped the lock we need to revalidate
+			 * the PTE as someone else may have changed it.  If
+			 * they did, we just return, as we can count on the
+			 * MMU to tell us if they didn't also make it writable.
+			 */
+			page_table = pte_offset_map_lock(mm, pmd, address,
+							 &ptl);
+			if (!pte_same(*page_table, orig_pte))
+				goto unlock;
 		}
+
+		reuse = 1;
+	} else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
+		reuse = can_share_swap_page(old_page);
+		unlock_page(old_page);
+	} else {
+		reuse = 0;
+	}
+
+	if (reuse) {
+		flush_cache_page(vma, address, pte_pfn(orig_pte));
+		entry = pte_mkyoung(orig_pte);
+		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		ptep_set_access_flags(vma, address, page_table, entry, 1);
+		update_mmu_cache(vma, address, entry);
+		lazy_mmu_prot_update(entry);
+		ret |= VM_FAULT_WRITE;
+		goto unlock;
 	}
 
 	/*
@@ -1523,6 +1557,10 @@ oom:
 	if (old_page)
 		page_cache_release(old_page);
 	return VM_FAULT_OOM;
+
+unwritable_page:
+	page_cache_release(old_page);
+	return VM_FAULT_SIGBUS;
 }
 
 /*
@@ -2074,18 +2112,31 @@ retry:
 	/*
 	 * Should we do an early C-O-W break?
 	 */
-	if (write_access && !(vma->vm_flags & VM_SHARED)) {
-		struct page *page;
+	if (write_access) {
+		if (!(vma->vm_flags & VM_SHARED)) {
+			struct page *page;
 
-		if (unlikely(anon_vma_prepare(vma)))
-			goto oom;
-		page = alloc_page_vma(GFP_HIGHUSER, vma, address);
-		if (!page)
-			goto oom;
-		copy_user_highpage(page, new_page, address);
-		page_cache_release(new_page);
-		new_page = page;
-		anon = 1;
+			if (unlikely(anon_vma_prepare(vma)))
+				goto oom;
+			page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+			if (!page)
+				goto oom;
+			copy_user_highpage(page, new_page, address);
+			page_cache_release(new_page);
+			new_page = page;
+			anon = 1;
+
+		} else {
+			/* if the page will be shareable, see if the backing
+			 * address space wants to know that the page is about
+			 * to become writable */
+			if (vma->vm_ops->page_mkwrite &&
+			    vma->vm_ops->page_mkwrite(vma, new_page) < 0
+			    ) {
+				page_cache_release(new_page);
+				return VM_FAULT_SIGBUS;
+			}
+		}
 	}
 
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
diff --git a/mm/mmap.c b/mm/mmap.c
index e6ee123..6446c61 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1065,7 +1065,8 @@ munmap_back:
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
 	vma->vm_flags = vm_flags;
-	vma->vm_page_prot = protection_map[vm_flags & 0x0f];
+	vma->vm_page_prot = protection_map[vm_flags &
+				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
 	vma->vm_pgoff = pgoff;
 
 	if (file) {
@@ -1089,6 +1090,12 @@ munmap_back:
 			goto free_vma;
 	}
 
+	/* Don't make the VMA automatically writable if it's shared, but the
+	 * backer wishes to know when pages are first written to */
+	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+		vma->vm_page_prot =
+			protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)];
+
 	/* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
 	 * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
 	 * that memory reservation must be checked; but that reservation
@@ -1921,7 +1928,8 @@ unsigned long do_brk(unsigned long addr,
 	vma->vm_end = addr + len;
 	vma->vm_pgoff = pgoff;
 	vma->vm_flags = flags;
-	vma->vm_page_prot = protection_map[flags & 0x0f];
+	vma->vm_page_prot = protection_map[flags &
+				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 out:
 	mm->total_vm += len >> PAGE_SHIFT;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 4c14d42..2697abd 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -106,6 +106,7 @@ mprotect_fixup(struct vm_area_struct *vm
 	unsigned long oldflags = vma->vm_flags;
 	long nrpages = (end - start) >> PAGE_SHIFT;
 	unsigned long charged = 0;
+	unsigned int mask;
 	pgprot_t newprot;
 	pgoff_t pgoff;
 	int error;
@@ -132,8 +133,6 @@ mprotect_fixup(struct vm_area_struct *vm
 		}
 	}
 
-	newprot = protection_map[newflags & 0xf];
-
 	/*
 	 * First try to merge with previous and/or next vma.
 	 */
@@ -160,6 +159,14 @@ mprotect_fixup(struct vm_area_struct *vm
 	}
 
 success:
+	/* Don't make the VMA automatically writable if it's shared, but the
+	 * backer wishes to know when pages are first written to */
+	mask = VM_READ|VM_WRITE|VM_EXEC|VM_SHARED;
+	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+		mask &= ~VM_SHARED;
+
+	newprot = protection_map[newflags & mask];
+
 	/*
 	 * vm_flags and vm_page_prot are protected by the mmap_sem
 	 * held in write mode.


^ permalink raw reply related

* [PATCH 13/14] FS-Cache: Release page->private in failed readahead [try #8]
From: David Howells @ 2006-05-10 16:01 UTC (permalink / raw)
  To: torvalds, akpm, steved, trond.myklebust, aviro
  Cc: linux-fsdevel, linux-cachefs, nfsv4, linux-kernel
In-Reply-To: <20060510160111.9058.55026.stgit@warthog.cambridge.redhat.com>

The attached patch causes read_cache_pages() to release page-private data on a
page for which add_to_page_cache() fails or the filler function fails. This
permits pages with caching references associated with them to be cleaned up.

Signed-Off-By: David Howells <dhowells@redhat.com>
---

 mm/readahead.c |   16 ++++++++++++++++
 1 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/mm/readahead.c b/mm/readahead.c
index 0f142a4..82deb7f 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -141,6 +141,12 @@ int read_cache_pages(struct address_spac
 		page = list_to_page(pages);
 		list_del(&page->lru);
 		if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) {
+			if (PagePrivate(page) && mapping->a_ops->releasepage) {
+				page->mapping = mapping;
+				mapping->a_ops->releasepage(page, GFP_KERNEL);
+				page->mapping = NULL;
+			}
+				
 			page_cache_release(page);
 			continue;
 		}
@@ -153,6 +159,16 @@ int read_cache_pages(struct address_spac
 
 				victim = list_to_page(pages);
 				list_del(&victim->lru);
+
+				if (PagePrivate(victim) &&
+				    mapping->a_ops->releasepage
+				    ) {
+					victim->mapping = mapping;
+					mapping->a_ops->releasepage(
+						victim, GFP_KERNEL);
+					victim->mapping = NULL;
+				}
+
 				page_cache_release(victim);
 			}
 			break;


^ permalink raw reply related

* [ALSA - driver 0001134]: alsa 1.0.9rc4 + intel 82801FB/.../ = No sound (Benq 52-101)
From: bugtrack @ 2006-05-10 16:03 UTC (permalink / raw)
  To: alsa-devel


A NOTE has been added to this issue.
======================================================================
<https://bugtrack.alsa-project.org/alsa-bug/view.php?id=1134> 
======================================================================
Reported By:                liangalei
Assigned To:                
======================================================================
Project:                    ALSA - driver
Issue ID:                   1134
Category:                   PCI - intel8x0
Reproducibility:            always
Severity:                   minor
Priority:                   normal
Status:                     assigned
Distribution:               Fedora Core 3
Kernel Version:             2.6.11-1.14
======================================================================
Date Submitted:             05-28-2005 08:49 CEST
Last Modified:              05-10-2006 18:03 CEST
======================================================================
Summary:                    alsa 1.0.9rc4 + intel 82801FB/.../ = No sound (Benq
52-101)
Description: 
My notebook is Benq 52-101, update FC3 to kernel 2.6.11-1.14_FC3

alsamixer report: Card=intel ICH6, chip=Conexant id 3 (Question, Conexant
just support modem's AC-97 or else provide whole sound ? )

alsaconf: can find "Intel Corp 82801FB/.../ ICH6 family AC'97 Audio", and
will contine to "Have a lot fun".

I have ever, update linux kernel from 2.6.9 to 2.6.11(yum update, no
recompiling), and alsa driver from 0.9.0/1.0.8/1.0.9rc3/4.

Recomipling ALsa driver/lib/oss/tools/util.., with drive, ever try 
----- ./configure --with-cards=azx (error: no this card)
----- ./configure ; make; make install (no error report), not run
./snddevices

No sound ! So much days, so much test..... (if i915GM chipset + Conexant
chip...(see below lspci), will be powered by alsa-1.0.9rc4 ? --- if still
not, I will just wait for new alsa-driver.)

thanks
======================================================================
Relationships       ID      Summary
----------------------------------------------------------------------
has duplicate       0001931 Unknown AC97 codec, digital sound works
======================================================================

----------------------------------------------------------------------
 fedukoff - 05-10-06 17:41 
----------------------------------------------------------------------
>From oss forum: (http://4front-tech.com/forum/viewtopic.php?p=3300#3300)
some more info: they told me that it looks like the driver is using the
modem for sound. I should try to disable the modem in the bios..but you
know..there isn't such in option in the bios

And I found on linpus site "Conexant 6.03-1 HSF modem driver"
http://www.linpus.com/Downloads/desktop/2004-10-1_hsf.htm

----------------------------------------------------------------------
 fedukoff - 05-10-06 18:03 
----------------------------------------------------------------------
I have received an automatic email from BenQ:
Thank you for choosing BenQ!  In order to serve your inquiry more
effectively, please 
link to BenQ Contact Center http://Support.BenQ.com for assistance
regarding any 
product and service inquiry, our BenQ Technical Expert will be glad to
serve you. 

And when ask status of my enquiry
(http://bcc.benq.com/BCCFRONTUI/process/querycase.aspx) it is SOLVED!

Issue History
Date Modified  Username       Field                    Change              
======================================================================
05-28-05 08:49 liangalei      New Issue                                    
05-28-05 08:49 liangalei      Distribution              => Fedora Core 3   
05-28-05 08:49 liangalei      Kernel Version            => 2.6.11-1.14     
05-28-05 09:17 liangalei      Note Added: 0004759                          
05-28-05 09:18 liangalei      Issue Monitored: liangalei                    
05-30-05 03:29 liangalei      Note Added: 0004795                          
06-01-05 16:58 tiwai          Note Added: 0004863                          
06-01-05 17:04 tiwai          Category                 0_compilation problem_!!!
=> PCI - intel8x0
06-02-05 10:42 liangalei      Note Added: 0004875                          
06-06-05 09:53 perex          Assigned To              perex =>            
06-23-05 12:42 tiwai          Note Added: 0005182                          
06-24-05 03:12 liangalei      File Added: asound.state                     
06-24-05 03:13 liangalei      File Added: ac97#0-0                         
06-24-05 03:14 liangalei      File Added: ac97#0-0+regs                    
06-24-05 03:18 liangalei      Note Added: 0005202                          
08-28-05 15:28 ybar           Note Added: 0005962                          
11-29-05 05:59 derek          Issue Monitored: derek                       
11-29-05 06:00 derek          Issue End Monitor: derek                     
11-29-05 06:03 derek          Note Added: 0006870                          
02-07-06 10:03 anhi1968       Issue Monitored: anhi1968                    
02-17-06 22:22 gandhi         Note Added: 0008110                          
02-17-06 23:26 gandhi         Issue Monitored: gandhi                      
02-17-06 23:28 gandhi         Note Edited: 0008110                         
02-17-06 23:43 gandhi         Note Edited: 0008110                         
02-18-06 02:05 gandhi         Note Added: 0008112                          
02-18-06 08:13 liangalei      Note Added: 0008116                          
02-18-06 15:56 gandhi         Issue Monitored: tiwai                       
02-18-06 15:56 gandhi         Note Added: 0008120                          
02-28-06 12:02 anhi1968       Issue End Monitor: anhi1968                    
03-03-06 08:29 robitech       Note Added: 0008336                          
03-15-06 05:34 JProgrammer    Note Added: 0008526                          
03-15-06 05:37 JProgrammer    Issue Monitored: JProgrammer                    
03-15-06 06:39 Kooper         Note Added: 0008527                          
03-15-06 11:40 tiwai          Note Added: 0008531                          
03-15-06 11:53 Kooper         Note Added: 0008535                          
03-15-06 12:13 JProgrammer    Note Added: 0008538                          
03-15-06 12:18 tiwai          Note Added: 0008540                          
03-15-06 23:48 JProgrammer    Note Added: 0008579                          
03-16-06 15:04 tiwai          Note Added: 0008606                          
03-16-06 15:05 tiwai          File Added: ac97-remove-modem.diff                
   
03-16-06 19:34 rlrevell       Relationship added       has duplicate 0001931
03-17-06 04:10 Kooper         Note Added: 0008633                          
03-17-06 12:07 tiwai          Note Added: 0008641                          
03-17-06 12:32 Kooper         File Added: kooper-alsa-bugreport.tgz             
      
03-17-06 12:35 Kooper         Note Added: 0008644                          
03-21-06 02:38 JProgrammer    Note Added: 0008736                          
03-25-06 10:51 alessiodf      Note Added: 0008944                          
03-26-06 20:12 alessiodf      Note Edited: 0008944                         
03-26-06 20:15 alessiodf      Note Edited: 0008944                         
03-30-06 15:47 trava          Note Added: 0009034                          
04-07-06 13:05 kesiev         Note Added: 0009137                          
04-07-06 13:06 kesiev         Note Edited: 0009137                         
04-09-06 03:23 alt_ackbar     Note Added: 0009177                          
04-11-06 11:34 kesiev         Issue Monitored: kesiev                      
04-11-06 11:39 kesiev         Issue End Monitor: kesiev                    
04-12-06 16:21 kesiev         Note Added: 0009246                          
04-12-06 16:21 kesiev         File Added: qta058Ba.inf                     
04-13-06 14:00 kesiev         Note Added: 0009259                          
04-13-06 14:00 kesiev         File Added: windows-details.txt                   

04-13-06 14:54 kesiev         Note Edited: 0009259                         
04-13-06 15:34 kesiev         Issue Monitored: kesiev                      
04-15-06 12:33 kesiev         Note Edited: 0009259                         
04-15-06 12:33 kesiev         File Added: alsasound.conf                    
04-18-06 09:35 fedukoff       Note Added: 0009326                          
05-01-06 20:24 rawsock        Issue Monitored: rawsock                     
05-03-06 15:55 fedukoff       Issue Monitored: fedukoff                    
05-09-06 14:36 drseergio      Note Added: 0009658                          
05-09-06 16:40 rlrevell       Note Added: 0009661                          
05-09-06 17:16 drseergio      Note Added: 0009662                          
05-09-06 17:19 drseergio      Note Edited: 0009662                         
05-09-06 17:28 rlrevell       Note Added: 0009664                          
05-09-06 17:31 drseergio      Note Edited: 0009662                         
05-09-06 17:32 drseergio      Note Added: 0009665                          
05-09-06 17:41 drseergio      Note Edited: 0009665                         
05-09-06 17:49 rlrevell       Note Added: 0009667                          
05-09-06 17:52 drseergio      Note Added: 0009668                          
05-09-06 17:57 rlrevell       Note Added: 0009669                          
05-09-06 18:00 drseergio      Note Added: 0009670                          
05-09-06 18:33 fedukoff       Note Added: 0009672                          
05-09-06 18:44 drseergio      Note Added: 0009673                          
05-09-06 19:11 fedukoff       Note Added: 0009674                          
05-09-06 19:12 rlrevell       Note Added: 0009675                          
05-09-06 19:21 drseergio      Note Added: 0009676                          
05-09-06 19:27 fedukoff       Note Added: 0009677                          
05-09-06 19:28 rlrevell       Note Added: 0009678                          
05-09-06 19:30 drseergio      Note Added: 0009679                          
05-09-06 19:39 fedukoff       Note Added: 0009680                          
05-09-06 19:42 rlrevell       Note Added: 0009681                          
05-09-06 19:50 drseergio      Note Added: 0009682                          
05-09-06 20:16 fedukoff       Note Added: 0009683                          
05-09-06 20:17 rlrevell       Note Added: 0009684                          
05-09-06 20:19 rlrevell       Note Added: 0009685                          
05-09-06 20:28 drseergio      Note Added: 0009686                          
05-09-06 20:29 fedukoff       Note Added: 0009687                          
05-09-06 20:34 rlrevell       Note Added: 0009688                          
05-09-06 22:07 fedukoff       Note Added: 0009691                          
05-10-06 15:58 fedukoff       Note Added: 0009719                          
05-10-06 17:10 rlrevell       Note Added: 0009735                          
05-10-06 17:19 kesiev         Note Added: 0009738                          
05-10-06 17:21 trava          Note Added: 0009740                          
05-10-06 17:25 trava          Note Added: 0009741                          
05-10-06 17:41 fedukoff       Note Added: 0009743                          
05-10-06 18:03 fedukoff       Note Added: 0009745                          
======================================================================




-------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642

^ permalink raw reply

* [PATCH 07/14] FS-Cache: Provide a filesystem-specific sync'able page bit [try #8]
From: David Howells @ 2006-05-10 16:01 UTC (permalink / raw)
  To: torvalds, akpm, steved, trond.myklebust, aviro
  Cc: linux-fsdevel, linux-cachefs, nfsv4, linux-kernel
In-Reply-To: <20060510160111.9058.55026.stgit@warthog.cambridge.redhat.com>

The attached patch provides a filesystem-specific page bit that a filesystem
can synchronise upon.  This can be used, for example, by a netfs to synchronise
with CacheFS writing its pages to disk.

The PG_checked bit is replaced with PG_fs_misc, and various operations are
provided based upon that.  The *PageChecked() macros still exist, though now
they just convert to *PageFsMisc() macros.  The name of the "checked" macros
seems appropriate as they're used for metadata page validation by various
filesystems.

Signed-Off-By: David Howells <dhowells@redhat.com>
---

 fs/afs/dir.c               |    5 +----
 fs/ext2/dir.c              |    6 +++---
 fs/ext3/inode.c            |   10 +++++-----
 fs/freevxfs/vxfs_subr.c    |    2 +-
 fs/reiserfs/inode.c        |   10 +++++-----
 include/linux/page-flags.h |   15 ++++++++++-----
 include/linux/pagemap.h    |   11 +++++++++++
 mm/filemap.c               |   17 +++++++++++++++++
 mm/migrate.c               |    4 ++--
 mm/page_alloc.c            |    2 +-
 10 files changed, 56 insertions(+), 26 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index a6dff6a..c23de2b 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -155,11 +155,9 @@ #endif
 		}
 	}
 
-	SetPageChecked(page);
 	return;
 
  error:
-	SetPageChecked(page);
 	SetPageError(page);
 
 } /* end afs_dir_check_page() */
@@ -193,8 +191,7 @@ static struct page *afs_dir_get_page(str
 		kmap(page);
 		if (!PageUptodate(page))
 			goto fail;
-		if (!PageChecked(page))
-			afs_dir_check_page(dir, page);
+		afs_dir_check_page(dir, page);
 		if (PageError(page))
 			goto fail;
 	}
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index d672aa9..cf9cee4 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -112,7 +112,7 @@ static void ext2_check_page(struct page 
 	if (offs != limit)
 		goto Eend;
 out:
-	SetPageChecked(page);
+	SetPageFsMisc(page);
 	return;
 
 	/* Too bad, we had an error */
@@ -152,7 +152,7 @@ Eend:
 		dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
 		(unsigned long) le32_to_cpu(p->inode));
 fail:
-	SetPageChecked(page);
+	SetPageFsMisc(page);
 	SetPageError(page);
 }
 
@@ -166,7 +166,7 @@ static struct page * ext2_get_page(struc
 		kmap(page);
 		if (!PageUptodate(page))
 			goto fail;
-		if (!PageChecked(page))
+		if (!PageFsMisc(page))
 			ext2_check_page(page);
 		if (PageError(page))
 			goto fail;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2edd7ee..2a38eee 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1528,12 +1528,12 @@ static int ext3_journalled_writepage(str
 		goto no_write;
 	}
 
-	if (!page_has_buffers(page) || PageChecked(page)) {
+	if (!page_has_buffers(page) || PageFsMisc(page)) {
 		/*
 		 * It's mmapped pagecache.  Add buffers and journal it.  There
 		 * doesn't seem much point in redirtying the page here.
 		 */
-		ClearPageChecked(page);
+		ClearPageFsMisc(page);
 		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
 					ext3_get_block);
 		if (ret != 0) {
@@ -1590,7 +1590,7 @@ static void ext3_invalidatepage(struct p
 	 * If it's a full truncate we just forget about the pending dirtying
 	 */
 	if (offset == 0)
-		ClearPageChecked(page);
+		ClearPageFsMisc(page);
 
 	journal_invalidatepage(journal, page, offset);
 }
@@ -1599,7 +1599,7 @@ static int ext3_releasepage(struct page 
 {
 	journal_t *journal = EXT3_JOURNAL(page->mapping->host);
 
-	WARN_ON(PageChecked(page));
+	WARN_ON(PageFsMisc(page));
 	if (!page_has_buffers(page))
 		return 0;
 	return journal_try_to_free_buffers(journal, page, wait);
@@ -1695,7 +1695,7 @@ out:
  */
 static int ext3_journalled_set_page_dirty(struct page *page)
 {
-	SetPageChecked(page);
+	SetPageFsMisc(page);
 	return __set_page_dirty_nobuffers(page);
 }
 
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index 50aae77..e884bfc 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c
@@ -79,7 +79,7 @@ vxfs_get_page(struct address_space *mapp
 		kmap(pp);
 		if (!PageUptodate(pp))
 			goto fail;
-		/** if (!PageChecked(pp)) **/
+		/** if (!PageFsMisc(pp)) **/
 			/** vxfs_check_page(pp); **/
 		if (PageError(pp))
 			goto fail;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9857e50..3c79e02 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2352,7 +2352,7 @@ static int reiserfs_write_full_page(stru
 	struct buffer_head *head, *bh;
 	int partial = 0;
 	int nr = 0;
-	int checked = PageChecked(page);
+	int checked = PageFsMisc(page);
 	struct reiserfs_transaction_handle th;
 	struct super_block *s = inode->i_sb;
 	int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
@@ -2421,7 +2421,7 @@ static int reiserfs_write_full_page(stru
 	 * blocks we're going to log
 	 */
 	if (checked) {
-		ClearPageChecked(page);
+		ClearPageFsMisc(page);
 		reiserfs_write_lock(s);
 		error = journal_begin(&th, s, bh_per_page + 1);
 		if (error) {
@@ -2802,7 +2802,7 @@ static void reiserfs_invalidatepage(stru
 	BUG_ON(!PageLocked(page));
 
 	if (offset == 0)
-		ClearPageChecked(page);
+		ClearPageFsMisc(page);
 
 	if (!page_has_buffers(page))
 		goto out;
@@ -2843,7 +2843,7 @@ static int reiserfs_set_page_dirty(struc
 {
 	struct inode *inode = page->mapping->host;
 	if (reiserfs_file_data_log(inode)) {
-		SetPageChecked(page);
+		SetPageFsMisc(page);
 		return __set_page_dirty_nobuffers(page);
 	}
 	return __set_page_dirty_buffers(page);
@@ -2866,7 +2866,7 @@ static int reiserfs_releasepage(struct p
 	struct buffer_head *bh;
 	int ret = 1;
 
-	WARN_ON(PageChecked(page));
+	WARN_ON(PageFsMisc(page));
 	spin_lock(&j->j_dirty_buffers_lock);
 	head = page_buffers(page);
 	bh = head;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index d276a4e..7d7ef97 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -73,7 +73,7 @@ #define PG_lru			 5
 #define PG_active		 6
 #define PG_slab			 7	/* slab debug (Suparna wants this) */
 
-#define PG_checked		 8	/* kill me in 2.5.<early>. */
+#define PG_fs_misc		 8
 #define PG_arch_1		 9
 #define PG_reserved		10
 #define PG_private		11	/* Has something at ->private */
@@ -274,10 +274,6 @@ #else
 #define PageHighMem(page)	0 /* needed to optimize away at compile time */
 #endif
 
-#define PageChecked(page)	test_bit(PG_checked, &(page)->flags)
-#define SetPageChecked(page)	set_bit(PG_checked, &(page)->flags)
-#define ClearPageChecked(page)	clear_bit(PG_checked, &(page)->flags)
-
 #define PageReserved(page)	test_bit(PG_reserved, &(page)->flags)
 #define SetPageReserved(page)	set_bit(PG_reserved, &(page)->flags)
 #define ClearPageReserved(page)	clear_bit(PG_reserved, &(page)->flags)
@@ -376,4 +372,13 @@ static inline void set_page_writeback(st
 	test_set_page_writeback(page);
 }
 
+/*
+ * Filesystem-specific page bit testing
+ */
+#define PageFsMisc(page)		test_bit(PG_fs_misc, &(page)->flags)
+#define SetPageFsMisc(page)		set_bit(PG_fs_misc, &(page)->flags)
+#define TestSetPageFsMisc(page)		test_and_set_bit(PG_fs_misc, &(page)->flags)
+#define ClearPageFsMisc(page)		clear_bit(PG_fs_misc, &(page)->flags)
+#define TestClearPageFsMisc(page)	test_and_clear_bit(PG_fs_misc, &(page)->flags)
+
 #endif	/* PAGE_FLAGS_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 7a1af57..049382d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -208,6 +208,17 @@ static inline void wait_on_page_writebac
 extern void end_page_writeback(struct page *page);
 
 /*
+ * Wait for filesystem-specific page synchronisation to complete
+ */
+static inline void wait_on_page_fs_misc(struct page *page)
+{
+	if (PageFsMisc(page))
+		wait_on_page_bit(page, PG_fs_misc);
+}
+
+extern void fastcall end_page_fs_misc(struct page *page);
+
+/*
  * Fault a userspace page into pagetables.  Return non-zero on a fault.
  *
  * This assumes that two userspace pages are always sufficient.  That's
diff --git a/mm/filemap.c b/mm/filemap.c
index fd57442..02c4925 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -545,6 +545,23 @@ void fastcall __lock_page(struct page *p
 EXPORT_SYMBOL(__lock_page);
 
 /*
+ * Note completion of filesystem specific page synchronisation
+ *
+ * This is used to allow a page to be written to a filesystem cache in the
+ * background without holding up the completion of readpage
+ */
+void fastcall end_page_fs_misc(struct page *page)
+{
+	smp_mb__before_clear_bit();
+	if (!TestClearPageFsMisc(page))
+		BUG();
+	smp_mb__after_clear_bit();
+	__wake_up_bit(page_waitqueue(page), &page->flags, PG_fs_misc);
+}
+
+EXPORT_SYMBOL(end_page_fs_misc);
+
+/*
  * a rather lightweight function, finding and getting a reference to a
  * hashed page atomically.
  */
diff --git a/mm/migrate.c b/mm/migrate.c
index 1c25040..bb3f22f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -272,8 +272,8 @@ void migrate_page_copy(struct page *newp
 		SetPageUptodate(newpage);
 	if (PageActive(page))
 		SetPageActive(newpage);
-	if (PageChecked(page))
-		SetPageChecked(newpage);
+	if (PageFsMisc(page))
+		SetPageFsMisc(newpage);
 	if (PageMappedToDisk(page))
 		SetPageMappedToDisk(newpage);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ea77c99..b40e04a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -544,7 +544,7 @@ static int prep_new_page(struct page *pa
 
 	page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
 			1 << PG_referenced | 1 << PG_arch_1 |
-			1 << PG_checked | 1 << PG_mappedtodisk);
+			1 << PG_fs_misc | 1 << PG_mappedtodisk);
 	set_page_private(page, 0);
 	set_page_refcounted(page);
 	kernel_map_pages(page, 1 << order, 1);


^ permalink raw reply related

* [PATCH 09/14] FS-Cache: Avoid ENFILE checking for kernel-specific open files [try #8]
From: David Howells @ 2006-05-10 16:01 UTC (permalink / raw)
  To: torvalds, akpm, steved, trond.myklebust, aviro
  Cc: linux-fsdevel, linux-cachefs, nfsv4, linux-kernel
In-Reply-To: <20060510160111.9058.55026.stgit@warthog.cambridge.redhat.com>

Make it possible to avoid ENFILE checking for kernel specific open files, such
as are used by the CacheFiles module.

After, for example, tarring up a kernel source tree over the network, the
CacheFiles module may easily have 20000+ files open in the backing filesystem,
thus causing all non-root processes to be given error ENFILE when they try to
open a file, socket, pipe, etc..

Signed-Off-By: David Howells <dhowells@redhat.com>
---

 Documentation/sysctl/fs.txt |    6 ++++-
 fs/file_table.c             |   48 +++++++++++++++++++++++++++++++++++--------
 fs/open.c                   |   20 ++++++++++++++++++
 include/linux/file.h        |    1 -
 include/linux/fs.h          |   10 +++++++++
 include/linux/sysctl.h      |    1 +
 kernel/sysctl.c             |   11 ++++++++++
 7 files changed, 86 insertions(+), 11 deletions(-)

diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 0b62c62..ead15f0 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -71,7 +71,7 @@ you might want to raise the limit.
 
 ==============================================================
 
-file-max & file-nr:
+file-max, file-nr & file-kernel:
 
 The kernel allocates file handles dynamically, but as yet it
 doesn't free them again.
@@ -88,6 +88,10 @@ close to the maximum, but the number of 
 significantly greater than 0, you've encountered a peak in your 
 usage of file handles and you don't need to increase the maximum.
 
+The value in file-kernel denotes the number of internal file handles
+that the kernel has open.  These do not contribute to ENFILE
+accounting.
+
 ==============================================================
 
 inode-max, inode-nr & inode-state:
diff --git a/fs/file_table.c b/fs/file_table.c
index bcea199..0b42be9 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -30,10 +30,13 @@ struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
+struct files_kernel_stat_struct files_kernel_stat;
+
 /* public. Not pretty! */
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
 
 static struct percpu_counter nr_files __cacheline_aligned_in_smp;
+static atomic_t nr_kernel_files;
 
 static inline void file_free_rcu(struct rcu_head *head)
 {
@@ -43,7 +46,10 @@ static inline void file_free_rcu(struct 
 
 static inline void file_free(struct file *f)
 {
-	percpu_counter_dec(&nr_files);
+	if (f->f_kernel_flags & FKFLAGS_NO_ENFILE)
+		atomic_dec(&nr_kernel_files);
+	else
+		percpu_counter_dec(&nr_files);
 	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
 }
 
@@ -74,45 +80,64 @@ int proc_nr_files(ctl_table *table, int 
 	files_stat.nr_files = get_nr_files();
 	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
 }
+int proc_files_kernel(ctl_table *table, int write, struct file *filp,
+		      void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	files_kernel_stat.nr_kernel_files = atomic_read(&nr_kernel_files);
+	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
+}
 #else
 int proc_nr_files(ctl_table *table, int write, struct file *filp,
                      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
+int proc_files_kernel(ctl_table *table, int write, struct file *filp,
+		      void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
 #endif
 
 /* Find an unused file structure and return a pointer to it.
  * Returns NULL, if there are no more free file structures or
  * we run out of memory.
  */
-struct file *get_empty_filp(void)
+struct file *get_empty_kernel_filp(unsigned short kflags)
 {
 	struct task_struct *tsk;
 	static int old_max;
 	struct file * f;
 
 	/*
-	 * Privileged users can go above max_files
+	 * Privileged users can go above max_files and internal kernel users
+	 * can avoid it completely
 	 */
-	if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
+	if (!(kflags & FKFLAGS_NO_ENFILE) &&
+	    get_nr_files() >= files_stat.max_files &&
+	    !capable(CAP_SYS_ADMIN)
+	    ) {
 		/*
-		 * percpu_counters are inaccurate.  Do an expensive check before
-		 * we go and fail.
+		 * percpu_counters are inaccurate.  Do an expensive
+		 * check before we go and fail.
 		 */
 		if (percpu_counter_sum(&nr_files) >= files_stat.max_files)
 			goto over;
 	}
 
-	f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
+	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
 	if (f == NULL)
 		goto fail;
 
-	percpu_counter_inc(&nr_files);
-	memset(f, 0, sizeof(*f));
+	if (kflags & FKFLAGS_NO_ENFILE)
+		atomic_inc(&nr_kernel_files);
+	else
+		percpu_counter_inc(&nr_files);
+
 	if (security_file_alloc(f))
 		goto fail_sec;
 
+	f->f_kernel_flags = kflags;
 	tsk = current;
 	INIT_LIST_HEAD(&f->f_u.fu_list);
 	atomic_set(&f->f_count, 1);
@@ -138,6 +163,11 @@ fail:
 	return NULL;
 }
 
+struct file *get_empty_filp(void)
+{
+	return get_empty_kernel_filp(0);
+}
+
 EXPORT_SYMBOL(get_empty_filp);
 
 void fastcall fput(struct file *file)
diff --git a/fs/open.c b/fs/open.c
index c7a48ee..a7d293f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -974,6 +974,26 @@ struct file *dentry_open(struct dentry *
 EXPORT_SYMBOL(dentry_open);
 
 /*
+ * open a specifically in-kernel file
+ */
+struct file *dentry_open_kernel(struct dentry *dentry, struct vfsmount *mnt, int flags)
+{
+	int error;
+	struct file *f;
+
+	error = -ENFILE;
+	f = get_empty_kernel_filp(FKFLAGS_NO_ENFILE);
+	if (f == NULL) {
+		dput(dentry);
+		mntput(mnt);
+		return ERR_PTR(error);
+	}
+
+	return __dentry_open(dentry, mnt, flags, f, NULL);
+}
+EXPORT_SYMBOL_GPL(dentry_open_kernel);
+
+/*
  * Find an empty file descriptor entry, and mark it busy.
  */
 int get_unused_fd(void)
diff --git a/include/linux/file.h b/include/linux/file.h
index 9f7c251..da7be8f 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -79,7 +79,6 @@ extern void FASTCALL(set_close_on_exec(u
 extern void put_filp(struct file *);
 extern int get_unused_fd(void);
 extern void FASTCALL(put_unused_fd(unsigned int fd));
-struct kmem_cache;
 
 extern struct file ** alloc_fd_array(int);
 extern void free_fd_array(struct file **, int);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e57518e..cdb0972 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -34,7 +34,11 @@ struct files_stat_struct {
 	int nr_free_files;	/* read only */
 	int max_files;		/* tunable */
 };
+struct files_kernel_stat_struct {
+	int nr_kernel_files;	/* read only */
+};
 extern struct files_stat_struct files_stat;
+extern struct files_kernel_stat_struct files_kernel_stat;
 extern int get_max_files(void);
 
 struct inodes_stat_t {
@@ -70,6 +74,8 @@ #define FMODE_PWRITE	FMODE_PREAD	/* Thes
    behavior for cross-node execution/opening_for_writing of files */
 #define FMODE_EXEC	16
 
+#define FKFLAGS_NO_ENFILE	1	/* kernel internal file (ignored for ENFILE accounting) */
+
 #define RW_MASK		1
 #define RWA_MASK	2
 #define READ 0
@@ -640,6 +646,7 @@ struct file {
 	atomic_t		f_count;
 	unsigned int 		f_flags;
 	mode_t			f_mode;
+	unsigned short		f_kernel_flags;
 	loff_t			f_pos;
 	struct fown_struct	f_owner;
 	unsigned int		f_uid, f_gid;
@@ -1382,6 +1389,7 @@ extern long do_sys_open(int fdf, const c
 			int mode);
 extern struct file *filp_open(const char *, int, int);
 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
+extern struct file * dentry_open_kernel(struct dentry *, struct vfsmount *, int);
 extern int filp_close(struct file *, fl_owner_t id);
 extern char * getname(const char __user *);
 
@@ -1583,6 +1591,7 @@ static inline void insert_inode_hash(str
 }
 
 extern struct file * get_empty_filp(void);
+extern struct file * get_empty_kernel_filp(unsigned short fkflags);
 extern void file_move(struct file *f, struct list_head *list);
 extern void file_kill(struct file *f);
 struct bio;
@@ -1608,6 +1617,7 @@ extern ssize_t generic_file_direct_write
 		unsigned long *, loff_t, loff_t *, size_t, size_t);
 extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
 		unsigned long, loff_t, loff_t *, size_t, ssize_t);
+extern int generic_file_buffered_write_one_kernel_page(struct file *, pgoff_t, struct page *);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
 ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 76eaeff..8a0d4f8 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -787,6 +787,7 @@ enum
 	FS_AIO_NR=18,	/* current system-wide number of aio requests */
 	FS_AIO_MAX_NR=19,	/* system-wide maximum number of aio requests */
 	FS_INOTIFY=20,	/* inotify submenu */
+	FS_FILE_KERNEL=21,	/* int: number of internal kernel files */
 };
 
 /* /proc/sys/fs/quota/ */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e82726f..f849104 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -53,6 +53,9 @@ #include <asm/processor.h>
 extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
                      void __user *buffer, size_t *lenp, loff_t *ppos);
 
+extern int proc_files_kernel(ctl_table *table, int write, struct file *filp,
+                     void __user *buffer, size_t *lenp, loff_t *ppos);
+
 #if defined(CONFIG_SYSCTL)
 
 /* External variables not in a header file. */
@@ -956,6 +959,14 @@ static ctl_table fs_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 	{
+		.ctl_name	= FS_FILE_KERNEL,
+		.procname	= "file-kernel",
+		.data		= &files_stat,
+		.maxlen		= 1*sizeof(int),
+		.mode		= 0444,
+		.proc_handler	= &proc_files_kernel,
+	},
+	{
 		.ctl_name	= FS_DENTRY,
 		.procname	= "dentry-state",
 		.data		= &dentry_stat,


^ permalink raw reply related

* [PATCH 01/14] NFS: Permit filesystem to override root dentry on mount [try #8]
From: David Howells @ 2006-05-10 16:01 UTC (permalink / raw)
  To: torvalds, akpm, steved, trond.myklebust, aviro
  Cc: linux-fsdevel, linux-cachefs, nfsv4, linux-kernel
In-Reply-To: <20060510160111.9058.55026.stgit@warthog.cambridge.redhat.com>

The attached patch extends the get_sb() filesystem operation to take an extra
argument that permits the VFS to pass in the target vfsmount that defines the
mountpoint.

The filesystem is then required to manually set the superblock and root dentry
pointers. For most filesystems, this should be done with simple_set_mnt() which
will set the superblock pointer and then set the root dentry to the
superblock's s_root (as per the old default behaviour).

This patch permits a superblock to be implicitly shared amongst several mount
points, such as can be done with NFS to avoid potential inode aliasing (see
patch #5). In such a case, simple_set_mnt() would not be called, and instead
the mnt_root and mnt_sb would be set directly.

This patch also changes the superblock cleanup routine to make it use
shrink_dcache_sb() instead of shrink_dcache_anon(). This required is because
the superblock may now have multiple trees that aren't actually bound to
s_root, but that still need to be cleaned up. The currently called functions
assume that the whole tree is rooted at s_root, and that anonymous dentries are
not the roots of trees which results in dentries being left unculled.


Following discussion with Al Viro, the following changes [try #2] were
made to the previous attempt at this set of patches:

 (*) The vfsmount is now passed into the get_sb() method for a filesystem
     instead of passing a pointer to a pointer to a dentry into which get_sb()
     could stick a root dentry if it wanted. get_sb() now instantiates the
     superblock and root dentry pointers in the vfsmount itself.

 (*) The get_sb() method now returns an integer (0 or -ve error number) rather
     than the superblock pointer or cast error number.

 (*) the get_sb_*() convenience functions in the core kernel now take a
     vfsmount pointer argument and return an integer, so most filesystems have
     to change very little.

 (*) If one of the convenience function is not used, then get_sb() should
     normally call simple_set_mnt() to instantiate the vfsmount. This will
     always return 0, and so can be tail-called from get_sb().

 (*) generic_shutdown_super() now calls shrink_dcache_sb() to clean up the
     dcache upon superblock destruction rather than shrink_dcache_parent() and
     shrink_dcache_anon(). This is because, as far as I can tell, the current
     code assumes that all the dentries will be linked into a tree depending
     from sb->s_root, and that anonymous dentries won't have children.

     However, with the way these patches implement NFS superblock sharing,
     these assumptions are violated: the root of the filesystem is simply a
     dummy dentry and inode (the real inode for '/' may well be inaccessible),
     and all the vfsmounts are rooted on anonymous[*] dentries with child
     trees.

     [*] Anonymous until discovered from another tree.

Further changes [try #3] that have been made:

 (*) The patches are now against Trond's NFS git tree, so won't apply to
     Linus's tree.

 (*) Core documentation changes have been moved to the first patch.

Further changes [try #4]:

 (*) shrink_dcache_parent() has been reintroduced to handle final cleanup of
     filesystem types such as nfsd and binfmt_misc that have pinned dentries.

 (*) get_sb_bdev() has been fixed to handle the superblock sharing case
     correctly (it should not go down the error path).

Further changes [try #5]:

 (*) Make futex_get_sb() static once again.

 (*) Fold together the patch that changes the core and the patch that modifies
     all the filesystems to cope with the core change as some people are unable
     to cope with two interdependent patches.


Signed-Off-By: David Howells <dhowells@redhat.com>
---

 Documentation/filesystems/Locking         |    7 +
 Documentation/filesystems/porting         |    7 +
 Documentation/filesystems/vfs.txt         |    4 -
 arch/ia64/kernel/perfmon.c                |    7 +
 arch/powerpc/platforms/cell/spufs/inode.c |    6 +
 drivers/infiniband/core/uverbs_main.c     |    7 +
 drivers/isdn/capi/capifs.c                |    6 +
 drivers/misc/ibmasm/ibmasmfs.c            |    7 +
 drivers/oprofile/oprofilefs.c             |    6 +
 drivers/usb/core/inode.c                  |    6 +
 drivers/usb/gadget/inode.c                |    6 +
 fs/9p/vfs_super.c                         |   21 ++--
 fs/adfs/super.c                           |    7 +
 fs/affs/super.c                           |    7 +
 fs/afs/super.c                            |   24 +++--
 fs/autofs/init.c                          |    6 +
 fs/autofs4/init.c                         |    6 +
 fs/befs/linuxvfs.c                        |    7 +
 fs/bfs/inode.c                            |    6 +
 fs/binfmt_misc.c                          |    6 +
 fs/block_dev.c                            |    6 +
 fs/cifs/cifsfs.c                          |   10 +-
 fs/coda/inode.c                           |    6 +
 fs/configfs/mount.c                       |    6 +
 fs/cramfs/inode.c                         |    7 +
 fs/debugfs/inode.c                        |    8 +-
 fs/devfs/base.c                           |    8 +-
 fs/devpts/inode.c                         |    6 +
 fs/efs/super.c                            |    6 +
 fs/eventpoll.c                            |   13 +--
 fs/ext2/super.c                           |    6 +
 fs/ext3/super.c                           |    6 +
 fs/freevxfs/vxfs_super.c                  |    7 +
 fs/fuse/inode.c                           |    8 +-
 fs/hfs/super.c                            |    7 +
 fs/hfsplus/super.c                        |    8 +-
 fs/hostfs/hostfs_kern.c                   |    8 +-
 fs/hpfs/super.c                           |    7 +
 fs/hppfs/hppfs_kern.c                     |    8 +-
 fs/hugetlbfs/inode.c                      |    6 +
 fs/inotify.c                              |    6 +
 fs/isofs/inode.c                          |    7 +
 fs/jffs/inode-v23.c                       |    7 +
 fs/jffs2/super.c                          |   49 +++++-----
 fs/jfs/super.c                            |    7 +
 fs/libfs.c                                |   12 +-
 fs/minix/inode.c                          |    7 +
 fs/msdos/namei.c                          |    9 +-
 fs/namespace.c                            |    9 ++
 fs/ncpfs/inode.c                          |    6 +
 fs/nfs/inode.c                            |  146 ++++++++++++++++++-----------
 fs/nfsd/nfsctl.c                          |    6 +
 fs/ntfs/super.c                           |    7 +
 fs/ocfs2/dlm/dlmfs.c                      |    6 +
 fs/ocfs2/super.c                          |   12 +-
 fs/openpromfs/inode.c                     |    6 +
 fs/pipe.c                                 |    9 +-
 fs/proc/root.c                            |    6 +
 fs/qnx4/inode.c                           |    7 +
 fs/ramfs/inode.c                          |   13 +--
 fs/reiserfs/super.c                       |    9 +-
 fs/romfs/inode.c                          |    7 +
 fs/smbfs/inode.c                          |    6 +
 fs/super.c                                |   97 +++++++++++--------
 fs/sysfs/mount.c                          |    6 +
 fs/sysv/super.c                           |   13 +--
 fs/udf/super.c                            |    6 +
 fs/ufs/super.c                            |    6 +
 fs/vfat/namei.c                           |    9 +-
 fs/xfs/linux-2.6/xfs_super.c              |    8 +-
 include/linux/fs.h                        |   25 +++--
 include/linux/ramfs.h                     |    4 -
 ipc/mqueue.c                              |    8 +-
 kernel/cpuset.c                           |    8 +-
 kernel/futex.c                            |    8 +-
 mm/shmem.c                                |    6 +
 net/socket.c                              |    7 +
 net/sunrpc/rpc_pipe.c                     |    6 +
 security/inode.c                          |    8 +-
 security/selinux/selinuxfs.c              |    7 +
 80 files changed, 498 insertions(+), 399 deletions(-)

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 1045da5..3abf08f 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -142,15 +142,16 @@ see also dquot_operations section.
 
 --------------------------- file_system_type ---------------------------
 prototypes:
-	struct super_block *(*get_sb) (struct file_system_type *, int,
-			const char *, void *);
+	struct int (*get_sb) (struct file_system_type *, int,
+			const char *, void *, struct vfsmount *);
 	void (*kill_sb) (struct super_block *);
 locking rules:
 		may block	BKL
 get_sb		yes		yes
 kill_sb		yes		yes
 
-->get_sb() returns error or a locked superblock (exclusive on ->s_umount).
+->get_sb() returns error or 0 with locked superblock attached to the vfsmount
+(exclusive on ->s_umount).
 ->kill_sb() takes a write-locked superblock, does all shutdown work on it,
 unlocks and drops the reference.
 
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 2f38846..ae8db97 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -50,10 +50,11 @@ Turn your foo_read_super() into a functi
 success and negative number in case of error (-EINVAL unless you have more
 informative error value to report).  Call it foo_fill_super().  Now declare
 
-struct super_block foo_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+int foo_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super,
+			   mnt);
 }
 
 (or similar with s/bdev/nodev/ or s/bdev/single/, depending on the kind of
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 3a2e552..dd7d0dc 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -113,8 +113,8 @@ members are defined:
 struct file_system_type {
 	const char *name;
 	int fs_flags;
-        struct super_block *(*get_sb) (struct file_system_type *, int,
-                                       const char *, void *);
+        struct int (*get_sb) (struct file_system_type *, int,
+                              const char *, void *, struct vfsmount *);
         void (*kill_sb) (struct super_block *);
         struct module *owner;
         struct file_system_type * next;
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 077f212..2359e28 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -595,10 +595,11 @@ pfm_get_unmapped_area(struct file *file,
 }
 
 
-static struct super_block *
-pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
+static int
+pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data,
+	     struct vfsmount *mnt)
 {
-	return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC);
+	return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt);
 }
 
 static struct file_system_type pfm_fs_type = {
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index d955419..45cbd05 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -428,11 +428,11 @@ spufs_fill_super(struct super_block *sb,
 	return spufs_create_root(sb, data);
 }
 
-static struct super_block *
+static int
 spufs_get_sb(struct file_system_type *fstype, int flags,
-		const char *name, void *data)
+		const char *name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fstype, flags, data, spufs_fill_super);
+	return get_sb_single(fstype, flags, data, spufs_fill_super, mnt);
 }
 
 static struct file_system_type spufs_type = {
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index ff092a0..7a05783 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -820,11 +820,12 @@ static void ib_uverbs_remove_one(struct 
 	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
 }
 
-static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
-					       const char *dev_name, void *data)
+static int uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
+			       const char *dev_name, void *data,
+			       struct vfsmount *mnt)
 {
 	return get_sb_pseudo(fs_type, "infinibandevent:", NULL,
-			     INFINIBANDEVENTFS_MAGIC);
+			     INFINIBANDEVENTFS_MAGIC, mnt);
 }
 
 static struct file_system_type uverbs_event_fs = {
diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c
index 0a37ade..9ea6bd0 100644
--- a/drivers/isdn/capi/capifs.c
+++ b/drivers/isdn/capi/capifs.c
@@ -121,10 +121,10 @@ fail:
 	return -ENOMEM;
 }
 
-static struct super_block *capifs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int capifs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, capifs_fill_super);
+	return get_sb_single(fs_type, flags, data, capifs_fill_super, mnt);
 }
 
 static struct file_system_type capifs_fs_type = {
diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
index 26a230b..4a35caf 100644
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -90,10 +90,11 @@ static void ibmasmfs_create_files (struc
 static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent);
 
 
-static struct super_block *ibmasmfs_get_super(struct file_system_type *fst,
-			int flags, const char *name, void *data)
+static int ibmasmfs_get_super(struct file_system_type *fst,
+			int flags, const char *name, void *data,
+			struct vfsmount *mnt)
 {
-	return get_sb_single(fst, flags, data, ibmasmfs_fill_super);
+	return get_sb_single(fst, flags, data, ibmasmfs_fill_super, mnt);
 }
 
 static struct super_operations ibmasmfs_s_ops = {
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index b62da9b..71c2da2 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -272,10 +272,10 @@ static int oprofilefs_fill_super(struct 
 }
 
 
-static struct super_block *oprofilefs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int oprofilefs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, oprofilefs_fill_super);
+	return get_sb_single(fs_type, flags, data, oprofilefs_fill_super, mnt);
 }
 
 
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 695b90a..bfc9b28 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -543,10 +543,10 @@ static void fs_remove_file (struct dentr
 
 /* --------------------------------------------------------------------- */
 
-static struct super_block *usb_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int usb_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, usbfs_fill_super);
+	return get_sb_single(fs_type, flags, data, usbfs_fill_super, mnt);
 }
 
 static struct file_system_type usb_fs_type = {
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index 0eb010a..6b64986 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -2064,11 +2064,11 @@ gadgetfs_fill_super (struct super_block 
 }
 
 /* "mount -t gadgetfs path /dev/gadget" ends up here */
-static struct super_block *
+static int
 gadgetfs_get_sb (struct file_system_type *t, int flags,
-		const char *path, void *opts)
+		const char *path, void *opts, struct vfsmount *mnt)
 {
-	return get_sb_single (t, flags, opts, gadgetfs_fill_super);
+	return get_sb_single (t, flags, opts, gadgetfs_fill_super, mnt);
 }
 
 static void
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 00c1f6b..8b15bb2 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -99,12 +99,13 @@ v9fs_fill_super(struct super_block *sb, 
  * @flags: mount flags
  * @dev_name: device name that was mounted
  * @data: mount options
+ * @mnt: mountpoint record to be instantiated
  *
  */
 
-static struct super_block *v9fs_get_sb(struct file_system_type
-				       *fs_type, int flags,
-				       const char *dev_name, void *data)
+static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
+		       const char *dev_name, void *data,
+		       struct vfsmount *mnt)
 {
 	struct super_block *sb = NULL;
 	struct v9fs_fcall *fcall = NULL;
@@ -123,17 +124,19 @@ static struct super_block *v9fs_get_sb(s
 
 	v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
 	if (!v9ses)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
 	if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
 		dprintk(DEBUG_ERROR, "problem initiating session\n");
-		sb = ERR_PTR(newfid);
+		retval = newfid;
 		goto out_free_session;
 	}
 
 	sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
-	if (IS_ERR(sb))
+	if (IS_ERR(sb)) {
+		retval = PTR_ERR(sb);
 		goto out_close_session;
+	}
 	v9fs_fill_super(sb, v9ses, flags);
 
 	inode = v9fs_get_inode(sb, S_IFDIR | mode);
@@ -184,19 +187,19 @@ static struct super_block *v9fs_get_sb(s
 		goto put_back_sb;
 	}
 
-	return sb;
+	return simple_set_mnt(mnt, sb);
 
 out_close_session:
 	v9fs_session_close(v9ses);
 out_free_session:
 	kfree(v9ses);
-	return sb;
+	return retval;
 
 put_back_sb:
 	/* deactivate_super calls v9fs_kill_super which will frees the rest */
 	up_write(&sb->s_umount);
 	deactivate_super(sb);
-	return ERR_PTR(retval);
+	return retval;
 }
 
 /**
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 252abda..1b58a9b 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -470,10 +470,11 @@ error:
 	return -EINVAL;
 }
 
-static struct super_block *adfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int adfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type adfs_fs_type = {
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 4d7e5b1..6a52e78 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -524,10 +524,11 @@ affs_statfs(struct super_block *sb, stru
 	return 0;
 }
 
-static struct super_block *affs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int affs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type affs_fs_type = {
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 93a7821..67d1f5c 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -38,9 +38,9 @@ struct afs_mount_params {
 static void afs_i_init_once(void *foo, kmem_cache_t *cachep,
 			    unsigned long flags);
 
-static struct super_block *afs_get_sb(struct file_system_type *fs_type,
-				      int flags, const char *dev_name,
-				      void *data);
+static int afs_get_sb(struct file_system_type *fs_type,
+		      int flags, const char *dev_name,
+		      void *data, struct vfsmount *mnt);
 
 static struct inode *afs_alloc_inode(struct super_block *sb);
 
@@ -294,10 +294,11 @@ static int afs_fill_super(struct super_b
  * get an AFS superblock
  * - TODO: don't use get_sb_nodev(), but rather call sget() directly
  */
-static struct super_block *afs_get_sb(struct file_system_type *fs_type,
-				      int flags,
-				      const char *dev_name,
-				      void *options)
+static int afs_get_sb(struct file_system_type *fs_type,
+		      int flags,
+		      const char *dev_name,
+		      void *options,
+		      struct vfsmount *mnt)
 {
 	struct afs_mount_params params;
 	struct super_block *sb;
@@ -311,7 +312,7 @@ static struct super_block *afs_get_sb(st
 	ret = afscm_start();
 	if (ret < 0) {
 		_leave(" = %d", ret);
-		return ERR_PTR(ret);
+		return ret;
 	}
 
 	/* parse the options */
@@ -348,18 +349,19 @@ static struct super_block *afs_get_sb(st
 		goto error;
 	}
 	sb->s_flags |= MS_ACTIVE;
+	simple_set_mnt(mnt, sb);
 
 	afs_put_volume(params.volume);
 	afs_put_cell(params.default_cell);
-	_leave(" = %p", sb);
-	return sb;
+	_leave(" = 0 [%p]", 0, sb);
+	return 0;
 
  error:
 	afs_put_volume(params.volume);
 	afs_put_cell(params.default_cell);
 	afscm_stop();
 	_leave(" = %d", ret);
-	return ERR_PTR(ret);
+	return ret;
 } /* end afs_get_sb() */
 
 /*****************************************************************************/
diff --git a/fs/autofs/init.c b/fs/autofs/init.c
index b977ece..aca1237 100644
--- a/fs/autofs/init.c
+++ b/fs/autofs/init.c
@@ -14,10 +14,10 @@ #include <linux/module.h>
 #include <linux/init.h>
 #include "autofs_i.h"
 
-static struct super_block *autofs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int autofs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, autofs_fill_super);
+	return get_sb_nodev(fs_type, flags, data, autofs_fill_super, mnt);
 }
 
 static struct file_system_type autofs_fs_type = {
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index acecec8..5d91933 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -14,10 +14,10 @@ #include <linux/module.h>
 #include <linux/init.h>
 #include "autofs_i.h"
 
-static struct super_block *autofs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int autofs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, autofs4_fill_super);
+	return get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt);
 }
 
 static struct file_system_type autofs_fs_type = {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 68ebd10..6ed07a5 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -899,11 +899,12 @@ befs_statfs(struct super_block *sb, stru
 	return 0;
 }
 
-static struct super_block *
+static int
 befs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name,
-	    void *data)
+	    void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type befs_fs_type = {
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 55a7a78..e7da03f 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -410,10 +410,10 @@ out:
 	return -EINVAL;
 }
 
-static struct super_block *bfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int bfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super, mnt);
 }
 
 static struct file_system_type bfs_fs_type = {
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c0a909e..692cca9 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -741,10 +741,10 @@ static int bm_fill_super(struct super_bl
 	return err;
 }
 
-static struct super_block *bm_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int bm_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, bm_fill_super);
+	return get_sb_single(fs_type, flags, data, bm_fill_super, mnt);
 }
 
 static struct linux_binfmt misc_format = {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f5958f4..38a2a66 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -300,10 +300,10 @@ static struct super_operations bdev_sops
 	.clear_inode = bdev_clear_inode,
 };
 
-static struct super_block *bd_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int bd_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576);
+	return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt);
 }
 
 static struct file_system_type bd_type = {
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3fdc225..6779837 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -462,9 +462,9 @@ #endif
 	.remount_fs = cifs_remount,
 };
 
-static struct super_block *
+static int
 cifs_get_sb(struct file_system_type *fs_type,
-	    int flags, const char *dev_name, void *data)
+	    int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
 	int rc;
 	struct super_block *sb = sget(fs_type, NULL, set_anon_super, NULL);
@@ -472,7 +472,7 @@ cifs_get_sb(struct file_system_type *fs_
 	cFYI(1, ("Devname: %s flags: %d ", dev_name, flags));
 
 	if (IS_ERR(sb))
-		return sb;
+		return PTR_ERR(sb);
 
 	sb->s_flags = flags;
 
@@ -480,10 +480,10 @@ cifs_get_sb(struct file_system_type *fs_
 	if (rc) {
 		up_write(&sb->s_umount);
 		deactivate_super(sb);
-		return ERR_PTR(rc);
+		return rc;
 	}
 	sb->s_flags |= MS_ACTIVE;
-	return sb;
+	return simple_set_mnt(mnt, sb);
 }
 
 static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov,
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index ada1a81..cba7020 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -307,10 +307,10 @@ static int coda_statfs(struct super_bloc
 
 /* init_coda: used by filesystems.c to register coda */
 
-static struct super_block *coda_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int coda_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, coda_fill_super);
+	return get_sb_nodev(fs_type, flags, data, coda_fill_super, mnt);
 }
 
 struct file_system_type coda_fs_type = {
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index be5d86a..3e5fe84 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -103,10 +103,10 @@ static int configfs_fill_super(struct su
 	return 0;
 }
 
-static struct super_block *configfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int configfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, configfs_fill_super);
+	return get_sb_single(fs_type, flags, data, configfs_fill_super, mnt);
 }
 
 static struct file_system_type configfs_fs_type = {
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 9efcc3a..37a91a1 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -528,10 +528,11 @@ static struct super_operations cramfs_op
 	.statfs		= cramfs_statfs,
 };
 
-static struct super_block *cramfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int cramfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type cramfs_fs_type = {
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 579e1b6..5cd8500 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -110,11 +110,11 @@ static int debug_fill_super(struct super
 	return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
 }
 
-static struct super_block *debug_get_sb(struct file_system_type *fs_type,
-				        int flags, const char *dev_name,
-					void *data)
+static int debug_get_sb(struct file_system_type *fs_type,
+			int flags, const char *dev_name,
+			void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, debug_fill_super);
+	return get_sb_single(fs_type, flags, data, debug_fill_super, mnt);
 }
 
 static struct file_system_type debug_fs_type = {
diff --git a/fs/devfs/base.c b/fs/devfs/base.c
index 52f5059..51a97f1 100644
--- a/fs/devfs/base.c
+++ b/fs/devfs/base.c
@@ -2549,11 +2549,11 @@ static int devfs_fill_super(struct super
 	return -EINVAL;
 }				/*  End Function devfs_fill_super  */
 
-static struct super_block *devfs_get_sb(struct file_system_type *fs_type,
-					int flags, const char *dev_name,
-					void *data)
+static int devfs_get_sb(struct file_system_type *fs_type,
+			int flags, const char *dev_name,
+			void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, devfs_fill_super);
+	return get_sb_single(fs_type, flags, data, devfs_fill_super, mnt);
 }
 
 static struct file_system_type devfs_fs_type = {
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 14c5620..f7aef5b 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -130,10 +130,10 @@ fail:
 	return -ENOMEM;
 }
 
-static struct super_block *devpts_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int devpts_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, devpts_fill_super);
+	return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt);
 }
 
 static struct file_system_type devpts_fs_type = {
diff --git a/fs/efs/super.c b/fs/efs/super.c
index dff623e..1ba5e14 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -18,10 +18,10 @@ #include <linux/vfs.h>
 static int efs_statfs(struct super_block *s, struct kstatfs *buf);
 static int efs_fill_super(struct super_block *s, void *d, int silent);
 
-static struct super_block *efs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int efs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super, mnt);
 }
 
 static struct file_system_type efs_fs_type = {
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1b4491c..63d857b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -268,9 +268,9 @@ static int ep_poll(struct eventpoll *ep,
 		   int maxevents, long timeout);
 static int eventpollfs_delete_dentry(struct dentry *dentry);
 static struct inode *ep_eventpoll_inode(void);
-static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type,
-					      int flags, const char *dev_name,
-					      void *data);
+static int eventpollfs_get_sb(struct file_system_type *fs_type,
+			      int flags, const char *dev_name,
+			      void *data, struct vfsmount *mnt);
 
 /*
  * This semaphore is used to serialize ep_free() and eventpoll_release_file().
@@ -1595,11 +1595,12 @@ eexit_1:
 }
 
 
-static struct super_block *
+static int
 eventpollfs_get_sb(struct file_system_type *fs_type, int flags,
-		   const char *dev_name, void *data)
+		   const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_pseudo(fs_type, "eventpoll:", NULL, EVENTPOLLFS_MAGIC);
+	return get_sb_pseudo(fs_type, "eventpoll:", NULL, EVENTPOLLFS_MAGIC,
+			     mnt);
 }
 
 
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7e30bae..a4dfffa 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1087,10 +1087,10 @@ static int ext2_statfs (struct super_blo
 	return 0;
 }
 
-static struct super_block *ext2_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int ext2_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt);
 }
 
 #ifdef CONFIG_QUOTA
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index f8a5266..657f8e7 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2646,10 +2646,10 @@ out:
 
 #endif
 
-static struct super_block *ext3_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int ext3_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt);
 }
 
 static struct file_system_type ext3_fs_type = {
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index b44c916..d76eeaa 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -241,10 +241,11 @@ out:
 /*
  * The usual module blurb.
  */
-static struct super_block *vxfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int vxfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type vxfs_fs_type = {
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 13ebe57..5c5ab5f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -570,11 +570,11 @@ static int fuse_fill_super(struct super_
 	return err;
 }
 
-static struct super_block *fuse_get_sb(struct file_system_type *fs_type,
-				       int flags, const char *dev_name,
-				       void *raw_data)
+static int fuse_get_sb(struct file_system_type *fs_type,
+		       int flags, const char *dev_name,
+		       void *raw_data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super);
+	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
 }
 
 static struct file_system_type fuse_fs_type = {
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 1181d11..ee5b80a 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -413,10 +413,11 @@ bail:
 	return res;
 }
 
-static struct super_block *hfs_get_sb(struct file_system_type *fs_type,
-				      int flags, const char *dev_name, void *data)
+static int hfs_get_sb(struct file_system_type *fs_type,
+		      int flags, const char *dev_name, void *data,
+		      struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super, mnt);
 }
 
 static struct file_system_type hfs_fs_type = {
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 7843f79..0ed8b7e 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -450,10 +450,12 @@ static void hfsplus_destroy_inode(struct
 
 #define HFSPLUS_INODE_SIZE	sizeof(struct hfsplus_inode_info)
 
-static struct super_block *hfsplus_get_sb(struct file_system_type *fs_type,
-					  int flags, const char *dev_name, void *data)
+static int hfsplus_get_sb(struct file_system_type *fs_type,
+			  int flags, const char *dev_name, void *data,
+			  struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super,
+			   mnt);
 }
 
 static struct file_system_type hfsplus_fs_type = {
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index bf0f8e1..04035e0 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -993,11 +993,11 @@ static int hostfs_fill_sb_common(struct 
 	return(err);
 }
 
-static struct super_block *hostfs_read_sb(struct file_system_type *type,
-					     int flags, const char *dev_name,
-					     void *data)
+static int hostfs_read_sb(struct file_system_type *type,
+			  int flags, const char *dev_name,
+			  void *data, struct vfsmount *mnt)
 {
-	return(get_sb_nodev(type, flags, data, hostfs_fill_sb_common));
+	return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt);
 }
 
 static struct file_system_type hostfs_type = {
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index d72d8c8..3b25cf3 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -662,10 +662,11 @@ bail0:
 	return -EINVAL;
 }
 
-static struct super_block *hpfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int hpfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type hpfs_fs_type = {
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index 5e6363b..ec43c22 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -769,11 +769,11 @@ static int hppfs_fill_super(struct super
 	return(err);
 }
 
-static struct super_block *hppfs_read_super(struct file_system_type *type,
-					     int flags, const char *dev_name,
-					     void *data)
+static int hppfs_read_super(struct file_system_type *type,
+			    int flags, const char *dev_name,
+			    void *data, struct vfsmount *mnt)
 {
-	return(get_sb_nodev(type, flags, data, hppfs_fill_super));
+	return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt);
 }
 
 static struct file_system_type hppfs_type = {
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 3a5b4e9..4665c26 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -723,10 +723,10 @@ void hugetlb_put_quota(struct address_sp
 	}
 }
 
-static struct super_block *hugetlbfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int hugetlbfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super);
+	return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt);
 }
 
 static struct file_system_type hugetlbfs_fs_type = {
diff --git a/fs/inotify.c b/fs/inotify.c
index 1f50302..d3ae18d 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -1098,11 +1098,11 @@ out:
 	return ret;
 }
 
-static struct super_block *
+static int
 inotify_get_sb(struct file_system_type *fs_type, int flags,
-	       const char *dev_name, void *data)
+	       const char *dev_name, void *data, struct vfsmount *mnt)
 {
-    return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA);
+	return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA, mnt);
 }
 
 static struct file_system_type inotify_fs_type = {
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 70adbb9..17268da 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1399,10 +1399,11 @@ struct inode *isofs_iget(struct super_bl
 	return inode;
 }
 
-static struct super_block *isofs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int isofs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type iso9660_fs_type = {
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 020cc09..dd93a09 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -1785,10 +1785,11 @@ static struct super_operations jffs_ops 
 	.remount_fs	= jffs_remount,
 };
 
-static struct super_block *jffs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int jffs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, jffs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, jffs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type jffs_fs_type = {
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index ffd8e84..03053f3 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -111,9 +111,10 @@ static int jffs2_sb_set(struct super_blo
 	return 0;
 }
 
-static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type,
-					      int flags, const char *dev_name,
-					      void *data, struct mtd_info *mtd)
+static int jffs2_get_sb_mtd(struct file_system_type *fs_type,
+			    int flags, const char *dev_name,
+			    void *data, struct mtd_info *mtd,
+			    struct vfsmount *mnt)
 {
 	struct super_block *sb;
 	struct jffs2_sb_info *c;
@@ -121,19 +122,20 @@ static struct super_block *jffs2_get_sb_
 
 	c = kmalloc(sizeof(*c), GFP_KERNEL);
 	if (!c)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 	memset(c, 0, sizeof(*c));
 	c->mtd = mtd;
 
 	sb = sget(fs_type, jffs2_sb_compare, jffs2_sb_set, c);
 
 	if (IS_ERR(sb))
-		goto out_put;
+		goto out_error;
 
 	if (sb->s_root) {
 		/* New mountpoint for JFFS2 which is already mounted */
 		D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): Device %d (\"%s\") is already mounted\n",
 			  mtd->index, mtd->name));
+		ret = simple_set_mnt(mnt, sb);
 		goto out_put;
 	}
 
@@ -158,44 +160,47 @@ static struct super_block *jffs2_get_sb_
 		/* Failure case... */
 		up_write(&sb->s_umount);
 		deactivate_super(sb);
-		return ERR_PTR(ret);
+		return ret;
 	}
 
 	sb->s_flags |= MS_ACTIVE;
-	return sb;
+	return simple_set_mnt(mnt, sb);
 
+out_error:
+	ret = PTR_ERR(sb);
  out_put:
 	kfree(c);
 	put_mtd_device(mtd);
 
-	return sb;
+	return ret;
 }
 
-static struct super_block *jffs2_get_sb_mtdnr(struct file_system_type *fs_type,
-					      int flags, const char *dev_name,
-					      void *data, int mtdnr)
+static int jffs2_get_sb_mtdnr(struct file_system_type *fs_type,
+			      int flags, const char *dev_name,
+			      void *data, int mtdnr,
+			      struct vfsmount *mnt)
 {
 	struct mtd_info *mtd;
 
 	mtd = get_mtd_device(NULL, mtdnr);
 	if (!mtd) {
 		D1(printk(KERN_DEBUG "jffs2: MTD device #%u doesn't appear to exist\n", mtdnr));
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
-	return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd);
+	return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt);
 }
 
-static struct super_block *jffs2_get_sb(struct file_system_type *fs_type,
-					int flags, const char *dev_name,
-					void *data)
+static int jffs2_get_sb(struct file_system_type *fs_type,
+			int flags, const char *dev_name,
+			void *data, struct vfsmount *mnt)
 {
 	int err;
 	struct nameidata nd;
 	int mtdnr;
 
 	if (!dev_name)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	D1(printk(KERN_DEBUG "jffs2_get_sb(): dev_name \"%s\"\n", dev_name));
 
@@ -217,7 +222,7 @@ static struct super_block *jffs2_get_sb(
 				mtd = get_mtd_device(NULL, mtdnr);
 				if (mtd) {
 					if (!strcmp(mtd->name, dev_name+4))
-						return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd);
+						return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt);
 					put_mtd_device(mtd);
 				}
 			}
@@ -230,7 +235,7 @@ static struct super_block *jffs2_get_sb(
 			if (!*endptr) {
 				/* It was a valid number */
 				D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd%%d, mtdnr %d\n", mtdnr));
-				return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr);
+				return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt);
 			}
 		}
 	}
@@ -244,7 +249,7 @@ static struct super_block *jffs2_get_sb(
 		  err, nd.dentry->d_inode));
 
 	if (err)
-		return ERR_PTR(err);
+		return err;
 
 	err = -EINVAL;
 
@@ -266,11 +271,11 @@ static struct super_block *jffs2_get_sb(
 	mtdnr = iminor(nd.dentry->d_inode);
 	path_release(&nd);
 
-	return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr);
+	return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt);
 
 out:
 	path_release(&nd);
-	return ERR_PTR(err);
+	return err;
 }
 
 static void jffs2_put_super (struct super_block *sb)
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index db6f41d..0a81905 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -565,10 +565,11 @@ static void jfs_unlockfs(struct super_bl
 	}
 }
 
-static struct super_block *jfs_get_sb(struct file_system_type *fs_type, 
-	int flags, const char *dev_name, void *data)
+static int jfs_get_sb(struct file_system_type *fs_type, 
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super,
+			   mnt);
 }
 
 static int jfs_sync_fs(struct super_block *sb, int wait)
diff --git a/fs/libfs.c b/fs/libfs.c
index 4a3ec9a..df55ac9 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -196,9 +196,9 @@ struct inode_operations simple_dir_inode
  * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
  * will never be mountable)
  */
-struct super_block *
-get_sb_pseudo(struct file_system_type *fs_type, char *name,
-	struct super_operations *ops, unsigned long magic)
+int get_sb_pseudo(struct file_system_type *fs_type, char *name,
+	struct super_operations *ops, unsigned long magic,
+	struct vfsmount *mnt)
 {
 	struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
 	static struct super_operations default_ops = {.statfs = simple_statfs};
@@ -207,7 +207,7 @@ get_sb_pseudo(struct file_system_type *f
 	struct qstr d_name = {.name = name, .len = strlen(name)};
 
 	if (IS_ERR(s))
-		return s;
+		return PTR_ERR(s);
 
 	s->s_flags = MS_NOUSER;
 	s->s_maxbytes = ~0ULL;
@@ -232,12 +232,12 @@ get_sb_pseudo(struct file_system_type *f
 	d_instantiate(dentry, root);
 	s->s_root = dentry;
 	s->s_flags |= MS_ACTIVE;
-	return s;
+	return simple_set_mnt(mnt, s);
 
 Enomem:
 	up_write(&s->s_umount);
 	deactivate_super(s);
-	return ERR_PTR(-ENOMEM);
+	return -ENOMEM;
 }
 
 int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 2dcccf1..14f24df 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -559,10 +559,11 @@ void minix_truncate(struct inode * inode
 		V2_minix_truncate(inode);
 }
 
-static struct super_block *minix_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int minix_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super,
+			   mnt);
 }
 
 static struct file_system_type minix_fs_type = {
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 5b76ccd..9e44158 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -661,11 +661,12 @@ static int msdos_fill_super(struct super
 	return 0;
 }
 
-static struct super_block *msdos_get_sb(struct file_system_type *fs_type,
-					int flags, const char *dev_name,
-					void *data)
+static int msdos_get_sb(struct file_system_type *fs_type,
+			int flags, const char *dev_name,
+			void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super,
+			   mnt);
 }
 
 static struct file_system_type msdos_fs_type = {
diff --git a/fs/namespace.c b/fs/namespace.c
index b21c5c2..84a3bec 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -86,6 +86,15 @@ struct vfsmount *alloc_vfsmnt(const char
 	return mnt;
 }
 
+int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
+{
+	mnt->mnt_sb = sb;
+	mnt->mnt_root = dget(sb->s_root);
+	return 0;
+}
+
+EXPORT_SYMBOL(simple_set_mnt);
+
 void free_vfsmnt(struct vfsmount *mnt)
 {
 	kfree(mnt->mnt_devname);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index a1f3e97..8db033f 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -957,10 +957,10 @@ out:
 	return result;
 }
 
-static struct super_block *ncp_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int ncp_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, ncp_fill_super);
+	return get_sb_nodev(fs_type, flags, data, ncp_fill_super, mnt);
 }
 
 static struct file_system_type ncp_fs_type = {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c0fa6e8..c321f71 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1743,14 +1743,15 @@ struct nfs_clone_mount {
 	rpc_authflavor_t authflavor;
 };
 
-static struct super_block *nfs_clone_generic_sb(struct nfs_clone_mount *data,
-		struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *),
-		struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *))
+static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
+	struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *),
+	struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *),
+	struct vfsmount *mnt)
 {
 	struct nfs_server *server;
 	struct nfs_server *parent = NFS_SB(data->sb);
 	struct super_block *sb = ERR_PTR(-EINVAL);
-	void *err = ERR_PTR(-ENOMEM);
+	int err = -ENOMEM;
 	char *hostname;
 	int len;
 
@@ -1768,17 +1769,28 @@ static struct super_block *nfs_clone_gen
 		goto free_hostname;
 
 	sb = fill_sb(server, data);
-	if (IS_ERR((err = sb)) || sb->s_root)
+	if (IS_ERR(sb) < 0) {
+		err = PTR_ERR(sb);
 		goto kill_rpciod;
+	}
 
-	server = fill_server(sb, data);
-	if (IS_ERR((err = server)))
+	if (sb->s_root) {
+		rpciod_down();
+		kfree(server->hostname);
+		kfree(server);
+		return simple_set_mnt(mnt, sb);
+	}
+
+	server = fill_server(mnt->mnt_sb, data);
+	if (IS_ERR(server)) {
+		err = PTR_ERR(server);
 		goto out_deactivate;
-	return sb;
+	}
+	return simple_set_mnt(mnt, sb);
 out_deactivate:
 	up_write(&sb->s_umount);
 	deactivate_super(sb);
-	return (struct super_block *)err;
+	return err;
 kill_rpciod:
 	rpciod_down();
 free_hostname:
@@ -1786,7 +1798,7 @@ free_hostname:
 free_server:
 	kfree(server);
 out_err:
-	return (struct super_block *)err;
+	return err;
 }
 
 static int nfs_set_super(struct super_block *s, void *data)
@@ -1807,8 +1819,8 @@ static int nfs_compare_super(struct supe
 	return !nfs_compare_fh(&old->fh, &server->fh);
 }
 
-static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *raw_data)
+static int nfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
 	int error;
 	struct nfs_server *server = NULL;
@@ -1816,14 +1828,14 @@ static struct super_block *nfs_get_sb(st
 	struct nfs_fh *root;
 	struct nfs_mount_data *data = raw_data;
 
-	s = ERR_PTR(-EINVAL);
+	error = -EINVAL;
 	if (data == NULL) {
 		dprintk("%s: missing data argument\n", __FUNCTION__);
-		goto out_err;
+		goto out_err_noserver;
 	}
 	if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
 		dprintk("%s: bad mount version\n", __FUNCTION__);
-		goto out_err;
+		goto out_err_noserver;
 	}
 	switch (data->version) {
 		case 1:
@@ -1835,7 +1847,7 @@ static struct super_block *nfs_get_sb(st
 				dprintk("%s: mount structure version %d does not support NFSv3\n",
 						__FUNCTION__,
 						data->version);
-				goto out_err;
+				goto out_err_noserver;
 			}
 			data->root.size = NFS2_FHSIZE;
 			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
@@ -1844,24 +1856,24 @@ static struct super_block *nfs_get_sb(st
 				dprintk("%s: mount structure version %d does not support strong security\n",
 						__FUNCTION__,
 						data->version);
-				goto out_err;
+				goto out_err_noserver;
 			}
 		case 5:
 			memset(data->context, 0, sizeof(data->context));
 	}
 #ifndef CONFIG_NFS_V3
 	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
-	s = ERR_PTR(-EPROTONOSUPPORT);
+	error = -EPROTONOSUPPORT;
 	if (data->flags & NFS_MOUNT_VER3) {
 		dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
-		goto out_err;
+		goto out_err_noserver;
 	}
 #endif /* CONFIG_NFS_V3 */
 
-	s = ERR_PTR(-ENOMEM);
+	error = -ENOMEM;
 	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
 	if (!server)
-		goto out_err;
+		goto out_err_noserver;
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
 	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -1871,7 +1883,7 @@ #endif /* CONFIG_NFS_V3 */
 		root->size = data->root.size;
 	else
 		root->size = NFS2_FHSIZE;
-	s = ERR_PTR(-EINVAL);
+	error = -EINVAL;
 	if (root->size > sizeof(root->data)) {
 		dprintk("%s: invalid root filehandle\n", __FUNCTION__);
 		goto out_err;
@@ -1887,15 +1899,20 @@ #endif /* CONFIG_NFS_V3 */
 	}
 
 	/* Fire up rpciod if not yet running */
-	s = ERR_PTR(rpciod_up());
-	if (IS_ERR(s)) {
-		dprintk("%s: couldn't start rpciod! Error = %ld\n",
-				__FUNCTION__, PTR_ERR(s));
+	error = rpciod_up();
+	if (error < 0) {
+		dprintk("%s: couldn't start rpciod! Error = %d\n",
+				__FUNCTION__, error);
 		goto out_err;
 	}
 
 	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
-	if (IS_ERR(s) || s->s_root)
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
+		goto out_err_rpciod;
+	}
+
+	if (s->s_root)
 		goto out_rpciod_down;
 
 	s->s_flags = flags;
@@ -1904,15 +1921,22 @@ #endif /* CONFIG_NFS_V3 */
 	if (error) {
 		up_write(&s->s_umount);
 		deactivate_super(s);
-		return ERR_PTR(error);
+		return error;
 	}
 	s->s_flags |= MS_ACTIVE;
-	return s;
+	return simple_set_mnt(mnt, s);
+
 out_rpciod_down:
 	rpciod_down();
+	kfree(server);
+	return simple_set_mnt(mnt, s);
+
+out_err_rpciod:
+	rpciod_down();
 out_err:
 	kfree(server);
-	return s;
+out_err_noserver:
+	return error;
 }
 
 static void nfs_kill_super(struct super_block *s)
@@ -2009,11 +2033,11 @@ out:
 	return err;
 }
 
-static struct super_block *nfs_clone_nfs_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data)
+static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
+		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
 	struct nfs_clone_mount *data = raw_data;
-	return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server);
+	return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server, mnt);
 }
 
 static struct file_system_type clone_nfs_fs_type = {
@@ -2244,8 +2268,8 @@ nfs_copy_user_string(char *dst, struct n
 	return dst;
 }
 
-static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *raw_data)
+static int nfs4_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
 	int error;
 	struct nfs_server *server;
@@ -2255,16 +2279,16 @@ static struct super_block *nfs4_get_sb(s
 
 	if (data == NULL) {
 		dprintk("%s: missing data argument\n", __FUNCTION__);
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 	if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) {
 		dprintk("%s: bad mount version\n", __FUNCTION__);
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
 	if (!server)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
 	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -2286,33 +2310,41 @@ static struct super_block *nfs4_get_sb(s
 
 	/* We now require that the mount process passes the remote address */
 	if (data->host_addrlen != sizeof(server->addr)) {
-		s = ERR_PTR(-EINVAL);
+		error = -EINVAL;
 		goto out_free;
 	}
 	if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) {
-		s = ERR_PTR(-EFAULT);
+		error = -EFAULT;
 		goto out_free;
 	}
 	if (server->addr.sin_family != AF_INET ||
 	    server->addr.sin_addr.s_addr == INADDR_ANY) {
 		dprintk("%s: mount program didn't pass remote IP address!\n",
 				__FUNCTION__);
-		s = ERR_PTR(-EINVAL);
+		error = -EINVAL;
 		goto out_free;
 	}
 
 	/* Fire up rpciod if not yet running */
-	s = ERR_PTR(rpciod_up());
-	if (IS_ERR(s)) {
-		dprintk("%s: couldn't start rpciod! Error = %ld\n",
-				__FUNCTION__, PTR_ERR(s));
+	error = rpciod_up();
+	if (error < 0) {
+		dprintk("%s: couldn't start rpciod! Error = %d\n",
+				__FUNCTION__, error);
 		goto out_free;
 	}
 
 	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
-
-	if (IS_ERR(s) || s->s_root)
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
 		goto out_free;
+	}
+
+	if (s->s_root) {
+		kfree(server->mnt_path);
+		kfree(server->hostname);
+		kfree(server);
+		return simple_set_mnt(mnt, s);
+	}
 
 	s->s_flags = flags;
 
@@ -2320,17 +2352,17 @@ static struct super_block *nfs4_get_sb(s
 	if (error) {
 		up_write(&s->s_umount);
 		deactivate_super(s);
-		return ERR_PTR(error);
+		return error;
 	}
 	s->s_flags |= MS_ACTIVE;
-	return s;
+	return simple_set_mnt(mnt, s);
 out_err:
-	s = (struct super_block *)p;
+	error = PTR_ERR(p);
 out_free:
 	kfree(server->mnt_path);
 	kfree(server->hostname);
 	kfree(server);
-	return s;
+	return error;
 }
 
 static void nfs4_kill_super(struct super_block *sb)
@@ -2448,11 +2480,11 @@ err:
 	return sb;
 }
 
-static struct super_block *nfs_clone_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data)
+static int nfs_clone_nfs4_sb(struct file_system_type *fs_type,
+		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
 	struct nfs_clone_mount *data = raw_data;
-	return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server);
+	return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server, mnt);
 }
 
 static struct file_system_type clone_nfs4_fs_type = {
@@ -2652,11 +2684,11 @@ out_err:
 	return (struct nfs_server *)err;
 }
 
-static struct super_block *nfs_referral_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data)
+static int nfs_referral_nfs4_sb(struct file_system_type *fs_type,
+		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
 	struct nfs_clone_mount *data = raw_data;
-	return nfs_clone_generic_sb(data, nfs4_referral_sb, nfs4_referral_server);
+	return nfs_clone_generic_sb(data, nfs4_referral_sb, nfs4_referral_server, mnt);
 }
 
 static struct file_system_type nfs_referral_nfs4_fs_type = {
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3ef017b..a1810e6 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -494,10 +494,10 @@ #endif
 	return simple_fill_super(sb, 0x6e667364, nfsd_files);
 }
 
-static struct super_block *nfsd_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int nfsd_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, nfsd_fill_super);
+	return get_sb_single(fs_type, flags, data, nfsd_fill_super, mnt);
 }
 
 static struct file_system_type nfsd_fs_type = {
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 27833f6..d5d5e96 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3093,10 +3093,11 @@ struct kmem_cache *ntfs_index_ctx_cache;
 /* Driver wide mutex. */
 DEFINE_MUTEX(ntfs_lock);
 
-static struct super_block *ntfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int ntfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type ntfs_fs_type = {
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 7e88e24..7273d9f 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -574,10 +574,10 @@ static struct inode_operations dlmfs_fil
 	.getattr	= simple_getattr,
 };
 
-static struct super_block *dlmfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int dlmfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super);
+	return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt);
 }
 
 static struct file_system_type dlmfs_fs_type = {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 949b3da..788b8b5 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -672,12 +672,14 @@ read_super_error:
 	return status;
 }
 
-static struct super_block *ocfs2_get_sb(struct file_system_type *fs_type,
-					int flags,
-					const char *dev_name,
-					void *data)
+static int ocfs2_get_sb(struct file_system_type *fs_type,
+			int flags,
+			const char *dev_name,
+			void *data,
+			struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super,
+			   mnt);
 }
 
 static struct file_system_type ocfs2_fs_type = {
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 0f14276..464e2bc 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -1054,10 +1054,10 @@ out_no_root:
 	return -ENOMEM;
 }
 
-static struct super_block *openprom_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int openprom_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, openprom_fill_super);
+	return get_sb_single(fs_type, flags, data, openprom_fill_super, mnt);
 }
 
 static struct file_system_type openprom_fs_type = {
diff --git a/fs/pipe.c b/fs/pipe.c
index 5acd895..2035257 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -979,12 +979,11 @@ no_files:
  * any operations on the root directory. However, we need a non-trivial
  * d_name - pipe: will go nicely and kill the special-casing in procfs.
  */
-
-static struct super_block *
-pipefs_get_sb(struct file_system_type *fs_type, int flags,
-	      const char *dev_name, void *data)
+static int pipefs_get_sb(struct file_system_type *fs_type,
+			 int flags, const char *dev_name, void *data,
+			 struct vfsmount *mnt)
 {
-	return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
+	return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt);
 }
 
 static struct file_system_type pipe_fs_type = {
diff --git a/fs/proc/root.c b/fs/proc/root.c
index c3fd361..9995356 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -26,10 +26,10 @@ #ifdef CONFIG_SYSCTL
 struct proc_dir_entry *proc_sys_root;
 #endif
 
-static struct super_block *proc_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int proc_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, proc_fill_super);
+	return get_sb_single(fs_type, flags, data, proc_fill_super, mnt);
 }
 
 static struct file_system_type proc_fs_type = {
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 2ecd46f..e6cca5c 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -561,10 +561,11 @@ static void destroy_inodecache(void)
 		       "qnx4_inode_cache: not all structures were freed\n");
 }
 
-static struct super_block *qnx4_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int qnx4_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super,
+			   mnt);
 }
 
 static struct file_system_type qnx4_fs_type = {
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 14bd224..b967733 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -185,16 +185,17 @@ static int ramfs_fill_super(struct super
 	return 0;
 }
 
-struct super_block *ramfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+int ramfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, ramfs_fill_super);
+	return get_sb_nodev(fs_type, flags, data, ramfs_fill_super, mnt);
 }
 
-static struct super_block *rootfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int rootfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super);
+	return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super,
+			    mnt);
 }
 
 static struct file_system_type ramfs_fs_type = {
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index cae2abb..f3ff41d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2249,11 +2249,12 @@ static ssize_t reiserfs_quota_write(stru
 
 #endif
 
-static struct super_block *get_super_block(struct file_system_type *fs_type,
-					   int flags, const char *dev_name,
-					   void *data)
+static int get_super_block(struct file_system_type *fs_type,
+			   int flags, const char *dev_name,
+			   void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super,
+			   mnt);
 }
 
 static int __init init_reiserfs_fs(void)
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 9b9eda7..4d6cd66 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -607,10 +607,11 @@ static struct super_operations romfs_ops
 	.remount_fs	= romfs_remount,
 };
 
-static struct super_block *romfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int romfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, romfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, romfs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type romfs_fs_type = {
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index fdeabc0..4a37c2b 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -782,10 +782,10 @@ out:
 	return error;
 }
 
-static struct super_block *smb_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int smb_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, smb_fill_super);
+	return get_sb_nodev(fs_type, flags, data, smb_fill_super, mnt);
 }
 
 static struct file_system_type smb_fs_type = {
diff --git a/fs/super.c b/fs/super.c
index 15f2afd..3daf41e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -231,7 +231,7 @@ void generic_shutdown_super(struct super
 	if (root) {
 		sb->s_root = NULL;
 		shrink_dcache_parent(root);
-		shrink_dcache_anon(&sb->s_anon);
+		shrink_dcache_sb(sb);
 		dput(root);
 		fsync_super(sb);
 		lock_super(sb);
@@ -676,9 +676,10 @@ static void bdev_uevent(struct block_dev
 	}
 }
 
-struct super_block *get_sb_bdev(struct file_system_type *fs_type,
+int get_sb_bdev(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data,
-	int (*fill_super)(struct super_block *, void *, int))
+	int (*fill_super)(struct super_block *, void *, int),
+	struct vfsmount *mnt)
 {
 	struct block_device *bdev;
 	struct super_block *s;
@@ -686,7 +687,7 @@ struct super_block *get_sb_bdev(struct f
 
 	bdev = open_bdev_excl(dev_name, flags, fs_type);
 	if (IS_ERR(bdev))
-		return (struct super_block *)bdev;
+		return PTR_ERR(bdev);
 
 	/*
 	 * once the super is inserted into the list by sget, s_umount
@@ -697,15 +698,17 @@ struct super_block *get_sb_bdev(struct f
 	s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
 	mutex_unlock(&bdev->bd_mount_mutex);
 	if (IS_ERR(s))
-		goto out;
+		goto error_s;
 
 	if (s->s_root) {
 		if ((flags ^ s->s_flags) & MS_RDONLY) {
 			up_write(&s->s_umount);
 			deactivate_super(s);
-			s = ERR_PTR(-EBUSY);
+			error = -EBUSY;
+			goto error_bdev;
 		}
-		goto out;
+
+		close_bdev_excl(bdev);
 	} else {
 		char b[BDEVNAME_SIZE];
 
@@ -716,18 +719,21 @@ struct super_block *get_sb_bdev(struct f
 		if (error) {
 			up_write(&s->s_umount);
 			deactivate_super(s);
-			s = ERR_PTR(error);
-		} else {
-			s->s_flags |= MS_ACTIVE;
-			bdev_uevent(bdev, KOBJ_MOUNT);
+			goto error;
 		}
+
+		s->s_flags |= MS_ACTIVE;
+		bdev_uevent(bdev, KOBJ_MOUNT);
 	}
 
-	return s;
+	return simple_set_mnt(mnt, s);
 
-out:
+error_s:
+	error = PTR_ERR(s);
+error_bdev:
 	close_bdev_excl(bdev);
-	return s;
+error:
+	return error;
 }
 
 EXPORT_SYMBOL(get_sb_bdev);
@@ -744,15 +750,16 @@ void kill_block_super(struct super_block
 
 EXPORT_SYMBOL(kill_block_super);
 
-struct super_block *get_sb_nodev(struct file_system_type *fs_type,
+int get_sb_nodev(struct file_system_type *fs_type,
 	int flags, void *data,
-	int (*fill_super)(struct super_block *, void *, int))
+	int (*fill_super)(struct super_block *, void *, int),
+	struct vfsmount *mnt)
 {
 	int error;
 	struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
 
 	if (IS_ERR(s))
-		return s;
+		return PTR_ERR(s);
 
 	s->s_flags = flags;
 
@@ -760,10 +767,10 @@ struct super_block *get_sb_nodev(struct 
 	if (error) {
 		up_write(&s->s_umount);
 		deactivate_super(s);
-		return ERR_PTR(error);
+		return error;
 	}
 	s->s_flags |= MS_ACTIVE;
-	return s;
+	return simple_set_mnt(mnt, s);
 }
 
 EXPORT_SYMBOL(get_sb_nodev);
@@ -773,28 +780,29 @@ static int compare_single(struct super_b
 	return 1;
 }
 
-struct super_block *get_sb_single(struct file_system_type *fs_type,
+int get_sb_single(struct file_system_type *fs_type,
 	int flags, void *data,
-	int (*fill_super)(struct super_block *, void *, int))
+	int (*fill_super)(struct super_block *, void *, int),
+	struct vfsmount *mnt)
 {
 	struct super_block *s;
 	int error;
 
 	s = sget(fs_type, compare_single, set_anon_super, NULL);
 	if (IS_ERR(s))
-		return s;
+		return PTR_ERR(s);
 	if (!s->s_root) {
 		s->s_flags = flags;
 		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
 		if (error) {
 			up_write(&s->s_umount);
 			deactivate_super(s);
-			return ERR_PTR(error);
+			return error;
 		}
 		s->s_flags |= MS_ACTIVE;
 	}
 	do_remount_sb(s, flags, data, 0);
-	return s;
+	return simple_set_mnt(mnt, s);
 }
 
 EXPORT_SYMBOL(get_sb_single);
@@ -802,52 +810,55 @@ EXPORT_SYMBOL(get_sb_single);
 struct vfsmount *
 vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
 {
-	struct super_block *sb = ERR_PTR(-ENOMEM);
 	struct vfsmount *mnt;
-	int error;
 	char *secdata = NULL;
+	int error;
 
+	if (!type)
+		return ERR_PTR(-ENODEV);
+
+	error = -ENOMEM;
 	mnt = alloc_vfsmnt(name);
 	if (!mnt)
 		goto out;
 
 	if (data) {
 		secdata = alloc_secdata();
-		if (!secdata) {
-			sb = ERR_PTR(-ENOMEM);
+		if (!secdata)
 			goto out_mnt;
-		}
 
 		error = security_sb_copy_data(type, data, secdata);
-		if (error) {
-			sb = ERR_PTR(error);
+		if (error)
 			goto out_free_secdata;
-		}
 	}
 
-	sb = type->get_sb(type, flags, name, data);
-	if (IS_ERR(sb))
+	error = type->get_sb(type, flags, name, data, mnt);
+	if (error < 0)
 		goto out_free_secdata;
- 	error = security_sb_kern_mount(sb, secdata);
+
+	BUG_ON(!mnt->mnt_sb);
+	BUG_ON(!mnt->mnt_sb->s_root);
+	BUG_ON(!mnt->mnt_root);
+
+ 	error = security_sb_kern_mount(mnt->mnt_sb, secdata);
  	if (error)
  		goto out_sb;
-	mnt->mnt_sb = sb;
-	mnt->mnt_root = dget(sb->s_root);
-	mnt->mnt_mountpoint = sb->s_root;
+
+	mnt->mnt_mountpoint = mnt->mnt_root;
 	mnt->mnt_parent = mnt;
-	up_write(&sb->s_umount);
+	up_write(&mnt->mnt_sb->s_umount);
 	free_secdata(secdata);
 	return mnt;
 out_sb:
-	up_write(&sb->s_umount);
-	deactivate_super(sb);
-	sb = ERR_PTR(error);
+	dput(mnt->mnt_root);
+	up_write(&mnt->mnt_sb->s_umount);
+	deactivate_super(mnt->mnt_sb);
 out_free_secdata:
 	free_secdata(secdata);
 out_mnt:
 	free_vfsmnt(mnt);
 out:
-	return (struct vfsmount *)sb;
+	return ERR_PTR(error);
 }
 
 EXPORT_SYMBOL_GPL(vfs_kern_mount);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f1117e8..40190c4 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -66,10 +66,10 @@ static int sysfs_fill_super(struct super
 	return 0;
 }
 
-static struct super_block *sysfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int sysfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, sysfs_fill_super);
+	return get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt);
 }
 
 static struct file_system_type sysfs_fs_type = {
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index e92b991..876639b 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -506,16 +506,17 @@ failed:
 
 /* Every kernel module contains stuff like this. */
 
-static struct super_block *sysv_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int sysv_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super,
+			   mnt);
 }
 
-static struct super_block *v7_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int v7_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super, mnt);
 }
 
 static struct file_system_type sysv_fs_type = {
diff --git a/fs/udf/super.c b/fs/udf/super.c
index e45789f..2250774 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -94,10 +94,10 @@ static unsigned int udf_count_free(struc
 static int udf_statfs(struct super_block *, struct kstatfs *);
 
 /* UDF filesystem type */
-static struct super_block *udf_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int udf_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super, mnt);
 }
 
 static struct file_system_type udf_fstype = {
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index db98a4c..768fb8d 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1311,10 +1311,10 @@ out:
 
 #endif
 
-static struct super_block *ufs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int ufs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super, mnt);
 }
 
 static struct file_system_type ufs_fs_type = {
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index a56cec3..9a8f48b 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -1023,11 +1023,12 @@ static int vfat_fill_super(struct super_
 	return 0;
 }
 
-static struct super_block *vfat_get_sb(struct file_system_type *fs_type,
-				       int flags, const char *dev_name,
-				       void *data)
+static int vfat_get_sb(struct file_system_type *fs_type,
+		       int flags, const char *dev_name,
+		       void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super,
+			   mnt);
 }
 
 static struct file_system_type vfat_fs_type = {
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 68f4793..7702355 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -900,14 +900,16 @@ fail_vfsop:
 	return -error;
 }
 
-STATIC struct super_block *
+STATIC int
 xfs_fs_get_sb(
 	struct file_system_type	*fs_type,
 	int			flags,
 	const char		*dev_name,
-	void			*data)
+	void			*data,
+	struct vfsmount		*mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super,
+			   mnt);
 }
 
 STATIC struct super_operations xfs_super_operations = {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e40a02e..cde3028 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1270,23 +1270,26 @@ find_exported_dentry(struct super_block 
 struct file_system_type {
 	const char *name;
 	int fs_flags;
-	struct super_block *(*get_sb) (struct file_system_type *, int,
-				       const char *, void *);
+	int (*get_sb) (struct file_system_type *, int,
+		       const char *, void *, struct vfsmount *);
 	void (*kill_sb) (struct super_block *);
 	struct module *owner;
 	struct file_system_type * next;
 	struct list_head fs_supers;
 };
 
-struct super_block *get_sb_bdev(struct file_system_type *fs_type,
+extern int get_sb_bdev(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data,
-	int (*fill_super)(struct super_block *, void *, int));
-struct super_block *get_sb_single(struct file_system_type *fs_type,
+	int (*fill_super)(struct super_block *, void *, int),
+	struct vfsmount *mnt);
+extern int get_sb_single(struct file_system_type *fs_type,
 	int flags, void *data,
-	int (*fill_super)(struct super_block *, void *, int));
-struct super_block *get_sb_nodev(struct file_system_type *fs_type,
+	int (*fill_super)(struct super_block *, void *, int),
+	struct vfsmount *mnt);
+extern int get_sb_nodev(struct file_system_type *fs_type,
 	int flags, void *data,
-	int (*fill_super)(struct super_block *, void *, int));
+	int (*fill_super)(struct super_block *, void *, int),
+	struct vfsmount *mnt);
 void generic_shutdown_super(struct super_block *sb);
 void kill_block_super(struct super_block *sb);
 void kill_anon_super(struct super_block *sb);
@@ -1297,8 +1300,10 @@ struct super_block *sget(struct file_sys
 			int (*test)(struct super_block *,void *),
 			int (*set)(struct super_block *,void *),
 			void *data);
-struct super_block *get_sb_pseudo(struct file_system_type *, char *,
-			struct super_operations *ops, unsigned long);
+extern int get_sb_pseudo(struct file_system_type *, char *,
+	struct super_operations *ops, unsigned long,
+	struct vfsmount *mnt);
+extern int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
 int __put_super(struct super_block *sb);
 int __put_super_and_need_restart(struct super_block *sb);
 void unnamed_dev_init(void);
diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h
index 78ecfa2..00b340b 100644
--- a/include/linux/ramfs.h
+++ b/include/linux/ramfs.h
@@ -2,8 +2,8 @@ #ifndef _LINUX_RAMFS_H
 #define _LINUX_RAMFS_H
 
 struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev);
-struct super_block *ramfs_get_sb(struct file_system_type *fs_type,
-	 int flags, const char *dev_name, void *data);
+extern int ramfs_get_sb(struct file_system_type *fs_type,
+	 int flags, const char *dev_name, void *data, struct vfsmount *mnt);
 
 #ifndef CONFIG_MMU
 extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 41ecbd4..332b590 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -202,11 +202,11 @@ static int mqueue_fill_super(struct supe
 	return 0;
 }
 
-static struct super_block *mqueue_get_sb(struct file_system_type *fs_type,
-					 int flags, const char *dev_name,
-					 void *data)
+static int mqueue_get_sb(struct file_system_type *fs_type,
+			 int flags, const char *dev_name,
+			 void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, mqueue_fill_super);
+	return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt);
 }
 
 static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 72248d1..d1217aa 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -392,11 +392,11 @@ static int cpuset_fill_super(struct supe
 	return 0;
 }
 
-static struct super_block *cpuset_get_sb(struct file_system_type *fs_type,
-					int flags, const char *unused_dev_name,
-					void *data)
+static int cpuset_get_sb(struct file_system_type *fs_type,
+			 int flags, const char *unused_dev_name,
+			 void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, cpuset_fill_super);
+	return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt);
 }
 
 static struct file_system_type cpuset_fs_type = {
diff --git a/kernel/futex.c b/kernel/futex.c
index 5699c51..e1a380c 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1056,11 +1056,11 @@ asmlinkage long sys_futex(u32 __user *ua
 			(unsigned long)uaddr2, val2, val3);
 }
 
-static struct super_block *
-futexfs_get_sb(struct file_system_type *fs_type,
-	       int flags, const char *dev_name, void *data)
+static int futexfs_get_sb(struct file_system_type *fs_type,
+			  int flags, const char *dev_name, void *data,
+			  struct vfsmount *mnt)
 {
-	return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA);
+	return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA, mnt);
 }
 
 static struct file_system_type futex_fs_type = {
diff --git a/mm/shmem.c b/mm/shmem.c
index 8184342..ad19b6c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2231,10 +2231,10 @@ #endif
 };
 
 
-static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int shmem_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
+	return get_sb_nodev(fs_type, flags, data, shmem_fill_super, mnt);
 }
 
 static struct file_system_type tmpfs_fs_type = {
diff --git a/net/socket.c b/net/socket.c
index 02948b6..565f5e8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -335,10 +335,11 @@ static struct super_operations sockfs_op
 	.statfs =	simple_statfs,
 };
 
-static struct super_block *sockfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int sockfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC);
+	return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
+			     mnt);
 }
 
 static struct vfsmount *sock_mnt __read_mostly;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index a5226df..dafe793 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -815,11 +815,11 @@ out:
 	return -ENOMEM;
 }
 
-static struct super_block *
+static int
 rpc_get_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *data)
+		int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, rpc_fill_super);
+	return get_sb_single(fs_type, flags, data, rpc_fill_super, mnt);
 }
 
 static struct file_system_type rpc_pipe_fs_type = {
diff --git a/security/inode.c b/security/inode.c
index 8bf4062..98a0df5 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -135,11 +135,11 @@ static int fill_super(struct super_block
 	return simple_fill_super(sb, SECURITYFS_MAGIC, files);
 }
 
-static struct super_block *get_sb(struct file_system_type *fs_type,
-				        int flags, const char *dev_name,
-					void *data)
+static int get_sb(struct file_system_type *fs_type,
+		  int flags, const char *dev_name,
+		  void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, fill_super);
+	return get_sb_single(fs_type, flags, data, fill_super, mnt);
 }
 
 static struct file_system_type fs_type = {
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index a4efc96..b3501f7 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -1279,10 +1279,11 @@ err:
 	goto out;
 }
 
-static struct super_block *sel_get_sb(struct file_system_type *fs_type,
-				      int flags, const char *dev_name, void *data)
+static int sel_get_sb(struct file_system_type *fs_type,
+		      int flags, const char *dev_name, void *data,
+		      struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, sel_fill_super);
+	return get_sb_single(fs_type, flags, data, sel_fill_super, mnt);
 }
 
 static struct file_system_type sel_fs_type = {


^ permalink raw reply related

* [PATCH 02/14] NFS: Permit filesystem to perform statfs with a known root dentry [try #8]
From: David Howells @ 2006-05-10 16:01 UTC (permalink / raw)
  To: torvalds, akpm, steved, trond.myklebust, aviro
  Cc: linux-fsdevel, linux-cachefs, nfsv4, linux-kernel
In-Reply-To: <20060510160111.9058.55026.stgit@warthog.cambridge.redhat.com>

The attached patch gives the statfs superblock operation a vfsmount pointer
rather than a superblock pointer.

This complements the get_sb() patch.  That reduced the significance of
sb->s_root, allowing NFS to place a fake root there.  However, NFS does require
a dentry to use as a target for the statfs operation.  This permits the root in
the vfsmount to be used instead.

Signed-Off-By: David Howells <dhowells@redhat.com>
---

 Documentation/filesystems/Locking |    2 +-
 Documentation/filesystems/vfs.txt |    2 +-
 arch/alpha/kernel/osf_sys.c       |    8 ++++----
 arch/mips/kernel/sysirix.c        |   12 ++++++------
 arch/parisc/hpux/sys_hpux.c       |   15 ++++++++++-----
 arch/sparc64/solaris/fs.c         |    4 ++--
 fs/adfs/super.c                   |    6 +++---
 fs/affs/super.c                   |    5 +++--
 fs/befs/linuxvfs.c                |    5 +++--
 fs/bfs/inode.c                    |    3 ++-
 fs/cifs/cifsfs.c                  |    3 ++-
 fs/coda/inode.c                   |    6 +++---
 fs/compat.c                       |    8 ++++----
 fs/cramfs/inode.c                 |    4 +++-
 fs/efs/super.c                    |    6 +++---
 fs/ext2/super.c                   |    5 +++--
 fs/ext3/super.c                   |    5 +++--
 fs/fat/inode.c                    |    8 ++++----
 fs/freevxfs/vxfs_super.c          |   12 ++++++------
 fs/fuse/inode.c                   |    3 ++-
 fs/hfs/super.c                    |    4 +++-
 fs/hfsplus/super.c                |    4 +++-
 fs/hostfs/hostfs_kern.c           |    4 ++--
 fs/hpfs/super.c                   |    3 ++-
 fs/hppfs/hppfs_kern.c             |    2 +-
 fs/hugetlbfs/inode.c              |    4 ++--
 fs/isofs/inode.c                  |    7 +++++--
 fs/jffs/inode-v23.c               |    4 ++--
 fs/jffs2/fs.c                     |    4 ++--
 fs/jffs2/os-linux.h               |    2 +-
 fs/jfs/super.c                    |    4 ++--
 fs/libfs.c                        |    4 ++--
 fs/minix/inode.c                  |   10 +++++-----
 fs/ncpfs/inode.c                  |    5 +++--
 fs/nfs/inode.c                    |    5 +++--
 fs/nfsd/nfs4xdr.c                 |    2 +-
 fs/nfsd/vfs.c                     |   14 ++++++++++++--
 fs/ntfs/super.c                   |    7 ++++---
 fs/ocfs2/super.c                  |    4 ++--
 fs/open.c                         |   26 +++++++++++++-------------
 fs/qnx4/inode.c                   |    6 ++++--
 fs/reiserfs/super.c               |    8 ++++----
 fs/romfs/inode.c                  |    4 ++--
 fs/smbfs/inode.c                  |    6 +++---
 fs/smbfs/proc.c                   |    4 ++--
 fs/smbfs/proto.h                  |    2 +-
 fs/super.c                        |    7 ++++++-
 fs/sysv/inode.c                   |    3 ++-
 fs/udf/super.c                    |    6 ++++--
 fs/ufs/super.c                    |    3 ++-
 fs/xfs/linux-2.6/xfs_super.c      |    4 ++--
 include/linux/coda_psdev.h        |    2 +-
 include/linux/fs.h                |    6 +++---
 include/linux/security.h          |   14 +++++++-------
 kernel/acct.c                     |    2 +-
 mm/shmem.c                        |    4 ++--
 security/dummy.c                  |    2 +-
 security/selinux/hooks.c          |    6 +++---
 58 files changed, 188 insertions(+), 142 deletions(-)

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 3abf08f..279898c 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -99,7 +99,7 @@ prototypes:
 	int (*sync_fs)(struct super_block *sb, int wait);
 	void (*write_super_lockfs) (struct super_block *);
 	void (*unlockfs) (struct super_block *);
-	int (*statfs) (struct super_block *, struct kstatfs *);
+	int (*statfs) (struct vfsmount *, struct kstatfs *);
 	int (*remount_fs) (struct super_block *, int *, char *);
 	void (*clear_inode) (struct inode *);
 	void (*umount_begin) (struct super_block *);
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index dd7d0dc..199b643 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -211,7 +211,7 @@ struct super_operations {
         int (*sync_fs)(struct super_block *sb, int wait);
         void (*write_super_lockfs) (struct super_block *);
         void (*unlockfs) (struct super_block *);
-        int (*statfs) (struct super_block *, struct kstatfs *);
+        int (*statfs) (struct vfsmount *, struct kstatfs *);
         int (*remount_fs) (struct super_block *, int *, char *);
         void (*clear_inode) (struct inode *);
         void (*umount_begin) (struct super_block *);
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 31afe3d..3e222f5 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -240,11 +240,11 @@ linux_to_osf_statfs(struct kstatfs *linu
 }
 
 static int
-do_osf_statfs(struct dentry * dentry, struct osf_statfs __user *buffer,
+do_osf_statfs(struct vfsmount *mnt, struct osf_statfs __user *buffer,
 	      unsigned long bufsiz)
 {
 	struct kstatfs linux_stat;
-	int error = vfs_statfs(dentry->d_inode->i_sb, &linux_stat);
+	int error = vfs_statfs(mnt, &linux_stat);
 	if (!error)
 		error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz);
 	return error;	
@@ -258,7 +258,7 @@ osf_statfs(char __user *path, struct osf
 
 	retval = user_path_walk(path, &nd);
 	if (!retval) {
-		retval = do_osf_statfs(nd.dentry, buffer, bufsiz);
+		retval = do_osf_statfs(nd.mnt, buffer, bufsiz);
 		path_release(&nd);
 	}
 	return retval;
@@ -273,7 +273,7 @@ osf_fstatfs(unsigned long fd, struct osf
 	retval = -EBADF;
 	file = fget(fd);
 	if (file) {
-		retval = do_osf_statfs(file->f_dentry, buffer, bufsiz);
+		retval = do_osf_statfs(file->f_vfsmnt, buffer, bufsiz);
 		fput(file);
 	}
 	return retval;
diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c
index 5407b78..de3f84a 100644
--- a/arch/mips/kernel/sysirix.c
+++ b/arch/mips/kernel/sysirix.c
@@ -694,7 +694,7 @@ asmlinkage int irix_statfs(const char __
 	if (error)
 		goto out;
 
-	error = vfs_statfs(nd.dentry->d_inode->i_sb, &kbuf);
+	error = vfs_statfs(nd.mnt, &kbuf);
 	if (error)
 		goto dput_and_out;
 
@@ -732,7 +732,7 @@ asmlinkage int irix_fstatfs(unsigned int
 		goto out;
 	}
 
-	error = vfs_statfs(file->f_dentry->d_inode->i_sb, &kbuf);
+	error = vfs_statfs(file->f_vfsmnt, &kbuf);
 	if (error)
 		goto out_f;
 
@@ -1360,7 +1360,7 @@ asmlinkage int irix_statvfs(char __user 
 	error = user_path_walk(fname, &nd);
 	if (error)
 		goto out;
-	error = vfs_statfs(nd.dentry->d_inode->i_sb, &kbuf);
+	error = vfs_statfs(nd.mnt, &kbuf);
 	if (error)
 		goto dput_and_out;
 
@@ -1406,7 +1406,7 @@ asmlinkage int irix_fstatvfs(int fd, str
 		error = -EBADF;
 		goto out;
 	}
-	error = vfs_statfs(file->f_dentry->d_inode->i_sb, &kbuf);
+	error = vfs_statfs(file->f_vfsmnt, &kbuf);
 	if (error)
 		goto out_f;
 
@@ -1611,7 +1611,7 @@ asmlinkage int irix_statvfs64(char __use
 	error = user_path_walk(fname, &nd);
 	if (error)
 		goto out;
-	error = vfs_statfs(nd.dentry->d_inode->i_sb, &kbuf);
+	error = vfs_statfs(nd.mnt, &kbuf);
 	if (error)
 		goto dput_and_out;
 
@@ -1658,7 +1658,7 @@ asmlinkage int irix_fstatvfs64(int fd, s
 		error = -EBADF;
 		goto out;
 	}
-	error = vfs_statfs(file->f_dentry->d_inode->i_sb, &kbuf);
+	error = vfs_statfs(file->f_vfsmnt, &kbuf);
 	if (error)
 		goto out_f;
 
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index 05273cc..0e76084 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -139,13 +139,18 @@ static int hpux_ustat(dev_t dev, struct 
 {
 	struct super_block *s;
 	struct hpux_ustat tmp;  /* Changed to hpux_ustat */
+	struct vfsmount mnt;
 	struct kstatfs sbuf;
 	int err = -EINVAL;
 
 	s = user_get_super(dev);
 	if (s == NULL)
 		goto out;
-	err = vfs_statfs(s, &sbuf);
+	memset(&mnt, 0, sizeof(mnt));
+	mnt.mnt_sb = s;
+	mnt.mnt_root = s->s_root;
+	mnt.mnt_mountpoint = s->s_root;
+	err = vfs_statfs(&mnt, &sbuf);
 	drop_super(s);
 	if (err)
 		goto out;
@@ -186,12 +191,12 @@ struct hpux_statfs {
      int16_t f_pad;
 };
 
-static int vfs_statfs_hpux(struct super_block *sb, struct hpux_statfs *buf)
+static int vfs_statfs_hpux(struct vfsmount *mnt, struct hpux_statfs *buf)
 {
 	struct kstatfs st;
 	int retval;
 	
-	retval = vfs_statfs(sb, &st);
+	retval = vfs_statfs(mnt, &st);
 	if (retval)
 		return retval;
 
@@ -219,7 +224,7 @@ asmlinkage long hpux_statfs(const char _
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct hpux_statfs tmp;
-		error = vfs_statfs_hpux(nd.dentry->d_inode->i_sb, &tmp);
+		error = vfs_statfs_hpux(nd.mnt, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
@@ -237,7 +242,7 @@ asmlinkage long hpux_fstatfs(unsigned in
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs_hpux(file->f_dentry->d_inode->i_sb, &tmp);
+	error = vfs_statfs_hpux(file->f_vfsmnt, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
diff --git a/arch/sparc64/solaris/fs.c b/arch/sparc64/solaris/fs.c
index 4885ca6..096cf2c 100644
--- a/arch/sparc64/solaris/fs.c
+++ b/arch/sparc64/solaris/fs.c
@@ -356,7 +356,7 @@ static int report_statvfs(struct vfsmoun
 	int error;
 	struct sol_statvfs __user *ss = A(buf);
 
-	error = vfs_statfs(mnt->mnt_sb, &s);
+	error = vfs_statfs(mnt, &s);
 	if (!error) {
 		const char *p = mnt->mnt_sb->s_type->name;
 		int i = 0;
@@ -392,7 +392,7 @@ static int report_statvfs64(struct vfsmo
 	int error;
 	struct sol_statvfs64 __user *ss = A(buf);
 			
-	error = vfs_statfs(mnt->mnt_sb, &s);
+	error = vfs_statfs(mnt, &s);
 	if (!error) {
 		const char *p = mnt->mnt_sb->s_type->name;
 		int i = 0;
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 1b58a9b..f48c0b0 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -196,13 +196,13 @@ static int adfs_remount(struct super_blo
 	return parse_options(sb, data);
 }
 
-static int adfs_statfs(struct super_block *sb, struct kstatfs *buf)
+static int adfs_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
-	struct adfs_sb_info *asb = ADFS_SB(sb);
+	struct adfs_sb_info *asb = ADFS_SB(mnt->mnt_sb);
 
 	buf->f_type    = ADFS_SUPER_MAGIC;
 	buf->f_namelen = asb->s_namelen;
-	buf->f_bsize   = sb->s_blocksize;
+	buf->f_bsize   = mnt->mnt_sb->s_blocksize;
 	buf->f_blocks  = asb->s_size;
 	buf->f_files   = asb->s_ids_per_zone * asb->s_map_size;
 	buf->f_bavail  =
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 6a52e78..747cfdc 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -18,7 +18,7 @@ #include "affs.h"
 
 extern struct timezone sys_tz;
 
-static int affs_statfs(struct super_block *sb, struct kstatfs *buf);
+static int affs_statfs(struct vfsmount *mnt, struct kstatfs *buf);
 static int affs_remount (struct super_block *sb, int *flags, char *data);
 
 static void
@@ -508,8 +508,9 @@ affs_remount(struct super_block *sb, int
 }
 
 static int
-affs_statfs(struct super_block *sb, struct kstatfs *buf)
+affs_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
+	struct super_block *sb = mnt->mnt_sb;
 	int		 free;
 
 	pr_debug("AFFS: statfs() partsize=%d, reserved=%d\n",AFFS_SB(sb)->s_partition_size,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 6ed07a5..551d6f8 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -49,7 +49,7 @@ static int befs_nls2utf(struct super_blo
 			char **out, int *out_len);
 static void befs_put_super(struct super_block *);
 static int befs_remount(struct super_block *, int *, char *);
-static int befs_statfs(struct super_block *, struct kstatfs *);
+static int befs_statfs(struct vfsmount *, struct kstatfs *);
 static int parse_options(char *, befs_mount_options *);
 
 static const struct super_operations befs_sops = {
@@ -880,8 +880,9 @@ befs_remount(struct super_block *sb, int
 }
 
 static int
-befs_statfs(struct super_block *sb, struct kstatfs *buf)
+befs_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
+	struct super_block *sb = mnt->mnt_sb;
 
 	befs_debug(sb, "---> befs_statfs()");
 
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index e7da03f..8b156ae 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -203,8 +203,9 @@ static void bfs_put_super(struct super_b
 	s->s_fs_info = NULL;
 }
 
-static int bfs_statfs(struct super_block *s, struct kstatfs *buf)
+static int bfs_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
+	struct super_block *s = mnt->mnt_sb;
 	struct bfs_sb_info *info = BFS_SB(s);
 	u64 id = huge_encode_dev(s->s_bdev->bd_dev);
 	buf->f_type = BFS_MAGIC;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 6779837..6a0bd16 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -166,8 +166,9 @@ cifs_put_super(struct super_block *sb)
 }
 
 static int
-cifs_statfs(struct super_block *sb, struct kstatfs *buf)
+cifs_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
+	struct super_block *sb = mnt->mnt_sb;
 	int xid; 
 	int rc = -EOPNOTSUPP;
 	struct cifs_sb_info *cifs_sb;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index cba7020..a36a8ee 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -36,7 +36,7 @@ #include "coda_int.h"
 /* VFS super_block ops */
 static void coda_clear_inode(struct inode *);
 static void coda_put_super(struct super_block *);
-static int coda_statfs(struct super_block *sb, struct kstatfs *buf);
+static int coda_statfs(struct vfsmount *mnt, struct kstatfs *buf);
 
 static kmem_cache_t * coda_inode_cachep;
 
@@ -278,13 +278,13 @@ struct inode_operations coda_file_inode_
 	.setattr	= coda_setattr,
 };
 
-static int coda_statfs(struct super_block *sb, struct kstatfs *buf)
+static int coda_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
 	int error;
 	
 	lock_kernel();
 
-	error = venus_statfs(sb, buf);
+	error = venus_statfs(mnt->mnt_sb, buf);
 
 	unlock_kernel();
 
diff --git a/fs/compat.c b/fs/compat.c
index 970888a..395759e 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -197,7 +197,7 @@ asmlinkage long compat_sys_statfs(const 
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct kstatfs tmp;
-		error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp);
+		error = vfs_statfs(nd.mnt, &tmp);
 		if (!error)
 			error = put_compat_statfs(buf, &tmp);
 		path_release(&nd);
@@ -215,7 +215,7 @@ asmlinkage long compat_sys_fstatfs(unsig
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp);
+	error = vfs_statfs(file->f_vfsmnt, &tmp);
 	if (!error)
 		error = put_compat_statfs(buf, &tmp);
 	fput(file);
@@ -265,7 +265,7 @@ asmlinkage long compat_sys_statfs64(cons
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct kstatfs tmp;
-		error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp);
+		error = vfs_statfs(nd.mnt, &tmp);
 		if (!error)
 			error = put_compat_statfs64(buf, &tmp);
 		path_release(&nd);
@@ -286,7 +286,7 @@ asmlinkage long compat_sys_fstatfs64(uns
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp);
+	error = vfs_statfs(file->f_vfsmnt, &tmp);
 	if (!error)
 		error = put_compat_statfs64(buf, &tmp);
 	fput(file);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 37a91a1..621aedf 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -322,8 +322,10 @@ out:
 	return -EINVAL;
 }
 
-static int cramfs_statfs(struct super_block *sb, struct kstatfs *buf)
+static int cramfs_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
+	struct super_block *sb = mnt->mnt_sb;
+
 	buf->f_type = CRAMFS_MAGIC;
 	buf->f_bsize = PAGE_CACHE_SIZE;
 	buf->f_blocks = CRAMFS_SB(sb)->blocks;
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 1ba5e14..312f5df 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -15,7 +15,7 @@ #include <linux/slab.h>
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
 
-static int efs_statfs(struct super_block *s, struct kstatfs *buf);
+static int efs_statfs(struct vfsmount *mnt, struct kstatfs *buf);
 static int efs_fill_super(struct super_block *s, void *d, int silent);
 
 static int efs_get_sb(struct file_system_type *fs_type,
@@ -322,8 +322,8 @@ out_no_fs:
 	return -EINVAL;
 }
 
-static int efs_statfs(struct super_block *s, struct kstatfs *buf) {
-	struct efs_sb_info *sb = SUPER_INFO(s);
+static int efs_statfs(struct vfsmount *mnt, struct kstatfs *buf) {
+	struct efs_sb_info *sb = SUPER_INFO(mnt->mnt_sb);
 
 	buf->f_type    = EFS_SUPER_MAGIC;	/* efs magic number */
 	buf->f_bsize   = EFS_BLOCKSIZE;		/* blocksize */
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index a4dfffa..86d23e2 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -39,7 +39,7 @@ #include "xip.h"
 static void ext2_sync_super(struct super_block *sb,
 			    struct ext2_super_block *es);
 static int ext2_remount (struct super_block * sb, int * flags, char * data);
-static int ext2_statfs (struct super_block * sb, struct kstatfs * buf);
+static int ext2_statfs (struct vfsmount * mnt, struct kstatfs * buf);
 
 void ext2_error (struct super_block * sb, const char * function,
 		 const char * fmt, ...)
@@ -1038,8 +1038,9 @@ restore_opts:
 	return err;
 }
 
-static int ext2_statfs (struct super_block * sb, struct kstatfs * buf)
+static int ext2_statfs (struct vfsmount * mnt, struct kstatfs * buf)
 {
+	struct super_block *sb = mnt->mnt_sb;
 	struct ext2_sb_info *sbi = EXT2_SB(sb);
 	unsigned long overhead;
 	int i;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 657f8e7..274a7bc 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -58,7 +58,7 @@ static int ext3_sync_fs(struct super_blo
 static const char *ext3_decode_error(struct super_block * sb, int errno,
 				     char nbuf[16]);
 static int ext3_remount (struct super_block * sb, int * flags, char * data);
-static int ext3_statfs (struct super_block * sb, struct kstatfs * buf);
+static int ext3_statfs (struct vfsmount * mnt, struct kstatfs * buf);
 static void ext3_unlockfs(struct super_block *sb);
 static void ext3_write_super (struct super_block * sb);
 static void ext3_write_super_lockfs(struct super_block *sb);
@@ -2318,8 +2318,9 @@ #endif
 	return err;
 }
 
-static int ext3_statfs (struct super_block * sb, struct kstatfs * buf)
+static int ext3_statfs (struct vfsmount * mnt, struct kstatfs * buf)
 {
+	struct super_block *sb = mnt->mnt_sb;
 	struct ext3_sb_info *sbi = EXT3_SB(sb);
 	struct ext3_super_block *es = sbi->s_es;
 	unsigned long overhead;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index c1ce284..5c15741 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -539,18 +539,18 @@ static int fat_remount(struct super_bloc
 	return 0;
 }
 
-static int fat_statfs(struct super_block *sb, struct kstatfs *buf)
+static int fat_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
-	struct msdos_sb_info *sbi = MSDOS_SB(sb);
+	struct msdos_sb_info *sbi = MSDOS_SB(mnt->mnt_sb);
 
 	/* If the count of free cluster is still unknown, counts it here. */
 	if (sbi->free_clusters == -1) {
-		int err = fat_count_free_clusters(sb);
+		int err = fat_count_free_clusters(mnt->mnt_sb);
 		if (err)
 			return err;
 	}
 
-	buf->f_type = sb->s_magic;
+	buf->f_type = mnt->mnt_sb->s_magic;
 	buf->f_bsize = sbi->cluster_size;
 	buf->f_blocks = sbi->max_cluster - FAT_START_ENT;
 	buf->f_bfree = sbi->free_clusters;
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index d76eeaa..fe8f7af 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -55,7 +55,7 @@ MODULE_ALIAS("vxfs"); /* makes mount -t 
 
 
 static void		vxfs_put_super(struct super_block *);
-static int		vxfs_statfs(struct super_block *, struct kstatfs *);
+static int		vxfs_statfs(struct vfsmount *, struct kstatfs *);
 static int		vxfs_remount(struct super_block *, int *, char *);
 
 static struct super_operations vxfs_super_ops = {
@@ -90,12 +90,12 @@ vxfs_put_super(struct super_block *sbp)
 
 /**
  * vxfs_statfs - get filesystem information
- * @sbp:	VFS superblock
+ * @mnt:	VFS mountpoint
  * @bufp:	output buffer
  *
  * Description:
  *   vxfs_statfs fills the statfs buffer @bufp with information
- *   about the filesystem described by @sbp.
+ *   about the filesystem described by @mnt.
  *
  * Returns:
  *   Zero.
@@ -107,12 +107,12 @@ vxfs_put_super(struct super_block *sbp)
  *   This is everything but complete...
  */
 static int
-vxfs_statfs(struct super_block *sbp, struct kstatfs *bufp)
+vxfs_statfs(struct vfsmount *mnt, struct kstatfs *bufp)
 {
-	struct vxfs_sb_info		*infp = VXFS_SBI(sbp);
+	struct vxfs_sb_info		*infp = VXFS_SBI(mnt->mnt_sb);
 
 	bufp->f_type = VXFS_SUPER_MAGIC;
-	bufp->f_bsize = sbp->s_blocksize;
+	bufp->f_bsize = mnt->mnt_sb->s_blocksize;
 	bufp->f_blocks = infp->vsi_raw->vs_dsize;
 	bufp->f_bfree = infp->vsi_raw->vs_free;
 	bufp->f_bavail = 0;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 5c5ab5f..ffe3734 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -237,8 +237,9 @@ static void convert_fuse_statfs(struct k
 	/* fsid is left zero */
 }
 
-static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
+static int fuse_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
+	struct super_block *sb = mnt->mnt_sb;
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
 	struct fuse_req *req;
 	struct fuse_statfs_out outarg;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index ee5b80a..0176315 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -80,8 +80,10 @@ static void hfs_put_super(struct super_b
  *
  * changed f_files/f_ffree to reflect the fs_ablock/free_ablocks.
  */
-static int hfs_statfs(struct super_block *sb, struct kstatfs *buf)
+static int hfs_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
+	struct super_block *sb = mnt->mnt_sb;
+
 	buf->f_type = HFS_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = (u32)HFS_SB(sb)->fs_ablocks * HFS_SB(sb)->fs_div;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 0ed8b7e..22a600b 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -212,8 +212,10 @@ static void hfsplus_put_super(struct sup
 	sb->s_fs_info = NULL;
 }
 
-static int hfsplus_statfs(struct super_block *sb, struct kstatfs *buf)
+static int hfsplus_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
+	struct super_block *sb = mnt->mnt_sb;
+
 	buf->f_type = HFSPLUS_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = HFSPLUS_SB(sb).total_blocks << HFSPLUS_SB(sb).fs_shift;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 04035e0..35aad74 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -239,7 +239,7 @@ static int read_inode(struct inode *ino)
 	return(err);
 }
 
-int hostfs_statfs(struct super_block *sb, struct kstatfs *sf)
+int hostfs_statfs(struct vfsmount *mnt, struct kstatfs *sf)
 {
 	/* do_statfs uses struct statfs64 internally, but the linux kernel
 	 * struct statfs still has 32-bit versions for most of these fields,
@@ -252,7 +252,7 @@ int hostfs_statfs(struct super_block *sb
 	long long f_files;
 	long long f_ffree;
 
-	err = do_statfs(HOSTFS_I(sb->s_root->d_inode)->host_filename,
+	err = do_statfs(HOSTFS_I(mnt->mnt_sb->s_root->d_inode)->host_filename,
 			&sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
 			&f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
 			&sf->f_namelen, sf->f_spare);
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 3b25cf3..04255ec 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -135,8 +135,9 @@ static unsigned count_bitmaps(struct sup
 	return count;
 }
 
-static int hpfs_statfs(struct super_block *s, struct kstatfs *buf)
+static int hpfs_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
+	struct super_block *s = mnt->mnt_sb;
 	struct hpfs_sb_info *sbi = hpfs_sb(s);
 	lock_kernel();
 
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index ec43c22..36fb79d 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -616,7 +616,7 @@ static const struct file_operations hppf
 	.fsync		= hppfs_fsync,
 };
 
-static int hppfs_statfs(struct super_block *sb, struct kstatfs *sf)
+static int hppfs_statfs(struct vfsmount *mnt, struct kstatfs *sf)
 {
 	sf->f_blocks = 0;
 	sf->f_bfree = 0;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 4665c26..054a079 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -467,9 +467,9 @@ static int hugetlbfs_set_page_dirty(stru
 	return 0;
 }
 
-static int hugetlbfs_statfs(struct super_block *sb, struct kstatfs *buf)
+static int hugetlbfs_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
+	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mnt->mnt_sb);
 
 	buf->f_type = HUGETLBFS_MAGIC;
 	buf->f_bsize = HPAGE_SIZE;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 17268da..5e5f5b1 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -22,6 +22,7 @@ #include <linux/smp_lock.h>
 #include <linux/statfs.h>
 #include <linux/cdrom.h>
 #include <linux/parser.h>
+#include <linux/mount.h>
 
 #include "isofs.h"
 #include "zisofs.h"
@@ -56,7 +57,7 @@ #endif
 }
 
 static void isofs_read_inode(struct inode *);
-static int isofs_statfs (struct super_block *, struct kstatfs *);
+static int isofs_statfs (struct vfsmount *, struct kstatfs *);
 
 static kmem_cache_t *isofs_inode_cachep;
 
@@ -901,8 +902,10 @@ out_freesbi:
 	return -EINVAL;
 }
 
-static int isofs_statfs (struct super_block *sb, struct kstatfs *buf)
+static int isofs_statfs (struct vfsmount *mnt, struct kstatfs *buf)
 {
+	struct super_block *sb = mnt->mnt_sb;
+
 	buf->f_type = ISOFS_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = (ISOFS_SB(sb)->s_nzones
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index dd93a09..517a141 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -377,9 +377,9 @@ jffs_new_inode(const struct inode * dir,
 
 /* Get statistics of the file system.  */
 static int
-jffs_statfs(struct super_block *sb, struct kstatfs *buf)
+jffs_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
-	struct jffs_control *c = (struct jffs_control *) sb->s_fs_info;
+	struct jffs_control *c = (struct jffs_control *) mnt->mnt_sb->s_fs_info;
 	struct jffs_fmcontrol *fmc;
 
 	lock_kernel();
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 09e5d10..0d7ff1b 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -183,9 +183,9 @@ int jffs2_setattr(struct dentry *dentry,
 	return jffs2_do_setattr(dentry->d_inode, iattr);
 }
 
-int jffs2_statfs(struct super_block *sb, struct kstatfs *buf)
+int jffs2_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
-	struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
+	struct jffs2_sb_info *c = JFFS2_SB_INFO(mnt->mnt_sb);
 	unsigned long avail;
 
 	buf->f_type = JFFS2_SUPER_MAGIC;
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index d307cf5..90d4c5f 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -182,7 +182,7 @@ void jffs2_clear_inode (struct inode *);
 void jffs2_dirty_inode(struct inode *inode);
 struct inode *jffs2_new_inode (struct inode *dir_i, int mode,
 			       struct jffs2_raw_inode *ri);
-int jffs2_statfs (struct super_block *, struct kstatfs *);
+int jffs2_statfs (struct vfsmount *, struct kstatfs *);
 void jffs2_write_super (struct super_block *);
 int jffs2_remount_fs (struct super_block *, int *, char *);
 int jffs2_do_fill_super(struct super_block *sb, void *data, int silent);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0a81905..27b4434 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -139,9 +139,9 @@ #endif
 	kmem_cache_free(jfs_inode_cachep, ji);
 }
 
-static int jfs_statfs(struct super_block *sb, struct kstatfs *buf)
+static int jfs_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
-	struct jfs_sb_info *sbi = JFS_SBI(sb);
+	struct jfs_sb_info *sbi = JFS_SBI(mnt->mnt_sb);
 	s64 maxinodes;
 	struct inomap *imap = JFS_IP(sbi->ipimap)->i_imap;
 
diff --git a/fs/libfs.c b/fs/libfs.c
index df55ac9..27a4b58 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -20,9 +20,9 @@ int simple_getattr(struct vfsmount *mnt,
 	return 0;
 }
 
-int simple_statfs(struct super_block *sb, struct kstatfs *buf)
+int simple_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
-	buf->f_type = sb->s_magic;
+	buf->f_type = mnt->mnt_sb->s_magic;
 	buf->f_bsize = PAGE_CACHE_SIZE;
 	buf->f_namelen = NAME_MAX;
 	return 0;
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 14f24df..bfa86c9 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -19,7 +19,7 @@ #include <linux/vfs.h>
 
 static void minix_read_inode(struct inode * inode);
 static int minix_write_inode(struct inode * inode, int wait);
-static int minix_statfs(struct super_block *sb, struct kstatfs *buf);
+static int minix_statfs(struct vfsmount *mnt, struct kstatfs *buf);
 static int minix_remount (struct super_block * sb, int * flags, char * data);
 
 static void minix_delete_inode(struct inode *inode)
@@ -296,11 +296,11 @@ out_bad_sb:
 	return -EINVAL;
 }
 
-static int minix_statfs(struct super_block *sb, struct kstatfs *buf)
+static int minix_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
-	struct minix_sb_info *sbi = minix_sb(sb);
-	buf->f_type = sb->s_magic;
-	buf->f_bsize = sb->s_blocksize;
+	struct minix_sb_info *sbi = minix_sb(mnt->mnt_sb);
+	buf->f_type = mnt->mnt_sb->s_magic;
+	buf->f_bsize = mnt->mnt_sb->s_blocksize;
 	buf->f_blocks = (sbi->s_nzones - sbi->s_firstdatazone) << sbi->s_log_zone_size;
 	buf->f_bfree = minix_count_free_blocks(sbi);
 	buf->f_bavail = buf->f_bfree;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 8db033f..51a1241 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -39,7 +39,7 @@ #include "getopt.h"
 
 static void ncp_delete_inode(struct inode *);
 static void ncp_put_super(struct super_block *);
-static int  ncp_statfs(struct super_block *, struct kstatfs *);
+static int  ncp_statfs(struct vfsmount *, struct kstatfs *);
 
 static kmem_cache_t * ncp_inode_cachep;
 
@@ -724,13 +724,14 @@ #endif /* CONFIG_NCPFS_NLS */
 	kfree(server);
 }
 
-static int ncp_statfs(struct super_block *sb, struct kstatfs *buf)
+static int ncp_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
 	struct dentry* d;
 	struct inode* i;
 	struct ncp_inode_info* ni;
 	struct ncp_server* s;
 	struct ncp_volume_info vi;
+	struct super_block *sb = mnt->mnt_sb;
 	int err;
 	__u8 dh;
 	
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c321f71..5fe41e0 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -67,7 +67,7 @@ static int nfs_write_inode(struct inode 
 static void nfs_delete_inode(struct inode *);
 static void nfs_clear_inode(struct inode *);
 static void nfs_umount_begin(struct vfsmount *, int);
-static int  nfs_statfs(struct super_block *, struct kstatfs *);
+static int  nfs_statfs(struct vfsmount *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
 static int  nfs_show_stats(struct seq_file *, struct vfsmount *);
 static void nfs_zap_acl_cache(struct inode *);
@@ -548,8 +548,9 @@ #endif /* CONFIG_NFS_V3_ACL */
 }
 
 static int
-nfs_statfs(struct super_block *sb, struct kstatfs *buf)
+nfs_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
+	struct super_block *sb = mnt->mnt_sb;
 	struct nfs_server *server = NFS_SB(sb);
 	unsigned char blockbits;
 	unsigned long blockres;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index de3998f..6ce17b6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1310,7 +1310,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s
 	if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL)) ||
 	    (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
 		       FATTR4_WORD1_SPACE_TOTAL))) {
-		status = vfs_statfs(dentry->d_inode->i_sb, &statfs);
+		status = vfs_statfs(exp->ex_mnt, &statfs);
 		if (status)
 			goto out_nfserr;
 	}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 6aa92d0..a5d42e0 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1736,9 +1736,19 @@ out:
 int
 nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat)
 {
+	struct vfsmount mnt;
+
 	int err = fh_verify(rqstp, fhp, 0, MAY_NOP);
-	if (!err && vfs_statfs(fhp->fh_dentry->d_inode->i_sb,stat))
-		err = nfserr_io;
+	if (!err) {
+		memset(&mnt, 0, sizeof(mnt));
+
+		mnt.mnt_sb = fhp->fh_dentry->d_inode->i_sb;
+		mnt.mnt_root = mnt.mnt_sb->s_root;
+		mnt.mnt_mountpoint = mnt.mnt_root;
+
+		if (vfs_statfs(&mnt, stat))
+			err = nfserr_io;
+	}
 	return err;
 }
 
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index d5d5e96..bf80fd3 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2601,10 +2601,10 @@ static unsigned long __get_nr_free_mft_r
 
 /**
  * ntfs_statfs - return information about mounted NTFS volume
- * @sb:		super block of mounted volume
+ * @mnt:	mountpoint of mounted volume
  * @sfs:	statfs structure in which to return the information
  *
- * Return information about the mounted NTFS volume @sb in the statfs structure
+ * Return information about the mounted NTFS volume @mnt in the statfs structure
  * pointed to by @sfs (this is initialized with zeros before ntfs_statfs is
  * called). We interpret the values to be correct of the moment in time at
  * which we are called. Most values are variable otherwise and this isn't just
@@ -2617,8 +2617,9 @@ static unsigned long __get_nr_free_mft_r
  *
  * Return 0 on success or -errno on error.
  */
-static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs)
+static int ntfs_statfs(struct vfsmount *mnt, struct kstatfs *sfs)
 {
+	struct super_block *sb = mnt->mnt_sb;
 	s64 size;
 	ntfs_volume *vol = NTFS_SB(sb);
 	ntfs_inode *mft_ni = NTFS_I(vol->mft_ino);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 788b8b5..5e79ff9 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -100,7 +100,7 @@ static int ocfs2_initialize_mem_caches(v
 static void ocfs2_free_mem_caches(void);
 static void ocfs2_delete_osb(struct ocfs2_super *osb);
 
-static int ocfs2_statfs(struct super_block *sb, struct kstatfs *buf);
+static int ocfs2_statfs(struct vfsmount *mnt, struct kstatfs *buf);
 
 static int ocfs2_sync_fs(struct super_block *sb, int wait);
 
@@ -857,7 +857,7 @@ static void ocfs2_put_super(struct super
 	mlog_exit_void();
 }
 
-static int ocfs2_statfs(struct super_block *sb, struct kstatfs *buf)
+static int ocfs2_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
 	struct ocfs2_super *osb;
 	u32 numbits, freebits;
diff --git a/fs/open.c b/fs/open.c
index 53ec28c..c7a48ee 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -31,18 +31,18 @@ #include <linux/audit.h>
 
 #include <asm/unistd.h>
 
-int vfs_statfs(struct super_block *sb, struct kstatfs *buf)
+int vfs_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
 	int retval = -ENODEV;
 
-	if (sb) {
+	if (mnt) {
 		retval = -ENOSYS;
-		if (sb->s_op->statfs) {
+		if (mnt->mnt_sb->s_op->statfs) {
 			memset(buf, 0, sizeof(*buf));
-			retval = security_sb_statfs(sb);
+			retval = security_sb_statfs(mnt);
 			if (retval)
 				return retval;
-			retval = sb->s_op->statfs(sb, buf);
+			retval = mnt->mnt_sb->s_op->statfs(mnt, buf);
 			if (retval == 0 && buf->f_frsize == 0)
 				buf->f_frsize = buf->f_bsize;
 		}
@@ -52,12 +52,12 @@ int vfs_statfs(struct super_block *sb, s
 
 EXPORT_SYMBOL(vfs_statfs);
 
-static int vfs_statfs_native(struct super_block *sb, struct statfs *buf)
+static int vfs_statfs_native(struct vfsmount *mnt, struct statfs *buf)
 {
 	struct kstatfs st;
 	int retval;
 
-	retval = vfs_statfs(sb, &st);
+	retval = vfs_statfs(mnt, &st);
 	if (retval)
 		return retval;
 
@@ -95,12 +95,12 @@ static int vfs_statfs_native(struct supe
 	return 0;
 }
 
-static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf)
+static int vfs_statfs64(struct vfsmount *mnt, struct statfs64 *buf)
 {
 	struct kstatfs st;
 	int retval;
 
-	retval = vfs_statfs(sb, &st);
+	retval = vfs_statfs(mnt, &st);
 	if (retval)
 		return retval;
 
@@ -130,7 +130,7 @@ asmlinkage long sys_statfs(const char __
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct statfs tmp;
-		error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp);
+		error = vfs_statfs_native(nd.mnt, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
@@ -149,7 +149,7 @@ asmlinkage long sys_statfs64(const char 
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct statfs64 tmp;
-		error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp);
+		error = vfs_statfs64(nd.mnt, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
@@ -168,7 +168,7 @@ asmlinkage long sys_fstatfs(unsigned int
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp);
+	error = vfs_statfs_native(file->f_vfsmnt, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -189,7 +189,7 @@ asmlinkage long sys_fstatfs64(unsigned i
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp);
+	error = vfs_statfs64(file->f_vfsmnt, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index e6cca5c..3f3462e 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -128,7 +128,7 @@ static struct inode *qnx4_alloc_inode(st
 static void qnx4_destroy_inode(struct inode *inode);
 static void qnx4_read_inode(struct inode *);
 static int qnx4_remount(struct super_block *sb, int *flags, char *data);
-static int qnx4_statfs(struct super_block *, struct kstatfs *);
+static int qnx4_statfs(struct vfsmount *, struct kstatfs *);
 
 static struct super_operations qnx4_sops =
 {
@@ -282,8 +282,10 @@ unsigned long qnx4_block_map( struct ino
 	return block;
 }
 
-static int qnx4_statfs(struct super_block *sb, struct kstatfs *buf)
+static int qnx4_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
+	struct super_block *sb = mnt->mnt_sb;
+
 	lock_kernel();
 
 	buf->f_type    = sb->s_magic;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f3ff41d..12b3acc 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -60,7 +60,7 @@ static int is_any_reiserfs_magic_string(
 }
 
 static int reiserfs_remount(struct super_block *s, int *flags, char *data);
-static int reiserfs_statfs(struct super_block *s, struct kstatfs *buf);
+static int reiserfs_statfs(struct vfsmount *mnt, struct kstatfs *buf);
 
 static int reiserfs_sync_fs(struct super_block *s, int wait)
 {
@@ -1938,15 +1938,15 @@ #endif
 	return errval;
 }
 
-static int reiserfs_statfs(struct super_block *s, struct kstatfs *buf)
+static int reiserfs_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
-	struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s);
+	struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(mnt->mnt_sb);
 
 	buf->f_namelen = (REISERFS_MAX_NAME(s->s_blocksize));
 	buf->f_bfree = sb_free_blocks(rs);
 	buf->f_bavail = buf->f_bfree;
 	buf->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1;
-	buf->f_bsize = s->s_blocksize;
+	buf->f_bsize = mnt->mnt_sb->s_blocksize;
 	/* changed to accommodate gcc folks. */
 	buf->f_type = REISERFS_SUPER_MAGIC;
 	return 0;
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 4d6cd66..c89eb72 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -179,12 +179,12 @@ outnobh:
 /* That's simple too. */
 
 static int
-romfs_statfs(struct super_block *sb, struct kstatfs *buf)
+romfs_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
 	buf->f_type = ROMFS_MAGIC;
 	buf->f_bsize = ROMBSIZE;
 	buf->f_bfree = buf->f_bavail = buf->f_ffree;
-	buf->f_blocks = (romfs_maxsize(sb)+ROMBSIZE-1)>>ROMBSBITS;
+	buf->f_blocks = (romfs_maxsize(mnt->mnt_sb)+ROMBSIZE-1)>>ROMBSBITS;
 	buf->f_namelen = ROMFS_MAXFN;
 	return 0;
 }
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 4a37c2b..42bea32 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -48,7 +48,7 @@ #define SMB_TTL_DEFAULT 1000
 
 static void smb_delete_inode(struct inode *);
 static void smb_put_super(struct super_block *);
-static int  smb_statfs(struct super_block *, struct kstatfs *);
+static int  smb_statfs(struct vfsmount *, struct kstatfs *);
 static int  smb_show_options(struct seq_file *, struct vfsmount *);
 
 static kmem_cache_t *smb_inode_cachep;
@@ -641,13 +641,13 @@ out_no_server:
 }
 
 static int
-smb_statfs(struct super_block *sb, struct kstatfs *buf)
+smb_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
 	int result;
 	
 	lock_kernel();
 
-	result = smb_proc_dskattr(sb, buf);
+	result = smb_proc_dskattr(mnt->mnt_sb, buf);
 
 	unlock_kernel();
 
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index b1b878b..de8eaab 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -3226,9 +3226,9 @@ smb_proc_settime(struct dentry *dentry, 
 }
 
 int
-smb_proc_dskattr(struct super_block *sb, struct kstatfs *attr)
+smb_proc_dskattr(struct vfsmount *mnt, struct kstatfs *attr)
 {
-	struct smb_sb_info *server = SMB_SB(sb);
+	struct smb_sb_info *server = SMB_SB(mnt->mnt_sb);
 	int result;
 	char *p;
 	long unit;
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h
index 4766459..f51828b 100644
--- a/fs/smbfs/proto.h
+++ b/fs/smbfs/proto.h
@@ -29,7 +29,7 @@ extern int smb_proc_getattr(struct dentr
 extern int smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr);
 extern int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, unsigned int major, unsigned int minor);
 extern int smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr);
-extern int smb_proc_dskattr(struct super_block *sb, struct kstatfs *attr);
+extern int smb_proc_dskattr(struct vfsmount *mnt, struct kstatfs *attr);
 extern int smb_proc_read_link(struct smb_sb_info *server, struct dentry *d, char *buffer, int len);
 extern int smb_proc_symlink(struct smb_sb_info *server, struct dentry *d, const char *oldpath);
 extern int smb_proc_link(struct smb_sb_info *server, struct dentry *dentry, struct dentry *new_dentry);
diff --git a/fs/super.c b/fs/super.c
index 3daf41e..322c58f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -481,12 +481,17 @@ asmlinkage long sys_ustat(unsigned dev, 
         struct super_block *s;
         struct ustat tmp;
         struct kstatfs sbuf;
+	struct vfsmount mnt;
 	int err = -EINVAL;
 
         s = user_get_super(new_decode_dev(dev));
         if (s == NULL)
                 goto out;
-	err = vfs_statfs(s, &sbuf);
+	memset(&mnt, 0, sizeof(mnt));
+	mnt.mnt_sb = s;
+	mnt.mnt_root = s->s_root;
+	mnt.mnt_mountpoint = mnt.mnt_root;
+	err = vfs_statfs(&mnt, &sbuf);
 	drop_super(s);
 	if (err)
 		goto out;
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 3ff89cc..fd93090 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -85,8 +85,9 @@ static void sysv_put_super(struct super_
 	kfree(sbi);
 }
 
-static int sysv_statfs(struct super_block *sb, struct kstatfs *buf)
+static int sysv_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
+	struct super_block *sb = mnt->mnt_sb;
 	struct sysv_sb_info *sbi = SYSV_SB(sb);
 
 	buf->f_type = sb->s_magic;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 2250774..abd9f85 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -91,7 +91,7 @@ static void udf_load_partdesc(struct sup
 static void udf_open_lvid(struct super_block *);
 static void udf_close_lvid(struct super_block *);
 static unsigned int udf_count_free(struct super_block *);
-static int udf_statfs(struct super_block *, struct kstatfs *);
+static int udf_statfs(struct vfsmount *, struct kstatfs *);
 
 /* UDF filesystem type */
 static int udf_get_sb(struct file_system_type *fs_type,
@@ -1779,8 +1779,10 @@ #endif
  *	Written, tested, and released.
  */
 static int
-udf_statfs(struct super_block *sb, struct kstatfs *buf)
+udf_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
+	struct super_block *sb = mnt->mnt_sb;
+
 	buf->f_type = UDF_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = UDF_SB_PARTLEN(sb, UDF_SB_PARTITION(sb));
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 768fb8d..c76d260 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1113,8 +1113,9 @@ #endif
 	return 0;
 }
 
-static int ufs_statfs (struct super_block *sb, struct kstatfs *buf)
+static int ufs_statfs (struct vfsmount *mnt, struct kstatfs *buf)
 {
+	struct super_block *sb = mnt->mnt_sb;
 	struct ufs_sb_private_info * uspi;
 	struct ufs_super_block_first * usb1;
 	struct ufs_super_block * usb;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 7702355..8aa5f0f 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -703,10 +703,10 @@ xfs_fs_sync_super(
 
 STATIC int
 xfs_fs_statfs(
-	struct super_block	*sb,
+	struct vfsmount		*mnt,
 	struct kstatfs		*statp)
 {
-	vfs_t			*vfsp = vfs_from_sb(sb);
+	vfs_t			*vfsp = vfs_from_sb(mnt->mnt_sb);
 	int			error;
 
 	VFS_STATVFS(vfsp, statp, NULL, error);
diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h
index d539262..7387edf 100644
--- a/include/linux/coda_psdev.h
+++ b/include/linux/coda_psdev.h
@@ -70,7 +70,7 @@ int venus_pioctl(struct super_block *sb,
 		 unsigned int cmd, struct PioctlData *data);
 int coda_downcall(int opcode, union outputArgs *out, struct super_block *sb);
 int venus_fsync(struct super_block *sb, struct CodaFid *fid);
-int venus_statfs(struct super_block *sb, struct kstatfs *sfs);
+int venus_statfs(struct vfsmount *mnt, struct kstatfs *sfs);
 
 
 /* messages between coda filesystem in kernel and Venus */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cde3028..e57518e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1097,7 +1097,7 @@ struct super_operations {
 	int (*sync_fs)(struct super_block *sb, int wait);
 	void (*write_super_lockfs) (struct super_block *);
 	void (*unlockfs) (struct super_block *);
-	int (*statfs) (struct super_block *, struct kstatfs *);
+	int (*statfs) (struct vfsmount *, struct kstatfs *);
 	int (*remount_fs) (struct super_block *, int *, char *);
 	void (*clear_inode) (struct inode *);
 	void (*umount_begin) (struct vfsmount *, int);
@@ -1326,7 +1326,7 @@ extern struct vfsmount *copy_tree(struct
 extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
 				  struct vfsmount *);
 
-extern int vfs_statfs(struct super_block *, struct kstatfs *);
+extern int vfs_statfs(struct vfsmount *, struct kstatfs *);
 
 /* /sys/fs */
 extern struct subsystem fs_subsys;
@@ -1747,7 +1747,7 @@ extern int dcache_dir_close(struct inode
 extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
 extern int dcache_readdir(struct file *, void *, filldir_t);
 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
-extern int simple_statfs(struct super_block *, struct kstatfs *);
+extern int simple_statfs(struct vfsmount *, struct kstatfs *);
 extern int simple_link(struct dentry *, struct inode *, struct dentry *);
 extern int simple_unlink(struct inode *, struct dentry *);
 extern int simple_rmdir(struct inode *, struct dentry *);
diff --git a/include/linux/security.h b/include/linux/security.h
index 1bab48f..af189b9 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -171,9 +171,9 @@ #ifdef CONFIG_SECURITY
  *	Deallocate and clear the sb->s_security field.
  *	@sb contains the super_block structure to be modified.
  * @sb_statfs:
- *	Check permission before obtaining filesystem statistics for the @sb
- *	filesystem.
- *	@sb contains the super_block structure for the filesystem.
+ *	Check permission before obtaining filesystem statistics for the @mnt
+ *	mountpoint.
+ *	@mnt contains the mountpoint structure for the filesystem.
  *	Return 0 if permission is granted.  
  * @sb_mount:
  *	Check permission before an object specified by @dev_name is mounted on
@@ -1121,7 +1121,7 @@ struct security_operations {
 	int (*sb_copy_data)(struct file_system_type *type,
 			    void *orig, void *copy);
 	int (*sb_kern_mount) (struct super_block *sb, void *data);
-	int (*sb_statfs) (struct super_block * sb);
+	int (*sb_statfs) (struct vfsmount *mnt);
 	int (*sb_mount) (char *dev_name, struct nameidata * nd,
 			 char *type, unsigned long flags, void *data);
 	int (*sb_check_sb) (struct vfsmount * mnt, struct nameidata * nd);
@@ -1442,9 +1442,9 @@ static inline int security_sb_kern_mount
 	return security_ops->sb_kern_mount (sb, data);
 }
 
-static inline int security_sb_statfs (struct super_block *sb)
+static inline int security_sb_statfs (struct vfsmount *mnt)
 {
-	return security_ops->sb_statfs (sb);
+	return security_ops->sb_statfs (mnt);
 }
 
 static inline int security_sb_mount (char *dev_name, struct nameidata *nd,
@@ -2154,7 +2154,7 @@ static inline int security_sb_kern_mount
 	return 0;
 }
 
-static inline int security_sb_statfs (struct super_block *sb)
+static inline int security_sb_statfs (struct vfsmount *mnt)
 {
 	return 0;
 }
diff --git a/kernel/acct.c b/kernel/acct.c
index b327f4d..55be31d 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -118,7 +118,7 @@ static int check_free_space(struct file 
 	spin_unlock(&acct_globals.lock);
 
 	/* May block */
-	if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf))
+	if (vfs_statfs(file->f_vfsmnt, &sbuf))
 		return res;
 	suspend = sbuf.f_blocks * SUSPEND;
 	resume = sbuf.f_blocks * RESUME;
diff --git a/mm/shmem.c b/mm/shmem.c
index ad19b6c..36aa1ae 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1654,9 +1654,9 @@ static ssize_t shmem_file_sendfile(struc
 	return desc.error;
 }
 
-static int shmem_statfs(struct super_block *sb, struct kstatfs *buf)
+static int shmem_statfs(struct vfsmount *mnt, struct kstatfs *buf)
 {
-	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+	struct shmem_sb_info *sbinfo = SHMEM_SB(mnt->mnt_sb);
 
 	buf->f_type = TMPFS_MAGIC;
 	buf->f_bsize = PAGE_CACHE_SIZE;
diff --git a/security/dummy.c b/security/dummy.c
index 8cccccc..e2f41b2 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -191,7 +191,7 @@ static int dummy_sb_kern_mount (struct s
 	return 0;
 }
 
-static int dummy_sb_statfs (struct super_block *sb)
+static int dummy_sb_statfs (struct vfsmount *mnt)
 {
 	return 0;
 }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index d987048..97a6617 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1900,13 +1900,13 @@ static int selinux_sb_kern_mount(struct 
 	return superblock_has_perm(current, sb, FILESYSTEM__MOUNT, &ad);
 }
 
-static int selinux_sb_statfs(struct super_block *sb)
+static int selinux_sb_statfs(struct vfsmount *mnt)
 {
 	struct avc_audit_data ad;
 
 	AVC_AUDIT_DATA_INIT(&ad,FS);
-	ad.u.fs.dentry = sb->s_root;
-	return superblock_has_perm(current, sb, FILESYSTEM__GETATTR, &ad);
+	ad.u.fs.dentry = mnt->mnt_root;
+	return superblock_has_perm(current, mnt->mnt_sb, FILESYSTEM__GETATTR, &ad);
 }
 
 static int selinux_mount(char * dev_name,


^ permalink raw reply related

* [PATCH 12/14] FS-Cache: CacheFiles: A cache that backs onto a mounted filesystem [try #8]
From: David Howells @ 2006-05-10 16:01 UTC (permalink / raw)
  To: torvalds, akpm, steved, trond.myklebust, aviro
  Cc: linux-fsdevel, linux-cachefs, nfsv4, linux-kernel
In-Reply-To: <20060510160111.9058.55026.stgit@warthog.cambridge.redhat.com>

Add a cache backend that permits a mounted filesystem to be used as a backing
store for the cache.


CacheFiles uses a userspace daemon to do some of the cache management - such as
reaping stale nodes and culling.  This is called cachefilesd and lives in
/sbin.  The source for the daemon can be downloaded from:

	http://people.redhat.com/~dhowells/cachefs/cachefilesd.c

And an example configuration from:

	http://people.redhat.com/~dhowells/cachefs/cachefilesd.conf

The filesystem and data integrity of the cache are only as good as those of the
filesystem providing the backing services.  Note that CacheFiles does not
attempt to journal anything since the journalling interfaces of the various
filesystems are very specific in nature.

CacheFiles creates a proc-file - "/proc/fs/cachefiles" - that is used for
communication with the daemon.  Only one thing may have this open at once, and
whilst it is open, a cache is at least partially in existence.  The daemon
opens this and sends commands down it to control the cache.

CacheFiles is currently limited to a single cache.

CacheFiles attempts to maintain at least a certain percentage of free space on
the filesystem, shrinking the cache by culling the objects it contains to make
space if necessary - see the "Cache Culling" section.  This means it can be
placed on the same medium as a live set of data, and will expand to make use of
spare space and automatically contract when the set of data requires more
space.


============
REQUIREMENTS
============

The use of CacheFiles and its daemon requires the following features to be
available in the system and in the cache filesystem:

	- dnotify.

	- extended attributes (xattrs).

	- openat() and friends.

	- bmap() support on files in the filesystem (FIBMAP ioctl).

	- The use of bmap() to detect a partial page at the end of the file.

It is strongly recommended that the "dir_index" option is enabled on Ext3
filesystems being used as a cache.


=============
CONFIGURATION
=============

The cache is configured by a script in /etc/cachefilesd.conf.  These commands
set up cache ready for use.  The following script commands are available:

 (*) brun <N>%
 (*) bcull <N>%
 (*) bstop <N>%

	Configure the culling limits.  Optional.  See the section on culling
	The defaults are 7%, 5% and 1% respectively.

 (*) dir <path>

	Specify the directory containing the root of the cache.  Mandatory.

 (*) tag <name>

	Specify a tag to FS-Cache to use in distinguishing multiple caches.
	Optional.  The default is "CacheFiles".

 (*) debug <mask>

	Specify a numeric bitmask to control debugging in the kernel module.
	Optional.  The default is zero (all off).


==================
STARTING THE CACHE
==================

The cache is started by running the daemon.  The daemon opens the cache proc
file, configures the cache and tells it to begin caching.  At that point the
cache binds to fscache and the cache becomes live.

The daemon is run as follows:

	/sbin/cachefilesd [-d]* [-s] [-n] [-f <configfile>]

The flags are:

 (*) -d

	Increase the debugging level.  This can be specified multiple times and
	is cumulative with itself.

 (*) -s

	Send messages to stderr instead of syslog.

 (*) -n

	Don't daemonise and go into background.

 (*) -f <configfile>

	Use an alternative configuration file rather than the default one.


===============
THINGS TO AVOID
===============

Do not mount other things within the cache as this will cause problems.  The
kernel module contains its own very cut-down path walking facility that ignores
mountpoints, but the daemon can't avoid them.

Do not create, rename or unlink files and directories in the cache whilst the
cache is active, as this may cause the state to become uncertain.

Renaming files in the cache might make objects appear to be other objects (the
filename is part of the lookup key).

Do not change or remove the extended attributes attached to cache files by the
cache as this will cause the cache state management to get confused.

Do not create files or directories in the cache, lest the cache get confused or
serve incorrect data.

Do not chmod files in the cache.  The module creates things with minimal
permissions to prevent random users being able to access them directly.


=============
CACHE CULLING
=============

The cache may need culling occasionally to make space.  This involves
discarding objects from the cache that have been used less recently than
anything else.  Culling is based on the access time of data objects.  Empty
directories are culled if not in use.

Cache culling is done on the basis of the percentage of blocks available in the
underlying filesystem.  There are three "limits":

 (*) brun

     If the amount of available space in the cache rises above this limit, then
     culling is turned off.

 (*) bcull

     If the amount of available space in the cache falls below this limit, then
     culling is started.

 (*) bstop

     If the amount of available space in the cache falls below this limit, then
     no further allocation of disk space is permitted until culling has raised
     the amount above this limit again.

These must be configured thusly:

	0 <= bstop < bcull < brun < 100

Note that these are percentages of available space, and do _not_ appear as 100
minus the percentage displayed by the "df" program.

The userspace daemon scans the cache to build up a table of cullable objects.
These are then culled in least recently used order.  A new scan of the cache is
started as soon as space is made in the table.  Objects will be skipped if
their atimes have changed or if the kernel module says it is still using them.


===============
CACHE STRUCTURE
===============

The CacheFiles module will create two directories in the directory it was
given:

 (*) cache/

 (*) graveyard/

The active cache objects all reside in the first directory.  The CacheFiles
kernel module moves any retired or culled objects that it can't simply unlink
to the graveyard from which the daemon will actually delete them.

The daemon uses dnotify to monitor the graveyard directory, and will delete
anything that appears therein.


The module represents index objects as directories with the filename "I..." or
"J...".  Note that the "cache/" directory is itself a special index.

Data objects are represented as files if they have no children, or directories
if they do.  Their filenames all begin "D..." or "E...".  If represented as a
directory, data objects will have a file in the directory called "data" that
actually holds the data.

Special objects are similar to data objects, except their filenames begin
"S..." or "T...".


If an object has children, then it will be represented as a directory.
Immediately in the representative directory are a collection of directories
named for hash values of the child object keys with an '@' prepended.  Into
this directory, if possible, will be placed the representations of the child
objects:

	INDEX     INDEX      INDEX                             DATA FILES
	========= ========== ================================= ================
	cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400
	cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry
	cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry
	cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...FP1ry


If the key is so long that it exceeds NAME_MAX with the decorations added on to
it, then it will be cut into pieces, the first few of which will be used to
make a nest of directories, and the last one of which will be the objects
inside the last directory.  The names of the intermediate directories will have
'+' prepended:

	J1223/@23/+xy...z/+kl...m/Epqr


Note that keys are raw data, and not only may they exceed NAME_MAX in size,
they may also contain things like '/' and NUL characters, and so they may not
be suitable for turning directly into a filename.

To handle this, CacheFiles will use a suitably printable filename directly and
"base-64" encode ones that aren't directly suitable.  The two versions of
object filenames indicate the encoding:

	OBJECT TYPE	PRINTABLE	ENCODED
	===============	===============	===============
	Index		"I..."		"J..."
	Data		"D..."		"E..."
	Special		"S..."		"T..."

Intermediate directories are always "@" or "+" as appropriate.


Each object in the cache has an extended attribute label that holds the object
type ID (required to distinguish special objects) and the auxiliary data from
the netfs.  The latter is used to detect stale objects in the cache and update
or retire them.


Note that CacheFiles will erase from the cache any file it doesn't recognise or
any file of an incorrect type (such as a FIFO file or a device file).


This documentation is added by the patch to:

	Documentation/filesystems/caching/cachefiles.txt

Signed-Off-By: David Howells <dhowells@redhat.com>
---

 Documentation/filesystems/caching/cachefiles.txt |  274 +++++
 fs/Kconfig                                       |    8 
 fs/Makefile                                      |    1 
 fs/buffer.c                                      |    2 
 fs/cachefiles/Makefile                           |   14 
 fs/cachefiles/cf-bind.c                          |  283 +++++
 fs/cachefiles/cf-interface.c                     | 1303 ++++++++++++++++++++++
 fs/cachefiles/cf-key.c                           |  160 +++
 fs/cachefiles/cf-main.c                          |  122 ++
 fs/cachefiles/cf-namei.c                         |  837 ++++++++++++++
 fs/cachefiles/cf-proc.c                          |  510 +++++++++
 fs/cachefiles/cf-xattr.c                         |  299 +++++
 fs/cachefiles/internal.h                         |  292 +++++
 fs/fcntl.c                                       |    2 
 fs/file_table.c                                  |    1 
 include/linux/pagemap.h                          |    6 
 mm/filemap.c                                     |  103 ++
 17 files changed, 4217 insertions(+), 0 deletions(-)

diff --git a/Documentation/filesystems/caching/cachefiles.txt b/Documentation/filesystems/caching/cachefiles.txt
new file mode 100644
index 0000000..c9875f2
--- /dev/null
+++ b/Documentation/filesystems/caching/cachefiles.txt
@@ -0,0 +1,274 @@
+	       ===============================================
+	       CacheFiles: CACHE ON ALREADY MOUNTED FILESYSTEM
+	       ===============================================
+
+Contents:
+
+ (*) Overview.
+
+ (*) Requirements.
+
+ (*) Configuration.
+
+ (*) Starting the cache.
+
+ (*) Things to avoid.
+
+
+========
+OVERVIEW
+========
+
+CacheFiles is a caching backend that's meant to use as a cache a directory on
+an already mounted filesystem of a local type (such as Ext3).
+
+CacheFiles uses a userspace daemon to do some of the cache management - such as
+reaping stale nodes and culling.  This is called cachefilesd and lives in
+/sbin.
+
+The filesystem and data integrity of the cache are only as good as those of the
+filesystem providing the backing services.  Note that CacheFiles does not
+attempt to journal anything since the journalling interfaces of the various
+filesystems are very specific in nature.
+
+CacheFiles creates a proc-file - "/proc/fs/cachefiles" - that is used for
+communication with the daemon.  Only one thing may have this open at once, and
+whilst it is open, a cache is at least partially in existence.  The daemon
+opens this and sends commands down it to control the cache.
+
+CacheFiles is currently limited to a single cache.
+
+CacheFiles attempts to maintain at least a certain percentage of free space on
+the filesystem, shrinking the cache by culling the objects it contains to make
+space if necessary - see the "Cache Culling" section.  This means it can be
+placed on the same medium as a live set of data, and will expand to make use of
+spare space and automatically contract when the set of data requires more
+space.
+
+
+============
+REQUIREMENTS
+============
+
+The use of CacheFiles and its daemon requires the following features to be
+available in the system and in the cache filesystem:
+
+	- dnotify.
+
+	- extended attributes (xattrs).
+
+	- openat() and friends.
+
+	- bmap() support on files in the filesystem (FIBMAP ioctl).
+
+	- The use of bmap() to detect a partial page at the end of the file.
+
+It is strongly recommended that the "dir_index" option is enabled on Ext3
+filesystems being used as a cache.
+
+
+=============
+CONFIGURATION
+=============
+
+The cache is configured by a script in /etc/cachefilesd.conf.  These commands
+set up cache ready for use.  The following script commands are available:
+
+ (*) brun <N>%
+ (*) bcull <N>%
+ (*) bstop <N>%
+
+	Configure the culling limits.  Optional.  See the section on culling
+	The defaults are 7%, 5% and 1% respectively.
+
+ (*) dir <path>
+
+	Specify the directory containing the root of the cache.  Mandatory.
+
+ (*) tag <name>
+
+	Specify a tag to FS-Cache to use in distinguishing multiple caches.
+	Optional.  The default is "CacheFiles".
+
+ (*) debug <mask>
+
+	Specify a numeric bitmask to control debugging in the kernel module.
+	Optional.  The default is zero (all off).
+
+
+==================
+STARTING THE CACHE
+==================
+
+The cache is started by running the daemon.  The daemon opens the cache proc
+file, configures the cache and tells it to begin caching.  At that point the
+cache binds to fscache and the cache becomes live.
+
+The daemon is run as follows:
+
+	/sbin/cachefilesd [-d]* [-s] [-n] [-f <configfile>]
+
+The flags are:
+
+ (*) -d
+
+	Increase the debugging level.  This can be specified multiple times and
+	is cumulative with itself.
+
+ (*) -s
+
+	Send messages to stderr instead of syslog.
+
+ (*) -n
+
+	Don't daemonise and go into background.
+
+ (*) -f <configfile>
+
+	Use an alternative configuration file rather than the default one.
+
+
+===============
+THINGS TO AVOID
+===============
+
+Do not mount other things within the cache as this will cause problems.  The
+kernel module contains its own very cut-down path walking facility that ignores
+mountpoints, but the daemon can't avoid them.
+
+Do not create, rename or unlink files and directories in the cache whilst the
+cache is active, as this may cause the state to become uncertain.
+
+Renaming files in the cache might make objects appear to be other objects (the
+filename is part of the lookup key).
+
+Do not change or remove the extended attributes attached to cache files by the
+cache as this will cause the cache state management to get confused.
+
+Do not create files or directories in the cache, lest the cache get confused or
+serve incorrect data.
+
+Do not chmod files in the cache.  The module creates things with minimal
+permissions to prevent random users being able to access them directly.
+
+
+=============
+CACHE CULLING
+=============
+
+The cache may need culling occasionally to make space.  This involves
+discarding objects from the cache that have been used less recently than
+anything else.  Culling is based on the access time of data objects.  Empty
+directories are culled if not in use.
+
+Cache culling is done on the basis of the percentage of blocks available in the
+underlying filesystem.  There are three "limits":
+
+ (*) brun
+
+     If the amount of available space in the cache rises above this limit, then
+     culling is turned off.
+
+ (*) bcull
+
+     If the amount of available space in the cache falls below this limit, then
+     culling is started.
+
+ (*) bstop
+
+     If the amount of available space in the cache falls below this limit, then
+     no further allocation of disk space is permitted until culling has raised
+     the amount above this limit again.
+
+These must be configured thusly:
+
+	0 <= bstop < bcull < brun < 100
+
+Note that these are percentages of available space, and do _not_ appear as 100
+minus the percentage displayed by the "df" program.
+
+The userspace daemon scans the cache to build up a table of cullable objects.
+These are then culled in least recently used order.  A new scan of the cache is
+started as soon as space is made in the table.  Objects will be skipped if
+their atimes have changed or if the kernel module says it is still using them.
+
+
+===============
+CACHE STRUCTURE
+===============
+
+The CacheFiles module will create two directories in the directory it was
+given:
+
+ (*) cache/
+
+ (*) graveyard/
+
+The active cache objects all reside in the first directory.  The CacheFiles
+kernel module moves any retired or culled objects that it can't simply unlink
+to the graveyard from which the daemon will actually delete them.
+
+The daemon uses dnotify to monitor the graveyard directory, and will delete
+anything that appears therein.
+
+
+The module represents index objects as directories with the filename "I..." or
+"J...".  Note that the "cache/" directory is itself a special index.
+
+Data objects are represented as files if they have no children, or directories
+if they do.  Their filenames all begin "D..." or "E...".  If represented as a
+directory, data objects will have a file in the directory called "data" that
+actually holds the data.
+
+Special objects are similar to data objects, except their filenames begin
+"S..." or "T...".
+
+
+If an object has children, then it will be represented as a directory.
+Immediately in the representative directory are a collection of directories
+named for hash values of the child object keys with an '@' prepended.  Into
+this directory, if possible, will be placed the representations of the child
+objects:
+
+	INDEX     INDEX      INDEX                             DATA FILES
+	========= ========== ================================= ================
+	cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400
+	cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry
+	cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry
+	cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...FP1ry
+
+
+If the key is so long that it exceeds NAME_MAX with the decorations added on to
+it, then it will be cut into pieces, the first few of which will be used to
+make a nest of directories, and the last one of which will be the objects
+inside the last directory.  The names of the intermediate directories will have
+'+' prepended:
+
+	J1223/@23/+xy...z/+kl...m/Epqr
+
+
+Note that keys are raw data, and not only may they exceed NAME_MAX in size,
+they may also contain things like '/' and NUL characters, and so they may not
+be suitable for turning directly into a filename.
+
+To handle this, CacheFiles will use a suitably printable filename directly and
+"base-64" encode ones that aren't directly suitable.  The two versions of
+object filenames indicate the encoding:
+
+	OBJECT TYPE	PRINTABLE	ENCODED
+	===============	===============	===============
+	Index		"I..."		"J..."
+	Data		"D..."		"E..."
+	Special		"S..."		"T..."
+
+Intermediate directories are always "@" or "+" as appropriate.
+
+
+Each object in the cache has an extended attribute label that holds the object
+type ID (required to distinguish special objects) and the auxiliary data from
+the netfs.  The latter is used to detect stale objects in the cache and update
+or retire them.
+
+
+Note that CacheFiles will erase from the cache any file it doesn't recognise or
+any file of an incorrect type (such as a FIFO file or a device file).
diff --git a/fs/Kconfig b/fs/Kconfig
index 6c95e58..9ef9f14 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -521,6 +521,14 @@ config FSCACHE
 
 	  See Documentation/filesystems/caching/fscache.txt for more information.
 
+config CACHEFILES
+	tristate "Filesystem caching on files"
+	depends on FSCACHE
+	help
+	  This permits use of a mounted filesystem as a cache for other
+	  filesystems - primarily networking filesystems - thus allowing fast
+	  local disk to enhance the speed of slower devices.
+
 endmenu
 
 menu "CD-ROM/DVD Filesystems"
diff --git a/fs/Makefile b/fs/Makefile
index 36ee03b..94ab3f9 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_AFS_FS)		+= afs/
 obj-$(CONFIG_BEFS_FS)		+= befs/
 obj-$(CONFIG_HOSTFS)		+= hostfs/
 obj-$(CONFIG_HPPFS)		+= hppfs/
+obj-$(CONFIG_CACHEFILES)	+= cachefiles/
 obj-$(CONFIG_DEBUG_FS)		+= debugfs/
 obj-$(CONFIG_CONFIGFS_FS)	+= configfs/
 obj-$(CONFIG_OCFS2_FS)		+= ocfs2/
diff --git a/fs/buffer.c b/fs/buffer.c
index 23f1f3a..0602bf8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -185,6 +185,8 @@ int fsync_super(struct super_block *sb)
 	return sync_blockdev(sb->s_bdev);
 }
 
+EXPORT_SYMBOL(fsync_super);
+
 /*
  * Write out and wait upon all dirty data associated with this
  * device.   Filesystem data as well as the underlying block
diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile
new file mode 100644
index 0000000..ffc3783
--- /dev/null
+++ b/fs/cachefiles/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for caching in a mounted filesystem
+#
+
+cachefiles-objs := \
+	cf-bind.o \
+	cf-interface.o \
+	cf-key.o \
+	cf-main.o \
+	cf-namei.o \
+	cf-proc.o \
+	cf-xattr.o
+
+obj-$(CONFIG_CACHEFILES) := cachefiles.o
diff --git a/fs/cachefiles/cf-bind.c b/fs/cachefiles/cf-bind.c
new file mode 100644
index 0000000..c963ee9
--- /dev/null
+++ b/fs/cachefiles/cf-bind.c
@@ -0,0 +1,283 @@
+/* cf-bind.c: bind and unbind a cache from the filesystem backing it
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/namespace.h>
+#include <linux/statfs.h>
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include "internal.h"
+
+static int cachefiles_proc_add_cache(struct cachefiles_cache *cache,
+				     struct vfsmount *mnt);
+
+/*****************************************************************************/
+/*
+ * bind a directory as a cache
+ */
+int cachefiles_proc_bind(struct cachefiles_cache *cache, char *args)
+{
+	_enter("{%u,%u,%u},%s",
+	       cache->brun_percent,
+	       cache->bcull_percent,
+	       cache->bstop_percent,
+	       args);
+
+	/* start by checking things over */
+	ASSERT(cache->bstop_percent >= 0 &&
+	       cache->bstop_percent < cache->bcull_percent &&
+	       cache->bcull_percent < cache->brun_percent &&
+	       cache->brun_percent  < 100);
+
+	if (*args) {
+		kerror("'bind' command doesn't take an argument");
+		return -EINVAL;
+	}
+
+	if (!cache->rootdirname) {
+		kerror("No cache directory specified");
+		return -EINVAL;
+	}
+
+	/* don't permit already bound caches to be re-bound */
+	if (test_bit(CACHEFILES_READY, &cache->flags)) {
+		kerror("Cache already bound");
+		return -EBUSY;
+	}
+
+	/* make sure we have copies of the tag and dirname strings */
+	if (!cache->tag) {
+		/* the tag string is released by the fops->release()
+		 * function, so we don't release it on error here */
+		cache->tag = kstrdup("CacheFiles", GFP_KERNEL);
+		if (!cache->tag)
+			return -ENOMEM;
+	}
+
+	/* add the cache */
+	return cachefiles_proc_add_cache(cache, NULL);
+
+} /* end cachefiles_proc_bind() */
+
+/*****************************************************************************/
+/*
+ * add a cache
+ */
+static int cachefiles_proc_add_cache(struct cachefiles_cache *cache,
+				     struct vfsmount *mnt)
+{
+	struct cachefiles_object *fsdef;
+	struct nameidata nd;
+	struct kstatfs stats;
+	struct dentry *graveyard, *cachedir, *root;
+	int ret;
+
+	_enter("");
+
+	/* allocate the root index object */
+	ret = -ENOMEM;
+
+	fsdef = kmem_cache_alloc(cachefiles_object_jar, SLAB_KERNEL);
+	if (!fsdef)
+		goto error_root_object;
+
+	atomic_set(&fsdef->usage, 1);
+	atomic_set(&fsdef->fscache_usage, 1);
+	fsdef->type = FSCACHE_COOKIE_TYPE_INDEX;
+
+	_debug("- fsdef %p", fsdef);
+
+	/* look up the directory at the root of the cache */
+	memset(&nd, 0, sizeof(nd));
+
+	ret = path_lookup(cache->rootdirname, LOOKUP_DIRECTORY, &nd);
+	if (ret < 0)
+		goto error_open_root;
+
+	/* bind to the special mountpoint we've prepared */
+	if (mnt) {
+		atomic_inc(&nd.mnt->mnt_sb->s_active);
+		mnt->mnt_sb = nd.mnt->mnt_sb;
+		mnt->mnt_flags = nd.mnt->mnt_flags;
+		mnt->mnt_flags |= MNT_NOSUID | MNT_NOEXEC | MNT_NODEV;
+		mnt->mnt_root = dget(nd.dentry);
+		mnt->mnt_mountpoint = mnt->mnt_root;
+
+		/* copy the name, but ignore kstrdup() failing ENOMEM - we'll
+		 * just end up with an devicenameless mountpoint */
+		mnt->mnt_devname = kstrdup(nd.mnt->mnt_devname, GFP_KERNEL);
+		path_release(&nd);
+
+		cache->mnt = mntget(mnt);
+		root = dget(mnt->mnt_root);
+	}
+	else {
+		cache->mnt = nd.mnt;
+		root = nd.dentry;
+
+		nd.mnt = NULL;
+		nd.dentry = NULL;
+		path_release(&nd);
+	}
+
+	/* check parameters */
+	ret = -EOPNOTSUPP;
+	if (!root->d_inode ||
+	    !root->d_inode->i_op ||
+	    !root->d_inode->i_op->lookup ||
+	    !root->d_inode->i_op->mkdir ||
+	    !root->d_inode->i_op->setxattr ||
+	    !root->d_inode->i_op->getxattr ||
+	    !root->d_sb ||
+	    !root->d_sb->s_op ||
+	    !root->d_sb->s_op->statfs ||
+	    !root->d_sb->s_op->sync_fs)
+		goto error_unsupported;
+
+	ret = -EROFS;
+	if (root->d_sb->s_flags & MS_RDONLY)
+		goto error_unsupported;
+
+	/* get the cache size and blocksize */
+	ret = root->d_sb->s_op->statfs(cache->mnt, &stats);
+	if (ret < 0)
+		goto error_unsupported;
+
+	ret = -ERANGE;
+	if (stats.f_bsize <= 0)
+		goto error_unsupported;
+
+	ret = -EOPNOTSUPP;
+	if (stats.f_bsize > PAGE_SIZE)
+		goto error_unsupported;
+
+	cache->bsize = stats.f_bsize;
+	cache->bshift = 0;
+	if (stats.f_bsize < PAGE_SIZE)
+		cache->bshift = PAGE_SHIFT - long_log2(stats.f_bsize);
+
+	_debug("blksize %u (shift %u)",
+	       cache->bsize, cache->bshift);
+
+	_debug("size %llu, avail %llu", stats.f_blocks, stats.f_bavail);
+
+	/* set up caching limits */
+	stats.f_blocks >>= cache->bshift;
+	do_div(stats.f_blocks, 100);
+	cache->bstop = stats.f_blocks * cache->bstop_percent;
+	cache->bcull = stats.f_blocks * cache->bcull_percent;
+	cache->brun  = stats.f_blocks * cache->brun_percent;
+
+	_debug("limits {%llu,%llu,%llu}",
+	       cache->brun,
+	       cache->bcull,
+	       cache->bstop);
+
+	/* get the cache directory and check its type */
+	cachedir = cachefiles_get_directory(cache, root, "cache");
+	if (IS_ERR(cachedir)) {
+		ret = PTR_ERR(cachedir);
+		goto error_unsupported;
+	}
+
+	fsdef->dentry = cachedir;
+
+	ret = cachefiles_check_object_type(fsdef);
+	if (ret < 0)
+		goto error_unsupported;
+
+	/* get the graveyard directory */
+	graveyard = cachefiles_get_directory(cache, root, "graveyard");
+	if (IS_ERR(graveyard)) {
+		ret = PTR_ERR(graveyard);
+		goto error_unsupported;
+	}
+
+	cache->graveyard = graveyard;
+
+	/* publish the cache */
+	fscache_init_cache(&cache->cache,
+			   &cachefiles_cache_ops,
+			   "%02x:%02x",
+			   MAJOR(fsdef->dentry->d_sb->s_dev),
+			   MINOR(fsdef->dentry->d_sb->s_dev)
+			   );
+
+	ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag);
+	if (ret < 0)
+		goto error_add_cache;
+
+	/* done */
+	set_bit(CACHEFILES_READY, &cache->flags);
+	dput(root);
+
+	printk(KERN_INFO "CacheFiles:"
+	       " File cache on %s registered\n",
+	       cache->cache.identifier);
+
+	/* check how much space the cache has */
+	cachefiles_has_space(cache, 0);
+
+	return 0;
+
+error_add_cache:
+	dput(cache->graveyard);
+	cache->graveyard = NULL;
+error_unsupported:
+	mntput(cache->mnt);
+	cache->mnt = NULL;
+	dput(fsdef->dentry);
+	fsdef->dentry = NULL;
+	dput(root);
+error_open_root:
+	kmem_cache_free(cachefiles_object_jar, fsdef);
+error_root_object:
+	kerror("Failed to register: %d", ret);
+	return ret;
+
+} /* end cachefiles_proc_add_cache() */
+
+/*****************************************************************************/
+/*
+ * unbind a cache on fd release
+ */
+void cachefiles_proc_unbind(struct cachefiles_cache *cache)
+{
+	_enter("");
+
+	if (test_bit(CACHEFILES_READY, &cache->flags)) {
+		printk(KERN_INFO "CacheFiles:"
+		       " File cache on %s unregistering\n",
+		       cache->cache.identifier);
+
+		fscache_withdraw_cache(&cache->cache);
+	}
+
+	if (cache->cache.fsdef)
+		cache->cache.ops->put_object(cache->cache.fsdef);
+
+	dput(cache->graveyard);
+	mntput(cache->mnt);
+
+	kfree(cache->rootdirname);
+	kfree(cache->tag);
+
+	_leave("");
+
+} /* end cachefiles_proc_unbind() */
diff --git a/fs/cachefiles/cf-interface.c b/fs/cachefiles/cf-interface.c
new file mode 100644
index 0000000..5076e87
--- /dev/null
+++ b/fs/cachefiles/cf-interface.c
@@ -0,0 +1,1303 @@
+/* cf-interface.c: CacheFiles to FS-Cache interface
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/statfs.h>
+#include <linux/buffer_head.h>
+#include "internal.h"
+
+#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
+#define log2(n) ffz(~(n))
+
+/*****************************************************************************/
+/*
+ * look up the nominated node in this cache, creating it if necessary
+ */
+static struct fscache_object *cachefiles_lookup_object(
+	struct fscache_cache *_cache,
+	struct fscache_object *_parent,
+	struct fscache_cookie *cookie)
+{
+	struct cachefiles_object *parent, *object;
+	struct cachefiles_cache *cache;
+	struct cachefiles_xattr *auxdata;
+	unsigned keylen, auxlen;
+	void *buffer;
+	char *key;
+	int ret;
+
+	ASSERT(_parent);
+
+	cache = container_of(_cache, struct cachefiles_cache, cache);
+	parent = container_of(_parent, struct cachefiles_object, fscache);
+
+	//printk("\n");
+	_enter("{%s},%p,%p", cache->cache.identifier, parent, cookie);
+
+	/* create a new object record and a temporary leaf image */
+	object = kmem_cache_alloc(cachefiles_object_jar, SLAB_KERNEL);
+	if (!object)
+		goto nomem_object;
+
+	atomic_set(&object->usage, 1);
+	atomic_set(&object->fscache_usage, 1);
+
+	fscache_object_init(&object->fscache);
+	object->fscache.cookie = cookie;
+	object->fscache.cache = parent->fscache.cache;
+
+	object->type = cookie->def->type;
+
+	/* get hold of the raw key
+	 * - stick the length on the front and leave space on the back for the
+	 *   encoder
+	 */
+	buffer = kmalloc((2 + 512) + 3, GFP_KERNEL);
+	if (!buffer)
+		goto nomem_buffer;
+
+	keylen = cookie->def->get_key(cookie->netfs_data, buffer + 2, 512);
+	ASSERTCMP(keylen, <, 512);
+
+	*(uint16_t *)buffer = keylen;
+	((char *)buffer)[keylen + 2] = 0;
+	((char *)buffer)[keylen + 3] = 0;
+	((char *)buffer)[keylen + 4] = 0;
+
+	/* turn the raw key into something that can work with as a filename */
+	key = cachefiles_cook_key(buffer, keylen + 2, object->type);
+	if (!key)
+		goto nomem_key;
+
+	/* get hold of the auxiliary data and prepend the object type */
+	auxdata = buffer;
+	auxlen = 0;
+	if (cookie->def->get_aux) {
+		auxlen = cookie->def->get_aux(cookie->netfs_data,
+					      auxdata->data, 511);
+		ASSERTCMP(auxlen, <, 511);
+	}
+
+	auxdata->len = auxlen + 1;
+	auxdata->type = cookie->def->type;
+
+	/* look up the key, creating any missing bits */
+	ret = cachefiles_walk_to_object(parent, object, key, auxdata);
+	if (ret < 0)
+		goto lookup_failed;
+
+	kfree(buffer);
+	kfree(key);
+	_leave(" = %p", &object->fscache);
+	return &object->fscache;
+
+lookup_failed:
+	kmem_cache_free(cachefiles_object_jar, object);
+	kfree(buffer);
+	kfree(key);
+	kleave(" = %d", ret);
+	return ERR_PTR(ret);
+
+nomem_key:
+	kfree(buffer);
+nomem_buffer:
+	kmem_cache_free(cachefiles_object_jar, object);
+nomem_object:
+	kleave(" = -ENOMEM");
+	return ERR_PTR(-ENOMEM);
+
+} /* end cachefiles_lookup_object() */
+
+/*****************************************************************************/
+/*
+ * increment the usage count on an inode object (may fail if unmounting)
+ */
+static struct fscache_object *cachefiles_grab_object(struct fscache_object *_object)
+{
+	struct cachefiles_object *object;
+
+	_enter("%p", _object);
+
+	object = container_of(_object, struct cachefiles_object, fscache);
+
+#ifdef CACHEFILES_DEBUG_SLAB
+	ASSERT((atomic_read(&object->fscache_usage) & 0xffff0000) != 0x6b6b0000);
+#endif
+
+	atomic_inc(&object->fscache_usage);
+	return &object->fscache;
+
+} /* end cachefiles_grab_object() */
+
+/*****************************************************************************/
+/*
+ * lock the semaphore on an object object
+ */
+static void cachefiles_lock_object(struct fscache_object *_object)
+{
+	struct cachefiles_object *object;
+
+	_enter("%p", _object);
+
+	object = container_of(_object, struct cachefiles_object, fscache);
+
+#ifdef CACHEFILES_DEBUG_SLAB
+	ASSERT((atomic_read(&object->fscache_usage) & 0xffff0000) != 0x6b6b0000);
+#endif
+
+	down_write(&object->sem);
+
+} /* end cachefiles_lock_object() */
+
+/*****************************************************************************/
+/*
+ * unlock the semaphore on an object object
+ */
+static void cachefiles_unlock_object(struct fscache_object *_object)
+{
+	struct cachefiles_object *object;
+
+	_enter("%p", _object);
+
+	object = container_of(_object, struct cachefiles_object, fscache);
+	up_write(&object->sem);
+
+} /* end cachefiles_unlock_object() */
+
+/*****************************************************************************/
+/*
+ * update the auxilliary data for an object object on disk
+ */
+static void cachefiles_update_object(struct fscache_object *_object)
+{
+	struct cachefiles_object *object;
+	struct cachefiles_cache *cache;
+
+	kenter("%p", _object);
+
+	object = container_of(_object, struct cachefiles_object, fscache);
+	cache = container_of(object->fscache.cache, struct cachefiles_cache, cache);
+
+	//cachefiles_tree_update_object(super, object);
+
+} /* end cachefiles_update_object() */
+
+/*****************************************************************************/
+/*
+ * dispose of a reference to an object object
+ */
+static void cachefiles_put_object(struct fscache_object *_object)
+{
+	struct cachefiles_object *object;
+	struct cachefiles_cache *cache;
+	int ret;
+
+	ASSERT(_object);
+
+	object = container_of(_object, struct cachefiles_object, fscache);
+	_enter("%p{%d}", object, atomic_read(&object->usage));
+
+	ASSERT(object);
+
+	cache = container_of(object->fscache.cache,
+			     struct cachefiles_cache, cache);
+
+#ifdef CACHEFILES_DEBUG_SLAB
+	ASSERT((atomic_read(&object->fscache_usage) & 0xffff0000) != 0x6b6b0000);
+#endif
+
+	if (!atomic_dec_and_test(&object->fscache_usage))
+		return;
+
+	_debug("- kill object %p", object);
+
+	/* delete retired objects */
+	if (test_bit(FSCACHE_OBJECT_RECYCLING, &object->fscache.flags) &&
+	    _object != cache->cache.fsdef
+	    ) {
+		_debug("- retire object %p", object);
+		cachefiles_delete_object(cache, object);
+	}
+
+	/* close the filesystem stuff attached to the object */
+	if (object->backer) {
+		if (object->backer->f_op &&
+		    object->backer->f_op->flush
+		    ) {
+			ret = object->backer->f_op->flush(object->backer);
+			if (ret < 0)
+				kerror("Backing file flush returned error %d",
+				       ret);
+		}
+		fput(object->backer);
+		object->backer = NULL;
+	}
+
+	/* note that an object is now inactive */
+	write_lock(&cache->active_lock);
+	rb_erase(&object->active_node, &cache->active_nodes);
+	write_unlock(&cache->active_lock);
+
+	dput(object->dentry);
+	object->dentry = NULL;
+
+	/* then dispose of the object */
+	kmem_cache_free(cachefiles_object_jar, object);
+
+	_leave("");
+
+} /* end cachefiles_put_object() */
+
+/*****************************************************************************/
+/*
+ * sync a cache
+ */
+static void cachefiles_sync_cache(struct fscache_cache *_cache)
+{
+	struct cachefiles_cache *cache;
+	int ret;
+
+	_enter("%p", _cache);
+
+	cache = container_of(_cache, struct cachefiles_cache, cache);
+
+	/* make sure all pages pinned by operations on behalf of the netfs are
+	 * written to disc */
+	ret = fsync_super(cache->mnt->mnt_sb);
+	if (ret == -EIO)
+		cachefiles_io_error(cache,
+				    "Attempt to sync backing fs superblock"
+				    " returned error %d",
+				    ret);
+
+} /* end cachefiles_sync_cache() */
+
+/*****************************************************************************/
+/*
+ * set the data size on an object
+ */
+static int cachefiles_set_i_size(struct fscache_object *_object, loff_t i_size)
+{
+	struct cachefiles_object *object;
+	struct iattr newattrs;
+	int ret;
+
+	_enter("%p,%llu", _object, i_size);
+
+	object = container_of(_object, struct cachefiles_object, fscache);
+
+	if (i_size == object->i_size)
+		return 0;
+
+	if (!object->backer)
+		return -ENOBUFS;
+
+	ASSERT(S_ISREG(object->backer->f_dentry->d_inode->i_mode));
+
+	newattrs.ia_size = i_size;
+	newattrs.ia_file = object->backer;
+	newattrs.ia_valid = ATTR_SIZE | ATTR_FILE;
+
+	mutex_lock(&object->backer->f_dentry->d_inode->i_mutex);
+	ret = notify_change(object->backer->f_dentry, &newattrs);
+	mutex_unlock(&object->backer->f_dentry->d_inode->i_mutex);
+
+	if (ret == -EIO) {
+		cachefiles_io_error_obj(object, "Size set failed");
+		ret = -ENOBUFS;
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+
+} /* end cachefiles_set_i_size() */
+
+/*****************************************************************************/
+/*
+ * see if we have space for a number of pages in the cache
+ */
+int cachefiles_has_space(struct cachefiles_cache *cache, unsigned nr)
+{
+	struct kstatfs stats;
+	int ret;
+
+	_enter("{%llu,%llu,%llu},%d",
+	       cache->brun, cache->bcull, cache->bstop,  nr);
+
+	/* find out how many pages of blockdev are available */
+	memset(&stats, 0, sizeof(stats));
+
+	ret = cache->mnt->mnt_sb->s_op->statfs(cache->mnt, &stats);
+	if (ret < 0) {
+		if (ret == -EIO)
+			cachefiles_io_error(cache, "statfs failed");
+		return ret;
+	}
+
+	stats.f_bavail >>= cache->bshift;
+
+	_debug("avail %llu", stats.f_bavail);
+
+	/* see if there is sufficient space */
+	stats.f_bavail -= nr;
+
+	ret = -ENOBUFS;
+	if (stats.f_bavail < cache->bstop)
+		goto begin_cull;
+
+	ret = 0;
+	if (stats.f_bavail < cache->bcull)
+		goto begin_cull;
+
+	if (test_bit(CACHEFILES_CULLING, &cache->flags) &&
+	    stats.f_bavail >= cache->brun
+	    ) {
+		if (test_and_clear_bit(CACHEFILES_CULLING, &cache->flags)) {
+			kdebug("cease culling");
+			send_sigurg(&cache->cachefilesd->f_owner);
+		}
+	}
+
+	_leave(" = 0");
+	return 0;
+
+begin_cull:
+	if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) {
+		kdebug("### CULL CACHE ###");
+		send_sigurg(&cache->cachefilesd->f_owner);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+
+} /* end cachefiles_has_space() */
+
+/*****************************************************************************/
+/*
+ * waiting reading backing files
+ */
+static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
+				  int sync, void *_key)
+{
+	struct cachefiles_one_read *monitor =
+		container_of(wait, struct cachefiles_one_read, monitor);
+	struct wait_bit_key *key = _key;
+	struct page *page = wait->private;
+
+	ASSERT(key);
+
+	_enter("{%lu},%u,%d,{%p,%u}",
+	       monitor->netfs_page->index, mode, sync,
+	       key->flags, key->bit_nr);
+
+	if (key->flags != &page->flags ||
+	    key->bit_nr != PG_locked)
+		return 0;
+
+	_debug("--- monitor %p %lx ---", page, page->flags);
+
+	if (!PageUptodate(page) && !PageError(page))
+		dump_stack();
+
+	/* remove from the waitqueue */
+	list_del(&wait->task_list);
+
+	/* move onto the action list and queue for keventd */
+	ASSERT(monitor->object);
+
+	spin_lock(&monitor->object->work_lock);
+	list_move(&monitor->obj_link, &monitor->object->read_list);
+	spin_unlock(&monitor->object->work_lock);
+
+	schedule_work(&monitor->object->read_work);
+
+	return 0;
+
+} /* end cachefiles_read_waiter() */
+
+/*****************************************************************************/
+/*
+ * let keventd drive the copying of pages
+ */
+void cachefiles_read_copier_work(void *_object)
+{
+	struct cachefiles_one_read *monitor;
+	struct cachefiles_object *object = _object;
+	struct fscache_cookie *cookie = object->fscache.cookie;
+	struct pagevec pagevec;
+	int error, max;
+
+	_enter("{ino=%lu}", object->backer->f_dentry->d_inode->i_ino);
+
+	pagevec_init(&pagevec, 0);
+
+	max = 8;
+	spin_lock_irq(&object->work_lock);
+
+	while (!list_empty(&object->read_list)) {
+		monitor = list_entry(object->read_list.next,
+				     struct cachefiles_one_read, obj_link);
+		list_del(&monitor->obj_link);
+
+		spin_unlock_irq(&object->work_lock);
+
+		_debug("- copy {%lu}", monitor->back_page->index);
+
+		error = -EIO;
+		if (PageUptodate(monitor->back_page)) {
+			copy_highpage(monitor->netfs_page, monitor->back_page);
+
+			pagevec_add(&pagevec, monitor->netfs_page);
+			cookie->def->mark_pages_cached(
+				cookie->netfs_data,
+				monitor->netfs_page->mapping,
+				&pagevec);
+			pagevec_reinit(&pagevec);
+
+			error = 0;
+		}
+
+		if (error)
+			cachefiles_io_error_obj(
+				object,
+				"readpage failed on backing file %lx",
+				(unsigned long) monitor->back_page->flags);
+
+		page_cache_release(monitor->back_page);
+
+		monitor->callback_func(monitor->netfs_page,
+				       monitor->callback_data, error);
+
+		page_cache_release(monitor->netfs_page);
+		kfree(monitor);
+
+		/* let keventd have some air occasionally */
+		max--;
+		if (max < 0 || need_resched()) {
+			if (!list_empty(&object->read_list))
+				schedule_work(&object->read_work);
+			_leave(" [maxed out]");
+			return;
+		}
+
+		spin_lock_irq(&object->work_lock);
+	}
+
+	spin_unlock_irq(&object->work_lock);
+
+	_leave("");
+
+} /* end cachefiles_read_copier_work() */
+
+/*****************************************************************************/
+/*
+ * read the corresponding page to the given set from the backing file
+ * - an uncertain page is simply discarded, to be tried again another time
+ */
+static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
+					    fscache_rw_complete_t callback_func,
+					    void *callback_data,
+					    struct page *netpage,
+					    struct pagevec *lru_pvec)
+{
+	struct cachefiles_one_read *monitor;
+	struct address_space *bmapping;
+	struct page *newpage, *backpage;
+	int ret;
+
+	_enter("");
+
+	ASSERTCMP(pagevec_count(lru_pvec), ==, 0);
+	pagevec_reinit(lru_pvec);
+
+	_debug("read back %p{%lu,%d}",
+	       netpage, netpage->index, page_count(netpage));
+
+	monitor = kzalloc(sizeof(*monitor), GFP_KERNEL);
+	if (!monitor)
+		goto nomem;
+
+	init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter);
+	monitor->object = object;
+	monitor->callback_func = callback_func;
+	monitor->callback_data = callback_data;
+	monitor->netfs_page = netpage;
+
+	/* attempt to get hold of the backing page */
+	bmapping = object->backer->f_mapping;
+	newpage = NULL;
+
+	for (;;) {
+		backpage = find_get_page(bmapping, netpage->index);
+		if (backpage)
+			goto backing_page_already_present;
+
+		if (!newpage) {
+			newpage = page_cache_alloc_cold(bmapping);
+			if (!newpage)
+				goto nomem_monitor;
+		}
+
+		ret = add_to_page_cache(newpage, bmapping,
+					netpage->index, GFP_KERNEL);
+		if (ret == 0)
+			goto installed_new_backing_page;
+		if (ret != -EEXIST)
+			goto nomem_page;
+	}
+
+	/* we've installed a new backing page, so now we need to add it
+	 * to the LRU list and start it reading */
+installed_new_backing_page:
+	_debug("- new %p", newpage);
+
+	backpage = newpage;
+	newpage = NULL;
+
+	page_cache_get(backpage);
+	pagevec_add(lru_pvec, backpage);
+	__pagevec_lru_add(lru_pvec);
+
+	ret = bmapping->a_ops->readpage(object->backer, backpage);
+	if (ret < 0)
+		goto read_error;
+
+	/* set the monitor to transfer the data across */
+monitor_backing_page:
+	_debug("- monitor add");
+
+	/* install the monitor */
+	page_cache_get(monitor->netfs_page);
+	page_cache_get(backpage);
+	monitor->back_page = backpage;
+
+	spin_lock_irq(&object->work_lock);
+	list_add_tail(&monitor->obj_link, &object->read_pend_list);
+	spin_unlock_irq(&object->work_lock);
+
+	monitor->monitor.private = backpage;
+	install_page_waitqueue_monitor(backpage, &monitor->monitor);
+	monitor = NULL;
+
+	/* but the page may have been read before the monitor was
+	 * installed, so the monitor may miss the event - so we have to
+	 * ensure that we do get one in such a case */
+	if (!TestSetPageLocked(backpage))
+		unlock_page(backpage);
+	goto success;
+
+	/* if the backing page is already present, it can be in one of
+	 * three states: read in progress, read failed or read okay */
+backing_page_already_present:
+	_debug("- present");
+
+	if (newpage) {
+		page_cache_release(newpage);
+		newpage = NULL;
+	}
+
+	if (PageError(backpage))
+		goto io_error;
+
+	if (PageUptodate(backpage))
+		goto backing_page_already_uptodate;
+
+	goto monitor_backing_page;
+
+	/* the backing page is already up to date, attach the netfs
+	 * page to the pagecache and LRU and copy the data across */
+backing_page_already_uptodate:
+	_debug("- uptodate");
+
+	copy_highpage(netpage, backpage);
+	callback_func(netpage, callback_data, 0);
+
+success:
+	_debug("success");
+	ret = 0;
+
+out:
+	if (backpage)
+		page_cache_release(backpage);
+	kfree(monitor);
+
+	_leave(" = %d", ret);
+	return ret;
+
+read_error:
+	_debug("read error %d", ret);
+	if (ret == -ENOMEM)
+		goto out;
+io_error:
+	cachefiles_io_error_obj(object, "page read error on backing file");
+	ret = -EIO;
+	goto out;
+
+nomem_page:
+	page_cache_release(newpage);
+nomem_monitor:
+	kfree(monitor);
+nomem:
+	_leave(" = -ENOMEM");
+	return -ENOMEM;
+
+} /* end cachefiles_read_backing_file_one() */
+
+/*****************************************************************************/
+/*
+ * read a page from the cache or allocate a block in which to store it
+ * - cache withdrawal is prevented by the caller
+ * - returns -EINTR if interrupted
+ * - returns -ENOMEM if ran out of memory
+ * - returns -ENOBUFS if no buffers can be made available
+ * - returns -ENOBUFS if page is beyond EOF
+ * - if the page is backed by a block in the cache:
+ *   - a read will be started which will call the callback on completion
+ *   - 0 will be returned
+ * - else if the page is unbacked:
+ *   - the metadata will be retained
+ *   - -ENODATA will be returned
+ */
+static int cachefiles_read_or_alloc_page(struct fscache_object *_object,
+					 struct page *page,
+					 fscache_rw_complete_t callback_func,
+					 void *callback_data,
+					 unsigned long gfp)
+{
+	struct cachefiles_object *object;
+	struct cachefiles_cache *cache;
+	struct fscache_cookie *cookie;
+	struct pagevec pagevec;
+	struct inode *inode;
+	sector_t block0, e3block;
+	unsigned shift;
+	int ret;
+
+	object = container_of(_object, struct cachefiles_object, fscache);
+	cache = container_of(object->fscache.cache, struct cachefiles_cache, cache);
+
+	_enter("{%p},{%lx},,,", object, page->index);
+
+	if (!object->backer)
+		return -ENOBUFS;
+
+	inode = object->backer->f_dentry->d_inode;
+	ASSERT(S_ISREG(inode->i_mode));
+	ASSERT(inode->i_mapping->a_ops->bmap);
+	ASSERT(inode->i_mapping->a_ops->readpages);
+
+	/* calculate the shift required to use bmap */
+	if (inode->i_sb->s_blocksize > PAGE_SIZE)
+		return -ENOBUFS;
+
+	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
+
+	cookie = object->fscache.cookie;
+
+	pagevec_init(&pagevec, 0);
+
+	/* we assume the absence or presence of the first block is a good
+	 * enough indication for the page as a whole
+	 * - TODO: don't use bmap() for this as it is _not_ actually good
+	 *   enough for this as it doesn't indicate errors, but it's all we've
+	 *   got for the moment
+	 */
+	block0 = page->index;
+	block0 <<= shift;
+
+	e3block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0);
+	_debug("%llx -> %llx", block0, e3block);
+
+	if (e3block) {
+		/* submit the apparently valid page to the backing fs to be
+		 * read from disk */
+		ret = cachefiles_read_backing_file_one(object,
+						       callback_func,
+						       callback_data,
+						       page,
+						       &pagevec);
+		ret = 0;
+	}
+	else if (cachefiles_has_space(cache, 1) == 0) {
+		/* there's space in the cache we can use */
+		pagevec_add(&pagevec, page);
+		cookie->def->mark_pages_cached(cookie->netfs_data,
+					       page->mapping, &pagevec);
+		ret = -ENODATA;
+	}
+	else {
+		ret = -ENOBUFS;
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+
+} /* end cachefiles_read_or_alloc_page() */
+
+/*****************************************************************************/
+/*
+ * read the corresponding pages to the given set from the backing file
+ * - any uncertain pages are simply discarded, to be tried again another time
+ */
+static int cachefiles_read_backing_file(struct cachefiles_object *object,
+					fscache_rw_complete_t callback_func,
+					void *callback_data,
+					struct address_space *mapping,
+					struct list_head *list,
+					struct pagevec *lru_pvec)
+{
+	struct cachefiles_one_read *monitor = NULL;
+	struct address_space *bmapping = object->backer->f_mapping;
+	struct page *newpage = NULL, *netpage, *_n, *backpage = NULL;
+	int ret = 0;
+
+	_enter("");
+
+	ASSERTCMP(pagevec_count(lru_pvec), ==, 0);
+	pagevec_reinit(lru_pvec);
+
+	list_for_each_entry_safe(netpage, _n, list, lru) {
+		list_del(&netpage->lru);
+
+		_debug("read back %p{%lu,%d}",
+		       netpage, netpage->index, page_count(netpage));
+
+		if (!monitor) {
+			monitor = kzalloc(sizeof(*monitor), GFP_KERNEL);
+			if (!monitor)
+				goto nomem;
+
+			init_waitqueue_func_entry(&monitor->monitor,
+						  cachefiles_read_waiter);
+			monitor->object = object;
+			monitor->callback_func = callback_func;
+			monitor->callback_data = callback_data;
+		}
+
+		for (;;) {
+			backpage = find_get_page(bmapping, netpage->index);
+			if (backpage)
+				goto backing_page_already_present;
+
+			if (!newpage) {
+				newpage = page_cache_alloc_cold(bmapping);
+				if (!newpage)
+					goto nomem;
+			}
+
+			ret = add_to_page_cache(newpage, bmapping,
+						netpage->index, GFP_KERNEL);
+			if (ret == 0)
+				goto installed_new_backing_page;
+			if (ret != -EEXIST)
+				goto nomem;
+		}
+
+		/* we've installed a new backing page, so now we need to add it
+		 * to the LRU list and start it reading */
+	installed_new_backing_page:
+		_debug("- new %p", newpage);
+
+		backpage = newpage;
+		newpage = NULL;
+
+		page_cache_get(backpage);
+		if (!pagevec_add(lru_pvec, backpage))
+			__pagevec_lru_add(lru_pvec);
+
+	reread_backing_page:
+		ret = bmapping->a_ops->readpage(object->backer, backpage);
+		if (ret < 0)
+			goto read_error;
+
+		/* add the netfs page to the pagecache and LRU, and set the
+		 * monitor to transfer the data across */
+	monitor_backing_page:
+		_debug("- monitor add");
+
+		ret = add_to_page_cache(netpage, mapping, netpage->index,
+					GFP_KERNEL);
+		if (ret < 0) {
+			if (ret == -EEXIST) {
+				page_cache_release(netpage);
+				continue;
+			}
+			goto nomem;
+		}
+
+		page_cache_get(netpage);
+		if (!pagevec_add(lru_pvec, netpage))
+			__pagevec_lru_add(lru_pvec);
+
+		/* install a monitor */
+		page_cache_get(netpage);
+		monitor->netfs_page = netpage;
+
+		page_cache_get(backpage);
+		monitor->back_page = backpage;
+
+		spin_lock_irq(&object->work_lock);
+		list_add_tail(&monitor->obj_link, &object->read_pend_list);
+		spin_unlock_irq(&object->work_lock);
+
+		monitor->monitor.private = backpage;
+		install_page_waitqueue_monitor(backpage, &monitor->monitor);
+		monitor = NULL;
+
+		/* but the page may have been read before the monitor was
+		 * installed, so the monitor may miss the event - so we have to
+		 * ensure that we do get one in such a case */
+		if (!TestSetPageLocked(backpage)) {
+			_debug("2unlock %p", backpage);
+			unlock_page(backpage);
+		}
+
+		page_cache_release(backpage);
+		backpage = NULL;
+
+		page_cache_release(netpage);
+		netpage = NULL;
+		continue;
+
+		/* if the backing page is already present, it can be in one of
+		 * three states: read in progress, read failed or read okay */
+	backing_page_already_present:
+		_debug("- present %p", backpage);
+
+		if (PageError(backpage))
+			goto io_error;
+
+		if (PageUptodate(backpage))
+			goto backing_page_already_uptodate;
+
+		_debug("- not ready %p{%lx}", backpage, backpage->flags);
+
+		if (TestSetPageLocked(backpage))
+			goto monitor_backing_page;
+
+		if (PageError(backpage)) {
+			unlock_page(backpage);
+			goto io_error;
+		}
+
+		if (PageUptodate(backpage))
+			goto backing_page_already_uptodate_unlock;
+
+		/* we've locked a page that's neither up to date nor erroneous,
+		 * so we need to attempt to read it again */
+		//if (!PageLRU(backpage))
+		//	goto add_to_LRU_and_reread_backing_page;
+
+		goto reread_backing_page;
+
+		/* the backing page is already up to date, attach the netfs
+		 * page to the pagecache and LRU and copy the data across */
+	backing_page_already_uptodate_unlock:
+		unlock_page(backpage);
+	backing_page_already_uptodate:
+		_debug("- uptodate");
+
+		ret = add_to_page_cache(netpage, mapping, netpage->index,
+					GFP_KERNEL);
+		if (ret < 0) {
+			if (ret == -EEXIST) {
+				page_cache_release(netpage);
+				continue;
+			}
+			goto nomem;
+		}
+
+		copy_highpage(netpage, backpage);
+
+		page_cache_release(backpage);
+		backpage = NULL;
+
+		page_cache_get(netpage);
+		if (!pagevec_add(lru_pvec, netpage))
+			__pagevec_lru_add(lru_pvec);
+
+		callback_func(netpage, callback_data, 0);
+
+		page_cache_release(netpage);
+		netpage = NULL;
+		continue;
+	}
+
+	netpage = NULL;
+
+	_debug("out");
+
+out:
+	/* tidy up */
+	pagevec_lru_add(lru_pvec);
+
+	if (newpage)
+		page_cache_release(newpage);
+	if (netpage)
+		page_cache_release(netpage);
+	if (backpage)
+		page_cache_release(backpage);
+	kfree(monitor);
+
+	list_for_each_entry_safe(netpage, _n, list, lru) {
+		list_del(&netpage->lru);
+		page_cache_release(netpage);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+
+nomem:
+	_debug("nomem");
+	ret = -ENOMEM;
+	goto out;
+
+read_error:
+	_debug("read error %d", ret);
+	if (ret == -ENOMEM)
+		goto out;
+io_error:
+	cachefiles_io_error_obj(object, "page read error on backing file");
+	ret = -EIO;
+	goto out;
+
+} /* end cachefiles_read_backing_file() */
+
+/*****************************************************************************/
+/*
+ * read a list of pages from the cache or allocate blocks in which to store
+ * them
+ */
+static int cachefiles_read_or_alloc_pages(struct fscache_object *_object,
+					  struct address_space *mapping,
+					  struct list_head *pages,
+					  unsigned *nr_pages,
+					  fscache_rw_complete_t callback_func,
+					  void *callback_data,
+					  unsigned long gfp)
+{
+	struct cachefiles_object *object;
+	struct cachefiles_cache *cache;
+	struct fscache_cookie *cookie;
+	struct list_head e3pages;
+	struct pagevec pagevec;
+	struct inode *inode;
+	struct page *page, *_n;
+	unsigned shift, e3nrpages;
+	int ret, ret2, space;
+
+	object = container_of(_object, struct cachefiles_object, fscache);
+	cache = container_of(object->fscache.cache, struct cachefiles_cache, cache);
+
+	_enter("{%p},,%d,,", object, *nr_pages);
+
+	if (!object->backer)
+		return -ENOBUFS;
+
+	space = 1;
+	if (cachefiles_has_space(cache, *nr_pages) < 0)
+		space = 0;
+
+	inode = object->backer->f_dentry->d_inode;
+	ASSERT(S_ISREG(inode->i_mode));
+	ASSERT(inode->i_mapping->a_ops->bmap);
+	ASSERT(inode->i_mapping->a_ops->readpages);
+
+	/* calculate the shift required to use bmap */
+	if (inode->i_sb->s_blocksize > PAGE_SIZE)
+		return -ENOBUFS;
+
+	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
+
+	pagevec_init(&pagevec, 0);
+
+	cookie = object->fscache.cookie;
+
+	INIT_LIST_HEAD(&e3pages);
+	e3nrpages = 0;
+
+	ret = space ? -ENODATA : -ENOBUFS;
+	list_for_each_entry_safe(page, _n, pages, lru) {
+		sector_t block0, e3block;
+
+		/* we assume the absence or presence of the first block is a
+		 * good enough indication for the page as a whole
+		 * - TODO: don't use bmap() for this as it is _not_ actually
+		 *   good enough for this as it doesn't indicate errors, but
+		 *   it's all we've got for the moment
+		 */
+		block0 = page->index;
+		block0 <<= shift;
+
+		e3block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
+							block0);
+		_debug("%llx -> %llx", block0, e3block);
+
+		if (e3block) {
+			/* we have data - add it to the list to give to the
+			 * backing fs */
+			list_move(&page->lru, &e3pages);
+			(*nr_pages)--;
+			e3nrpages++;
+		}
+		else if (space && pagevec_add(&pagevec, page) == 0) {
+			cookie->def->mark_pages_cached(cookie->netfs_data,
+						       mapping, &pagevec);
+			pagevec_reinit(&pagevec);
+			ret = -ENODATA;
+		}
+	}
+
+	if (pagevec_count(&pagevec) > 0) {
+		cookie->def->mark_pages_cached(cookie->netfs_data,
+					       mapping, &pagevec);
+		pagevec_reinit(&pagevec);
+	}
+
+	if (list_empty(pages))
+		ret = 0;
+
+	/* submit the apparently valid pages to the backing fs to be read from disk */
+	if (e3nrpages > 0) {
+		ret2 = cachefiles_read_backing_file(object,
+						    callback_func,
+						    callback_data,
+						    mapping,
+						    &e3pages,
+						    &pagevec);
+
+		ASSERTCMP(pagevec_count(&pagevec), ==, 0);
+
+		if (ret2 == -ENOMEM || ret2 == -EINTR)
+			ret = ret2;
+	}
+
+	_leave(" = %d [nr=%u%s]",
+	       ret, *nr_pages, list_empty(pages) ? " empty" : "");
+	return ret;
+
+} /* end cachefiles_read_or_alloc_pages() */
+
+/*****************************************************************************/
+/*
+ * read a page from the cache or allocate a block in which to store it
+ * - cache withdrawal is prevented by the caller
+ * - returns -EINTR if interrupted
+ * - returns -ENOMEM if ran out of memory
+ * - returns -ENOBUFS if no buffers can be made available
+ * - returns -ENOBUFS if page is beyond EOF
+ * - otherwise:
+ *   - the metadata will be retained
+ *   - 0 will be returned
+ */
+static int cachefiles_allocate_page(struct fscache_object *_object,
+				    struct page *page,
+				    unsigned long gfp)
+{
+	struct cachefiles_object *object;
+	struct cachefiles_cache *cache;
+
+	object = container_of(_object, struct cachefiles_object, fscache);
+	cache = container_of(object->fscache.cache,
+			     struct cachefiles_cache, cache);
+
+	_enter("%p,{%lx},,,", object, page->index);
+
+	return cachefiles_has_space(cache, 1);
+
+} /* end cachefiles_allocate_page() */
+
+/*****************************************************************************/
+/*
+ * page storer
+ */
+void cachefiles_write_work(void *_object)
+{
+	struct cachefiles_one_write *writer;
+	struct cachefiles_object *object = _object;
+	int ret, max;
+
+	_enter("%p", object);
+
+	ASSERT(!irqs_disabled());
+
+	spin_lock_irq(&object->work_lock);
+	max = 8;
+
+	while (!list_empty(&object->write_list)) {
+		writer = list_entry(object->write_list.next,
+				    struct cachefiles_one_write, obj_link);
+		list_del(&writer->obj_link);
+
+		spin_unlock_irq(&object->work_lock);
+
+		_debug("- store {%lu}", writer->netfs_page->index);
+
+		ret = generic_file_buffered_write_one_kernel_page(
+			object->backer,
+			writer->netfs_page->index,
+			writer->netfs_page);
+
+		if (ret == -ENOSPC) {
+			ret = -ENOBUFS;
+		}
+		else if (ret == -EIO) {
+			cachefiles_io_error_obj(object,
+						"write page to backing file"
+						" failed");
+			ret = -ENOBUFS;
+		}
+
+		_debug("- callback");
+		writer->callback_func(writer->netfs_page,
+				      writer->callback_data, ret);
+
+		_debug("- put net");
+		page_cache_release(writer->netfs_page);
+		kfree(writer);
+
+		/* let keventd have some air occasionally */
+		max--;
+		if (max < 0 || need_resched()) {
+			if (!list_empty(&object->write_list))
+				schedule_work(&object->write_work);
+			_leave(" [maxed out]");
+			return;
+		}
+
+		_debug("- next");
+		spin_lock_irq(&object->work_lock);
+	}
+
+	spin_unlock_irq(&object->work_lock);
+	_leave("");
+
+} /* end cachefiles_write_work() */
+
+/*****************************************************************************/
+/*
+ * request a page be stored in the cache
+ * - cache withdrawal is prevented by the caller
+ * - this request may be ignored if there's no cache block available, in which
+ *   case -ENOBUFS will be returned
+ * - if the op is in progress, 0 will be returned
+ */
+static int cachefiles_write_page(struct fscache_object *_object,
+				 struct page *page,
+				 fscache_rw_complete_t callback_func,
+				 void *callback_data,
+				 unsigned long gfp)
+{
+//	struct cachefiles_one_write *writer;
+	struct cachefiles_object *object;
+	int ret;
+
+	object = container_of(_object, struct cachefiles_object, fscache);
+
+	_enter("%p,%p{%lx},,,", object, page, page->index);
+
+	if (!object->backer)
+		return -ENOBUFS;
+
+	ASSERT(S_ISREG(object->backer->f_dentry->d_inode->i_mode));
+
+#if 0 // set to 1 for deferred writing
+	/* queue the operation for deferred processing by keventd */
+	writer = kzalloc(sizeof(*writer), GFP_KERNEL);
+	if (!writer)
+		return -ENOMEM;
+
+	page_cache_get(page);
+	writer->netfs_page = page;
+	writer->object = object;
+	writer->callback_func = callback_func;
+	writer->callback_data = callback_data;
+
+	spin_lock_irq(&object->work_lock);
+	list_add_tail(&writer->obj_link, &object->write_list);
+	spin_unlock_irq(&object->work_lock);
+
+	schedule_work(&object->write_work);
+	ret = 0;
+
+#else
+	/* copy the page to ext3 and let it store it in its own time */
+	ret = generic_file_buffered_write_one_kernel_page(object->backer,
+							  page->index,
+							  page);
+
+	if (ret != 0) {
+		if (ret == -EIO)
+			cachefiles_io_error_obj(object,
+						"write page to backing file"
+						" failed");
+		ret = -ENOBUFS;
+	}
+
+	callback_func(page, callback_data, ret);
+#endif
+
+	_leave(" = %d", ret);
+	return ret;
+
+} /* end cachefiles_write_page() */
+
+/*****************************************************************************/
+/*
+ * detach a backing block from a page
+ * - cache withdrawal is prevented by the caller
+ */
+static void cachefiles_uncache_pages(struct fscache_object *_object,
+				     struct pagevec *pagevec)
+{
+	struct cachefiles_object *object;
+	struct cachefiles_cache *cache;
+
+	object = container_of(_object, struct cachefiles_object, fscache);
+	cache = container_of(object->fscache.cache,
+			     struct cachefiles_cache, cache);
+
+	_enter("%p,{%lu,%lx},,,",
+	       object, pagevec->nr, pagevec->pages[0]->index);
+
+} /* end cachefiles_uncache_pages() */
+
+/*****************************************************************************/
+/*
+ * dissociate a cache from all the pages it was backing
+ */
+static void cachefiles_dissociate_pages(struct fscache_cache *cache)
+{
+	_enter("");
+
+} /* end cachefiles_dissociate_pages() */
+
+struct fscache_cache_ops cachefiles_cache_ops = {
+	.name			= "cachefiles",
+	.lookup_object		= cachefiles_lookup_object,
+	.grab_object		= cachefiles_grab_object,
+	.lock_object		= cachefiles_lock_object,
+	.unlock_object		= cachefiles_unlock_object,
+	.update_object		= cachefiles_update_object,
+	.put_object		= cachefiles_put_object,
+	.sync_cache		= cachefiles_sync_cache,
+	.set_i_size		= cachefiles_set_i_size,
+	.read_or_alloc_page	= cachefiles_read_or_alloc_page,
+	.read_or_alloc_pages	= cachefiles_read_or_alloc_pages,
+	.allocate_page		= cachefiles_allocate_page,
+	.write_page		= cachefiles_write_page,
+	.uncache_pages		= cachefiles_uncache_pages,
+	.dissociate_pages	= cachefiles_dissociate_pages,
+};
diff --git a/fs/cachefiles/cf-key.c b/fs/cachefiles/cf-key.c
new file mode 100644
index 0000000..86d7fe7
--- /dev/null
+++ b/fs/cachefiles/cf-key.c
@@ -0,0 +1,160 @@
+/* cf-key.c: Key to pathname encoder
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include "internal.h"
+
+static const char cachefiles_charmap[64] =
+	"0123456789"			/* 0 - 9 */
+	"abcdefghijklmnopqrstuvwxyz"	/* 10 - 35 */
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"	/* 36 - 61 */
+	"_-"				/* 62 - 63 */
+	;
+
+static const char cachefiles_filecharmap[256] = {
+	/* we skip space and tab and control chars */
+	[ 33 ... 46 ] = 1,		/* '!' -> '.' */
+	/* we skip '/' as it's significant to pathwalk */
+	[ 48 ... 127 ] = 1,		/* '0' -> '~' */
+};
+
+/*****************************************************************************/
+/*
+ * turn the raw key into something cooked
+ * - the raw key should include the length in the two bytes at the front
+ * - the key may be up to 514 bytes in length (including the length word)
+ *   - "base64" encode the strange keys, mapping 3 bytes of raw to four of
+ *     cooked
+ *   - need to cut the cooked key into 252 char lengths (189 raw bytes)
+ */
+char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type)
+{
+	unsigned char csum, ch;
+	unsigned int acc;
+	char *key;
+	int loop, len, max, seg, mark, print;
+
+	_enter(",%d", keylen);
+
+	BUG_ON(keylen < 2 || keylen > 514);
+
+	csum = raw[0] + raw[1];
+	print = 1;
+	for (loop = 2; loop < keylen; loop++) {
+		ch = raw[loop];
+		csum += ch;
+		print &= cachefiles_filecharmap[ch];
+	}
+
+	if (print) {
+		/* if the path is usable ASCII, then we render it directly */
+		max = keylen - 2;
+		max += 2;	/* two base64'd length chars on the front */
+		max += 5;	/* @checksum/M */
+		max += 3 * 2;	/* maximum number of segment dividers (".../M")
+				 * is ((514 + 251) / 252) = 3
+				 */
+		max += 1;	/* NUL on end */
+	}
+	else {
+		/* calculate the maximum length of the cooked key */
+		keylen = (keylen + 2) / 3;
+
+		max = keylen * 4;
+		max += 5;	/* @checksum/M */
+		max += 3 * 2;	/* maximum number of segment dividers (".../M")
+				 * is ((514 + 188) / 189) = 3
+				 */
+		max += 1;	/* NUL on end */
+	}
+
+	_debug("max: %d", max);
+
+	key = kmalloc(max, GFP_KERNEL);
+	if (!key)
+		return NULL;
+
+	len = 0;
+
+	/* build the cooked key */
+	sprintf(key, "@%02x/+", (unsigned) csum);
+	len = 5;
+	mark = len - 1;
+
+	if (print) {
+		acc = *(uint16_t *) raw;
+		raw += 2;
+
+		key[len + 1] = cachefiles_charmap[acc & 63];
+		acc >>= 6;
+		key[len] = cachefiles_charmap[acc & 63];
+		len += 2;
+
+		seg = 250;
+		for (loop = keylen; loop > 0; loop--) {
+			if (seg <= 0) {
+				key[len++] = '/';
+				mark = len;
+				key[len++] = '+';
+				seg = 252;
+			}
+
+			key[len++] = *raw++;
+			ASSERT(len < max);
+		}
+
+		switch (type) {
+		case FSCACHE_COOKIE_TYPE_INDEX:		type = 'I';	break;
+		case FSCACHE_COOKIE_TYPE_DATAFILE:	type = 'D';	break;
+		default:				type = 'S';	break;
+		}
+	}
+	else {
+		seg = 252;
+		for (loop = keylen; loop > 0; loop--) {
+			if (seg <= 0) {
+				key[len++] = '/';
+				mark = len;
+				key[len++] = '+';
+				seg = 252;
+			}
+
+			acc = *raw++;
+			acc |= *raw++ << 8;
+			acc |= *raw++ << 16;
+
+			_debug("acc: %06x", acc);
+
+			key[len++] = cachefiles_charmap[acc & 63];
+			acc >>= 6;
+			key[len++] = cachefiles_charmap[acc & 63];
+			acc >>= 6;
+			key[len++] = cachefiles_charmap[acc & 63];
+			acc >>= 6;
+			key[len++] = cachefiles_charmap[acc & 63];
+
+			ASSERT(len < max);
+		}
+
+		switch (type) {
+		case FSCACHE_COOKIE_TYPE_INDEX:		type = 'J';	break;
+		case FSCACHE_COOKIE_TYPE_DATAFILE:	type = 'E';	break;
+		default:				type = 'T';	break;
+		}
+	}
+
+	key[mark] = type;
+	key[len] = 0;
+
+	_leave(" = %p %d:[%s]", key, len, key);
+	return key;
+
+} /* end cachefiles_cook_key() */
diff --git a/fs/cachefiles/cf-main.c b/fs/cachefiles/cf-main.c
new file mode 100644
index 0000000..1a023b1
--- /dev/null
+++ b/fs/cachefiles/cf-main.c
@@ -0,0 +1,122 @@
+/* cf-main.c: network filesystem caching backend to use cache files on a
+ *            premounted filesystem
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/statfs.h>
+#include <linux/proc_fs.h>
+#include "internal.h"
+
+unsigned long cachefiles_debug;
+
+static int cachefiles_init(void);
+static void cachefiles_exit(void);
+
+fs_initcall(cachefiles_init);
+module_exit(cachefiles_exit);
+
+MODULE_DESCRIPTION("Mounted-filesystem based cache");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+kmem_cache_t *cachefiles_object_jar;
+
+static void cachefiles_object_init_once(void *_object, kmem_cache_t *cachep,
+					unsigned long flags)
+{
+	struct cachefiles_object *object = _object;
+
+	switch (flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) {
+	case SLAB_CTOR_CONSTRUCTOR:
+		memset(object, 0, sizeof(*object));
+		fscache_object_init(&object->fscache);
+		init_rwsem(&object->sem);
+		spin_lock_init(&object->work_lock);
+		INIT_LIST_HEAD(&object->read_list);
+		INIT_LIST_HEAD(&object->read_pend_list);
+		INIT_WORK(&object->read_work, &cachefiles_read_copier_work,
+			  object);
+		INIT_LIST_HEAD(&object->write_list);
+		INIT_WORK(&object->write_work, &cachefiles_write_work, object);
+		break;
+
+	default:
+		break;
+	}
+}
+
+/*****************************************************************************/
+/*
+ * initialise the fs caching module
+ */
+static int __init cachefiles_init(void)
+{
+	struct proc_dir_entry *pde;
+	int ret;
+
+	/* create a proc entry to use as a handle for the userspace daemon */
+	ret = -ENOMEM;
+
+	pde = create_proc_entry("cachefiles", 0600, proc_root_fs);
+	if (!pde) {
+		kerror("unable to create /proc/fs/cachefiles");
+		goto error_proc;
+	}
+
+	pde->owner = THIS_MODULE;
+	pde->proc_fops = &cachefiles_proc_fops;
+	cachefiles_proc = pde;
+
+	/* create an object jar */
+	cachefiles_object_jar =
+		kmem_cache_create("cachefiles_object_jar",
+				  sizeof(struct cachefiles_object),
+				  0,
+				  SLAB_HWCACHE_ALIGN,
+				  cachefiles_object_init_once,
+				  NULL);
+	if (!cachefiles_object_jar) {
+		printk(KERN_NOTICE
+		       "CacheFiles: Failed to allocate an object jar\n");
+		goto error_object_jar;
+	}
+
+	printk(KERN_INFO "CacheFiles: Loaded\n");
+	return 0;
+
+error_object_jar:
+	remove_proc_entry("cachefiles", proc_root_fs);
+error_proc:
+	kerror("failed to register: %d", ret);
+	return ret;
+
+} /* end cachefiles_init() */
+
+/*****************************************************************************/
+/*
+ * clean up on module removal
+ */
+static void __exit cachefiles_exit(void)
+{
+	printk(KERN_INFO "CacheFiles: Unloading\n");
+
+	kmem_cache_destroy(cachefiles_object_jar);
+	remove_proc_entry("cachefiles", proc_root_fs);
+
+} /* end cachefiles_exit() */
diff --git a/fs/cachefiles/cf-namei.c b/fs/cachefiles/cf-namei.c
new file mode 100644
index 0000000..45e2359
--- /dev/null
+++ b/fs/cachefiles/cf-namei.c
@@ -0,0 +1,837 @@
+/* cf-namei.c: CacheFiles path walking and related routines
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/quotaops.h>
+#include <linux/xattr.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include "internal.h"
+
+/*****************************************************************************/
+/*
+ * record the fact that an object is now active
+ */
+static void cachefiles_mark_object_active(struct cachefiles_cache *cache,
+					  struct cachefiles_object *object)
+{
+	struct cachefiles_object *xobject;
+	struct rb_node **_p, *_parent = NULL;
+	struct dentry *dentry;
+
+	write_lock(&cache->active_lock);
+
+	dentry = object->dentry;
+	_p = &cache->active_nodes.rb_node;
+	while (*_p) {
+		_parent = *_p;
+		xobject = rb_entry(_parent,
+				   struct cachefiles_object, active_node);
+
+		if (xobject->dentry > dentry)
+			_p = &(*_p)->rb_left;
+		else if (xobject->dentry < dentry)
+			_p = &(*_p)->rb_right;
+		else
+			BUG(); /* uh oh... this dentry shouldn't be here */
+	}
+
+	rb_link_node(&object->active_node, _parent, _p);
+	rb_insert_color(&object->active_node, &cache->active_nodes);
+
+	write_unlock(&cache->active_lock);
+
+} /* end cachefiles_mark_object_active() */
+
+/*****************************************************************************/
+/*
+ * delete an object representation from the cache
+ * - file backed objects are unlinked
+ * - directory backed objects are stuffed into the graveyard for userspace to
+ *   delete
+ * - unlocks the directory mutex
+ */
+static int cachefiles_bury_object(struct cachefiles_cache *cache,
+				  struct dentry *dir,
+				  struct dentry *rep)
+{
+	struct dentry *grave, *alt, *trap;
+	struct qstr name;
+	const char *old_name;
+	char nbuffer[8 + 8 + 1];
+	int ret;
+
+	_enter(",'%*.*s','%*.*s'",
+	       dir->d_name.len, dir->d_name.len, dir->d_name.name,
+	       rep->d_name.len, rep->d_name.len, rep->d_name.name);
+
+	/* non-directories can just be unlinked */
+	if (!S_ISDIR(rep->d_inode->i_mode)) {
+		_debug("unlink stale object");
+		ret = dir->d_inode->i_op->unlink(dir->d_inode, rep);
+
+		mutex_unlock(&dir->d_inode->i_mutex);
+
+		if (ret == 0) {
+			_debug("d_delete");
+			d_delete(rep);
+		}
+		else if (ret == -EIO) {
+			cachefiles_io_error(cache, "Unlink failed");
+		}
+
+		_leave(" = %d", ret);
+		return ret;
+	}
+
+	/* directories have to be moved to the graveyard */
+	_debug("move stale object to graveyard");
+	mutex_unlock(&dir->d_inode->i_mutex);
+
+try_again:
+	/* first step is to make up a grave dentry in the graveyard */
+	sprintf(nbuffer, "%08x%08x",
+		(uint32_t) xtime.tv_sec,
+		(uint32_t) atomic_inc_return(&cache->gravecounter));
+
+	name.name = nbuffer;
+	name.len = strlen(name.name);
+
+	/* hash the name */
+	name.hash = full_name_hash(name.name, name.len);
+
+	if (dir->d_op && dir->d_op->d_hash) {
+		ret = dir->d_op->d_hash(dir, &name);
+		if (ret < 0) {
+			if (ret == -EIO)
+				cachefiles_io_error(cache, "Hash failed");
+
+			_leave(" = %d", ret);
+			return ret;
+		}
+	}
+
+	/* do the multiway lock magic */
+	trap = lock_rename(cache->graveyard, dir);
+
+	/* do some checks before getting the grave dentry */
+	if (rep->d_parent != dir) {
+		/* the entry was probably culled when we dropped the parent dir
+		 * lock */
+		unlock_rename(cache->graveyard, dir);
+		_leave(" = 0 [culled?]");
+		return 0;
+	}
+
+	if (!S_ISDIR(cache->graveyard->d_inode->i_mode)) {
+		unlock_rename(cache->graveyard, dir);
+		cachefiles_io_error(cache, "Graveyard no longer a directory");
+		return -EIO;
+	}
+
+	if (trap == rep) {
+		unlock_rename(cache->graveyard, dir);
+		cachefiles_io_error(cache, "May not make directory loop");
+		return -EIO;
+	}
+
+	if (d_mountpoint(rep)) {
+		unlock_rename(cache->graveyard, dir);
+		cachefiles_io_error(cache, "Mountpoint in cache");
+		return -EIO;
+	}
+
+	/* see if there's a dentry already there for this name */
+	grave = d_lookup(cache->graveyard, &name);
+	if (!grave) {
+		_debug("not found");
+
+		grave = d_alloc(cache->graveyard, &name);
+		if (!grave) {
+			unlock_rename(cache->graveyard, dir);
+			_leave(" = -ENOMEM");
+			return -ENOMEM;
+		}
+
+		alt = cache->graveyard->d_inode->i_op->lookup(
+			cache->graveyard->d_inode, grave, NULL);
+		if (IS_ERR(alt)) {
+			unlock_rename(cache->graveyard, dir);
+			dput(grave);
+
+			if (PTR_ERR(alt) == -ENOMEM) {
+				_leave(" = -ENOMEM");
+				return -ENOMEM;
+			}
+
+			cachefiles_io_error(cache, "Lookup error %ld",
+					    PTR_ERR(alt));
+			return -EIO;
+		}
+
+		if (alt) {
+			dput(grave);
+			grave = alt;
+		}
+	}
+
+	if (grave->d_inode) {
+		unlock_rename(cache->graveyard, dir);
+		dput(grave);
+		grave = NULL;
+		cond_resched();
+		goto try_again;
+	}
+
+	if (d_mountpoint(grave)) {
+		unlock_rename(cache->graveyard, dir);
+		dput(grave);
+		cachefiles_io_error(cache, "Mountpoint in graveyard");
+		return -EIO;
+	}
+
+	/* target should not be an ancestor of source */
+	if (trap == grave) {
+		unlock_rename(cache->graveyard, dir);
+		dput(grave);
+		cachefiles_io_error(cache, "May not make directory loop");
+		return -EIO;
+	}
+
+	/* attempt the rename */
+	DQUOT_INIT(dir->d_inode);
+	DQUOT_INIT(cache->graveyard->d_inode);
+
+	old_name = fsnotify_oldname_init(rep->d_name.name);
+
+	ret = dir->d_inode->i_op->rename(dir->d_inode, rep,
+					 cache->graveyard->d_inode, grave);
+
+	if (ret == 0) {
+		d_move(rep, grave);
+		fsnotify_move(dir->d_inode, cache->graveyard->d_inode,
+			      old_name, rep->d_name.name, 1,
+			      grave->d_inode, rep->d_inode);
+	}
+	else if (ret != -ENOMEM) {
+		cachefiles_io_error(cache, "Rename failed with error %d", ret);
+	}
+
+	fsnotify_oldname_free(old_name);
+
+	unlock_rename(cache->graveyard, dir);
+	dput(grave);
+	_leave(" = 0");
+	return 0;
+
+} /* end cachefiles_bury_object() */
+
+/*****************************************************************************/
+/*
+ * delete an object representation from the cache
+ */
+int cachefiles_delete_object(struct cachefiles_cache *cache,
+			     struct cachefiles_object *object)
+{
+	struct dentry *dir;
+	int ret;
+
+	_enter(",{%p}", object->dentry);
+
+	ASSERT(object->dentry);
+	ASSERT(object->dentry->d_inode);
+	ASSERT(object->dentry->d_parent);
+
+	dir = dget_parent(object->dentry);
+
+	mutex_lock(&dir->d_inode->i_mutex);
+	ret = cachefiles_bury_object(cache, dir, object->dentry);
+
+	dput(dir);
+	_leave(" = %d", ret);
+	return ret;
+
+} /* end cachefiles_delete_object() */
+
+/*****************************************************************************/
+/*
+ * walk from the parent object to the child object through the backing
+ * filesystem, creating directories as we go
+ */
+int cachefiles_walk_to_object(struct cachefiles_object *parent,
+			      struct cachefiles_object *object,
+			      char *key,
+			      struct cachefiles_xattr *auxdata)
+{
+	struct cachefiles_cache *cache;
+	struct dentry *dir, *next = NULL, *new;
+	struct file *file;
+	struct qstr name;
+	uid_t fsuid;
+	gid_t fsgid;
+	int ret;
+
+	_enter("{%p}", parent->dentry);
+
+	cache = container_of(parent->fscache.cache,
+			     struct cachefiles_cache, cache);
+
+	ASSERT(parent->dentry);
+	ASSERT(parent->dentry->d_inode);
+
+	if (!(S_ISDIR(parent->dentry->d_inode->i_mode))) {
+		// TODO: convert file to dir
+		_leave("looking up in none directory");
+		return -ENOBUFS;
+	}
+
+	fsuid = current->fsuid;
+	fsgid = current->fsgid;
+	current->fsuid = 0;
+	current->fsgid = 0;
+
+	dir = dget(parent->dentry);
+
+advance:
+	/* attempt to transit the first directory component */
+	name.name = key;
+	key = strchr(key, '/');
+	if (key) {
+		name.len = key - (char *) name.name;
+		*key++ = 0;
+	}
+	else {
+		name.len = strlen(name.name);
+	}
+
+	/* hash the name */
+	name.hash = full_name_hash(name.name, name.len);
+
+	if (dir->d_op && dir->d_op->d_hash) {
+		ret = dir->d_op->d_hash(dir, &name);
+		if (ret < 0) {
+			cachefiles_io_error(cache, "Hash failed");
+			goto error_out2;
+		}
+	}
+
+lookup_again:
+	/* search the current directory for the element name */
+	_debug("lookup '%s' %x", name.name, name.hash);
+
+	mutex_lock(&dir->d_inode->i_mutex);
+
+	next = d_lookup(dir, &name);
+	if (!next) {
+		_debug("not found");
+
+		new = d_alloc(dir, &name);
+		if (!new)
+			goto nomem_d_alloc;
+
+		ASSERT(dir->d_inode->i_op);
+		ASSERT(dir->d_inode->i_op->lookup);
+
+		next = dir->d_inode->i_op->lookup(dir->d_inode, new, NULL);
+		if (IS_ERR(next))
+			goto lookup_error;
+
+		if (!next)
+			next = new;
+		else
+			dput(new);
+
+		if (next->d_inode) {
+			ret = -EPERM;
+			if (!next->d_inode->i_op ||
+			    !next->d_inode->i_op->setxattr ||
+			    !next->d_inode->i_op->getxattr ||
+			    !next->d_inode->i_op->removexattr)
+				goto error;
+
+			if (key && (!next->d_inode->i_op->lookup ||
+				    !next->d_inode->i_op->mkdir ||
+				    !next->d_inode->i_op->create ||
+				    !next->d_inode->i_op->rename ||
+				    !next->d_inode->i_op->rmdir ||
+				    !next->d_inode->i_op->unlink))
+				goto error;
+		}
+	}
+
+	_debug("next -> %p %s", next, next->d_inode ? "positive" : "negative");
+
+	if (!key)
+		object->new = !next->d_inode;
+
+	/* we need to create the object if it's negative */
+	if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) {
+		/* index objects and intervening tree levels must be subdirs */
+		if (!next->d_inode) {
+			DQUOT_INIT(dir->d_inode);
+			ret = dir->d_inode->i_op->mkdir(dir->d_inode, next, 0);
+			if (ret < 0)
+				goto create_error;
+
+			ASSERT(next->d_inode);
+
+			fsnotify_mkdir(dir->d_inode, next);
+
+			_debug("mkdir -> %p{%p{ino=%lu}}",
+			       next, next->d_inode, next->d_inode->i_ino);
+		}
+		/* we need to make sure a positive match found a directory */
+		else if (!S_ISDIR(next->d_inode->i_mode)) {
+			kerror("inode %lu is not a directory",
+			       next->d_inode->i_ino);
+			ret = -ENOBUFS;
+			goto error;
+		}
+	}
+	else {
+		/* non-index objects start out life as files */
+		if (!next->d_inode) {
+			DQUOT_INIT(dir->d_inode);
+			ret = dir->d_inode->i_op->create(dir->d_inode, next,
+							 S_IFREG, NULL);
+			if (ret < 0)
+				goto create_error;
+
+			ASSERT(next->d_inode);
+
+			fsnotify_create(dir->d_inode, next);
+
+			_debug("create -> %p{%p{ino=%lu}}",
+			       next, next->d_inode, next->d_inode->i_ino);
+		}
+		/* we need to make sure a positive match found a directory or a
+		 * file */
+		else if (!S_ISDIR(next->d_inode->i_mode) &&
+			 !S_ISREG(next->d_inode->i_mode)
+			 ) {
+			kerror("inode %lu is not a file or directory",
+			       next->d_inode->i_ino);
+			ret = -ENOBUFS;
+			goto error;
+		}
+	}
+
+	/* process the next component */
+	if (key) {
+		_debug("advance");
+		mutex_unlock(&dir->d_inode->i_mutex);
+		dput(dir);
+		dir = next;
+		next = NULL;
+		goto advance;
+	}
+
+	/* we've found the object we were looking for */
+	object->dentry = next;
+
+	/* if we've found that the terminal object exists, then we need to
+	 * check its attributes and delete it if it's out of date */
+	if (!object->new) {
+		_debug("validate '%*.*s'",
+		       next->d_name.len, next->d_name.len, next->d_name.name);
+
+		ret = cachefiles_check_object_xattr(object, auxdata);
+		if (ret == -ESTALE) {
+			/* delete the object (the deleter drops the directory
+			 * mutex) */
+			object->dentry = NULL;
+
+			ret = cachefiles_bury_object(cache, dir, next);
+			dput(next);
+			next = NULL;
+
+			if (ret < 0)
+				goto delete_error;
+
+			_debug("redo lookup");
+			goto lookup_again;
+		}
+	}
+
+	/* note that we're now using this object */
+	cachefiles_mark_object_active(cache, object);
+
+	mutex_unlock(&dir->d_inode->i_mutex);
+	dput(dir);
+	dir = NULL;
+
+	if (object->new) {
+		/* attach data to a newly constructed terminal object */
+		ret = cachefiles_set_object_xattr(object, auxdata);
+		if (ret < 0)
+			goto check_error;
+	}
+	else {
+		/* always update the atime on an object we've just looked up
+		 * (this is used to keep track of culling, and atimes are only
+		 * updated by read, write and readdir but not lookup or
+		 * open) */
+		touch_atime(cache->mnt, next);
+	}
+
+	/* open a file interface onto a data file */
+	if (object->type != FSCACHE_COOKIE_TYPE_INDEX) {
+		if (S_ISREG(object->dentry->d_inode->i_mode)) {
+			file = dentry_open_kernel(dget(object->dentry),
+						  mntget(cache->mnt), 0);
+			if (IS_ERR(file))
+				goto open_error;
+
+			object->backer = file;
+		}
+		else {
+			BUG(); // TODO: open file in data-class subdir
+		}
+	}
+
+	current->fsuid = fsuid;
+	current->fsgid = fsgid;
+	object->new = 0;
+
+	_leave(" = 0 [%lu]", object->dentry->d_inode->i_ino);
+	return 0;
+
+create_error:
+	if (ret == -EIO)
+		cachefiles_io_error(cache, "create/mkdir failed");
+	goto error;
+
+open_error:
+	ret = PTR_ERR(file);
+check_error:
+	write_lock(&cache->active_lock);
+	rb_erase(&object->active_node, &cache->active_nodes);
+	write_unlock(&cache->active_lock);
+
+	dput(object->dentry);
+	object->dentry = NULL;
+	goto error_out;
+
+delete_error:
+	_debug("delete error %d", ret);
+	goto error_out2;
+
+lookup_error:
+	_debug("lookup error %ld", PTR_ERR(next));
+	dput(new);
+	ret = PTR_ERR(next);
+	if (ret == -EIO)
+		cachefiles_io_error(cache, "Lookup failed");
+	next = NULL;
+	goto error;
+
+nomem_d_alloc:
+	ret = -ENOMEM;
+error:
+	mutex_unlock(&dir->d_inode->i_mutex);
+	dput(next);
+error_out2:
+	dput(dir);
+error_out:
+	current->fsuid = fsuid;
+	current->fsgid = fsgid;
+
+	_leave(" = ret");
+	return ret;
+
+} /* end cachefiles_walk_to_object() */
+
+/*****************************************************************************/
+/*
+ * get a subdirectory
+ */
+struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
+					struct dentry *dir,
+					const char *dirname)
+{
+	struct dentry *subdir, *new;
+	struct qstr name;
+	uid_t fsuid;
+	gid_t fsgid;
+	int ret;
+
+	_enter("");
+
+	/* set up the name */
+	name.name = dirname;
+	name.len = strlen(dirname);
+	name.hash = full_name_hash(name.name, name.len);
+
+	if (dir->d_op && dir->d_op->d_hash) {
+		ret = dir->d_op->d_hash(dir, &name);
+		if (ret < 0) {
+			if (ret == -EIO)
+				kerror("Hash failed");
+			_leave(" = %d", ret);
+			return ERR_PTR(ret);
+		}
+	}
+
+	/* search the current directory for the element name */
+	_debug("lookup '%s' %x", name.name, name.hash);
+
+	fsuid = current->fsuid;
+	fsgid = current->fsgid;
+	current->fsuid = 0;
+	current->fsgid = 0;
+
+	mutex_lock(&dir->d_inode->i_mutex);
+
+	subdir = d_lookup(dir, &name);
+	if (!subdir) {
+		_debug("not found");
+
+		new = d_alloc(dir, &name);
+		if (!new)
+			goto nomem_d_alloc;
+
+		subdir = dir->d_inode->i_op->lookup(dir->d_inode, new, NULL);
+		if (IS_ERR(subdir))
+			goto lookup_error;
+
+		if (!subdir)
+			subdir = new;
+		else
+			dput(new);
+	}
+
+	_debug("subdir -> %p %s",
+	       subdir, subdir->d_inode ? "positive" : "negative");
+
+	/* we need to create the subdir if it doesn't exist yet */
+	if (!subdir->d_inode) {
+		DQUOT_INIT(dir->d_inode);
+		ret = dir->d_inode->i_op->mkdir(dir->d_inode, subdir, 0700);
+		if (ret < 0)
+			goto mkdir_error;
+
+		ASSERT(subdir->d_inode);
+
+		fsnotify_mkdir(dir->d_inode, subdir);
+
+		_debug("mkdir -> %p{%p{ino=%lu}}",
+		       subdir,
+		       subdir->d_inode,
+		       subdir->d_inode->i_ino);
+	}
+
+	mutex_unlock(&dir->d_inode->i_mutex);
+
+	current->fsuid = fsuid;
+	current->fsgid = fsgid;
+
+	/* we need to make sure the subdir is a directory */
+	ASSERT(subdir->d_inode);
+
+	if (!S_ISDIR(subdir->d_inode->i_mode)) {
+		kerror("%s is not a directory", dirname);
+		ret = -EIO;
+		goto check_error;
+	}
+
+	ret = -EPERM;
+	if (!subdir->d_inode->i_op ||
+	    !subdir->d_inode->i_op->setxattr ||
+	    !subdir->d_inode->i_op->getxattr ||
+	    !subdir->d_inode->i_op->lookup ||
+	    !subdir->d_inode->i_op->mkdir ||
+	    !subdir->d_inode->i_op->create ||
+	    !subdir->d_inode->i_op->rename ||
+	    !subdir->d_inode->i_op->rmdir ||
+	    !subdir->d_inode->i_op->unlink)
+		goto check_error;
+
+	_leave(" = [%lu]", subdir->d_inode->i_ino);
+	return subdir;
+
+check_error:
+	dput(subdir);
+	_leave(" = %d [check]", ret);
+	return ERR_PTR(ret);
+
+mkdir_error:
+	mutex_unlock(&dir->d_inode->i_mutex);
+	kerror("mkdir %s failed with error %d", dirname, ret);
+	goto error_out;
+
+lookup_error:
+	mutex_unlock(&dir->d_inode->i_mutex);
+	dput(new);
+	ret = PTR_ERR(subdir);
+	kerror("Lookup %s failed with error %d", dirname, ret);
+	goto error_out;
+
+nomem_d_alloc:
+	mutex_unlock(&dir->d_inode->i_mutex);
+	ret = -ENOMEM;
+	goto error_out;
+
+error_out:
+	current->fsuid = fsuid;
+	current->fsgid = fsgid;
+	_leave(" = %d", ret);
+	return ERR_PTR(ret);
+
+} /* end cachefiles_get_directory() */
+
+/*****************************************************************************/
+/*
+ * cull an object if it's not in use
+ * - called only by cache manager daemon
+ */
+int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
+		    char *filename)
+{
+	struct cachefiles_object *object;
+	struct rb_node *_n;
+	struct dentry *victim, *new;
+	struct qstr name;
+	int ret;
+
+	_enter(",%*.*s/,%s",
+	       dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
+
+	/* set up the name */
+	name.name = filename;
+	name.len = strlen(filename);
+	name.hash = full_name_hash(name.name, name.len);
+
+	if (dir->d_op && dir->d_op->d_hash) {
+		ret = dir->d_op->d_hash(dir, &name);
+		if (ret < 0) {
+			if (ret == -EIO)
+				cachefiles_io_error(cache, "Hash failed");
+			_leave(" = %d", ret);
+			return ret;
+		}
+	}
+
+	/* look up the victim */
+	mutex_lock(&dir->d_inode->i_mutex);
+
+	victim = d_lookup(dir, &name);
+	if (!victim) {
+		_debug("not found");
+
+		new = d_alloc(dir, &name);
+		if (!new)
+			goto nomem_d_alloc;
+
+		victim = dir->d_inode->i_op->lookup(dir->d_inode, new, NULL);
+		if (IS_ERR(victim))
+			goto lookup_error;
+
+		if (!victim)
+			victim = new;
+		else
+			dput(new);
+	}
+
+	_debug("victim -> %p %s", victim, victim->d_inode ? "positive" : "negative");
+
+	/* if the object is no longer there then we probably retired the object
+	 * at the netfs's request whilst the cull was in progress
+	 */
+	if (!victim->d_inode) {
+		mutex_unlock(&dir->d_inode->i_mutex);
+		dput(victim);
+		_leave(" = -ENOENT [absent]");
+		return -ENOENT;
+	}
+
+	/* check to see if we're using this object */
+	read_lock(&cache->active_lock);
+
+	_n = cache->active_nodes.rb_node;
+
+	while (_n) {
+		object = rb_entry(_n, struct cachefiles_object, active_node);
+
+		if (object->dentry > victim)
+			_n = _n->rb_left;
+		else if (object->dentry < victim)
+			_n = _n->rb_right;
+		else
+			goto object_in_use;
+	}
+
+	read_unlock(&cache->active_lock);
+
+	/* okay... the victim is not being used so we can cull it
+	 * - start by marking it as stale
+	 */
+	_debug("victim is cullable");
+
+	ret = cachefiles_remove_object_xattr(cache, victim);
+	if (ret < 0)
+		goto error_unlock;
+
+	/*  actually remove the victim (drops the dir mutex) */
+	_debug("bury");
+
+	ret = cachefiles_bury_object(cache, dir, victim);
+	if (ret < 0)
+		goto error;
+
+	dput(victim);
+	_leave(" = 0");
+	return 0;
+
+
+object_in_use:
+	read_unlock(&cache->active_lock);
+	mutex_unlock(&dir->d_inode->i_mutex);
+	dput(victim);
+	_leave(" = -EBUSY [in use]");
+	return -EBUSY;
+
+nomem_d_alloc:
+	mutex_unlock(&dir->d_inode->i_mutex);
+	_leave(" = -ENOMEM");
+	return -ENOMEM;
+
+lookup_error:
+	mutex_unlock(&dir->d_inode->i_mutex);
+	dput(new);
+	ret = PTR_ERR(victim);
+	if (ret == -EIO)
+		cachefiles_io_error(cache, "Lookup failed");
+	goto choose_error;
+
+error_unlock:
+	mutex_unlock(&dir->d_inode->i_mutex);
+error:
+	dput(victim);
+choose_error:
+	if (ret == -ENOENT) {
+		/* file or dir now absent - probably retired by netfs */
+		_leave(" = -ESTALE [absent]");
+		return -ESTALE;
+	}
+
+	if (ret != -ENOMEM) {
+		kerror("Internal error: %d", ret);
+		ret = -EIO;
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+
+} /* end cachefiles_cull() */
diff --git a/fs/cachefiles/cf-proc.c b/fs/cachefiles/cf-proc.c
new file mode 100644
index 0000000..fce1385
--- /dev/null
+++ b/fs/cachefiles/cf-proc.c
@@ -0,0 +1,510 @@
+/* cf-proc.c: /proc/fs/cachefiles interface
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/namespace.h>
+#include <linux/statfs.h>
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include "internal.h"
+
+static int cachefiles_proc_open(struct inode *, struct file *);
+static int cachefiles_proc_release(struct inode *, struct file *);
+static ssize_t cachefiles_proc_read(struct file *, char __user *, size_t, loff_t *);
+static ssize_t cachefiles_proc_write(struct file *, const char __user *, size_t, loff_t *);
+static int cachefiles_proc_brun(struct cachefiles_cache *cache, char *args);
+static int cachefiles_proc_bcull(struct cachefiles_cache *cache, char *args);
+static int cachefiles_proc_bstop(struct cachefiles_cache *cache, char *args);
+static int cachefiles_proc_cull(struct cachefiles_cache *cache, char *args);
+static int cachefiles_proc_debug(struct cachefiles_cache *cache, char *args);
+static int cachefiles_proc_dir(struct cachefiles_cache *cache, char *args);
+static int cachefiles_proc_tag(struct cachefiles_cache *cache, char *args);
+
+struct proc_dir_entry *cachefiles_proc;
+
+static unsigned long cachefiles_open;
+
+struct file_operations cachefiles_proc_fops = {
+	.open		= cachefiles_proc_open,
+	.release	= cachefiles_proc_release,
+	.read		= cachefiles_proc_read,
+	.write		= cachefiles_proc_write,
+};
+
+struct cachefiles_proc_cmd {
+	char name[8];
+	int (*handler)(struct cachefiles_cache *cache, char *args);
+};
+
+static const struct cachefiles_proc_cmd cachefiles_proc_cmds[] = {
+	{ "bind",	cachefiles_proc_bind	},
+	{ "brun",	cachefiles_proc_brun	},
+	{ "bcull",	cachefiles_proc_bcull	},
+	{ "bstop",	cachefiles_proc_bstop	},
+	{ "cull",	cachefiles_proc_cull	},
+	{ "debug",	cachefiles_proc_debug	},
+	{ "dir",	cachefiles_proc_dir	},
+	{ "tag",	cachefiles_proc_tag	},
+	{ "",		NULL			}
+};
+
+
+/*****************************************************************************/
+/*
+ * do various checks
+ */
+static int cachefiles_proc_open(struct inode *inode, struct file *file)
+{
+	struct cachefiles_cache *cache;
+
+	_enter("");
+
+	/* only the superuser may do this */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* /proc/fs/cachefiles may only be open once at a time */
+	if (xchg(&cachefiles_open, 1) == 1)
+		return -EBUSY;
+
+	/* allocate a cache record */
+	cache = kzalloc(sizeof(struct cachefiles_cache), GFP_KERNEL);
+	if (!cache) {
+		cachefiles_open = 0;
+		return -ENOMEM;
+	}
+
+	cache->active_nodes = RB_ROOT;
+	rwlock_init(&cache->active_lock);
+
+	/* set default caching limits
+	 * - limit at 1% free space
+	 * - cull below 5% free space
+	 * - cease culling above 7% free space
+	 */
+	cache->brun_percent = 7;
+	cache->bcull_percent = 5;
+	cache->bstop_percent = 1;
+
+	file->private_data = cache;
+	cache->cachefilesd = file;
+	return 0;
+
+} /* end cachefiles_proc_open() */
+
+/*****************************************************************************/
+/*
+ * release a cache
+ */
+static int cachefiles_proc_release(struct inode *inode, struct file *file)
+{
+	struct cachefiles_cache *cache = file->private_data;
+
+	_enter("");
+
+	ASSERT(cache);
+
+	set_bit(CACHEFILES_DEAD, &cache->flags);
+
+	cachefiles_proc_unbind(cache);
+
+	ASSERT(!cache->active_nodes.rb_node);
+
+	/* clean up the control file interface */
+	cache->cachefilesd = NULL;
+	file->private_data = NULL;
+	cachefiles_open = 0;
+
+	kfree(cache);
+
+	_leave("");
+	return 0;
+
+} /* end cachefiles_proc_release() */
+
+/*****************************************************************************/
+/*
+ * read the cache state
+ */
+static ssize_t cachefiles_proc_read(struct file *file, char __user *_buffer,
+				    size_t buflen, loff_t *pos)
+{
+	struct cachefiles_cache *cache = file->private_data;
+	char buffer[256];
+	int n;
+
+	_enter(",,%zu,", buflen);
+
+	if (!test_bit(CACHEFILES_READY, &cache->flags))
+		return 0;
+
+	/* check how much space the cache has */
+	cachefiles_has_space(cache, 0);
+
+	/* summarise */
+	n = snprintf(buffer, sizeof(buffer),
+		     "cull=%c"
+		     " brun=%llx"
+		     " bcull=%llx"
+		     " bstop=%llx",
+		     test_bit(CACHEFILES_CULLING, &cache->flags) ? '1' : '0',
+		     cache->brun,
+		     cache->bcull,
+		     cache->bstop
+		     );
+
+	if (n > buflen)
+		return -EMSGSIZE;
+
+	if (copy_to_user(_buffer, buffer, n) != 0)
+		return -EFAULT;
+
+	return n;
+
+} /* end cachefiles_proc_read() */
+
+/*****************************************************************************/
+/*
+ * command the cache
+ */
+static ssize_t cachefiles_proc_write(struct file *file,
+				     const char __user *_data, size_t datalen,
+				     loff_t *pos)
+{
+	const struct cachefiles_proc_cmd *cmd;
+	struct cachefiles_cache *cache = file->private_data;
+	ssize_t ret;
+	char *data, *args, *cp;
+
+	_enter(",,%zu,", datalen);
+
+	ASSERT(cache);
+
+	if (test_bit(CACHEFILES_DEAD, &cache->flags))
+		return -EIO;
+
+	if (datalen < 0 || datalen > PAGE_SIZE - 1)
+		return -EOPNOTSUPP;
+
+	/* drag the command string into the kernel so we can parse it */
+	data = kmalloc(datalen + 1, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	ret = -EFAULT;
+	if (copy_from_user(data, _data, datalen) != 0)
+		goto error;
+
+	data[datalen] = '\0';
+
+	ret = -EINVAL;
+	if (memchr(data, '\0', datalen))
+		goto error;
+
+	/* strip any newline */
+	cp = memchr(data, '\n', datalen);
+	if (cp) {
+		if (cp == data)
+			goto error;
+
+		*cp = '\0';
+	}
+
+	/* parse the command */
+	ret = -EOPNOTSUPP;
+
+	for (args = data; *args; args++)
+		if (isspace(*args))
+			break;
+	if (*args) {
+		if (args == data)
+			goto error;
+		*args = '\0';
+		for (args++; isspace(*args); args++)
+			continue;
+	}
+
+	/* run the appropriate command handler */
+	for (cmd = cachefiles_proc_cmds; cmd->name[0]; cmd++)
+		if (strcmp(cmd->name, data) == 0)
+			goto found_command;
+
+error:
+	kfree(data);
+	_leave(" = %d", ret);
+	return ret;
+
+found_command:
+	mutex_lock(&file->f_dentry->d_inode->i_mutex);
+
+	ret = -EIO;
+	if (!test_bit(CACHEFILES_DEAD, &cache->flags))
+		ret = cmd->handler(cache, args);
+
+	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+
+	if (ret == 0)
+		ret = datalen;
+	goto error;
+
+} /* end cachefiles_proc_write() */
+
+/*****************************************************************************/
+/*
+ * give a range error for cache space constraints
+ * - can be tail-called
+ */
+static int cachefiles_proc_range_error(struct cachefiles_cache *cache, char *args)
+{
+	kerror("Free space limits must be in range"
+	       " 0%%<=bstop<bcull<brun<100%%");
+
+	return -EINVAL;
+
+} /* end cachefiles_proc_range_error() */
+
+/*****************************************************************************/
+/*
+ * set the percentage of blocks at which to stop culling
+ * - command: "brun <N>%"
+ */
+static int cachefiles_proc_brun(struct cachefiles_cache *cache, char *args)
+{
+	unsigned long brun;
+
+	_enter(",%s", args);
+
+	if (!*args)
+		return -EINVAL;
+
+	brun = simple_strtoul(args, &args, 10);
+	if (args[0] != '%' || args[1] != '\0')
+		return -EINVAL;
+
+	if (brun <= cache->bcull_percent || brun >= 100)
+		return cachefiles_proc_range_error(cache, args);
+
+	cache->brun_percent = brun;
+	return 0;
+
+} /* end cachefiles_proc_brun() */
+
+/*****************************************************************************/
+/*
+ * set the percentage of blocks at which to start culling
+ * - command: "bcull <N>%"
+ */
+static int cachefiles_proc_bcull(struct cachefiles_cache *cache, char *args)
+{
+	unsigned long bcull;
+
+	_enter(",%s", args);
+
+	if (!*args)
+		return -EINVAL;
+
+	bcull = simple_strtoul(args, &args, 10);
+	if (args[0] != '%' || args[1] != '\0')
+		return -EINVAL;
+
+	if (bcull <= cache->bstop_percent || bcull >= cache->brun_percent)
+		return cachefiles_proc_range_error(cache, args);
+
+	cache->bcull_percent = bcull;
+	return 0;
+
+} /* end cachefiles_proc_bcull() */
+
+/*****************************************************************************/
+/*
+ * set the percentage of blocks at which to stop allocating
+ * - command: "bstop <N>%"
+ */
+static int cachefiles_proc_bstop(struct cachefiles_cache *cache, char *args)
+{
+	unsigned long bstop;
+
+	_enter(",%s", args);
+
+	if (!*args)
+		return -EINVAL;
+
+	bstop = simple_strtoul(args, &args, 10);
+	if (args[0] != '%' || args[1] != '\0')
+		return -EINVAL;
+
+	if (bstop < 0 || bstop >= cache->bcull_percent)
+		return cachefiles_proc_range_error(cache, args);
+
+	cache->bstop_percent = bstop;
+	return 0;
+
+} /* end cachefiles_proc_bstop() */
+
+/*****************************************************************************/
+/*
+ * set the cache directory
+ * - command: "dir <name>"
+ */
+static int cachefiles_proc_dir(struct cachefiles_cache *cache, char *args)
+{
+	char *dir;
+
+	_enter(",%s", args);
+
+	if (!*args) {
+		kerror("Empty directory specified");
+		return -EINVAL;
+	}
+
+	if (cache->rootdirname) {
+		kerror("Second cache directory specified");
+		return -EEXIST;
+	}
+
+	dir = kstrdup(args, GFP_KERNEL);
+	if (!dir)
+		return -ENOMEM;
+
+	cache->rootdirname = dir;
+	return 0;
+
+} /* end cachefiles_proc_dir() */
+
+/*****************************************************************************/
+/*
+ * set the cache tag
+ * - command: "tag <name>"
+ */
+static int cachefiles_proc_tag(struct cachefiles_cache *cache, char *args)
+{
+	char *tag;
+
+	_enter(",%s", args);
+
+	if (!*args) {
+		kerror("Empty tag specified");
+		return -EINVAL;
+	}
+
+	if (cache->tag)
+		return -EEXIST;
+
+	tag = kstrdup(args, GFP_KERNEL);
+	if (!tag)
+		return -ENOMEM;
+
+	cache->tag = tag;
+	return 0;
+
+} /* end cachefiles_proc_tag() */
+
+/*****************************************************************************/
+/*
+ * request a node in the cache be culled
+ * - command: "cull <dirfd> <name>"
+ */
+static int cachefiles_proc_cull(struct cachefiles_cache *cache, char *args)
+{
+	struct dentry *dir;
+	struct file *dirfile;
+	int dirfd, fput_needed, ret;
+
+	_enter(",%s", args);
+
+	dirfd = simple_strtoul(args, &args, 0);
+
+	if (!args || !isspace(*args))
+		goto inval;
+
+	while (isspace(*args))
+		args++;
+
+	if (!*args)
+		goto inval;
+
+	if (strchr(args, '/'))
+		goto inval;
+
+	if (!test_bit(CACHEFILES_READY, &cache->flags)) {
+		kerror("cull applied to unready cache");
+		return -EIO;
+	}
+
+	if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
+		kerror("cull applied to dead cache");
+		return -EIO;
+	}
+
+	/* extract the directory dentry from the fd */
+	dirfile = fget_light(dirfd, &fput_needed);
+	if (!dirfile) {
+		kerror("cull dirfd not open");
+		return -EBADF;
+	}
+
+	dir = dget(dirfile->f_dentry);
+	fput_light(dirfile, fput_needed);
+	dirfile = NULL;
+
+	if (!S_ISDIR(dir->d_inode->i_mode))
+		goto notdir;
+
+	ret = cachefiles_cull(cache, dir, args);
+
+	dput(dir);
+	_leave(" = %d", ret);
+	return ret;
+
+notdir:
+	dput(dir);
+	kerror("cull command requires dirfd to be a directory");
+	return -ENOTDIR;
+
+inval:
+	kerror("cull command requires dirfd and filename");
+	return -EINVAL;
+
+} /* end cachefiles_proc_cull() */
+
+/*****************************************************************************/
+/*
+ * set debugging mode
+ * - command: "debug <mask>"
+ */
+static int cachefiles_proc_debug(struct cachefiles_cache *cache, char *args)
+{
+	unsigned long mask;
+
+	_enter(",%s", args);
+
+	mask = simple_strtoul(args, &args, 0);
+
+	if (!args || !isspace(*args))
+		goto inval;
+
+	cachefiles_debug = mask;
+	_leave(" = 0");
+	return 0;
+
+inval:
+	kerror("debug command requires mask");
+	return -EINVAL;
+
+} /* end cachefiles_proc_debug() */
diff --git a/fs/cachefiles/cf-xattr.c b/fs/cachefiles/cf-xattr.c
new file mode 100644
index 0000000..a811667
--- /dev/null
+++ b/fs/cachefiles/cf-xattr.c
@@ -0,0 +1,299 @@
+/* cf-xattr.c: CacheFiles extended attribute management
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/quotaops.h>
+#include <linux/xattr.h>
+#include "internal.h"
+
+static const char cachefiles_xattr_cache[] = XATTR_USER_PREFIX "CacheFiles.cache";
+
+/*****************************************************************************/
+/*
+ * check the type label on an object
+ * - done using xattrs
+ */
+int cachefiles_check_object_type(struct cachefiles_object *object)
+{
+	struct dentry *dentry = object->dentry;
+	char type[3], xtype[3];
+	int ret;
+
+	ASSERT(dentry);
+	ASSERT(dentry->d_inode);
+	ASSERT(dentry->d_inode->i_op);
+	ASSERT(dentry->d_inode->i_op->setxattr);
+	ASSERT(dentry->d_inode->i_op->getxattr);
+
+	if (!object->fscache.cookie)
+		strcpy(type, "C3");
+	else
+		snprintf(type, 3, "%02x", object->fscache.cookie->def->type);
+
+	_enter("%p{%s}", object, type);
+
+	mutex_lock(&dentry->d_inode->i_mutex);
+
+	/* attempt to install a type label directly */
+	ret = dentry->d_inode->i_op->setxattr(dentry, cachefiles_xattr_cache,
+					      type, 2, XATTR_CREATE);
+	if (ret == 0) {
+		_debug("SET");
+		fsnotify_xattr(dentry);
+		mutex_unlock(&dentry->d_inode->i_mutex);
+		goto error;
+	}
+
+	if (ret != -EEXIST) {
+		kerror("Can't set xattr on %*.*s [%lu] (err %d)",
+		       dentry->d_name.len, dentry->d_name.len,
+		       dentry->d_name.name, dentry->d_inode->i_ino,
+		       -ret);
+		goto error;
+	}
+
+	/* read the current type label */
+	ret = dentry->d_inode->i_op->getxattr(dentry, cachefiles_xattr_cache,
+					      xtype, 3);
+	if (ret < 0) {
+		if (ret == -ERANGE)
+			goto bad_type_length;
+
+		kerror("Can't read xattr on %*.*s [%lu] (err %d)",
+		       dentry->d_name.len, dentry->d_name.len,
+		       dentry->d_name.name, dentry->d_inode->i_ino,
+		       -ret);
+		goto error;
+	}
+
+	/* check the type is what we're expecting */
+	if (ret != 2)
+		goto bad_type_length;
+
+	if (xtype[0] != type[0] || xtype[1] != type[1])
+		goto bad_type;
+
+	ret = 0;
+
+error:
+	mutex_unlock(&dentry->d_inode->i_mutex);
+	_leave(" = %d", ret);
+	return ret;
+
+bad_type_length:
+	kerror("Cache object %lu type xattr length incorrect",
+	       dentry->d_inode->i_ino);
+	ret = -EIO;
+	goto error;
+
+bad_type:
+	xtype[2] = 0;
+	kerror("Cache object %*.*s [%lu] type %s not %s",
+	       dentry->d_name.len, dentry->d_name.len,
+	       dentry->d_name.name, dentry->d_inode->i_ino,
+	       xtype, type);
+	ret = -EIO;
+	goto error;
+
+} /* end cachefiles_check_object_type() */
+
+/*****************************************************************************/
+/*
+ * set the state xattr on a cache file
+ */
+int cachefiles_set_object_xattr(struct cachefiles_object *object,
+				struct cachefiles_xattr *auxdata)
+{
+	struct dentry *dentry = object->dentry;
+	int ret;
+
+	ASSERT(object->fscache.cookie);
+	ASSERT(dentry);
+	ASSERT(dentry->d_inode->i_op->setxattr);
+
+	_enter("%p,#%d", object, auxdata->len);
+
+	/* attempt to install the cache metadata directly */
+	mutex_lock(&dentry->d_inode->i_mutex);
+
+	_debug("SET %s #%u",
+	       object->fscache.cookie->def->name, auxdata->len);
+
+	ret = dentry->d_inode->i_op->setxattr(dentry, cachefiles_xattr_cache,
+					      &auxdata->type, auxdata->len,
+					      XATTR_CREATE);
+	if (ret == 0)
+		fsnotify_xattr(dentry);
+	else if (ret != -ENOMEM)
+		cachefiles_io_error_obj(object,
+					"Failed to set xattr with error %d",
+					ret);
+
+	mutex_unlock(&dentry->d_inode->i_mutex);
+	_leave(" = %d", ret);
+	return ret;
+
+} /* end cachefiles_set_object_xattr() */
+
+/*****************************************************************************/
+/*
+ * check the state xattr on a cache file
+ * - return -ESTALE if the object should be deleted
+ */
+int cachefiles_check_object_xattr(struct cachefiles_object *object,
+				  struct cachefiles_xattr *auxdata)
+{
+	struct cachefiles_xattr *auxbuf;
+	struct dentry *dentry = object->dentry;
+	int ret;
+
+	_enter("%p,#%d", object, auxdata->len);
+
+	ASSERT(dentry);
+	ASSERT(dentry->d_inode);
+	ASSERT(dentry->d_inode->i_op->setxattr);
+	ASSERT(dentry->d_inode->i_op->getxattr);
+
+	auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL);
+	if (!auxbuf) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	mutex_lock(&dentry->d_inode->i_mutex);
+
+	/* read the current type label */
+	ret = dentry->d_inode->i_op->getxattr(dentry, cachefiles_xattr_cache,
+					      &auxbuf->type, 512 + 1);
+	if (ret < 0) {
+		if (ret == -ENODATA)
+			goto stale; /* no attribute - power went off
+				     * mid-cull? */
+
+		if (ret == -ERANGE)
+			goto bad_type_length;
+
+		cachefiles_io_error_obj(object,
+					"can't read xattr on %lu (err %d)",
+					dentry->d_inode->i_ino, -ret);
+		goto error;
+	}
+
+	/* check the on-disk object */
+	if (ret < 1)
+		goto bad_type_length;
+
+	if (auxbuf->type != auxdata->type)
+		goto stale;
+
+	auxbuf->len = ret;
+
+	/* consult the netfs */
+	if (object->fscache.cookie->def->check_aux) {
+		fscache_checkaux_t result;
+		unsigned int dlen;
+
+		dlen = auxbuf->len - 1;
+
+		_debug("checkaux %s #%u",
+		       object->fscache.cookie->def->name, dlen);
+
+		result = object->fscache.cookie->def->check_aux(
+			object->fscache.cookie->netfs_data,
+			&auxbuf->data, dlen);
+
+		switch (result) {
+			/* entry okay as is */
+		case FSCACHE_CHECKAUX_OKAY:
+			goto okay;
+
+			/* entry requires update */
+		case FSCACHE_CHECKAUX_NEEDS_UPDATE:
+			break;
+
+			/* entry requires deletion */
+		case FSCACHE_CHECKAUX_OBSOLETE:
+			goto stale;
+
+		default:
+			BUG();
+		}
+
+		/* update the current label */
+		ret = dentry->d_inode->i_op->setxattr(dentry,
+						      cachefiles_xattr_cache,
+						      &auxdata->type,
+						      auxdata->len,
+						      XATTR_REPLACE);
+		if (ret < 0) {
+			cachefiles_io_error_obj(object,
+						"Can't update xattr on %lu"
+						" (error %d)",
+						dentry->d_inode->i_ino, -ret);
+			goto error;
+		}
+	}
+
+okay:
+	ret = 0;
+
+error:
+	mutex_unlock(&dentry->d_inode->i_mutex);
+	kfree(auxbuf);
+	_leave(" = %d", ret);
+	return ret;
+
+bad_type_length:
+	kerror("Cache object %lu xattr length incorrect",
+	       dentry->d_inode->i_ino);
+	ret = -EIO;
+	goto error;
+
+stale:
+	ret = -ESTALE;
+	goto error;
+
+} /* end cachefiles_check_object_xattr() */
+
+/*****************************************************************************/
+/*
+ * remove the object's xattr to mark it stale
+ */
+int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
+				   struct dentry *dentry)
+{
+	int ret;
+
+	mutex_lock(&dentry->d_inode->i_mutex);
+
+	ret = dentry->d_inode->i_op->removexattr(dentry,
+						 cachefiles_xattr_cache);
+
+	mutex_unlock(&dentry->d_inode->i_mutex);
+
+	if (ret < 0) {
+		if (ret == -ENOENT || ret == -ENODATA)
+			ret = 0;
+		else if (ret != -ENOMEM)
+			cachefiles_io_error(cache,
+					    "Can't remove xattr from %lu"
+					    " (error %d)",
+					    dentry->d_inode->i_ino, -ret);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+
+} /* end cachefiles_remove_object_xattr() */
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
new file mode 100644
index 0000000..83af6b3
--- /dev/null
+++ b/fs/cachefiles/internal.h
@@ -0,0 +1,292 @@
+/* internal.h: general netfs cache on cache files internal defs
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * CacheFiles layout:
+ *
+ *	/..../CacheDir/
+ *		index
+ *		0/
+ *		1/
+ *		2/
+ *		  index
+ *		  0/
+ *		  1/
+ *		  2/
+ *		    index
+ *		    0
+ *		    1
+ *		    2
+ */
+
+#include <linux/fscache-cache.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+struct cachefiles_cache;
+struct cachefiles_object;
+
+extern unsigned long cachefiles_debug;
+extern struct fscache_cache_ops cachefiles_cache_ops;
+extern struct proc_dir_entry *cachefiles_proc;
+extern struct file_operations cachefiles_proc_fops;
+
+/*****************************************************************************/
+/*
+ * node records
+ */
+struct cachefiles_object {
+	struct fscache_object		fscache;	/* fscache handle */
+	struct dentry			*dentry;	/* the file/dir representing this object */
+	struct file			*backer;	/* backing file */
+	loff_t				i_size;		/* object size */
+	atomic_t			usage;		/* basic object usage count */
+	atomic_t			fscache_usage;	/* FSDEF object usage count */
+	uint8_t				type;		/* object type */
+	uint8_t				new;		/* T if object new */
+	spinlock_t			work_lock;
+	struct rw_semaphore		sem;
+	struct work_struct		read_work;	/* read page copier */
+	struct list_head		read_list;	/* pages to copy */
+	struct list_head		read_pend_list;	/* pages to pending read from backer */
+	struct work_struct		write_work;	/* page writer */
+	struct list_head		write_list;	/* pages to store */
+	struct rb_node			active_node;	/* link in active tree (dentry is key) */
+};
+
+extern kmem_cache_t *cachefiles_object_jar;
+
+/*****************************************************************************/
+/*
+ * Cache files cache definition
+ */
+struct cachefiles_cache {
+	struct fscache_cache		cache;		/* FS-Cache record */
+	struct vfsmount			*mnt;		/* mountpoint holding the cache */
+	struct dentry			*graveyard;	/* directory into which dead objects go */
+	struct file			*cachefilesd;	/* manager daemon handle */
+	struct rb_root			active_nodes;	/* active nodes (can't be culled) */
+	rwlock_t			active_lock;	/* lock for active_nodes */
+	atomic_t			gravecounter;	/* graveyard uniquifier */
+	unsigned			brun_percent;	/* when to stop culling (%) */
+	unsigned			bcull_percent;	/* when to start culling (%) */
+	unsigned			bstop_percent;	/* when to stop allocating (%) */
+	unsigned			bsize;		/* cache's block size */
+	unsigned			bshift;		/* min(log2 (PAGE_SIZE / bsize), 0) */
+	sector_t			brun;		/* when to stop culling */
+	sector_t			bcull;		/* when to start culling */
+	sector_t			bstop;		/* when to stop allocating */
+	unsigned long			flags;
+#define CACHEFILES_READY		0	/* T if cache prepared */
+#define CACHEFILES_DEAD			1	/* T if cache dead */
+#define CACHEFILES_CULLING		2	/* T if cull engaged */
+	char				*rootdirname;	/* name of cache root directory */
+	char				*tag;		/* cache binding tag */
+};
+
+/*****************************************************************************/
+/*
+ * backing file read tracking
+ */
+struct cachefiles_one_read {
+	wait_queue_t			monitor;	/* link into monitored waitqueue */
+	struct page			*back_page;	/* backing file page we're waiting for */
+	struct page			*netfs_page;	/* netfs page we're going to fill */
+	struct cachefiles_object	*object;
+	struct list_head		obj_link;	/* link in object's lists */
+	fscache_rw_complete_t		callback_func;
+	void				*callback_data;
+};
+
+/*****************************************************************************/
+/*
+ * backing file write tracking
+ */
+struct cachefiles_one_write {
+	struct page			*netfs_page;	/* netfs page to copy */
+	struct cachefiles_object	*object;
+	struct list_head		obj_link;	/* link in object's lists */
+	fscache_rw_complete_t		callback_func;
+	void				*callback_data;
+};
+
+/*****************************************************************************/
+/*
+ * auxiliary data xattr buffer
+ */
+struct cachefiles_xattr {
+	uint16_t			len;
+	uint8_t				type;
+	uint8_t				data[];
+};
+
+
+/* cf-bind.c */
+extern int cachefiles_proc_bind(struct cachefiles_cache *cache, char *args);
+extern void cachefiles_proc_unbind(struct cachefiles_cache *cache);
+
+/* cf-interface.c */
+extern void cachefiles_read_copier_work(void *_object);
+extern void cachefiles_write_work(void *_object);
+extern int cachefiles_has_space(struct cachefiles_cache *cache, unsigned nr);
+
+/* cf-key.c */
+extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type);
+
+/* cf-namei.c */
+extern int cachefiles_delete_object(struct cachefiles_cache *cache,
+				    struct cachefiles_object *object);
+extern int cachefiles_walk_to_object(struct cachefiles_object *parent,
+				     struct cachefiles_object *object,
+				     char *key,
+				     struct cachefiles_xattr *auxdata);
+extern struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
+					       struct dentry *dir,
+					       const char *name);
+
+extern int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
+			   char *filename);
+
+/* cf-xattr.c */
+extern int cachefiles_check_object_type(struct cachefiles_object *object);
+extern int cachefiles_set_object_xattr(struct cachefiles_object *object,
+				       struct cachefiles_xattr *auxdata);
+extern int cachefiles_check_object_xattr(struct cachefiles_object *object,
+					 struct cachefiles_xattr *auxdata);
+extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
+					  struct dentry *dentry);
+
+
+/*****************************************************************************/
+/*
+ * debug tracing
+ */
+#define kerror(FMT,...) printk(KERN_ERR "CacheFiles: "FMT"\n" ,##__VA_ARGS__);
+
+#define cachefiles_io_error(___cache, FMT, ...)		\
+do {							\
+	kerror("I/O Error: " FMT ,##__VA_ARGS__);	\
+	fscache_io_error(&(___cache)->cache);		\
+	set_bit(CACHEFILES_DEAD, &(___cache)->flags);	\
+} while(0)
+
+#define cachefiles_io_error_obj(object, FMT, ...)			\
+do {									\
+	struct cachefiles_cache *___cache;				\
+									\
+	___cache = container_of((object)->fscache.cache,		\
+				struct cachefiles_cache, cache);	\
+	cachefiles_io_error(___cache, FMT ,##__VA_ARGS__);		\
+} while(0)
+
+#define dbgprintk(FMT,...) \
+	printk("[%-6.6s] "FMT"\n",current->comm ,##__VA_ARGS__)
+#define _dbprintk(FMT,...) do { } while(0)
+
+#define kenter(FMT,...)	dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
+#define kleave(FMT,...)	dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
+#define kdebug(FMT,...)	dbgprintk(FMT ,##__VA_ARGS__)
+
+
+#define kjournal(FMT,...) _dbprintk(FMT ,##__VA_ARGS__)
+
+#define dbgfree(ADDR)  _dbprintk("%p:%d: FREEING %p",__FILE__,__LINE__,ADDR)
+
+#define dbgpgalloc(PAGE)						\
+do {									\
+	_dbprintk("PGALLOC %s:%d: %p {%lx,%lu}\n",			\
+		  __FILE__,__LINE__,					\
+		  (PAGE),(PAGE)->mapping->host->i_ino,(PAGE)->index	\
+		  );							\
+} while(0)
+
+#define dbgpgfree(PAGE)						\
+do {								\
+	if ((PAGE))						\
+		_dbprintk("PGFREE %s:%d: %p {%lx,%lu}\n",	\
+			  __FILE__,__LINE__,			\
+			  (PAGE),				\
+			  (PAGE)->mapping->host->i_ino,		\
+			  (PAGE)->index				\
+			  );					\
+} while(0)
+
+#ifdef __KDEBUG
+#define _enter(FMT,...)	kenter(FMT,##__VA_ARGS__)
+#define _leave(FMT,...)	kleave(FMT,##__VA_ARGS__)
+#define _debug(FMT,...)	kdebug(FMT,##__VA_ARGS__)
+#else
+#define _enter(FMT,...)	do { } while(0)
+#define _leave(FMT,...)	do { } while(0)
+#define _debug(FMT,...)	do { } while(0)
+#endif
+
+#if 1 // defined(__KDEBUGALL)
+
+#define ASSERT(X)							\
+do {									\
+	if (unlikely(!(X))) {						\
+		printk(KERN_ERR "\n");					\
+		printk(KERN_ERR "CacheFiles: Assertion failed\n");	\
+		BUG();							\
+	}								\
+} while(0)
+
+#define ASSERTCMP(X, OP, Y)						\
+do {									\
+	if (unlikely(!((X) OP (Y)))) {					\
+		printk(KERN_ERR "\n");					\
+		printk(KERN_ERR "CacheFiles: Assertion failed\n");	\
+		printk(KERN_ERR "%lx " #OP " %lx is false\n",		\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		BUG();							\
+	}								\
+} while(0)
+
+#define ASSERTIF(C, X)							\
+do {									\
+	if (unlikely((C) && !(X))) {					\
+		printk(KERN_ERR "\n");					\
+		printk(KERN_ERR "CacheFiles: Assertion failed\n");	\
+		BUG();							\
+	}								\
+} while(0)
+
+#define ASSERTIFCMP(C, X, OP, Y)					\
+do {									\
+	if (unlikely((C) && !((X) OP (Y)))) {				\
+		printk(KERN_ERR "\n");					\
+		printk(KERN_ERR "CacheFiles: Assertion failed\n");	\
+		printk(KERN_ERR "%lx " #OP " %lx is false\n",		\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		BUG();							\
+	}								\
+} while(0)
+
+#else
+
+#define ASSERT(X)				\
+do {						\
+} while(0)
+
+#define ASSERTCMP(X, OP, Y)			\
+do {						\
+} while(0)
+
+#define ASSERTIF(C, X)				\
+do {						\
+} while(0)
+
+#define ASSERTIFCMP(C, X, OP, Y)		\
+do {						\
+} while(0)
+
+#endif
diff --git a/fs/fcntl.c b/fs/fcntl.c
index d35cbc6..b43d821 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -529,6 +529,8 @@ int send_sigurg(struct fown_struct *fown
 	return ret;
 }
 
+EXPORT_SYMBOL(send_sigurg);
+
 static DEFINE_RWLOCK(fasync_lock);
 static kmem_cache_t *fasync_cache __read_mostly;
 
diff --git a/fs/file_table.c b/fs/file_table.c
index 0b42be9..fc1f9bc 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -265,6 +265,7 @@ struct file fastcall *fget_light(unsigne
 	return file;
 }
 
+EXPORT_SYMBOL_GPL(fget_light);
 
 void put_filp(struct file *file)
 {
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 049382d..3fec74f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -219,6 +219,12 @@ static inline void wait_on_page_fs_misc(
 extern void fastcall end_page_fs_misc(struct page *page);
 
 /*
+ * permit installation of a state change monitor in the queue for a page
+ */
+extern void install_page_waitqueue_monitor(struct page *page,
+					   wait_queue_t *monitor);
+
+/*
  * Fault a userspace page into pagetables.  Return non-zero on a fault.
  *
  * This assumes that two userspace pages are always sufficient.  That's
diff --git a/mm/filemap.c b/mm/filemap.c
index 02c4925..a531c90 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -488,6 +488,18 @@ void fastcall wait_on_page_bit(struct pa
 }
 EXPORT_SYMBOL(wait_on_page_bit);
 
+void install_page_waitqueue_monitor(struct page *page, wait_queue_t *monitor)
+{
+	wait_queue_head_t *q = page_waitqueue(page);
+	unsigned long flags;
+
+	spin_lock_irqsave(&q->lock, flags);
+	__add_wait_queue(q, monitor);
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+
+EXPORT_SYMBOL_GPL(install_page_waitqueue_monitor);
+
 /**
  * unlock_page() - unlock a locked page
  *
@@ -2136,6 +2148,97 @@ generic_file_buffered_write(struct kiocb
 }
 EXPORT_SYMBOL(generic_file_buffered_write);
 
+/*
+ * This writes the data from the source page to the specified page offset in
+ * the nominated file
+ * - the source page does not need to have any association with the file or the
+ *   page offset
+ */
+int
+generic_file_buffered_write_one_kernel_page(struct file *file,
+					    pgoff_t index,
+					    struct page *src)
+{
+	struct address_space *mapping = file->f_mapping;
+	struct address_space_operations *a_ops = mapping->a_ops;
+	struct pagevec	lru_pvec;
+	struct page *page, *cached_page = NULL;
+	long status = 0;
+
+	pagevec_init(&lru_pvec, 0);
+
+	page = __grab_cache_page(mapping, index, &cached_page, &lru_pvec);
+	if (!page) {
+		BUG_ON(cached_page);
+		return -ENOMEM;
+	}
+
+	status = a_ops->prepare_write(file, page, 0, PAGE_CACHE_SIZE);
+	if (unlikely(status)) {
+		loff_t isize = i_size_read(mapping->host);
+
+		if (status != AOP_TRUNCATED_PAGE)
+			unlock_page(page);
+		page_cache_release(page);
+		if (status == AOP_TRUNCATED_PAGE)
+			goto sync;
+
+		/* prepare_write() may have instantiated a few blocks outside
+		 * i_size.  Trim these off again.
+		 */
+		if ((1ULL << (index + 1)) > isize)
+			vmtruncate(mapping->host, isize);
+		goto sync;
+	}
+
+	copy_highpage(page, src);
+	flush_dcache_page(page);
+
+	status = a_ops->commit_write(file, page, 0, PAGE_CACHE_SIZE);
+	if (status == AOP_TRUNCATED_PAGE) {
+		page_cache_release(page);
+		goto sync;
+	}
+
+	if (status > 0)
+		status = 0;
+
+	unlock_page(page);
+	mark_page_accessed(page);
+	page_cache_release(page);
+	if (status < 0)
+		return status;
+
+	balance_dirty_pages_ratelimited(mapping);
+	cond_resched();
+
+sync:
+	if (cached_page)
+		page_cache_release(cached_page);
+
+	/*
+	 * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC
+	 */
+	if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(mapping->host))) {
+		if (!a_ops->writepage)
+			status = generic_osync_inode(
+				mapping->host, mapping,
+				OSYNC_METADATA | OSYNC_DATA);
+  	}
+
+	/*
+	 * If we get here for O_DIRECT writes then we must have fallen through
+	 * to buffered writes (block instantiation inside i_size).  So we sync
+	 * the file data here, to try to honour O_DIRECT expectations.
+	 */
+	if (unlikely(file->f_flags & O_DIRECT))
+		status = filemap_write_and_wait(mapping);
+
+	pagevec_lru_add(&lru_pvec);
+	return status;
+}
+EXPORT_SYMBOL(generic_file_buffered_write_one_kernel_page);
+
 static ssize_t
 __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
 				unsigned long nr_segs, loff_t *ppos)

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.