public inbox for linux-btrfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] NFS support for btrfs - v2
@ 2008-07-20 20:31 Balaji Rao
  2008-08-17 11:53 ` David Woodhouse
  0 siblings, 1 reply; 27+ messages in thread
From: Balaji Rao @ 2008-07-20 20:31 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Chris Mason, David Woodhouse

Hi,

Here's an implementation of NFS support for btrfs. It does not work in one
particular case as described in 
http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg00298.html.

This uses the btrfs_iget helper introduced previously.

Comments ?
---

Signed-off-by: Balaji Rao <balajirrao@gmail.com>

diff -r 3f0eee804974 Makefile
--- a/Makefile	Thu Jun 26 10:34:20 2008 -0400
+++ b/Makefile	Mon Jul 21 01:14:45 2008 +0530
@@ -6,7 +6,8 @@
 	   hash.o file-item.o inode-item.o inode-map.o disk-io.o \
 	   transaction.o bit-radix.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
-	   extent_io.o volumes.o async-thread.o ioctl.o locking.o
+	   extent_io.o volumes.o async-thread.o ioctl.o locking.o \
+	   export.o
 
 btrfs-$(CONFIG_FS_POSIX_ACL)	+= acl.o
 else
diff -r 3f0eee804974 export.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export.c	Mon Jul 21 01:14:45 2008 +0530
@@ -0,0 +1,183 @@
+#include <linux/fs.h>
+#include <linux/types.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "btrfs_inode.h"
+#include "print-tree.h"
+#include "export.h"
+
+/* The size of encoded fh is the type number of the fh itself */
+#define BTRFS_FID_NON_CONNECTABLE		5
+#define BTRFS_FID_CONNECTABLE	 		8
+
+static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
+			int connectable)
+{
+	struct btrfs_fid *fid = (struct btrfs_fid *)fh;
+	struct inode *inode = dentry->d_inode;
+	int len = *max_len;
+
+	if ((len < BTRFS_FID_NON_CONNECTABLE ) ||
+			(connectable && len < BTRFS_FID_CONNECTABLE)) {
+		return 255;
+	}
+	
+	len  = BTRFS_FID_NON_CONNECTABLE;
+
+	fid->objectid = BTRFS_I(inode)->location.objectid;
+	fid->root_objectid = BTRFS_I(inode)->root->objectid;
+	fid->gen = inode->i_generation;
+
+	if (connectable && !S_ISDIR(inode->i_mode)) {
+		struct inode *parent;
+
+		spin_lock(&dentry->d_lock);
+
+		parent = dentry->d_parent->d_inode;
+		fid->parent_objectid = BTRFS_I(parent)->location.objectid;
+		fid->parent_gen = parent->i_generation;
+
+		spin_unlock(&dentry->d_lock);
+		len = BTRFS_FID_CONNECTABLE;
+	}
+	*max_len = len;
+
+	/* We return length itself for the type */
+	return len;
+
+}
+
+static struct dentry * btrfs_get_dentry(struct super_block *sb,
+			u64 objectid, u64 root_objectid, u32 generation)
+{
+	struct btrfs_root *root;
+	struct inode *inode;
+	struct dentry *result;
+	struct btrfs_key key;
+
+	key.objectid = objectid;
+	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
+	key.offset = 0;
+
+	root = btrfs_lookup_fs_root(btrfs_sb(sb)->fs_info, root_objectid);
+	inode = btrfs_iget(sb, &key, root, NULL);
+	if (IS_ERR(inode))
+		return (void *)inode;
+
+	if(generation != inode->i_generation) {
+		iput(inode);
+		return ERR_PTR(-ESTALE);
+	}
+
+	result = d_alloc_anon(inode);
+	if(!result) {
+		iput(inode);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return result;
+}
+
+static struct dentry *btrfs_fh_to_parent(struct super_block *sb,
+		struct fid *fh, int fh_len, int fh_type)
+{
+	struct btrfs_fid *fid = (struct btrfs_fid *) fh;
+	u64 objectid, root_objectid;
+	u32 generation;
+
+	if (fh_type != BTRFS_FID_CONNECTABLE) {
+		return NULL;
+	}
+
+	
+	root_objectid = fid->root_objectid;
+	objectid = fid->parent_objectid;
+	generation = fid->parent_gen;
+
+	return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+}
+
+static struct dentry *btrfs_fh_to_dentry(struct super_block *sb,
+		struct fid *fh, int fh_len, int fh_type)
+{
+	struct btrfs_fid *fid = (struct btrfs_fid *) fh;
+	u64 objectid, root_objectid;
+	u32 generation;
+
+	if (fh_type > BTRFS_FID_CONNECTABLE) {
+		return NULL;
+	}
+
+	objectid = fid->objectid;
+	root_objectid = fid->root_objectid;
+	generation = fid->gen;
+
+	return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+}
+
+static struct dentry *btrfs_get_parent(struct dentry *child)
+{
+	struct inode *dir = child->d_inode;
+	struct inode *inode;
+	struct dentry *parent;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;	
+	u32 nritems;
+	int slot;
+	u64 objectid;
+	int ret;
+
+	path = btrfs_alloc_path();
+
+	key.objectid = dir->i_ino;
+	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
+	key.offset = 0;
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	BUG_ON(ret == 0);
+	ret = 0;
+
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+	nritems = btrfs_header_nritems(leaf);
+	if (slot >= nritems) {
+		goto out;
+	}
+
+	btrfs_item_key_to_cpu(leaf, &key, slot);
+	if (key.objectid != dir->i_ino ||
+	    key.type != BTRFS_INODE_REF_KEY) {
+		goto out;
+	}
+
+	btrfs_free_path(path);
+	objectid = key.offset;
+
+	/* Build a new key for the inode item */
+	key.objectid = objectid;
+	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+	key.offset = 0;
+
+	inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
+	
+	parent = d_alloc_anon(inode);
+	if (!parent) {
+		iput(inode);
+		parent = ERR_PTR(-ENOMEM);
+	}
+	
+	return parent;
+
+out:
+	btrfs_free_path(path);
+	return ERR_PTR(-EINVAL);
+
+}
+
+const struct export_operations btrfs_export_ops = {
+	.encode_fh	= btrfs_encode_fh,
+	.fh_to_dentry	= btrfs_fh_to_dentry,
+	.fh_to_parent	= btrfs_fh_to_parent,
+	.get_parent	= btrfs_get_parent,
+};
diff -r 3f0eee804974 export.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export.h	Mon Jul 21 01:14:45 2008 +0530
@@ -0,0 +1,17 @@
+#ifndef BTRFS_EXPORT_H
+#define BTRFS_EXPORT_H
+
+#include <linux/exportfs.h>
+
+extern const struct export_operations btrfs_export_ops;
+
+struct btrfs_fid {
+	u64 objectid;
+	u64 root_objectid;
+	u32 gen;
+
+	u64 parent_objectid;
+	u32 parent_gen;
+} __attribute__ ((packed));
+
+#endif
diff -r 3f0eee804974 super.c
--- a/super.c	Thu Jun 26 10:34:20 2008 -0400
+++ b/super.c	Mon Jul 21 01:14:45 2008 +0530
@@ -45,6 +45,7 @@
 #include "print-tree.h"
 #include "xattr.h"
 #include "volumes.h"
+#include "export.h"
 
 #define BTRFS_SUPER_MAGIC 0x9123683E
 
@@ -298,6 +299,7 @@
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_magic = BTRFS_SUPER_MAGIC;
 	sb->s_op = &btrfs_super_ops;
+	sb->s_export_op = &btrfs_export_ops;
 	sb->s_xattr = btrfs_xattr_handlers;
 	sb->s_time_gran = 1;
 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-07-20 20:31 [PATCH] NFS support for btrfs - v2 Balaji Rao
@ 2008-08-17 11:53 ` David Woodhouse
  2008-08-17 12:51   ` Balaji Rao
  0 siblings, 1 reply; 27+ messages in thread
From: David Woodhouse @ 2008-08-17 11:53 UTC (permalink / raw)
  To: Balaji Rao; +Cc: linux-btrfs, Chris Mason

On Mon, 2008-07-21 at 02:01 +0530, Balaji Rao wrote:
> Here's an implementation of NFS support for btrfs. It does not work in
> one particular case as described in 
> http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg00298.html.

I can't get it to work properly. The simple case of exporting and using
a file system works -- but that doesn't exercise the get_parent() or
dentry_to_fh() code paths. To test those, you need to clear the server's
dcache completely -- I prefer just rebooting the server.

Note that the first problem you'll hit is the lack of stable fsid --
because btrfs uses an anonymous superblock, it might cause stale file
handles after a reboot, unless your test setup mounts exactly the same
anonymous file systems each time. That bit me when I context switched
from something else and had debugfs mounted before btrfs, then rebooted
and didn't mount debugfs first. I'll deal with the fsid problem
separately; just be aware of it and avoid it for now.

The second problem is that btrfs_fh_to_dentry() fails -- it seems we
need to set the key type BTRFS_INODE_ITEM_KEY in btrfs_get_dentry(),
rather than BTRFS_INODE_REF_KEY. I still haven't learned enough to know
precisely what the implications of that are -- but unless I make that
change, btrfs_lookup_inode() fails and btrfs_read_locked_inode() marks
the resulting inode as bad.

You can test that by mounting the exported file system, then rebooting
the server, then running 'ls' in the NFS-mounted file system.

My next test was to make a deep directory path: mkdir -p a/b/c/d/e/f/g
and then change directory into it. Again reboot the server and run 'ls'.
This time, I see 'ls: cannot open directory .'. In this csse, it seems
to be because btrfs_get_parent() is failing. In my case, it seems to be
because the 'if (slot >= nritems)' check is triggering -- both are set
to 14. I'm now trying to work out precisely what that means...

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-17 11:53 ` David Woodhouse
@ 2008-08-17 12:51   ` Balaji Rao
  2008-08-17 12:56     ` David Woodhouse
  2008-08-18 11:51     ` David Woodhouse
  0 siblings, 2 replies; 27+ messages in thread
From: Balaji Rao @ 2008-08-17 12:51 UTC (permalink / raw)
  To: David Woodhouse; +Cc: linux-btrfs, Chris Mason

On Sunday 17 August 2008 05:23:22 pm David Woodhouse wrote:
> On Mon, 2008-07-21 at 02:01 +0530, Balaji Rao wrote:
> > Here's an implementation of NFS support for btrfs. It does not work in
> > one particular case as described in
> > http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg00298.html.
>
> I can't get it to work properly. The simple case of exporting and using
> a file system works -- but that doesn't exercise the get_parent() or
> dentry_to_fh() code paths. To test those, you need to clear the server's
> dcache completely -- I prefer just rebooting the server.
>

OK. I was not very sure if NFS worked across server reboots.

> Note that the first problem you'll hit is the lack of stable fsid --
> because btrfs uses an anonymous superblock, it might cause stale file
> handles after a reboot, unless your test setup mounts exactly the same
> anonymous file systems each time. That bit me when I context switched
> from something else and had debugfs mounted before btrfs, then rebooted
> and didn't mount debugfs first. I'll deal with the fsid problem
> separately; just be aware of it and avoid it for now.
>
Hmmm.. how do we deal with that ?

> The second problem is that btrfs_fh_to_dentry() fails -- it seems we
> need to set the key type BTRFS_INODE_ITEM_KEY in btrfs_get_dentry(),
> rather than BTRFS_INODE_REF_KEY. I still haven't learned enough to know
> precisely what the implications of that are -- but unless I make that
> change, btrfs_lookup_inode() fails and btrfs_read_locked_inode() marks
> the resulting inode as bad.
>
Right. Again, my bad. That's a silly mistake, which didn't surface because of 
bad testing!

> You can test that by mounting the exported file system, then rebooting
> the server, then running 'ls' in the NFS-mounted file system.
>
> My next test was to make a deep directory path: mkdir -p a/b/c/d/e/f/g
> and then change directory into it. Again reboot the server and run 'ls'.
> This time, I see 'ls: cannot open directory .'. In this csse, it seems
> to be because btrfs_get_parent() is failing. In my case, it seems to be
> because the 'if (slot >= nritems)' check is triggering -- both are set
> to 14. I'm now trying to work out precisely what that means...

I think this means that the inode ref is not found, which is weird. I remember 
that it worked with it a really deep path. I'll try to reproduce the problem 
and see what's going on..

-- 
Thanks,
Balaji

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-17 12:51   ` Balaji Rao
@ 2008-08-17 12:56     ` David Woodhouse
  2008-08-17 13:24       ` Balaji Rao
  2008-08-18 11:51     ` David Woodhouse
  1 sibling, 1 reply; 27+ messages in thread
From: David Woodhouse @ 2008-08-17 12:56 UTC (permalink / raw)
  To: Balaji Rao; +Cc: linux-btrfs, Chris Mason

On Sun, 2008-08-17 at 18:21 +0530, Balaji Rao wrote:
> On Sunday 17 August 2008 05:23:22 pm David Woodhouse wrote:
> > On Mon, 2008-07-21 at 02:01 +0530, Balaji Rao wrote:
> > > Here's an implementation of NFS support for btrfs. It does not work in
> > > one particular case as described in
> > > http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg00298.html.
> >
> > I can't get it to work properly. The simple case of exporting and using
> > a file system works -- but that doesn't exercise the get_parent() or
> > dentry_to_fh() code paths. To test those, you need to clear the server's
> > dcache completely -- I prefer just rebooting the server.
> >
> 
> OK. I was not very sure if NFS worked across server reboots.

It definitely should.

> > Note that the first problem you'll hit is the lack of stable fsid --
> > because btrfs uses an anonymous superblock, it might cause stale file
> > handles after a reboot, unless your test setup mounts exactly the same
> > anonymous file systems each time. That bit me when I context switched
> > from something else and had debugfs mounted before btrfs, then rebooted
> > and didn't mount debugfs first. I'll deal with the fsid problem
> > separately; just be aware of it and avoid it for now.
> >
> Hmmm.. how do we deal with that ?

I'm not entirely sure yet. I had a patch to make the kernel
automatically set a uuid on the export, which was simple enough. 

Unfortunately that isn't sufficient, because although we then return an
appropriate root fh to the mount request, we then aren't capable of
_interpreting_ that fh when the client immediately gives it back to us
in a subsequent fsinfo request. Because we're relying on mountd to do
the initial fh->dentry conversion for us, and mountd is very limited in
its uuid handling -- it assumes that
 - there is a block device
 - there is a 1:1 mapping between block device and uuid
 - libuuid can cope with new file systems.

> > The second problem is that btrfs_fh_to_dentry() fails -- it seems we
> > need to set the key type BTRFS_INODE_ITEM_KEY in btrfs_get_dentry(),
> > rather than BTRFS_INODE_REF_KEY. I still haven't learned enough to know
> > precisely what the implications of that are -- but unless I make that
> > change, btrfs_lookup_inode() fails and btrfs_read_locked_inode() marks
> > the resulting inode as bad.
> >
> Right. Again, my bad. That's a silly mistake, which didn't surface because of 
> bad testing!
> 
> > You can test that by mounting the exported file system, then rebooting
> > the server, then running 'ls' in the NFS-mounted file system.
> >
> > My next test was to make a deep directory path: mkdir -p a/b/c/d/e/f/g
> > and then change directory into it. Again reboot the server and run 'ls'.
> > This time, I see 'ls: cannot open directory .'. In this csse, it seems
> > to be because btrfs_get_parent() is failing. In my case, it seems to be
> > because the 'if (slot >= nritems)' check is triggering -- both are set
> > to 14. I'm now trying to work out precisely what that means...
> 
> I think this means that the inode ref is not found, which is weird. I remember 
> that it worked with it a really deep path. I'll try to reproduce the problem 
> and see what's going on..

See below... it seems to be working now.

diff --git a/export.c b/export.c
index 1b8875c..6209f35 100644
--- a/export.c
+++ b/export.c
@@ -72,7 +72,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
 	struct btrfs_key key;
 
 	key.objectid = objectid;
-	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
+	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
 	key.offset = 0;
 
 	root = btrfs_lookup_fs_root(btrfs_sb(sb)->fs_info, root_objectid);
@@ -164,14 +164,22 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
 	leaf = path->nodes[0];
 	slot = path->slots[0];
 	nritems = btrfs_header_nritems(leaf);
-	if (slot >= nritems)
-		goto out;
+	if (slot >= nritems) {
+		ret = btrfs_next_leaf(root, path);
+		if (ret) {
+			btrfs_free_path(path);
+			goto out;
+		}
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+	}
+
+	btrfs_free_path(path);
 
 	btrfs_item_key_to_cpu(leaf, &key, slot);
 	if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
 		goto out;
 
-	btrfs_free_path(path);
 	objectid = key.offset;
 
 	/* Build a new key for the inode item */

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-17 12:56     ` David Woodhouse
@ 2008-08-17 13:24       ` Balaji Rao
  2008-08-17 13:30         ` David Woodhouse
  2008-08-17 13:40         ` [PATCH] NFS support for btrfs - v2 David Woodhouse
  0 siblings, 2 replies; 27+ messages in thread
From: Balaji Rao @ 2008-08-17 13:24 UTC (permalink / raw)
  To: David Woodhouse; +Cc: linux-btrfs, Chris Mason

On Sunday 17 August 2008 06:26:03 pm David Woodhouse wrote:
> On Sun, 2008-08-17 at 18:21 +0530, Balaji Rao wrote:
> > On Sunday 17 August 2008 05:23:22 pm David Woodhouse wrote:
> > > On Mon, 2008-07-21 at 02:01 +0530, Balaji Rao wrote:
> > > > Here's an implementation of NFS support for btrfs. It does not work
> > > > in one particular case as described in
> > > > http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg00298.html
> > > >.
> > >
> > > I can't get it to work properly. The simple case of exporting and using
> > > a file system works -- but that doesn't exercise the get_parent() or
> > > dentry_to_fh() code paths. To test those, you need to clear the
> > > server's dcache completely -- I prefer just rebooting the server.
> >
> > OK. I was not very sure if NFS worked across server reboots.
>
> It definitely should.
>
> > > Note that the first problem you'll hit is the lack of stable fsid --
> > > because btrfs uses an anonymous superblock, it might cause stale file
> > > handles after a reboot, unless your test setup mounts exactly the same
> > > anonymous file systems each time. That bit me when I context switched
> > > from something else and had debugfs mounted before btrfs, then rebooted
> > > and didn't mount debugfs first. I'll deal with the fsid problem
> > > separately; just be aware of it and avoid it for now.
> >
> > Hmmm.. how do we deal with that ?
>
> I'm not entirely sure yet. I had a patch to make the kernel
> automatically set a uuid on the export, which was simple enough.
>
> Unfortunately that isn't sufficient, because although we then return an
> appropriate root fh to the mount request, we then aren't capable of
> _interpreting_ that fh when the client immediately gives it back to us
> in a subsequent fsinfo request. Because we're relying on mountd to do
> the initial fh->dentry conversion for us, and mountd is very limited in
> its uuid handling -- it assumes that
>  - there is a block device
>  - there is a 1:1 mapping between block device and uuid
>  - libuuid can cope with new file systems.
>
> > > The second problem is that btrfs_fh_to_dentry() fails -- it seems we
> > > need to set the key type BTRFS_INODE_ITEM_KEY in btrfs_get_dentry(),
> > > rather than BTRFS_INODE_REF_KEY. I still haven't learned enough to know
> > > precisely what the implications of that are -- but unless I make that
> > > change, btrfs_lookup_inode() fails and btrfs_read_locked_inode() marks
> > > the resulting inode as bad.
> >
> > Right. Again, my bad. That's a silly mistake, which didn't surface
> > because of bad testing!
> >
> > > You can test that by mounting the exported file system, then rebooting
> > > the server, then running 'ls' in the NFS-mounted file system.
> > >
> > > My next test was to make a deep directory path: mkdir -p a/b/c/d/e/f/g
> > > and then change directory into it. Again reboot the server and run
> > > 'ls'. This time, I see 'ls: cannot open directory .'. In this csse, it
> > > seems to be because btrfs_get_parent() is failing. In my case, it seems
> > > to be because the 'if (slot >= nritems)' check is triggering -- both
> > > are set to 14. I'm now trying to work out precisely what that means...
> >
> > I think this means that the inode ref is not found, which is weird. I
> > remember that it worked with it a really deep path. I'll try to reproduce
> > the problem and see what's going on..
>
> See below... it seems to be working now.
>
> diff --git a/export.c b/export.c
> index 1b8875c..6209f35 100644
> --- a/export.c
> +++ b/export.c
> @@ -72,7 +72,7 @@ static struct dentry *btrfs_get_dentry(struct super_block
> *sb, u64 objectid, struct btrfs_key key;
>
>  	key.objectid = objectid;
> -	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
> +	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
>  	key.offset = 0;
>
>  	root = btrfs_lookup_fs_root(btrfs_sb(sb)->fs_info, root_objectid);
> @@ -164,14 +164,22 @@ static struct dentry *btrfs_get_parent(struct dentry
> *child) leaf = path->nodes[0];
>  	slot = path->slots[0];
>  	nritems = btrfs_header_nritems(leaf);
> -	if (slot >= nritems)
> -		goto out;
> +	if (slot >= nritems) {
> +		ret = btrfs_next_leaf(root, path);
> +		if (ret) {
> +			btrfs_free_path(path);
> +			goto out;
> +		}
> +		leaf = path->nodes[0];
> +		slot = path->slots[0];
> +	}
> +
> +	btrfs_free_path(path);
>
>  	btrfs_item_key_to_cpu(leaf, &key, slot);
>  	if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
>  		goto out;
>
> -	btrfs_free_path(path);
>  	objectid = key.offset;
>
>  	/* Build a new key for the inode item */

OK. I had copied over this code snippet from inode.c:btrfs_inode_by_name, 
which has the condition 'if (slot >= nritems)' removed now by this.

changeset:   631:87490dc3bb59
user:        "Yan Zheng" <yanzheng@21cn.com>
date:        Thu Jul 24 12:19:32 2008 -0400
summary:     Fix .. lookup corner case

I think we should refactor btrfs_inode_by_name into something we can use ?

-- 
Thanks,
Balaji

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-17 13:24       ` Balaji Rao
@ 2008-08-17 13:30         ` David Woodhouse
  2008-08-17 14:17           ` David Woodhouse
  2008-08-17 13:40         ` [PATCH] NFS support for btrfs - v2 David Woodhouse
  1 sibling, 1 reply; 27+ messages in thread
From: David Woodhouse @ 2008-08-17 13:30 UTC (permalink / raw)
  To: Balaji Rao; +Cc: linux-btrfs, Chris Mason

On Sun, 2008-08-17 at 18:54 +0530, Balaji Rao wrote:
> 
> 
> OK. I had copied over this code snippet from
> inode.c:btrfs_inode_by_name, 
> which has the condition 'if (slot >= nritems)' removed now by this.
> 
> changeset:   631:87490dc3bb59
> user:        "Yan Zheng" <yanzheng@21cn.com>
> date:        Thu Jul 24 12:19:32 2008 -0400
> summary:     Fix .. lookup corner case
> 
> I think we should refactor btrfs_inode_by_name into something we can
> use ?

I'm not entirely sure why btrfs_inode_by_name() handles "." and ".." at
all. We haven't needed that since the 2.2 kernel, have we?

I think we should just remove that code completely -- and remove the
similar code from btrfs_real_readdir() while we're at it -- just use
parent_ino(filp->f_path.dentry) instead. In _that_ case, we know the
dcache is connected.

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-17 13:24       ` Balaji Rao
  2008-08-17 13:30         ` David Woodhouse
@ 2008-08-17 13:40         ` David Woodhouse
  2008-08-18 19:23           ` Chris Mason
  1 sibling, 1 reply; 27+ messages in thread
From: David Woodhouse @ 2008-08-17 13:40 UTC (permalink / raw)
  To: Balaji Rao; +Cc: linux-btrfs, Chris Mason

On Sun, 2008-08-17 at 18:54 +0530, Balaji Rao wrote:
> 
> OK. I had copied over this code snippet from
> inode.c:btrfs_inode_by_name, 
> which has the condition 'if (slot >= nritems)' removed now by this.
> 
> changeset:   631:87490dc3bb59
> user:        "Yan Zheng" <yanzheng@21cn.com>
> date:        Thu Jul 24 12:19:32 2008 -0400
> summary:     Fix .. lookup corner case

Er, isn't that just moving the error case around?

That's commit 3dcd1334c286fa4467219302ff2f9a4a190fbb9c in the git tree:
http://git.kernel.org/?p=linux/kernel/git/dwmw2/btrfs-kernel-unstable.git;a=commitdiff;h=3dcd1334

Your version fails if the item we want is in slot 0, because we don't
jump forward to the next leaf.

The new version fails if it's in the _last_ slot -- the return from
btrfs_search_slot() now points to the first slot in the next leaf, and
we treat that as an error instead of rewinding to the one we want. It's
just the same error, but in reverse.

Or am I missing something?

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-17 13:30         ` David Woodhouse
@ 2008-08-17 14:17           ` David Woodhouse
  2008-08-17 16:10             ` [PATCH] rewrite btrfs_readdir() David Woodhouse
  0 siblings, 1 reply; 27+ messages in thread
From: David Woodhouse @ 2008-08-17 14:17 UTC (permalink / raw)
  To: Balaji Rao; +Cc: linux-btrfs, Chris Mason

On Sun, 2008-08-17 at 14:30 +0100, David Woodhouse wrote:
> I think we should just remove that code completely -- and remove the
> similar code from btrfs_real_readdir() while we're at it -- just use
> parent_ino(filp->f_path.dentry) instead. In _that_ case, we know the
> dcache is connected.

>From 6a3710b961c61fef038cca1722176b171697b3da Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sun, 17 Aug 2008 15:14:48 +0100
Subject: [PATCH] Remove special cases for "." and ".."

We never get asked by the VFS to lookup either of them, and we can
handle the readdir() case a lot more simply, too.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 inode.c |   52 ++--------------------------------------------------
 1 files changed, 2 insertions(+), 50 deletions(-)

diff --git a/inode.c b/inode.c
index fe6d4ae..411dfc4 100644
--- a/inode.c
+++ b/inode.c
@@ -1742,42 +1742,9 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	int ret = 0;
 
-	if (namelen == 1 && strcmp(name, ".") == 0) {
-		location->objectid = dir->i_ino;
-		location->type = BTRFS_INODE_ITEM_KEY;
-		location->offset = 0;
-		return 0;
-	}
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
-	if (namelen == 2 && strcmp(name, "..") == 0) {
-		struct btrfs_key key;
-		struct extent_buffer *leaf;
-		int slot;
-
-		key.objectid = dir->i_ino;
-		key.offset = (u64)-1;
-		btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
-		if (ret < 0 || path->slots[0] == 0)
-			goto out_err;
-		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-		BUG_ON(ret == 0);
-		ret = 0;
-		leaf = path->nodes[0];
-		slot = path->slots[0] - 1;
-
-		btrfs_item_key_to_cpu(leaf, &key, slot);
-		if (key.objectid != dir->i_ino ||
-		    key.type != BTRFS_INODE_REF_KEY) {
-			goto out_err;
-		}
-		location->objectid = key.offset;
-		location->type = BTRFS_INODE_ITEM_KEY;
-		location->offset = 0;
-		goto out;
-	}
-
 	di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
 				    namelen, 0);
 	if (IS_ERR(di))
@@ -2000,29 +1967,14 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 
 	/* special case for .., just use the back ref */
 	if (filp->f_pos == 1) {
-		btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
-		key.offset = (u64)-1;
-		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-		if (ret < 0 || path->slots[0] == 0) {
-			btrfs_release_path(root, path);
-			goto read_dir_items;
-		}
-		BUG_ON(ret == 0);
-		leaf = path->nodes[0];
-		slot = path->slots[0] - 1;
-		btrfs_item_key_to_cpu(leaf, &found_key, slot);
-		btrfs_release_path(root, path);
-		if (found_key.objectid != key.objectid ||
-		    found_key.type != BTRFS_INODE_REF_KEY)
-			goto read_dir_items;
+		u64 pino = parent_ino(filp->f_path.dentry);
 		over = filldir(dirent, "..", 2,
-			       2, found_key.offset, DT_DIR);
+			       2, pino, DT_DIR);
 		if (over)
 			goto nopos;
 		filp->f_pos = 2;
 	}
 
-read_dir_items:
 	btrfs_set_key_type(&key, key_type);
 	key.offset = filp->f_pos;
 
-- 
1.5.5.1


-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH] rewrite btrfs_readdir()
  2008-08-17 14:17           ` David Woodhouse
@ 2008-08-17 16:10             ` David Woodhouse
  2008-08-18 18:46               ` Chris Mason
  0 siblings, 1 reply; 27+ messages in thread
From: David Woodhouse @ 2008-08-17 16:10 UTC (permalink / raw)
  To: Balaji Rao; +Cc: linux-btrfs, Chris Mason

On Sun, 2008-08-17 at 15:17 +0100, David Woodhouse wrote:
> Subject: [PATCH] Remove special cases for "." and ".."
> 
> We never get asked by the VFS to lookup either of them, and we can
> handle the readdir() case a lot more simply, too.

And a few other minor cleanups... some cosmetic, but note the WARN_ON().

From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sun, 17 Aug 2008 17:08:36 +0100
Subject: [PATCH] Minor cleanup of btrfs_real_readdir()

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 inode.c |   42 +++++++++++++++++++++++++++---------------
 1 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/inode.c b/inode.c
index 411dfc4..187e1f8 100644
--- a/inode.c
+++ b/inode.c
@@ -1960,34 +1960,34 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 			return 0;
 		filp->f_pos = 1;
 	}
-
-	key.objectid = inode->i_ino;
-	path = btrfs_alloc_path();
-	path->reada = 2;
-
 	/* special case for .., just use the back ref */
 	if (filp->f_pos == 1) {
 		u64 pino = parent_ino(filp->f_path.dentry);
 		over = filldir(dirent, "..", 2,
 			       2, pino, DT_DIR);
 		if (over)
-			goto nopos;
+			return 0;
 		filp->f_pos = 2;
 	}
 
+	path = btrfs_alloc_path();
+	path->reada = 2;
+
 	btrfs_set_key_type(&key, key_type);
 	key.offset = filp->f_pos;
+	key.objectid = inode->i_ino;
 
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	if (ret < 0)
 		goto err;
 	advance = 0;
-	while(1) {
+
+	while (1) {
 		leaf = path->nodes[0];
 		nritems = btrfs_header_nritems(leaf);
 		slot = path->slots[0];
 		if (advance || slot >= nritems) {
-			if (slot >= nritems -1) {
+			if (slot >= nritems - 1) {
 				ret = btrfs_next_leaf(root, path);
 				if (ret)
 					break;
@@ -2011,19 +2011,23 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 			continue;
 
 		filp->f_pos = found_key.offset;
-		advance = 1;
+
 		di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
 		di_cur = 0;
 		di_total = btrfs_item_size(leaf, item);
-		while(di_cur < di_total) {
+
+		while (di_cur < di_total) {
 			struct btrfs_key location;
 
 			name_len = btrfs_dir_name_len(leaf, di);
-			if (name_len < 32) {
+			if (name_len <= sizeof(tmp_name)) {
 				name_ptr = tmp_name;
 			} else {
 				name_ptr = kmalloc(name_len, GFP_NOFS);
-				BUG_ON(!name_ptr);
+				if (!name_ptr) {
+					ret = -ENOMEM;
+					goto err;
+				}
 			}
 			read_extent_buffer(leaf, name_ptr,
 					   (unsigned long)(di + 1), name_len);
@@ -2031,8 +2035,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 			d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
 			btrfs_dir_item_key_to_cpu(leaf, di, &location);
 			over = filldir(dirent, name_ptr, name_len,
-				       found_key.offset,
-				       location.objectid,
+				       found_key.offset, location.objectid,
 				       d_type);
 
 			if (name_ptr != tmp_name)
@@ -2040,12 +2043,21 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 
 			if (over)
 				goto nopos;
+
 			di_len = btrfs_dir_name_len(leaf, di) +
-				btrfs_dir_data_len(leaf, di) +sizeof(*di);
+				 btrfs_dir_data_len(leaf, di) + sizeof(*di);
 			di_cur += di_len;
 			di = (struct btrfs_dir_item *)((char *)di + di_len);
+
+			/* If there's more than one directory entry in each
+			   item, then the offset isn't unique. Seeking in 
+			   directories will be broken. Can this ever actually
+			   happen? */
+			WARN_ON(di_cur != di_total);
 		}
 	}
+
+	/* Reached end of directory/root. Bump pos past the last item. */
 	if (key_type == BTRFS_DIR_INDEX_KEY)
 		filp->f_pos = INT_LIMIT(typeof(filp->f_pos));
 	else
-- 
1.5.5.1



-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-17 12:51   ` Balaji Rao
  2008-08-17 12:56     ` David Woodhouse
@ 2008-08-18 11:51     ` David Woodhouse
  2008-08-18 12:10       ` David Woodhouse
  1 sibling, 1 reply; 27+ messages in thread
From: David Woodhouse @ 2008-08-18 11:51 UTC (permalink / raw)
  To: Balaji Rao; +Cc: linux-btrfs, Chris Mason

On Sun, 2008-08-17 at 18:21 +0530, Balaji Rao wrote:
> > Note that the first problem you'll hit is the lack of stable fsid --
> > because btrfs uses an anonymous superblock, it might cause stale file
> > handles after a reboot, unless your test setup mounts exactly the same
> > anonymous file systems each time. That bit me when I context switched
> > from something else and had debugfs mounted before btrfs, then rebooted
> > and didn't mount debugfs first. I'll deal with the fsid problem
> > separately; just be aware of it and avoid it for now.
> >
> Hmmm.. how do we deal with that ?

First we teach nfs-utils to derive a UUID from the 'f_fsid' field it's
already being given by the kernel when it calls statfs():
git.infradead.org/users/dwmw2/nfs-utils.git

Then we make btrfs put something suitable into the f_fsid field in
btrfs_statfs(). The patch below works OK, but doesn't yet handle
subvolumes -- it gives the same fsid for all subvolumes.

But then, since we're also returning the same {dev,ino#} for all
subvolumes, nfs exporting isn't the _only_ thing that's going to get
confused...

From: David Woodhouse <David.Woodhouse@intel.com>
Date: Mon, 18 Aug 2008 12:01:52 +0100
Subject: [PATCH] Fill f_fsid field in btrfs_statfs()

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 super.c |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/super.c b/super.c
index e830e0e..6446ab7 100644
--- a/super.c
+++ b/super.c
@@ -489,6 +489,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	struct btrfs_root *root = btrfs_sb(dentry->d_sb);
 	struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
 	int bits = dentry->d_sb->s_blocksize_bits;
+	__be32 *fsid = (__be32 *)root->fs_info->fsid;
 
 	buf->f_namelen = BTRFS_NAME_LEN;
 	buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
@@ -497,6 +498,11 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_bavail = buf->f_bfree;
 	buf->f_bsize = dentry->d_sb->s_blocksize;
 	buf->f_type = BTRFS_SUPER_MAGIC;
+	/* We treat it as constant endianness (it doesn't matter _which_)
+	   because we want the fsid to come out the same whether mounted 
+	   on a big-endian or little-endian host */
+	buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
+	buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
 	return 0;
 }
 
-- 
1.5.5.1


-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-18 11:51     ` David Woodhouse
@ 2008-08-18 12:10       ` David Woodhouse
  2008-08-18 19:15         ` Chris Mason
  0 siblings, 1 reply; 27+ messages in thread
From: David Woodhouse @ 2008-08-18 12:10 UTC (permalink / raw)
  To: Balaji Rao; +Cc: linux-btrfs, Chris Mason

On Mon, 2008-08-18 at 12:51 +0100, David Woodhouse wrote:
> The patch below works OK, but doesn't yet handle
> subvolumes -- it gives the same fsid for all subvolumes.

Is this the correct fix?

diff --git a/super.c b/super.c
index 6446ab7..55f4d00 100644
--- a/super.c
+++ b/super.c
@@ -503,6 +503,10 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	   on a big-endian or little-endian host */
 	buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
 	buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
+	/* Mask in the root object ID too, to disambiguate subvols */
+	buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32;
+	buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid;
+
 	return 0;
 }
 

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] rewrite btrfs_readdir()
  2008-08-17 16:10             ` [PATCH] rewrite btrfs_readdir() David Woodhouse
@ 2008-08-18 18:46               ` Chris Mason
  2008-08-18 19:08                 ` David Woodhouse
  0 siblings, 1 reply; 27+ messages in thread
From: Chris Mason @ 2008-08-18 18:46 UTC (permalink / raw)
  To: David Woodhouse; +Cc: Balaji Rao, linux-btrfs

On Sun, 2008-08-17 at 17:10 +0100, David Woodhouse wrote:
> On Sun, 2008-08-17 at 15:17 +0100, David Woodhouse wrote:
> > Subject: [PATCH] Remove special cases for "." and ".."
> > 
> > We never get asked by the VFS to lookup either of them, and we can
> > handle the readdir() case a lot more simply, too.
> 
> And a few other minor cleanups... some cosmetic, but note the
> WARN_ON().

> +
> +			/* If there's more than one directory entry in each
> +			   item, then the offset isn't unique. Seeking in 
> +			   directories will be broken. Can this ever actually
> +			   happen? */
> +			WARN_ON(di_cur != di_total);
>  		}

It can happen today for the tree of tree roots (directory of subvolumes
and named snapshots).  It happens when the name hashes collide.  For
real directories we use the sequence number index instead of the name
hash index, so it normally won't happen.

Longer term the directory of subvolumes is going to be a real directory
and this special case will go away.

-chris



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] rewrite btrfs_readdir()
  2008-08-18 18:46               ` Chris Mason
@ 2008-08-18 19:08                 ` David Woodhouse
  2008-08-18 19:24                   ` Chris Mason
  0 siblings, 1 reply; 27+ messages in thread
From: David Woodhouse @ 2008-08-18 19:08 UTC (permalink / raw)
  To: Chris Mason; +Cc: Balaji Rao, linux-btrfs

On Mon, 2008-08-18 at 14:46 -0400, Chris Mason wrote:
> It can happen today for the tree of tree roots (directory of
> subvolumes and named snapshots).  It happens when the name hashes
> collide.  For real directories we use the sequence number index
> instead of the name hash index, so it normally won't happen.

Ok, that's fine then. I've removed the WARN_ON() from the patch in my
git tree. This is what I have outstanding for you at
git://git.infradead.org/users/dwmw2/btrfs-kernel-unstable:

Balaji Rao (2):
      Introduce btrfs_iget helper
      NFS support for btrfs - v3

David Woodhouse (7):
      Implement our own copy of the nfsd readdir hack, for older kernels
      Discard sector data in __free_extent()
      Remove special cases for "." and ".."
      Minor cleanup of btrfs_real_readdir()
      Optimise NFS readdir hack slightly; don't call readdir() again when done
      Fill f_fsid field in btrfs_statfs()
      Mask root object ID into f_fsid in btrfs_statfs()

 Makefile      |    2 +-
 compat.h      |   17 ++++
 ctree.h       |    2 +
 export.c      |  208 +++++++++++++++++++++++++++++++++++++++++++++++
 export.h      |   19 +++++
 extent-tree.c |   25 ++++++
 inode.c       |  250 +++++++++++++++++++++++++++++++++++++-------------------
 super.c       |   12 +++
 8 files changed, 449 insertions(+), 86 deletions(-)

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-18 12:10       ` David Woodhouse
@ 2008-08-18 19:15         ` Chris Mason
  0 siblings, 0 replies; 27+ messages in thread
From: Chris Mason @ 2008-08-18 19:15 UTC (permalink / raw)
  To: David Woodhouse; +Cc: Balaji Rao, linux-btrfs

On Mon, 2008-08-18 at 13:10 +0100, David Woodhouse wrote:
> On Mon, 2008-08-18 at 12:51 +0100, David Woodhouse wrote:
> > The patch below works OK, but doesn't yet handle
> > subvolumes -- it gives the same fsid for all subvolumes.
> 
> Is this the correct fix?
> 

This looks sane:

-chris

> diff --git a/super.c b/super.c
> index 6446ab7..55f4d00 100644
> --- a/super.c
> +++ b/super.c
> @@ -503,6 +503,10 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
>  	   on a big-endian or little-endian host */
>  	buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
>  	buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
> +	/* Mask in the root object ID too, to disambiguate subvols */
> +	buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32;
> +	buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid;
> +
>  	return 0;
>  }
>  
> 


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-17 13:40         ` [PATCH] NFS support for btrfs - v2 David Woodhouse
@ 2008-08-18 19:23           ` Chris Mason
  2008-08-18 19:33             ` David Woodhouse
  0 siblings, 1 reply; 27+ messages in thread
From: Chris Mason @ 2008-08-18 19:23 UTC (permalink / raw)
  To: David Woodhouse; +Cc: Balaji Rao, linux-btrfs

On Sun, 2008-08-17 at 14:40 +0100, David Woodhouse wrote:
> On Sun, 2008-08-17 at 18:54 +0530, Balaji Rao wrote:
> > 
> > OK. I had copied over this code snippet from
> > inode.c:btrfs_inode_by_name, 
> > which has the condition 'if (slot >= nritems)' removed now by this.
> > 
> > changeset:   631:87490dc3bb59
> > user:        "Yan Zheng" <yanzheng@21cn.com>
> > date:        Thu Jul 24 12:19:32 2008 -0400
> > summary:     Fix .. lookup corner case
> 
> Er, isn't that just moving the error case around?
> 
> That's commit 3dcd1334c286fa4467219302ff2f9a4a190fbb9c in the git tree:
> http://git.kernel.org/?p=linux/kernel/git/dwmw2/btrfs-kernel-unstable.git;a=commitdiff;h=3dcd1334
> 
> Your version fails if the item we want is in slot 0, because we don't
> jump forward to the next leaf.
> 
> The new version fails if it's in the _last_ slot -- the return from
> btrfs_search_slot() now points to the first slot in the next leaf, and
> we treat that as an error instead of rewinding to the one we want. It's
> just the same error, but in reverse.
> 
> Or am I missing something?
> 

The search key in Yan's patch changes to (u64)-1.  We know that if we're
in slot 0, there's nothing after us in the tree.

-chris



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] rewrite btrfs_readdir()
  2008-08-18 19:08                 ` David Woodhouse
@ 2008-08-18 19:24                   ` Chris Mason
  2008-08-18 19:32                     ` David Woodhouse
  0 siblings, 1 reply; 27+ messages in thread
From: Chris Mason @ 2008-08-18 19:24 UTC (permalink / raw)
  To: David Woodhouse; +Cc: Balaji Rao, linux-btrfs

On Mon, 2008-08-18 at 20:08 +0100, David Woodhouse wrote:
> On Mon, 2008-08-18 at 14:46 -0400, Chris Mason wrote:
> > It can happen today for the tree of tree roots (directory of
> > subvolumes and named snapshots).  It happens when the name hashes
> > collide.  For real directories we use the sequence number index
> > instead of the name hash index, so it normally won't happen.
> 
> Ok, that's fine then. I've removed the WARN_ON() from the patch in my
> git tree. This is what I have outstanding for you at
> git://git.infradead.org/users/dwmw2/btrfs-kernel-unstable:
> 

Ok, just to double check after rereading the thread, are there known
bugs in this stuff?  They look sane to me.

-chris



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] rewrite btrfs_readdir()
  2008-08-18 19:24                   ` Chris Mason
@ 2008-08-18 19:32                     ` David Woodhouse
  0 siblings, 0 replies; 27+ messages in thread
From: David Woodhouse @ 2008-08-18 19:32 UTC (permalink / raw)
  To: Chris Mason; +Cc: Balaji Rao, linux-btrfs

On Mon, 2008-08-18 at 15:24 -0400, Chris Mason wrote:
> Ok, just to double check after rereading the thread, are there known
> bugs in this stuff?  They look sane to me.

No, no known bugs.

This is now working nicely for me -- I can export two separate
subvolumes by NFS, change into a deep subdirectory and reboot the
server, and it all works fine again when the server comes back.

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-18 19:23           ` Chris Mason
@ 2008-08-18 19:33             ` David Woodhouse
  2008-08-18 19:47               ` Chris Mason
  0 siblings, 1 reply; 27+ messages in thread
From: David Woodhouse @ 2008-08-18 19:33 UTC (permalink / raw)
  To: Chris Mason; +Cc: Balaji Rao, linux-btrfs

On Mon, 2008-08-18 at 15:23 -0400, Chris Mason wrote:
> The search key in Yan's patch changes to (u64)-1.  We know that if
> we're in slot 0, there's nothing after us in the tree.

But because we searched for a reference to (u64)-1, the item we want is
actually _earlier_ in the tree, not later.

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-18 19:33             ` David Woodhouse
@ 2008-08-18 19:47               ` Chris Mason
  2008-08-18 20:20                 ` David Woodhouse
  0 siblings, 1 reply; 27+ messages in thread
From: Chris Mason @ 2008-08-18 19:47 UTC (permalink / raw)
  To: David Woodhouse; +Cc: Balaji Rao, linux-btrfs

On Mon, 2008-08-18 at 20:33 +0100, David Woodhouse wrote:
> On Mon, 2008-08-18 at 15:23 -0400, Chris Mason wrote:
> > The search key in Yan's patch changes to (u64)-1.  We know that if
> > we're in slot 0, there's nothing after us in the tree.
> 
> But because we searched for a reference to (u64)-1, the item we want is
> actually _earlier_ in the tree, not later.
> 

Lets pretend I had put in commments something like the code below.  The
important part is that directories have only one link, so they have only
one backref.

	/* 
         * we're a directory and we have at most 1 back reference
         * to our one and only parent directory
         */
	if (namelen == 2 && strcmp(name, "..") == 0) {
                struct btrfs_key key;
                struct extent_buffer *leaf;
                int slot;

                key.objectid = dir->i_ino;

                /* after the search, we'll be
                 * one slot after the last back reference for this
                 * inode.
                 */
                key.offset = (u64)-1;
                btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
		
		/* normally, I would check the return values after the
                 * search.  But this time I merged the patch wrong and
                 * made a bug.  If path->slots[0] == 0 after the
                 * search, there won't be any keys smaller to ours
                 * in the tree.
                 */
                if (ret < 0 || path->slots[0] == 0)
                        goto out_err;
                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
                BUG_ON(ret == 0);
                ret = 0;
                leaf = path->nodes[0];

                /* go back one slot and find our only backref */
                slot = path->slots[0] - 1;

                btrfs_item_key_to_cpu(leaf, &key, slot);
                if (key.objectid != dir->i_ino ||
                    key.type != BTRFS_INODE_REF_KEY) {
                        goto out_err;
                }
                location->objectid = key.offset;
                location->type = BTRFS_INODE_ITEM_KEY;
                location->offset = 0;
                goto out;



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-18 19:47               ` Chris Mason
@ 2008-08-18 20:20                 ` David Woodhouse
  2008-08-18 20:32                   ` Chris Mason
  2008-08-19  0:16                   ` Christoph Hellwig
  0 siblings, 2 replies; 27+ messages in thread
From: David Woodhouse @ 2008-08-18 20:20 UTC (permalink / raw)
  To: Chris Mason; +Cc: Balaji Rao, linux-btrfs

On Mon, 2008-08-18 at 15:47 -0400, Chris Mason wrote:
> Lets pretend I had put in commments something like the code below.
> The important part is that directories have only one link, so they
> have only one backref.

OK. Now can I rip that code out anyway? The VFS will never call
btrfs_lookup() for ".." -- not since the 2.2 kernel :)

I'm still a little confused about precisely what btrfs_search_slot()
returns when it doesn't find a match -- that's probably where the
documentation would be more useful.

What I have in btrfs_get_parent() is correct, I think -- but could
probably be simplified to your version, searching for (u64)-1 and then
not needing to call btrfs_next_leaf().

Something like this?

(And am I fixing a use-after-free bug by moving that btrfs_free_path()
down by a line, at the end?)

diff --git a/export.c b/export.c
index 797b4cb..830e87a 100644
--- a/export.c
+++ b/export.c
@@ -156,27 +156,27 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
 
 	key.objectid = dir->i_ino;
 	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
-	key.offset = 0;
+	key.offset = (u64)-1;
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-	BUG_ON(ret == 0);
-	ret = 0;
 
 	leaf = path->nodes[0];
 	slot = path->slots[0];
-	nritems = btrfs_header_nritems(leaf);
-	if (slot >= nritems) {
-		ret = btrfs_next_leaf(root, path);
-		if (ret) {
-			btrfs_free_path(path);
-			goto out;
-		}
-		leaf = path->nodes[0];
-		slot = path->slots[0];
+	if (ret < 0 && slot == 0) {
+		btrfs_free_path(path);
+		goto out;
 	}
+	/*
+	  This will be in the slot _before_ the one that btrfs_search_slot()
+	   returns. And for some reason which dwmw2 doesn't quite understand,
+	   that'll definitely be in the same leaf that btrfs_search_slot()
+	   returned -- even if btrfs_search_slot() had to look in the _next_
+	   leaf to find the first key which is greater than what we asked for 
+	*/
+	slot--;
 
+	btrfs_item_key_to_cpu(leaf, &key, slot);
 	btrfs_free_path(path);
 
-	btrfs_item_key_to_cpu(leaf, &key, slot);
 	if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
 		goto out;
 


-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-18 20:20                 ` David Woodhouse
@ 2008-08-18 20:32                   ` Chris Mason
  2008-08-18 21:52                     ` David Woodhouse
  2008-08-19  0:16                   ` Christoph Hellwig
  1 sibling, 1 reply; 27+ messages in thread
From: Chris Mason @ 2008-08-18 20:32 UTC (permalink / raw)
  To: David Woodhouse; +Cc: Balaji Rao, linux-btrfs

On Mon, 2008-08-18 at 21:20 +0100, David Woodhouse wrote:
> On Mon, 2008-08-18 at 15:47 -0400, Chris Mason wrote:
> > Lets pretend I had put in commments something like the code below.
> > The important part is that directories have only one link, so they
> > have only one backref.
> 
> OK. Now can I rip that code out anyway? The VFS will never call
> btrfs_lookup() for ".." -- not since the 2.2 kernel :)
> 
> I'm still a little confused about precisely what btrfs_search_slot()
> returns when it doesn't find a match -- that's probably where the
> documentation would be more useful.
> 

if btrfs_search_slot returns < 0, there was an error
if btrfs_search_slot returns > 1, path->slots[0] is the spot in the tree
where you'd want to insert the item.

In this case, if path->slots[0] == 0, there are no keys in the tree
smaller than your search key.

If path->slots[0] == btrfs_header_nritems(path->nodes[0]), there are no
items in this node that have a key > than your search key.

> What I have in btrfs_get_parent() is correct, I think -- but could
> probably be simplified to your version, searching for (u64)-1 and then
> not needing to call btrfs_next_leaf().

Yes.

> 
> Something like this?
> 
> (And am I fixing a use-after-free bug by moving that btrfs_free_path()
> down by a line, at the end?)
> 

Most definitely ;)  One comment inline below

> diff --git a/export.c b/export.c
> index 797b4cb..830e87a 100644
> --- a/export.c
> +++ b/export.c
> @@ -156,27 +156,27 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
>  
>  	key.objectid = dir->i_ino;
>  	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
> -	key.offset = 0;
> +	key.offset = (u64)-1;
>  	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> -	BUG_ON(ret == 0);
> -	ret = 0;
>  
>  	leaf = path->nodes[0];
>  	slot = path->slots[0];
> -	nritems = btrfs_header_nritems(leaf);
> -	if (slot >= nritems) {
> -		ret = btrfs_next_leaf(root, path);
> -		if (ret) {
> -			btrfs_free_path(path);
> -			goto out;
> -		}
> -		leaf = path->nodes[0];
> -		slot = path->slots[0];
> +	if (ret < 0 && slot == 0) {

              ^^^^^^^^^^^^^ should be ||, and should set ret to
something bad if slot == 0

> +		btrfs_free_path(path);
> +		goto out;
>  	}
> +	/*
> +	  This will be in the slot _before_ the one that btrfs_search_slot()
> +	   returns. And for some reason which dwmw2 doesn't quite understand,
> +	   that'll definitely be in the same leaf that btrfs_search_slot()
> +	   returned -- even if btrfs_search_slot() had to look in the _next_
> +	   leaf to find the first key which is greater than what we asked for 
> +	*/
> +	slot--;
>  
> +	btrfs_item_key_to_cpu(leaf, &key, slot);
>  	btrfs_free_path(path);
>  
> -	btrfs_item_key_to_cpu(leaf, &key, slot);
>  	if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
>  		goto out;
>  
> 
> 

-chris



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-18 20:32                   ` Chris Mason
@ 2008-08-18 21:52                     ` David Woodhouse
  2008-08-19 11:54                       ` Chris Mason
  0 siblings, 1 reply; 27+ messages in thread
From: David Woodhouse @ 2008-08-18 21:52 UTC (permalink / raw)
  To: Chris Mason; +Cc: Balaji Rao, linux-btrfs

On Mon, 2008-08-18 at 16:32 -0400, Chris Mason wrote:
> On Mon, 2008-08-18 at 21:20 +0100, David Woodhouse wrote:
> > On Mon, 2008-08-18 at 15:47 -0400, Chris Mason wrote:
> > > Lets pretend I had put in commments something like the code below.
> > > The important part is that directories have only one link, so they
> > > have only one backref.
> > 
> > OK. Now can I rip that code out anyway? The VFS will never call
> > btrfs_lookup() for ".." -- not since the 2.2 kernel :)
> > 
> > I'm still a little confused about precisely what btrfs_search_slot()
> > returns when it doesn't find a match -- that's probably where the
> > documentation would be more useful.
> > 
> 
> if btrfs_search_slot returns < 0, there was an error
> if btrfs_search_slot returns > 1, path->slots[0] is the spot in the tree
> where you'd want to insert the item.

What if the parent inode actually _is_ inode #0xffffffffffffffff? Can
that happen? In that case it would return zero, and I shouldn't subtract
1 from the slot number -- I've actually found what I'm looking for?

> In this case, if path->slots[0] == 0, there are no keys in the tree
> smaller than your search key.                                  ^^^^

OK... what if the place I'd want to insert the item is the first slot in
some node? There are keys in the _tree_ which are smaller, but just not
in this node? I think that's where the root of my confusion lies.

> If path->slots[0] == btrfs_header_nritems(path->nodes[0]), there are
> no items in this node that have a key > than your search key.
                   ^^^^
Not in this node, but maybe in the next one? That's why my own fix for
the bug involved using btrfs_next_leaf() and using the first item from
that one?

> > +	if (ret < 0 && slot == 0) {
> 
>               ^^^^^^^^^^^^^ should be ||, and should set ret to
> something bad if slot == 0

Er, yes. Moment of stupidity there :)

We were ignoring 'ret' anyway -- none of this stuff should ever happen,
and it's all just 'return ERR_PTR(-EINVAL)' at the out: label.

I've tested this, and added it to my tree:

From: David Woodhouse <David.Woodhouse@intel.com>
Date: Mon, 18 Aug 2008 22:50:22 +0100
Subject: [PATCH] Simplify btrfs_get_parent(), fix use-after-free bug

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 export.c |   26 +++++++++++---------------
 1 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/export.c b/export.c
index 797b4cb..a913b9b 100644
--- a/export.c
+++ b/export.c
@@ -147,7 +147,6 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
 	struct btrfs_key key;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
-	u32 nritems;
 	int slot;
 	u64 objectid;
 	int ret;
@@ -156,27 +155,24 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
 
 	key.objectid = dir->i_ino;
 	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
-	key.offset = 0;
-	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-	BUG_ON(ret == 0);
-	ret = 0;
+	key.offset = (u64)-1;
 
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	leaf = path->nodes[0];
 	slot = path->slots[0];
-	nritems = btrfs_header_nritems(leaf);
-	if (slot >= nritems) {
-		ret = btrfs_next_leaf(root, path);
-		if (ret) {
-			btrfs_free_path(path);
-			goto out;
-		}
-		leaf = path->nodes[0];
-		slot = path->slots[0];
+	if (ret < 0 || slot == 0) {
+		btrfs_free_path(path);
+		goto out;
 	}
+	/* btrfs_search_slot() returns the slot where we'd want to insert
+	   an INODE_REF_KEY for parent inode #0xFFFFFFFFFFFFFFFF. The _real_
+	   one, telling us what the parent inode _actually_ is, will be in
+	   the slot _before_ the one that btrfs_search_slot() returns. */
+	slot--;
 
+	btrfs_item_key_to_cpu(leaf, &key, slot);
 	btrfs_free_path(path);
 
-	btrfs_item_key_to_cpu(leaf, &key, slot);
 	if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
 		goto out;
 
-- 
1.5.5.1


-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-18 20:20                 ` David Woodhouse
  2008-08-18 20:32                   ` Chris Mason
@ 2008-08-19  0:16                   ` Christoph Hellwig
  2008-08-19  0:21                     ` David Woodhouse
  1 sibling, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2008-08-19  0:16 UTC (permalink / raw)
  To: David Woodhouse; +Cc: Chris Mason, Balaji Rao, linux-btrfs

On Mon, Aug 18, 2008 at 09:20:39PM +0100, David Woodhouse wrote:
> On Mon, 2008-08-18 at 15:47 -0400, Chris Mason wrote:
> > Lets pretend I had put in commments something like the code below.
> > The important part is that directories have only one link, so they
> > have only one backref.
> 
> OK. Now can I rip that code out anyway? The VFS will never call
> btrfs_lookup() for ".." -- not since the 2.2 kernel :)

If you get_parent doesn't call into the lowlevel lookup code it doesn't
need to handle "..".  But most filesystems end up reusing the lookup
code for get_parent.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-19  0:16                   ` Christoph Hellwig
@ 2008-08-19  0:21                     ` David Woodhouse
  0 siblings, 0 replies; 27+ messages in thread
From: David Woodhouse @ 2008-08-19  0:21 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Chris Mason, Balaji Rao, linux-btrfs

On Mon, 2008-08-18 at 20:16 -0400, Christoph Hellwig wrote:
> If you get_parent doesn't call into the lowlevel lookup code it doesn't
> need to handle "..".  But most filesystems end up reusing the lookup
> code for get_parent.

We don't do that in btrfs. It doesn't really seem like a useful thing to
do.

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-18 21:52                     ` David Woodhouse
@ 2008-08-19 11:54                       ` Chris Mason
  2008-08-19 14:49                         ` David Woodhouse
  0 siblings, 1 reply; 27+ messages in thread
From: Chris Mason @ 2008-08-19 11:54 UTC (permalink / raw)
  To: David Woodhouse; +Cc: Balaji Rao, linux-btrfs

On Mon, 2008-08-18 at 22:52 +0100, David Woodhouse wrote:
> On Mon, 2008-08-18 at 16:32 -0400, Chris Mason wrote:
> > On Mon, 2008-08-18 at 21:20 +0100, David Woodhouse wrote:
> > > On Mon, 2008-08-18 at 15:47 -0400, Chris Mason wrote:
> > > > Lets pretend I had put in commments something like the code below.
> > > > The important part is that directories have only one link, so they
> > > > have only one backref.
> > > 
> > > OK. Now can I rip that code out anyway? The VFS will never call
> > > btrfs_lookup() for ".." -- not since the 2.2 kernel :)
> > > 
> > > I'm still a little confused about precisely what btrfs_search_slot()
> > > returns when it doesn't find a match -- that's probably where the
> > > documentation would be more useful.
> > > 
> > 
> > if btrfs_search_slot returns < 0, there was an error
> > if btrfs_search_slot returns > 1, path->slots[0] is the spot in the tree
> > where you'd want to insert the item.
> 
> What if the parent inode actually _is_ inode #0xffffffffffffffff? Can
> that happen? In that case it would return zero, and I shouldn't subtract
> 1 from the slot number -- I've actually found what I'm looking for?
> 

The max inode will be 2^64 - 1

> > In this case, if path->slots[0] == 0, there are no keys in the tree
> > smaller than your search key.                                  ^^^^
> 
> OK... what if the place I'd want to insert the item is the first slot in
> some node? There are keys in the _tree_ which are smaller, but just not
> in this node? I think that's where the root of my confusion lies.

In this case, you'll always end up in the last slot of the previous node
(which is what you were seeing when you put in the fix).

> 
> > If path->slots[0] == btrfs_header_nritems(path->nodes[0]), there are
> > no items in this node that have a key > than your search key.
>                    ^^^^
> Not in this node, but maybe in the next one? That's why my own fix for
> the bug involved using btrfs_next_leaf() and using the first item from
> that one?

Correct.

> 
> > > +	if (ret < 0 && slot == 0) {
> > 
> >               ^^^^^^^^^^^^^ should be ||, and should set ret to
> > something bad if slot == 0
> 
> Er, yes. Moment of stupidity there :)
> 
> We were ignoring 'ret' anyway -- none of this stuff should ever happen,
> and it's all just 'return ERR_PTR(-EINVAL)' at the out: label.
> 
> I've tested this, and added it to my tree:
> 

Looks good, thanks.

-chris



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-19 11:54                       ` Chris Mason
@ 2008-08-19 14:49                         ` David Woodhouse
  2008-08-19 21:34                           ` David Woodhouse
  0 siblings, 1 reply; 27+ messages in thread
From: David Woodhouse @ 2008-08-19 14:49 UTC (permalink / raw)
  To: Chris Mason; +Cc: Balaji Rao, linux-btrfs

On Tue, 2008-08-19 at 07:54 -0400, Chris Mason wrote:
> 
> > What if the parent inode actually _is_ inode #0xffffffffffffffff? Can
> > that happen? In that case it would return zero, and I shouldn't subtract
> > 1 from the slot number -- I've actually found what I'm looking for?
> > 
> 
> The max inode will be 2^64 - 1

Which is what we're searching for -- so it's _possible_, albeit
vanishingly unlikely, that btrfs_search_slot() will actually return
zero, having found precisely what we wanted?

And in that case, path->slots[0] being zero is fine. And we shouldn't be
subtracting one from it to find the slot we want?

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] NFS support for btrfs - v2
  2008-08-19 14:49                         ` David Woodhouse
@ 2008-08-19 21:34                           ` David Woodhouse
  0 siblings, 0 replies; 27+ messages in thread
From: David Woodhouse @ 2008-08-19 21:34 UTC (permalink / raw)
  To: Chris Mason; +Cc: Balaji Rao, linux-btrfs

On Tue, 2008-08-19 at 15:49 +0100, David Woodhouse wrote:
> On Tue, 2008-08-19 at 07:54 -0400, Chris Mason wrote:
> > 
> > > What if the parent inode actually _is_ inode #0xffffffffffffffff? Can
> > > that happen? In that case it would return zero, and I shouldn't subtract
> > > 1 from the slot number -- I've actually found what I'm looking for?
> > > 
> > 
> > The max inode will be 2^64 - 1
> 
> Which is what we're searching for -- so it's _possible_, albeit
> vanishingly unlikely, that btrfs_search_slot() will actually return
> zero, having found precisely what we wanted?
> 
> And in that case, path->slots[0] being zero is fine. And we shouldn't be
> subtracting one from it to find the slot we want?

Subject: [PATCH] Clean up btrfs_get_parent() a little more, fix a free-after-free bug

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 export.c |   33 +++++++++++++++++++--------------
 1 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/export.c b/export.c
index 36cbc68..5c75cbd 100644
--- a/export.c
+++ b/export.c
@@ -165,23 +165,32 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
 	key.offset = (u64)-1;
 
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-	leaf = path->nodes[0];
-	slot = path->slots[0];
-	if (ret < 0 || slot == 0) {
+	if (ret < 0) {
+		/* Error */
 		btrfs_free_path(path);
-		goto out;
+		return ERR_PTR(ret);
+	}
+	if (ret) {
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+		/* btrfs_search_slot() returns the slot where we'd want to
+		   insert a backref for parent inode #0xFFFFFFFFFFFFFFFF.
+		   The _real_ backref, telling us what the parent inode
+		   _actually_ is, will be in the slot _before_ the one
+		   that btrfs_search_slot() returns. */
+		if (!slot) {
+			/* Unless there is _no_ key in the tree before... */
+			btrfs_free_path(path);
+			return ERR_PTR(-EIO);
+		}
+		slot--;
 	}
-	/* btrfs_search_slot() returns the slot where we'd want to insert
-	   an INODE_REF_KEY for parent inode #0xFFFFFFFFFFFFFFFF. The _real_
-	   one, telling us what the parent inode _actually_ is, will be in
-	   the slot _before_ the one that btrfs_search_slot() returns. */
-	slot--;
 
 	btrfs_item_key_to_cpu(leaf, &key, slot);
 	btrfs_free_path(path);
 
 	if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
-		goto out;
+		return ERR_PTR(-EINVAL);
 
 	objectid = key.offset;
 
@@ -201,10 +210,6 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
 		parent = ERR_PTR(-ENOMEM);
 
 	return parent;
-
-out:
-	btrfs_free_path(path);
-	return ERR_PTR(-EINVAL);
 }
 
 const struct export_operations btrfs_export_ops = {
-- 
1.5.5.1


-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply related	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2008-08-19 21:34 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-07-20 20:31 [PATCH] NFS support for btrfs - v2 Balaji Rao
2008-08-17 11:53 ` David Woodhouse
2008-08-17 12:51   ` Balaji Rao
2008-08-17 12:56     ` David Woodhouse
2008-08-17 13:24       ` Balaji Rao
2008-08-17 13:30         ` David Woodhouse
2008-08-17 14:17           ` David Woodhouse
2008-08-17 16:10             ` [PATCH] rewrite btrfs_readdir() David Woodhouse
2008-08-18 18:46               ` Chris Mason
2008-08-18 19:08                 ` David Woodhouse
2008-08-18 19:24                   ` Chris Mason
2008-08-18 19:32                     ` David Woodhouse
2008-08-17 13:40         ` [PATCH] NFS support for btrfs - v2 David Woodhouse
2008-08-18 19:23           ` Chris Mason
2008-08-18 19:33             ` David Woodhouse
2008-08-18 19:47               ` Chris Mason
2008-08-18 20:20                 ` David Woodhouse
2008-08-18 20:32                   ` Chris Mason
2008-08-18 21:52                     ` David Woodhouse
2008-08-19 11:54                       ` Chris Mason
2008-08-19 14:49                         ` David Woodhouse
2008-08-19 21:34                           ` David Woodhouse
2008-08-19  0:16                   ` Christoph Hellwig
2008-08-19  0:21                     ` David Woodhouse
2008-08-18 11:51     ` David Woodhouse
2008-08-18 12:10       ` David Woodhouse
2008-08-18 19:15         ` Chris Mason

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox