[RFC] NFS Support

public inbox for linux-btrfs@vger.kernel.org
 help / color / mirror / Atom feed

* [RFC] NFS Support
@ 2008-06-28 23:35 Balaji Rao
  2008-06-30 14:50 ` Josef Bacik
                   ` (4 more replies)
  0 siblings, 5 replies; 15+ messages in thread
From: Balaji Rao @ 2008-06-28 23:35 UTC (permalink / raw)
  To: linux-btrfs

Hello,

Here's a quick implementation of NFS support for btrfs. It works mostly. But
its still a bit buggy. I suspect it has to do something with inode locking.
When I unmount and mount a couple of times, it stops working i.e, when I do an 
'ls', it keeps waiting indefinitely. This is seen when accessing the filesystem
from a local mount point as well.

Can anybody please review and tell me what stupid mistake I'm committing ? 

Thanks and Regards,
Balaji Rao
National Institute of Technology, Karnataka
---

#include <linux/fs.h>
#include <linux/types.h>
#include "ctree.h"
#include "disk-io.h"
#include "btrfs_inode.h"
#include "print-tree.h"
#include "export.h"

static int btrfs_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
			int connectable)
{	struct inode *inode = dentry->d_inode;
	int len = *max_len;
	u64 objectid, root_objectid;
	u32 generation;
	__le32 *fh = (__force __le32 *) fh_in;

	if ((len < 5 )|| (connectable && len < 8)) {
		return 255;
	}
	
	objectid = BTRFS_I(inode)->location.objectid;
	root_objectid = BTRFS_I(inode)->root->objectid;
	generation = inode->i_generation;

	len  = 5;
	fh[0] = cpu_to_le32((u32)(objectid >> 32));
	fh[1] = cpu_to_le32((u32)(objectid & 0xffffffff));
	fh[2] = cpu_to_le32((u32)(root_objectid >> 32));
	fh[3] = cpu_to_le32((u32)(root_objectid & 0xffffffff));
	fh[4] = cpu_to_le32(generation);

	if (connectable && !S_ISDIR(inode->i_mode)) {
		struct inode *parent;

		spin_lock(&dentry->d_lock);

		parent = dentry->d_parent->d_inode;
		objectid = BTRFS_I(parent)->location.objectid;
		generation = parent->i_generation;

		fh[5] = cpu_to_le32((u32)(objectid >> 32));
		fh[6] = cpu_to_le32((u32)(objectid & 0xffffffff));
		fh[7] = cpu_to_le32(generation);

		spin_unlock(&dentry->d_lock);

		len += 3;
	}
	
	*max_len = len;
	return len;

}

static struct dentry * btrfs_get_dentry(struct super_block *sb,
			u64 objectid, u64 root_objectid, u32 generation)
{
	struct inode *inode;
	struct dentry *result;

	inode = btrfs_ilookup(sb, objectid, root_objectid);
	if (IS_ERR(inode))
		return (void *)inode;

	if(generation != inode->i_generation) {
		iput(inode);
		return ERR_PTR(-ESTALE);
	}

	result = d_alloc_anon(inode);
	if(!result) {
		iput(inode);
		return ERR_PTR(-ENOMEM);
	}

	return result;
}

static struct dentry *btrfs_fh_to_parent(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	u64 objectid, root_objectid;
	u32 generation;

	if (fh_len < 8 || fh_type != 8) {
		return NULL;
	}


	root_objectid = (u64)le32_to_cpu(fid->raw[2]) << 32;
	root_objectid |= (u64)le32_to_cpu(fid->raw[3]);
	objectid = (u64)le32_to_cpu(fid->raw[5]) << 32;
	objectid |= (u64)le32_to_cpu(fid->raw[6]);	
	generation = le32_to_cpu(fid->raw[7]);

	return btrfs_get_dentry(sb, objectid, root_objectid, generation);
}

static struct dentry *btrfs_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	u64 objectid, root_objectid;
	u32 generation;

	if (fh_len < 5 || fh_type > 5) {
		return NULL;
	}

	objectid = (u64)le32_to_cpu(fid->raw[0]) << 32;
	objectid |= (u64)le32_to_cpu(fid->raw[1]);	
	root_objectid = (u64)le32_to_cpu(fid->raw[2]) << 32;
	root_objectid |= (u64)le32_to_cpu(fid->raw[3]);
	generation = le32_to_cpu(fid->raw[4]);

	return btrfs_get_dentry(sb, objectid, root_objectid, generation);
}

static struct dentry *btrfs_get_parent(struct dentry *child)
{
	struct inode *dir = child->d_inode;
	struct inode *inode;
	struct dentry *parent;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_key key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;	
	u32 nritems;
	int slot;
	u64 objectid;
	int ret;

	path = btrfs_alloc_path();

	key.objectid = dir->i_ino;
	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	BUG_ON(ret == 0);
	ret = 0;

	leaf = path->nodes[0];
	slot = path->slots[0];
	nritems = btrfs_header_nritems(leaf);
	if (slot >= nritems)
		return ERR_PTR(-EINVAL);

	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != dir->i_ino ||
	    key.type != BTRFS_INODE_REF_KEY) {
		return ERR_PTR(-EINVAL);
	}

	btrfs_free_path(path);
	objectid = key.offset;
	inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
	
	parent = d_alloc_anon(inode);
	if (!parent) {
		iput(inode);
		parent = ERR_PTR(-ENOMEM);
	}
	
	return parent;	
}

const struct export_operations btrfs_export_ops = {
	.encode_fh	= btrfs_encode_fh,
	.fh_to_dentry	= btrfs_fh_to_dentry,
	.fh_to_parent	= btrfs_fh_to_parent,
	.get_parent	= btrfs_get_parent,
};

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] NFS Support
  2008-06-28 23:35 [RFC] NFS Support Balaji Rao
@ 2008-06-30 14:50 ` Josef Bacik
  2008-06-30 15:20   ` Balaji Rao R
  2008-06-30 18:46 ` Christoph Hellwig
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 15+ messages in thread
From: Josef Bacik @ 2008-06-30 14:50 UTC (permalink / raw)
  To: Balaji Rao; +Cc: linux-btrfs

On Sun, Jun 29, 2008 at 05:05:31AM +0530, Balaji Rao wrote:
> Hello,
> 
> Here's a quick implementation of NFS support for btrfs. It works mostly. But
> its still a bit buggy. I suspect it has to do something with inode locking.
> When I unmount and mount a couple of times, it stops working i.e, when I do an 
> 'ls', it keeps waiting indefinitely. This is seen when accessing the filesystem
> from a local mount point as well.
> 
> Can anybody please review and tell me what stupid mistake I'm committing ? 
>

You may want to send things in patch form.  I'd recommend using the hg quilt
plugin, its nice for adding patches easily.
 
> 
> #include <linux/fs.h>
> #include <linux/types.h>
> #include "ctree.h"
> #include "disk-io.h"
> #include "btrfs_inode.h"
> #include "print-tree.h"
> #include "export.h"
> 
> static int btrfs_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
> 			int connectable)
> {	struct inode *inode = dentry->d_inode;
> 	int len = *max_len;
> 	u64 objectid, root_objectid;
> 	u32 generation;
> 	__le32 *fh = (__force __le32 *) fh_in;
> 
> 	if ((len < 5 )|| (connectable && len < 8)) {
> 		return 255;
> 	}

Magic numbers are a no-no.

> 	
> 	objectid = BTRFS_I(inode)->location.objectid;
> 	root_objectid = BTRFS_I(inode)->root->objectid;
> 	generation = inode->i_generation;
> 
> 	len  = 5;
> 	fh[0] = cpu_to_le32((u32)(objectid >> 32));
> 	fh[1] = cpu_to_le32((u32)(objectid & 0xffffffff));
> 	fh[2] = cpu_to_le32((u32)(root_objectid >> 32));
> 	fh[3] = cpu_to_le32((u32)(root_objectid & 0xffffffff));
> 	fh[4] = cpu_to_le32(generation);
> 
> 	if (connectable && !S_ISDIR(inode->i_mode)) {
> 		struct inode *parent;
> 
> 		spin_lock(&dentry->d_lock);
> 
> 		parent = dentry->d_parent->d_inode;
> 		objectid = BTRFS_I(parent)->location.objectid;
> 		generation = parent->i_generation;
> 
> 		fh[5] = cpu_to_le32((u32)(objectid >> 32));
> 		fh[6] = cpu_to_le32((u32)(objectid & 0xffffffff));
> 		fh[7] = cpu_to_le32(generation);
> 
> 		spin_unlock(&dentry->d_lock);
> 
> 		len += 3;

there is no need to take the d_lock here.

> 	}
> 	
> 	*max_len = len;
> 	return len;
> 
> }
> 
> static struct dentry * btrfs_get_dentry(struct super_block *sb,
> 			u64 objectid, u64 root_objectid, u32 generation)
> {
> 	struct inode *inode;
> 	struct dentry *result;
> 
> 	inode = btrfs_ilookup(sb, objectid, root_objectid);
> 	if (IS_ERR(inode))
> 		return (void *)inode;
> 
> 	if(generation != inode->i_generation) {
> 		iput(inode);
> 		return ERR_PTR(-ESTALE);
> 	}
> 
> 	result = d_alloc_anon(inode);
> 	if(!result) {
> 		iput(inode);
> 		return ERR_PTR(-ENOMEM);
> 	}
> 
> 	return result;
> }
> 
> static struct dentry *btrfs_fh_to_parent(struct super_block *sb,
> 		struct fid *fid, int fh_len, int fh_type)
> {
> 	u64 objectid, root_objectid;
> 	u32 generation;
> 
> 	if (fh_len < 8 || fh_type != 8) {
> 		return NULL;
> 	}
> 
> 
> 	root_objectid = (u64)le32_to_cpu(fid->raw[2]) << 32;
> 	root_objectid |= (u64)le32_to_cpu(fid->raw[3]);
> 	objectid = (u64)le32_to_cpu(fid->raw[5]) << 32;
> 	objectid |= (u64)le32_to_cpu(fid->raw[6]);	
> 	generation = le32_to_cpu(fid->raw[7]);
> 
> 	return btrfs_get_dentry(sb, objectid, root_objectid, generation);
> }
> 
> static struct dentry *btrfs_fh_to_dentry(struct super_block *sb,
> 		struct fid *fid, int fh_len, int fh_type)
> {
> 	u64 objectid, root_objectid;
> 	u32 generation;
> 
> 	if (fh_len < 5 || fh_type > 5) {
> 		return NULL;
> 	}
> 
> 	objectid = (u64)le32_to_cpu(fid->raw[0]) << 32;
> 	objectid |= (u64)le32_to_cpu(fid->raw[1]);	
> 	root_objectid = (u64)le32_to_cpu(fid->raw[2]) << 32;
> 	root_objectid |= (u64)le32_to_cpu(fid->raw[3]);
> 	generation = le32_to_cpu(fid->raw[4]);
> 
> 	return btrfs_get_dentry(sb, objectid, root_objectid, generation);
> }
> 
> static struct dentry *btrfs_get_parent(struct dentry *child)
> {
> 	struct inode *dir = child->d_inode;
> 	struct inode *inode;
> 	struct dentry *parent;
> 	struct btrfs_root *root = BTRFS_I(dir)->root;
> 	struct btrfs_key key;
> 	struct btrfs_path *path;
> 	struct extent_buffer *leaf;	
> 	u32 nritems;
> 	int slot;
> 	u64 objectid;
> 	int ret;
> 
> 	path = btrfs_alloc_path();
> 
> 	key.objectid = dir->i_ino;
> 	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
> 	key.offset = 0;
> 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> 	BUG_ON(ret == 0);
> 	ret = 0;
> 
> 	leaf = path->nodes[0];
> 	slot = path->slots[0];
> 	nritems = btrfs_header_nritems(leaf);
> 	if (slot >= nritems)
> 		return ERR_PTR(-EINVAL);
> 
> 	btrfs_item_key_to_cpu(leaf, &key, slot);
> 	if (key.objectid != dir->i_ino ||
> 	    key.type != BTRFS_INODE_REF_KEY) {
> 		return ERR_PTR(-EINVAL);
> 	}
>

in both of the above error cases you don't free the path, you need to do that.
 
> 	btrfs_free_path(path);
> 	objectid = key.offset;
> 	inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
> 	
> 	parent = d_alloc_anon(inode);
> 	if (!parent) {
> 		iput(inode);
> 		parent = ERR_PTR(-ENOMEM);
> 	}
> 	
> 	return parent;	
> }
> 
> const struct export_operations btrfs_export_ops = {
> 	.encode_fh	= btrfs_encode_fh,
> 	.fh_to_dentry	= btrfs_fh_to_dentry,
> 	.fh_to_parent	= btrfs_fh_to_parent,
> 	.get_parent	= btrfs_get_parent,
> };

I don't know much about NFS so I can't really comment on much of anything else
other than the obvious problems, but that should point you in a general
direction.

Josef

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] NFS Support
  2008-06-30 15:20   ` Balaji Rao R
@ 2008-06-30 15:08     ` Josef Bacik
  2008-07-01  7:01       ` Christoph Hellwig
  0 siblings, 1 reply; 15+ messages in thread
From: Josef Bacik @ 2008-06-30 15:08 UTC (permalink / raw)
  To: Balaji Rao R; +Cc: Josef Bacik, linux-btrfs

On Mon, Jun 30, 2008 at 08:50:46PM +0530, Balaji Rao R wrote:
> On Mon, Jun 30, 2008 at 8:20 PM, Josef Bacik <jbacik@redhat.com> wrote:
> > On Sun, Jun 29, 2008 at 05:05:31AM +0530, Balaji Rao wrote:
> >> Hello,
> >>
> >> Here's a quick implementation of NFS support for btrfs. It works mostly. But
> >> its still a bit buggy. I suspect it has to do something with inode locking.
> >> When I unmount and mount a couple of times, it stops working i.e, when I do an
> >> 'ls', it keeps waiting indefinitely. This is seen when accessing the filesystem
> >> from a local mount point as well.
> >>
> >> Can anybody please review and tell me what stupid mistake I'm committing ?
> >>
> >
> > You may want to send things in patch form.  I'd recommend using the hg quilt
> > plugin, its nice for adding patches easily.
> Yes, since this was just an RFC, i thought it would be  better to send
> just the file I created.
> >
> >>
> >> #include <linux/fs.h>
> >> #include <linux/types.h>
> >> #include "ctree.h"
> >> #include "disk-io.h"
> >> #include "btrfs_inode.h"
> >> #include "print-tree.h"
> >> #include "export.h"
> >>
> >> static int btrfs_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
> >>                       int connectable)
> >> {     struct inode *inode = dentry->d_inode;
> >>       int len = *max_len;
> >>       u64 objectid, root_objectid;
> >>       u32 generation;
> >>       __le32 *fh = (__force __le32 *) fh_in;
> >>
> >>       if ((len < 5 )|| (connectable && len < 8)) {
> >>               return 255;
> >>       }
> >
> > Magic numbers are a no-no.
> OK :) Will change
> >
> >>
> >>       objectid = BTRFS_I(inode)->location.objectid;
> >>       root_objectid = BTRFS_I(inode)->root->objectid;
> >>       generation = inode->i_generation;
> >>
> >>       len  = 5;
> >>       fh[0] = cpu_to_le32((u32)(objectid >> 32));
> >>       fh[1] = cpu_to_le32((u32)(objectid & 0xffffffff));
> >>       fh[2] = cpu_to_le32((u32)(root_objectid >> 32));
> >>       fh[3] = cpu_to_le32((u32)(root_objectid & 0xffffffff));
> >>       fh[4] = cpu_to_le32(generation);
> >>
> >>       if (connectable && !S_ISDIR(inode->i_mode)) {
> >>               struct inode *parent;
> >>
> >>               spin_lock(&dentry->d_lock);
> >>
> >>               parent = dentry->d_parent->d_inode;
> >>               objectid = BTRFS_I(parent)->location.objectid;
> >>               generation = parent->i_generation;
> >>
> >>               fh[5] = cpu_to_le32((u32)(objectid >> 32));
> >>               fh[6] = cpu_to_le32((u32)(objectid & 0xffffffff));
> >>               fh[7] = cpu_to_le32(generation);
> >>
> >>               spin_unlock(&dentry->d_lock);
> >>
> >>               len += 3;
> >
> > there is no need to take the d_lock here.
> OK. Will remove it. It was done at fs/ocfs2/export.c. So probably i
> thought it was necessary.
> 
> <snip>
>

Ahh see this is where me not being familiar with NFS gets me in trouble, you do
need the d_lock in that case.  Sorry about that.

Josef

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] NFS Support
  2008-06-30 14:50 ` Josef Bacik
@ 2008-06-30 15:20   ` Balaji Rao R
  2008-06-30 15:08     ` Josef Bacik
  0 siblings, 1 reply; 15+ messages in thread
From: Balaji Rao R @ 2008-06-30 15:20 UTC (permalink / raw)
  To: Josef Bacik; +Cc: linux-btrfs

On Mon, Jun 30, 2008 at 8:20 PM, Josef Bacik <jbacik@redhat.com> wrote:
> On Sun, Jun 29, 2008 at 05:05:31AM +0530, Balaji Rao wrote:
>> Hello,
>>
>> Here's a quick implementation of NFS support for btrfs. It works mostly. But
>> its still a bit buggy. I suspect it has to do something with inode locking.
>> When I unmount and mount a couple of times, it stops working i.e, when I do an
>> 'ls', it keeps waiting indefinitely. This is seen when accessing the filesystem
>> from a local mount point as well.
>>
>> Can anybody please review and tell me what stupid mistake I'm committing ?
>>
>
> You may want to send things in patch form.  I'd recommend using the hg quilt
> plugin, its nice for adding patches easily.
Yes, since this was just an RFC, i thought it would be  better to send
just the file I created.
>
>>
>> #include <linux/fs.h>
>> #include <linux/types.h>
>> #include "ctree.h"
>> #include "disk-io.h"
>> #include "btrfs_inode.h"
>> #include "print-tree.h"
>> #include "export.h"
>>
>> static int btrfs_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
>>                       int connectable)
>> {     struct inode *inode = dentry->d_inode;
>>       int len = *max_len;
>>       u64 objectid, root_objectid;
>>       u32 generation;
>>       __le32 *fh = (__force __le32 *) fh_in;
>>
>>       if ((len < 5 )|| (connectable && len < 8)) {
>>               return 255;
>>       }
>
> Magic numbers are a no-no.
OK :) Will change
>
>>
>>       objectid = BTRFS_I(inode)->location.objectid;
>>       root_objectid = BTRFS_I(inode)->root->objectid;
>>       generation = inode->i_generation;
>>
>>       len  = 5;
>>       fh[0] = cpu_to_le32((u32)(objectid >> 32));
>>       fh[1] = cpu_to_le32((u32)(objectid & 0xffffffff));
>>       fh[2] = cpu_to_le32((u32)(root_objectid >> 32));
>>       fh[3] = cpu_to_le32((u32)(root_objectid & 0xffffffff));
>>       fh[4] = cpu_to_le32(generation);
>>
>>       if (connectable && !S_ISDIR(inode->i_mode)) {
>>               struct inode *parent;
>>
>>               spin_lock(&dentry->d_lock);
>>
>>               parent = dentry->d_parent->d_inode;
>>               objectid = BTRFS_I(parent)->location.objectid;
>>               generation = parent->i_generation;
>>
>>               fh[5] = cpu_to_le32((u32)(objectid >> 32));
>>               fh[6] = cpu_to_le32((u32)(objectid & 0xffffffff));
>>               fh[7] = cpu_to_le32(generation);
>>
>>               spin_unlock(&dentry->d_lock);
>>
>>               len += 3;
>
> there is no need to take the d_lock here.
OK. Will remove it. It was done at fs/ocfs2/export.c. So probably i
thought it was necessary.

<snip>

>> static struct dentry *btrfs_get_parent(struct dentry *child)
>> {
>>       struct inode *dir = child->d_inode;
>>       struct inode *inode;
>>       struct dentry *parent;
>>       struct btrfs_root *root = BTRFS_I(dir)->root;
>>       struct btrfs_key key;
>>       struct btrfs_path *path;
>>       struct extent_buffer *leaf;
>>       u32 nritems;
>>       int slot;
>>       u64 objectid;
>>       int ret;
>>
>>       path = btrfs_alloc_path();
>>
>>       key.objectid = dir->i_ino;
>>       btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
>>       key.offset = 0;
>>       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
>>       BUG_ON(ret == 0);
>>       ret = 0;
>>
>>       leaf = path->nodes[0];
>>       slot = path->slots[0];
>>       nritems = btrfs_header_nritems(leaf);
>>       if (slot >= nritems)
>>               return ERR_PTR(-EINVAL);
>>
>>       btrfs_item_key_to_cpu(leaf, &key, slot);
>>       if (key.objectid != dir->i_ino ||
>>           key.type != BTRFS_INODE_REF_KEY) {
>>               return ERR_PTR(-EINVAL);
>>       }
>>
>
> in both of the above error cases you don't free the path, you need to do that.
Yes. Will do.

Thanks for the review!
-- 
warm regards

Balaji Rao

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] NFS Support
  2008-06-28 23:35 [RFC] NFS Support Balaji Rao
  2008-06-30 14:50 ` Josef Bacik
@ 2008-06-30 18:46 ` Christoph Hellwig
  2008-08-12 13:46 ` [PATCH 1/3] Introduce btrfs_iget helper David Woodhouse
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2008-06-30 18:46 UTC (permalink / raw)
  To: Balaji Rao; +Cc: linux-btrfs

On Sun, Jun 29, 2008 at 05:05:31AM +0530, Balaji Rao wrote:
> 	len  = 5;
> 	fh[0] = cpu_to_le32((u32)(objectid >> 32));
> 	fh[1] = cpu_to_le32((u32)(objectid & 0xffffffff));
> 	fh[2] = cpu_to_le32((u32)(root_objectid >> 32));
> 	fh[3] = cpu_to_le32((u32)(root_objectid & 0xffffffff));
> 	fh[4] = cpu_to_le32(generation);

Please use C struct types for the file handles.  Take a look at
fs/xfs/linux-2.6/xfs_export.[ch] for an example.

> static struct dentry * btrfs_get_dentry(struct super_block *sb,
> 			u64 objectid, u64 root_objectid, u32 generation)
> {
> 	struct inode *inode;
> 	struct dentry *result;
> 
> 	inode = btrfs_ilookup(sb, objectid, root_objectid);
> 	if (IS_ERR(inode))
> 		return (void *)inode;

This one will only find inodes when they already are in memory.  You
need to something similar to btrfs_lookup minus the btrfs_inode_by_name
call here.  In fact it would make a lot of sense to factor that part
of btrfs_lookup out into a btrfs_iget helper that takes a struct
btrfs_key pointer and returns the inode for you.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] NFS Support
  2008-06-30 15:08     ` Josef Bacik
@ 2008-07-01  7:01       ` Christoph Hellwig
  0 siblings, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2008-07-01  7:01 UTC (permalink / raw)
  To: Josef Bacik; +Cc: Balaji Rao R, linux-btrfs

On Mon, Jun 30, 2008 at 11:08:11AM -0400, Josef Bacik wrote:
> > >>
> > >>               spin_unlock(&dentry->d_lock);
> > >>
> > >>               len += 3;
> > >
> > > there is no need to take the d_lock here.
> > OK. Will remove it. It was done at fs/ocfs2/export.c. So probably i
> > thought it was necessary.
> > 
> > <snip>
> >
> 
> Ahh see this is where me not being familiar with NFS gets me in trouble, you do
> need the d_lock in that case.  Sorry about that.

It's not really about nfs, dereferencing ->d_parent needs d_lock.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/3] Introduce btrfs_iget helper
  2008-06-28 23:35 [RFC] NFS Support Balaji Rao
  2008-06-30 14:50 ` Josef Bacik
  2008-06-30 18:46 ` Christoph Hellwig
@ 2008-08-12 13:46 ` David Woodhouse
  2008-08-12 18:46   ` Christoph Hellwig
  2008-08-12 13:46 ` [PATCH 2/3] NFS support for btrfs - v2 David Woodhouse
  2008-08-12 13:46 ` [PATCH 3/3] Implement our own copy of the nfsd readdir hack, for older kernels David Woodhouse
  4 siblings, 1 reply; 15+ messages in thread
From: David Woodhouse @ 2008-08-12 13:46 UTC (permalink / raw)
  To: Balaji Rao; +Cc: linux-btrfs

From: Balaji Rao <balajirrao@gmail.com>

This patch introduces a btrfs_iget helper to be used in NFS support.

Signed-off-by: Balaji Rao <balajirrao@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 ctree.h |    2 ++
 inode.c |   55 +++++++++++++++++++++++++++++++++++++------------------
 2 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/ctree.h b/ctree.h
index c88f1e1..411d576 100644
--- a/ctree.h
+++ b/ctree.h
@@ -1698,6 +1698,8 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
 				struct btrfs_root *root);
 struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
 			    u64 root_objectid);
+struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
+			 struct btrfs_root *root, int *is_new);
 int btrfs_commit_write(struct file *file, struct page *page,
 		       unsigned from, unsigned to);
 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
diff --git a/inode.c b/inode.c
index a26d365..5344526 100644
--- a/inode.c
+++ b/inode.c
@@ -1881,6 +1881,33 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
 	return inode;
 }
 
+/* Get an inode object given its location and corresponding root.
+ * Returns in *is_new if the inode was read from disk
+ */
+struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
+			 struct btrfs_root *root, int *is_new)
+{
+	struct inode *inode;
+
+	inode = btrfs_iget_locked(s, location->objectid, root);
+	if (!inode)
+		return ERR_PTR(-EACCES);
+
+	if (inode->i_state & I_NEW) {
+		BTRFS_I(inode)->root = root;
+		memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
+		btrfs_read_locked_inode(inode);
+		unlock_new_inode(inode);
+		if (is_new)
+			*is_new = 1;
+	} else {
+		if (is_new)
+			*is_new = 0;
+	}
+
+	return inode;
+}
+
 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 				   struct nameidata *nd)
 {
@@ -1889,7 +1916,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 	struct btrfs_root *root = bi->root;
 	struct btrfs_root *sub_root = root;
 	struct btrfs_key location;
-	int ret, do_orphan = 0;
+	int ret, new, do_orphan = 0;
 
 	if (dentry->d_name.len > BTRFS_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
@@ -1907,23 +1934,15 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 			return ERR_PTR(ret);
 		if (ret > 0)
 			return ERR_PTR(-ENOENT);
-
-		inode = btrfs_iget_locked(dir->i_sb, location.objectid,
-					  sub_root);
-		if (!inode)
-			return ERR_PTR(-EACCES);
-		if (inode->i_state & I_NEW) {
-			/* the inode and parent dir are two different roots */
-			if (sub_root != root) {
-				igrab(inode);
-				sub_root->inode = inode;
-				do_orphan = 1;
-			}
-			BTRFS_I(inode)->root = sub_root;
-			memcpy(&BTRFS_I(inode)->location, &location,
-			       sizeof(location));
-			btrfs_read_locked_inode(inode);
-			unlock_new_inode(inode);
+		inode = btrfs_iget(dir->i_sb, &location, sub_root, &new);
+		if (IS_ERR(inode))
+			return ERR_CAST(inode);
+
+		/* the inode and parent dir are two different roots */
+		if (new && root != sub_root) {
+			igrab(inode);
+			sub_root->inode = inode;
+			do_orphan = 1;
 		}
 	}
 
-- 
1.5.5.1


-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/3] NFS support for btrfs - v2
  2008-06-28 23:35 [RFC] NFS Support Balaji Rao
                   ` (2 preceding siblings ...)
  2008-08-12 13:46 ` [PATCH 1/3] Introduce btrfs_iget helper David Woodhouse
@ 2008-08-12 13:46 ` David Woodhouse
  2008-08-12 18:51   ` Christoph Hellwig
  2008-08-13 15:06   ` [PATCH 2/3] NFS support for btrfs - v3 David Woodhouse
  2008-08-12 13:46 ` [PATCH 3/3] Implement our own copy of the nfsd readdir hack, for older kernels David Woodhouse
  4 siblings, 2 replies; 15+ messages in thread
From: David Woodhouse @ 2008-08-12 13:46 UTC (permalink / raw)
  To: Balaji Rao; +Cc: linux-btrfs

From: Balaji Rao <balajirrao@gmail.com>

Here's an implementation of NFS support for btrfs. It relies on the
fixes which are going in to 2.6.28 for the NFS readdir/lookup deadlock.

This uses the btrfs_iget helper introduced previously.

[dwmw2: Tidy up a little, switch to d_obtain_alias() w/compat.]

Signed-off-by: Balaji Rao <balajirrao@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 Makefile |    2 +-
 compat.h |   10 ++++
 export.c |  174 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 export.h |   17 ++++++
 super.c  |    2 +
 5 files changed, 204 insertions(+), 1 deletions(-)
 create mode 100644 export.c
 create mode 100644 export.h

diff --git a/Makefile b/Makefile
index a4b3817..75f8818 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   transaction.o bit-radix.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-	   ref-cache.o
+	   ref-cache.o export.o
 
 btrfs-$(CONFIG_FS_POSIX_ACL)	+= acl.o
 else
diff --git a/compat.h b/compat.h
index b3349a6..1040f07 100644
--- a/compat.h
+++ b/compat.h
@@ -5,6 +5,16 @@
 #define trylock_page(page) (!TestSetPageLocked(page))
 #endif
 
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27)
+static inline struct dentry *d_obtain_alias(struct inode *inode)
+{
+	struct dentry *d = d_alloc_anon(inode);
+	if (!d)
+		iput(inode);
+	return d;
+}
+#endif
+
 /*
  * Even if AppArmor isn't enabled, it still has different prototypes.
  * Add more distro/version pairs here to declare which has AppArmor applied.
diff --git a/export.c b/export.c
new file mode 100644
index 0000000..253080a
--- /dev/null
+++ b/export.c
@@ -0,0 +1,174 @@
+#include <linux/fs.h>
+#include <linux/types.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "btrfs_inode.h"
+#include "print-tree.h"
+#include "export.h"
+#include "compat.h"
+
+/* The size of encoded fh is the type number of the fh itself */
+#define BTRFS_FID_NON_CONNECTABLE		5
+#define BTRFS_FID_CONNECTABLE	 		8
+
+static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
+			   int connectable)
+{
+	struct btrfs_fid *fid = (struct btrfs_fid *)fh;
+	struct inode *inode = dentry->d_inode;
+	int len = *max_len;
+
+	if ((len < BTRFS_FID_NON_CONNECTABLE) ||
+	    (connectable && len < BTRFS_FID_CONNECTABLE))
+		return 255;
+
+	len  = BTRFS_FID_NON_CONNECTABLE;
+
+	fid->objectid = BTRFS_I(inode)->location.objectid;
+	fid->root_objectid = BTRFS_I(inode)->root->objectid;
+	fid->gen = inode->i_generation;
+
+	if (connectable && !S_ISDIR(inode->i_mode)) {
+		struct inode *parent;
+
+		spin_lock(&dentry->d_lock);
+
+		parent = dentry->d_parent->d_inode;
+		fid->parent_objectid = BTRFS_I(parent)->location.objectid;
+		fid->parent_gen = parent->i_generation;
+
+		spin_unlock(&dentry->d_lock);
+		len = BTRFS_FID_CONNECTABLE;
+	}
+	*max_len = len;
+
+	/* We return length itself for the type */
+	return len;
+
+}
+
+static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+				       u64 root_objectid, u32 generation)
+{
+	struct btrfs_root *root;
+	struct inode *inode;
+	struct dentry *result;
+	struct btrfs_key key;
+
+	key.objectid = objectid;
+	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
+	key.offset = 0;
+
+	root = btrfs_lookup_fs_root(btrfs_sb(sb)->fs_info, root_objectid);
+	inode = btrfs_iget(sb, &key, root, NULL);
+	if (IS_ERR(inode))
+		return (void *)inode;
+
+	if (generation != inode->i_generation) {
+		iput(inode);
+		return ERR_PTR(-ESTALE);
+	}
+
+	result = d_alloc_anon(inode);
+	if (!result) {
+		iput(inode);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return result;
+}
+
+static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
+					 int fh_len, int fh_type)
+{
+	struct btrfs_fid *fid = (struct btrfs_fid *) fh;
+	u64 objectid, root_objectid;
+	u32 generation;
+
+	if (fh_type != BTRFS_FID_CONNECTABLE)
+		return NULL;
+
+	root_objectid = fid->root_objectid;
+	objectid = fid->parent_objectid;
+	generation = fid->parent_gen;
+
+	return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+}
+
+static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
+					 int fh_len, int fh_type)
+{
+	struct btrfs_fid *fid = (struct btrfs_fid *) fh;
+	u64 objectid, root_objectid;
+	u32 generation;
+
+	if (fh_type > BTRFS_FID_CONNECTABLE)
+		return NULL;
+
+	objectid = fid->objectid;
+	root_objectid = fid->root_objectid;
+	generation = fid->gen;
+
+	return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+}
+
+static struct dentry *btrfs_get_parent(struct dentry *child)
+{
+	struct inode *dir = child->d_inode;
+	struct inode *inode;
+	struct dentry *parent;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	u32 nritems;
+	int slot;
+	u64 objectid;
+	int ret;
+
+	path = btrfs_alloc_path();
+
+	key.objectid = dir->i_ino;
+	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
+	key.offset = 0;
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	BUG_ON(ret == 0);
+	ret = 0;
+
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+	nritems = btrfs_header_nritems(leaf);
+	if (slot >= nritems)
+		goto out;
+
+	btrfs_item_key_to_cpu(leaf, &key, slot);
+	if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
+		goto out;
+
+	btrfs_free_path(path);
+	objectid = key.offset;
+
+	/* Build a new key for the inode item */
+	key.objectid = objectid;
+	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+	key.offset = 0;
+
+	inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
+
+	parent = d_obtain_alias(inode);
+	if (!parent)
+		parent = ERR_PTR(-ENOMEM);
+
+	return parent;
+
+out:
+	btrfs_free_path(path);
+	return ERR_PTR(-EINVAL);
+}
+
+const struct export_operations btrfs_export_ops = {
+	.encode_fh	= btrfs_encode_fh,
+	.fh_to_dentry	= btrfs_fh_to_dentry,
+	.fh_to_parent	= btrfs_fh_to_parent,
+	.get_parent	= btrfs_get_parent,
+};
diff --git a/export.h b/export.h
new file mode 100644
index 0000000..019ca3a
--- /dev/null
+++ b/export.h
@@ -0,0 +1,17 @@
+#ifndef BTRFS_EXPORT_H
+#define BTRFS_EXPORT_H
+
+#include <linux/exportfs.h>
+
+extern const struct export_operations btrfs_export_ops;
+
+struct btrfs_fid {
+	u64 objectid;
+	u64 root_objectid;
+	u32 gen;
+
+	u64 parent_objectid;
+	u32 parent_gen;
+} __attribute__ ((packed));
+
+#endif
diff --git a/super.c b/super.c
index eb4b357..e830e0e 100644
--- a/super.c
+++ b/super.c
@@ -46,6 +46,7 @@
 #include "xattr.h"
 #include "volumes.h"
 #include "version.h"
+#include "export.h"
 
 #define BTRFS_SUPER_MAGIC 0x9123683E
 
@@ -303,6 +304,7 @@ static int btrfs_fill_super(struct super_block * sb,
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_magic = BTRFS_SUPER_MAGIC;
 	sb->s_op = &btrfs_super_ops;
+	sb->s_export_op = &btrfs_export_ops;
 	sb->s_xattr = btrfs_xattr_handlers;
 	sb->s_time_gran = 1;
 	sb->s_flags |= MS_POSIXACL;
-- 
1.5.5.1


-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/3]  Implement our own copy of the nfsd readdir hack, for older kernels
  2008-06-28 23:35 [RFC] NFS Support Balaji Rao
                   ` (3 preceding siblings ...)
  2008-08-12 13:46 ` [PATCH 2/3] NFS support for btrfs - v2 David Woodhouse
@ 2008-08-12 13:46 ` David Woodhouse
  4 siblings, 0 replies; 15+ messages in thread
From: David Woodhouse @ 2008-08-12 13:46 UTC (permalink / raw)
  To: Balaji Rao; +Cc: linux-btrfs

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 inode.c |  104 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/inode.c b/inode.c
index 5344526..faa5543 100644
--- a/inode.c
+++ b/inode.c
@@ -1956,7 +1956,8 @@ static unsigned char btrfs_filetype_table[] = {
 	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
 };
 
-static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
+static int btrfs_real_readdir(struct file *filp, void *dirent,
+			      filldir_t filldir)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -2105,6 +2106,101 @@ err:
 	return ret;
 }
 
+/* Kernels earlier than 2.6.28 still have the NFS deadlock where nfsd
+   will call the file system's ->lookup() method from within its
+   filldir callback, which in turn was called from the file system's
+   ->readdir() method. And will deadlock for many file systems. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+
+struct nfshack_dirent {
+	u64		ino;
+	loff_t		offset;
+	int		namlen;
+	unsigned int	d_type;
+	char		name[];
+};
+
+struct nfshack_readdir {
+	char		*dirent;
+	size_t		used;
+};
+
+
+
+static int btrfs_nfshack_filldir(void *__buf, const char *name, int namlen,
+			      loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct nfshack_readdir *buf = __buf;
+	struct nfshack_dirent *de = (void *)(buf->dirent + buf->used);
+	unsigned int reclen;
+
+	reclen = ALIGN(sizeof(struct nfshack_dirent) + namlen, sizeof(u64));
+	if (buf->used + reclen > PAGE_SIZE)
+		return -EINVAL;
+
+	de->namlen = namlen;
+	de->offset = offset;
+	de->ino = ino;
+	de->d_type = d_type;
+	memcpy(de->name, name, namlen);
+	buf->used += reclen;
+
+	return 0;
+}
+
+static int btrfs_nfshack_readdir(struct file *file, void *dirent,
+				 filldir_t filldir)
+{
+	struct nfshack_readdir buf;
+	struct nfshack_dirent *de;
+	int err;
+	int size;
+	loff_t offset;
+
+	buf.dirent = (void *)__get_free_page(GFP_KERNEL);
+	if (!buf.dirent)
+		return -ENOMEM;
+
+	offset = file->f_pos;
+
+	while (1) {
+		unsigned int reclen;
+
+		buf.used = 0;
+
+		err = btrfs_real_readdir(file, &buf, btrfs_nfshack_filldir);
+		if (err)
+			break;
+
+		size = buf.used;
+
+		if (!size)
+			break;
+
+		de = (struct nfshack_dirent *)buf.dirent;
+		while (size > 0) {
+			offset = de->offset;
+
+			if (filldir(dirent, de->name, de->namlen, de->offset,
+				    de->ino, de->d_type))
+				goto done;
+			offset = file->f_pos;
+
+			reclen = ALIGN(sizeof(*de) + de->namlen,
+				       sizeof(u64));
+			size -= reclen;
+			de = (struct nfshack_dirent *)((char *)de + reclen);
+		}
+	}
+
+ done:
+	free_page((unsigned long)buf.dirent);
+	file->f_pos = offset;
+
+	return err;
+}
+#endif
+
 int btrfs_write_inode(struct inode *inode, int wait)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3661,7 +3757,11 @@ static struct inode_operations btrfs_dir_ro_inode_operations = {
 static struct file_operations btrfs_dir_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
-	.readdir	= btrfs_readdir,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+	.readdir	= btrfs_nfshack_readdir,
+#else /* NFSd readdir/lookup deadlock is fixed */
+	.readdir	= btrfs_real_readdir,
+#endif
 	.unlocked_ioctl	= btrfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= btrfs_ioctl,
-- 
1.5.5.1


-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/3] Introduce btrfs_iget helper
  2008-08-12 13:46 ` [PATCH 1/3] Introduce btrfs_iget helper David Woodhouse
@ 2008-08-12 18:46   ` Christoph Hellwig
  2008-08-13  9:07     ` David Woodhouse
  0 siblings, 1 reply; 15+ messages in thread
From: Christoph Hellwig @ 2008-08-12 18:46 UTC (permalink / raw)
  To: David Woodhouse; +Cc: Balaji Rao, linux-btrfs

> +struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
> +			 struct btrfs_root *root, int *is_new)
> +{
> +	struct inode *inode;
> +
> +	inode = btrfs_iget_locked(s, location->objectid, root);
> +	if (!inode)
> +		return ERR_PTR(-EACCES);
> +
> +	if (inode->i_state & I_NEW) {
> +		BTRFS_I(inode)->root = root;
> +		memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
> +		btrfs_read_locked_inode(inode);
> +		unlock_new_inode(inode);
> +		if (is_new)
> +			*is_new = 1;
> +	} else {
> +		if (is_new)
> +			*is_new = 0;
> +	}
> +
> +	return inode;

Please leave the unlock_new_inode to the caller and kill the is_new
parameter. Gives much nicer to read code, and generally making sure the
inode is 100% ready and correct before unlocking it is a good idea, too
so that e.g. no one can actually access an inode marked bad due to a
generation number mismatch before it's marked bad.

> +}
> +
>  static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
>  				   struct nameidata *nd)
>  {
> @@ -1889,7 +1916,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
>  	struct btrfs_root *root = bi->root;
>  	struct btrfs_root *sub_root = root;
>  	struct btrfs_key location;
> -	int ret, do_orphan = 0;
> +	int ret, new, do_orphan = 0;
>  
>  	if (dentry->d_name.len > BTRFS_NAME_LEN)
>  		return ERR_PTR(-ENAMETOOLONG);
> @@ -1907,23 +1934,15 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
>  			return ERR_PTR(ret);
>  		if (ret > 0)
>  			return ERR_PTR(-ENOENT);
> -
> -		inode = btrfs_iget_locked(dir->i_sb, location.objectid,
> -					  sub_root);
> -		if (!inode)
> -			return ERR_PTR(-EACCES);
> -		if (inode->i_state & I_NEW) {
> -			/* the inode and parent dir are two different roots */
> -			if (sub_root != root) {
> -				igrab(inode);
> -				sub_root->inode = inode;
> -				do_orphan = 1;
> -			}
> -			BTRFS_I(inode)->root = sub_root;
> -			memcpy(&BTRFS_I(inode)->location, &location,
> -			       sizeof(location));
> -			btrfs_read_locked_inode(inode);
> -			unlock_new_inode(inode);
> +		inode = btrfs_iget(dir->i_sb, &location, sub_root, &new);
> +		if (IS_ERR(inode))
> +			return ERR_CAST(inode);
> +
> +		/* the inode and parent dir are two different roots */
> +		if (new && root != sub_root) {
> +			igrab(inode);
> +			sub_root->inode = inode;
> +			do_orphan = 1;
>  		}
>  	}
>  
> -- 
> 1.5.5.1
> 
> 
> -- 
> David Woodhouse                            Open Source Technology Centre
> David.Woodhouse@intel.com                              Intel Corporation
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
---end quoted text---

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] NFS support for btrfs - v2
  2008-08-12 13:46 ` [PATCH 2/3] NFS support for btrfs - v2 David Woodhouse
@ 2008-08-12 18:51   ` Christoph Hellwig
  2008-08-13  8:53     ` David Woodhouse
  2008-08-13 15:06   ` [PATCH 2/3] NFS support for btrfs - v3 David Woodhouse
  1 sibling, 1 reply; 15+ messages in thread
From: Christoph Hellwig @ 2008-08-12 18:51 UTC (permalink / raw)
  To: David Woodhouse; +Cc: Balaji Rao, linux-btrfs

On Tue, Aug 12, 2008 at 02:46:46PM +0100, David Woodhouse wrote:
> +static inline struct dentry *d_obtain_alias(struct inode *inode)
> +{
> +	struct dentry *d = d_alloc_anon(inode);
> +	if (!d)
> +		iput(inode);
> +	return d;
> +}
> +#endif

I'm not sure when al wants to merge with Linus, but the for-next naming
makes it sound like the tree is the .28 queue.  Also please take the
full implementation of d_obtain_alias from

	http://git.kernel.org/?p=linux/kernel/git/viro/vfs-2.6.git;a=commitdiff;h=10cdb734be3c4175b977ba18eafbaba8e5716291

please.  Your light implementation will crash and burn when btrfs_iget
returns an error.

> +/* The size of encoded fh is the type number of the fh itself */
> +#define BTRFS_FID_NON_CONNECTABLE		5
> +#define BTRFS_FID_CONNECTABLE	 		8

That was an assumption in the very old code, but I think it's a very
bad idea.  Just give your filehandles a uniqueue type number, e.g. in
the 0x4? range so that people looking at nfs traffic using a packet
analyzer know what kind of fhs they are actually dealing with.

> +	if (IS_ERR(inode))
> +		return (void *)inode;
> +
> +	if (generation != inode->i_generation) {
> +		iput(inode);
> +		return ERR_PTR(-ESTALE);
> +	}
> +
> +	result = d_alloc_anon(inode);
> +	if (!result) {
> +		iput(inode);
> +		return ERR_PTR(-ENOMEM);
> +	}

Didn't you intend to use d_obtain_alias?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] NFS support for btrfs - v2
  2008-08-12 18:51   ` Christoph Hellwig
@ 2008-08-13  8:53     ` David Woodhouse
  0 siblings, 0 replies; 15+ messages in thread
From: David Woodhouse @ 2008-08-13  8:53 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Balaji Rao, linux-btrfs

On Tue, 2008-08-12 at 14:51 -0400, Christoph Hellwig wrote:
> On Tue, Aug 12, 2008 at 02:46:46PM +0100, David Woodhouse wrote:
> > +static inline struct dentry *d_obtain_alias(struct inode *inode)
> > +{
> > +	struct dentry *d = d_alloc_anon(inode);
> > +	if (!d)
> > +		iput(inode);
> > +	return d;
> > +}
> > +#endif
> 
> I'm not sure when al wants to merge with Linus, but the for-next naming
> makes it sound like the tree is the .28 queue.  Also please take the
> full implementation of d_obtain_alias from
> 
> 	http://git.kernel.org/?p=linux/kernel/git/viro/vfs-2.6.git;a=commitdiff;h=10cdb734be3c4175b977ba18eafbaba8e5716291
> 
> please.  Your light implementation will crash and burn when btrfs_iget
> returns an error.

Well spotted; thanks.

> > +/* The size of encoded fh is the type number of the fh itself */
> > +#define BTRFS_FID_NON_CONNECTABLE		5
> > +#define BTRFS_FID_CONNECTABLE	 		8
> 
> That was an assumption in the very old code, but I think it's a very
> bad idea.  Just give your filehandles a uniqueue type number, e.g. in
> the 0x4? range so that people looking at nfs traffic using a packet
> analyzer know what kind of fhs they are actually dealing with.

Ok, I'll submit the patch to include/linux/exportfs.h separately -- the
numbers might as well be reserved immediately.

Btw, I'm not convinced nfsd does the right thing when we return 255 from
->encode_fh(). It seems to leak a refcount somewhere in nfs code, so I
can't stop the server and can't unmount the file system.

> > +	if (IS_ERR(inode))
> > +		return (void *)inode;
> > +
> > +	if (generation != inode->i_generation) {
> > +		iput(inode);
> > +		return ERR_PTR(-ESTALE);
> > +	}
> > +
> > +	result = d_alloc_anon(inode);
> > +	if (!result) {
> > +		iput(inode);
> > +		return ERR_PTR(-ENOMEM);
> > +	}
> 
> Didn't you intend to use d_obtain_alias?

Oops, yes. I did use it once; I missed the second one though.

I'll merge this into the patch in question...

diff --git a/compat.h b/compat.h
index 1040f07..d45fb37 100644
--- a/compat.h
+++ b/compat.h
@@ -8,7 +8,14 @@
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27)
 static inline struct dentry *d_obtain_alias(struct inode *inode)
 {
-	struct dentry *d = d_alloc_anon(inode);
+	struct dentry *d;
+
+	if (!inode)
+		return NULL;
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+
+	d = d_alloc_anon(inode);
 	if (!d)
 		iput(inode);
 	return d;
diff --git a/export.c b/export.c
index 253080a..34e4631 100644
--- a/export.c
+++ b/export.c
@@ -7,9 +7,13 @@
 #include "export.h"
 #include "compat.h"
 
-/* The size of encoded fh is the type number of the fh itself */
-#define BTRFS_FID_NON_CONNECTABLE		5
-#define BTRFS_FID_CONNECTABLE	 		8
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+#define FILEID_BTRFS_WITHOUT_PARENT		0x4e
+#define FILEID_BTRFS_WITH_PARENT 		0x4f
+#endif
+
+#define BTRFS_FID_SIZE_NON_CONNECTABLE		(offsetof(struct btrfs_fid, parent_objectid)/4)
+#define BTRFS_FID_SIZE_CONNECTABLE		(sizeof(struct btrfs_fid)/4)
 
 static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
 			   int connectable)
@@ -17,12 +21,14 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
 	struct btrfs_fid *fid = (struct btrfs_fid *)fh;
 	struct inode *inode = dentry->d_inode;
 	int len = *max_len;
+	int type;
 
-	if ((len < BTRFS_FID_NON_CONNECTABLE) ||
-	    (connectable && len < BTRFS_FID_CONNECTABLE))
+	if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) ||
+	    (connectable && len < BTRFS_FID_SIZE_CONNECTABLE))
 		return 255;
 
-	len  = BTRFS_FID_NON_CONNECTABLE;
+	len  = BTRFS_FID_SIZE_NON_CONNECTABLE;
+	type = FILEID_BTRFS_WITHOUT_PARENT;
 
 	fid->objectid = BTRFS_I(inode)->location.objectid;
 	fid->root_objectid = BTRFS_I(inode)->root->objectid;
@@ -38,13 +44,12 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
 		fid->parent_gen = parent->i_generation;
 
 		spin_unlock(&dentry->d_lock);
-		len = BTRFS_FID_CONNECTABLE;
+		len = BTRFS_FID_SIZE_CONNECTABLE;
+		type = FILEID_BTRFS_WITH_PARENT;
 	}
-	*max_len = len;
-
-	/* We return length itself for the type */
-	return len;
 
+	*max_len = len;
+	return type;
 }
 
 static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
@@ -69,11 +74,9 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
 		return ERR_PTR(-ESTALE);
 	}
 
-	result = d_alloc_anon(inode);
-	if (!result) {
-		iput(inode);
+	result = d_obtain_alias(inode);
+	if (!result)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	return result;
 }
@@ -85,7 +88,8 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
 	u64 objectid, root_objectid;
 	u32 generation;
 
-	if (fh_type != BTRFS_FID_CONNECTABLE)
+	if (fh_type != FILEID_BTRFS_WITH_PARENT ||
+	    fh_len != BTRFS_FID_SIZE_CONNECTABLE)
 		return NULL;
 
 	root_objectid = fid->root_objectid;
@@ -102,7 +106,10 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
 	u64 objectid, root_objectid;
 	u32 generation;
 
-	if (fh_type > BTRFS_FID_CONNECTABLE)
+	if ((fh_type != FILEID_BTRFS_WITH_PARENT ||
+	     fh_len != BTRFS_FID_SIZE_CONNECTABLE) &&
+	    (fh_type != FILEID_BTRFS_WITHOUT_PARENT ||
+	     fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE))
 		return NULL;
 
 	objectid = fid->objectid;

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/3] Introduce btrfs_iget helper
  2008-08-12 18:46   ` Christoph Hellwig
@ 2008-08-13  9:07     ` David Woodhouse
  2008-08-13  9:41       ` David Woodhouse
  0 siblings, 1 reply; 15+ messages in thread
From: David Woodhouse @ 2008-08-13  9:07 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Balaji Rao, linux-btrfs

On Tue, 2008-08-12 at 14:46 -0400, Christoph Hellwig wrote:
> Please leave the unlock_new_inode to the caller and kill the is_new
> parameter. Gives much nicer to read code, and generally making sure the
> inode is 100% ready and correct before unlocking it is a good idea, too
> so that e.g. no one can actually access an inode marked bad due to a
> generation number mismatch before it's marked bad.

Unconvinced -- I don't like the idea that we'd sometimes return a locked
inode, sometimes not, and force the callers to handle that.

And the inode _is_ ready and correct. We don't mark inodes bad because
of a generation number mismatch -- we just return -ESTALE and drop our
refcount on the inode. The inode itself is fine and there's no issue
with it being unlocked.

The code in btrfs_lookup() which goes...
		/* the inode and parent dir are two different roots */
		if (new && root != sub_root) {
			igrab(inode);
			sub_root->inode = inode;
			do_orphan = 1;
		}
... should also be fine when the inode is unlocked, too.

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/3] Introduce btrfs_iget helper
  2008-08-13  9:07     ` David Woodhouse
@ 2008-08-13  9:41       ` David Woodhouse
  0 siblings, 0 replies; 15+ messages in thread
From: David Woodhouse @ 2008-08-13  9:41 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Balaji Rao, linux-btrfs

On Wed, 2008-08-13 at 10:07 +0100, David Woodhouse wrote:
>                 /* the inode and parent dir are two different roots */
>                 if (new && root != sub_root) {
>                         igrab(inode);
>                         sub_root->inode = inode;
>                         do_orphan = 1;
>                 }

Hmmmm.... looking at that, don't we need to store the _parent_ root
object ID in the file handle too, if it can be different from the child?

Something like this, perhaps. Given that I think we've already given up
on exporting by NFSv2, do we care about another 8 bytes in the fh size?

diff --git a/export.c b/export.c
index 34e4631..1b8875c 100644
--- a/export.c
+++ b/export.c
@@ -8,12 +8,14 @@
 #include "compat.h"
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
-#define FILEID_BTRFS_WITHOUT_PARENT		0x4e
-#define FILEID_BTRFS_WITH_PARENT 		0x4f
+#define FILEID_BTRFS_WITHOUT_PARENT		0x4d
+#define FILEID_BTRFS_WITH_PARENT 		0x4e
+#define FILEID_BTRFS_WITH_PARENT_ROOT 		0x4f
 #endif
 
 #define BTRFS_FID_SIZE_NON_CONNECTABLE		(offsetof(struct btrfs_fid, parent_objectid)/4)
-#define BTRFS_FID_SIZE_CONNECTABLE		(sizeof(struct btrfs_fid)/4)
+#define BTRFS_FID_SIZE_CONNECTABLE		(offsetof(struct btrfs_fid, parent_root_objectid)/4)
+#define BTRFS_FID_SIZE_CONNECTABLE_ROOT		(sizeof(struct btrfs_fid)/4)
 
 static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
 			   int connectable)
@@ -36,16 +38,25 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
 
 	if (connectable && !S_ISDIR(inode->i_mode)) {
 		struct inode *parent;
+		u64 parent_root_id;
 
 		spin_lock(&dentry->d_lock);
 
 		parent = dentry->d_parent->d_inode;
 		fid->parent_objectid = BTRFS_I(parent)->location.objectid;
 		fid->parent_gen = parent->i_generation;
+		parent_root_id = BTRFS_I(parent)->root->objectid;
 
 		spin_unlock(&dentry->d_lock);
-		len = BTRFS_FID_SIZE_CONNECTABLE;
-		type = FILEID_BTRFS_WITH_PARENT;
+
+		if (parent_root_id != fid->root_objectid) {
+			fid->parent_root_objectid = parent_root_id;
+			len = BTRFS_FID_SIZE_CONNECTABLE_ROOT;
+			type = FILEID_BTRFS_WITH_PARENT_ROOT;
+		} else {
+			len = BTRFS_FID_SIZE_CONNECTABLE; 
+			type = FILEID_BTRFS_WITH_PARENT;
+		}
 	}
 
 	*max_len = len;
@@ -88,11 +99,17 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
 	u64 objectid, root_objectid;
 	u32 generation;
 
-	if (fh_type != FILEID_BTRFS_WITH_PARENT ||
-	    fh_len != BTRFS_FID_SIZE_CONNECTABLE)
+	if (fh_type == FILEID_BTRFS_WITH_PARENT) {
+		if (fh_len !=  BTRFS_FID_SIZE_CONNECTABLE)
+			return NULL;
+		root_objectid = fid->root_objectid;
+	} else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) {
+		if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT)
+			return NULL;
+		root_objectid = fid->parent_root_objectid;
+	} else
 		return NULL;
 
-	root_objectid = fid->root_objectid;
 	objectid = fid->parent_objectid;
 	generation = fid->parent_gen;
 
@@ -108,6 +125,8 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
 
 	if ((fh_type != FILEID_BTRFS_WITH_PARENT ||
 	     fh_len != BTRFS_FID_SIZE_CONNECTABLE) &&
+	    (fh_type != FILEID_BTRFS_WITH_PARENT_ROOT ||
+	     fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) &&
 	    (fh_type != FILEID_BTRFS_WITHOUT_PARENT ||
 	     fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE))
 		return NULL;
diff --git a/export.h b/export.h
index 019ca3a..074348a 100644
--- a/export.h
+++ b/export.h
@@ -12,6 +12,8 @@ struct btrfs_fid {
 
 	u64 parent_objectid;
 	u32 parent_gen;
+
+	u64 parent_root_objectid;
 } __attribute__ ((packed));
 
 #endif


-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/3] NFS support for btrfs - v3
  2008-08-12 13:46 ` [PATCH 2/3] NFS support for btrfs - v2 David Woodhouse
  2008-08-12 18:51   ` Christoph Hellwig
@ 2008-08-13 15:06   ` David Woodhouse
  1 sibling, 0 replies; 15+ messages in thread
From: David Woodhouse @ 2008-08-13 15:06 UTC (permalink / raw)
  To: Balaji Rao; +Cc: linux-btrfs

From: Balaji Rao <balajirrao@gmail.com>

Here's an implementation of NFS support for btrfs. It relies on the
fixes which are going in to 2.6.28 for the NFS readdir/lookup deadlock.

This uses the btrfs_iget helper introduced previously.

[dwmw2: Tidy up a little, switch to d_obtain_alias() w/compat routine,
	change fh_type,	store parent's root object ID where needed]

Signed-off-by: Balaji Rao <balajirrao@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---

This replaces patch 2 of the sequence I sent out yesterday; the other
two patches remain the same.

 Makefile |    2 +-
 compat.h |   17 +++++
 export.c |  200 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 export.h |   19 ++++++
 super.c  |    2 +
 5 files changed, 239 insertions(+), 1 deletions(-)
 create mode 100644 export.c
 create mode 100644 export.h

diff --git a/Makefile b/Makefile
index a4b3817..75f8818 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   transaction.o bit-radix.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-	   ref-cache.o
+	   ref-cache.o export.o
 
 btrfs-$(CONFIG_FS_POSIX_ACL)	+= acl.o
 else
diff --git a/compat.h b/compat.h
index b3349a6..d45fb37 100644
--- a/compat.h
+++ b/compat.h
@@ -5,6 +5,23 @@
 #define trylock_page(page) (!TestSetPageLocked(page))
 #endif
 
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27)
+static inline struct dentry *d_obtain_alias(struct inode *inode)
+{
+	struct dentry *d;
+
+	if (!inode)
+		return NULL;
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+
+	d = d_alloc_anon(inode);
+	if (!d)
+		iput(inode);
+	return d;
+}
+#endif
+
 /*
  * Even if AppArmor isn't enabled, it still has different prototypes.
  * Add more distro/version pairs here to declare which has AppArmor applied.
diff --git a/export.c b/export.c
new file mode 100644
index 0000000..1b8875c
--- /dev/null
+++ b/export.c
@@ -0,0 +1,200 @@
+#include <linux/fs.h>
+#include <linux/types.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "btrfs_inode.h"
+#include "print-tree.h"
+#include "export.h"
+#include "compat.h"
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+#define FILEID_BTRFS_WITHOUT_PARENT		0x4d
+#define FILEID_BTRFS_WITH_PARENT 		0x4e
+#define FILEID_BTRFS_WITH_PARENT_ROOT 		0x4f
+#endif
+
+#define BTRFS_FID_SIZE_NON_CONNECTABLE		(offsetof(struct btrfs_fid, parent_objectid)/4)
+#define BTRFS_FID_SIZE_CONNECTABLE		(offsetof(struct btrfs_fid, parent_root_objectid)/4)
+#define BTRFS_FID_SIZE_CONNECTABLE_ROOT		(sizeof(struct btrfs_fid)/4)
+
+static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
+			   int connectable)
+{
+	struct btrfs_fid *fid = (struct btrfs_fid *)fh;
+	struct inode *inode = dentry->d_inode;
+	int len = *max_len;
+	int type;
+
+	if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) ||
+	    (connectable && len < BTRFS_FID_SIZE_CONNECTABLE))
+		return 255;
+
+	len  = BTRFS_FID_SIZE_NON_CONNECTABLE;
+	type = FILEID_BTRFS_WITHOUT_PARENT;
+
+	fid->objectid = BTRFS_I(inode)->location.objectid;
+	fid->root_objectid = BTRFS_I(inode)->root->objectid;
+	fid->gen = inode->i_generation;
+
+	if (connectable && !S_ISDIR(inode->i_mode)) {
+		struct inode *parent;
+		u64 parent_root_id;
+
+		spin_lock(&dentry->d_lock);
+
+		parent = dentry->d_parent->d_inode;
+		fid->parent_objectid = BTRFS_I(parent)->location.objectid;
+		fid->parent_gen = parent->i_generation;
+		parent_root_id = BTRFS_I(parent)->root->objectid;
+
+		spin_unlock(&dentry->d_lock);
+
+		if (parent_root_id != fid->root_objectid) {
+			fid->parent_root_objectid = parent_root_id;
+			len = BTRFS_FID_SIZE_CONNECTABLE_ROOT;
+			type = FILEID_BTRFS_WITH_PARENT_ROOT;
+		} else {
+			len = BTRFS_FID_SIZE_CONNECTABLE; 
+			type = FILEID_BTRFS_WITH_PARENT;
+		}
+	}
+
+	*max_len = len;
+	return type;
+}
+
+static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+				       u64 root_objectid, u32 generation)
+{
+	struct btrfs_root *root;
+	struct inode *inode;
+	struct dentry *result;
+	struct btrfs_key key;
+
+	key.objectid = objectid;
+	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
+	key.offset = 0;
+
+	root = btrfs_lookup_fs_root(btrfs_sb(sb)->fs_info, root_objectid);
+	inode = btrfs_iget(sb, &key, root, NULL);
+	if (IS_ERR(inode))
+		return (void *)inode;
+
+	if (generation != inode->i_generation) {
+		iput(inode);
+		return ERR_PTR(-ESTALE);
+	}
+
+	result = d_obtain_alias(inode);
+	if (!result)
+		return ERR_PTR(-ENOMEM);
+
+	return result;
+}
+
+static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
+					 int fh_len, int fh_type)
+{
+	struct btrfs_fid *fid = (struct btrfs_fid *) fh;
+	u64 objectid, root_objectid;
+	u32 generation;
+
+	if (fh_type == FILEID_BTRFS_WITH_PARENT) {
+		if (fh_len !=  BTRFS_FID_SIZE_CONNECTABLE)
+			return NULL;
+		root_objectid = fid->root_objectid;
+	} else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) {
+		if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT)
+			return NULL;
+		root_objectid = fid->parent_root_objectid;
+	} else
+		return NULL;
+
+	objectid = fid->parent_objectid;
+	generation = fid->parent_gen;
+
+	return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+}
+
+static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
+					 int fh_len, int fh_type)
+{
+	struct btrfs_fid *fid = (struct btrfs_fid *) fh;
+	u64 objectid, root_objectid;
+	u32 generation;
+
+	if ((fh_type != FILEID_BTRFS_WITH_PARENT ||
+	     fh_len != BTRFS_FID_SIZE_CONNECTABLE) &&
+	    (fh_type != FILEID_BTRFS_WITH_PARENT_ROOT ||
+	     fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) &&
+	    (fh_type != FILEID_BTRFS_WITHOUT_PARENT ||
+	     fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE))
+		return NULL;
+
+	objectid = fid->objectid;
+	root_objectid = fid->root_objectid;
+	generation = fid->gen;
+
+	return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+}
+
+static struct dentry *btrfs_get_parent(struct dentry *child)
+{
+	struct inode *dir = child->d_inode;
+	struct inode *inode;
+	struct dentry *parent;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	u32 nritems;
+	int slot;
+	u64 objectid;
+	int ret;
+
+	path = btrfs_alloc_path();
+
+	key.objectid = dir->i_ino;
+	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
+	key.offset = 0;
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	BUG_ON(ret == 0);
+	ret = 0;
+
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+	nritems = btrfs_header_nritems(leaf);
+	if (slot >= nritems)
+		goto out;
+
+	btrfs_item_key_to_cpu(leaf, &key, slot);
+	if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
+		goto out;
+
+	btrfs_free_path(path);
+	objectid = key.offset;
+
+	/* Build a new key for the inode item */
+	key.objectid = objectid;
+	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+	key.offset = 0;
+
+	inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
+
+	parent = d_obtain_alias(inode);
+	if (!parent)
+		parent = ERR_PTR(-ENOMEM);
+
+	return parent;
+
+out:
+	btrfs_free_path(path);
+	return ERR_PTR(-EINVAL);
+}
+
+const struct export_operations btrfs_export_ops = {
+	.encode_fh	= btrfs_encode_fh,
+	.fh_to_dentry	= btrfs_fh_to_dentry,
+	.fh_to_parent	= btrfs_fh_to_parent,
+	.get_parent	= btrfs_get_parent,
+};
diff --git a/export.h b/export.h
new file mode 100644
index 0000000..074348a
--- /dev/null
+++ b/export.h
@@ -0,0 +1,19 @@
+#ifndef BTRFS_EXPORT_H
+#define BTRFS_EXPORT_H
+
+#include <linux/exportfs.h>
+
+extern const struct export_operations btrfs_export_ops;
+
+struct btrfs_fid {
+	u64 objectid;
+	u64 root_objectid;
+	u32 gen;
+
+	u64 parent_objectid;
+	u32 parent_gen;
+
+	u64 parent_root_objectid;
+} __attribute__ ((packed));
+
+#endif
diff --git a/super.c b/super.c
index eb4b357..e830e0e 100644
--- a/super.c
+++ b/super.c
@@ -46,6 +46,7 @@
 #include "xattr.h"
 #include "volumes.h"
 #include "version.h"
+#include "export.h"
 
 #define BTRFS_SUPER_MAGIC 0x9123683E
 
@@ -303,6 +304,7 @@ static int btrfs_fill_super(struct super_block * sb,
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_magic = BTRFS_SUPER_MAGIC;
 	sb->s_op = &btrfs_super_ops;
+	sb->s_export_op = &btrfs_export_ops;
 	sb->s_xattr = btrfs_xattr_handlers;
 	sb->s_time_gran = 1;
 	sb->s_flags |= MS_POSIXACL;
-- 
1.5.5.1


-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2008-08-13 15:06 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-06-28 23:35 [RFC] NFS Support Balaji Rao
2008-06-30 14:50 ` Josef Bacik
2008-06-30 15:20   ` Balaji Rao R
2008-06-30 15:08     ` Josef Bacik
2008-07-01  7:01       ` Christoph Hellwig
2008-06-30 18:46 ` Christoph Hellwig
2008-08-12 13:46 ` [PATCH 1/3] Introduce btrfs_iget helper David Woodhouse
2008-08-12 18:46   ` Christoph Hellwig
2008-08-13  9:07     ` David Woodhouse
2008-08-13  9:41       ` David Woodhouse
2008-08-12 13:46 ` [PATCH 2/3] NFS support for btrfs - v2 David Woodhouse
2008-08-12 18:51   ` Christoph Hellwig
2008-08-13  8:53     ` David Woodhouse
2008-08-13 15:06   ` [PATCH 2/3] NFS support for btrfs - v3 David Woodhouse
2008-08-12 13:46 ` [PATCH 3/3] Implement our own copy of the nfsd readdir hack, for older kernels David Woodhouse

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox