linux-api.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/5] add metadata_incore ioctl in vfs
@ 2011-01-04  5:40 Shaohua Li
  2011-01-04  9:40 ` Arnd Bergmann
  0 siblings, 1 reply; 8+ messages in thread
From: Shaohua Li @ 2011-01-04  5:40 UTC (permalink / raw)
  To: linux-btrfs@vger.kernel.org, linux-fsdevel@vger.kernel.org
  Cc: Chris Mason, Christoph Hellwig, Andrew Morton, Arjan van de Ven,
	Yan, Zheng, linux-api, mtk.manpages

Add an ioctl to dump filesystem's metadata in memory in vfs. Userspace collects
such info and uses it to do metadata readahead.
Filesystem can hook to super_operations.metadata_incore to get metadata in
specific approach. Next patch will give an example how to implement
.metadata_incore in btrfs.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>

---
 fs/compat_ioctl.c  |    1 
 fs/ioctl.c         |   75 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |   15 ++++++++++
 3 files changed, 91 insertions(+)

Index: linux/fs/ioctl.c
===================================================================
--- linux.orig/fs/ioctl.c	2010-12-31 08:52:00.000000000 +0800
+++ linux/fs/ioctl.c	2011-01-03 21:15:48.000000000 +0800
@@ -530,6 +530,78 @@ static int ioctl_fsthaw(struct file *fil
 }
 
 /*
+ * Copy info about metadata in memory to userspace
+ * Returns:
+ * > 0, number of metadata_incore_ent entries copied to userspace
+ * = 0, no more metadata
+ * < 0, error
+ */
+static int ioctl_metadata_incore(struct file *filp, void __user *argp)
+{
+	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	struct metadata_incore_args args;
+	struct metadata_incore_ent ent;
+	loff_t offset, last_offset = 0;
+	ssize_t size, last_size = 0;
+	__u64 __user vec_addr;
+	int entries = 0;
+
+	if (!sb->s_op->metadata_incore)
+		return -EINVAL;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	/* we check metadata info in page unit */
+	if (args.offset & ~PAGE_CACHE_MASK)
+		return -EINVAL;
+
+	if ((args.vec_size % sizeof(struct metadata_incore_ent)) != 0)
+		return -EINVAL;
+
+	offset = args.offset;
+
+	ent.unused = 0;
+	vec_addr = args.vec_addr;
+
+	while (vec_addr < args.vec_addr + args.vec_size) {
+		if (signal_pending(current))
+			return -EINTR;
+		cond_resched();
+
+		if (sb->s_op->metadata_incore(sb, &offset, &size) < 0)
+			break;
+		/* A merge or offset == 0 */
+		if (offset == last_offset + last_size) {
+			last_size += size;
+			offset = offset + size;
+			continue;
+		}
+		ent.offset = last_offset;
+		ent.size = last_size;
+		if (copy_to_user((void *)(long)vec_addr, &ent, sizeof(ent)))
+			return -EFAULT;
+		vec_addr += sizeof(ent);
+		entries++;
+
+		last_offset = offset;
+		last_size = size;
+		ent.unused = 0;
+		offset = offset + size;
+	}
+
+	if (last_size > 0 && vec_addr < args.vec_addr + args.vec_size) {
+		ent.offset = last_offset;
+		ent.size = last_size;
+		if (copy_to_user((void *)(long)vec_addr, &ent, sizeof(ent)))
+			return -EFAULT;
+		entries++;
+	}
+
+	return entries;
+}
+
+/*
  * When you add any new common ioctls to the switches above and below
  * please update compat_sys_ioctl() too.
  *
@@ -589,6 +661,9 @@ int do_vfs_ioctl(struct file *filp, unsi
 		return put_user(inode->i_sb->s_blocksize, p);
 	}
 
+	case FIMETADATA_INCORE:
+		return ioctl_metadata_incore(filp, argp);
+
 	default:
 		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			error = file_ioctl(filp, cmd, arg);
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h	2010-12-31 08:51:35.000000000 +0800
+++ linux/include/linux/fs.h	2011-01-03 21:15:48.000000000 +0800
@@ -52,6 +52,18 @@ struct inodes_stat_t {
 	int dummy[5];		/* padding for sysctl ABI compatibility */
 };
 
+struct metadata_incore_ent {
+	__u64 offset;
+	__u32 size;
+	__u32 unused;
+};
+
+struct metadata_incore_args {
+	__u64 offset; /* offset in meta address */
+	__u64 __user vec_addr; /* vector's address */
+	__u32 vec_size; /* vector's size */
+	__u32 unused;
+};
 
 #define NR_FILE  8192	/* this can well be larger on a larger system */
 
@@ -325,6 +337,7 @@ struct inodes_stat_t {
 #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
 #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
 #define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
+#define FIMETADATA_INCORE _IOWR('X', 122, struct metadata_incore_args)
 
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
@@ -1613,6 +1626,8 @@ struct super_operations {
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
 #endif
 	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
+	int (*metadata_incore)(struct super_block*, loff_t *offset,
+		ssize_t *size);
 };
 
 /*
Index: linux/fs/compat_ioctl.c
===================================================================
--- linux.orig/fs/compat_ioctl.c	2010-12-31 08:52:00.000000000 +0800
+++ linux/fs/compat_ioctl.c	2011-01-03 21:15:48.000000000 +0800
@@ -882,6 +882,7 @@ COMPATIBLE_IOCTL(FIGETBSZ)
 /* 'X' - originally XFS but some now in the VFS */
 COMPATIBLE_IOCTL(FIFREEZE)
 COMPATIBLE_IOCTL(FITHAW)
+COMPATIBLE_IOCTL(FIMETADATA_INCORE)
 COMPATIBLE_IOCTL(KDGETKEYCODE)
 COMPATIBLE_IOCTL(KDSETKEYCODE)
 COMPATIBLE_IOCTL(KDGKBTYPE)



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/5] add metadata_incore ioctl in vfs
  2011-01-04  5:40 [PATCH v2 1/5] add metadata_incore ioctl in vfs Shaohua Li
@ 2011-01-04  9:40 ` Arnd Bergmann
       [not found]   ` <201101041040.31482.arnd-r2nGTMty4D4@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Arnd Bergmann @ 2011-01-04  9:40 UTC (permalink / raw)
  To: Shaohua Li
  Cc: linux-btrfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Chris Mason, Christoph Hellwig, Andrew Morton, Arjan van de Ven,
	Yan, Zheng, linux-api-u79uwXL29TY76Z2rM5mHXA,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w

On Tuesday 04 January 2011 06:40:32 Shaohua Li wrote:

> +static int ioctl_metadata_incore(struct file *filp, void __user *argp)
> +{
> +       struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
> +       struct metadata_incore_args args;
> +       struct metadata_incore_ent ent;
> +       loff_t offset, last_offset = 0;
> +       ssize_t size, last_size = 0;
> +       __u64 __user vec_addr;

__user only makes sense on pointers. Just make this a "struct
metadata_incore_ent __user *", which will also take care of the
"sparse" warnings you get from the copy_to_user lines below.

>  
> +struct metadata_incore_ent {
> +       __u64 offset;
> +       __u32 size;
> +       __u32 unused;
> +};
> +
> +struct metadata_incore_args {
> +       __u64 offset; /* offset in meta address */
> +       __u64 __user vec_addr; /* vector's address */
> +       __u32 vec_size; /* vector's size */
> +       __u32 unused;
> +};

We usually try hard to avoid ioctls with indirect pointers
in them. The implementation is correct (most people
get this wrong), besides the extraneous __user keyword in
there.

Have you tried passing just a single metadata_incore_ent
at the ioctl and looping in user space? I would guess the
extra overhead of that would be small enough, but that might
need to be measured.

	Arnd

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/5] add metadata_incore ioctl in vfs
       [not found]   ` <201101041040.31482.arnd-r2nGTMty4D4@public.gmane.org>
@ 2011-01-05  2:17     ` Shaohua Li
  2011-01-05  9:42       ` Arnd Bergmann
  0 siblings, 1 reply; 8+ messages in thread
From: Shaohua Li @ 2011-01-05  2:17 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: linux-btrfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Chris Mason, Christoph Hellwig, Andrew Morton, Arjan van de Ven,
	Yan, Zheng, linux-api-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org

On Tue, 2011-01-04 at 17:40 +0800, Arnd Bergmann wrote:
> On Tuesday 04 January 2011 06:40:32 Shaohua Li wrote:
> 
> > +static int ioctl_metadata_incore(struct file *filp, void __user *argp)
> > +{
> > +       struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
> > +       struct metadata_incore_args args;
> > +       struct metadata_incore_ent ent;
> > +       loff_t offset, last_offset = 0;
> > +       ssize_t size, last_size = 0;
> > +       __u64 __user vec_addr;
> 
> __user only makes sense on pointers. Just make this a "struct
> metadata_incore_ent __user *", which will also take care of the
> "sparse" warnings you get from the copy_to_user lines below.
thanks for your comments.
ok, fixed it.

> >  
> > +struct metadata_incore_ent {
> > +       __u64 offset;
> > +       __u32 size;
> > +       __u32 unused;
> > +};
> > +
> > +struct metadata_incore_args {
> > +       __u64 offset; /* offset in meta address */
> > +       __u64 __user vec_addr; /* vector's address */
> > +       __u32 vec_size; /* vector's size */
> > +       __u32 unused;
> > +};
> 
> We usually try hard to avoid ioctls with indirect pointers
> in them. The implementation is correct (most people
> get this wrong), besides the extraneous __user keyword in
> there.
fixed the extraneous __user.

> Have you tried passing just a single metadata_incore_ent
> at the ioctl and looping in user space? I would guess the
> extra overhead of that would be small enough, but that might
> need to be measured.
metadata usually isn't continuous, so this means we have a lot of
metadata_incore_ent entries. And this is called at boot time and I want
to make the overhead as low as possible to not impact boot. Unless there
are certain reasons we can't use indirect pointers, I'd like to make
kernel return a vector of entries.
updated patch follows.

Thanks,
Shaohua


Subject: add metadata_incore ioctl in vfs

Add an ioctl to dump filesystem's metadata in memory in vfs. Userspace collects
such info and uses it to do metadata readahead.
Filesystem can hook to super_operations.metadata_incore to get metadata in
specific approach. Next patch will give an example how to implement
.metadata_incore in btrfs.

Signed-off-by: Shaohua Li <shaohua.li-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

---
 fs/compat_ioctl.c  |    2 +
 fs/ioctl.c         |   77 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |   15 ++++++++++
 3 files changed, 94 insertions(+)

Index: linux/fs/ioctl.c
===================================================================
--- linux.orig/fs/ioctl.c	2011-01-04 10:25:11.000000000 +0800
+++ linux/fs/ioctl.c	2011-01-05 09:53:09.000000000 +0800
@@ -530,6 +530,80 @@ static int ioctl_fsthaw(struct file *fil
 }
 
 /*
+ * Copy info about metadata in memory to userspace
+ * Returns:
+ * > 0, number of metadata_incore_ent entries copied to userspace
+ * = 0, no more metadata
+ * < 0, error
+ */
+static int ioctl_metadata_incore(struct file *filp, void __user *argp)
+{
+	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	struct metadata_incore_args args;
+	struct metadata_incore_ent ent;
+	loff_t offset, last_offset = 0;
+	ssize_t size, last_size = 0;
+	__u64 vec_addr;
+	int entries = 0;
+
+	if (!sb->s_op->metadata_incore)
+		return -EINVAL;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	/* we check metadata info in page unit */
+	if (args.offset & ~PAGE_CACHE_MASK)
+		return -EINVAL;
+
+	if ((args.vec_size % sizeof(struct metadata_incore_ent)) != 0)
+		return -EINVAL;
+
+	offset = args.offset;
+
+	ent.unused = 0;
+	vec_addr = args.vec_addr;
+
+	while (vec_addr < args.vec_addr + args.vec_size) {
+		if (signal_pending(current))
+			return -EINTR;
+		cond_resched();
+
+		if (sb->s_op->metadata_incore(sb, &offset, &size) < 0)
+			break;
+		/* A merge or offset == 0 */
+		if (offset == last_offset + last_size) {
+			last_size += size;
+			offset = offset + size;
+			continue;
+		}
+		ent.offset = last_offset;
+		ent.size = last_size;
+		if (copy_to_user((void __user *)(long)vec_addr, &ent,
+				sizeof(ent)))
+			return -EFAULT;
+		vec_addr += sizeof(ent);
+		entries++;
+
+		last_offset = offset;
+		last_size = size;
+		ent.unused = 0;
+		offset = offset + size;
+	}
+
+	if (last_size > 0 && vec_addr < args.vec_addr + args.vec_size) {
+		ent.offset = last_offset;
+		ent.size = last_size;
+		if (copy_to_user((void __user *)(long)vec_addr, &ent,
+				sizeof(ent)))
+			return -EFAULT;
+		entries++;
+	}
+
+	return entries;
+}
+
+/*
  * When you add any new common ioctls to the switches above and below
  * please update compat_sys_ioctl() too.
  *
@@ -589,6 +663,9 @@ int do_vfs_ioctl(struct file *filp, unsi
 		return put_user(inode->i_sb->s_blocksize, p);
 	}
 
+	case FIMETADATA_INCORE:
+		return ioctl_metadata_incore(filp, argp);
+
 	default:
 		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			error = file_ioctl(filp, cmd, arg);
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h	2011-01-04 10:25:11.000000000 +0800
+++ linux/include/linux/fs.h	2011-01-05 09:10:12.000000000 +0800
@@ -52,6 +52,18 @@ struct inodes_stat_t {
 	int dummy[5];		/* padding for sysctl ABI compatibility */
 };
 
+struct metadata_incore_ent {
+	__u64 offset;
+	__u32 size;
+	__u32 unused;
+};
+
+struct metadata_incore_args {
+	__u64 offset; /* offset in meta address */
+	__u64 vec_addr; /* vector's address */
+	__u32 vec_size; /* vector's size */
+	__u32 unused;
+};
 
 #define NR_FILE  8192	/* this can well be larger on a larger system */
 
@@ -325,6 +337,7 @@ struct inodes_stat_t {
 #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
 #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
 #define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
+#define FIMETADATA_INCORE _IOWR('X', 122, struct metadata_incore_args)
 
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
@@ -1613,6 +1626,8 @@ struct super_operations {
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
 #endif
 	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
+	int (*metadata_incore)(struct super_block*, loff_t *offset,
+		ssize_t *size);
 };
 
 /*
Index: linux/fs/compat_ioctl.c
===================================================================
--- linux.orig/fs/compat_ioctl.c	2011-01-04 10:25:11.000000000 +0800
+++ linux/fs/compat_ioctl.c	2011-01-05 08:58:06.000000000 +0800
@@ -882,6 +882,7 @@ COMPATIBLE_IOCTL(FIGETBSZ)
 /* 'X' - originally XFS but some now in the VFS */
 COMPATIBLE_IOCTL(FIFREEZE)
 COMPATIBLE_IOCTL(FITHAW)
+COMPATIBLE_IOCTL(FIMETADATA_INCORE)
 COMPATIBLE_IOCTL(KDGETKEYCODE)
 COMPATIBLE_IOCTL(KDSETKEYCODE)
 COMPATIBLE_IOCTL(KDGKBTYPE)
@@ -1577,6 +1578,7 @@ asmlinkage long compat_sys_ioctl(unsigne
 	case FIONBIO:
 	case FIOASYNC:
 	case FIOQSIZE:
+	case FIMETADATA_INCORE:
 		break;
 
 #if defined(CONFIG_IA64) || defined(CONFIG_X86_64)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/5] add metadata_incore ioctl in vfs
  2011-01-05  2:17     ` Shaohua Li
@ 2011-01-05  9:42       ` Arnd Bergmann
       [not found]         ` <201101051042.37181.arnd-r2nGTMty4D4@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Arnd Bergmann @ 2011-01-05  9:42 UTC (permalink / raw)
  To: Shaohua Li
  Cc: linux-btrfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Chris Mason, Christoph Hellwig, Andrew Morton, Arjan van de Ven,
	Yan, Zheng, linux-api-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org

On Wednesday 05 January 2011 03:17:16 Shaohua Li wrote:
> On Tue, 2011-01-04 at 17:40 +0800, Arnd Bergmann wrote:
> 
> > Have you tried passing just a single metadata_incore_ent
> > at the ioctl and looping in user space? I would guess the
> > extra overhead of that would be small enough, but that might
> > need to be measured.
> metadata usually isn't continuous, so this means we have a lot of
> metadata_incore_ent entries. And this is called at boot time and I want
> to make the overhead as low as possible to not impact boot. Unless there
> are certain reasons we can't use indirect pointers, I'd like to make
> kernel return a vector of entries.

It's not a strict rule, but the indirect data passing is rather
ugly and I'd only do that if the difference can be /measured/.

If the purpose is to speed up boot time by preloading metadata,
the FIMETADATA_INCORE operations should of course not take a
significant amount of time compared to the actual preloading,
but as long as it's less than one percent of the time you need
for the preload, I would just use the simpler interface.

> @@ -882,6 +882,7 @@ COMPATIBLE_IOCTL(FIGETBSZ)
>  /* 'X' - originally XFS but some now in the VFS */
>  COMPATIBLE_IOCTL(FIFREEZE)
>  COMPATIBLE_IOCTL(FITHAW)
> +COMPATIBLE_IOCTL(FIMETADATA_INCORE)
>  COMPATIBLE_IOCTL(KDGETKEYCODE)
>  COMPATIBLE_IOCTL(KDSETKEYCODE)
>  COMPATIBLE_IOCTL(KDGKBTYPE)

This change can go away as well.

Two more general comments:

- You probably want to add the ioctls to file_ioctl instead of do_vfs_ioctl,
  so you don't add another case statement to the common path.

- I don't know if there are any rules for what should be an ioctl or an
  fcntl, we're rather inconsistent about this. If you have found a good
  reason for making it an ioctl, just put that into the changelog so we
  can refer to it next time.

	Arnd

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/5] add metadata_incore ioctl in vfs
       [not found]         ` <201101051042.37181.arnd-r2nGTMty4D4@public.gmane.org>
@ 2011-01-06  1:13           ` Shaohua Li
  2011-01-06  7:38             ` Arnd Bergmann
  0 siblings, 1 reply; 8+ messages in thread
From: Shaohua Li @ 2011-01-06  1:13 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: linux-btrfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Chris Mason, Christoph Hellwig, Andrew Morton, Arjan van de Ven,
	Yan, Zheng, linux-api-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org

On Wed, 2011-01-05 at 17:42 +0800, Arnd Bergmann wrote:
> On Wednesday 05 January 2011 03:17:16 Shaohua Li wrote:
> > On Tue, 2011-01-04 at 17:40 +0800, Arnd Bergmann wrote:
> > 
> > > Have you tried passing just a single metadata_incore_ent
> > > at the ioctl and looping in user space? I would guess the
> > > extra overhead of that would be small enough, but that might
> > > need to be measured.
> > metadata usually isn't continuous, so this means we have a lot of
> > metadata_incore_ent entries. And this is called at boot time and I want
> > to make the overhead as low as possible to not impact boot. Unless there
> > are certain reasons we can't use indirect pointers, I'd like to make
> > kernel return a vector of entries.
> 
> It's not a strict rule, but the indirect data passing is rather
> ugly and I'd only do that if the difference can be /measured/.
> 
> If the purpose is to speed up boot time by preloading metadata,
> the FIMETADATA_INCORE operations should of course not take a
> significant amount of time compared to the actual preloading,
> but as long as it's less than one percent of the time you need
> for the preload, I would just use the simpler interface.
ok, just have a measurement, the overhead is acceptable. I'll change the
code to just accept one entry.

> > @@ -882,6 +882,7 @@ COMPATIBLE_IOCTL(FIGETBSZ)
> >  /* 'X' - originally XFS but some now in the VFS */
> >  COMPATIBLE_IOCTL(FIFREEZE)
> >  COMPATIBLE_IOCTL(FITHAW)
> > +COMPATIBLE_IOCTL(FIMETADATA_INCORE)
> >  COMPATIBLE_IOCTL(KDGETKEYCODE)
> >  COMPATIBLE_IOCTL(KDSETKEYCODE)
> >  COMPATIBLE_IOCTL(KDGKBTYPE)
> 
> This change can go away as well.
I don't understand. adding a case statement in compat_sys_ioctl, so we will do
compat_ioctl_check_table(). If I add COMPATIBLE_IOCTL(), then the check
will success, we will go to the found_handler code path and execute
do_vfs_ioctl, which is what we want. if not adding COMPATIBLE_IOCTL(),
the check will fail, and in any case, we will go to the out_fput code
path, so our ioctl does nothing.

> Two more general comments:
> 
> - You probably want to add the ioctls to file_ioctl instead of do_vfs_ioctl,
>   so you don't add another case statement to the common path.
> 
> - I don't know if there are any rules for what should be an ioctl or an
>   fcntl, we're rather inconsistent about this. If you have found a good
>   reason for making it an ioctl, just put that into the changelog so we
>   can refer to it next time.
it can be applied to a directory too. I thought file_ioctl or fcntl is
for file.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/5] add metadata_incore ioctl in vfs
  2011-01-06  1:13           ` Shaohua Li
@ 2011-01-06  7:38             ` Arnd Bergmann
       [not found]               ` <201101060838.49359.arnd-r2nGTMty4D4@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Arnd Bergmann @ 2011-01-06  7:38 UTC (permalink / raw)
  To: Shaohua Li
  Cc: linux-btrfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Chris Mason, Christoph Hellwig, Andrew Morton, Arjan van de Ven,
	Yan, Zheng, linux-api-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org

On Thursday 06 January 2011, Shaohua Li wrote:
> I don't understand. adding a case statement in compat_sys_ioctl, so we will do
> compat_ioctl_check_table(). If I add COMPATIBLE_IOCTL(), then the check
> will success, we will go to the found_handler code path and execute
> do_vfs_ioctl, which is what we want. if not adding COMPATIBLE_IOCTL(),
> the check will fail, and in any case, we will go to the out_fput code
> path, so our ioctl does nothing.

You are correct, I misremembered the code and did not check properly.

> > Two more general comments:
> > 
> > - You probably want to add the ioctls to file_ioctl instead of do_vfs_ioctl,
> >   so you don't add another case statement to the common path.
> > 
> > - I don't know if there are any rules for what should be an ioctl or an
> >   fcntl, we're rather inconsistent about this. If you have found a good
> >   reason for making it an ioctl, just put that into the changelog so we
> >   can refer to it next time.
> it can be applied to a directory too. I thought file_ioctl or fcntl is
> for file.

Right again, good point!

	Arnd

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/5] add metadata_incore ioctl in vfs
       [not found]               ` <201101060838.49359.arnd-r2nGTMty4D4@public.gmane.org>
@ 2011-01-06  7:45                 ` Shaohua Li
  2011-01-07 14:15                   ` Arnd Bergmann
  0 siblings, 1 reply; 8+ messages in thread
From: Shaohua Li @ 2011-01-06  7:45 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: linux-btrfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Chris Mason, Christoph Hellwig, Andrew Morton, Arjan van de Ven,
	Yan, Zheng, linux-api-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org

On Thu, 2011-01-06 at 15:38 +0800, Arnd Bergmann wrote:
> On Thursday 06 January 2011, Shaohua Li wrote:
> > I don't understand. adding a case statement in compat_sys_ioctl, so we will do
> > compat_ioctl_check_table(). If I add COMPATIBLE_IOCTL(), then the check
> > will success, we will go to the found_handler code path and execute
> > do_vfs_ioctl, which is what we want. if not adding COMPATIBLE_IOCTL(),
> > the check will fail, and in any case, we will go to the out_fput code
> > path, so our ioctl does nothing.
> 
> You are correct, I misremembered the code and did not check properly.
> 
> > > Two more general comments:
> > > 
> > > - You probably want to add the ioctls to file_ioctl instead of do_vfs_ioctl,
> > >   so you don't add another case statement to the common path.
> > > 
> > > - I don't know if there are any rules for what should be an ioctl or an
> > >   fcntl, we're rather inconsistent about this. If you have found a good
> > >   reason for making it an ioctl, just put that into the changelog so we
> > >   can refer to it next time.
> > it can be applied to a directory too. I thought file_ioctl or fcntl is
> > for file.
> 
> Right again, good point!
ok, below is my latest patch after adopting your comments.

Subject: add metadata_incore ioctl in vfs

Add an ioctl to dump filesystem's metadata in memory in vfs. Userspace collects
such info and uses it to do metadata readahead.
Filesystem can hook to super_operations.metadata_incore to get metadata in
specific approach. Next patch will give an example how to implement
.metadata_incore in btrfs.

Signed-off-by: Shaohua Li <shaohua.li-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

---
 fs/compat_ioctl.c  |    2 ++
 fs/ioctl.c         |   42 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |   10 ++++++++++
 3 files changed, 54 insertions(+)

Index: linux/fs/ioctl.c
===================================================================
--- linux.orig/fs/ioctl.c	2011-01-06 09:15:22.000000000 +0800
+++ linux/fs/ioctl.c	2011-01-06 09:35:12.000000000 +0800
@@ -530,6 +530,45 @@ static int ioctl_fsthaw(struct file *fil
 }
 
 /*
+ * Copy info about metadata in memory to userspace
+ * Returns:
+ * = 1, one metadata range copied to userspace
+ * = 0, no more metadata
+ * < 0, error
+ */
+static int ioctl_metadata_incore(struct file *filp, void __user *argp)
+{
+	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	struct metadata_incore_args args;
+	loff_t offset;
+	ssize_t size;
+
+	if (!sb->s_op->metadata_incore)
+		return -EINVAL;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	/* we check metadata info in page unit */
+	if (args.offset & ~PAGE_CACHE_MASK)
+		return -EINVAL;
+
+	offset = args.offset;
+
+	if (sb->s_op->metadata_incore(sb, &offset, &size) < 0)
+		return 0;
+
+	args.address = offset;
+	args.size = size;
+	args.unused = 0;
+
+	if (copy_to_user(argp, &args, sizeof(args)))
+		return -EFAULT;
+
+	return 1;
+}
+
+/*
  * When you add any new common ioctls to the switches above and below
  * please update compat_sys_ioctl() too.
  *
@@ -589,6 +628,9 @@ int do_vfs_ioctl(struct file *filp, unsi
 		return put_user(inode->i_sb->s_blocksize, p);
 	}
 
+	case FIMETADATA_INCORE:
+		return ioctl_metadata_incore(filp, argp);
+
 	default:
 		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			error = file_ioctl(filp, cmd, arg);
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h	2011-01-06 09:15:22.000000000 +0800
+++ linux/include/linux/fs.h	2011-01-06 09:31:36.000000000 +0800
@@ -53,6 +53,13 @@ struct inodes_stat_t {
 };
 
 
+struct metadata_incore_args {
+	__u64 offset; /* offset in metadata address */
+	__u64 address; /* returned address of metadata in memory */
+	__u32 size; /* size of the metadata */
+	__u32 unused;
+};
+
 #define NR_FILE  8192	/* this can well be larger on a larger system */
 
 #define MAY_EXEC 1
@@ -325,6 +332,7 @@ struct inodes_stat_t {
 #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
 #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
 #define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
+#define FIMETADATA_INCORE _IOWR('X', 122, struct metadata_incore_args)
 
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
@@ -1613,6 +1621,8 @@ struct super_operations {
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
 #endif
 	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
+	int (*metadata_incore)(struct super_block*, loff_t *offset,
+		ssize_t *size);
 };
 
 /*
Index: linux/fs/compat_ioctl.c
===================================================================
--- linux.orig/fs/compat_ioctl.c	2011-01-06 09:15:22.000000000 +0800
+++ linux/fs/compat_ioctl.c	2011-01-06 09:15:24.000000000 +0800
@@ -882,6 +882,7 @@ COMPATIBLE_IOCTL(FIGETBSZ)
 /* 'X' - originally XFS but some now in the VFS */
 COMPATIBLE_IOCTL(FIFREEZE)
 COMPATIBLE_IOCTL(FITHAW)
+COMPATIBLE_IOCTL(FIMETADATA_INCORE)
 COMPATIBLE_IOCTL(KDGETKEYCODE)
 COMPATIBLE_IOCTL(KDSETKEYCODE)
 COMPATIBLE_IOCTL(KDGKBTYPE)
@@ -1577,6 +1578,7 @@ asmlinkage long compat_sys_ioctl(unsigne
 	case FIONBIO:
 	case FIOASYNC:
 	case FIOQSIZE:
+	case FIMETADATA_INCORE:
 		break;
 
 #if defined(CONFIG_IA64) || defined(CONFIG_X86_64)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/5] add metadata_incore ioctl in vfs
  2011-01-06  7:45                 ` Shaohua Li
@ 2011-01-07 14:15                   ` Arnd Bergmann
  0 siblings, 0 replies; 8+ messages in thread
From: Arnd Bergmann @ 2011-01-07 14:15 UTC (permalink / raw)
  To: Shaohua Li
  Cc: linux-btrfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	Chris Mason, Christoph Hellwig, Andrew Morton, Arjan van de Ven,
	Yan, Zheng, linux-api@vger.kernel.org, mtk.manpages@gmail.com

On Thursday 06 January 2011, Shaohua Li wrote:
> Subject: add metadata_incore ioctl in vfs
> 
> Add an ioctl to dump filesystem's metadata in memory in vfs. Userspace collects
> such info and uses it to do metadata readahead.
> Filesystem can hook to super_operations.metadata_incore to get metadata in
> specific approach. Next patch will give an example how to implement
> .metadata_incore in btrfs.
> 
> Signed-off-by: Shaohua Li <shaohua.li@intel.com>

Looks great!

Reviewed-by: Arnd Bergmann <arnd@arndb.de>

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2011-01-07 14:15 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-01-04  5:40 [PATCH v2 1/5] add metadata_incore ioctl in vfs Shaohua Li
2011-01-04  9:40 ` Arnd Bergmann
     [not found]   ` <201101041040.31482.arnd-r2nGTMty4D4@public.gmane.org>
2011-01-05  2:17     ` Shaohua Li
2011-01-05  9:42       ` Arnd Bergmann
     [not found]         ` <201101051042.37181.arnd-r2nGTMty4D4@public.gmane.org>
2011-01-06  1:13           ` Shaohua Li
2011-01-06  7:38             ` Arnd Bergmann
     [not found]               ` <201101060838.49359.arnd-r2nGTMty4D4@public.gmane.org>
2011-01-06  7:45                 ` Shaohua Li
2011-01-07 14:15                   ` Arnd Bergmann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).