linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Miklos Szeredi <miklos@szeredi.hu>
To: David Howells <dhowells@redhat.com>
Cc: Ian Kent <raven@themaw.net>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Al Viro <viro@zeniv.linux.org.uk>,
	Linux NFS list <linux-nfs@vger.kernel.org>,
	Andreas Dilger <adilger.kernel@dilger.ca>,
	Anna Schumaker <anna.schumaker@netapp.com>,
	"Theodore Ts'o" <tytso@mit.edu>,
	Linux API <linux-api@vger.kernel.org>,
	linux-ext4@vger.kernel.org,
	Trond Myklebust <trond.myklebust@hammerspace.com>,
	Miklos Szeredi <mszeredi@redhat.com>,
	Christian Brauner <christian@brauner.io>,
	Jann Horn <jannh@google.com>,
	"Darrick J. Wong" <darrick.wong@oracle.com>,
	Karel Zak <kzak@redhat.com>, Jeff Layton <jlayton@redhat.com>,
	linux-fsdevel@vger.kernel.org,
	LSM <linux-security-module@vger.kernel.org>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 00/13] VFS: Filesystem information [ver #19]
Date: Wed, 1 Apr 2020 14:35:54 +0200	[thread overview]
Message-ID: <CAJfpeguxACC68bMhS-mNm4m6ytrKgs1--jbF5X3tBiPf_iG1jg@mail.gmail.com> (raw)
In-Reply-To: <CAJfpegsyeJmH3zJuseaAAY06fzgavSzpOtYr-1Mw8GR0cLcQbA@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1618 bytes --]

On Wed, Apr 1, 2020 at 10:37 AM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Wed, Apr 1, 2020 at 10:27 AM David Howells <dhowells@redhat.com> wrote:
> >
> > Miklos Szeredi <miklos@szeredi.hu> wrote:
> >
> > > According to dhowell's measurements processing 100k mounts would take
> > > about a few seconds of system time (that's the time spent by the
> > > kernel to retrieve the data,
> >
> > But the inefficiency of mountfs - at least as currently implemented - scales
> > up with the number of individual values you want to retrieve, both in terms of
> > memory usage and time taken.
>
> I've taken that into account when guesstimating a "few seconds per
> 100k entries".  My guess is that there's probably an order of
> magnitude difference between the performance of a fs based interface
> and a binary syscall based interface.  That could be reduced somewhat
> with a readfile(2) type API.

And to show that I'm not completely off base, attached is a patch that
adds a limited readfile(2) syscall and uses it in the p2 method.

Results are promising:

./test-fsinfo-perf /tmp/a 30000
--- make mounts ---
--- test fsinfo by path ---
sum(mnt_id) = 930000
--- test fsinfo by mnt_id ---
sum(mnt_id) = 930000
--- test /proc/fdinfo ---
sum(mnt_id) = 930000
--- test mountfs ---
sum(mnt_id) = 930000
For   30000 mounts, f=    146400us f2=    136766us p=   1406569us p2=
  221669us; p=9.6*f p=10.3*f2 p=6.3*p2
--- umount ---

This is about a 2-fold increase in speed compared to open + read + close.

Is someone still worried about performance, or can we move on to more
interesting parts of the design?

Thanks,
Miklos

[-- Attachment #2: fsmount-readfile.patch --]
[-- Type: text/x-patch, Size: 6326 bytes --]

Index: linux/fs/mountfs/super.c
===================================================================
--- linux.orig/fs/mountfs/super.c	2020-04-01 14:21:24.609955072 +0200
+++ linux/fs/mountfs/super.c	2020-04-01 14:21:42.426151545 +0200
@@ -51,10 +51,11 @@ static bool mountfs_entry_visible(struct
 
 	return visible;
 }
+
 static int mountfs_attr_show(struct seq_file *sf, void *v)
 {
 	const char *name = sf->file->f_path.dentry->d_name.name;
-	struct mountfs_entry *entry = sf->private;
+	struct mountfs_entry *entry = file_inode(sf->file)->i_private;
 	struct mount *mnt;
 	struct vfsmount *m;
 	struct super_block *sb;
@@ -140,12 +141,40 @@ static int mountfs_attr_show(struct seq_
 	return err;
 }
 
+ssize_t mountfs_attr_readfile(struct file *file, char __user *buf, size_t size)
+{
+	struct seq_file m = { .size = PAGE_SIZE, .file = file };
+	ssize_t ret;
+
+retry:
+	m.buf = kvmalloc(m.size, GFP_KERNEL);
+	if (!m.buf)
+		return -ENOMEM;
+
+	ret = mountfs_attr_show(&m, NULL);
+	if (!ret) {
+		if (m.count == m.size) {
+			kvfree(m.buf);
+			m.size <<= 1;
+			m.count = 0;
+			goto retry;
+		}
+		ret = min(m.count, size);
+		if (copy_to_user(buf, m.buf, ret))
+			ret = -EFAULT;
+	}
+
+	kvfree(m.buf);
+	return ret;
+}
+
 static int mountfs_attr_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, mountfs_attr_show, inode->i_private);
+	return single_open(file, mountfs_attr_show, NULL);
 }
 
 static const struct file_operations mountfs_attr_fops = {
+	.readfile	= mountfs_attr_readfile,
 	.open		= mountfs_attr_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
Index: linux/samples/vfs/test-fsinfo-perf.c
===================================================================
--- linux.orig/samples/vfs/test-fsinfo-perf.c	2020-04-01 14:21:24.609955072 +0200
+++ linux/samples/vfs/test-fsinfo-perf.c	2020-04-01 14:21:42.426151545 +0200
@@ -172,6 +172,12 @@ static void get_id_by_proc(int ix, const
 	//printf("[%u] %u\n", ix, x);
 }
 
+static long readfile(int dfd, const char *name, char *buffer, size_t size,
+		     int flags)
+{
+	return syscall(__NR_readfile, dfd, name, buffer, size, flags);
+}
+
 static void get_id_by_fsinfo_2(void)
 {
 	struct fsinfo_mount_topology t;
@@ -300,11 +306,8 @@ static void get_id_by_mountfs(void)
 		}
 
 		sprintf(procfile, "%u/parent", mnt_id);
-		fd = openat(mntfd, procfile, O_RDONLY);
-		ERR(fd, procfile);
-		len = read(fd, buffer, sizeof(buffer) - 1);
-		ERR(len, "read/parent");
-		close(fd);
+		len = readfile(mntfd, procfile, buffer, sizeof(buffer), 0);
+		ERR(len, "readfile/parent");
 		if (len > 0 && buffer[len - 1] == '\n')
 			len--;
 		buffer[len] = 0;
@@ -319,11 +322,8 @@ static void get_id_by_mountfs(void)
 		sum_check += x;
 
 		sprintf(procfile, "%u/counter", mnt_id);
-		fd = openat(mntfd, procfile, O_RDONLY);
-		ERR(fd, procfile);
-		len = read(fd, buffer, sizeof(buffer) - 1);
-		ERR(len, "read/counter");
-		close(fd);
+		len = readfile(mntfd, procfile, buffer, sizeof(buffer) - 1, 0);
+		ERR(len, "readfile/counter");
 		if (len > 0 && buffer[len - 1] == '\n')
 			len--;
 		buffer[len] = 0;
Index: linux/arch/x86/entry/syscalls/syscall_64.tbl
===================================================================
--- linux.orig/arch/x86/entry/syscalls/syscall_64.tbl	2020-04-01 14:21:37.284094840 +0200
+++ linux/arch/x86/entry/syscalls/syscall_64.tbl	2020-04-01 14:21:42.412151390 +0200
@@ -362,6 +362,7 @@
 439	common	watch_mount		__x64_sys_watch_mount
 440	common	watch_sb		__x64_sys_watch_sb
 441	common	fsinfo			__x64_sys_fsinfo
+442	common	readfile		__x64_sys_readfile
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
Index: linux/fs/open.c
===================================================================
--- linux.orig/fs/open.c	2020-04-01 14:21:37.284094840 +0200
+++ linux/fs/open.c	2020-04-01 14:21:42.424151523 +0200
@@ -1340,3 +1340,25 @@ int stream_open(struct inode *inode, str
 }
 
 EXPORT_SYMBOL(stream_open);
+
+SYSCALL_DEFINE5(readfile, int, dfd, const char __user *, filename,
+		char __user *, buffer, size_t, bufsize, int, flags)
+{
+	ssize_t ret;
+	struct file file = {};
+
+	if (flags)
+		return -EINVAL;
+
+	ret = user_path_at(dfd, filename, 0, &file.f_path);
+	if (!ret) {
+		file.f_inode = file.f_path.dentry->d_inode;
+		file.f_op = file.f_inode->i_fop;
+		ret = -EOPNOTSUPP;
+		if (file.f_op->readfile)
+			ret = file.f_op->readfile(&file, buffer, bufsize);
+		path_put(&file.f_path);
+	}
+
+	return ret;
+}
Index: linux/include/linux/syscalls.h
===================================================================
--- linux.orig/include/linux/syscalls.h	2020-04-01 14:21:37.284094840 +0200
+++ linux/include/linux/syscalls.h	2020-04-01 14:21:42.413151401 +0200
@@ -1011,6 +1011,8 @@ asmlinkage long sys_watch_sb(int dfd, co
 asmlinkage long sys_fsinfo(int dfd, const char __user *pathname,
 			   struct fsinfo_params __user *params, size_t params_size,
 			   void __user *result_buffer, size_t result_buf_size);
+asmlinkage long sys_readfile(int dfd, const char __user *filename,
+			     char __user *buffer, size_t bufsize, int flags);
 
 /*
  * Architecture-specific system calls
Index: linux/include/uapi/asm-generic/unistd.h
===================================================================
--- linux.orig/include/uapi/asm-generic/unistd.h	2020-04-01 14:21:37.284094840 +0200
+++ linux/include/uapi/asm-generic/unistd.h	2020-04-01 14:21:42.413151401 +0200
@@ -861,9 +861,11 @@ __SYSCALL(__NR_watch_mount, sys_watch_mo
 __SYSCALL(__NR_watch_sb, sys_watch_sb)
 #define __NR_fsinfo 441
 __SYSCALL(__NR_fsinfo, sys_fsinfo)
+#define __NR_readfile 442
+__SYSCALL(__NR_readfile, sys_readfile)
 
 #undef __NR_syscalls
-#define __NR_syscalls 442
+#define __NR_syscalls 443
 
 /*
  * 32 bit systems traditionally used different
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h	2020-04-01 14:21:19.144894804 +0200
+++ linux/include/linux/fs.h	2020-04-01 14:21:42.425151534 +0200
@@ -1868,6 +1868,7 @@ struct file_operations {
 				   struct file *file_out, loff_t pos_out,
 				   loff_t len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
+	ssize_t (*readfile)(struct file *, char __user *, size_t);
 } __randomize_layout;
 
 struct inode_operations {

  reply	other threads:[~2020-04-01 12:36 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-18 15:08 [PATCH 00/13] VFS: Filesystem information [ver #19] David Howells
2020-03-18 15:09 ` [PATCH 12/13] fsinfo: Example support for Ext4 " David Howells
2020-03-18 16:05 ` [PATCH 00/13] VFS: Filesystem information " Miklos Szeredi
2020-04-01  5:22   ` Ian Kent
2020-04-01  8:18     ` Miklos Szeredi
2020-04-01  8:27     ` David Howells
2020-04-01  8:37       ` Miklos Szeredi
2020-04-01 12:35         ` Miklos Szeredi [this message]
2020-04-01 15:51         ` David Howells
2020-04-02  1:38         ` Ian Kent
2020-04-02 14:14           ` Karel Zak
2020-03-19 10:37 ` David Howells
2020-03-19 12:36   ` Miklos Szeredi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAJfpeguxACC68bMhS-mNm4m6ytrKgs1--jbF5X3tBiPf_iG1jg@mail.gmail.com \
    --to=miklos@szeredi.hu \
    --cc=adilger.kernel@dilger.ca \
    --cc=anna.schumaker@netapp.com \
    --cc=christian@brauner.io \
    --cc=darrick.wong@oracle.com \
    --cc=dhowells@redhat.com \
    --cc=jannh@google.com \
    --cc=jlayton@redhat.com \
    --cc=kzak@redhat.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-security-module@vger.kernel.org \
    --cc=mszeredi@redhat.com \
    --cc=raven@themaw.net \
    --cc=torvalds@linux-foundation.org \
    --cc=trond.myklebust@hammerspace.com \
    --cc=tytso@mit.edu \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).