linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH] ipc, mqueue: lazy call kern_mount_data in new namespaces
@ 2017-11-27 12:55 Giuseppe Scrivano
  2017-11-28  7:09 ` Davidlohr Bueso
  2017-11-28 21:53 ` Andrew Morton
  0 siblings, 2 replies; 6+ messages in thread
From: Giuseppe Scrivano @ 2017-11-27 12:55 UTC (permalink / raw)
  To: linux-kernel; +Cc: gregkh, mingo, gscrivan, dave

kern_mount_data is a relatively expensive operation when creating a
new IPC namespace, so delay the mount until its first usage when not
creating the the global namespace.

On my machine, the time for creating 1000 new IPC namespaces dropped
from ~8s to ~2s.

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
---
 include/linux/ipc_namespace.h |  2 +-
 ipc/mqueue.c                  | 47 ++++++++++++++++++++++++++++++++++---------
 ipc/namespace.c               |  2 +-
 3 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index b5630c8eb2f3..a06617c113f1 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -81,7 +81,7 @@ static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {}
 #endif /* CONFIG_SYSVIPC */
 
 #ifdef CONFIG_POSIX_MQUEUE
-extern int mq_init_ns(struct ipc_namespace *ns);
+extern int mq_init_ns(struct ipc_namespace *ns, bool mount);
 /*
  * POSIX Message Queue default values:
  *
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index d24025626310..46d2795bbf93 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -87,6 +87,7 @@ struct mqueue_inode_info {
 	unsigned long qsize; /* size of queue in memory (sum of all msgs) */
 };
 
+static struct file_system_type mqueue_fs_type;
 static const struct inode_operations mqueue_dir_inode_operations;
 static const struct file_operations mqueue_file_operations;
 static const struct super_operations mqueue_super_ops;
@@ -776,10 +777,24 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
 	struct filename *name;
 	int fd, error;
 	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
-	struct vfsmount *mnt = ipc_ns->mq_mnt;
-	struct dentry *root = mnt->mnt_root;
+	struct vfsmount *mnt;
+	struct dentry *root;
 	int ro;
 
+	spin_lock(&mq_lock);
+	mnt = ipc_ns->mq_mnt;
+	if (unlikely(!mnt)) {
+		mnt = kern_mount_data(&mqueue_fs_type, ipc_ns);
+		if (IS_ERR(mnt)) {
+			spin_unlock(&mq_lock);
+			return PTR_ERR(mnt);
+		}
+		ipc_ns->mq_mnt = mnt;
+	}
+	spin_unlock(&mq_lock);
+
+	root = mnt->mnt_root;
+
 	audit_mq_open(oflag, mode, attr);
 
 	if (IS_ERR(name = getname(u_name)))
@@ -863,6 +878,9 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
 	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
 	struct vfsmount *mnt = ipc_ns->mq_mnt;
 
+	if (!mnt)
+		return -ENOENT;
+
 	name = getname(u_name);
 	if (IS_ERR(name))
 		return PTR_ERR(name);
@@ -1581,7 +1599,8 @@ static struct file_system_type mqueue_fs_type = {
 	.fs_flags = FS_USERNS_MOUNT,
 };
 
-int mq_init_ns(struct ipc_namespace *ns)
+
+int mq_init_ns(struct ipc_namespace *ns, bool mount)
 {
 	ns->mq_queues_count  = 0;
 	ns->mq_queues_max    = DFLT_QUEUESMAX;
@@ -1590,23 +1609,31 @@ int mq_init_ns(struct ipc_namespace *ns)
 	ns->mq_msg_default   = DFLT_MSG;
 	ns->mq_msgsize_default  = DFLT_MSGSIZE;
 
-	ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns);
-	if (IS_ERR(ns->mq_mnt)) {
-		int err = PTR_ERR(ns->mq_mnt);
+	if (!mount)
 		ns->mq_mnt = NULL;
-		return err;
+	else {
+		int err;
+
+		ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns);
+		if (IS_ERR(ns->mq_mnt)) {
+			err = PTR_ERR(ns->mq_mnt);
+			ns->mq_mnt = NULL;
+			return err;
+		}
 	}
 	return 0;
 }
 
 void mq_clear_sbinfo(struct ipc_namespace *ns)
 {
-	ns->mq_mnt->mnt_sb->s_fs_info = NULL;
+	if (ns->mq_mnt)
+		ns->mq_mnt->mnt_sb->s_fs_info = NULL;
 }
 
 void mq_put_mnt(struct ipc_namespace *ns)
 {
-	kern_unmount(ns->mq_mnt);
+	if (ns->mq_mnt)
+		kern_unmount(ns->mq_mnt);
 }
 
 static int __init init_mqueue_fs(void)
@@ -1628,7 +1655,7 @@ static int __init init_mqueue_fs(void)
 
 	spin_lock_init(&mq_lock);
 
-	error = mq_init_ns(&init_ipc_ns);
+	error = mq_init_ns(&init_ipc_ns, true);
 	if (error)
 		goto out_filesystem;
 
diff --git a/ipc/namespace.c b/ipc/namespace.c
index f59a89966f92..9d3689577f66 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -65,7 +65,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	if (err)
 		goto fail_destroy_msg;
 
-	err = mq_init_ns(ns);
+	err = mq_init_ns(ns, false);
 	if (err)
 		goto fail_destroy_shm;
 
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] ipc, mqueue: lazy call kern_mount_data in new namespaces
  2017-11-27 12:55 [RFC PATCH] ipc, mqueue: lazy call kern_mount_data in new namespaces Giuseppe Scrivano
@ 2017-11-28  7:09 ` Davidlohr Bueso
  2017-11-28 21:53 ` Andrew Morton
  1 sibling, 0 replies; 6+ messages in thread
From: Davidlohr Bueso @ 2017-11-28  7:09 UTC (permalink / raw)
  To: Giuseppe Scrivano; +Cc: linux-kernel, gregkh, mingo, akpm

This is akpm domain.

On Mon, 27 Nov 2017, Giuseppe Scrivano wrote:

>kern_mount_data is a relatively expensive operation when creating a
>new IPC namespace, so delay the mount until its first usage when not
>creating the the global namespace.
>
>On my machine, the time for creating 1000 new IPC namespaces dropped
>from ~8s to ~2s.

Nice.

>
>Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
>---
> include/linux/ipc_namespace.h |  2 +-
> ipc/mqueue.c                  | 47 ++++++++++++++++++++++++++++++++++---------
> ipc/namespace.c               |  2 +-
> 3 files changed, 39 insertions(+), 12 deletions(-)
>
>diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
>index b5630c8eb2f3..a06617c113f1 100644
>--- a/include/linux/ipc_namespace.h
>+++ b/include/linux/ipc_namespace.h
>@@ -81,7 +81,7 @@ static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {}
> #endif /* CONFIG_SYSVIPC */
>
> #ifdef CONFIG_POSIX_MQUEUE
>-extern int mq_init_ns(struct ipc_namespace *ns);
>+extern int mq_init_ns(struct ipc_namespace *ns, bool mount);
> /*
>  * POSIX Message Queue default values:
>  *
>diff --git a/ipc/mqueue.c b/ipc/mqueue.c
>index d24025626310..46d2795bbf93 100644
>--- a/ipc/mqueue.c
>+++ b/ipc/mqueue.c
>@@ -87,6 +87,7 @@ struct mqueue_inode_info {
> 	unsigned long qsize; /* size of queue in memory (sum of all msgs) */
> };
>
>+static struct file_system_type mqueue_fs_type;
> static const struct inode_operations mqueue_dir_inode_operations;
> static const struct file_operations mqueue_file_operations;
> static const struct super_operations mqueue_super_ops;
>@@ -776,10 +777,24 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
> 	struct filename *name;
> 	int fd, error;
> 	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
>-	struct vfsmount *mnt = ipc_ns->mq_mnt;
>-	struct dentry *root = mnt->mnt_root;
>+	struct vfsmount *mnt;
>+	struct dentry *root;
> 	int ro;
>
>+	spin_lock(&mq_lock);
>+	mnt = ipc_ns->mq_mnt;
>+	if (unlikely(!mnt)) {
>+		mnt = kern_mount_data(&mqueue_fs_type, ipc_ns);
>+		if (IS_ERR(mnt)) {
>+			spin_unlock(&mq_lock);
>+			return PTR_ERR(mnt);
>+		}
>+		ipc_ns->mq_mnt = mnt;
>+	}
>+	spin_unlock(&mq_lock);
>+
>+	root = mnt->mnt_root;
>+
> 	audit_mq_open(oflag, mode, attr);
>
> 	if (IS_ERR(name = getname(u_name)))
>@@ -863,6 +878,9 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
> 	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
> 	struct vfsmount *mnt = ipc_ns->mq_mnt;
>
>+	if (!mnt)
>+		return -ENOENT;
>+
> 	name = getname(u_name);
> 	if (IS_ERR(name))
> 		return PTR_ERR(name);
>@@ -1581,7 +1599,8 @@ static struct file_system_type mqueue_fs_type = {
> 	.fs_flags = FS_USERNS_MOUNT,
> };
>
>-int mq_init_ns(struct ipc_namespace *ns)
>+
>+int mq_init_ns(struct ipc_namespace *ns, bool mount)
> {
> 	ns->mq_queues_count  = 0;
> 	ns->mq_queues_max    = DFLT_QUEUESMAX;
>@@ -1590,23 +1609,31 @@ int mq_init_ns(struct ipc_namespace *ns)
> 	ns->mq_msg_default   = DFLT_MSG;
> 	ns->mq_msgsize_default  = DFLT_MSGSIZE;
>
>-	ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns);
>-	if (IS_ERR(ns->mq_mnt)) {
>-		int err = PTR_ERR(ns->mq_mnt);
>+	if (!mount)
> 		ns->mq_mnt = NULL;
>-		return err;
>+	else {
>+		int err;
>+
>+		ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns);
>+		if (IS_ERR(ns->mq_mnt)) {
>+			err = PTR_ERR(ns->mq_mnt);
>+			ns->mq_mnt = NULL;
>+			return err;
>+		}
> 	}
> 	return 0;
> }
>
> void mq_clear_sbinfo(struct ipc_namespace *ns)
> {
>-	ns->mq_mnt->mnt_sb->s_fs_info = NULL;
>+	if (ns->mq_mnt)
>+		ns->mq_mnt->mnt_sb->s_fs_info = NULL;
> }
>
> void mq_put_mnt(struct ipc_namespace *ns)
> {
>-	kern_unmount(ns->mq_mnt);
>+	if (ns->mq_mnt)
>+		kern_unmount(ns->mq_mnt);
> }
>
> static int __init init_mqueue_fs(void)
>@@ -1628,7 +1655,7 @@ static int __init init_mqueue_fs(void)
>
> 	spin_lock_init(&mq_lock);
>
>-	error = mq_init_ns(&init_ipc_ns);
>+	error = mq_init_ns(&init_ipc_ns, true);
> 	if (error)
> 		goto out_filesystem;
>
>diff --git a/ipc/namespace.c b/ipc/namespace.c
>index f59a89966f92..9d3689577f66 100644
>--- a/ipc/namespace.c
>+++ b/ipc/namespace.c
>@@ -65,7 +65,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
> 	if (err)
> 		goto fail_destroy_msg;
>
>-	err = mq_init_ns(ns);
>+	err = mq_init_ns(ns, false);
> 	if (err)
> 		goto fail_destroy_shm;
>
>-- 
>2.14.3
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] ipc, mqueue: lazy call kern_mount_data in new namespaces
  2017-11-27 12:55 [RFC PATCH] ipc, mqueue: lazy call kern_mount_data in new namespaces Giuseppe Scrivano
  2017-11-28  7:09 ` Davidlohr Bueso
@ 2017-11-28 21:53 ` Andrew Morton
  2017-11-29 10:33   ` Giuseppe Scrivano
  1 sibling, 1 reply; 6+ messages in thread
From: Andrew Morton @ 2017-11-28 21:53 UTC (permalink / raw)
  To: Giuseppe Scrivano; +Cc: linux-kernel, gregkh, mingo, dave

On Mon, 27 Nov 2017 13:55:50 +0100 Giuseppe Scrivano <gscrivan@redhat.com> wrote:

> kern_mount_data is a relatively expensive operation when creating a
> new IPC namespace, so delay the mount until its first usage when not
> creating the the global namespace.
> 
> On my machine, the time for creating 1000 new IPC namespaces dropped
> from ~8s to ~2s.

OK, but this simply moves the expense so it happens later on.  Why is
that better?

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] ipc, mqueue: lazy call kern_mount_data in new namespaces
  2017-11-28 21:53 ` Andrew Morton
@ 2017-11-29 10:33   ` Giuseppe Scrivano
  2017-11-29 22:17     ` Andrew Morton
  0 siblings, 1 reply; 6+ messages in thread
From: Giuseppe Scrivano @ 2017-11-29 10:33 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, gregkh, mingo, dave

Andrew Morton <akpm@linux-foundation.org> writes:

> OK, but this simply moves the expense so it happens later on.  Why is
> that better?

the optimization is for new IPC namespaces that don't use mq_open.  In
this case there won't be any kern_mount_data cost at all.

Regards,
Giuseppe

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] ipc, mqueue: lazy call kern_mount_data in new namespaces
  2017-11-29 10:33   ` Giuseppe Scrivano
@ 2017-11-29 22:17     ` Andrew Morton
  2017-11-30 12:20       ` Giuseppe Scrivano
  0 siblings, 1 reply; 6+ messages in thread
From: Andrew Morton @ 2017-11-29 22:17 UTC (permalink / raw)
  To: Giuseppe Scrivano; +Cc: linux-kernel, gregkh, mingo, dave

On Wed, 29 Nov 2017 11:33:28 +0100 Giuseppe Scrivano <gscrivan@redhat.com> wrote:

> Andrew Morton <akpm@linux-foundation.org> writes:
> 
> > OK, but this simply moves the expense so it happens later on.  Why is
> > that better?
> 
> the optimization is for new IPC namespaces that don't use mq_open.  In
> this case there won't be any kern_mount_data cost at all.
> 

Fair enough.  Please add this paragraph (or similar) to the changelog:

: This is a net saving for new IPC namespaces that don't use mq_open().  In
: this case there won't be any kern_mount_data() cost at all

And..  the patch calls
kern_mount_data()->vfs_kern_mount()->...->kmem_cache_zalloc(GFP_KERNEL)
under spin_lock().  This should have created a might_sleep() warning in
your testing, but obviously did not.

Could you please find out why?  Do you have
CONFIG_DEBUG_ATOMIC_SLEEP=n, I hope?  Please peruse
Documentation/process/submit-checklist.rst, section 12...

I assume a suitable fix would be to create a new mutex (static to
do_mq_open()) to prevent concurrent mounting.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] ipc, mqueue: lazy call kern_mount_data in new namespaces
  2017-11-29 22:17     ` Andrew Morton
@ 2017-11-30 12:20       ` Giuseppe Scrivano
  0 siblings, 0 replies; 6+ messages in thread
From: Giuseppe Scrivano @ 2017-11-30 12:20 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, gregkh, mingo, dave

Andrew Morton <akpm@linux-foundation.org> writes:

> On Wed, 29 Nov 2017 11:33:28 +0100 Giuseppe Scrivano <gscrivan@redhat.com> wrote:
>
>> Andrew Morton <akpm@linux-foundation.org> writes:
>> 
>> > OK, but this simply moves the expense so it happens later on.  Why is
>> > that better?
>> 
>> the optimization is for new IPC namespaces that don't use mq_open.  In
>> this case there won't be any kern_mount_data cost at all.
>> 
>
> Fair enough.  Please add this paragraph (or similar) to the changelog:
>
> : This is a net saving for new IPC namespaces that don't use mq_open().  In
> : this case there won't be any kern_mount_data() cost at all
>
> And..  the patch calls
> kern_mount_data()->vfs_kern_mount()->...->kmem_cache_zalloc(GFP_KERNEL)
> under spin_lock().  This should have created a might_sleep() warning in
> your testing, but obviously did not.
>
> Could you please find out why?  Do you have
> CONFIG_DEBUG_ATOMIC_SLEEP=n, I hope?  Please peruse
> Documentation/process/submit-checklist.rst, section 12...
>
> I assume a suitable fix would be to create a new mutex (static to
> do_mq_open()) to prevent concurrent mounting.

thanks for the hints.

Indeed, that was a mistake on my side as I didn't use
CONFIG_DEBUG_ATOMIC_SLEEP=y.  The might_sleep() warning is correctly 
raised once I enable CONFIG_DEBUG_ATOMIC_SLEEP (and the other options
suggested in submit-checklist.rst).

Giuseppe

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-11-30 12:20 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-11-27 12:55 [RFC PATCH] ipc, mqueue: lazy call kern_mount_data in new namespaces Giuseppe Scrivano
2017-11-28  7:09 ` Davidlohr Bueso
2017-11-28 21:53 ` Andrew Morton
2017-11-29 10:33   ` Giuseppe Scrivano
2017-11-29 22:17     ` Andrew Morton
2017-11-30 12:20       ` Giuseppe Scrivano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).