cgroups.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] sysfs: fix namespace refcnt leak
@ 2014-02-17  3:54 Li Zefan
  2014-02-18 23:12 ` Tejun Heo
  0 siblings, 1 reply; 4+ messages in thread
From: Li Zefan @ 2014-02-17  3:54 UTC (permalink / raw)
  To: Tejun Heo, Greg Kroah-Hartman; +Cc: LKML, Cgroups

Hi Tejun,

I think we have to fix kernfs in order to fix refcnt leak in sysfs
and cgroupfs. This fix is for 3.14, but it creates conflicts for
cgroup-next.

====================

As mount() and kill_sb() is not a one-to-one match, we shoudn't get
ns refcnt unconditionally in sysfs_mount(), and instead we should
get the refcnt only when kernfs_mount() allocated a new superblock.

Signed-off-by: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
 fs/kernfs/mount.c      | 7 ++++++-
 fs/sysfs/mount.c       | 5 +++--
 include/linux/kernfs.h | 9 +++++----
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 405279b..53ce420 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -132,6 +132,7 @@ const void *kernfs_super_ns(struct super_block *sb)
  * @flags: mount flags specified for the mount
  * @root: kernfs_root of the hierarchy being mounted
  * @ns: optional namespace tag of the mount
+ * @new: tell the caller if we allocated a new superblock
  *
  * This is to be called from each kernfs user's file_system_type->mount()
  * implementation, which should pass through the specified @fs_type and
@@ -141,12 +142,15 @@ const void *kernfs_super_ns(struct super_block *sb)
  * The return value can be passed to the vfs layer verbatim.
  */
 struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
-			       struct kernfs_root *root, const void *ns)
+			       struct kernfs_root *root, const void *ns,
+			       bool *new_sb)
 {
 	struct super_block *sb;
 	struct kernfs_super_info *info;
 	int error;
 
+	*new_sb = false;
+
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)
 		return ERR_PTR(-ENOMEM);
@@ -166,6 +170,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
 			return ERR_PTR(error);
 		}
 		sb->s_flags |= MS_ACTIVE;
+		*new_sb = true;
 	}
 
 	return dget(sb->s_root);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 5c7fdd9..f5bea79 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -27,6 +27,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
 {
 	struct dentry *root;
 	void *ns;
+	bool new_sb;
 
 	if (!(flags & MS_KERNMOUNT)) {
 		if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
@@ -37,8 +38,8 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
 	}
 
 	ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
-	root = kernfs_mount_ns(fs_type, flags, sysfs_root, ns);
-	if (IS_ERR(root))
+	root = kernfs_mount_ns(fs_type, flags, sysfs_root, ns, &new_sb);
+	if (IS_ERR(root) || !new_sb)
 		kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
 	return root;
 }
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 649497a..1b1849f 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -279,7 +279,8 @@ void kernfs_notify(struct kernfs_node *kn);
 
 const void *kernfs_super_ns(struct super_block *sb);
 struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
-			       struct kernfs_root *root, const void *ns);
+			       struct kernfs_root *root, const void *ns,
+			       bool *new_sb);
 void kernfs_kill_sb(struct super_block *sb);
 
 void kernfs_init(void);
@@ -372,7 +373,7 @@ static inline const void *kernfs_super_ns(struct super_block *sb)
 
 static inline struct dentry *
 kernfs_mount_ns(struct file_system_type *fs_type, int flags,
-		struct kernfs_root *root, const void *ns)
+		struct kernfs_root *root, const void *ns, bool *new_sb)
 { return ERR_PTR(-ENOSYS); }
 
 static inline void kernfs_kill_sb(struct super_block *sb) { }
@@ -430,9 +431,9 @@ static inline int kernfs_rename(struct kernfs_node *kn,
 
 static inline struct dentry *
 kernfs_mount(struct file_system_type *fs_type, int flags,
-	     struct kernfs_root *root)
+	     struct kernfs_root *root, bool *new_sb)
 {
-	return kernfs_mount_ns(fs_type, flags, root, NULL);
+	return kernfs_mount_ns(fs_type, flags, root, NULL, new_sb);
 }
 
 #endif	/* __LINUX_KERNFS_H */
-- 
1.8.0.2

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] sysfs: fix namespace refcnt leak
  2014-02-17  3:54 [PATCH] sysfs: fix namespace refcnt leak Li Zefan
@ 2014-02-18 23:12 ` Tejun Heo
       [not found]   ` <20140218231242.GJ31892-9pTldWuhBndy/B6EtB590w@public.gmane.org>
  0 siblings, 1 reply; 4+ messages in thread
From: Tejun Heo @ 2014-02-18 23:12 UTC (permalink / raw)
  To: Li Zefan; +Cc: Greg Kroah-Hartman, LKML, Cgroups

Hey,

On Mon, Feb 17, 2014 at 11:54:59AM +0800, Li Zefan wrote:
> I think we have to fix kernfs in order to fix refcnt leak in sysfs
> and cgroupfs. This fix is for 3.14, but it creates conflicts for
> cgroup-next.
> 
> ====================
> 
> As mount() and kill_sb() is not a one-to-one match, we shoudn't get
> ns refcnt unconditionally in sysfs_mount(), and instead we should
> get the refcnt only when kernfs_mount() allocated a new superblock.

Ugh... nasty :(

> @@ -132,6 +132,7 @@ const void *kernfs_super_ns(struct super_block *sb)
>   * @flags: mount flags specified for the mount
>   * @root: kernfs_root of the hierarchy being mounted
>   * @ns: optional namespace tag of the mount
> + * @new: tell the caller if we allocated a new superblock

Maybe something like @new_sb_created is better?

>  	struct super_block *sb;
>  	struct kernfs_super_info *info;
>  	int error;
>  
> +	*new_sb = false;

Can we make it optional so that users who don't care about it can
ignore it?

> @@ -430,9 +431,9 @@ static inline int kernfs_rename(struct kernfs_node *kn,
>  
>  static inline struct dentry *
>  kernfs_mount(struct file_system_type *fs_type, int flags,
> -	     struct kernfs_root *root)
> +	     struct kernfs_root *root, bool *new_sb)
>  {
> -	return kernfs_mount_ns(fs_type, flags, root, NULL);
> +	return kernfs_mount_ns(fs_type, flags, root, NULL, new_sb);

And let kernfs_mount() just use NULL for the parameter?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] sysfs: fix namespace refcnt leak
       [not found]   ` <20140218231242.GJ31892-9pTldWuhBndy/B6EtB590w@public.gmane.org>
@ 2014-02-19  1:39     ` Li Zefan
       [not found]       ` <53040B50.4060600-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 4+ messages in thread
From: Li Zefan @ 2014-02-19  1:39 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Greg Kroah-Hartman, LKML, Cgroups

On 2014/2/19 7:12, Tejun Heo wrote:
> Hey,
> 
> On Mon, Feb 17, 2014 at 11:54:59AM +0800, Li Zefan wrote:
>> I think we have to fix kernfs in order to fix refcnt leak in sysfs
>> and cgroupfs. This fix is for 3.14, but it creates conflicts for
>> cgroup-next.
>>
>> ====================
>>
>> As mount() and kill_sb() is not a one-to-one match, we shoudn't get
>> ns refcnt unconditionally in sysfs_mount(), and instead we should
>> get the refcnt only when kernfs_mount() allocated a new superblock.
> 
> Ugh... nasty :(
> 
>> @@ -132,6 +132,7 @@ const void *kernfs_super_ns(struct super_block *sb)
>>   * @flags: mount flags specified for the mount
>>   * @root: kernfs_root of the hierarchy being mounted
>>   * @ns: optional namespace tag of the mount
>> + * @new: tell the caller if we allocated a new superblock
> 
> Maybe something like @new_sb_created is better?
> 
>>  	struct super_block *sb;
>>  	struct kernfs_super_info *info;
>>  	int error;
>>  
>> +	*new_sb = false;
> 
> Can we make it optional so that users who don't care about it can
> ignore it?
> 

cgroupfs also needs this to fix refcnt leak.

Because success in finding an existing cgroup_root doesn't mean no new
superblock is needed. For example:

  # mount -t cgroup -o cpuacct xxx /cgroup
  # mkdir /cgroup/tmp
  # umount /cgroup    <--- sb will be freed but cgroup_root won't

  // this will allocate new sb, but we find the cgroup_root is there.
  # mount -t cgroup -o cpuacct xxx /cgroup

But debugfs won't need this if it's converted to kernfs.

How about I keep kernfs_mount() API intact, and when this fix gets
merged into mainline, you merge the fix into cgroup-next, and then
I make a fix for cgroup by changing kernfs_mount()?

>> @@ -430,9 +431,9 @@ static inline int kernfs_rename(struct kernfs_node *kn,
>>  
>>  static inline struct dentry *
>>  kernfs_mount(struct file_system_type *fs_type, int flags,
>> -	     struct kernfs_root *root)
>> +	     struct kernfs_root *root, bool *new_sb)
>>  {
>> -	return kernfs_mount_ns(fs_type, flags, root, NULL);
>> +	return kernfs_mount_ns(fs_type, flags, root, NULL, new_sb);
> 
> And let kernfs_mount() just use NULL for the parameter?
> 


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] sysfs: fix namespace refcnt leak
       [not found]       ` <53040B50.4060600-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
@ 2014-02-20 14:30         ` Tejun Heo
  0 siblings, 0 replies; 4+ messages in thread
From: Tejun Heo @ 2014-02-20 14:30 UTC (permalink / raw)
  To: Li Zefan; +Cc: Greg Kroah-Hartman, LKML, Cgroups

Hello, Li.

On Wed, Feb 19, 2014 at 09:39:28AM +0800, Li Zefan wrote:
> > Can we make it optional so that users who don't care about it can
> > ignore it?
> 
> cgroupfs also needs this to fix refcnt leak.

Ah, okay.

> Because success in finding an existing cgroup_root doesn't mean no new
> superblock is needed. For example:
> 
>   # mount -t cgroup -o cpuacct xxx /cgroup
>   # mkdir /cgroup/tmp
>   # umount /cgroup    <--- sb will be freed but cgroup_root won't
> 
>   // this will allocate new sb, but we find the cgroup_root is there.
>   # mount -t cgroup -o cpuacct xxx /cgroup
> 
> But debugfs won't need this if it's converted to kernfs.
> 
> How about I keep kernfs_mount() API intact, and when this fix gets
> merged into mainline, you merge the fix into cgroup-next, and then
> I make a fix for cgroup by changing kernfs_mount()?

If we need kernfs_mount() modified anyway, let's do it in this patch.
I still think it'd be better to allow the parameter to be NULL but
other than that, no objection.

Thanks!

-- 
tejun

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2014-02-20 14:30 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-02-17  3:54 [PATCH] sysfs: fix namespace refcnt leak Li Zefan
2014-02-18 23:12 ` Tejun Heo
     [not found]   ` <20140218231242.GJ31892-9pTldWuhBndy/B6EtB590w@public.gmane.org>
2014-02-19  1:39     ` Li Zefan
     [not found]       ` <53040B50.4060600-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
2014-02-20 14:30         ` Tejun Heo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).