cgroups.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH cgroup 1/2] cgroup: move module ref handling into rebind_subsystems()
@ 2013-06-29  4:12 Tejun Heo
       [not found] ` <20130629041231.GA31353-Gd/HAXX7CRxy/B6EtB590w@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Tejun Heo @ 2013-06-29  4:12 UTC (permalink / raw)
  To: Li Zefan
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	cgroups-u79uwXL29TY76Z2rM5mHXA

Hello,

These two patches are on top of

  cgroup/for-3.11 0ce6cba35 ("cgroup: CGRP_ROOT_SUBSYS_BOUND should be ignored when comparing mount options")
+ "cgroup: fix and clean up cgroup file creations and removals" patchset
   http://thread.gmane.org/gmane.linux.kernel.cgroups/8245

and available in the following git branch

 git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git review-module-ref

Both patchsets I'm posting today are too late for for-3.11 unless
there's gonna be -rc8.  I'll collect the reviews and apply these after
-rc1 drops.

Thanks.

---------------------------------- 8< ----------------------------------
From e8645bf68bcada3e07538fd042e9f0ce8a1e72cd Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Date: Fri, 28 Jun 2013 21:08:27 -0700
Subject: [PATCH 1/2] cgroup: move module ref handling into rebind_subsystems()

Module ref handling in cgroup is rather weird.
parse_cgroupfs_options() grabs all the modules for the specified
subsystems.  A module ref is kept if the specified subsystem is newly
bound to the hierarchy.  If not, or the operation fails, the refs are
dropped.  This scatters module ref handling across multiple functions
making it difficult to track.  It also make the function nasty to use
for dynamic subsystem binding which is necessary for the planned
unified hierarchy.

There's nothing which requires the subsystem modules to be pinned
between parse_cgroupfs_options() and rebind_subsystems() in both mount
and remount paths.  parse_cgroupfs_options() can just parse and
rebind_subsystems() can handle pinning the subsystems that it wants to
bind, which is a natural part of its task - binding - anyway.

Move module ref handling into rebind_subsystems() which makes the code
a lot simpler - modules are gotten iff it's gonna be bound and put iff
unbound or binding fails.

Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
 kernel/cgroup.c | 87 +++++++++++++++------------------------------------------
 1 file changed, 22 insertions(+), 65 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3bc7a1a..a65aff1 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1003,6 +1003,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 {
 	struct cgroup *cgrp = &root->top_cgroup;
 	struct cgroup_subsys *ss;
+	unsigned long pinned = 0;
 	int i, ret;
 
 	BUG_ON(!mutex_is_locked(&cgroup_mutex));
@@ -1010,20 +1011,26 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 
 	/* Check that any added subsystems are currently free */
 	for_each_subsys(ss, i) {
-		unsigned long bit = 1UL << i;
-
-		if (!(bit & added_mask))
+		if (!(added_mask & (1 << i)))
 			continue;
 
+		/* is the subsystem mounted elsewhere? */
 		if (ss->root != &cgroup_dummy_root) {
-			/* Subsystem isn't free */
-			return -EBUSY;
+			ret = -EBUSY;
+			goto out_put;
 		}
+
+		/* pin the module */
+		if (!try_module_get(ss->module)) {
+			ret = -ENOENT;
+			goto out_put;
+		}
+		pinned |= 1 << i;
 	}
 
 	ret = cgroup_populate_dir(cgrp, added_mask);
 	if (ret)
-		return ret;
+		goto out_put;
 
 	/*
 	 * Nothing can fail from this point on.  Remove files for the
@@ -1067,11 +1074,6 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 		} else if (bit & root->subsys_mask) {
 			/* Subsystem state should already exist */
 			BUG_ON(!cgrp->subsys[i]);
-			/*
-			 * a refcount was taken, but we already had one, so
-			 * drop the extra reference.
-			 */
-			module_put(ss->module);
 #ifdef CONFIG_MODULE_UNLOAD
 			BUG_ON(ss->module && !module_refcount(ss->module));
 #endif
@@ -1088,6 +1090,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 	root->flags |= CGRP_ROOT_SUBSYS_BOUND;
 
 	return 0;
+
+out_put:
+	for_each_subsys(ss, i)
+		if (pinned & (1 << i))
+			module_put(ss->module);
+	return ret;
 }
 
 static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
@@ -1138,7 +1146,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 	char *token, *o = data;
 	bool all_ss = false, one_ss = false;
 	unsigned long mask = (unsigned long)-1;
-	bool module_pin_failed = false;
 	struct cgroup_subsys *ss;
 	int i;
 
@@ -1281,52 +1288,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 	if (!opts->subsys_mask && !opts->name)
 		return -EINVAL;
 
-	/*
-	 * Grab references on all the modules we'll need, so the subsystems
-	 * don't dance around before rebind_subsystems attaches them. This may
-	 * take duplicate reference counts on a subsystem that's already used,
-	 * but rebind_subsystems handles this case.
-	 */
-	for_each_subsys(ss, i) {
-		if (!(opts->subsys_mask & (1UL << i)))
-			continue;
-		if (!try_module_get(cgroup_subsys[i]->module)) {
-			module_pin_failed = true;
-			break;
-		}
-	}
-	if (module_pin_failed) {
-		/*
-		 * oops, one of the modules was going away. this means that we
-		 * raced with a module_delete call, and to the user this is
-		 * essentially a "subsystem doesn't exist" case.
-		 */
-		for (i--; i >= 0; i--) {
-			/* drop refcounts only on the ones we took */
-			unsigned long bit = 1UL << i;
-
-			if (!(bit & opts->subsys_mask))
-				continue;
-			module_put(cgroup_subsys[i]->module);
-		}
-		return -ENOENT;
-	}
-
 	return 0;
 }
 
-static void drop_parsed_module_refcounts(unsigned long subsys_mask)
-{
-	struct cgroup_subsys *ss;
-	int i;
-
-	mutex_lock(&cgroup_mutex);
-	for_each_subsys(ss, i)
-		if (subsys_mask & (1UL << i))
-			module_put(cgroup_subsys[i]->module);
-	mutex_unlock(&cgroup_mutex);
-}
-
 static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 {
 	int ret = 0;
@@ -1384,8 +1348,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 	mutex_unlock(&cgroup_root_mutex);
 	mutex_unlock(&cgroup_mutex);
 	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
-	if (ret)
-		drop_parsed_module_refcounts(opts.subsys_mask);
 	return ret;
 }
 
@@ -1590,7 +1552,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	new_root = cgroup_root_from_opts(&opts);
 	if (IS_ERR(new_root)) {
 		ret = PTR_ERR(new_root);
-		goto drop_modules;
+		goto out_err;
 	}
 	opts.new_root = new_root;
 
@@ -1599,7 +1561,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	if (IS_ERR(sb)) {
 		ret = PTR_ERR(sb);
 		cgroup_free_root(opts.new_root);
-		goto drop_modules;
+		goto out_err;
 	}
 
 	root = sb->s_fs_info;
@@ -1710,9 +1672,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 				pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
 			}
 		}
-
-		/* no subsys rebinding, so refcounts don't change */
-		drop_parsed_module_refcounts(opts.subsys_mask);
 	}
 
 	kfree(opts.release_agent);
@@ -1729,8 +1688,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	mutex_unlock(&inode->i_mutex);
  drop_new_super:
 	deactivate_locked_super(sb);
- drop_modules:
-	drop_parsed_module_refcounts(opts.subsys_mask);
  out_err:
 	kfree(opts.release_agent);
 	kfree(opts.name);
@@ -4838,7 +4795,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
 
 	/*
 	 * we shouldn't be called if the subsystem is in use, and the use of
-	 * try_module_get in parse_cgroupfs_options should ensure that it
+	 * try_module_get() in rebind_subsystems() should ensure that it
 	 * doesn't start being used while we're killing it off.
 	 */
 	BUG_ON(ss->root != &cgroup_dummy_root);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] cgroup: remove gratuituous BUG_ON()s from rebind_subsystems()
       [not found] ` <20130629041231.GA31353-Gd/HAXX7CRxy/B6EtB590w@public.gmane.org>
@ 2013-06-29  4:13   ` Tejun Heo
       [not found]     ` <20130629041305.GB31353-Gd/HAXX7CRxy/B6EtB590w@public.gmane.org>
  2013-07-12  9:03   ` [PATCH cgroup 1/2] cgroup: move module ref handling into rebind_subsystems() Li Zefan
  2013-07-12 20:38   ` [PATCH v2 " Tejun Heo
  2 siblings, 1 reply; 9+ messages in thread
From: Tejun Heo @ 2013-06-29  4:13 UTC (permalink / raw)
  To: Li Zefan
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	cgroups-u79uwXL29TY76Z2rM5mHXA

From 9d47f1dd70a19b78a9da53e71fc1bf41e0300026 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Date: Fri, 28 Jun 2013 21:08:27 -0700

rebind_subsystems() performs santiy checks even on subsystems which
aren't specified to be added or removed and the checks aren't all that
useful given that these are in a very cold path while the violations
they check would trip up in much hotter paths.

Let's remove these from rebind_subsystems().

Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
 kernel/cgroup.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a65aff1..864a4b2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1071,15 +1071,6 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			/* subsystem is now free - drop reference on module */
 			module_put(ss->module);
 			root->subsys_mask &= ~bit;
-		} else if (bit & root->subsys_mask) {
-			/* Subsystem state should already exist */
-			BUG_ON(!cgrp->subsys[i]);
-#ifdef CONFIG_MODULE_UNLOAD
-			BUG_ON(ss->module && !module_refcount(ss->module));
-#endif
-		} else {
-			/* Subsystem state shouldn't exist */
-			BUG_ON(cgrp->subsys[i]);
 		}
 	}
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH cgroup 1/2] cgroup: move module ref handling into rebind_subsystems()
       [not found] ` <20130629041231.GA31353-Gd/HAXX7CRxy/B6EtB590w@public.gmane.org>
  2013-06-29  4:13   ` [PATCH 2/2] cgroup: remove gratuituous BUG_ON()s from rebind_subsystems() Tejun Heo
@ 2013-07-12  9:03   ` Li Zefan
       [not found]     ` <51DFC650.9010801-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
  2013-07-12 20:38   ` [PATCH v2 " Tejun Heo
  2 siblings, 1 reply; 9+ messages in thread
From: Li Zefan @ 2013-07-12  9:03 UTC (permalink / raw)
  To: Tejun Heo
  Cc: cgroups-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

On 2013/6/29 12:12, Tejun Heo wrote:
> Hello,
> 
> These two patches are on top of
> 
>   cgroup/for-3.11 0ce6cba35 ("cgroup: CGRP_ROOT_SUBSYS_BOUND should be ignored when comparing mount options")
> + "cgroup: fix and clean up cgroup file creations and removals" patchset
>    http://thread.gmane.org/gmane.linux.kernel.cgroups/8245
> 
> and available in the following git branch
> 
>  git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git review-module-ref
> 
> Both patchsets I'm posting today are too late for for-3.11 unless
> there's gonna be -rc8.  I'll collect the reviews and apply these after
> -rc1 drops.
> 
> Thanks.
> 
> ---------------------------------- 8< ----------------------------------
>>From e8645bf68bcada3e07538fd042e9f0ce8a1e72cd Mon Sep 17 00:00:00 2001
> From: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> Date: Fri, 28 Jun 2013 21:08:27 -0700
> Subject: [PATCH 1/2] cgroup: move module ref handling into rebind_subsystems()
> 
> Module ref handling in cgroup is rather weird.
> parse_cgroupfs_options() grabs all the modules for the specified
> subsystems.  A module ref is kept if the specified subsystem is newly
> bound to the hierarchy.  If not, or the operation fails, the refs are
> dropped.  This scatters module ref handling across multiple functions
> making it difficult to track.  It also make the function nasty to use
> for dynamic subsystem binding which is necessary for the planned
> unified hierarchy.
> 
> There's nothing which requires the subsystem modules to be pinned
> between parse_cgroupfs_options() and rebind_subsystems() in both mount
> and remount paths.  parse_cgroupfs_options() can just parse and
> rebind_subsystems() can handle pinning the subsystems that it wants to
> bind, which is a natural part of its task - binding - anyway.
> 
> Move module ref handling into rebind_subsystems() which makes the code
> a lot simpler - modules are gotten iff it's gonna be bound and put iff
> unbound or binding fails.
> 
> Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> ---
>  kernel/cgroup.c | 87 +++++++++++++++------------------------------------------
>  1 file changed, 22 insertions(+), 65 deletions(-)
> 
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 3bc7a1a..a65aff1 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -1003,6 +1003,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
>  {
>  	struct cgroup *cgrp = &root->top_cgroup;
>  	struct cgroup_subsys *ss;
> +	unsigned long pinned = 0;
>  	int i, ret;
>  
>  	BUG_ON(!mutex_is_locked(&cgroup_mutex));
> @@ -1010,20 +1011,26 @@ static int rebind_subsystems(struct cgroupfs_root *root,
>  
>  	/* Check that any added subsystems are currently free */
>  	for_each_subsys(ss, i) {
> -		unsigned long bit = 1UL << i;
> -
> -		if (!(bit & added_mask))
> +		if (!(added_mask & (1 << i)))
>  			continue;
>  
> +		/* is the subsystem mounted elsewhere? */
>  		if (ss->root != &cgroup_dummy_root) {
> -			/* Subsystem isn't free */
> -			return -EBUSY;
> +			ret = -EBUSY;
> +			goto out_put;
>  		}
> +
> +		/* pin the module */
> +		if (!try_module_get(ss->module)) {
> +			ret = -ENOENT;
> +			goto out_put;
> +		}
> +		pinned |= 1 << i;
>  	}

This looks wrong to me.

cgroup_mount()
{
	mutex_lock(cgroup_mutex);
	parse_cgroupfs_options();
	mutex_unlock(cgroup_mutex);
	...

	mutex_lock(cgroup_mutex);
	...
	rebind_subsystems();
	...
	mutex_unlock(cgroup_mutex);
}

so a modular cgroup subsystem can be unloaded inbetween, say it's net_cls, and
then it's possible that:

	# mount -t cgroup -o net_cls xxx /cgroup

The above operation succeeds but it's not binded to cgroupfs as it just got
unloaded.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH cgroup 1/2] cgroup: move module ref handling into rebind_subsystems()
       [not found]     ` <51DFC650.9010801-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
@ 2013-07-12  9:08       ` Li Zefan
       [not found]         ` <51DFC787.7080703-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Li Zefan @ 2013-07-12  9:08 UTC (permalink / raw)
  To: Tejun Heo
  Cc: cgroups-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

>> Subject: [PATCH 1/2] cgroup: move module ref handling into rebind_subsystems()
>>
>> Module ref handling in cgroup is rather weird.
>> parse_cgroupfs_options() grabs all the modules for the specified
>> subsystems.  A module ref is kept if the specified subsystem is newly
>> bound to the hierarchy.  If not, or the operation fails, the refs are
>> dropped.  This scatters module ref handling across multiple functions
>> making it difficult to track.  It also make the function nasty to use
>> for dynamic subsystem binding which is necessary for the planned
>> unified hierarchy.
>>
>> There's nothing which requires the subsystem modules to be pinned
>> between parse_cgroupfs_options() and rebind_subsystems() in both mount
>> and remount paths.  parse_cgroupfs_options() can just parse and
>> rebind_subsystems() can handle pinning the subsystems that it wants to
>> bind, which is a natural part of its task - binding - anyway.
>>
>> Move module ref handling into rebind_subsystems() which makes the code
>> a lot simpler - modules are gotten iff it's gonna be bound and put iff
>> unbound or binding fails.
>>
>> Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
>> ---
>>  kernel/cgroup.c | 87 +++++++++++++++------------------------------------------
>>  1 file changed, 22 insertions(+), 65 deletions(-)
>>
>> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
>> index 3bc7a1a..a65aff1 100644
>> --- a/kernel/cgroup.c
>> +++ b/kernel/cgroup.c
>> @@ -1003,6 +1003,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
>>  {
>>  	struct cgroup *cgrp = &root->top_cgroup;
>>  	struct cgroup_subsys *ss;
>> +	unsigned long pinned = 0;
>>  	int i, ret;
>>  
>>  	BUG_ON(!mutex_is_locked(&cgroup_mutex));
>> @@ -1010,20 +1011,26 @@ static int rebind_subsystems(struct cgroupfs_root *root,
>>  
>>  	/* Check that any added subsystems are currently free */
>>  	for_each_subsys(ss, i) {
>> -		unsigned long bit = 1UL << i;
>> -
>> -		if (!(bit & added_mask))
>> +		if (!(added_mask & (1 << i)))
>>  			continue;
>>  
>> +		/* is the subsystem mounted elsewhere? */
>>  		if (ss->root != &cgroup_dummy_root) {
>> -			/* Subsystem isn't free */
>> -			return -EBUSY;
>> +			ret = -EBUSY;
>> +			goto out_put;
>>  		}
>> +
>> +		/* pin the module */
>> +		if (!try_module_get(ss->module)) {
>> +			ret = -ENOENT;
>> +			goto out_put;
>> +		}
>> +		pinned |= 1 << i;
>>  	}
> 
> This looks wrong to me.
> 
> cgroup_mount()
> {
> 	mutex_lock(cgroup_mutex);
> 	parse_cgroupfs_options();
> 	mutex_unlock(cgroup_mutex);
> 	...
> 
> 	mutex_lock(cgroup_mutex);
> 	...
> 	rebind_subsystems();
> 	...
> 	mutex_unlock(cgroup_mutex);
> }
> 
> so a modular cgroup subsystem can be unloaded inbetween, say it's net_cls, and
> then it's possible that:
> 
> 	# mount -t cgroup -o net_cls xxx /cgroup
> 
> The above operation succeeds but it's not binded to cgroupfs as it just got
> unloaded.
> 

for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
	...
	if (!subsys[i] && (added_mask & (1 << i))
		return -EINVAL;
	...
}

This should work.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH cgroup 1/2] cgroup: move module ref handling into rebind_subsystems()
       [not found]         ` <51DFC787.7080703-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
@ 2013-07-12 20:10           ` Tejun Heo
  0 siblings, 0 replies; 9+ messages in thread
From: Tejun Heo @ 2013-07-12 20:10 UTC (permalink / raw)
  To: Li Zefan
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	cgroups-u79uwXL29TY76Z2rM5mHXA

On Fri, Jul 12, 2013 at 05:08:23PM +0800, Li Zefan wrote:
> > so a modular cgroup subsystem can be unloaded inbetween, say it's net_cls, and
> > then it's possible that:
> > 
> > 	# mount -t cgroup -o net_cls xxx /cgroup
> > 
> > The above operation succeeds but it's not binded to cgroupfs as it just got
> > unloaded.

Ooh, right, it'd be excluded from the iteration.

> for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
> 	...
> 	if (!subsys[i] && (added_mask & (1 << i))
> 		return -EINVAL;
> 	...
> }
> 
> This should work.

Yeah, I kinda dislike the raw iteration tho.  I wonder whether setting
the actually added mask and comparing would be cleaner.  I'll
experiment with it and post the updated patch.

Thanks!

-- 
tejun

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v2 cgroup 1/2] cgroup: move module ref handling into rebind_subsystems()
       [not found] ` <20130629041231.GA31353-Gd/HAXX7CRxy/B6EtB590w@public.gmane.org>
  2013-06-29  4:13   ` [PATCH 2/2] cgroup: remove gratuituous BUG_ON()s from rebind_subsystems() Tejun Heo
  2013-07-12  9:03   ` [PATCH cgroup 1/2] cgroup: move module ref handling into rebind_subsystems() Li Zefan
@ 2013-07-12 20:38   ` Tejun Heo
       [not found]     ` <20130712203817.GI23680-9pTldWuhBndy/B6EtB590w@public.gmane.org>
  2 siblings, 1 reply; 9+ messages in thread
From: Tejun Heo @ 2013-07-12 20:38 UTC (permalink / raw)
  To: Li Zefan
  Cc: cgroups-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Module ref handling in cgroup is rather weird.
parse_cgroupfs_options() grabs all the modules for the specified
subsystems.  A module ref is kept if the specified subsystem is newly
bound to the hierarchy.  If not, or the operation fails, the refs are
dropped.  This scatters module ref handling across multiple functions
making it difficult to track.  It also make the function nasty to use
for dynamic subsystem binding which is necessary for the planned
unified hierarchy.

There's nothing which requires the subsystem modules to be pinned
between parse_cgroupfs_options() and rebind_subsystems() in both mount
and remount paths.  parse_cgroupfs_options() can just parse and
rebind_subsystems() can handle pinning the subsystems that it wants to
bind, which is a natural part of its task - binding - anyway.

Move module ref handling into rebind_subsystems() which makes the code
a lot simpler - modules are gotten iff it's gonna be bound and put iff
unbound or binding fails.

v2: Li pointed out that if a controller module is unloaded between
    parsing and binding, rebind_subsystems() won't notice the missing
    controller as it only iterates through existing controllers.  Fix
    it by updating rebind_subsystems() to compare @added_mask to
    @pinned and fail with -ENOENT if they don't match.

Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
Turns out rebind_subsystems() already has @pinned so detecting cases
where the module went missing is trivial.

Thanks.

 kernel/cgroup.c |   93 ++++++++++++++++----------------------------------------
 1 file changed, 28 insertions(+), 65 deletions(-)

--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1003,6 +1003,7 @@ static int rebind_subsystems(struct cgro
 {
 	struct cgroup *cgrp = &root->top_cgroup;
 	struct cgroup_subsys *ss;
+	unsigned long pinned = 0;
 	int i, ret;
 
 	BUG_ON(!mutex_is_locked(&cgroup_mutex));
@@ -1010,20 +1011,32 @@ static int rebind_subsystems(struct cgro
 
 	/* Check that any added subsystems are currently free */
 	for_each_subsys(ss, i) {
-		unsigned long bit = 1UL << i;
-
-		if (!(bit & added_mask))
+		if (!(added_mask & (1 << i)))
 			continue;
 
+		/* is the subsystem mounted elsewhere? */
 		if (ss->root != &cgroup_dummy_root) {
-			/* Subsystem isn't free */
-			return -EBUSY;
+			ret = -EBUSY;
+			goto out_put;
+		}
+
+		/* pin the module */
+		if (!try_module_get(ss->module)) {
+			ret = -ENOENT;
+			goto out_put;
 		}
+		pinned |= 1 << i;
+	}
+
+	/* subsys could be missing if unloaded between parsing and here */
+	if (added_mask != pinned) {
+		ret = -ENOENT;
+		goto out_put;
 	}
 
 	ret = cgroup_populate_dir(cgrp, added_mask);
 	if (ret)
-		return ret;
+		goto out_put;
 
 	/*
 	 * Nothing can fail from this point on.  Remove files for the
@@ -1067,11 +1080,6 @@ static int rebind_subsystems(struct cgro
 		} else if (bit & root->subsys_mask) {
 			/* Subsystem state should already exist */
 			BUG_ON(!cgrp->subsys[i]);
-			/*
-			 * a refcount was taken, but we already had one, so
-			 * drop the extra reference.
-			 */
-			module_put(ss->module);
 #ifdef CONFIG_MODULE_UNLOAD
 			BUG_ON(ss->module && !module_refcount(ss->module));
 #endif
@@ -1088,6 +1096,12 @@ static int rebind_subsystems(struct cgro
 	root->flags |= CGRP_ROOT_SUBSYS_BOUND;
 
 	return 0;
+
+out_put:
+	for_each_subsys(ss, i)
+		if (pinned & (1 << i))
+			module_put(ss->module);
+	return ret;
 }
 
 static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
@@ -1138,7 +1152,6 @@ static int parse_cgroupfs_options(char *
 	char *token, *o = data;
 	bool all_ss = false, one_ss = false;
 	unsigned long mask = (unsigned long)-1;
-	bool module_pin_failed = false;
 	struct cgroup_subsys *ss;
 	int i;
 
@@ -1281,52 +1294,9 @@ static int parse_cgroupfs_options(char *
 	if (!opts->subsys_mask && !opts->name)
 		return -EINVAL;
 
-	/*
-	 * Grab references on all the modules we'll need, so the subsystems
-	 * don't dance around before rebind_subsystems attaches them. This may
-	 * take duplicate reference counts on a subsystem that's already used,
-	 * but rebind_subsystems handles this case.
-	 */
-	for_each_subsys(ss, i) {
-		if (!(opts->subsys_mask & (1UL << i)))
-			continue;
-		if (!try_module_get(cgroup_subsys[i]->module)) {
-			module_pin_failed = true;
-			break;
-		}
-	}
-	if (module_pin_failed) {
-		/*
-		 * oops, one of the modules was going away. this means that we
-		 * raced with a module_delete call, and to the user this is
-		 * essentially a "subsystem doesn't exist" case.
-		 */
-		for (i--; i >= 0; i--) {
-			/* drop refcounts only on the ones we took */
-			unsigned long bit = 1UL << i;
-
-			if (!(bit & opts->subsys_mask))
-				continue;
-			module_put(cgroup_subsys[i]->module);
-		}
-		return -ENOENT;
-	}
-
 	return 0;
 }
 
-static void drop_parsed_module_refcounts(unsigned long subsys_mask)
-{
-	struct cgroup_subsys *ss;
-	int i;
-
-	mutex_lock(&cgroup_mutex);
-	for_each_subsys(ss, i)
-		if (subsys_mask & (1UL << i))
-			module_put(cgroup_subsys[i]->module);
-	mutex_unlock(&cgroup_mutex);
-}
-
 static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 {
 	int ret = 0;
@@ -1384,8 +1354,6 @@ static int cgroup_remount(struct super_b
 	mutex_unlock(&cgroup_root_mutex);
 	mutex_unlock(&cgroup_mutex);
 	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
-	if (ret)
-		drop_parsed_module_refcounts(opts.subsys_mask);
 	return ret;
 }
 
@@ -1591,7 +1559,7 @@ static struct dentry *cgroup_mount(struc
 	new_root = cgroup_root_from_opts(&opts);
 	if (IS_ERR(new_root)) {
 		ret = PTR_ERR(new_root);
-		goto drop_modules;
+		goto out_err;
 	}
 	opts.new_root = new_root;
 
@@ -1600,7 +1568,7 @@ static struct dentry *cgroup_mount(struc
 	if (IS_ERR(sb)) {
 		ret = PTR_ERR(sb);
 		cgroup_free_root(opts.new_root);
-		goto drop_modules;
+		goto out_err;
 	}
 
 	root = sb->s_fs_info;
@@ -1708,9 +1676,6 @@ static struct dentry *cgroup_mount(struc
 				pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
 			}
 		}
-
-		/* no subsys rebinding, so refcounts don't change */
-		drop_parsed_module_refcounts(opts.subsys_mask);
 	}
 
 	kfree(opts.release_agent);
@@ -1728,8 +1693,6 @@ static struct dentry *cgroup_mount(struc
 	mutex_unlock(&inode->i_mutex);
  drop_new_super:
 	deactivate_locked_super(sb);
- drop_modules:
-	drop_parsed_module_refcounts(opts.subsys_mask);
  out_err:
 	kfree(opts.release_agent);
 	kfree(opts.name);
@@ -4837,7 +4800,7 @@ void cgroup_unload_subsys(struct cgroup_
 
 	/*
 	 * we shouldn't be called if the subsystem is in use, and the use of
-	 * try_module_get in parse_cgroupfs_options should ensure that it
+	 * try_module_get() in rebind_subsystems() should ensure that it
 	 * doesn't start being used while we're killing it off.
 	 */
 	BUG_ON(ss->root != &cgroup_dummy_root);

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 cgroup 1/2] cgroup: move module ref handling into rebind_subsystems()
       [not found]     ` <20130712203817.GI23680-9pTldWuhBndy/B6EtB590w@public.gmane.org>
@ 2013-07-15  2:54       ` Li Zefan
       [not found]         ` <51E3645A.6080404-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Li Zefan @ 2013-07-15  2:54 UTC (permalink / raw)
  To: Tejun Heo
  Cc: cgroups-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

On 2013/7/13 4:38, Tejun Heo wrote:
> Module ref handling in cgroup is rather weird.
> parse_cgroupfs_options() grabs all the modules for the specified
> subsystems.  A module ref is kept if the specified subsystem is newly
> bound to the hierarchy.  If not, or the operation fails, the refs are
> dropped.  This scatters module ref handling across multiple functions
> making it difficult to track.  It also make the function nasty to use
> for dynamic subsystem binding which is necessary for the planned
> unified hierarchy.
> 
> There's nothing which requires the subsystem modules to be pinned
> between parse_cgroupfs_options() and rebind_subsystems() in both mount
> and remount paths.  parse_cgroupfs_options() can just parse and
> rebind_subsystems() can handle pinning the subsystems that it wants to
> bind, which is a natural part of its task - binding - anyway.
> 
> Move module ref handling into rebind_subsystems() which makes the code
> a lot simpler - modules are gotten iff it's gonna be bound and put iff
> unbound or binding fails.
> 
> v2: Li pointed out that if a controller module is unloaded between
>     parsing and binding, rebind_subsystems() won't notice the missing
>     controller as it only iterates through existing controllers.  Fix
>     it by updating rebind_subsystems() to compare @added_mask to
>     @pinned and fail with -ENOENT if they don't match.
> 
> Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> Cc: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

Acked-by: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] cgroup: remove gratuituous BUG_ON()s from rebind_subsystems()
       [not found]     ` <20130629041305.GB31353-Gd/HAXX7CRxy/B6EtB590w@public.gmane.org>
@ 2013-07-15  2:54       ` Li Zefan
  0 siblings, 0 replies; 9+ messages in thread
From: Li Zefan @ 2013-07-15  2:54 UTC (permalink / raw)
  To: Tejun Heo
  Cc: cgroups-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

On 2013/6/29 12:13, Tejun Heo wrote:
>>From 9d47f1dd70a19b78a9da53e71fc1bf41e0300026 Mon Sep 17 00:00:00 2001
> From: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> Date: Fri, 28 Jun 2013 21:08:27 -0700
> 
> rebind_subsystems() performs santiy checks even on subsystems which
> aren't specified to be added or removed and the checks aren't all that
> useful given that these are in a very cold path while the violations
> they check would trip up in much hotter paths.
> 
> Let's remove these from rebind_subsystems().
> 
> Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

Acked-by: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 cgroup 1/2] cgroup: move module ref handling into rebind_subsystems()
       [not found]         ` <51E3645A.6080404-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
@ 2013-07-16 11:29           ` Tejun Heo
  0 siblings, 0 replies; 9+ messages in thread
From: Tejun Heo @ 2013-07-16 11:29 UTC (permalink / raw)
  To: Li Zefan
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	cgroups-u79uwXL29TY76Z2rM5mHXA

On Mon, Jul 15, 2013 at 10:54:18AM +0800, Li Zefan wrote:
> Acked-by: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

Applied 1-2 to cgroup/for-3.12.  BTW, I'm traveling and will be pretty
slow to respond this week.

Thanks!

-- 
tejun

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2013-07-16 11:29 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-06-29  4:12 [PATCH cgroup 1/2] cgroup: move module ref handling into rebind_subsystems() Tejun Heo
     [not found] ` <20130629041231.GA31353-Gd/HAXX7CRxy/B6EtB590w@public.gmane.org>
2013-06-29  4:13   ` [PATCH 2/2] cgroup: remove gratuituous BUG_ON()s from rebind_subsystems() Tejun Heo
     [not found]     ` <20130629041305.GB31353-Gd/HAXX7CRxy/B6EtB590w@public.gmane.org>
2013-07-15  2:54       ` Li Zefan
2013-07-12  9:03   ` [PATCH cgroup 1/2] cgroup: move module ref handling into rebind_subsystems() Li Zefan
     [not found]     ` <51DFC650.9010801-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
2013-07-12  9:08       ` Li Zefan
     [not found]         ` <51DFC787.7080703-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
2013-07-12 20:10           ` Tejun Heo
2013-07-12 20:38   ` [PATCH v2 " Tejun Heo
     [not found]     ` <20130712203817.GI23680-9pTldWuhBndy/B6EtB590w@public.gmane.org>
2013-07-15  2:54       ` Li Zefan
     [not found]         ` <51E3645A.6080404-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
2013-07-16 11:29           ` Tejun Heo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).