* [PATCH] Memory controller rename to Memory Resource Controller
@ 2008-02-25 11:55 ` Balbir Singh
0 siblings, 0 replies; 24+ messages in thread
From: Balbir Singh @ 2008-02-25 11:55 UTC (permalink / raw)
To: Andrew Morton
Cc: Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, Paul Menage, lizf,
linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov,
Balbir Singh, KAMEZAWA Hiroyuki
Rename Memory Controller to Memory Resource Controller. Reflect the same
changes in the CONFIG definition for the Memory Resource Controller.
Group together the config options for Resource Counters and Memory
Resource Controller.
This code has been compile tested with the Memory Resource Controller on and off.
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---
Documentation/controllers/memory.txt | 8 ++++++--
include/linux/cgroup_subsys.h | 2 +-
include/linux/memcontrol.h | 4 ++--
include/linux/mm_types.h | 4 ++--
init/Kconfig | 30 +++++++++++++++---------------
mm/Makefile | 2 +-
mm/oom_kill.c | 2 +-
mm/vmscan.c | 4 ++--
8 files changed, 30 insertions(+), 26 deletions(-)
diff -puN Documentation/controllers/memory.txt~memory-controller-naming-fixes Documentation/controllers/memory.txt
--- linux-2.6.25-rc3/Documentation/controllers/memory.txt~memory-controller-naming-fixes 2008-02-25 14:22:36.000000000 +0530
+++ linux-2.6.25-rc3-balbir/Documentation/controllers/memory.txt 2008-02-25 14:53:49.000000000 +0530
@@ -1,4 +1,8 @@
-Memory Controller
+Memory Resource Controller
+
+NOTE: The Memory Resource Controller has been generically been referred
+to as the memory controller in this document. Do not confuse memory controller
+used here with the memory controller that is used in hardware.
Salient features
@@ -152,7 +156,7 @@ The memory controller uses the following
a. Enable CONFIG_CGROUPS
b. Enable CONFIG_RESOURCE_COUNTERS
-c. Enable CONFIG_CGROUP_MEM_CONT
+c. Enable CONFIG_CGROUP_MEM_RES_CTLR
1. Prepare the cgroups
# mkdir -p /cgroups
diff -puN init/Kconfig~memory-controller-naming-fixes init/Kconfig
--- linux-2.6.25-rc3/init/Kconfig~memory-controller-naming-fixes 2008-02-25 14:22:36.000000000 +0530
+++ linux-2.6.25-rc3-balbir/init/Kconfig 2008-02-25 15:03:43.000000000 +0530
@@ -366,6 +366,21 @@ config RESOURCE_COUNTERS
infrastructure that works with cgroups
depends on CGROUPS
+config CGROUP_MEM_RES_CTLR
+ bool "Memory Resource Controller for Control Groups"
+ depends on CGROUPS && RESOURCE_COUNTERS
+ help
+ Provides a memory resource controller that manages both page cache and
+ RSS memory.
+
+ Note that setting this option increases fixed memory overhead
+ associated with each page of memory in the system by 4/8 bytes
+ and also increases cache misses because struct page on many 64bit
+ systems will not fit into a single cache line anymore.
+
+ Only enable when you're ok with these trade offs and really
+ sure you need the memory resource controller.
+
config SYSFS_DEPRECATED
bool "Create deprecated sysfs files"
depends on SYSFS
@@ -387,21 +402,6 @@ config SYSFS_DEPRECATED
If you are using a distro that was released in 2006 or later,
it should be safe to say N here.
-config CGROUP_MEM_CONT
- bool "Memory controller for cgroups"
- depends on CGROUPS && RESOURCE_COUNTERS
- help
- Provides a memory controller that manages both page cache and
- RSS memory.
-
- Note that setting this option increases fixed memory overhead
- associated with each page of memory in the system by 4/8 bytes
- and also increases cache misses because struct page on many 64bit
- systems will not fit into a single cache line anymore.
-
- Only enable when you're ok with these trade offs and really
- sure you need the memory controller.
-
config PROC_PID_CPUSET
bool "Include legacy /proc/<pid>/cpuset file"
depends on CPUSETS
diff -puN include/linux/memcontrol.h~memory-controller-naming-fixes include/linux/memcontrol.h
--- linux-2.6.25-rc3/include/linux/memcontrol.h~memory-controller-naming-fixes 2008-02-25 14:22:36.000000000 +0530
+++ linux-2.6.25-rc3-balbir/include/linux/memcontrol.h 2008-02-25 15:55:44.000000000 +0530
@@ -25,7 +25,7 @@ struct page_cgroup;
struct page;
struct mm_struct;
-#ifdef CONFIG_CGROUP_MEM_CONT
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p);
extern void mm_free_cgroup(struct mm_struct *mm);
@@ -72,7 +72,7 @@ extern long mem_cgroup_calc_reclaim_acti
extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
struct zone *zone, int priority);
-#else /* CONFIG_CGROUP_MEM_CONT */
+#else /* CONFIG_CGROUP_MEM_RES_CTLR */
static inline void mm_init_cgroup(struct mm_struct *mm,
struct task_struct *p)
{
diff -puN mm/memcontrol.c~memory-controller-naming-fixes mm/memcontrol.c
diff -puN mm/vmscan.c~memory-controller-naming-fixes mm/vmscan.c
--- linux-2.6.25-rc3/mm/vmscan.c~memory-controller-naming-fixes 2008-02-25 14:22:36.000000000 +0530
+++ linux-2.6.25-rc3-balbir/mm/vmscan.c 2008-02-25 14:32:55.000000000 +0530
@@ -126,7 +126,7 @@ long vm_total_pages; /* The total number
static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);
-#ifdef CONFIG_CGROUP_MEM_CONT
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
#define scan_global_lru(sc) (!(sc)->mem_cgroup)
#else
#define scan_global_lru(sc) (1)
@@ -1427,7 +1427,7 @@ unsigned long try_to_free_pages(struct z
return do_try_to_free_pages(zones, gfp_mask, &sc);
}
-#ifdef CONFIG_CGROUP_MEM_CONT
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
gfp_t gfp_mask)
diff -puN mm/rmap.c~memory-controller-naming-fixes mm/rmap.c
diff -puN mm/Makefile~memory-controller-naming-fixes mm/Makefile
--- linux-2.6.25-rc3/mm/Makefile~memory-controller-naming-fixes 2008-02-25 14:22:36.000000000 +0530
+++ linux-2.6.25-rc3-balbir/mm/Makefile 2008-02-25 14:33:10.000000000 +0530
@@ -32,5 +32,5 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_SMP) += allocpercpu.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
-obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o
+obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o
diff -puN include/linux/cgroup_subsys.h~memory-controller-naming-fixes include/linux/cgroup_subsys.h
--- linux-2.6.25-rc3/include/linux/cgroup_subsys.h~memory-controller-naming-fixes 2008-02-25 14:31:04.000000000 +0530
+++ linux-2.6.25-rc3-balbir/include/linux/cgroup_subsys.h 2008-02-25 14:31:18.000000000 +0530
@@ -37,7 +37,7 @@ SUBSYS(cpuacct)
/* */
-#ifdef CONFIG_CGROUP_MEM_CONT
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
SUBSYS(mem_cgroup)
#endif
diff -puN include/linux/mm_types.h~memory-controller-naming-fixes include/linux/mm_types.h
--- linux-2.6.25-rc3/include/linux/mm_types.h~memory-controller-naming-fixes 2008-02-25 14:32:18.000000000 +0530
+++ linux-2.6.25-rc3-balbir/include/linux/mm_types.h 2008-02-25 15:13:53.000000000 +0530
@@ -91,7 +91,7 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
-#ifdef CONFIG_CGROUP_MEM_CONT
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
unsigned long page_cgroup;
#endif
};
@@ -225,7 +225,7 @@ struct mm_struct {
/* aio bits */
rwlock_t ioctx_list_lock;
struct kioctx *ioctx_list;
-#ifdef CONFIG_CGROUP_MEM_CONT
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
struct mem_cgroup *mem_cgroup;
#endif
};
diff -puN mm/oom_kill.c~memory-controller-naming-fixes mm/oom_kill.c
--- linux-2.6.25-rc3/mm/oom_kill.c~memory-controller-naming-fixes 2008-02-25 14:33:17.000000000 +0530
+++ linux-2.6.25-rc3-balbir/mm/oom_kill.c 2008-02-25 14:33:27.000000000 +0530
@@ -412,7 +412,7 @@ static int oom_kill_process(struct task_
return oom_kill_task(p);
}
-#ifdef CONFIG_CGROUP_MEM_CONT
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
{
unsigned long points = 0;
_
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
^ permalink raw reply [flat|nested] 24+ messages in thread* [PATCH] Memory controller rename to Memory Resource Controller @ 2008-02-25 11:55 ` Balbir Singh 0 siblings, 0 replies; 24+ messages in thread From: Balbir Singh @ 2008-02-25 11:55 UTC (permalink / raw) To: Andrew Morton Cc: Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, Paul Menage, lizf, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, Balbir Singh, KAMEZAWA Hiroyuki Rename Memory Controller to Memory Resource Controller. Reflect the same changes in the CONFIG definition for the Memory Resource Controller. Group together the config options for Resource Counters and Memory Resource Controller. This code has been compile tested with the Memory Resource Controller on and off. Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com> --- Documentation/controllers/memory.txt | 8 ++++++-- include/linux/cgroup_subsys.h | 2 +- include/linux/memcontrol.h | 4 ++-- include/linux/mm_types.h | 4 ++-- init/Kconfig | 30 +++++++++++++++--------------- mm/Makefile | 2 +- mm/oom_kill.c | 2 +- mm/vmscan.c | 4 ++-- 8 files changed, 30 insertions(+), 26 deletions(-) diff -puN Documentation/controllers/memory.txt~memory-controller-naming-fixes Documentation/controllers/memory.txt --- linux-2.6.25-rc3/Documentation/controllers/memory.txt~memory-controller-naming-fixes 2008-02-25 14:22:36.000000000 +0530 +++ linux-2.6.25-rc3-balbir/Documentation/controllers/memory.txt 2008-02-25 14:53:49.000000000 +0530 @@ -1,4 +1,8 @@ -Memory Controller +Memory Resource Controller + +NOTE: The Memory Resource Controller has been generically been referred +to as the memory controller in this document. Do not confuse memory controller +used here with the memory controller that is used in hardware. Salient features @@ -152,7 +156,7 @@ The memory controller uses the following a. Enable CONFIG_CGROUPS b. Enable CONFIG_RESOURCE_COUNTERS -c. Enable CONFIG_CGROUP_MEM_CONT +c. Enable CONFIG_CGROUP_MEM_RES_CTLR 1. Prepare the cgroups # mkdir -p /cgroups diff -puN init/Kconfig~memory-controller-naming-fixes init/Kconfig --- linux-2.6.25-rc3/init/Kconfig~memory-controller-naming-fixes 2008-02-25 14:22:36.000000000 +0530 +++ linux-2.6.25-rc3-balbir/init/Kconfig 2008-02-25 15:03:43.000000000 +0530 @@ -366,6 +366,21 @@ config RESOURCE_COUNTERS infrastructure that works with cgroups depends on CGROUPS +config CGROUP_MEM_RES_CTLR + bool "Memory Resource Controller for Control Groups" + depends on CGROUPS && RESOURCE_COUNTERS + help + Provides a memory resource controller that manages both page cache and + RSS memory. + + Note that setting this option increases fixed memory overhead + associated with each page of memory in the system by 4/8 bytes + and also increases cache misses because struct page on many 64bit + systems will not fit into a single cache line anymore. + + Only enable when you're ok with these trade offs and really + sure you need the memory resource controller. + config SYSFS_DEPRECATED bool "Create deprecated sysfs files" depends on SYSFS @@ -387,21 +402,6 @@ config SYSFS_DEPRECATED If you are using a distro that was released in 2006 or later, it should be safe to say N here. -config CGROUP_MEM_CONT - bool "Memory controller for cgroups" - depends on CGROUPS && RESOURCE_COUNTERS - help - Provides a memory controller that manages both page cache and - RSS memory. - - Note that setting this option increases fixed memory overhead - associated with each page of memory in the system by 4/8 bytes - and also increases cache misses because struct page on many 64bit - systems will not fit into a single cache line anymore. - - Only enable when you're ok with these trade offs and really - sure you need the memory controller. - config PROC_PID_CPUSET bool "Include legacy /proc/<pid>/cpuset file" depends on CPUSETS diff -puN include/linux/memcontrol.h~memory-controller-naming-fixes include/linux/memcontrol.h --- linux-2.6.25-rc3/include/linux/memcontrol.h~memory-controller-naming-fixes 2008-02-25 14:22:36.000000000 +0530 +++ linux-2.6.25-rc3-balbir/include/linux/memcontrol.h 2008-02-25 15:55:44.000000000 +0530 @@ -25,7 +25,7 @@ struct page_cgroup; struct page; struct mm_struct; -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); extern void mm_free_cgroup(struct mm_struct *mm); @@ -72,7 +72,7 @@ extern long mem_cgroup_calc_reclaim_acti extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, struct zone *zone, int priority); -#else /* CONFIG_CGROUP_MEM_CONT */ +#else /* CONFIG_CGROUP_MEM_RES_CTLR */ static inline void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) { diff -puN mm/memcontrol.c~memory-controller-naming-fixes mm/memcontrol.c diff -puN mm/vmscan.c~memory-controller-naming-fixes mm/vmscan.c --- linux-2.6.25-rc3/mm/vmscan.c~memory-controller-naming-fixes 2008-02-25 14:22:36.000000000 +0530 +++ linux-2.6.25-rc3-balbir/mm/vmscan.c 2008-02-25 14:32:55.000000000 +0530 @@ -126,7 +126,7 @@ long vm_total_pages; /* The total number static LIST_HEAD(shrinker_list); static DECLARE_RWSEM(shrinker_rwsem); -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR #define scan_global_lru(sc) (!(sc)->mem_cgroup) #else #define scan_global_lru(sc) (1) @@ -1427,7 +1427,7 @@ unsigned long try_to_free_pages(struct z return do_try_to_free_pages(zones, gfp_mask, &sc); } -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, gfp_t gfp_mask) diff -puN mm/rmap.c~memory-controller-naming-fixes mm/rmap.c diff -puN mm/Makefile~memory-controller-naming-fixes mm/Makefile --- linux-2.6.25-rc3/mm/Makefile~memory-controller-naming-fixes 2008-02-25 14:22:36.000000000 +0530 +++ linux-2.6.25-rc3-balbir/mm/Makefile 2008-02-25 14:33:10.000000000 +0530 @@ -32,5 +32,5 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_SMP) += allocpercpu.o obj-$(CONFIG_QUICKLIST) += quicklist.o -obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o +obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o diff -puN include/linux/cgroup_subsys.h~memory-controller-naming-fixes include/linux/cgroup_subsys.h --- linux-2.6.25-rc3/include/linux/cgroup_subsys.h~memory-controller-naming-fixes 2008-02-25 14:31:04.000000000 +0530 +++ linux-2.6.25-rc3-balbir/include/linux/cgroup_subsys.h 2008-02-25 14:31:18.000000000 +0530 @@ -37,7 +37,7 @@ SUBSYS(cpuacct) /* */ -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR SUBSYS(mem_cgroup) #endif diff -puN include/linux/mm_types.h~memory-controller-naming-fixes include/linux/mm_types.h --- linux-2.6.25-rc3/include/linux/mm_types.h~memory-controller-naming-fixes 2008-02-25 14:32:18.000000000 +0530 +++ linux-2.6.25-rc3-balbir/include/linux/mm_types.h 2008-02-25 15:13:53.000000000 +0530 @@ -91,7 +91,7 @@ struct page { void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR unsigned long page_cgroup; #endif }; @@ -225,7 +225,7 @@ struct mm_struct { /* aio bits */ rwlock_t ioctx_list_lock; struct kioctx *ioctx_list; -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR struct mem_cgroup *mem_cgroup; #endif }; diff -puN mm/oom_kill.c~memory-controller-naming-fixes mm/oom_kill.c --- linux-2.6.25-rc3/mm/oom_kill.c~memory-controller-naming-fixes 2008-02-25 14:33:17.000000000 +0530 +++ linux-2.6.25-rc3-balbir/mm/oom_kill.c 2008-02-25 14:33:27.000000000 +0530 @@ -412,7 +412,7 @@ static int oom_kill_process(struct task_ return oom_kill_task(p); } -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) { unsigned long points = 0; _ -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH] Memory Resource Controller Add Boot Option 2008-02-25 11:55 ` Balbir Singh @ 2008-02-25 11:55 ` Balbir Singh -1 siblings, 0 replies; 24+ messages in thread From: Balbir Singh @ 2008-02-25 11:55 UTC (permalink / raw) To: Andrew Morton Cc: Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, Paul Menage, lizf, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, Balbir Singh, KAMEZAWA Hiroyuki A boot option for the memory controller was discussed on lkml. It is a good idea to add it, since it saves memory for people who want to turn off the memory controller. By default the option is on for the following two reasons 1. It provides compatibility with the current scheme where the memory controller turns on if the config option is enabled 2. It allows for wider testing of the memory controller, once the config option is enabled We still allow the create, destroy callbacks to succeed, since they are not aware of boot options. We do not populate the directory will memory resource controller specific files. Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com> --- Documentation/kernel-parameters.txt | 2 ++ mm/memcontrol.c | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff -L mm/memcontrol.h -puN /dev/null /dev/null diff -puN include/linux/memcontrol.h~memory-controller-add-boot-option include/linux/memcontrol.h diff -puN mm/memcontrol.c~memory-controller-add-boot-option mm/memcontrol.c --- linux-2.6.25-rc3/mm/memcontrol.c~memory-controller-add-boot-option 2008-02-25 15:55:58.000000000 +0530 +++ linux-2.6.25-rc3-balbir/mm/memcontrol.c 2008-02-25 17:10:50.000000000 +0530 @@ -35,6 +35,7 @@ struct cgroup_subsys mem_cgroup_subsys; static const int MEM_CGROUP_RECLAIM_RETRIES = 5; +static int mem_cgroup_on __read_mostly = 1; /* turned on/off */ /* * Statistics for memory cgroup. @@ -578,6 +579,9 @@ static int mem_cgroup_charge_common(stru unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES; struct mem_cgroup_per_zone *mz; + if (!mem_cgroup_on) + return 0; + /* * Should page_cgroup's go to their own slab? * One could optimize the performance of the charging routine @@ -729,7 +733,7 @@ void mem_cgroup_uncharge(struct page_cgr /* * Check if our page_cgroup is valid */ - if (!pc) + if (!pc || !mem_cgroup_on) return; if (atomic_dec_and_test(&pc->ref_cnt)) { @@ -755,6 +759,9 @@ void mem_cgroup_uncharge(struct page_cgr void mem_cgroup_uncharge_page(struct page *page) { + if (!mem_cgroup_on) + return; + lock_page_cgroup(page); mem_cgroup_uncharge(page_get_page_cgroup(page)); unlock_page_cgroup(page); @@ -769,6 +776,10 @@ int mem_cgroup_prepare_migration(struct { struct page_cgroup *pc; int ret = 0; + + if (!mem_cgroup_on) + return 0; + lock_page_cgroup(page); pc = page_get_page_cgroup(page); if (pc && atomic_inc_not_zero(&pc->ref_cnt)) @@ -781,6 +792,9 @@ void mem_cgroup_end_migration(struct pag { struct page_cgroup *pc; + if (!mem_cgroup_on) + return; + lock_page_cgroup(page); pc = page_get_page_cgroup(page); mem_cgroup_uncharge(pc); @@ -881,6 +895,10 @@ int mem_cgroup_force_empty(struct mem_cg { int ret = -EBUSY; int node, zid; + + if (!mem_cgroup_on) + return 0; + css_get(&mem->css); /* * page reclaim code (kswapd etc..) will move pages between @@ -1141,6 +1159,9 @@ static void mem_cgroup_destroy(struct cg static int mem_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) { + if (!mem_cgroup_on) + return 0; + return cgroup_add_files(cont, ss, mem_cgroup_files, ARRAY_SIZE(mem_cgroup_files)); } @@ -1153,6 +1174,9 @@ static void mem_cgroup_move_task(struct struct mm_struct *mm; struct mem_cgroup *mem, *old_mem; + if (!mem_cgroup_on) + return; + mm = get_task_mm(p); if (mm == NULL) return; @@ -1189,3 +1213,10 @@ struct cgroup_subsys mem_cgroup_subsys = .attach = mem_cgroup_move_task, .early_init = 0, }; + +static int __init mem_cgroup_startup_disable(char *str) +{ + mem_cgroup_on = 0; + return 1; +} +__setup("memcgroupoff", mem_cgroup_startup_disable); diff -puN Documentation/kernel-parameters.txt~memory-controller-add-boot-option Documentation/kernel-parameters.txt --- linux-2.6.25-rc3/Documentation/kernel-parameters.txt~memory-controller-add-boot-option 2008-02-25 15:55:58.000000000 +0530 +++ linux-2.6.25-rc3-balbir/Documentation/kernel-parameters.txt 2008-02-25 15:56:01.000000000 +0530 @@ -1114,6 +1114,8 @@ and is between 256 and 4096 characters. mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel memory. + memcgroupoff [KNL] Disable memory resource controller + memmap=exactmap [KNL,X86-32,X86_64] Enable setting of an exact E820 memory map, as specified by the user. Such memmap=exactmap lines can be constructed based on _ -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL ^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH] Memory Resource Controller Add Boot Option @ 2008-02-25 11:55 ` Balbir Singh 0 siblings, 0 replies; 24+ messages in thread From: Balbir Singh @ 2008-02-25 11:55 UTC (permalink / raw) To: Andrew Morton Cc: Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, Paul Menage, lizf, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, Balbir Singh, KAMEZAWA Hiroyuki A boot option for the memory controller was discussed on lkml. It is a good idea to add it, since it saves memory for people who want to turn off the memory controller. By default the option is on for the following two reasons 1. It provides compatibility with the current scheme where the memory controller turns on if the config option is enabled 2. It allows for wider testing of the memory controller, once the config option is enabled We still allow the create, destroy callbacks to succeed, since they are not aware of boot options. We do not populate the directory will memory resource controller specific files. Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com> --- Documentation/kernel-parameters.txt | 2 ++ mm/memcontrol.c | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff -L mm/memcontrol.h -puN /dev/null /dev/null diff -puN include/linux/memcontrol.h~memory-controller-add-boot-option include/linux/memcontrol.h diff -puN mm/memcontrol.c~memory-controller-add-boot-option mm/memcontrol.c --- linux-2.6.25-rc3/mm/memcontrol.c~memory-controller-add-boot-option 2008-02-25 15:55:58.000000000 +0530 +++ linux-2.6.25-rc3-balbir/mm/memcontrol.c 2008-02-25 17:10:50.000000000 +0530 @@ -35,6 +35,7 @@ struct cgroup_subsys mem_cgroup_subsys; static const int MEM_CGROUP_RECLAIM_RETRIES = 5; +static int mem_cgroup_on __read_mostly = 1; /* turned on/off */ /* * Statistics for memory cgroup. @@ -578,6 +579,9 @@ static int mem_cgroup_charge_common(stru unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES; struct mem_cgroup_per_zone *mz; + if (!mem_cgroup_on) + return 0; + /* * Should page_cgroup's go to their own slab? * One could optimize the performance of the charging routine @@ -729,7 +733,7 @@ void mem_cgroup_uncharge(struct page_cgr /* * Check if our page_cgroup is valid */ - if (!pc) + if (!pc || !mem_cgroup_on) return; if (atomic_dec_and_test(&pc->ref_cnt)) { @@ -755,6 +759,9 @@ void mem_cgroup_uncharge(struct page_cgr void mem_cgroup_uncharge_page(struct page *page) { + if (!mem_cgroup_on) + return; + lock_page_cgroup(page); mem_cgroup_uncharge(page_get_page_cgroup(page)); unlock_page_cgroup(page); @@ -769,6 +776,10 @@ int mem_cgroup_prepare_migration(struct { struct page_cgroup *pc; int ret = 0; + + if (!mem_cgroup_on) + return 0; + lock_page_cgroup(page); pc = page_get_page_cgroup(page); if (pc && atomic_inc_not_zero(&pc->ref_cnt)) @@ -781,6 +792,9 @@ void mem_cgroup_end_migration(struct pag { struct page_cgroup *pc; + if (!mem_cgroup_on) + return; + lock_page_cgroup(page); pc = page_get_page_cgroup(page); mem_cgroup_uncharge(pc); @@ -881,6 +895,10 @@ int mem_cgroup_force_empty(struct mem_cg { int ret = -EBUSY; int node, zid; + + if (!mem_cgroup_on) + return 0; + css_get(&mem->css); /* * page reclaim code (kswapd etc..) will move pages between @@ -1141,6 +1159,9 @@ static void mem_cgroup_destroy(struct cg static int mem_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) { + if (!mem_cgroup_on) + return 0; + return cgroup_add_files(cont, ss, mem_cgroup_files, ARRAY_SIZE(mem_cgroup_files)); } @@ -1153,6 +1174,9 @@ static void mem_cgroup_move_task(struct struct mm_struct *mm; struct mem_cgroup *mem, *old_mem; + if (!mem_cgroup_on) + return; + mm = get_task_mm(p); if (mm == NULL) return; @@ -1189,3 +1213,10 @@ struct cgroup_subsys mem_cgroup_subsys = .attach = mem_cgroup_move_task, .early_init = 0, }; + +static int __init mem_cgroup_startup_disable(char *str) +{ + mem_cgroup_on = 0; + return 1; +} +__setup("memcgroupoff", mem_cgroup_startup_disable); diff -puN Documentation/kernel-parameters.txt~memory-controller-add-boot-option Documentation/kernel-parameters.txt --- linux-2.6.25-rc3/Documentation/kernel-parameters.txt~memory-controller-add-boot-option 2008-02-25 15:55:58.000000000 +0530 +++ linux-2.6.25-rc3-balbir/Documentation/kernel-parameters.txt 2008-02-25 15:56:01.000000000 +0530 @@ -1114,6 +1114,8 @@ and is between 256 and 4096 characters. mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel memory. + memcgroupoff [KNL] Disable memory resource controller + memmap=exactmap [KNL,X86-32,X86_64] Enable setting of an exact E820 memory map, as specified by the user. Such memmap=exactmap lines can be constructed based on _ -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option 2008-02-25 11:55 ` Balbir Singh @ 2008-02-25 16:16 ` Paul Menage -1 siblings, 0 replies; 24+ messages in thread From: Paul Menage @ 2008-02-25 16:16 UTC (permalink / raw) To: Balbir Singh Cc: Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, lizf, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki On Mon, Feb 25, 2008 at 3:55 AM, Balbir Singh <balbir@linux.vnet.ibm.com> wrote: > > > A boot option for the memory controller was discussed on lkml. It is a good > idea to add it, since it saves memory for people who want to turn off the > memory controller. > > By default the option is on for the following two reasons > > 1. It provides compatibility with the current scheme where the memory > controller turns on if the config option is enabled > 2. It allows for wider testing of the memory controller, once the config > option is enabled > > We still allow the create, destroy callbacks to succeed, since they are > not aware of boot options. We do not populate the directory will > memory resource controller specific files. Would it make more sense to have a generic cgroups boot option for this? Something like cgroup_disable=xxx, which would be parsed by cgroups and would cause: - a "disabled" flag to be set to true in the subsys object (you could use this in place of the mem_cgroup_on flag) - prevent the disabled cgroup from being bound to any mounted hierarchy (so it would be ignored in a mount with no subsystem options, and a mount with options that specifically pick that subsystem would give an error) Paul ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option @ 2008-02-25 16:16 ` Paul Menage 0 siblings, 0 replies; 24+ messages in thread From: Paul Menage @ 2008-02-25 16:16 UTC (permalink / raw) To: Balbir Singh Cc: Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, lizf, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki On Mon, Feb 25, 2008 at 3:55 AM, Balbir Singh <balbir@linux.vnet.ibm.com> wrote: > > > A boot option for the memory controller was discussed on lkml. It is a good > idea to add it, since it saves memory for people who want to turn off the > memory controller. > > By default the option is on for the following two reasons > > 1. It provides compatibility with the current scheme where the memory > controller turns on if the config option is enabled > 2. It allows for wider testing of the memory controller, once the config > option is enabled > > We still allow the create, destroy callbacks to succeed, since they are > not aware of boot options. We do not populate the directory will > memory resource controller specific files. Would it make more sense to have a generic cgroups boot option for this? Something like cgroup_disable=xxx, which would be parsed by cgroups and would cause: - a "disabled" flag to be set to true in the subsys object (you could use this in place of the mem_cgroup_on flag) - prevent the disabled cgroup from being bound to any mounted hierarchy (so it would be ignored in a mount with no subsystem options, and a mount with options that specifically pick that subsystem would give an error) Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option 2008-02-25 16:16 ` Paul Menage @ 2008-02-25 17:18 ` Balbir Singh -1 siblings, 0 replies; 24+ messages in thread From: Balbir Singh @ 2008-02-25 17:18 UTC (permalink / raw) To: Paul Menage Cc: Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, lizf, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki Paul Menage wrote: > On Mon, Feb 25, 2008 at 3:55 AM, Balbir Singh <balbir@linux.vnet.ibm.com> wrote: >> >> A boot option for the memory controller was discussed on lkml. It is a good >> idea to add it, since it saves memory for people who want to turn off the >> memory controller. >> >> By default the option is on for the following two reasons >> >> 1. It provides compatibility with the current scheme where the memory >> controller turns on if the config option is enabled >> 2. It allows for wider testing of the memory controller, once the config >> option is enabled >> >> We still allow the create, destroy callbacks to succeed, since they are >> not aware of boot options. We do not populate the directory will >> memory resource controller specific files. > > Would it make more sense to have a generic cgroups boot option for this? > > Something like cgroup_disable=xxx, which would be parsed by cgroups > and would cause: > > - a "disabled" flag to be set to true in the subsys object (you could > use this in place of the mem_cgroup_on flag) > I thought about it, but it did not work out all that well. The reason being, that the memory controller is called in from places besides cgroup. mem_cgroup_charge_common() for example is called from several places in mm. Calling into cgroups to check, enabled/disabled did not seem right. Hence I put the boot option in mm/memcontrol.c > - prevent the disabled cgroup from being bound to any mounted > hierarchy (so it would be ignored in a mount with no subsystem > options, and a mount with options that specifically pick that > subsystem would give an error) > The controller can be bound, but I just don't populate the files associated with the controller > Paul -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option @ 2008-02-25 17:18 ` Balbir Singh 0 siblings, 0 replies; 24+ messages in thread From: Balbir Singh @ 2008-02-25 17:18 UTC (permalink / raw) To: Paul Menage Cc: Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, lizf, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki Paul Menage wrote: > On Mon, Feb 25, 2008 at 3:55 AM, Balbir Singh <balbir@linux.vnet.ibm.com> wrote: >> >> A boot option for the memory controller was discussed on lkml. It is a good >> idea to add it, since it saves memory for people who want to turn off the >> memory controller. >> >> By default the option is on for the following two reasons >> >> 1. It provides compatibility with the current scheme where the memory >> controller turns on if the config option is enabled >> 2. It allows for wider testing of the memory controller, once the config >> option is enabled >> >> We still allow the create, destroy callbacks to succeed, since they are >> not aware of boot options. We do not populate the directory will >> memory resource controller specific files. > > Would it make more sense to have a generic cgroups boot option for this? > > Something like cgroup_disable=xxx, which would be parsed by cgroups > and would cause: > > - a "disabled" flag to be set to true in the subsys object (you could > use this in place of the mem_cgroup_on flag) > I thought about it, but it did not work out all that well. The reason being, that the memory controller is called in from places besides cgroup. mem_cgroup_charge_common() for example is called from several places in mm. Calling into cgroups to check, enabled/disabled did not seem right. Hence I put the boot option in mm/memcontrol.c > - prevent the disabled cgroup from being bound to any mounted > hierarchy (so it would be ignored in a mount with no subsystem > options, and a mount with options that specifically pick that > subsystem would give an error) > The controller can be bound, but I just don't populate the files associated with the controller > Paul -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option 2008-02-25 17:18 ` Balbir Singh @ 2008-02-25 17:32 ` Paul Menage -1 siblings, 0 replies; 24+ messages in thread From: Paul Menage @ 2008-02-25 17:32 UTC (permalink / raw) To: balbir Cc: Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, lizf, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki On Mon, Feb 25, 2008 at 9:18 AM, Balbir Singh <balbir@linux.vnet.ibm.com> wrote: > > I thought about it, but it did not work out all that well. The reason being, > that the memory controller is called in from places besides cgroup. > mem_cgroup_charge_common() for example is called from several places in mm. > Calling into cgroups to check, enabled/disabled did not seem right. You wouldn't need to call into cgroups - if it's a flag in the subsys object (which is defined in memcontrol.c) you'd just say if (mem_cgroup_subsys.disabled) { ... } I'll send out a prototype for comment. Paul ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option @ 2008-02-25 17:32 ` Paul Menage 0 siblings, 0 replies; 24+ messages in thread From: Paul Menage @ 2008-02-25 17:32 UTC (permalink / raw) To: balbir Cc: Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, lizf, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki On Mon, Feb 25, 2008 at 9:18 AM, Balbir Singh <balbir@linux.vnet.ibm.com> wrote: > > I thought about it, but it did not work out all that well. The reason being, > that the memory controller is called in from places besides cgroup. > mem_cgroup_charge_common() for example is called from several places in mm. > Calling into cgroups to check, enabled/disabled did not seem right. You wouldn't need to call into cgroups - if it's a flag in the subsys object (which is defined in memcontrol.c) you'd just say if (mem_cgroup_subsys.disabled) { ... } I'll send out a prototype for comment. Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option 2008-02-25 17:32 ` Paul Menage @ 2008-02-25 17:37 ` Balbir Singh -1 siblings, 0 replies; 24+ messages in thread From: Balbir Singh @ 2008-02-25 17:37 UTC (permalink / raw) To: Paul Menage Cc: Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, lizf, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki Paul Menage wrote: > On Mon, Feb 25, 2008 at 9:18 AM, Balbir Singh <balbir@linux.vnet.ibm.com> wrote: >> I thought about it, but it did not work out all that well. The reason being, >> that the memory controller is called in from places besides cgroup. >> mem_cgroup_charge_common() for example is called from several places in mm. >> Calling into cgroups to check, enabled/disabled did not seem right. > > You wouldn't need to call into cgroups - if it's a flag in the subsys > object (which is defined in memcontrol.c) you'd just say > > if (mem_cgroup_subsys.disabled) { > ... > } > > I'll send out a prototype for comment. Sure thing, if css has the flag, then it would nice. Could you wrap it up to say something like css_disabled(&mem_cgroup_subsys) -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option @ 2008-02-25 17:37 ` Balbir Singh 0 siblings, 0 replies; 24+ messages in thread From: Balbir Singh @ 2008-02-25 17:37 UTC (permalink / raw) To: Paul Menage Cc: Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, lizf, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki Paul Menage wrote: > On Mon, Feb 25, 2008 at 9:18 AM, Balbir Singh <balbir@linux.vnet.ibm.com> wrote: >> I thought about it, but it did not work out all that well. The reason being, >> that the memory controller is called in from places besides cgroup. >> mem_cgroup_charge_common() for example is called from several places in mm. >> Calling into cgroups to check, enabled/disabled did not seem right. > > You wouldn't need to call into cgroups - if it's a flag in the subsys > object (which is defined in memcontrol.c) you'd just say > > if (mem_cgroup_subsys.disabled) { > ... > } > > I'll send out a prototype for comment. Sure thing, if css has the flag, then it would nice. Could you wrap it up to say something like css_disabled(&mem_cgroup_subsys) -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option 2008-02-25 17:37 ` Balbir Singh @ 2008-02-25 18:54 ` Paul Menage -1 siblings, 0 replies; 24+ messages in thread From: Paul Menage @ 2008-02-25 18:54 UTC (permalink / raw) To: balbir Cc: Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, lizf, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki >> I'll send out a prototype for comment. Something like the patch below. The effects of cgroup_disable=foo are: - foo doesn't show up in /proc/cgroups - foo isn't auto-mounted if you mount all cgroups in a single hierarchy - foo isn't visible as an individually mountable subsystem As a result there will only ever be one call to foo->create(), at init time; all processes will stay in this group, and the group will never be mounted on a visible hierarchy. Any additional effects (e.g. not allocating metadata) are up to the foo subsystem. This doesn't handle early_init subsystems (their "disabled" bit isn't set be, but it could easily be extended to do so if any of the early_init systems wanted it - I think it would just involve some nastier parameter processing since it would occur before the command-line argument parser had been run. include/linux/cgroup.h | 1 + kernel/cgroup.c | 29 +++++++++++++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) Index: cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h =================================================================== --- cgroup_disable-2.6.25-rc2-mm1.orig/include/linux/cgroup.h +++ cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h @@ -256,6 +256,7 @@ struct cgroup_subsys { void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); int subsys_id; int active; + int disabled; int early_init; #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; Index: cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c =================================================================== --- cgroup_disable-2.6.25-rc2-mm1.orig/kernel/cgroup.c +++ cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c @@ -790,7 +790,14 @@ static int parse_cgroupfs_options(char * if (!*token) return -EINVAL; if (!strcmp(token, "all")) { - opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1; + /* Add all non-disabled subsystems */ + int i; + opts->subsys_bits = 0; + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (!ss->disabled) + opts->subsys_bits |= 1ul << i; + } } else if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); } else if (!strncmp(token, "release_agent=", 14)) { @@ -808,7 +815,8 @@ static int parse_cgroupfs_options(char * for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { ss = subsys[i]; if (!strcmp(token, ss->name)) { - set_bit(i, &opts->subsys_bits); + if (!ss->disabled) + set_bit(i, &opts->subsys_bits); break; } } @@ -2596,6 +2606,8 @@ static int proc_cgroupstats_show(struct mutex_lock(&cgroup_mutex); for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; + if (ss->disabled) + continue; seq_printf(m, "%s\t%lu\t%d\n", ss->name, ss->root->subsys_bits, ss->root->number_of_cgroups); @@ -2991,3 +3003,16 @@ static void cgroup_release_agent(struct spin_unlock(&release_list_lock); mutex_unlock(&cgroup_mutex); } + +static int __init cgroup_disable(char *str) +{ + int i; + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (!strcmp(str, ss->name)) { + ss->disabled = 1; + break; + } + } +} +__setup("cgroup_disable=", cgroup_disable); > > Sure thing, if css has the flag, then it would nice. Could you wrap it up to say > something like css_disabled(&mem_cgroup_subsys) > > It's the subsys object rather than the css (cgroup_subsys_state). We could have something like: #define cgroup_subsys_disabled(_ss) ((ss_)->disabled) but I don't see that cgroup_subsys_disabled(&mem_cgroup_subsys) is better than just putting mem_cgroup_subsys.disabled Paul ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option @ 2008-02-25 18:54 ` Paul Menage 0 siblings, 0 replies; 24+ messages in thread From: Paul Menage @ 2008-02-25 18:54 UTC (permalink / raw) To: balbir Cc: Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, lizf, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki >> I'll send out a prototype for comment. Something like the patch below. The effects of cgroup_disable=foo are: - foo doesn't show up in /proc/cgroups - foo isn't auto-mounted if you mount all cgroups in a single hierarchy - foo isn't visible as an individually mountable subsystem As a result there will only ever be one call to foo->create(), at init time; all processes will stay in this group, and the group will never be mounted on a visible hierarchy. Any additional effects (e.g. not allocating metadata) are up to the foo subsystem. This doesn't handle early_init subsystems (their "disabled" bit isn't set be, but it could easily be extended to do so if any of the early_init systems wanted it - I think it would just involve some nastier parameter processing since it would occur before the command-line argument parser had been run. include/linux/cgroup.h | 1 + kernel/cgroup.c | 29 +++++++++++++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) Index: cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h =================================================================== --- cgroup_disable-2.6.25-rc2-mm1.orig/include/linux/cgroup.h +++ cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h @@ -256,6 +256,7 @@ struct cgroup_subsys { void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); int subsys_id; int active; + int disabled; int early_init; #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; Index: cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c =================================================================== --- cgroup_disable-2.6.25-rc2-mm1.orig/kernel/cgroup.c +++ cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c @@ -790,7 +790,14 @@ static int parse_cgroupfs_options(char * if (!*token) return -EINVAL; if (!strcmp(token, "all")) { - opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1; + /* Add all non-disabled subsystems */ + int i; + opts->subsys_bits = 0; + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (!ss->disabled) + opts->subsys_bits |= 1ul << i; + } } else if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); } else if (!strncmp(token, "release_agent=", 14)) { @@ -808,7 +815,8 @@ static int parse_cgroupfs_options(char * for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { ss = subsys[i]; if (!strcmp(token, ss->name)) { - set_bit(i, &opts->subsys_bits); + if (!ss->disabled) + set_bit(i, &opts->subsys_bits); break; } } @@ -2596,6 +2606,8 @@ static int proc_cgroupstats_show(struct mutex_lock(&cgroup_mutex); for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; + if (ss->disabled) + continue; seq_printf(m, "%s\t%lu\t%d\n", ss->name, ss->root->subsys_bits, ss->root->number_of_cgroups); @@ -2991,3 +3003,16 @@ static void cgroup_release_agent(struct spin_unlock(&release_list_lock); mutex_unlock(&cgroup_mutex); } + +static int __init cgroup_disable(char *str) +{ + int i; + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (!strcmp(str, ss->name)) { + ss->disabled = 1; + break; + } + } +} +__setup("cgroup_disable=", cgroup_disable); > > Sure thing, if css has the flag, then it would nice. Could you wrap it up to say > something like css_disabled(&mem_cgroup_subsys) > > It's the subsys object rather than the css (cgroup_subsys_state). We could have something like: #define cgroup_subsys_disabled(_ss) ((ss_)->disabled) but I don't see that cgroup_subsys_disabled(&mem_cgroup_subsys) is better than just putting mem_cgroup_subsys.disabled Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option 2008-02-25 18:54 ` Paul Menage @ 2008-02-26 3:01 ` Li Zefan -1 siblings, 0 replies; 24+ messages in thread From: Li Zefan @ 2008-02-26 3:01 UTC (permalink / raw) To: Paul Menage Cc: balbir, Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki Paul Menage wrote: >>> I'll send out a prototype for comment. > > Something like the patch below. The effects of cgroup_disable=foo are: > > - foo doesn't show up in /proc/cgroups Or we can print out the disable flag, maybe this will be better? Because we can distinguish from disabled and not compiled in from /proc/cgroups. > - foo isn't auto-mounted if you mount all cgroups in a single hierarchy > - foo isn't visible as an individually mountable subsystem You mentioned in a previous mail if we mount a disabled subsystem we will get an error. Here we just ignore the mount option. Which makes more sense ? > > As a result there will only ever be one call to foo->create(), at init > time; all processes will stay in this group, and the group will never be > mounted on a visible hierarchy. Any additional effects (e.g. not > allocating metadata) are up to the foo subsystem. > > This doesn't handle early_init subsystems (their "disabled" bit isn't > set be, but it could easily be extended to do so if any of the > early_init systems wanted it - I think it would just involve some > nastier parameter processing since it would occur before the > command-line argument parser had been run. > > include/linux/cgroup.h | 1 + > kernel/cgroup.c | 29 +++++++++++++++++++++++++++-- > 2 files changed, 28 insertions(+), 2 deletions(-) > > Index: cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h > =================================================================== > --- cgroup_disable-2.6.25-rc2-mm1.orig/include/linux/cgroup.h > +++ cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h > @@ -256,6 +256,7 @@ struct cgroup_subsys { > void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); > int subsys_id; > int active; > + int disabled; > int early_init; > #define MAX_CGROUP_TYPE_NAMELEN 32 > const char *name; > Index: cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c > =================================================================== > --- cgroup_disable-2.6.25-rc2-mm1.orig/kernel/cgroup.c > +++ cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c > @@ -790,7 +790,14 @@ static int parse_cgroupfs_options(char * > if (!*token) > return -EINVAL; > if (!strcmp(token, "all")) { > - opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1; > + /* Add all non-disabled subsystems */ > + int i; > + opts->subsys_bits = 0; > + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > + struct cgroup_subsys *ss = subsys[i]; > + if (!ss->disabled) > + opts->subsys_bits |= 1ul << i; > + } > } else if (!strcmp(token, "noprefix")) { > set_bit(ROOT_NOPREFIX, &opts->flags); > } else if (!strncmp(token, "release_agent=", 14)) { > @@ -808,7 +815,8 @@ static int parse_cgroupfs_options(char * > for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > ss = subsys[i]; > if (!strcmp(token, ss->name)) { > - set_bit(i, &opts->subsys_bits); > + if (!ss->disabled) > + set_bit(i, &opts->subsys_bits); > break; > } > } > @@ -2596,6 +2606,8 @@ static int proc_cgroupstats_show(struct > mutex_lock(&cgroup_mutex); > for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > struct cgroup_subsys *ss = subsys[i]; > + if (ss->disabled) > + continue; > seq_printf(m, "%s\t%lu\t%d\n", > ss->name, ss->root->subsys_bits, > ss->root->number_of_cgroups); > @@ -2991,3 +3003,16 @@ static void cgroup_release_agent(struct > spin_unlock(&release_list_lock); > mutex_unlock(&cgroup_mutex); > } > + > +static int __init cgroup_disable(char *str) > +{ > + int i; > + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > + struct cgroup_subsys *ss = subsys[i]; > + if (!strcmp(str, ss->name)) { > + ss->disabled = 1; > + break; > + } > + } > +} > +__setup("cgroup_disable=", cgroup_disable); > > >> >> Sure thing, if css has the flag, then it would nice. Could you wrap it >> up to say >> something like css_disabled(&mem_cgroup_subsys) >> >> > > It's the subsys object rather than the css (cgroup_subsys_state). > > We could have something like: > > #define cgroup_subsys_disabled(_ss) ((ss_)->disabled) > > but I don't see that > cgroup_subsys_disabled(&mem_cgroup_subsys) > is better than just putting > > mem_cgroup_subsys.disabled > > Paul > > ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option @ 2008-02-26 3:01 ` Li Zefan 0 siblings, 0 replies; 24+ messages in thread From: Li Zefan @ 2008-02-26 3:01 UTC (permalink / raw) To: Paul Menage Cc: balbir, Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki Paul Menage wrote: >>> I'll send out a prototype for comment. > > Something like the patch below. The effects of cgroup_disable=foo are: > > - foo doesn't show up in /proc/cgroups Or we can print out the disable flag, maybe this will be better? Because we can distinguish from disabled and not compiled in from /proc/cgroups. > - foo isn't auto-mounted if you mount all cgroups in a single hierarchy > - foo isn't visible as an individually mountable subsystem You mentioned in a previous mail if we mount a disabled subsystem we will get an error. Here we just ignore the mount option. Which makes more sense ? > > As a result there will only ever be one call to foo->create(), at init > time; all processes will stay in this group, and the group will never be > mounted on a visible hierarchy. Any additional effects (e.g. not > allocating metadata) are up to the foo subsystem. > > This doesn't handle early_init subsystems (their "disabled" bit isn't > set be, but it could easily be extended to do so if any of the > early_init systems wanted it - I think it would just involve some > nastier parameter processing since it would occur before the > command-line argument parser had been run. > > include/linux/cgroup.h | 1 + > kernel/cgroup.c | 29 +++++++++++++++++++++++++++-- > 2 files changed, 28 insertions(+), 2 deletions(-) > > Index: cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h > =================================================================== > --- cgroup_disable-2.6.25-rc2-mm1.orig/include/linux/cgroup.h > +++ cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h > @@ -256,6 +256,7 @@ struct cgroup_subsys { > void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); > int subsys_id; > int active; > + int disabled; > int early_init; > #define MAX_CGROUP_TYPE_NAMELEN 32 > const char *name; > Index: cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c > =================================================================== > --- cgroup_disable-2.6.25-rc2-mm1.orig/kernel/cgroup.c > +++ cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c > @@ -790,7 +790,14 @@ static int parse_cgroupfs_options(char * > if (!*token) > return -EINVAL; > if (!strcmp(token, "all")) { > - opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1; > + /* Add all non-disabled subsystems */ > + int i; > + opts->subsys_bits = 0; > + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > + struct cgroup_subsys *ss = subsys[i]; > + if (!ss->disabled) > + opts->subsys_bits |= 1ul << i; > + } > } else if (!strcmp(token, "noprefix")) { > set_bit(ROOT_NOPREFIX, &opts->flags); > } else if (!strncmp(token, "release_agent=", 14)) { > @@ -808,7 +815,8 @@ static int parse_cgroupfs_options(char * > for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > ss = subsys[i]; > if (!strcmp(token, ss->name)) { > - set_bit(i, &opts->subsys_bits); > + if (!ss->disabled) > + set_bit(i, &opts->subsys_bits); > break; > } > } > @@ -2596,6 +2606,8 @@ static int proc_cgroupstats_show(struct > mutex_lock(&cgroup_mutex); > for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > struct cgroup_subsys *ss = subsys[i]; > + if (ss->disabled) > + continue; > seq_printf(m, "%s\t%lu\t%d\n", > ss->name, ss->root->subsys_bits, > ss->root->number_of_cgroups); > @@ -2991,3 +3003,16 @@ static void cgroup_release_agent(struct > spin_unlock(&release_list_lock); > mutex_unlock(&cgroup_mutex); > } > + > +static int __init cgroup_disable(char *str) > +{ > + int i; > + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > + struct cgroup_subsys *ss = subsys[i]; > + if (!strcmp(str, ss->name)) { > + ss->disabled = 1; > + break; > + } > + } > +} > +__setup("cgroup_disable=", cgroup_disable); > > >> >> Sure thing, if css has the flag, then it would nice. Could you wrap it >> up to say >> something like css_disabled(&mem_cgroup_subsys) >> >> > > It's the subsys object rather than the css (cgroup_subsys_state). > > We could have something like: > > #define cgroup_subsys_disabled(_ss) ((ss_)->disabled) > > but I don't see that > cgroup_subsys_disabled(&mem_cgroup_subsys) > is better than just putting > > mem_cgroup_subsys.disabled > > Paul > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option 2008-02-26 3:01 ` Li Zefan @ 2008-02-26 6:59 ` Hirokazu Takahashi -1 siblings, 0 replies; 24+ messages in thread From: Hirokazu Takahashi @ 2008-02-26 6:59 UTC (permalink / raw) To: lizf Cc: menage, balbir, akpm, hugh, skumar, yamamoto, linux-kernel, linux-mm, rientjes, xemul, kamezawa.hiroyu Hi, > >>> I'll send out a prototype for comment. > > > > Something like the patch below. The effects of cgroup_disable=foo are: > > > > - foo doesn't show up in /proc/cgroups > > Or we can print out the disable flag, maybe this will be better? > Because we can distinguish from disabled and not compiled in from > /proc/cgroups. It would be neat if the disable flag /proc/cgroups can be cleared/set on demand. It will depend on the implementation of each controller whether it works or not. > > - foo isn't auto-mounted if you mount all cgroups in a single hierarchy > > - foo isn't visible as an individually mountable subsystem > > You mentioned in a previous mail if we mount a disabled subsystem we > will get an error. Here we just ignore the mount option. Which makes > more sense ? > > > > > As a result there will only ever be one call to foo->create(), at init > > time; all processes will stay in this group, and the group will never be > > mounted on a visible hierarchy. Any additional effects (e.g. not > > allocating metadata) are up to the foo subsystem. > > > > This doesn't handle early_init subsystems (their "disabled" bit isn't > > set be, but it could easily be extended to do so if any of the > > early_init systems wanted it - I think it would just involve some > > nastier parameter processing since it would occur before the > > command-line argument parser had been run. > > > > include/linux/cgroup.h | 1 + > > kernel/cgroup.c | 29 +++++++++++++++++++++++++++-- > > 2 files changed, 28 insertions(+), 2 deletions(-) > > > > Index: cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h > > =================================================================== > > --- cgroup_disable-2.6.25-rc2-mm1.orig/include/linux/cgroup.h > > +++ cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h > > @@ -256,6 +256,7 @@ struct cgroup_subsys { > > void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); > > int subsys_id; > > int active; > > + int disabled; > > int early_init; > > #define MAX_CGROUP_TYPE_NAMELEN 32 > > const char *name; > > Index: cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c > > =================================================================== > > --- cgroup_disable-2.6.25-rc2-mm1.orig/kernel/cgroup.c > > +++ cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c > > @@ -790,7 +790,14 @@ static int parse_cgroupfs_options(char * > > if (!*token) > > return -EINVAL; > > if (!strcmp(token, "all")) { > > - opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1; > > + /* Add all non-disabled subsystems */ > > + int i; > > + opts->subsys_bits = 0; > > + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > > + struct cgroup_subsys *ss = subsys[i]; > > + if (!ss->disabled) > > + opts->subsys_bits |= 1ul << i; > > + } > > } else if (!strcmp(token, "noprefix")) { > > set_bit(ROOT_NOPREFIX, &opts->flags); > > } else if (!strncmp(token, "release_agent=", 14)) { > > @@ -808,7 +815,8 @@ static int parse_cgroupfs_options(char * > > for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > > ss = subsys[i]; > > if (!strcmp(token, ss->name)) { > > - set_bit(i, &opts->subsys_bits); > > + if (!ss->disabled) > > + set_bit(i, &opts->subsys_bits); > > break; > > } > > } > > @@ -2596,6 +2606,8 @@ static int proc_cgroupstats_show(struct > > mutex_lock(&cgroup_mutex); > > for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > > struct cgroup_subsys *ss = subsys[i]; > > + if (ss->disabled) > > + continue; > > seq_printf(m, "%s\t%lu\t%d\n", > > ss->name, ss->root->subsys_bits, > > ss->root->number_of_cgroups); > > @@ -2991,3 +3003,16 @@ static void cgroup_release_agent(struct > > spin_unlock(&release_list_lock); > > mutex_unlock(&cgroup_mutex); > > } > > + > > +static int __init cgroup_disable(char *str) > > +{ > > + int i; > > + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > > + struct cgroup_subsys *ss = subsys[i]; > > + if (!strcmp(str, ss->name)) { > > + ss->disabled = 1; > > + break; > > + } > > + } > > +} > > +__setup("cgroup_disable=", cgroup_disable); > > > > > >> > >> Sure thing, if css has the flag, then it would nice. Could you wrap it > >> up to say > >> something like css_disabled(&mem_cgroup_subsys) > >> > >> > > > > It's the subsys object rather than the css (cgroup_subsys_state). > > > > We could have something like: > > > > #define cgroup_subsys_disabled(_ss) ((ss_)->disabled) > > > > but I don't see that > > cgroup_subsys_disabled(&mem_cgroup_subsys) > > is better than just putting > > > > mem_cgroup_subsys.disabled > > > > Paul > > > > > > ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option @ 2008-02-26 6:59 ` Hirokazu Takahashi 0 siblings, 0 replies; 24+ messages in thread From: Hirokazu Takahashi @ 2008-02-26 6:59 UTC (permalink / raw) To: lizf Cc: menage, balbir, akpm, hugh, skumar, yamamoto, linux-kernel, linux-mm, rientjes, xemul, kamezawa.hiroyu Hi, > >>> I'll send out a prototype for comment. > > > > Something like the patch below. The effects of cgroup_disable=foo are: > > > > - foo doesn't show up in /proc/cgroups > > Or we can print out the disable flag, maybe this will be better? > Because we can distinguish from disabled and not compiled in from > /proc/cgroups. It would be neat if the disable flag /proc/cgroups can be cleared/set on demand. It will depend on the implementation of each controller whether it works or not. > > - foo isn't auto-mounted if you mount all cgroups in a single hierarchy > > - foo isn't visible as an individually mountable subsystem > > You mentioned in a previous mail if we mount a disabled subsystem we > will get an error. Here we just ignore the mount option. Which makes > more sense ? > > > > > As a result there will only ever be one call to foo->create(), at init > > time; all processes will stay in this group, and the group will never be > > mounted on a visible hierarchy. Any additional effects (e.g. not > > allocating metadata) are up to the foo subsystem. > > > > This doesn't handle early_init subsystems (their "disabled" bit isn't > > set be, but it could easily be extended to do so if any of the > > early_init systems wanted it - I think it would just involve some > > nastier parameter processing since it would occur before the > > command-line argument parser had been run. > > > > include/linux/cgroup.h | 1 + > > kernel/cgroup.c | 29 +++++++++++++++++++++++++++-- > > 2 files changed, 28 insertions(+), 2 deletions(-) > > > > Index: cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h > > =================================================================== > > --- cgroup_disable-2.6.25-rc2-mm1.orig/include/linux/cgroup.h > > +++ cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h > > @@ -256,6 +256,7 @@ struct cgroup_subsys { > > void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); > > int subsys_id; > > int active; > > + int disabled; > > int early_init; > > #define MAX_CGROUP_TYPE_NAMELEN 32 > > const char *name; > > Index: cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c > > =================================================================== > > --- cgroup_disable-2.6.25-rc2-mm1.orig/kernel/cgroup.c > > +++ cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c > > @@ -790,7 +790,14 @@ static int parse_cgroupfs_options(char * > > if (!*token) > > return -EINVAL; > > if (!strcmp(token, "all")) { > > - opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1; > > + /* Add all non-disabled subsystems */ > > + int i; > > + opts->subsys_bits = 0; > > + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > > + struct cgroup_subsys *ss = subsys[i]; > > + if (!ss->disabled) > > + opts->subsys_bits |= 1ul << i; > > + } > > } else if (!strcmp(token, "noprefix")) { > > set_bit(ROOT_NOPREFIX, &opts->flags); > > } else if (!strncmp(token, "release_agent=", 14)) { > > @@ -808,7 +815,8 @@ static int parse_cgroupfs_options(char * > > for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > > ss = subsys[i]; > > if (!strcmp(token, ss->name)) { > > - set_bit(i, &opts->subsys_bits); > > + if (!ss->disabled) > > + set_bit(i, &opts->subsys_bits); > > break; > > } > > } > > @@ -2596,6 +2606,8 @@ static int proc_cgroupstats_show(struct > > mutex_lock(&cgroup_mutex); > > for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > > struct cgroup_subsys *ss = subsys[i]; > > + if (ss->disabled) > > + continue; > > seq_printf(m, "%s\t%lu\t%d\n", > > ss->name, ss->root->subsys_bits, > > ss->root->number_of_cgroups); > > @@ -2991,3 +3003,16 @@ static void cgroup_release_agent(struct > > spin_unlock(&release_list_lock); > > mutex_unlock(&cgroup_mutex); > > } > > + > > +static int __init cgroup_disable(char *str) > > +{ > > + int i; > > + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > > + struct cgroup_subsys *ss = subsys[i]; > > + if (!strcmp(str, ss->name)) { > > + ss->disabled = 1; > > + break; > > + } > > + } > > +} > > +__setup("cgroup_disable=", cgroup_disable); > > > > > >> > >> Sure thing, if css has the flag, then it would nice. Could you wrap it > >> up to say > >> something like css_disabled(&mem_cgroup_subsys) > >> > >> > > > > It's the subsys object rather than the css (cgroup_subsys_state). > > > > We could have something like: > > > > #define cgroup_subsys_disabled(_ss) ((ss_)->disabled) > > > > but I don't see that > > cgroup_subsys_disabled(&mem_cgroup_subsys) > > is better than just putting > > > > mem_cgroup_subsys.disabled > > > > Paul > > > > > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option 2008-02-26 3:01 ` Li Zefan @ 2008-02-26 8:58 ` Paul Menage -1 siblings, 0 replies; 24+ messages in thread From: Paul Menage @ 2008-02-26 8:58 UTC (permalink / raw) To: Li Zefan Cc: balbir, Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki On Mon, Feb 25, 2008 at 7:01 PM, Li Zefan <lizf@cn.fujitsu.com> wrote: > > > > - foo doesn't show up in /proc/cgroups > > Or we can print out the disable flag, maybe this will be better? > Because we can distinguish from disabled and not compiled in from > > /proc/cgroups. Certainly possible, if people felt it was useful. > > > - foo isn't auto-mounted if you mount all cgroups in a single hierarchy > > - foo isn't visible as an individually mountable subsystem > > You mentioned in a previous mail if we mount a disabled subsystem we > will get an error. Here we just ignore the mount option. Which makes > more sense ? > No, we don't ignore the mount option - we give an error since it doesn't refer to a valid subsystem. (And in the first case there is no mount option). Paul ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option @ 2008-02-26 8:58 ` Paul Menage 0 siblings, 0 replies; 24+ messages in thread From: Paul Menage @ 2008-02-26 8:58 UTC (permalink / raw) To: Li Zefan Cc: balbir, Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki On Mon, Feb 25, 2008 at 7:01 PM, Li Zefan <lizf@cn.fujitsu.com> wrote: > > > > - foo doesn't show up in /proc/cgroups > > Or we can print out the disable flag, maybe this will be better? > Because we can distinguish from disabled and not compiled in from > > /proc/cgroups. Certainly possible, if people felt it was useful. > > > - foo isn't auto-mounted if you mount all cgroups in a single hierarchy > > - foo isn't visible as an individually mountable subsystem > > You mentioned in a previous mail if we mount a disabled subsystem we > will get an error. Here we just ignore the mount option. Which makes > more sense ? > No, we don't ignore the mount option - we give an error since it doesn't refer to a valid subsystem. (And in the first case there is no mount option). Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option 2008-02-26 8:58 ` Paul Menage @ 2008-02-26 9:05 ` Li Zefan -1 siblings, 0 replies; 24+ messages in thread From: Li Zefan @ 2008-02-26 9:05 UTC (permalink / raw) To: Paul Menage Cc: balbir, Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki Paul Menage wrote: > On Mon, Feb 25, 2008 at 7:01 PM, Li Zefan <lizf@cn.fujitsu.com> wrote: >> > >> > - foo doesn't show up in /proc/cgroups >> >> Or we can print out the disable flag, maybe this will be better? >> Because we can distinguish from disabled and not compiled in from >> >> /proc/cgroups. > > Certainly possible, if people felt it was useful. > >> > - foo isn't auto-mounted if you mount all cgroups in a single hierarchy >> > - foo isn't visible as an individually mountable subsystem >> >> You mentioned in a previous mail if we mount a disabled subsystem we >> will get an error. Here we just ignore the mount option. Which makes >> more sense ? >> > > No, we don't ignore the mount option - we give an error since it > doesn't refer to a valid subsystem. (And in the first case there is no > mount option). > You are write, -ENOENT will be returned in this case. I made a mistake when reading the prototype patch, thanks for the clarification. :) ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option @ 2008-02-26 9:05 ` Li Zefan 0 siblings, 0 replies; 24+ messages in thread From: Li Zefan @ 2008-02-26 9:05 UTC (permalink / raw) To: Paul Menage Cc: balbir, Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki Paul Menage wrote: > On Mon, Feb 25, 2008 at 7:01 PM, Li Zefan <lizf@cn.fujitsu.com> wrote: >> > >> > - foo doesn't show up in /proc/cgroups >> >> Or we can print out the disable flag, maybe this will be better? >> Because we can distinguish from disabled and not compiled in from >> >> /proc/cgroups. > > Certainly possible, if people felt it was useful. > >> > - foo isn't auto-mounted if you mount all cgroups in a single hierarchy >> > - foo isn't visible as an individually mountable subsystem >> >> You mentioned in a previous mail if we mount a disabled subsystem we >> will get an error. Here we just ignore the mount option. Which makes >> more sense ? >> > > No, we don't ignore the mount option - we give an error since it > doesn't refer to a valid subsystem. (And in the first case there is no > mount option). > You are write, -ENOENT will be returned in this case. I made a mistake when reading the prototype patch, thanks for the clarification. :) -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option 2008-02-25 18:54 ` Paul Menage @ 2008-03-05 16:11 ` Balbir Singh -1 siblings, 0 replies; 24+ messages in thread From: Balbir Singh @ 2008-03-05 16:11 UTC (permalink / raw) To: Paul Menage Cc: Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, lizf, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki Paul Menage wrote: >>> I'll send out a prototype for comment. > > Something like the patch below. The effects of cgroup_disable=foo are: > > - foo doesn't show up in /proc/cgroups > - foo isn't auto-mounted if you mount all cgroups in a single hierarchy > - foo isn't visible as an individually mountable subsystem > > As a result there will only ever be one call to foo->create(), at init > time; all processes will stay in this group, and the group will never be > mounted on a visible hierarchy. Any additional effects (e.g. not > allocating metadata) are up to the foo subsystem. > > This doesn't handle early_init subsystems (their "disabled" bit isn't > set be, but it could easily be extended to do so if any of the > early_init systems wanted it - I think it would just involve some > nastier parameter processing since it would occur before the > command-line argument parser had been run. > > include/linux/cgroup.h | 1 + > kernel/cgroup.c | 29 +++++++++++++++++++++++++++-- > 2 files changed, 28 insertions(+), 2 deletions(-) > > Index: cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h > =================================================================== > --- cgroup_disable-2.6.25-rc2-mm1.orig/include/linux/cgroup.h > +++ cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h > @@ -256,6 +256,7 @@ struct cgroup_subsys { > void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); > int subsys_id; > int active; > + int disabled; > int early_init; > #define MAX_CGROUP_TYPE_NAMELEN 32 > const char *name; > Index: cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c > =================================================================== > --- cgroup_disable-2.6.25-rc2-mm1.orig/kernel/cgroup.c > +++ cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c > @@ -790,7 +790,14 @@ static int parse_cgroupfs_options(char * > if (!*token) > return -EINVAL; > if (!strcmp(token, "all")) { > - opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1; > + /* Add all non-disabled subsystems */ > + int i; > + opts->subsys_bits = 0; > + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > + struct cgroup_subsys *ss = subsys[i]; > + if (!ss->disabled) > + opts->subsys_bits |= 1ul << i; > + } > } else if (!strcmp(token, "noprefix")) { > set_bit(ROOT_NOPREFIX, &opts->flags); > } else if (!strncmp(token, "release_agent=", 14)) { > @@ -808,7 +815,8 @@ static int parse_cgroupfs_options(char * > for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > ss = subsys[i]; > if (!strcmp(token, ss->name)) { > - set_bit(i, &opts->subsys_bits); > + if (!ss->disabled) > + set_bit(i, &opts->subsys_bits); > break; > } > } > @@ -2596,6 +2606,8 @@ static int proc_cgroupstats_show(struct > mutex_lock(&cgroup_mutex); > for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > struct cgroup_subsys *ss = subsys[i]; > + if (ss->disabled) > + continue; > seq_printf(m, "%s\t%lu\t%d\n", > ss->name, ss->root->subsys_bits, > ss->root->number_of_cgroups); > @@ -2991,3 +3003,16 @@ static void cgroup_release_agent(struct > spin_unlock(&release_list_lock); > mutex_unlock(&cgroup_mutex); > } > + > +static int __init cgroup_disable(char *str) > +{ > + int i; > + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > + struct cgroup_subsys *ss = subsys[i]; > + if (!strcmp(str, ss->name)) { > + ss->disabled = 1; > + break; > + } > + } > +} > +__setup("cgroup_disable=", cgroup_disable); > Hi, Paul, I am going to go ahead and test this patch. If they work fine, I'll request you to send them out, so that we can get them in by 2.6.25. -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Memory Resource Controller Add Boot Option @ 2008-03-05 16:11 ` Balbir Singh 0 siblings, 0 replies; 24+ messages in thread From: Balbir Singh @ 2008-03-05 16:11 UTC (permalink / raw) To: Paul Menage Cc: Andrew Morton, Hugh Dickins, Sudhir Kumar, YAMAMOTO Takashi, lizf, linux-kernel, taka, linux-mm, David Rientjes, Pavel Emelianov, KAMEZAWA Hiroyuki Paul Menage wrote: >>> I'll send out a prototype for comment. > > Something like the patch below. The effects of cgroup_disable=foo are: > > - foo doesn't show up in /proc/cgroups > - foo isn't auto-mounted if you mount all cgroups in a single hierarchy > - foo isn't visible as an individually mountable subsystem > > As a result there will only ever be one call to foo->create(), at init > time; all processes will stay in this group, and the group will never be > mounted on a visible hierarchy. Any additional effects (e.g. not > allocating metadata) are up to the foo subsystem. > > This doesn't handle early_init subsystems (their "disabled" bit isn't > set be, but it could easily be extended to do so if any of the > early_init systems wanted it - I think it would just involve some > nastier parameter processing since it would occur before the > command-line argument parser had been run. > > include/linux/cgroup.h | 1 + > kernel/cgroup.c | 29 +++++++++++++++++++++++++++-- > 2 files changed, 28 insertions(+), 2 deletions(-) > > Index: cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h > =================================================================== > --- cgroup_disable-2.6.25-rc2-mm1.orig/include/linux/cgroup.h > +++ cgroup_disable-2.6.25-rc2-mm1/include/linux/cgroup.h > @@ -256,6 +256,7 @@ struct cgroup_subsys { > void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); > int subsys_id; > int active; > + int disabled; > int early_init; > #define MAX_CGROUP_TYPE_NAMELEN 32 > const char *name; > Index: cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c > =================================================================== > --- cgroup_disable-2.6.25-rc2-mm1.orig/kernel/cgroup.c > +++ cgroup_disable-2.6.25-rc2-mm1/kernel/cgroup.c > @@ -790,7 +790,14 @@ static int parse_cgroupfs_options(char * > if (!*token) > return -EINVAL; > if (!strcmp(token, "all")) { > - opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1; > + /* Add all non-disabled subsystems */ > + int i; > + opts->subsys_bits = 0; > + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > + struct cgroup_subsys *ss = subsys[i]; > + if (!ss->disabled) > + opts->subsys_bits |= 1ul << i; > + } > } else if (!strcmp(token, "noprefix")) { > set_bit(ROOT_NOPREFIX, &opts->flags); > } else if (!strncmp(token, "release_agent=", 14)) { > @@ -808,7 +815,8 @@ static int parse_cgroupfs_options(char * > for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > ss = subsys[i]; > if (!strcmp(token, ss->name)) { > - set_bit(i, &opts->subsys_bits); > + if (!ss->disabled) > + set_bit(i, &opts->subsys_bits); > break; > } > } > @@ -2596,6 +2606,8 @@ static int proc_cgroupstats_show(struct > mutex_lock(&cgroup_mutex); > for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > struct cgroup_subsys *ss = subsys[i]; > + if (ss->disabled) > + continue; > seq_printf(m, "%s\t%lu\t%d\n", > ss->name, ss->root->subsys_bits, > ss->root->number_of_cgroups); > @@ -2991,3 +3003,16 @@ static void cgroup_release_agent(struct > spin_unlock(&release_list_lock); > mutex_unlock(&cgroup_mutex); > } > + > +static int __init cgroup_disable(char *str) > +{ > + int i; > + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { > + struct cgroup_subsys *ss = subsys[i]; > + if (!strcmp(str, ss->name)) { > + ss->disabled = 1; > + break; > + } > + } > +} > +__setup("cgroup_disable=", cgroup_disable); > Hi, Paul, I am going to go ahead and test this patch. If they work fine, I'll request you to send them out, so that we can get them in by 2.6.25. -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 24+ messages in thread
end of thread, other threads:[~2008-03-05 16:12 UTC | newest] Thread overview: 24+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-02-25 11:55 [PATCH] Memory controller rename to Memory Resource Controller Balbir Singh 2008-02-25 11:55 ` Balbir Singh 2008-02-25 11:55 ` [PATCH] Memory Resource Controller Add Boot Option Balbir Singh 2008-02-25 11:55 ` Balbir Singh 2008-02-25 16:16 ` Paul Menage 2008-02-25 16:16 ` Paul Menage 2008-02-25 17:18 ` Balbir Singh 2008-02-25 17:18 ` Balbir Singh 2008-02-25 17:32 ` Paul Menage 2008-02-25 17:32 ` Paul Menage 2008-02-25 17:37 ` Balbir Singh 2008-02-25 17:37 ` Balbir Singh 2008-02-25 18:54 ` Paul Menage 2008-02-25 18:54 ` Paul Menage 2008-02-26 3:01 ` Li Zefan 2008-02-26 3:01 ` Li Zefan 2008-02-26 6:59 ` Hirokazu Takahashi 2008-02-26 6:59 ` Hirokazu Takahashi 2008-02-26 8:58 ` Paul Menage 2008-02-26 8:58 ` Paul Menage 2008-02-26 9:05 ` Li Zefan 2008-02-26 9:05 ` Li Zefan 2008-03-05 16:11 ` Balbir Singh 2008-03-05 16:11 ` Balbir Singh
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.