* [RFC/PATCH] cgroup swap subsystem
@ 2008-03-05 5:59 ` Daisuke Nishimura
0 siblings, 0 replies; 50+ messages in thread
From: Daisuke Nishimura @ 2008-03-05 5:59 UTC (permalink / raw)
To: containers-qjLDD68F18O7TbgM5vRIOg,
linux-mm-Bw31MaZKKs3YtjvyW6yDsg
Cc: xemul-GEFAQzZX7r8dnm+yROfE0A,
balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
Hi.
Even if limiting memory usage by cgroup memory subsystem
or isolating memory by cpuset, swap space is shared, so
resource isolation is not enough. If one group uses up all the
swap space, it can affect other groups.
I try making a patch of swap subsystem based on memory
subsystem, which limits swap usage per cgroup.
It can now charge and limit the swap usage.
I implemented this feature as a new subsystem,
not as a part of memory subsystem, because I don't want to
make big change to memcontrol.c, and even if implemented
as other subsystem, users can manage memory and swap on
the same cgroup directory if mount them together.
Basic idea of my implementation:
- what will be charged ?
the number of swap entries.
- when to charge/uncharge ?
charge at get_swap_entry(), and uncharge at swap_entry_free().
- to what group charge the swap entry ?
To determine to what swap_cgroup (corresponding to mem_cgroup in
memory subsystem) the swap entry should be charged,
I added a pointer to mm_struct to page_cgroup(pc->pc_mm), and
changed the argument of get_swap_entry() from (void) to
(struct page *). As a result, get_swap_entry() can determine
to what swap_cgroup it should charge the swap entry
by referring to page->page_cgroup->mm_struct->swap_cgroup.
- from what group uncharge the swap entry ?
I added to swap_info_struct a member 'struct swap_cgroup **',
array of pointer to which swap_cgroup the swap entry is
charged.
Todo:
- rebase new kernel, and split into some patches.
- Merge with memory subsystem (if it would be better), or
remove dependency on CONFIG_CGROUP_MEM_CONT if possible
(needs to make page_cgroup more generic one).
- More tests, cleanups, and feartures :-)
Any comments or discussions would be appreciated.
Thanks,
Daisuke Nishimura
Signed-off-by: Daisuke Nishimura <nishimura-YQH0OdQVrdy45+QrQBaojngSJqDPrsil@public.gmane.org>
---
diff -uprN linux-2.6.24-mm1/include/linux/cgroup_subsys.h linux-2.6.24-mm1-swaplimit/include/linux/cgroup_subsys.h
--- linux-2.6.24-mm1/include/linux/cgroup_subsys.h 2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/include/linux/cgroup_subsys.h 2008-03-03 10:56:56.000000000 +0900
@@ -42,3 +42,9 @@ SUBSYS(mem_cgroup)
#endif
/* */
+
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+SUBSYS(swap)
+#endif
+
+/* */
diff -uprN linux-2.6.24-mm1/include/linux/memcontrol.h linux-2.6.24-mm1-swaplimit/include/linux/memcontrol.h
--- linux-2.6.24-mm1/include/linux/memcontrol.h 2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/include/linux/memcontrol.h 2008-03-03 10:56:56.000000000 +0900
@@ -29,6 +29,21 @@ struct page;
struct mm_struct;
#ifdef CONFIG_CGROUP_MEM_CONT
+/*
+ * A page_cgroup page is associated with every page descriptor. The
+ * page_cgroup helps us identify information about the cgroup
+ */
+struct page_cgroup {
+ struct list_head lru; /* per cgroup LRU list */
+ struct page *page;
+ struct mem_cgroup *mem_cgroup;
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ struct mm_struct *pc_mm;
+#endif
+ atomic_t ref_cnt; /* Helpful when pages move b/w */
+ /* mapped and cached states */
+ int flags;
+};
extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p);
extern void mm_free_cgroup(struct mm_struct *mm);
diff -uprN linux-2.6.24-mm1/include/linux/mm_types.h linux-2.6.24-mm1-swaplimit/include/linux/mm_types.h
--- linux-2.6.24-mm1/include/linux/mm_types.h 2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/include/linux/mm_types.h 2008-03-03 10:56:56.000000000 +0900
@@ -233,6 +233,9 @@ struct mm_struct {
#ifdef CONFIG_CGROUP_MEM_CONT
struct mem_cgroup *mem_cgroup;
#endif
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ struct swap_cgroup *swap_cgroup;
+#endif
};
#endif /* _LINUX_MM_TYPES_H */
diff -uprN linux-2.6.24-mm1/include/linux/swap.h linux-2.6.24-mm1-swaplimit/include/linux/swap.h
--- linux-2.6.24-mm1/include/linux/swap.h 2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/include/linux/swap.h 2008-03-03 10:56:56.000000000 +0900
@@ -7,6 +7,7 @@
#include <linux/list.h>
#include <linux/memcontrol.h>
#include <linux/sched.h>
+#include <linux/swap_limit.h>
#include <asm/atomic.h>
#include <asm/page.h>
@@ -141,6 +142,9 @@ struct swap_info_struct {
struct swap_extent *curr_swap_extent;
unsigned old_block_size;
unsigned short * swap_map;
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ struct swap_cgroup **swap_cgroup;
+#endif
unsigned int lowest_bit;
unsigned int highest_bit;
unsigned int cluster_next;
@@ -239,7 +243,7 @@ extern struct page *swapin_readahead(swp
extern long total_swap_pages;
extern unsigned int nr_swapfiles;
extern void si_swapinfo(struct sysinfo *);
-extern swp_entry_t get_swap_page(void);
+extern swp_entry_t get_swap_page(struct page *);
extern swp_entry_t get_swap_page_of_type(int);
extern int swap_duplicate(swp_entry_t);
extern int valid_swaphandles(swp_entry_t, unsigned long *);
@@ -342,7 +346,7 @@ static inline int remove_exclusive_swap_
return 0;
}
-static inline swp_entry_t get_swap_page(void)
+static inline swp_entry_t get_swap_page(struct page *page)
{
swp_entry_t entry;
entry.val = 0;
diff -uprN linux-2.6.24-mm1/include/linux/swap_limit.h linux-2.6.24-mm1-swaplimit/include/linux/swap_limit.h
--- linux-2.6.24-mm1/include/linux/swap_limit.h 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/include/linux/swap_limit.h 2008-03-03 10:56:56.000000000 +0900
@@ -0,0 +1,65 @@
+/*
+ * swap_limit.h
+ *
+ */
+#ifndef _LINUX_SWAP_LIMIT_H
+#define _LINUX_SWAP_LIMIT_H
+
+#include <linux/swap.h>
+#include <linux/cgroup.h>
+#include <linux/res_counter.h>
+
+struct swap_cgroup;
+struct swap_info_struct;
+
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+struct swap_cgroup {
+ struct cgroup_subsys_state css;
+ struct res_counter res;
+};
+
+static inline struct swap_cgroup *swap_cgroup_from_cgrp(struct cgroup *cgrp)
+{
+ return container_of(cgroup_subsys_state(cgrp, swap_subsys_id),
+ struct swap_cgroup,
+ css);
+}
+
+static inline struct swap_cgroup *swap_cgroup_from_task(struct task_struct *p)
+{
+ return container_of(task_subsys_state(p, swap_subsys_id),
+ struct swap_cgroup, css);
+}
+
+extern int swap_cgroup_charge(struct page *page,
+ struct swap_info_struct *si,
+ unsigned long offset);
+extern void swap_cgroup_uncharge(struct swap_info_struct *si,
+ unsigned long offset);
+
+#else /* CONFIG_CGROUP_SWAP_LIMIT */
+static inline struct swap_cgroup *swap_cgroup_from_cgrp(struct cgroup *cgrp)
+{
+ return NULL;
+}
+
+static inline struct swap_cgroup *swap_cgroup_from_task(struct task_struct *p)
+{
+ return NULL;
+}
+
+static inline int swap_cgroup_charge(struct page *page,
+ struct swap_info_struct *si,
+ unsigned long offset)
+{
+ return 0;
+}
+
+static inline void swap_cgroup_uncharge(struct swap_info_struct *si,
+ unsigned long offset)
+{
+}
+
+#endif
+
+#endif
diff -uprN linux-2.6.24-mm1/init/Kconfig linux-2.6.24-mm1-swaplimit/init/Kconfig
--- linux-2.6.24-mm1/init/Kconfig 2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/init/Kconfig 2008-03-03 10:56:56.000000000 +0900
@@ -383,6 +383,12 @@ config CGROUP_MEM_CONT
Provides a memory controller that manages both page cache and
RSS memory.
+config CGROUP_SWAP_LIMIT
+ bool "cgroup subsystem for swap"
+ depends on CGROUP_MEM_CONT && SWAP
+ help
+ Provides a swap controller that manages and limits swap usage.
+
config PROC_PID_CPUSET
bool "Include legacy /proc/<pid>/cpuset file"
depends on CPUSETS
diff -uprN linux-2.6.24-mm1/mm/Makefile linux-2.6.24-mm1-swaplimit/mm/Makefile
--- linux-2.6.24-mm1/mm/Makefile 2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/mm/Makefile 2008-03-03 10:56:56.000000000 +0900
@@ -32,4 +32,5 @@ obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_SMP) += allocpercpu.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o
+obj-$(CONFIG_CGROUP_SWAP_LIMIT) += swap_limit.o
diff -uprN linux-2.6.24-mm1/mm/memcontrol.c linux-2.6.24-mm1-swaplimit/mm/memcontrol.c
--- linux-2.6.24-mm1/mm/memcontrol.c 2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/mm/memcontrol.c 2008-03-03 10:56:56.000000000 +0900
@@ -19,6 +19,7 @@
#include <linux/res_counter.h>
#include <linux/memcontrol.h>
+#include <linux/swap_limit.h>
#include <linux/cgroup.h>
#include <linux/mm.h>
#include <linux/smp.h>
@@ -146,18 +147,6 @@ struct mem_cgroup {
#define PAGE_CGROUP_LOCK_BIT 0x0
#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT)
-/*
- * A page_cgroup page is associated with every page descriptor. The
- * page_cgroup helps us identify information about the cgroup
- */
-struct page_cgroup {
- struct list_head lru; /* per cgroup LRU list */
- struct page *page;
- struct mem_cgroup *mem_cgroup;
- atomic_t ref_cnt; /* Helpful when pages move b/w */
- /* mapped and cached states */
- int flags;
-};
#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
#define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */
@@ -254,15 +243,27 @@ struct mem_cgroup *mem_cgroup_from_task(
void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p)
{
struct mem_cgroup *mem;
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ struct swap_cgroup *swap;
+#endif
mem = mem_cgroup_from_task(p);
css_get(&mem->css);
mm->mem_cgroup = mem;
+
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ swap = swap_cgroup_from_task(p);
+ css_get(&swap->css);
+ mm->swap_cgroup = swap;
+#endif
}
void mm_free_cgroup(struct mm_struct *mm)
{
css_put(&mm->mem_cgroup->css);
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ css_put(&mm->swap_cgroup->css);
+#endif
}
static inline int page_cgroup_locked(struct page *page)
@@ -664,6 +665,10 @@ retry:
pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
pc->flags |= PAGE_CGROUP_FLAG_CACHE;
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ atomic_inc(&mm->mm_count);
+ pc->pc_mm = mm;
+#endif
if (!page || page_cgroup_assign_new_page_cgroup(page, pc)) {
/*
@@ -673,6 +678,9 @@ retry:
*/
res_counter_uncharge(&mem->res, PAGE_SIZE);
css_put(&mem->css);
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ mmdrop(mm);
+#endif
kfree(pc);
if (!page)
goto done;
@@ -744,6 +752,9 @@ void mem_cgroup_uncharge(struct page_cgr
if (clear_page_cgroup(page, pc) == pc) {
mem = pc->mem_cgroup;
css_put(&mem->css);
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ mmdrop(pc->pc_mm);
+#endif
res_counter_uncharge(&mem->res, PAGE_SIZE);
spin_lock_irqsave(&mz->lru_lock, flags);
__mem_cgroup_remove_list(pc);
@@ -859,6 +870,9 @@ retry:
atomic_set(&pc->ref_cnt, 0);
if (clear_page_cgroup(page, pc) == pc) {
css_put(&mem->css);
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ mmdrop(pc->pc_mm);
+#endif
res_counter_uncharge(&mem->res, PAGE_SIZE);
__mem_cgroup_remove_list(pc);
kfree(pc);
diff -uprN linux-2.6.24-mm1/mm/shmem.c linux-2.6.24-mm1-swaplimit/mm/shmem.c
--- linux-2.6.24-mm1/mm/shmem.c 2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/mm/shmem.c 2008-03-03 10:56:56.000000000 +0900
@@ -1024,7 +1024,7 @@ static int shmem_writepage(struct page *
* want to check if there's a redundant swappage to be discarded.
*/
if (wbc->for_reclaim)
- swap = get_swap_page();
+ swap = get_swap_page(page);
else
swap.val = 0;
diff -uprN linux-2.6.24-mm1/mm/swap_limit.c linux-2.6.24-mm1-swaplimit/mm/swap_limit.c
--- linux-2.6.24-mm1/mm/swap_limit.c 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/mm/swap_limit.c 2008-03-05 14:39:23.000000000 +0900
@@ -0,0 +1,194 @@
+/*
+ * swap_limit.c - SWAP controller (based on memcontrol.c)
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/rcupdate.h>
+#include <linux/cgroup.h>
+#include <linux/res_counter.h>
+#include <linux/memcontrol.h>
+#include <linux/swap_limit.h>
+
+static struct swap_cgroup init_swap_cgroup;
+
+int swap_cgroup_charge(struct page *page,
+ struct swap_info_struct *si,
+ unsigned long offset)
+{
+ int ret;
+ struct page_cgroup *pc;
+ struct mm_struct *mm;
+ struct swap_cgroup *swap;
+
+ BUG_ON(!page);
+
+ /*
+ * Pages to be swapped out should have been charged by memory cgroup,
+ * but very rarely, pc would be NULL (pc is not reliable without lock,
+ * so I should fix here).
+ * In such cases, we charge the init_mm now.
+ */
+ pc = page_get_page_cgroup(page);
+ if (WARN_ON(!pc))
+ mm = &init_mm;
+ else
+ mm = pc->pc_mm;
+ BUG_ON(!mm);
+
+ rcu_read_lock();
+ swap = rcu_dereference(mm->swap_cgroup);
+ rcu_read_unlock();
+ BUG_ON(!swap);
+
+ ret = res_counter_charge(&swap->res, PAGE_SIZE);
+ if (!ret) {
+ css_get(&swap->css);
+ si->swap_cgroup[offset] = swap;
+ }
+
+ return ret;
+}
+
+void swap_cgroup_uncharge(struct swap_info_struct *si, unsigned long offset)
+{
+ struct swap_cgroup *swap = si->swap_cgroup[offset];
+
+ /*
+ * "swap" would be NULL:
+ * 1. when get_swap_page() failed at charging swap_cgroup,
+ * and called swap_entry_free().
+ * 2. when this swap entry had been assigned by
+ * get_swap_page_of_type() (via SWSUSP ?).
+ */
+ if (swap) {
+ res_counter_uncharge(&swap->res, PAGE_SIZE);
+ si->swap_cgroup[offset] = NULL;
+ css_put(&swap->css);
+ }
+}
+
+static struct cgroup_subsys_state *swap_cgroup_create(struct cgroup_subsys *ss,
+ struct cgroup *cgrp)
+{
+ struct swap_cgroup *swap;
+
+ if (unlikely((cgrp->parent) == NULL)) {
+ swap = &init_swap_cgroup;
+ init_mm.swap_cgroup = swap;
+ } else
+ swap = kzalloc(sizeof(struct swap_cgroup), GFP_KERNEL);
+
+ if (swap == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ res_counter_init(&swap->res);
+
+ return &swap->css;
+}
+
+static void swap_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ kfree(swap_cgroup_from_cgrp(cgrp));
+}
+
+static ssize_t swap_cgroup_read(struct cgroup *cgrp,
+ struct cftype *cft, struct file *file,
+ char __user *userbuf, size_t nbytes,
+ loff_t *ppos)
+{
+ return res_counter_read(&swap_cgroup_from_cgrp(cgrp)->res,
+ cft->private, userbuf, nbytes, ppos,
+ NULL);
+}
+
+static int swap_cgroup_write_strategy(char *buf, unsigned long long *tmp)
+{
+ *tmp = memparse(buf, &buf);
+ if (*buf != '\0')
+ return -EINVAL;
+
+ /*
+ * Round up the value to the closest page size
+ */
+ *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT;
+ return 0;
+}
+
+static ssize_t swap_cgroup_write(struct cgroup *cgrp, struct cftype *cft,
+ struct file *file, const char __user *userbuf,
+ size_t nbytes, loff_t *ppos)
+{
+ return res_counter_write(&swap_cgroup_from_cgrp(cgrp)->res,
+ cft->private, userbuf, nbytes, ppos,
+ swap_cgroup_write_strategy);
+}
+
+static struct cftype swap_files[] = {
+ {
+ .name = "usage_in_bytes",
+ .private = RES_USAGE,
+ .read = swap_cgroup_read,
+ },
+ {
+ .name = "limit_in_bytes",
+ .private = RES_LIMIT,
+ .write = swap_cgroup_write,
+ .read = swap_cgroup_read,
+ },
+ {
+ .name = "failcnt",
+ .private = RES_FAILCNT,
+ .read = swap_cgroup_read,
+ },
+};
+
+static int swap_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ return cgroup_add_files(cgrp, ss, swap_files, ARRAY_SIZE(swap_files));
+}
+
+static void swap_cgroup_move_task(struct cgroup_subsys *ss,
+ struct cgroup *cgrp,
+ struct cgroup *old_cgrp,
+ struct task_struct *p)
+{
+ struct mm_struct *mm;
+ struct swap_cgroup *swap, *old_swap;
+
+ mm = get_task_mm(p);
+ if (mm == NULL)
+ return;
+
+ swap = swap_cgroup_from_cgrp(cgrp);
+ old_swap = swap_cgroup_from_cgrp(old_cgrp);
+
+ if (swap == old_swap)
+ goto out;
+
+ if (p->tgid != p->pid)
+ goto out;
+
+ css_get(&swap->css);
+ rcu_assign_pointer(mm->swap_cgroup, swap);
+ css_put(&old_swap->css);
+
+out:
+ mmput(mm);
+ return;
+}
+
+struct cgroup_subsys swap_subsys = {
+ .name = "swap",
+ .create = swap_cgroup_create,
+ .destroy = swap_cgroup_destroy,
+ .populate = swap_cgroup_populate,
+ .subsys_id = swap_subsys_id,
+ .attach = swap_cgroup_move_task,
+ .early_init = 0,
+};
diff -uprN linux-2.6.24-mm1/mm/swap_state.c linux-2.6.24-mm1-swaplimit/mm/swap_state.c
--- linux-2.6.24-mm1/mm/swap_state.c 2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/mm/swap_state.c 2008-03-03 10:56:56.000000000 +0900
@@ -128,7 +128,7 @@ int add_to_swap(struct page * page, gfp_
BUG_ON(!PageUptodate(page));
for (;;) {
- entry = get_swap_page();
+ entry = get_swap_page(page);
if (!entry.val)
return 0;
diff -uprN linux-2.6.24-mm1/mm/swapfile.c linux-2.6.24-mm1-swaplimit/mm/swapfile.c
--- linux-2.6.24-mm1/mm/swapfile.c 2008-02-04 14:34:24.000000000 +0900
+++ linux-2.6.24-mm1-swaplimit/mm/swapfile.c 2008-03-03 10:56:56.000000000 +0900
@@ -28,6 +28,7 @@
#include <linux/capability.h>
#include <linux/syscalls.h>
#include <linux/memcontrol.h>
+#include <linux/swap_limit.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -172,7 +173,10 @@ no_page:
return 0;
}
-swp_entry_t get_swap_page(void)
+/* get_swap_page() calls this */
+static int swap_entry_free(struct swap_info_struct *, unsigned long);
+
+swp_entry_t get_swap_page(struct page *page)
{
struct swap_info_struct *si;
pgoff_t offset;
@@ -201,6 +205,16 @@ swp_entry_t get_swap_page(void)
swap_list.next = next;
offset = scan_swap_map(si);
if (offset) {
+ /*
+ * This should be the first use of this swap entry,
+ * so charge this swap entry now.
+ */
+ if (swap_cgroup_charge(page, si, offset)) {
+ /* should free this entry */
+ swap_entry_free(si, offset);
+
+ goto noswap;
+ }
spin_unlock(&swap_lock);
return swp_entry(type, offset);
}
@@ -285,6 +299,7 @@ static int swap_entry_free(struct swap_i
swap_list.next = p - swap_info;
nr_swap_pages++;
p->inuse_pages--;
+ swap_cgroup_uncharge(p, offset);
}
}
return count;
@@ -1207,6 +1222,9 @@ asmlinkage long sys_swapoff(const char _
{
struct swap_info_struct * p = NULL;
unsigned short *swap_map;
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ struct swap_cgroup **swap_cgroup;
+#endif
struct file *swap_file, *victim;
struct address_space *mapping;
struct inode *inode;
@@ -1309,10 +1327,17 @@ asmlinkage long sys_swapoff(const char _
p->max = 0;
swap_map = p->swap_map;
p->swap_map = NULL;
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ swap_cgroup = p->swap_cgroup;
+ p->swap_cgroup = NULL;
+#endif
p->flags = 0;
spin_unlock(&swap_lock);
mutex_unlock(&swapon_mutex);
vfree(swap_map);
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ vfree(swap_cgroup);
+#endif
inode = mapping->host;
if (S_ISBLK(inode->i_mode)) {
struct block_device *bdev = I_BDEV(inode);
@@ -1460,6 +1485,9 @@ asmlinkage long sys_swapon(const char __
unsigned long maxpages = 1;
int swapfilesize;
unsigned short *swap_map;
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ struct swap_cgroup **swap_cgroup;
+#endif
struct page *page = NULL;
struct inode *inode = NULL;
int did_down = 0;
@@ -1483,6 +1511,9 @@ asmlinkage long sys_swapon(const char __
p->swap_file = NULL;
p->old_block_size = 0;
p->swap_map = NULL;
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ p->swap_cgroup = NULL;
+#endif
p->lowest_bit = 0;
p->highest_bit = 0;
p->cluster_nr = 0;
@@ -1647,6 +1678,15 @@ asmlinkage long sys_swapon(const char __
1 /* header page */;
if (error)
goto bad_swap;
+
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ p->swap_cgroup = vmalloc(maxpages * sizeof(*swap_cgroup));
+ if (!(p->swap_cgroup)) {
+ error = -ENOMEM;
+ goto bad_swap;
+ }
+ memset(p->swap_cgroup, 0, maxpages * sizeof(*swap_cgroup));
+#endif
}
if (nr_good_pages) {
@@ -1704,13 +1744,22 @@ bad_swap:
bad_swap_2:
spin_lock(&swap_lock);
swap_map = p->swap_map;
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ swap_cgroup = p->swap_cgroup;
+#endif
p->swap_file = NULL;
p->swap_map = NULL;
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ p->swap_cgroup = NULL;
+#endif
p->flags = 0;
if (!(swap_flags & SWAP_FLAG_PREFER))
++least_priority;
spin_unlock(&swap_lock);
vfree(swap_map);
+#ifdef CONFIG_CGROUP_SWAP_LIMIT
+ vfree(swap_cgroup);
+#endif
if (swap_file)
filp_close(swap_file, NULL);
out:
^ permalink raw reply [flat|nested] 50+ messages in thread* [RFC/PATCH] cgroup swap subsystem @ 2008-03-05 5:59 ` Daisuke Nishimura 0 siblings, 0 replies; 50+ messages in thread From: Daisuke Nishimura @ 2008-03-05 5:59 UTC (permalink / raw) To: containers, linux-mm; +Cc: balbir, xemul, kamezawa.hiroyu Hi. Even if limiting memory usage by cgroup memory subsystem or isolating memory by cpuset, swap space is shared, so resource isolation is not enough. If one group uses up all the swap space, it can affect other groups. I try making a patch of swap subsystem based on memory subsystem, which limits swap usage per cgroup. It can now charge and limit the swap usage. I implemented this feature as a new subsystem, not as a part of memory subsystem, because I don't want to make big change to memcontrol.c, and even if implemented as other subsystem, users can manage memory and swap on the same cgroup directory if mount them together. Basic idea of my implementation: - what will be charged ? the number of swap entries. - when to charge/uncharge ? charge at get_swap_entry(), and uncharge at swap_entry_free(). - to what group charge the swap entry ? To determine to what swap_cgroup (corresponding to mem_cgroup in memory subsystem) the swap entry should be charged, I added a pointer to mm_struct to page_cgroup(pc->pc_mm), and changed the argument of get_swap_entry() from (void) to (struct page *). As a result, get_swap_entry() can determine to what swap_cgroup it should charge the swap entry by referring to page->page_cgroup->mm_struct->swap_cgroup. - from what group uncharge the swap entry ? I added to swap_info_struct a member 'struct swap_cgroup **', array of pointer to which swap_cgroup the swap entry is charged. Todo: - rebase new kernel, and split into some patches. - Merge with memory subsystem (if it would be better), or remove dependency on CONFIG_CGROUP_MEM_CONT if possible (needs to make page_cgroup more generic one). - More tests, cleanups, and feartures :-) Any comments or discussions would be appreciated. Thanks, Daisuke Nishimura Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> --- diff -uprN linux-2.6.24-mm1/include/linux/cgroup_subsys.h linux-2.6.24-mm1-swaplimit/include/linux/cgroup_subsys.h --- linux-2.6.24-mm1/include/linux/cgroup_subsys.h 2008-02-04 14:34:24.000000000 +0900 +++ linux-2.6.24-mm1-swaplimit/include/linux/cgroup_subsys.h 2008-03-03 10:56:56.000000000 +0900 @@ -42,3 +42,9 @@ SUBSYS(mem_cgroup) #endif /* */ + +#ifdef CONFIG_CGROUP_SWAP_LIMIT +SUBSYS(swap) +#endif + +/* */ diff -uprN linux-2.6.24-mm1/include/linux/memcontrol.h linux-2.6.24-mm1-swaplimit/include/linux/memcontrol.h --- linux-2.6.24-mm1/include/linux/memcontrol.h 2008-02-04 14:34:24.000000000 +0900 +++ linux-2.6.24-mm1-swaplimit/include/linux/memcontrol.h 2008-03-03 10:56:56.000000000 +0900 @@ -29,6 +29,21 @@ struct page; struct mm_struct; #ifdef CONFIG_CGROUP_MEM_CONT +/* + * A page_cgroup page is associated with every page descriptor. The + * page_cgroup helps us identify information about the cgroup + */ +struct page_cgroup { + struct list_head lru; /* per cgroup LRU list */ + struct page *page; + struct mem_cgroup *mem_cgroup; +#ifdef CONFIG_CGROUP_SWAP_LIMIT + struct mm_struct *pc_mm; +#endif + atomic_t ref_cnt; /* Helpful when pages move b/w */ + /* mapped and cached states */ + int flags; +}; extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); extern void mm_free_cgroup(struct mm_struct *mm); diff -uprN linux-2.6.24-mm1/include/linux/mm_types.h linux-2.6.24-mm1-swaplimit/include/linux/mm_types.h --- linux-2.6.24-mm1/include/linux/mm_types.h 2008-02-04 14:34:24.000000000 +0900 +++ linux-2.6.24-mm1-swaplimit/include/linux/mm_types.h 2008-03-03 10:56:56.000000000 +0900 @@ -233,6 +233,9 @@ struct mm_struct { #ifdef CONFIG_CGROUP_MEM_CONT struct mem_cgroup *mem_cgroup; #endif +#ifdef CONFIG_CGROUP_SWAP_LIMIT + struct swap_cgroup *swap_cgroup; +#endif }; #endif /* _LINUX_MM_TYPES_H */ diff -uprN linux-2.6.24-mm1/include/linux/swap.h linux-2.6.24-mm1-swaplimit/include/linux/swap.h --- linux-2.6.24-mm1/include/linux/swap.h 2008-02-04 14:34:24.000000000 +0900 +++ linux-2.6.24-mm1-swaplimit/include/linux/swap.h 2008-03-03 10:56:56.000000000 +0900 @@ -7,6 +7,7 @@ #include <linux/list.h> #include <linux/memcontrol.h> #include <linux/sched.h> +#include <linux/swap_limit.h> #include <asm/atomic.h> #include <asm/page.h> @@ -141,6 +142,9 @@ struct swap_info_struct { struct swap_extent *curr_swap_extent; unsigned old_block_size; unsigned short * swap_map; +#ifdef CONFIG_CGROUP_SWAP_LIMIT + struct swap_cgroup **swap_cgroup; +#endif unsigned int lowest_bit; unsigned int highest_bit; unsigned int cluster_next; @@ -239,7 +243,7 @@ extern struct page *swapin_readahead(swp extern long total_swap_pages; extern unsigned int nr_swapfiles; extern void si_swapinfo(struct sysinfo *); -extern swp_entry_t get_swap_page(void); +extern swp_entry_t get_swap_page(struct page *); extern swp_entry_t get_swap_page_of_type(int); extern int swap_duplicate(swp_entry_t); extern int valid_swaphandles(swp_entry_t, unsigned long *); @@ -342,7 +346,7 @@ static inline int remove_exclusive_swap_ return 0; } -static inline swp_entry_t get_swap_page(void) +static inline swp_entry_t get_swap_page(struct page *page) { swp_entry_t entry; entry.val = 0; diff -uprN linux-2.6.24-mm1/include/linux/swap_limit.h linux-2.6.24-mm1-swaplimit/include/linux/swap_limit.h --- linux-2.6.24-mm1/include/linux/swap_limit.h 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.24-mm1-swaplimit/include/linux/swap_limit.h 2008-03-03 10:56:56.000000000 +0900 @@ -0,0 +1,65 @@ +/* + * swap_limit.h + * + */ +#ifndef _LINUX_SWAP_LIMIT_H +#define _LINUX_SWAP_LIMIT_H + +#include <linux/swap.h> +#include <linux/cgroup.h> +#include <linux/res_counter.h> + +struct swap_cgroup; +struct swap_info_struct; + +#ifdef CONFIG_CGROUP_SWAP_LIMIT +struct swap_cgroup { + struct cgroup_subsys_state css; + struct res_counter res; +}; + +static inline struct swap_cgroup *swap_cgroup_from_cgrp(struct cgroup *cgrp) +{ + return container_of(cgroup_subsys_state(cgrp, swap_subsys_id), + struct swap_cgroup, + css); +} + +static inline struct swap_cgroup *swap_cgroup_from_task(struct task_struct *p) +{ + return container_of(task_subsys_state(p, swap_subsys_id), + struct swap_cgroup, css); +} + +extern int swap_cgroup_charge(struct page *page, + struct swap_info_struct *si, + unsigned long offset); +extern void swap_cgroup_uncharge(struct swap_info_struct *si, + unsigned long offset); + +#else /* CONFIG_CGROUP_SWAP_LIMIT */ +static inline struct swap_cgroup *swap_cgroup_from_cgrp(struct cgroup *cgrp) +{ + return NULL; +} + +static inline struct swap_cgroup *swap_cgroup_from_task(struct task_struct *p) +{ + return NULL; +} + +static inline int swap_cgroup_charge(struct page *page, + struct swap_info_struct *si, + unsigned long offset) +{ + return 0; +} + +static inline void swap_cgroup_uncharge(struct swap_info_struct *si, + unsigned long offset) +{ +} + +#endif + +#endif diff -uprN linux-2.6.24-mm1/init/Kconfig linux-2.6.24-mm1-swaplimit/init/Kconfig --- linux-2.6.24-mm1/init/Kconfig 2008-02-04 14:34:24.000000000 +0900 +++ linux-2.6.24-mm1-swaplimit/init/Kconfig 2008-03-03 10:56:56.000000000 +0900 @@ -383,6 +383,12 @@ config CGROUP_MEM_CONT Provides a memory controller that manages both page cache and RSS memory. +config CGROUP_SWAP_LIMIT + bool "cgroup subsystem for swap" + depends on CGROUP_MEM_CONT && SWAP + help + Provides a swap controller that manages and limits swap usage. + config PROC_PID_CPUSET bool "Include legacy /proc/<pid>/cpuset file" depends on CPUSETS diff -uprN linux-2.6.24-mm1/mm/Makefile linux-2.6.24-mm1-swaplimit/mm/Makefile --- linux-2.6.24-mm1/mm/Makefile 2008-02-04 14:34:24.000000000 +0900 +++ linux-2.6.24-mm1-swaplimit/mm/Makefile 2008-03-03 10:56:56.000000000 +0900 @@ -32,4 +32,5 @@ obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_SMP) += allocpercpu.o obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o +obj-$(CONFIG_CGROUP_SWAP_LIMIT) += swap_limit.o diff -uprN linux-2.6.24-mm1/mm/memcontrol.c linux-2.6.24-mm1-swaplimit/mm/memcontrol.c --- linux-2.6.24-mm1/mm/memcontrol.c 2008-02-04 14:34:24.000000000 +0900 +++ linux-2.6.24-mm1-swaplimit/mm/memcontrol.c 2008-03-03 10:56:56.000000000 +0900 @@ -19,6 +19,7 @@ #include <linux/res_counter.h> #include <linux/memcontrol.h> +#include <linux/swap_limit.h> #include <linux/cgroup.h> #include <linux/mm.h> #include <linux/smp.h> @@ -146,18 +147,6 @@ struct mem_cgroup { #define PAGE_CGROUP_LOCK_BIT 0x0 #define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) -/* - * A page_cgroup page is associated with every page descriptor. The - * page_cgroup helps us identify information about the cgroup - */ -struct page_cgroup { - struct list_head lru; /* per cgroup LRU list */ - struct page *page; - struct mem_cgroup *mem_cgroup; - atomic_t ref_cnt; /* Helpful when pages move b/w */ - /* mapped and cached states */ - int flags; -}; #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ #define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ @@ -254,15 +243,27 @@ struct mem_cgroup *mem_cgroup_from_task( void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) { struct mem_cgroup *mem; +#ifdef CONFIG_CGROUP_SWAP_LIMIT + struct swap_cgroup *swap; +#endif mem = mem_cgroup_from_task(p); css_get(&mem->css); mm->mem_cgroup = mem; + +#ifdef CONFIG_CGROUP_SWAP_LIMIT + swap = swap_cgroup_from_task(p); + css_get(&swap->css); + mm->swap_cgroup = swap; +#endif } void mm_free_cgroup(struct mm_struct *mm) { css_put(&mm->mem_cgroup->css); +#ifdef CONFIG_CGROUP_SWAP_LIMIT + css_put(&mm->swap_cgroup->css); +#endif } static inline int page_cgroup_locked(struct page *page) @@ -664,6 +665,10 @@ retry: pc->flags = PAGE_CGROUP_FLAG_ACTIVE; if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) pc->flags |= PAGE_CGROUP_FLAG_CACHE; +#ifdef CONFIG_CGROUP_SWAP_LIMIT + atomic_inc(&mm->mm_count); + pc->pc_mm = mm; +#endif if (!page || page_cgroup_assign_new_page_cgroup(page, pc)) { /* @@ -673,6 +678,9 @@ retry: */ res_counter_uncharge(&mem->res, PAGE_SIZE); css_put(&mem->css); +#ifdef CONFIG_CGROUP_SWAP_LIMIT + mmdrop(mm); +#endif kfree(pc); if (!page) goto done; @@ -744,6 +752,9 @@ void mem_cgroup_uncharge(struct page_cgr if (clear_page_cgroup(page, pc) == pc) { mem = pc->mem_cgroup; css_put(&mem->css); +#ifdef CONFIG_CGROUP_SWAP_LIMIT + mmdrop(pc->pc_mm); +#endif res_counter_uncharge(&mem->res, PAGE_SIZE); spin_lock_irqsave(&mz->lru_lock, flags); __mem_cgroup_remove_list(pc); @@ -859,6 +870,9 @@ retry: atomic_set(&pc->ref_cnt, 0); if (clear_page_cgroup(page, pc) == pc) { css_put(&mem->css); +#ifdef CONFIG_CGROUP_SWAP_LIMIT + mmdrop(pc->pc_mm); +#endif res_counter_uncharge(&mem->res, PAGE_SIZE); __mem_cgroup_remove_list(pc); kfree(pc); diff -uprN linux-2.6.24-mm1/mm/shmem.c linux-2.6.24-mm1-swaplimit/mm/shmem.c --- linux-2.6.24-mm1/mm/shmem.c 2008-02-04 14:34:24.000000000 +0900 +++ linux-2.6.24-mm1-swaplimit/mm/shmem.c 2008-03-03 10:56:56.000000000 +0900 @@ -1024,7 +1024,7 @@ static int shmem_writepage(struct page * * want to check if there's a redundant swappage to be discarded. */ if (wbc->for_reclaim) - swap = get_swap_page(); + swap = get_swap_page(page); else swap.val = 0; diff -uprN linux-2.6.24-mm1/mm/swap_limit.c linux-2.6.24-mm1-swaplimit/mm/swap_limit.c --- linux-2.6.24-mm1/mm/swap_limit.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.24-mm1-swaplimit/mm/swap_limit.c 2008-03-05 14:39:23.000000000 +0900 @@ -0,0 +1,194 @@ +/* + * swap_limit.c - SWAP controller (based on memcontrol.c) + * + */ + +#include <linux/err.h> +#include <linux/fs.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/rcupdate.h> +#include <linux/cgroup.h> +#include <linux/res_counter.h> +#include <linux/memcontrol.h> +#include <linux/swap_limit.h> + +static struct swap_cgroup init_swap_cgroup; + +int swap_cgroup_charge(struct page *page, + struct swap_info_struct *si, + unsigned long offset) +{ + int ret; + struct page_cgroup *pc; + struct mm_struct *mm; + struct swap_cgroup *swap; + + BUG_ON(!page); + + /* + * Pages to be swapped out should have been charged by memory cgroup, + * but very rarely, pc would be NULL (pc is not reliable without lock, + * so I should fix here). + * In such cases, we charge the init_mm now. + */ + pc = page_get_page_cgroup(page); + if (WARN_ON(!pc)) + mm = &init_mm; + else + mm = pc->pc_mm; + BUG_ON(!mm); + + rcu_read_lock(); + swap = rcu_dereference(mm->swap_cgroup); + rcu_read_unlock(); + BUG_ON(!swap); + + ret = res_counter_charge(&swap->res, PAGE_SIZE); + if (!ret) { + css_get(&swap->css); + si->swap_cgroup[offset] = swap; + } + + return ret; +} + +void swap_cgroup_uncharge(struct swap_info_struct *si, unsigned long offset) +{ + struct swap_cgroup *swap = si->swap_cgroup[offset]; + + /* + * "swap" would be NULL: + * 1. when get_swap_page() failed at charging swap_cgroup, + * and called swap_entry_free(). + * 2. when this swap entry had been assigned by + * get_swap_page_of_type() (via SWSUSP ?). + */ + if (swap) { + res_counter_uncharge(&swap->res, PAGE_SIZE); + si->swap_cgroup[offset] = NULL; + css_put(&swap->css); + } +} + +static struct cgroup_subsys_state *swap_cgroup_create(struct cgroup_subsys *ss, + struct cgroup *cgrp) +{ + struct swap_cgroup *swap; + + if (unlikely((cgrp->parent) == NULL)) { + swap = &init_swap_cgroup; + init_mm.swap_cgroup = swap; + } else + swap = kzalloc(sizeof(struct swap_cgroup), GFP_KERNEL); + + if (swap == NULL) + return ERR_PTR(-ENOMEM); + + res_counter_init(&swap->res); + + return &swap->css; +} + +static void swap_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + kfree(swap_cgroup_from_cgrp(cgrp)); +} + +static ssize_t swap_cgroup_read(struct cgroup *cgrp, + struct cftype *cft, struct file *file, + char __user *userbuf, size_t nbytes, + loff_t *ppos) +{ + return res_counter_read(&swap_cgroup_from_cgrp(cgrp)->res, + cft->private, userbuf, nbytes, ppos, + NULL); +} + +static int swap_cgroup_write_strategy(char *buf, unsigned long long *tmp) +{ + *tmp = memparse(buf, &buf); + if (*buf != '\0') + return -EINVAL; + + /* + * Round up the value to the closest page size + */ + *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT; + return 0; +} + +static ssize_t swap_cgroup_write(struct cgroup *cgrp, struct cftype *cft, + struct file *file, const char __user *userbuf, + size_t nbytes, loff_t *ppos) +{ + return res_counter_write(&swap_cgroup_from_cgrp(cgrp)->res, + cft->private, userbuf, nbytes, ppos, + swap_cgroup_write_strategy); +} + +static struct cftype swap_files[] = { + { + .name = "usage_in_bytes", + .private = RES_USAGE, + .read = swap_cgroup_read, + }, + { + .name = "limit_in_bytes", + .private = RES_LIMIT, + .write = swap_cgroup_write, + .read = swap_cgroup_read, + }, + { + .name = "failcnt", + .private = RES_FAILCNT, + .read = swap_cgroup_read, + }, +}; + +static int swap_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + return cgroup_add_files(cgrp, ss, swap_files, ARRAY_SIZE(swap_files)); +} + +static void swap_cgroup_move_task(struct cgroup_subsys *ss, + struct cgroup *cgrp, + struct cgroup *old_cgrp, + struct task_struct *p) +{ + struct mm_struct *mm; + struct swap_cgroup *swap, *old_swap; + + mm = get_task_mm(p); + if (mm == NULL) + return; + + swap = swap_cgroup_from_cgrp(cgrp); + old_swap = swap_cgroup_from_cgrp(old_cgrp); + + if (swap == old_swap) + goto out; + + if (p->tgid != p->pid) + goto out; + + css_get(&swap->css); + rcu_assign_pointer(mm->swap_cgroup, swap); + css_put(&old_swap->css); + +out: + mmput(mm); + return; +} + +struct cgroup_subsys swap_subsys = { + .name = "swap", + .create = swap_cgroup_create, + .destroy = swap_cgroup_destroy, + .populate = swap_cgroup_populate, + .subsys_id = swap_subsys_id, + .attach = swap_cgroup_move_task, + .early_init = 0, +}; diff -uprN linux-2.6.24-mm1/mm/swap_state.c linux-2.6.24-mm1-swaplimit/mm/swap_state.c --- linux-2.6.24-mm1/mm/swap_state.c 2008-02-04 14:34:24.000000000 +0900 +++ linux-2.6.24-mm1-swaplimit/mm/swap_state.c 2008-03-03 10:56:56.000000000 +0900 @@ -128,7 +128,7 @@ int add_to_swap(struct page * page, gfp_ BUG_ON(!PageUptodate(page)); for (;;) { - entry = get_swap_page(); + entry = get_swap_page(page); if (!entry.val) return 0; diff -uprN linux-2.6.24-mm1/mm/swapfile.c linux-2.6.24-mm1-swaplimit/mm/swapfile.c --- linux-2.6.24-mm1/mm/swapfile.c 2008-02-04 14:34:24.000000000 +0900 +++ linux-2.6.24-mm1-swaplimit/mm/swapfile.c 2008-03-03 10:56:56.000000000 +0900 @@ -28,6 +28,7 @@ #include <linux/capability.h> #include <linux/syscalls.h> #include <linux/memcontrol.h> +#include <linux/swap_limit.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> @@ -172,7 +173,10 @@ no_page: return 0; } -swp_entry_t get_swap_page(void) +/* get_swap_page() calls this */ +static int swap_entry_free(struct swap_info_struct *, unsigned long); + +swp_entry_t get_swap_page(struct page *page) { struct swap_info_struct *si; pgoff_t offset; @@ -201,6 +205,16 @@ swp_entry_t get_swap_page(void) swap_list.next = next; offset = scan_swap_map(si); if (offset) { + /* + * This should be the first use of this swap entry, + * so charge this swap entry now. + */ + if (swap_cgroup_charge(page, si, offset)) { + /* should free this entry */ + swap_entry_free(si, offset); + + goto noswap; + } spin_unlock(&swap_lock); return swp_entry(type, offset); } @@ -285,6 +299,7 @@ static int swap_entry_free(struct swap_i swap_list.next = p - swap_info; nr_swap_pages++; p->inuse_pages--; + swap_cgroup_uncharge(p, offset); } } return count; @@ -1207,6 +1222,9 @@ asmlinkage long sys_swapoff(const char _ { struct swap_info_struct * p = NULL; unsigned short *swap_map; +#ifdef CONFIG_CGROUP_SWAP_LIMIT + struct swap_cgroup **swap_cgroup; +#endif struct file *swap_file, *victim; struct address_space *mapping; struct inode *inode; @@ -1309,10 +1327,17 @@ asmlinkage long sys_swapoff(const char _ p->max = 0; swap_map = p->swap_map; p->swap_map = NULL; +#ifdef CONFIG_CGROUP_SWAP_LIMIT + swap_cgroup = p->swap_cgroup; + p->swap_cgroup = NULL; +#endif p->flags = 0; spin_unlock(&swap_lock); mutex_unlock(&swapon_mutex); vfree(swap_map); +#ifdef CONFIG_CGROUP_SWAP_LIMIT + vfree(swap_cgroup); +#endif inode = mapping->host; if (S_ISBLK(inode->i_mode)) { struct block_device *bdev = I_BDEV(inode); @@ -1460,6 +1485,9 @@ asmlinkage long sys_swapon(const char __ unsigned long maxpages = 1; int swapfilesize; unsigned short *swap_map; +#ifdef CONFIG_CGROUP_SWAP_LIMIT + struct swap_cgroup **swap_cgroup; +#endif struct page *page = NULL; struct inode *inode = NULL; int did_down = 0; @@ -1483,6 +1511,9 @@ asmlinkage long sys_swapon(const char __ p->swap_file = NULL; p->old_block_size = 0; p->swap_map = NULL; +#ifdef CONFIG_CGROUP_SWAP_LIMIT + p->swap_cgroup = NULL; +#endif p->lowest_bit = 0; p->highest_bit = 0; p->cluster_nr = 0; @@ -1647,6 +1678,15 @@ asmlinkage long sys_swapon(const char __ 1 /* header page */; if (error) goto bad_swap; + +#ifdef CONFIG_CGROUP_SWAP_LIMIT + p->swap_cgroup = vmalloc(maxpages * sizeof(*swap_cgroup)); + if (!(p->swap_cgroup)) { + error = -ENOMEM; + goto bad_swap; + } + memset(p->swap_cgroup, 0, maxpages * sizeof(*swap_cgroup)); +#endif } if (nr_good_pages) { @@ -1704,13 +1744,22 @@ bad_swap: bad_swap_2: spin_lock(&swap_lock); swap_map = p->swap_map; +#ifdef CONFIG_CGROUP_SWAP_LIMIT + swap_cgroup = p->swap_cgroup; +#endif p->swap_file = NULL; p->swap_map = NULL; +#ifdef CONFIG_CGROUP_SWAP_LIMIT + p->swap_cgroup = NULL; +#endif p->flags = 0; if (!(swap_flags & SWAP_FLAG_PREFER)) ++least_priority; spin_unlock(&swap_lock); vfree(swap_map); +#ifdef CONFIG_CGROUP_SWAP_LIMIT + vfree(swap_cgroup); +#endif if (swap_file) filp_close(swap_file, NULL); out: -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
[parent not found: <47CE36A9.3060204-YQH0OdQVrdy45+QrQBaojngSJqDPrsil@public.gmane.org>]
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-05 5:59 ` Daisuke Nishimura @ 2008-03-05 6:36 ` Paul Menage -1 siblings, 0 replies; 50+ messages in thread From: Paul Menage @ 2008-03-05 6:36 UTC (permalink / raw) To: Daisuke Nishimura Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, xemul-GEFAQzZX7r8dnm+yROfE0A, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 Hi Daisuke, Most of my comments below are to do with style issues with cgroups, rather than the details of the memory management code. 2008/3/4 Daisuke Nishimura <nishimura-YQH0OdQVrdy45+QrQBaojngSJqDPrsil@public.gmane.org>: > +/* > + * A page_cgroup page is associated with every page descriptor. The > + * page_cgroup helps us identify information about the cgroup > + */ > +struct page_cgroup { > + struct list_head lru; /* per cgroup LRU list */ > + struct page *page; > + struct mem_cgroup *mem_cgroup; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct mm_struct *pc_mm; > +#endif > + atomic_t ref_cnt; /* Helpful when pages move b/w */ > + /* mapped and cached states */ > + int flags; > +}; > > + > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > +struct swap_cgroup { > + struct cgroup_subsys_state css; > + struct res_counter res; > +}; > + > +static inline struct swap_cgroup *swap_cgroup_from_cgrp(struct cgroup *cgrp) > +{ > + return container_of(cgroup_subsys_state(cgrp, swap_subsys_id), > + struct swap_cgroup, > + css); > +} > + > +static inline struct swap_cgroup *swap_cgroup_from_task(struct task_struct *p) > +{ > + return container_of(task_subsys_state(p, swap_subsys_id), > + struct swap_cgroup, css); > +} Can't these definitions be moved into swap_limit.c? > @@ -254,15 +243,27 @@ struct mem_cgroup *mem_cgroup_from_task( > void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) > { > struct mem_cgroup *mem; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct swap_cgroup *swap; > +#endif > > mem = mem_cgroup_from_task(p); > css_get(&mem->css); > mm->mem_cgroup = mem; > + > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + swap = swap_cgroup_from_task(p); > + css_get(&swap->css); > + mm->swap_cgroup = swap; > +#endif My feeling is that it would be cleaner to move this code into swap_limit.c, and have a separate mm_init_swap_cgroup() function. (And a mm_free_swap_cgroup() function). > + pc = page_get_page_cgroup(page); > + if (WARN_ON(!pc)) > + mm = &init_mm; > + else > + mm = pc->pc_mm; > + BUG_ON(!mm); Is this safe against races with the mem.force_empty operation? > + > + rcu_read_lock(); > + swap = rcu_dereference(mm->swap_cgroup); > + rcu_read_unlock(); > + BUG_ON(!swap); Is it safe to do rcu_read_unlock() while you are still planning to operate on the value of "swap"? > + > +static ssize_t swap_cgroup_read(struct cgroup *cgrp, > + struct cftype *cft, struct file *file, > + char __user *userbuf, size_t nbytes, > + loff_t *ppos) > +{ > + return res_counter_read(&swap_cgroup_from_cgrp(cgrp)->res, > + cft->private, userbuf, nbytes, ppos, > + NULL); > +} Can you use the cgroups read_u64 method, and just call res_counter_read_u64? > + > +static int swap_cgroup_write_strategy(char *buf, unsigned long long *tmp) > +{ > + *tmp = memparse(buf, &buf); > + if (*buf != '\0') > + return -EINVAL; > + > + /* > + * Round up the value to the closest page size > + */ > + *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT; > + return 0; > +} This is the same as mem_cgroup_write_strategy. As part of your patch, can you create a res_counter_write_pagealign() strategy function in res_counter.c and use it from the memory and swap cgroups? > + > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + p->swap_cgroup = vmalloc(maxpages * sizeof(*swap_cgroup)); > + if (!(p->swap_cgroup)) { > + error = -ENOMEM; > + goto bad_swap; > + } > + memset(p->swap_cgroup, 0, maxpages * sizeof(*swap_cgroup)); > +#endif It would be nice to only allocate these the first time the swap cgroup subsystem becomes active, to avoid the overhead for people not using it; even better if you can free it again if the swap subsystem becomes inactive again. Paul ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-05 6:36 ` Paul Menage 0 siblings, 0 replies; 50+ messages in thread From: Paul Menage @ 2008-03-05 6:36 UTC (permalink / raw) To: Daisuke Nishimura; +Cc: containers, linux-mm, balbir, xemul, kamezawa.hiroyu Hi Daisuke, Most of my comments below are to do with style issues with cgroups, rather than the details of the memory management code. 2008/3/4 Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>: > +/* > + * A page_cgroup page is associated with every page descriptor. The > + * page_cgroup helps us identify information about the cgroup > + */ > +struct page_cgroup { > + struct list_head lru; /* per cgroup LRU list */ > + struct page *page; > + struct mem_cgroup *mem_cgroup; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct mm_struct *pc_mm; > +#endif > + atomic_t ref_cnt; /* Helpful when pages move b/w */ > + /* mapped and cached states */ > + int flags; > +}; > > + > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > +struct swap_cgroup { > + struct cgroup_subsys_state css; > + struct res_counter res; > +}; > + > +static inline struct swap_cgroup *swap_cgroup_from_cgrp(struct cgroup *cgrp) > +{ > + return container_of(cgroup_subsys_state(cgrp, swap_subsys_id), > + struct swap_cgroup, > + css); > +} > + > +static inline struct swap_cgroup *swap_cgroup_from_task(struct task_struct *p) > +{ > + return container_of(task_subsys_state(p, swap_subsys_id), > + struct swap_cgroup, css); > +} Can't these definitions be moved into swap_limit.c? > @@ -254,15 +243,27 @@ struct mem_cgroup *mem_cgroup_from_task( > void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) > { > struct mem_cgroup *mem; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct swap_cgroup *swap; > +#endif > > mem = mem_cgroup_from_task(p); > css_get(&mem->css); > mm->mem_cgroup = mem; > + > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + swap = swap_cgroup_from_task(p); > + css_get(&swap->css); > + mm->swap_cgroup = swap; > +#endif My feeling is that it would be cleaner to move this code into swap_limit.c, and have a separate mm_init_swap_cgroup() function. (And a mm_free_swap_cgroup() function). > + pc = page_get_page_cgroup(page); > + if (WARN_ON(!pc)) > + mm = &init_mm; > + else > + mm = pc->pc_mm; > + BUG_ON(!mm); Is this safe against races with the mem.force_empty operation? > + > + rcu_read_lock(); > + swap = rcu_dereference(mm->swap_cgroup); > + rcu_read_unlock(); > + BUG_ON(!swap); Is it safe to do rcu_read_unlock() while you are still planning to operate on the value of "swap"? > + > +static ssize_t swap_cgroup_read(struct cgroup *cgrp, > + struct cftype *cft, struct file *file, > + char __user *userbuf, size_t nbytes, > + loff_t *ppos) > +{ > + return res_counter_read(&swap_cgroup_from_cgrp(cgrp)->res, > + cft->private, userbuf, nbytes, ppos, > + NULL); > +} Can you use the cgroups read_u64 method, and just call res_counter_read_u64? > + > +static int swap_cgroup_write_strategy(char *buf, unsigned long long *tmp) > +{ > + *tmp = memparse(buf, &buf); > + if (*buf != '\0') > + return -EINVAL; > + > + /* > + * Round up the value to the closest page size > + */ > + *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT; > + return 0; > +} This is the same as mem_cgroup_write_strategy. As part of your patch, can you create a res_counter_write_pagealign() strategy function in res_counter.c and use it from the memory and swap cgroups? > + > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + p->swap_cgroup = vmalloc(maxpages * sizeof(*swap_cgroup)); > + if (!(p->swap_cgroup)) { > + error = -ENOMEM; > + goto bad_swap; > + } > + memset(p->swap_cgroup, 0, maxpages * sizeof(*swap_cgroup)); > +#endif It would be nice to only allocate these the first time the swap cgroup subsystem becomes active, to avoid the overhead for people not using it; even better if you can free it again if the swap subsystem becomes inactive again. Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
[parent not found: <6599ad830803042236x3e5fdf0dmaf4119997025ba40-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-05 6:36 ` Paul Menage @ 2008-03-06 12:20 ` Daisuke Nishimura -1 siblings, 0 replies; 50+ messages in thread From: Daisuke Nishimura @ 2008-03-06 12:20 UTC (permalink / raw) To: Paul Menage Cc: linux-mm-Bw31MaZKKs3YtjvyW6yDsg, containers-qjLDD68F18O7TbgM5vRIOg, hugh-DTz5qymZ9yRBDgjK7y7TUQ, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8, xemul-GEFAQzZX7r8dnm+yROfE0A Hi. Paul Menage wrote: >> + pc = page_get_page_cgroup(page); >> + if (WARN_ON(!pc)) >> + mm = &init_mm; >> + else >> + mm = pc->pc_mm; >> + BUG_ON(!mm); > > Is this safe against races with the mem.force_empty operation? > I've not considered yet about force_empty operation of memory subsystem. Thank you for pointing it out. >> + >> + rcu_read_lock(); >> + swap = rcu_dereference(mm->swap_cgroup); >> + rcu_read_unlock(); >> + BUG_ON(!swap); > > Is it safe to do rcu_read_unlock() while you are still planning to > operate on the value of "swap"? > You are right. I think I should css_get() before rcu_read_unlock() as memory subsystem does. >> + >> +#ifdef CONFIG_CGROUP_SWAP_LIMIT >> + p->swap_cgroup = vmalloc(maxpages * sizeof(*swap_cgroup)); >> + if (!(p->swap_cgroup)) { >> + error = -ENOMEM; >> + goto bad_swap; >> + } >> + memset(p->swap_cgroup, 0, maxpages * sizeof(*swap_cgroup)); >> +#endif > > It would be nice to only allocate these the first time the swap cgroup > subsystem becomes active, to avoid the overhead for people not using > it; even better if you can free it again if the swap subsystem becomes > inactive again. > Hmm.. good idea. I think this is possible by adding a flag file, like "swap.enable_limit", to the top of cgroup directory, and charging all the swap entries which are used when the flag is enabled to the top cgroup. Thanks, Daisuke Nishimura. ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-06 12:20 ` Daisuke Nishimura 0 siblings, 0 replies; 50+ messages in thread From: Daisuke Nishimura @ 2008-03-06 12:20 UTC (permalink / raw) To: Paul Menage; +Cc: containers, linux-mm, balbir, xemul, kamezawa.hiroyu, hugh Hi. Paul Menage wrote: >> + pc = page_get_page_cgroup(page); >> + if (WARN_ON(!pc)) >> + mm = &init_mm; >> + else >> + mm = pc->pc_mm; >> + BUG_ON(!mm); > > Is this safe against races with the mem.force_empty operation? > I've not considered yet about force_empty operation of memory subsystem. Thank you for pointing it out. >> + >> + rcu_read_lock(); >> + swap = rcu_dereference(mm->swap_cgroup); >> + rcu_read_unlock(); >> + BUG_ON(!swap); > > Is it safe to do rcu_read_unlock() while you are still planning to > operate on the value of "swap"? > You are right. I think I should css_get() before rcu_read_unlock() as memory subsystem does. >> + >> +#ifdef CONFIG_CGROUP_SWAP_LIMIT >> + p->swap_cgroup = vmalloc(maxpages * sizeof(*swap_cgroup)); >> + if (!(p->swap_cgroup)) { >> + error = -ENOMEM; >> + goto bad_swap; >> + } >> + memset(p->swap_cgroup, 0, maxpages * sizeof(*swap_cgroup)); >> +#endif > > It would be nice to only allocate these the first time the swap cgroup > subsystem becomes active, to avoid the overhead for people not using > it; even better if you can free it again if the swap subsystem becomes > inactive again. > Hmm.. good idea. I think this is possible by adding a flag file, like "swap.enable_limit", to the top of cgroup directory, and charging all the swap entries which are used when the flag is enabled to the top cgroup. Thanks, Daisuke Nishimura. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-05 5:59 ` Daisuke Nishimura @ 2008-03-05 6:53 ` KAMEZAWA Hiroyuki -1 siblings, 0 replies; 50+ messages in thread From: KAMEZAWA Hiroyuki @ 2008-03-05 6:53 UTC (permalink / raw) To: Daisuke Nishimura Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, xemul-GEFAQzZX7r8dnm+yROfE0A, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 On Wed, 05 Mar 2008 14:59:05 +0900 Daisuke Nishimura <nishimura-YQH0OdQVrdy45+QrQBaojngSJqDPrsil@public.gmane.org> wrote: > #ifdef CONFIG_CGROUP_MEM_CONT > +/* > + * A page_cgroup page is associated with every page descriptor. The > + * page_cgroup helps us identify information about the cgroup > + */ > +struct page_cgroup { > + struct list_head lru; /* per cgroup LRU list */ > + struct page *page; > + struct mem_cgroup *mem_cgroup; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct mm_struct *pc_mm; > +#endif > + atomic_t ref_cnt; /* Helpful when pages move b/w */ > + /* mapped and cached states */ > + int flags; > +}; > As first impression, I don't like to increase size of this...but have no alternative idea. > static inline int page_cgroup_locked(struct page *page) > @@ -664,6 +665,10 @@ retry: > pc->flags = PAGE_CGROUP_FLAG_ACTIVE; > if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) > pc->flags |= PAGE_CGROUP_FLAG_CACHE; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + atomic_inc(&mm->mm_count); > + pc->pc_mm = mm; > +#endif > Strongly Nack to this atomic_inc(). What happens when tmpfs pages goes to swap ? > if (!page || page_cgroup_assign_new_page_cgroup(page, pc)) { > /* > @@ -673,6 +678,9 @@ retry: > +int swap_cgroup_charge(struct page *page, > + struct swap_info_struct *si, > + unsigned long offset) > +{ > + int ret; > + struct page_cgroup *pc; > + struct mm_struct *mm; > + struct swap_cgroup *swap; > + > + BUG_ON(!page); > + > + /* > + * Pages to be swapped out should have been charged by memory cgroup, > + * but very rarely, pc would be NULL (pc is not reliable without lock, > + * so I should fix here). > + * In such cases, we charge the init_mm now. > + */ > + pc = page_get_page_cgroup(page); > + if (WARN_ON(!pc)) > + mm = &init_mm; > + else > + mm = pc->pc_mm; > + BUG_ON(!mm); > + > + rcu_read_lock(); > + swap = rcu_dereference(mm->swap_cgroup); > + rcu_read_unlock(); > + BUG_ON(!swap); Is there no race ? At first look, remembering mm struct is not very good. Remembering swap controller itself is better. If you go this direction, how about this way ? == enum { #ifdef CONFIG_CGROUP_MEM_CONT MEMORY_RESOURCE_CONTROLLER, #endif #ifdef CONFIG_CGROUP_SWAP_CONT SWAP_CONTROLLER, #endif NR_PAGE_CONTROLLER, } struct page_cgroup { ...... void* controlls[NR_PAGE_CONTROLLER]; .... }; == Thanks, -Kame ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-05 6:53 ` KAMEZAWA Hiroyuki 0 siblings, 0 replies; 50+ messages in thread From: KAMEZAWA Hiroyuki @ 2008-03-05 6:53 UTC (permalink / raw) To: Daisuke Nishimura; +Cc: containers, linux-mm, balbir, xemul On Wed, 05 Mar 2008 14:59:05 +0900 Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> wrote: > #ifdef CONFIG_CGROUP_MEM_CONT > +/* > + * A page_cgroup page is associated with every page descriptor. The > + * page_cgroup helps us identify information about the cgroup > + */ > +struct page_cgroup { > + struct list_head lru; /* per cgroup LRU list */ > + struct page *page; > + struct mem_cgroup *mem_cgroup; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct mm_struct *pc_mm; > +#endif > + atomic_t ref_cnt; /* Helpful when pages move b/w */ > + /* mapped and cached states */ > + int flags; > +}; > As first impression, I don't like to increase size of this...but have no alternative idea. > static inline int page_cgroup_locked(struct page *page) > @@ -664,6 +665,10 @@ retry: > pc->flags = PAGE_CGROUP_FLAG_ACTIVE; > if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) > pc->flags |= PAGE_CGROUP_FLAG_CACHE; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + atomic_inc(&mm->mm_count); > + pc->pc_mm = mm; > +#endif > Strongly Nack to this atomic_inc(). What happens when tmpfs pages goes to swap ? > if (!page || page_cgroup_assign_new_page_cgroup(page, pc)) { > /* > @@ -673,6 +678,9 @@ retry: > +int swap_cgroup_charge(struct page *page, > + struct swap_info_struct *si, > + unsigned long offset) > +{ > + int ret; > + struct page_cgroup *pc; > + struct mm_struct *mm; > + struct swap_cgroup *swap; > + > + BUG_ON(!page); > + > + /* > + * Pages to be swapped out should have been charged by memory cgroup, > + * but very rarely, pc would be NULL (pc is not reliable without lock, > + * so I should fix here). > + * In such cases, we charge the init_mm now. > + */ > + pc = page_get_page_cgroup(page); > + if (WARN_ON(!pc)) > + mm = &init_mm; > + else > + mm = pc->pc_mm; > + BUG_ON(!mm); > + > + rcu_read_lock(); > + swap = rcu_dereference(mm->swap_cgroup); > + rcu_read_unlock(); > + BUG_ON(!swap); Is there no race ? At first look, remembering mm struct is not very good. Remembering swap controller itself is better. If you go this direction, how about this way ? == enum { #ifdef CONFIG_CGROUP_MEM_CONT MEMORY_RESOURCE_CONTROLLER, #endif #ifdef CONFIG_CGROUP_SWAP_CONT SWAP_CONTROLLER, #endif NR_PAGE_CONTROLLER, } struct page_cgroup { ...... void* controlls[NR_PAGE_CONTROLLER]; .... }; == Thanks, -Kame -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
[parent not found: <20080305155329.60e02f48.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>]
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-05 6:53 ` KAMEZAWA Hiroyuki @ 2008-03-05 21:51 ` Hirokazu Takahashi -1 siblings, 0 replies; 50+ messages in thread From: Hirokazu Takahashi @ 2008-03-05 21:51 UTC (permalink / raw) To: nishimura-YQH0OdQVrdy45+QrQBaojngSJqDPrsil Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8, xemul-GEFAQzZX7r8dnm+yROfE0A Hi, > > #ifdef CONFIG_CGROUP_MEM_CONT > > +/* > > + * A page_cgroup page is associated with every page descriptor. The > > + * page_cgroup helps us identify information about the cgroup > > + */ > > +struct page_cgroup { > > + struct list_head lru; /* per cgroup LRU list */ > > + struct page *page; > > + struct mem_cgroup *mem_cgroup; > > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > > + struct mm_struct *pc_mm; > > +#endif > > + atomic_t ref_cnt; /* Helpful when pages move b/w */ > > + /* mapped and cached states */ > > + int flags; > > +}; > > > As first impression, I don't like to increase size of this...but have no alternative > idea. If you really want to make the swap space subsystem and the memory subsystem work independently each other, you can possibly introduce a new data structure that binds pages in the swapcache and swap_cgroup. It would be enough since only a small part of the pages are in the swapcache. Thanks, Hirokazu Takahashi. ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-05 21:51 ` Hirokazu Takahashi 0 siblings, 0 replies; 50+ messages in thread From: Hirokazu Takahashi @ 2008-03-05 21:51 UTC (permalink / raw) To: nishimura; +Cc: kamezawa.hiroyu, containers, linux-mm, xemul, balbir Hi, > > #ifdef CONFIG_CGROUP_MEM_CONT > > +/* > > + * A page_cgroup page is associated with every page descriptor. The > > + * page_cgroup helps us identify information about the cgroup > > + */ > > +struct page_cgroup { > > + struct list_head lru; /* per cgroup LRU list */ > > + struct page *page; > > + struct mem_cgroup *mem_cgroup; > > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > > + struct mm_struct *pc_mm; > > +#endif > > + atomic_t ref_cnt; /* Helpful when pages move b/w */ > > + /* mapped and cached states */ > > + int flags; > > +}; > > > As first impression, I don't like to increase size of this...but have no alternative > idea. If you really want to make the swap space subsystem and the memory subsystem work independently each other, you can possibly introduce a new data structure that binds pages in the swapcache and swap_cgroup. It would be enough since only a small part of the pages are in the swapcache. Thanks, Hirokazu Takahashi. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-05 6:53 ` KAMEZAWA Hiroyuki @ 2008-03-06 11:45 ` Daisuke Nishimura -1 siblings, 0 replies; 50+ messages in thread From: Daisuke Nishimura @ 2008-03-06 11:45 UTC (permalink / raw) To: KAMEZAWA Hiroyuki Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, xemul-GEFAQzZX7r8dnm+yROfE0A, hugh-DTz5qymZ9yRBDgjK7y7TUQ, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 Hi. > At first look, remembering mm struct is not very good. > Remembering swap controller itself is better. The swap_cgroup when the page(and page_cgroup) is allocated and the swap_cgroup when the page is going to be swapped out may be different by swap_cgroup_move_task(), so I think swap_cgroup to be charged should be determined at the point of swapout. Instead of pointing mm_struct from page_cgroup, it would be better to determine the mm_struct which the page to be swapped out is belongs to by rmap, and charge swap_cgroup of the mm_struct. In this implementation, I don't need to add new member to page_cgroup. What do you think ? Thanks, Daisuke Nishimura. ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-06 11:45 ` Daisuke Nishimura 0 siblings, 0 replies; 50+ messages in thread From: Daisuke Nishimura @ 2008-03-06 11:45 UTC (permalink / raw) To: KAMEZAWA Hiroyuki; +Cc: containers, linux-mm, balbir, xemul, hugh Hi. > At first look, remembering mm struct is not very good. > Remembering swap controller itself is better. The swap_cgroup when the page(and page_cgroup) is allocated and the swap_cgroup when the page is going to be swapped out may be different by swap_cgroup_move_task(), so I think swap_cgroup to be charged should be determined at the point of swapout. Instead of pointing mm_struct from page_cgroup, it would be better to determine the mm_struct which the page to be swapped out is belongs to by rmap, and charge swap_cgroup of the mm_struct. In this implementation, I don't need to add new member to page_cgroup. What do you think ? Thanks, Daisuke Nishimura. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
[parent not found: <47CFD957.3060402-YQH0OdQVrdy45+QrQBaojngSJqDPrsil@public.gmane.org>]
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-06 11:45 ` Daisuke Nishimura @ 2008-03-06 12:25 ` Pavel Emelyanov -1 siblings, 0 replies; 50+ messages in thread From: Pavel Emelyanov @ 2008-03-06 12:25 UTC (permalink / raw) To: Daisuke Nishimura Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, hugh-DTz5qymZ9yRBDgjK7y7TUQ, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 Daisuke Nishimura wrote: > Hi. > >> At first look, remembering mm struct is not very good. >> Remembering swap controller itself is better. > > The swap_cgroup when the page(and page_cgroup) is allocated and > the swap_cgroup when the page is going to be swapped out may be > different by swap_cgroup_move_task(), so I think swap_cgroup > to be charged should be determined at the point of swapout. No. Since we now do not account for the situation, when pages are shared between cgroups, we may think, that the cgroup, which the page was allocated by and the cgroup, which this pages goes to swap in are the same. > Instead of pointing mm_struct from page_cgroup, it would be > better to determine the mm_struct which the page to be swapped > out is belongs to by rmap, and charge swap_cgroup of the mm_struct. > In this implementation, I don't need to add new member to page_cgroup. > > What do you think ? > > > Thanks, > Daisuke Nishimura. > ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-06 12:25 ` Pavel Emelyanov 0 siblings, 0 replies; 50+ messages in thread From: Pavel Emelyanov @ 2008-03-06 12:25 UTC (permalink / raw) To: Daisuke Nishimura; +Cc: KAMEZAWA Hiroyuki, containers, linux-mm, balbir, hugh Daisuke Nishimura wrote: > Hi. > >> At first look, remembering mm struct is not very good. >> Remembering swap controller itself is better. > > The swap_cgroup when the page(and page_cgroup) is allocated and > the swap_cgroup when the page is going to be swapped out may be > different by swap_cgroup_move_task(), so I think swap_cgroup > to be charged should be determined at the point of swapout. No. Since we now do not account for the situation, when pages are shared between cgroups, we may think, that the cgroup, which the page was allocated by and the cgroup, which this pages goes to swap in are the same. > Instead of pointing mm_struct from page_cgroup, it would be > better to determine the mm_struct which the page to be swapped > out is belongs to by rmap, and charge swap_cgroup of the mm_struct. > In this implementation, I don't need to add new member to page_cgroup. > > What do you think ? > > > Thanks, > Daisuke Nishimura. > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: Re: [RFC/PATCH] cgroup swap subsystem 2008-03-06 11:45 ` Daisuke Nishimura @ 2008-03-06 12:56 ` kamezawa.hiroyu -1 siblings, 0 replies; 50+ messages in thread From: kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A @ 2008-03-06 12:56 UTC (permalink / raw) To: Daisuke Nishimura Cc: linux-mm-Bw31MaZKKs3YtjvyW6yDsg, containers-qjLDD68F18O7TbgM5vRIOg, hugh-DTz5qymZ9yRBDgjK7y7TUQ, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8, xemul-GEFAQzZX7r8dnm+yROfE0A >> At first look, remembering mm struct is not very good. >> Remembering swap controller itself is better. > >The swap_cgroup when the page(and page_cgroup) is allocated and >the swap_cgroup when the page is going to be swapped out may be >different by swap_cgroup_move_task(), so I think swap_cgroup >to be charged should be determined at the point of swapout. > Accounting swap against an entity which allocs anon memory is not strange. Problem here is move_task itself. Now, charges against anon is not moved when a task which uses it is moved. please fix this behavior first if you think this is problematic. But, finally, a daemon driven by process event connector determines the group before process starts using anon. It's doubtful that it's worth to add complicated/costly ones. Thanks, -Kame ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-06 12:56 ` kamezawa.hiroyu 0 siblings, 0 replies; 50+ messages in thread From: kamezawa.hiroyu @ 2008-03-06 12:56 UTC (permalink / raw) To: Daisuke Nishimura Cc: KAMEZAWA Hiroyuki, containers, linux-mm, balbir, xemul, hugh >> At first look, remembering mm struct is not very good. >> Remembering swap controller itself is better. > >The swap_cgroup when the page(and page_cgroup) is allocated and >the swap_cgroup when the page is going to be swapped out may be >different by swap_cgroup_move_task(), so I think swap_cgroup >to be charged should be determined at the point of swapout. > Accounting swap against an entity which allocs anon memory is not strange. Problem here is move_task itself. Now, charges against anon is not moved when a task which uses it is moved. please fix this behavior first if you think this is problematic. But, finally, a daemon driven by process event connector determines the group before process starts using anon. It's doubtful that it's worth to add complicated/costly ones. Thanks, -Kame -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
[parent not found: <6197904.1204808216900.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>]
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-06 12:56 ` kamezawa.hiroyu @ 2008-03-07 8:22 ` Daisuke Nishimura -1 siblings, 0 replies; 50+ messages in thread From: Daisuke Nishimura @ 2008-03-07 8:22 UTC (permalink / raw) To: kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, xemul-GEFAQzZX7r8dnm+yROfE0A, hugh-DTz5qymZ9yRBDgjK7y7TUQ, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 Hi. kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org wrote: >>> At first look, remembering mm struct is not very good. >>> Remembering swap controller itself is better. >> The swap_cgroup when the page(and page_cgroup) is allocated and >> the swap_cgroup when the page is going to be swapped out may be >> different by swap_cgroup_move_task(), so I think swap_cgroup >> to be charged should be determined at the point of swapout. >> > Accounting swap against an entity which allocs anon memory is > not strange. Problem here is move_task itself. > Now, charges against anon is not moved when a task which uses it > is moved. please fix this behavior first if you think this is > problematic. > > But, finally, a daemon driven by process event connector > determines the group before process starts using anon. It's > doubtful that it's worth to add complicated/costly ones. > I agree with you. I think the current behavior of move_task is problematic, and should fix it. But fixing it would be difficult and add a costly process, so I should consider more. Thanks, Daisuke Nishimura. ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-07 8:22 ` Daisuke Nishimura 0 siblings, 0 replies; 50+ messages in thread From: Daisuke Nishimura @ 2008-03-07 8:22 UTC (permalink / raw) To: kamezawa.hiroyu; +Cc: containers, linux-mm, balbir, xemul, hugh Hi. kamezawa.hiroyu@jp.fujitsu.com wrote: >>> At first look, remembering mm struct is not very good. >>> Remembering swap controller itself is better. >> The swap_cgroup when the page(and page_cgroup) is allocated and >> the swap_cgroup when the page is going to be swapped out may be >> different by swap_cgroup_move_task(), so I think swap_cgroup >> to be charged should be determined at the point of swapout. >> > Accounting swap against an entity which allocs anon memory is > not strange. Problem here is move_task itself. > Now, charges against anon is not moved when a task which uses it > is moved. please fix this behavior first if you think this is > problematic. > > But, finally, a daemon driven by process event connector > determines the group before process starts using anon. It's > doubtful that it's worth to add complicated/costly ones. > I agree with you. I think the current behavior of move_task is problematic, and should fix it. But fixing it would be difficult and add a costly process, so I should consider more. Thanks, Daisuke Nishimura. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: Re: [RFC/PATCH] cgroup swap subsystem 2008-03-06 12:56 ` kamezawa.hiroyu @ 2008-03-12 22:57 ` YAMAMOTO Takashi -1 siblings, 0 replies; 50+ messages in thread From: YAMAMOTO Takashi @ 2008-03-12 22:57 UTC (permalink / raw) To: kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A Cc: linux-mm-Bw31MaZKKs3YtjvyW6yDsg, containers-qjLDD68F18O7TbgM5vRIOg, hugh-DTz5qymZ9yRBDgjK7y7TUQ, xemul-GEFAQzZX7r8dnm+yROfE0A, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 > >> At first look, remembering mm struct is not very good. > >> Remembering swap controller itself is better. > > > >The swap_cgroup when the page(and page_cgroup) is allocated and > >the swap_cgroup when the page is going to be swapped out may be > >different by swap_cgroup_move_task(), so I think swap_cgroup > >to be charged should be determined at the point of swapout. > > > Accounting swap against an entity which allocs anon memory is > not strange. Problem here is move_task itself. > Now, charges against anon is not moved when a task which uses it > is moved. please fix this behavior first if you think this is > problematic. > > But, finally, a daemon driven by process event connector > determines the group before process starts using anon. It's > doubtful that it's worth to add complicated/costly ones. > > > Thanks, > -Kame doesn't PEC work asynchronously and allows processes to use anonymous memory before being moved by the daemon? YAMAMOTO Takashi ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-12 22:57 ` YAMAMOTO Takashi 0 siblings, 0 replies; 50+ messages in thread From: YAMAMOTO Takashi @ 2008-03-12 22:57 UTC (permalink / raw) To: kamezawa.hiroyu; +Cc: nishimura, linux-mm, containers, hugh, balbir, xemul > >> At first look, remembering mm struct is not very good. > >> Remembering swap controller itself is better. > > > >The swap_cgroup when the page(and page_cgroup) is allocated and > >the swap_cgroup when the page is going to be swapped out may be > >different by swap_cgroup_move_task(), so I think swap_cgroup > >to be charged should be determined at the point of swapout. > > > Accounting swap against an entity which allocs anon memory is > not strange. Problem here is move_task itself. > Now, charges against anon is not moved when a task which uses it > is moved. please fix this behavior first if you think this is > problematic. > > But, finally, a daemon driven by process event connector > determines the group before process starts using anon. It's > doubtful that it's worth to add complicated/costly ones. > > > Thanks, > -Kame doesn't PEC work asynchronously and allows processes to use anonymous memory before being moved by the daemon? YAMAMOTO Takashi -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-05 5:59 ` Daisuke Nishimura @ 2008-03-05 7:03 ` KAMEZAWA Hiroyuki -1 siblings, 0 replies; 50+ messages in thread From: KAMEZAWA Hiroyuki @ 2008-03-05 7:03 UTC (permalink / raw) To: Daisuke Nishimura Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, xemul-GEFAQzZX7r8dnm+yROfE0A, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 On Wed, 05 Mar 2008 14:59:05 +0900 Daisuke Nishimura <nishimura-YQH0OdQVrdy45+QrQBaojngSJqDPrsil@public.gmane.org> wrote: > +int swap_cgroup_charge(struct page *page, > + struct swap_info_struct *si, > + unsigned long offset) > +{ > + int ret; > + struct page_cgroup *pc; > + struct mm_struct *mm; > + struct swap_cgroup *swap; > + > + BUG_ON(!page); > + > + /* > + * Pages to be swapped out should have been charged by memory cgroup, > + * but very rarely, pc would be NULL (pc is not reliable without lock, > + * so I should fix here). > + * In such cases, we charge the init_mm now. > + */ > + pc = page_get_page_cgroup(page); > + if (WARN_ON(!pc)) > + mm = &init_mm; > + else > + mm = pc->pc_mm; > + BUG_ON(!mm); > + > + rcu_read_lock(); > + swap = rcu_dereference(mm->swap_cgroup); > + rcu_read_unlock(); > + BUG_ON(!swap); > + > + ret = res_counter_charge(&swap->res, PAGE_SIZE); > + if (!ret) { > + css_get(&swap->css); > + si->swap_cgroup[offset] = swap; > + } > + I think it's better to reclaim swap_entry used for SwapCache but not in Harddisk before failure. Thanks, -Kame ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-05 7:03 ` KAMEZAWA Hiroyuki 0 siblings, 0 replies; 50+ messages in thread From: KAMEZAWA Hiroyuki @ 2008-03-05 7:03 UTC (permalink / raw) To: Daisuke Nishimura; +Cc: containers, linux-mm, balbir, xemul On Wed, 05 Mar 2008 14:59:05 +0900 Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> wrote: > +int swap_cgroup_charge(struct page *page, > + struct swap_info_struct *si, > + unsigned long offset) > +{ > + int ret; > + struct page_cgroup *pc; > + struct mm_struct *mm; > + struct swap_cgroup *swap; > + > + BUG_ON(!page); > + > + /* > + * Pages to be swapped out should have been charged by memory cgroup, > + * but very rarely, pc would be NULL (pc is not reliable without lock, > + * so I should fix here). > + * In such cases, we charge the init_mm now. > + */ > + pc = page_get_page_cgroup(page); > + if (WARN_ON(!pc)) > + mm = &init_mm; > + else > + mm = pc->pc_mm; > + BUG_ON(!mm); > + > + rcu_read_lock(); > + swap = rcu_dereference(mm->swap_cgroup); > + rcu_read_unlock(); > + BUG_ON(!swap); > + > + ret = res_counter_charge(&swap->res, PAGE_SIZE); > + if (!ret) { > + css_get(&swap->css); > + si->swap_cgroup[offset] = swap; > + } > + I think it's better to reclaim swap_entry used for SwapCache but not in Harddisk before failure. Thanks, -Kame -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-05 5:59 ` Daisuke Nishimura @ 2008-03-05 7:28 ` Balbir Singh -1 siblings, 0 replies; 50+ messages in thread From: Balbir Singh @ 2008-03-05 7:28 UTC (permalink / raw) To: Daisuke Nishimura Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Hugh Dickins, xemul-GEFAQzZX7r8dnm+yROfE0A Daisuke Nishimura wrote: > Hi. > > Even if limiting memory usage by cgroup memory subsystem > or isolating memory by cpuset, swap space is shared, so > resource isolation is not enough. If one group uses up all the > swap space, it can affect other groups. > Yes, that is true. Please ensure that you also cc Hugh Dickins for all swap related changes. > I try making a patch of swap subsystem based on memory > subsystem, which limits swap usage per cgroup. > It can now charge and limit the swap usage. > > I implemented this feature as a new subsystem, > not as a part of memory subsystem, because I don't want to > make big change to memcontrol.c, and even if implemented > as other subsystem, users can manage memory and swap on > the same cgroup directory if mount them together. > I agree, the swap system should be independent of the memory resource controller. > Basic idea of my implementation: > - what will be charged ? > the number of swap entries. > > - when to charge/uncharge ? > charge at get_swap_entry(), and uncharge at swap_entry_free(). > You mean get_swap_page(), I suppose. The assumption in the code is that every swap page being charged has already been charged by the memory controller (that will go against making the controllers independent). Also, be careful of any charge operations under a spin_lock(). We tried controlling pages in the swap cache, but Hugh found problems with it, specially due to accounting for pages that are read ahead to the correct cgroup. > - to what group charge the swap entry ? > To determine to what swap_cgroup (corresponding to mem_cgroup in > memory subsystem) the swap entry should be charged, > I added a pointer to mm_struct to page_cgroup(pc->pc_mm), and > changed the argument of get_swap_entry() from (void) to > (struct page *). As a result, get_swap_entry() can determine > to what swap_cgroup it should charge the swap entry > by referring to page->page_cgroup->mm_struct->swap_cgroup. > I presume this is for the case when the memory and swap controllers are mounted in different hierarchies. It seems like too many dereferences to get to the swap_cgroup > - from what group uncharge the swap entry ? > I added to swap_info_struct a member 'struct swap_cgroup **', > array of pointer to which swap_cgroup the swap entry is > charged. > > Todo: > - rebase new kernel, and split into some patches. > - Merge with memory subsystem (if it would be better), or > remove dependency on CONFIG_CGROUP_MEM_CONT if possible > (needs to make page_cgroup more generic one). > - More tests, cleanups, and feartures :-) > > > Any comments or discussions would be appreciated. > To be honest, I tried looking at the code, but there were too many #ifdefs and I sort of lost myself in them. > Thanks, > Daisuke Nishimura > -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-05 7:28 ` Balbir Singh 0 siblings, 0 replies; 50+ messages in thread From: Balbir Singh @ 2008-03-05 7:28 UTC (permalink / raw) To: Daisuke Nishimura; +Cc: containers, linux-mm, xemul, Hugh Dickins Daisuke Nishimura wrote: > Hi. > > Even if limiting memory usage by cgroup memory subsystem > or isolating memory by cpuset, swap space is shared, so > resource isolation is not enough. If one group uses up all the > swap space, it can affect other groups. > Yes, that is true. Please ensure that you also cc Hugh Dickins for all swap related changes. > I try making a patch of swap subsystem based on memory > subsystem, which limits swap usage per cgroup. > It can now charge and limit the swap usage. > > I implemented this feature as a new subsystem, > not as a part of memory subsystem, because I don't want to > make big change to memcontrol.c, and even if implemented > as other subsystem, users can manage memory and swap on > the same cgroup directory if mount them together. > I agree, the swap system should be independent of the memory resource controller. > Basic idea of my implementation: > - what will be charged ? > the number of swap entries. > > - when to charge/uncharge ? > charge at get_swap_entry(), and uncharge at swap_entry_free(). > You mean get_swap_page(), I suppose. The assumption in the code is that every swap page being charged has already been charged by the memory controller (that will go against making the controllers independent). Also, be careful of any charge operations under a spin_lock(). We tried controlling pages in the swap cache, but Hugh found problems with it, specially due to accounting for pages that are read ahead to the correct cgroup. > - to what group charge the swap entry ? > To determine to what swap_cgroup (corresponding to mem_cgroup in > memory subsystem) the swap entry should be charged, > I added a pointer to mm_struct to page_cgroup(pc->pc_mm), and > changed the argument of get_swap_entry() from (void) to > (struct page *). As a result, get_swap_entry() can determine > to what swap_cgroup it should charge the swap entry > by referring to page->page_cgroup->mm_struct->swap_cgroup. > I presume this is for the case when the memory and swap controllers are mounted in different hierarchies. It seems like too many dereferences to get to the swap_cgroup > - from what group uncharge the swap entry ? > I added to swap_info_struct a member 'struct swap_cgroup **', > array of pointer to which swap_cgroup the swap entry is > charged. > > Todo: > - rebase new kernel, and split into some patches. > - Merge with memory subsystem (if it would be better), or > remove dependency on CONFIG_CGROUP_MEM_CONT if possible > (needs to make page_cgroup more generic one). > - More tests, cleanups, and feartures :-) > > > Any comments or discussions would be appreciated. > To be honest, I tried looking at the code, but there were too many #ifdefs and I sort of lost myself in them. > Thanks, > Daisuke Nishimura > -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
[parent not found: <47CE4BB6.8050803-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>]
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-05 7:28 ` Balbir Singh @ 2008-03-07 4:23 ` Daisuke Nishimura -1 siblings, 0 replies; 50+ messages in thread From: Daisuke Nishimura @ 2008-03-07 4:23 UTC (permalink / raw) To: balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Hugh Dickins, xemul-GEFAQzZX7r8dnm+yROfE0A Hi. Balbir Singh wrote: > Daisuke Nishimura wrote: >> Basic idea of my implementation: >> - what will be charged ? >> the number of swap entries. >> >> - when to charge/uncharge ? >> charge at get_swap_entry(), and uncharge at swap_entry_free(). >> > > You mean get_swap_page(), I suppose. The assumption in the code is that every > swap page being charged has already been charged by the memory controller (that > will go against making the controllers independent). Also, be careful of any To make swap-limit independent of memory subsystem, I think page_cgroup code should be separated into two part: subsystem-independent and subsystem-dependent, that is part of associating page and page_cgroup and that of associating page_cgroup and subsystem. Rather than to do such a thing, I now think that it would be better to implement swap-limit as part of memory subsystem. Thanks, Daisuke Nishimura. ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-07 4:23 ` Daisuke Nishimura 0 siblings, 0 replies; 50+ messages in thread From: Daisuke Nishimura @ 2008-03-07 4:23 UTC (permalink / raw) To: balbir; +Cc: containers, linux-mm, xemul, Hugh Dickins Hi. Balbir Singh wrote: > Daisuke Nishimura wrote: >> Basic idea of my implementation: >> - what will be charged ? >> the number of swap entries. >> >> - when to charge/uncharge ? >> charge at get_swap_entry(), and uncharge at swap_entry_free(). >> > > You mean get_swap_page(), I suppose. The assumption in the code is that every > swap page being charged has already been charged by the memory controller (that > will go against making the controllers independent). Also, be careful of any To make swap-limit independent of memory subsystem, I think page_cgroup code should be separated into two part: subsystem-independent and subsystem-dependent, that is part of associating page and page_cgroup and that of associating page_cgroup and subsystem. Rather than to do such a thing, I now think that it would be better to implement swap-limit as part of memory subsystem. Thanks, Daisuke Nishimura. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-05 5:59 ` Daisuke Nishimura @ 2008-03-05 8:33 ` Pavel Emelyanov -1 siblings, 0 replies; 50+ messages in thread From: Pavel Emelyanov @ 2008-03-05 8:33 UTC (permalink / raw) To: Daisuke Nishimura Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, xemul-GEFAQzZX7r8dnm+yROfE0A, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 Daisuke Nishimura wrote: > Hi. > > Even if limiting memory usage by cgroup memory subsystem > or isolating memory by cpuset, swap space is shared, so > resource isolation is not enough. If one group uses up all the > swap space, it can affect other groups. > > I try making a patch of swap subsystem based on memory > subsystem, which limits swap usage per cgroup. > It can now charge and limit the swap usage. > > I implemented this feature as a new subsystem, > not as a part of memory subsystem, because I don't want to > make big change to memcontrol.c, and even if implemented > as other subsystem, users can manage memory and swap on > the same cgroup directory if mount them together. > > Basic idea of my implementation: > - what will be charged ? > the number of swap entries. This is a very obscure thing "a swap entry" for the end user. People would prefer accounting bytes. > - when to charge/uncharge ? > charge at get_swap_entry(), and uncharge at swap_entry_free(). > > - to what group charge the swap entry ? > To determine to what swap_cgroup (corresponding to mem_cgroup in > memory subsystem) the swap entry should be charged, > I added a pointer to mm_struct to page_cgroup(pc->pc_mm), and > changed the argument of get_swap_entry() from (void) to > (struct page *). As a result, get_swap_entry() can determine > to what swap_cgroup it should charge the swap entry > by referring to page->page_cgroup->mm_struct->swap_cgroup. > > - from what group uncharge the swap entry ? > I added to swap_info_struct a member 'struct swap_cgroup **', > array of pointer to which swap_cgroup the swap entry is > charged. > > Todo: > - rebase new kernel, and split into some patches. > - Merge with memory subsystem (if it would be better), or > remove dependency on CONFIG_CGROUP_MEM_CONT if possible > (needs to make page_cgroup more generic one). Merge is a must IMHO. I can hardly imagine a situation in which someone would need these two separately. > - More tests, cleanups, and feartures :-) > > > Any comments or discussions would be appreciated. > > Thanks, > Daisuke Nishimura > > > Signed-off-by: Daisuke Nishimura <nishimura-YQH0OdQVrdy45+QrQBaojngSJqDPrsil@public.gmane.org> > > --- > diff -uprN linux-2.6.24-mm1/include/linux/cgroup_subsys.h linux-2.6.24-mm1-swaplimit/include/linux/cgroup_subsys.h > --- linux-2.6.24-mm1/include/linux/cgroup_subsys.h 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/include/linux/cgroup_subsys.h 2008-03-03 10:56:56.000000000 +0900 > @@ -42,3 +42,9 @@ SUBSYS(mem_cgroup) > #endif > > /* */ > + > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > +SUBSYS(swap) > +#endif > + > +/* */ > diff -uprN linux-2.6.24-mm1/include/linux/memcontrol.h linux-2.6.24-mm1-swaplimit/include/linux/memcontrol.h > --- linux-2.6.24-mm1/include/linux/memcontrol.h 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/include/linux/memcontrol.h 2008-03-03 10:56:56.000000000 +0900 > @@ -29,6 +29,21 @@ struct page; > struct mm_struct; > > #ifdef CONFIG_CGROUP_MEM_CONT > +/* > + * A page_cgroup page is associated with every page descriptor. The > + * page_cgroup helps us identify information about the cgroup > + */ > +struct page_cgroup { > + struct list_head lru; /* per cgroup LRU list */ > + struct page *page; > + struct mem_cgroup *mem_cgroup; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct mm_struct *pc_mm; > +#endif Try not to add new entries here. > + atomic_t ref_cnt; /* Helpful when pages move b/w */ > + /* mapped and cached states */ > + int flags; > +}; > > extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); > extern void mm_free_cgroup(struct mm_struct *mm); > diff -uprN linux-2.6.24-mm1/include/linux/mm_types.h linux-2.6.24-mm1-swaplimit/include/linux/mm_types.h > --- linux-2.6.24-mm1/include/linux/mm_types.h 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/include/linux/mm_types.h 2008-03-03 10:56:56.000000000 +0900 > @@ -233,6 +233,9 @@ struct mm_struct { > #ifdef CONFIG_CGROUP_MEM_CONT > struct mem_cgroup *mem_cgroup; > #endif > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct swap_cgroup *swap_cgroup; > +#endif > }; > > #endif /* _LINUX_MM_TYPES_H */ > diff -uprN linux-2.6.24-mm1/include/linux/swap.h linux-2.6.24-mm1-swaplimit/include/linux/swap.h > --- linux-2.6.24-mm1/include/linux/swap.h 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/include/linux/swap.h 2008-03-03 10:56:56.000000000 +0900 > @@ -7,6 +7,7 @@ > #include <linux/list.h> > #include <linux/memcontrol.h> > #include <linux/sched.h> > +#include <linux/swap_limit.h> > > #include <asm/atomic.h> > #include <asm/page.h> > @@ -141,6 +142,9 @@ struct swap_info_struct { > struct swap_extent *curr_swap_extent; > unsigned old_block_size; > unsigned short * swap_map; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct swap_cgroup **swap_cgroup; > +#endif > unsigned int lowest_bit; > unsigned int highest_bit; > unsigned int cluster_next; > @@ -239,7 +243,7 @@ extern struct page *swapin_readahead(swp > extern long total_swap_pages; > extern unsigned int nr_swapfiles; > extern void si_swapinfo(struct sysinfo *); > -extern swp_entry_t get_swap_page(void); > +extern swp_entry_t get_swap_page(struct page *); > extern swp_entry_t get_swap_page_of_type(int); > extern int swap_duplicate(swp_entry_t); > extern int valid_swaphandles(swp_entry_t, unsigned long *); > @@ -342,7 +346,7 @@ static inline int remove_exclusive_swap_ > return 0; > } > > -static inline swp_entry_t get_swap_page(void) > +static inline swp_entry_t get_swap_page(struct page *page) > { > swp_entry_t entry; > entry.val = 0; > diff -uprN linux-2.6.24-mm1/include/linux/swap_limit.h linux-2.6.24-mm1-swaplimit/include/linux/swap_limit.h > --- linux-2.6.24-mm1/include/linux/swap_limit.h 1970-01-01 09:00:00.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/include/linux/swap_limit.h 2008-03-03 10:56:56.000000000 +0900 > @@ -0,0 +1,65 @@ > +/* > + * swap_limit.h > + * > + */ > +#ifndef _LINUX_SWAP_LIMIT_H > +#define _LINUX_SWAP_LIMIT_H > + > +#include <linux/swap.h> > +#include <linux/cgroup.h> > +#include <linux/res_counter.h> > + > +struct swap_cgroup; > +struct swap_info_struct; > + > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > +struct swap_cgroup { > + struct cgroup_subsys_state css; > + struct res_counter res; > +}; > + > +static inline struct swap_cgroup *swap_cgroup_from_cgrp(struct cgroup *cgrp) > +{ > + return container_of(cgroup_subsys_state(cgrp, swap_subsys_id), > + struct swap_cgroup, > + css); > +} > + > +static inline struct swap_cgroup *swap_cgroup_from_task(struct task_struct *p) > +{ > + return container_of(task_subsys_state(p, swap_subsys_id), > + struct swap_cgroup, css); > +} > + > +extern int swap_cgroup_charge(struct page *page, > + struct swap_info_struct *si, > + unsigned long offset); > +extern void swap_cgroup_uncharge(struct swap_info_struct *si, > + unsigned long offset); > + > +#else /* CONFIG_CGROUP_SWAP_LIMIT */ > +static inline struct swap_cgroup *swap_cgroup_from_cgrp(struct cgroup *cgrp) > +{ > + return NULL; > +} > + > +static inline struct swap_cgroup *swap_cgroup_from_task(struct task_struct *p) > +{ > + return NULL; > +} > + > +static inline int swap_cgroup_charge(struct page *page, > + struct swap_info_struct *si, > + unsigned long offset) > +{ > + return 0; > +} > + > +static inline void swap_cgroup_uncharge(struct swap_info_struct *si, > + unsigned long offset) > +{ > +} > + > +#endif > + > +#endif > diff -uprN linux-2.6.24-mm1/init/Kconfig linux-2.6.24-mm1-swaplimit/init/Kconfig > --- linux-2.6.24-mm1/init/Kconfig 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/init/Kconfig 2008-03-03 10:56:56.000000000 +0900 > @@ -383,6 +383,12 @@ config CGROUP_MEM_CONT > Provides a memory controller that manages both page cache and > RSS memory. > > +config CGROUP_SWAP_LIMIT > + bool "cgroup subsystem for swap" > + depends on CGROUP_MEM_CONT && SWAP > + help > + Provides a swap controller that manages and limits swap usage. > + > config PROC_PID_CPUSET > bool "Include legacy /proc/<pid>/cpuset file" > depends on CPUSETS > diff -uprN linux-2.6.24-mm1/mm/Makefile linux-2.6.24-mm1-swaplimit/mm/Makefile > --- linux-2.6.24-mm1/mm/Makefile 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/mm/Makefile 2008-03-03 10:56:56.000000000 +0900 > @@ -32,4 +32,5 @@ obj-$(CONFIG_MIGRATION) += migrate.o > obj-$(CONFIG_SMP) += allocpercpu.o > obj-$(CONFIG_QUICKLIST) += quicklist.o > obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o > +obj-$(CONFIG_CGROUP_SWAP_LIMIT) += swap_limit.o > > diff -uprN linux-2.6.24-mm1/mm/memcontrol.c linux-2.6.24-mm1-swaplimit/mm/memcontrol.c > --- linux-2.6.24-mm1/mm/memcontrol.c 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/mm/memcontrol.c 2008-03-03 10:56:56.000000000 +0900 > @@ -19,6 +19,7 @@ > > #include <linux/res_counter.h> > #include <linux/memcontrol.h> > +#include <linux/swap_limit.h> > #include <linux/cgroup.h> > #include <linux/mm.h> > #include <linux/smp.h> > @@ -146,18 +147,6 @@ struct mem_cgroup { > #define PAGE_CGROUP_LOCK_BIT 0x0 > #define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) > > -/* > - * A page_cgroup page is associated with every page descriptor. The > - * page_cgroup helps us identify information about the cgroup > - */ > -struct page_cgroup { > - struct list_head lru; /* per cgroup LRU list */ > - struct page *page; > - struct mem_cgroup *mem_cgroup; > - atomic_t ref_cnt; /* Helpful when pages move b/w */ > - /* mapped and cached states */ > - int flags; > -}; > #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ > #define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ > > @@ -254,15 +243,27 @@ struct mem_cgroup *mem_cgroup_from_task( > void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) > { > struct mem_cgroup *mem; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct swap_cgroup *swap; > +#endif > > mem = mem_cgroup_from_task(p); > css_get(&mem->css); > mm->mem_cgroup = mem; > + > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + swap = swap_cgroup_from_task(p); > + css_get(&swap->css); > + mm->swap_cgroup = swap; > +#endif > } > > void mm_free_cgroup(struct mm_struct *mm) > { > css_put(&mm->mem_cgroup->css); > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + css_put(&mm->swap_cgroup->css); > +#endif > } > > static inline int page_cgroup_locked(struct page *page) > @@ -664,6 +665,10 @@ retry: > pc->flags = PAGE_CGROUP_FLAG_ACTIVE; > if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) > pc->flags |= PAGE_CGROUP_FLAG_CACHE; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + atomic_inc(&mm->mm_count); > + pc->pc_mm = mm; > +#endif What kernel is this patch for? I cannot find this code in 2.6.25-rc3-mm1 > if (!page || page_cgroup_assign_new_page_cgroup(page, pc)) { > /* > @@ -673,6 +678,9 @@ retry: > */ > res_counter_uncharge(&mem->res, PAGE_SIZE); > css_put(&mem->css); > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + mmdrop(mm); > +#endif > kfree(pc); > if (!page) > goto done; > @@ -744,6 +752,9 @@ void mem_cgroup_uncharge(struct page_cgr > if (clear_page_cgroup(page, pc) == pc) { > mem = pc->mem_cgroup; > css_put(&mem->css); > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + mmdrop(pc->pc_mm); > +#endif > res_counter_uncharge(&mem->res, PAGE_SIZE); > spin_lock_irqsave(&mz->lru_lock, flags); > __mem_cgroup_remove_list(pc); > @@ -859,6 +870,9 @@ retry: > atomic_set(&pc->ref_cnt, 0); > if (clear_page_cgroup(page, pc) == pc) { > css_put(&mem->css); > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + mmdrop(pc->pc_mm); > +#endif > res_counter_uncharge(&mem->res, PAGE_SIZE); > __mem_cgroup_remove_list(pc); > kfree(pc); > diff -uprN linux-2.6.24-mm1/mm/shmem.c linux-2.6.24-mm1-swaplimit/mm/shmem.c > --- linux-2.6.24-mm1/mm/shmem.c 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/mm/shmem.c 2008-03-03 10:56:56.000000000 +0900 > @@ -1024,7 +1024,7 @@ static int shmem_writepage(struct page * > * want to check if there's a redundant swappage to be discarded. > */ > if (wbc->for_reclaim) > - swap = get_swap_page(); > + swap = get_swap_page(page); > else > swap.val = 0; > > diff -uprN linux-2.6.24-mm1/mm/swap_limit.c linux-2.6.24-mm1-swaplimit/mm/swap_limit.c > --- linux-2.6.24-mm1/mm/swap_limit.c 1970-01-01 09:00:00.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/mm/swap_limit.c 2008-03-05 14:39:23.000000000 +0900 > @@ -0,0 +1,194 @@ > +/* > + * swap_limit.c - SWAP controller (based on memcontrol.c) > + * > + */ > + > +#include <linux/err.h> > +#include <linux/fs.h> > +#include <linux/types.h> > +#include <linux/sched.h> > +#include <linux/mm.h> > +#include <linux/swap.h> > +#include <linux/rcupdate.h> > +#include <linux/cgroup.h> > +#include <linux/res_counter.h> > +#include <linux/memcontrol.h> > +#include <linux/swap_limit.h> > + > +static struct swap_cgroup init_swap_cgroup; > + > +int swap_cgroup_charge(struct page *page, > + struct swap_info_struct *si, > + unsigned long offset) > +{ > + int ret; > + struct page_cgroup *pc; > + struct mm_struct *mm; > + struct swap_cgroup *swap; > + > + BUG_ON(!page); > + > + /* > + * Pages to be swapped out should have been charged by memory cgroup, > + * but very rarely, pc would be NULL (pc is not reliable without lock, > + * so I should fix here). > + * In such cases, we charge the init_mm now. > + */ > + pc = page_get_page_cgroup(page); > + if (WARN_ON(!pc)) > + mm = &init_mm; > + else > + mm = pc->pc_mm; > + BUG_ON(!mm); > + > + rcu_read_lock(); > + swap = rcu_dereference(mm->swap_cgroup); > + rcu_read_unlock(); > + BUG_ON(!swap); > + > + ret = res_counter_charge(&swap->res, PAGE_SIZE); > + if (!ret) { > + css_get(&swap->css); > + si->swap_cgroup[offset] = swap; > + } > + > + return ret; > +} > + > +void swap_cgroup_uncharge(struct swap_info_struct *si, unsigned long offset) > +{ > + struct swap_cgroup *swap = si->swap_cgroup[offset]; > + > + /* > + * "swap" would be NULL: > + * 1. when get_swap_page() failed at charging swap_cgroup, > + * and called swap_entry_free(). > + * 2. when this swap entry had been assigned by > + * get_swap_page_of_type() (via SWSUSP ?). > + */ > + if (swap) { > + res_counter_uncharge(&swap->res, PAGE_SIZE); > + si->swap_cgroup[offset] = NULL; > + css_put(&swap->css); > + } > +} > + > +static struct cgroup_subsys_state *swap_cgroup_create(struct cgroup_subsys *ss, > + struct cgroup *cgrp) > +{ > + struct swap_cgroup *swap; > + > + if (unlikely((cgrp->parent) == NULL)) { > + swap = &init_swap_cgroup; > + init_mm.swap_cgroup = swap; > + } else > + swap = kzalloc(sizeof(struct swap_cgroup), GFP_KERNEL); > + > + if (swap == NULL) > + return ERR_PTR(-ENOMEM); > + > + res_counter_init(&swap->res); > + > + return &swap->css; > +} > + > +static void swap_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) > +{ > + kfree(swap_cgroup_from_cgrp(cgrp)); > +} > + > +static ssize_t swap_cgroup_read(struct cgroup *cgrp, > + struct cftype *cft, struct file *file, > + char __user *userbuf, size_t nbytes, > + loff_t *ppos) > +{ > + return res_counter_read(&swap_cgroup_from_cgrp(cgrp)->res, > + cft->private, userbuf, nbytes, ppos, > + NULL); > +} > + > +static int swap_cgroup_write_strategy(char *buf, unsigned long long *tmp) > +{ > + *tmp = memparse(buf, &buf); > + if (*buf != '\0') > + return -EINVAL; > + > + /* > + * Round up the value to the closest page size > + */ > + *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT; > + return 0; > +} > + > +static ssize_t swap_cgroup_write(struct cgroup *cgrp, struct cftype *cft, > + struct file *file, const char __user *userbuf, > + size_t nbytes, loff_t *ppos) > +{ > + return res_counter_write(&swap_cgroup_from_cgrp(cgrp)->res, > + cft->private, userbuf, nbytes, ppos, > + swap_cgroup_write_strategy); > +} > + > +static struct cftype swap_files[] = { > + { > + .name = "usage_in_bytes", > + .private = RES_USAGE, > + .read = swap_cgroup_read, > + }, > + { > + .name = "limit_in_bytes", > + .private = RES_LIMIT, > + .write = swap_cgroup_write, > + .read = swap_cgroup_read, > + }, > + { > + .name = "failcnt", > + .private = RES_FAILCNT, > + .read = swap_cgroup_read, > + }, > +}; > + > +static int swap_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) > +{ > + return cgroup_add_files(cgrp, ss, swap_files, ARRAY_SIZE(swap_files)); > +} > + > +static void swap_cgroup_move_task(struct cgroup_subsys *ss, > + struct cgroup *cgrp, > + struct cgroup *old_cgrp, > + struct task_struct *p) > +{ > + struct mm_struct *mm; > + struct swap_cgroup *swap, *old_swap; > + > + mm = get_task_mm(p); > + if (mm == NULL) > + return; > + > + swap = swap_cgroup_from_cgrp(cgrp); > + old_swap = swap_cgroup_from_cgrp(old_cgrp); > + > + if (swap == old_swap) > + goto out; > + > + if (p->tgid != p->pid) > + goto out; > + > + css_get(&swap->css); > + rcu_assign_pointer(mm->swap_cgroup, swap); > + css_put(&old_swap->css); > + > +out: > + mmput(mm); > + return; > +} > + > +struct cgroup_subsys swap_subsys = { > + .name = "swap", > + .create = swap_cgroup_create, > + .destroy = swap_cgroup_destroy, > + .populate = swap_cgroup_populate, > + .subsys_id = swap_subsys_id, > + .attach = swap_cgroup_move_task, > + .early_init = 0, > +}; > diff -uprN linux-2.6.24-mm1/mm/swap_state.c linux-2.6.24-mm1-swaplimit/mm/swap_state.c > --- linux-2.6.24-mm1/mm/swap_state.c 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/mm/swap_state.c 2008-03-03 10:56:56.000000000 +0900 > @@ -128,7 +128,7 @@ int add_to_swap(struct page * page, gfp_ > BUG_ON(!PageUptodate(page)); > > for (;;) { > - entry = get_swap_page(); > + entry = get_swap_page(page); > if (!entry.val) > return 0; > > diff -uprN linux-2.6.24-mm1/mm/swapfile.c linux-2.6.24-mm1-swaplimit/mm/swapfile.c > --- linux-2.6.24-mm1/mm/swapfile.c 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/mm/swapfile.c 2008-03-03 10:56:56.000000000 +0900 > @@ -28,6 +28,7 @@ > #include <linux/capability.h> > #include <linux/syscalls.h> > #include <linux/memcontrol.h> > +#include <linux/swap_limit.h> > > #include <asm/pgtable.h> > #include <asm/tlbflush.h> > @@ -172,7 +173,10 @@ no_page: > return 0; > } > > -swp_entry_t get_swap_page(void) > +/* get_swap_page() calls this */ > +static int swap_entry_free(struct swap_info_struct *, unsigned long); > + > +swp_entry_t get_swap_page(struct page *page) > { > struct swap_info_struct *si; > pgoff_t offset; > @@ -201,6 +205,16 @@ swp_entry_t get_swap_page(void) > swap_list.next = next; > offset = scan_swap_map(si); > if (offset) { > + /* > + * This should be the first use of this swap entry, > + * so charge this swap entry now. > + */ > + if (swap_cgroup_charge(page, si, offset)) { > + /* should free this entry */ :) Please, don't create comments, that duplicate the next line. > + swap_entry_free(si, offset); > + > + goto noswap; > + } > spin_unlock(&swap_lock); > return swp_entry(type, offset); > } > @@ -285,6 +299,7 @@ static int swap_entry_free(struct swap_i > swap_list.next = p - swap_info; > nr_swap_pages++; > p->inuse_pages--; > + swap_cgroup_uncharge(p, offset); > } > } > return count; > @@ -1207,6 +1222,9 @@ asmlinkage long sys_swapoff(const char _ > { > struct swap_info_struct * p = NULL; > unsigned short *swap_map; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct swap_cgroup **swap_cgroup; > +#endif > struct file *swap_file, *victim; > struct address_space *mapping; > struct inode *inode; > @@ -1309,10 +1327,17 @@ asmlinkage long sys_swapoff(const char _ > p->max = 0; > swap_map = p->swap_map; > p->swap_map = NULL; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + swap_cgroup = p->swap_cgroup; > + p->swap_cgroup = NULL; > +#endif > p->flags = 0; > spin_unlock(&swap_lock); > mutex_unlock(&swapon_mutex); > vfree(swap_map); > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + vfree(swap_cgroup); > +#endif > inode = mapping->host; > if (S_ISBLK(inode->i_mode)) { > struct block_device *bdev = I_BDEV(inode); > @@ -1460,6 +1485,9 @@ asmlinkage long sys_swapon(const char __ > unsigned long maxpages = 1; > int swapfilesize; > unsigned short *swap_map; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct swap_cgroup **swap_cgroup; > +#endif > struct page *page = NULL; > struct inode *inode = NULL; > int did_down = 0; > @@ -1483,6 +1511,9 @@ asmlinkage long sys_swapon(const char __ > p->swap_file = NULL; > p->old_block_size = 0; > p->swap_map = NULL; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + p->swap_cgroup = NULL; > +#endif > p->lowest_bit = 0; > p->highest_bit = 0; > p->cluster_nr = 0; > @@ -1647,6 +1678,15 @@ asmlinkage long sys_swapon(const char __ > 1 /* header page */; > if (error) > goto bad_swap; > + > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + p->swap_cgroup = vmalloc(maxpages * sizeof(*swap_cgroup)); > + if (!(p->swap_cgroup)) { > + error = -ENOMEM; > + goto bad_swap; > + } > + memset(p->swap_cgroup, 0, maxpages * sizeof(*swap_cgroup)); > +#endif > } > > if (nr_good_pages) { > @@ -1704,13 +1744,22 @@ bad_swap: > bad_swap_2: > spin_lock(&swap_lock); > swap_map = p->swap_map; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + swap_cgroup = p->swap_cgroup; > +#endif > p->swap_file = NULL; > p->swap_map = NULL; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + p->swap_cgroup = NULL; > +#endif > p->flags = 0; > if (!(swap_flags & SWAP_FLAG_PREFER)) > ++least_priority; > spin_unlock(&swap_lock); > vfree(swap_map); > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + vfree(swap_cgroup); > +#endif > if (swap_file) > filp_close(swap_file, NULL); > out: > > > ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-05 8:33 ` Pavel Emelyanov 0 siblings, 0 replies; 50+ messages in thread From: Pavel Emelyanov @ 2008-03-05 8:33 UTC (permalink / raw) To: Daisuke Nishimura; +Cc: containers, linux-mm, balbir, xemul, kamezawa.hiroyu Daisuke Nishimura wrote: > Hi. > > Even if limiting memory usage by cgroup memory subsystem > or isolating memory by cpuset, swap space is shared, so > resource isolation is not enough. If one group uses up all the > swap space, it can affect other groups. > > I try making a patch of swap subsystem based on memory > subsystem, which limits swap usage per cgroup. > It can now charge and limit the swap usage. > > I implemented this feature as a new subsystem, > not as a part of memory subsystem, because I don't want to > make big change to memcontrol.c, and even if implemented > as other subsystem, users can manage memory and swap on > the same cgroup directory if mount them together. > > Basic idea of my implementation: > - what will be charged ? > the number of swap entries. This is a very obscure thing "a swap entry" for the end user. People would prefer accounting bytes. > - when to charge/uncharge ? > charge at get_swap_entry(), and uncharge at swap_entry_free(). > > - to what group charge the swap entry ? > To determine to what swap_cgroup (corresponding to mem_cgroup in > memory subsystem) the swap entry should be charged, > I added a pointer to mm_struct to page_cgroup(pc->pc_mm), and > changed the argument of get_swap_entry() from (void) to > (struct page *). As a result, get_swap_entry() can determine > to what swap_cgroup it should charge the swap entry > by referring to page->page_cgroup->mm_struct->swap_cgroup. > > - from what group uncharge the swap entry ? > I added to swap_info_struct a member 'struct swap_cgroup **', > array of pointer to which swap_cgroup the swap entry is > charged. > > Todo: > - rebase new kernel, and split into some patches. > - Merge with memory subsystem (if it would be better), or > remove dependency on CONFIG_CGROUP_MEM_CONT if possible > (needs to make page_cgroup more generic one). Merge is a must IMHO. I can hardly imagine a situation in which someone would need these two separately. > - More tests, cleanups, and feartures :-) > > > Any comments or discussions would be appreciated. > > Thanks, > Daisuke Nishimura > > > Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> > > --- > diff -uprN linux-2.6.24-mm1/include/linux/cgroup_subsys.h linux-2.6.24-mm1-swaplimit/include/linux/cgroup_subsys.h > --- linux-2.6.24-mm1/include/linux/cgroup_subsys.h 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/include/linux/cgroup_subsys.h 2008-03-03 10:56:56.000000000 +0900 > @@ -42,3 +42,9 @@ SUBSYS(mem_cgroup) > #endif > > /* */ > + > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > +SUBSYS(swap) > +#endif > + > +/* */ > diff -uprN linux-2.6.24-mm1/include/linux/memcontrol.h linux-2.6.24-mm1-swaplimit/include/linux/memcontrol.h > --- linux-2.6.24-mm1/include/linux/memcontrol.h 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/include/linux/memcontrol.h 2008-03-03 10:56:56.000000000 +0900 > @@ -29,6 +29,21 @@ struct page; > struct mm_struct; > > #ifdef CONFIG_CGROUP_MEM_CONT > +/* > + * A page_cgroup page is associated with every page descriptor. The > + * page_cgroup helps us identify information about the cgroup > + */ > +struct page_cgroup { > + struct list_head lru; /* per cgroup LRU list */ > + struct page *page; > + struct mem_cgroup *mem_cgroup; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct mm_struct *pc_mm; > +#endif Try not to add new entries here. > + atomic_t ref_cnt; /* Helpful when pages move b/w */ > + /* mapped and cached states */ > + int flags; > +}; > > extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); > extern void mm_free_cgroup(struct mm_struct *mm); > diff -uprN linux-2.6.24-mm1/include/linux/mm_types.h linux-2.6.24-mm1-swaplimit/include/linux/mm_types.h > --- linux-2.6.24-mm1/include/linux/mm_types.h 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/include/linux/mm_types.h 2008-03-03 10:56:56.000000000 +0900 > @@ -233,6 +233,9 @@ struct mm_struct { > #ifdef CONFIG_CGROUP_MEM_CONT > struct mem_cgroup *mem_cgroup; > #endif > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct swap_cgroup *swap_cgroup; > +#endif > }; > > #endif /* _LINUX_MM_TYPES_H */ > diff -uprN linux-2.6.24-mm1/include/linux/swap.h linux-2.6.24-mm1-swaplimit/include/linux/swap.h > --- linux-2.6.24-mm1/include/linux/swap.h 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/include/linux/swap.h 2008-03-03 10:56:56.000000000 +0900 > @@ -7,6 +7,7 @@ > #include <linux/list.h> > #include <linux/memcontrol.h> > #include <linux/sched.h> > +#include <linux/swap_limit.h> > > #include <asm/atomic.h> > #include <asm/page.h> > @@ -141,6 +142,9 @@ struct swap_info_struct { > struct swap_extent *curr_swap_extent; > unsigned old_block_size; > unsigned short * swap_map; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct swap_cgroup **swap_cgroup; > +#endif > unsigned int lowest_bit; > unsigned int highest_bit; > unsigned int cluster_next; > @@ -239,7 +243,7 @@ extern struct page *swapin_readahead(swp > extern long total_swap_pages; > extern unsigned int nr_swapfiles; > extern void si_swapinfo(struct sysinfo *); > -extern swp_entry_t get_swap_page(void); > +extern swp_entry_t get_swap_page(struct page *); > extern swp_entry_t get_swap_page_of_type(int); > extern int swap_duplicate(swp_entry_t); > extern int valid_swaphandles(swp_entry_t, unsigned long *); > @@ -342,7 +346,7 @@ static inline int remove_exclusive_swap_ > return 0; > } > > -static inline swp_entry_t get_swap_page(void) > +static inline swp_entry_t get_swap_page(struct page *page) > { > swp_entry_t entry; > entry.val = 0; > diff -uprN linux-2.6.24-mm1/include/linux/swap_limit.h linux-2.6.24-mm1-swaplimit/include/linux/swap_limit.h > --- linux-2.6.24-mm1/include/linux/swap_limit.h 1970-01-01 09:00:00.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/include/linux/swap_limit.h 2008-03-03 10:56:56.000000000 +0900 > @@ -0,0 +1,65 @@ > +/* > + * swap_limit.h > + * > + */ > +#ifndef _LINUX_SWAP_LIMIT_H > +#define _LINUX_SWAP_LIMIT_H > + > +#include <linux/swap.h> > +#include <linux/cgroup.h> > +#include <linux/res_counter.h> > + > +struct swap_cgroup; > +struct swap_info_struct; > + > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > +struct swap_cgroup { > + struct cgroup_subsys_state css; > + struct res_counter res; > +}; > + > +static inline struct swap_cgroup *swap_cgroup_from_cgrp(struct cgroup *cgrp) > +{ > + return container_of(cgroup_subsys_state(cgrp, swap_subsys_id), > + struct swap_cgroup, > + css); > +} > + > +static inline struct swap_cgroup *swap_cgroup_from_task(struct task_struct *p) > +{ > + return container_of(task_subsys_state(p, swap_subsys_id), > + struct swap_cgroup, css); > +} > + > +extern int swap_cgroup_charge(struct page *page, > + struct swap_info_struct *si, > + unsigned long offset); > +extern void swap_cgroup_uncharge(struct swap_info_struct *si, > + unsigned long offset); > + > +#else /* CONFIG_CGROUP_SWAP_LIMIT */ > +static inline struct swap_cgroup *swap_cgroup_from_cgrp(struct cgroup *cgrp) > +{ > + return NULL; > +} > + > +static inline struct swap_cgroup *swap_cgroup_from_task(struct task_struct *p) > +{ > + return NULL; > +} > + > +static inline int swap_cgroup_charge(struct page *page, > + struct swap_info_struct *si, > + unsigned long offset) > +{ > + return 0; > +} > + > +static inline void swap_cgroup_uncharge(struct swap_info_struct *si, > + unsigned long offset) > +{ > +} > + > +#endif > + > +#endif > diff -uprN linux-2.6.24-mm1/init/Kconfig linux-2.6.24-mm1-swaplimit/init/Kconfig > --- linux-2.6.24-mm1/init/Kconfig 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/init/Kconfig 2008-03-03 10:56:56.000000000 +0900 > @@ -383,6 +383,12 @@ config CGROUP_MEM_CONT > Provides a memory controller that manages both page cache and > RSS memory. > > +config CGROUP_SWAP_LIMIT > + bool "cgroup subsystem for swap" > + depends on CGROUP_MEM_CONT && SWAP > + help > + Provides a swap controller that manages and limits swap usage. > + > config PROC_PID_CPUSET > bool "Include legacy /proc/<pid>/cpuset file" > depends on CPUSETS > diff -uprN linux-2.6.24-mm1/mm/Makefile linux-2.6.24-mm1-swaplimit/mm/Makefile > --- linux-2.6.24-mm1/mm/Makefile 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/mm/Makefile 2008-03-03 10:56:56.000000000 +0900 > @@ -32,4 +32,5 @@ obj-$(CONFIG_MIGRATION) += migrate.o > obj-$(CONFIG_SMP) += allocpercpu.o > obj-$(CONFIG_QUICKLIST) += quicklist.o > obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o > +obj-$(CONFIG_CGROUP_SWAP_LIMIT) += swap_limit.o > > diff -uprN linux-2.6.24-mm1/mm/memcontrol.c linux-2.6.24-mm1-swaplimit/mm/memcontrol.c > --- linux-2.6.24-mm1/mm/memcontrol.c 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/mm/memcontrol.c 2008-03-03 10:56:56.000000000 +0900 > @@ -19,6 +19,7 @@ > > #include <linux/res_counter.h> > #include <linux/memcontrol.h> > +#include <linux/swap_limit.h> > #include <linux/cgroup.h> > #include <linux/mm.h> > #include <linux/smp.h> > @@ -146,18 +147,6 @@ struct mem_cgroup { > #define PAGE_CGROUP_LOCK_BIT 0x0 > #define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) > > -/* > - * A page_cgroup page is associated with every page descriptor. The > - * page_cgroup helps us identify information about the cgroup > - */ > -struct page_cgroup { > - struct list_head lru; /* per cgroup LRU list */ > - struct page *page; > - struct mem_cgroup *mem_cgroup; > - atomic_t ref_cnt; /* Helpful when pages move b/w */ > - /* mapped and cached states */ > - int flags; > -}; > #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ > #define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ > > @@ -254,15 +243,27 @@ struct mem_cgroup *mem_cgroup_from_task( > void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) > { > struct mem_cgroup *mem; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct swap_cgroup *swap; > +#endif > > mem = mem_cgroup_from_task(p); > css_get(&mem->css); > mm->mem_cgroup = mem; > + > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + swap = swap_cgroup_from_task(p); > + css_get(&swap->css); > + mm->swap_cgroup = swap; > +#endif > } > > void mm_free_cgroup(struct mm_struct *mm) > { > css_put(&mm->mem_cgroup->css); > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + css_put(&mm->swap_cgroup->css); > +#endif > } > > static inline int page_cgroup_locked(struct page *page) > @@ -664,6 +665,10 @@ retry: > pc->flags = PAGE_CGROUP_FLAG_ACTIVE; > if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) > pc->flags |= PAGE_CGROUP_FLAG_CACHE; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + atomic_inc(&mm->mm_count); > + pc->pc_mm = mm; > +#endif What kernel is this patch for? I cannot find this code in 2.6.25-rc3-mm1 > if (!page || page_cgroup_assign_new_page_cgroup(page, pc)) { > /* > @@ -673,6 +678,9 @@ retry: > */ > res_counter_uncharge(&mem->res, PAGE_SIZE); > css_put(&mem->css); > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + mmdrop(mm); > +#endif > kfree(pc); > if (!page) > goto done; > @@ -744,6 +752,9 @@ void mem_cgroup_uncharge(struct page_cgr > if (clear_page_cgroup(page, pc) == pc) { > mem = pc->mem_cgroup; > css_put(&mem->css); > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + mmdrop(pc->pc_mm); > +#endif > res_counter_uncharge(&mem->res, PAGE_SIZE); > spin_lock_irqsave(&mz->lru_lock, flags); > __mem_cgroup_remove_list(pc); > @@ -859,6 +870,9 @@ retry: > atomic_set(&pc->ref_cnt, 0); > if (clear_page_cgroup(page, pc) == pc) { > css_put(&mem->css); > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + mmdrop(pc->pc_mm); > +#endif > res_counter_uncharge(&mem->res, PAGE_SIZE); > __mem_cgroup_remove_list(pc); > kfree(pc); > diff -uprN linux-2.6.24-mm1/mm/shmem.c linux-2.6.24-mm1-swaplimit/mm/shmem.c > --- linux-2.6.24-mm1/mm/shmem.c 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/mm/shmem.c 2008-03-03 10:56:56.000000000 +0900 > @@ -1024,7 +1024,7 @@ static int shmem_writepage(struct page * > * want to check if there's a redundant swappage to be discarded. > */ > if (wbc->for_reclaim) > - swap = get_swap_page(); > + swap = get_swap_page(page); > else > swap.val = 0; > > diff -uprN linux-2.6.24-mm1/mm/swap_limit.c linux-2.6.24-mm1-swaplimit/mm/swap_limit.c > --- linux-2.6.24-mm1/mm/swap_limit.c 1970-01-01 09:00:00.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/mm/swap_limit.c 2008-03-05 14:39:23.000000000 +0900 > @@ -0,0 +1,194 @@ > +/* > + * swap_limit.c - SWAP controller (based on memcontrol.c) > + * > + */ > + > +#include <linux/err.h> > +#include <linux/fs.h> > +#include <linux/types.h> > +#include <linux/sched.h> > +#include <linux/mm.h> > +#include <linux/swap.h> > +#include <linux/rcupdate.h> > +#include <linux/cgroup.h> > +#include <linux/res_counter.h> > +#include <linux/memcontrol.h> > +#include <linux/swap_limit.h> > + > +static struct swap_cgroup init_swap_cgroup; > + > +int swap_cgroup_charge(struct page *page, > + struct swap_info_struct *si, > + unsigned long offset) > +{ > + int ret; > + struct page_cgroup *pc; > + struct mm_struct *mm; > + struct swap_cgroup *swap; > + > + BUG_ON(!page); > + > + /* > + * Pages to be swapped out should have been charged by memory cgroup, > + * but very rarely, pc would be NULL (pc is not reliable without lock, > + * so I should fix here). > + * In such cases, we charge the init_mm now. > + */ > + pc = page_get_page_cgroup(page); > + if (WARN_ON(!pc)) > + mm = &init_mm; > + else > + mm = pc->pc_mm; > + BUG_ON(!mm); > + > + rcu_read_lock(); > + swap = rcu_dereference(mm->swap_cgroup); > + rcu_read_unlock(); > + BUG_ON(!swap); > + > + ret = res_counter_charge(&swap->res, PAGE_SIZE); > + if (!ret) { > + css_get(&swap->css); > + si->swap_cgroup[offset] = swap; > + } > + > + return ret; > +} > + > +void swap_cgroup_uncharge(struct swap_info_struct *si, unsigned long offset) > +{ > + struct swap_cgroup *swap = si->swap_cgroup[offset]; > + > + /* > + * "swap" would be NULL: > + * 1. when get_swap_page() failed at charging swap_cgroup, > + * and called swap_entry_free(). > + * 2. when this swap entry had been assigned by > + * get_swap_page_of_type() (via SWSUSP ?). > + */ > + if (swap) { > + res_counter_uncharge(&swap->res, PAGE_SIZE); > + si->swap_cgroup[offset] = NULL; > + css_put(&swap->css); > + } > +} > + > +static struct cgroup_subsys_state *swap_cgroup_create(struct cgroup_subsys *ss, > + struct cgroup *cgrp) > +{ > + struct swap_cgroup *swap; > + > + if (unlikely((cgrp->parent) == NULL)) { > + swap = &init_swap_cgroup; > + init_mm.swap_cgroup = swap; > + } else > + swap = kzalloc(sizeof(struct swap_cgroup), GFP_KERNEL); > + > + if (swap == NULL) > + return ERR_PTR(-ENOMEM); > + > + res_counter_init(&swap->res); > + > + return &swap->css; > +} > + > +static void swap_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) > +{ > + kfree(swap_cgroup_from_cgrp(cgrp)); > +} > + > +static ssize_t swap_cgroup_read(struct cgroup *cgrp, > + struct cftype *cft, struct file *file, > + char __user *userbuf, size_t nbytes, > + loff_t *ppos) > +{ > + return res_counter_read(&swap_cgroup_from_cgrp(cgrp)->res, > + cft->private, userbuf, nbytes, ppos, > + NULL); > +} > + > +static int swap_cgroup_write_strategy(char *buf, unsigned long long *tmp) > +{ > + *tmp = memparse(buf, &buf); > + if (*buf != '\0') > + return -EINVAL; > + > + /* > + * Round up the value to the closest page size > + */ > + *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT; > + return 0; > +} > + > +static ssize_t swap_cgroup_write(struct cgroup *cgrp, struct cftype *cft, > + struct file *file, const char __user *userbuf, > + size_t nbytes, loff_t *ppos) > +{ > + return res_counter_write(&swap_cgroup_from_cgrp(cgrp)->res, > + cft->private, userbuf, nbytes, ppos, > + swap_cgroup_write_strategy); > +} > + > +static struct cftype swap_files[] = { > + { > + .name = "usage_in_bytes", > + .private = RES_USAGE, > + .read = swap_cgroup_read, > + }, > + { > + .name = "limit_in_bytes", > + .private = RES_LIMIT, > + .write = swap_cgroup_write, > + .read = swap_cgroup_read, > + }, > + { > + .name = "failcnt", > + .private = RES_FAILCNT, > + .read = swap_cgroup_read, > + }, > +}; > + > +static int swap_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) > +{ > + return cgroup_add_files(cgrp, ss, swap_files, ARRAY_SIZE(swap_files)); > +} > + > +static void swap_cgroup_move_task(struct cgroup_subsys *ss, > + struct cgroup *cgrp, > + struct cgroup *old_cgrp, > + struct task_struct *p) > +{ > + struct mm_struct *mm; > + struct swap_cgroup *swap, *old_swap; > + > + mm = get_task_mm(p); > + if (mm == NULL) > + return; > + > + swap = swap_cgroup_from_cgrp(cgrp); > + old_swap = swap_cgroup_from_cgrp(old_cgrp); > + > + if (swap == old_swap) > + goto out; > + > + if (p->tgid != p->pid) > + goto out; > + > + css_get(&swap->css); > + rcu_assign_pointer(mm->swap_cgroup, swap); > + css_put(&old_swap->css); > + > +out: > + mmput(mm); > + return; > +} > + > +struct cgroup_subsys swap_subsys = { > + .name = "swap", > + .create = swap_cgroup_create, > + .destroy = swap_cgroup_destroy, > + .populate = swap_cgroup_populate, > + .subsys_id = swap_subsys_id, > + .attach = swap_cgroup_move_task, > + .early_init = 0, > +}; > diff -uprN linux-2.6.24-mm1/mm/swap_state.c linux-2.6.24-mm1-swaplimit/mm/swap_state.c > --- linux-2.6.24-mm1/mm/swap_state.c 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/mm/swap_state.c 2008-03-03 10:56:56.000000000 +0900 > @@ -128,7 +128,7 @@ int add_to_swap(struct page * page, gfp_ > BUG_ON(!PageUptodate(page)); > > for (;;) { > - entry = get_swap_page(); > + entry = get_swap_page(page); > if (!entry.val) > return 0; > > diff -uprN linux-2.6.24-mm1/mm/swapfile.c linux-2.6.24-mm1-swaplimit/mm/swapfile.c > --- linux-2.6.24-mm1/mm/swapfile.c 2008-02-04 14:34:24.000000000 +0900 > +++ linux-2.6.24-mm1-swaplimit/mm/swapfile.c 2008-03-03 10:56:56.000000000 +0900 > @@ -28,6 +28,7 @@ > #include <linux/capability.h> > #include <linux/syscalls.h> > #include <linux/memcontrol.h> > +#include <linux/swap_limit.h> > > #include <asm/pgtable.h> > #include <asm/tlbflush.h> > @@ -172,7 +173,10 @@ no_page: > return 0; > } > > -swp_entry_t get_swap_page(void) > +/* get_swap_page() calls this */ > +static int swap_entry_free(struct swap_info_struct *, unsigned long); > + > +swp_entry_t get_swap_page(struct page *page) > { > struct swap_info_struct *si; > pgoff_t offset; > @@ -201,6 +205,16 @@ swp_entry_t get_swap_page(void) > swap_list.next = next; > offset = scan_swap_map(si); > if (offset) { > + /* > + * This should be the first use of this swap entry, > + * so charge this swap entry now. > + */ > + if (swap_cgroup_charge(page, si, offset)) { > + /* should free this entry */ :) Please, don't create comments, that duplicate the next line. > + swap_entry_free(si, offset); > + > + goto noswap; > + } > spin_unlock(&swap_lock); > return swp_entry(type, offset); > } > @@ -285,6 +299,7 @@ static int swap_entry_free(struct swap_i > swap_list.next = p - swap_info; > nr_swap_pages++; > p->inuse_pages--; > + swap_cgroup_uncharge(p, offset); > } > } > return count; > @@ -1207,6 +1222,9 @@ asmlinkage long sys_swapoff(const char _ > { > struct swap_info_struct * p = NULL; > unsigned short *swap_map; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct swap_cgroup **swap_cgroup; > +#endif > struct file *swap_file, *victim; > struct address_space *mapping; > struct inode *inode; > @@ -1309,10 +1327,17 @@ asmlinkage long sys_swapoff(const char _ > p->max = 0; > swap_map = p->swap_map; > p->swap_map = NULL; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + swap_cgroup = p->swap_cgroup; > + p->swap_cgroup = NULL; > +#endif > p->flags = 0; > spin_unlock(&swap_lock); > mutex_unlock(&swapon_mutex); > vfree(swap_map); > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + vfree(swap_cgroup); > +#endif > inode = mapping->host; > if (S_ISBLK(inode->i_mode)) { > struct block_device *bdev = I_BDEV(inode); > @@ -1460,6 +1485,9 @@ asmlinkage long sys_swapon(const char __ > unsigned long maxpages = 1; > int swapfilesize; > unsigned short *swap_map; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + struct swap_cgroup **swap_cgroup; > +#endif > struct page *page = NULL; > struct inode *inode = NULL; > int did_down = 0; > @@ -1483,6 +1511,9 @@ asmlinkage long sys_swapon(const char __ > p->swap_file = NULL; > p->old_block_size = 0; > p->swap_map = NULL; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + p->swap_cgroup = NULL; > +#endif > p->lowest_bit = 0; > p->highest_bit = 0; > p->cluster_nr = 0; > @@ -1647,6 +1678,15 @@ asmlinkage long sys_swapon(const char __ > 1 /* header page */; > if (error) > goto bad_swap; > + > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + p->swap_cgroup = vmalloc(maxpages * sizeof(*swap_cgroup)); > + if (!(p->swap_cgroup)) { > + error = -ENOMEM; > + goto bad_swap; > + } > + memset(p->swap_cgroup, 0, maxpages * sizeof(*swap_cgroup)); > +#endif > } > > if (nr_good_pages) { > @@ -1704,13 +1744,22 @@ bad_swap: > bad_swap_2: > spin_lock(&swap_lock); > swap_map = p->swap_map; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + swap_cgroup = p->swap_cgroup; > +#endif > p->swap_file = NULL; > p->swap_map = NULL; > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + p->swap_cgroup = NULL; > +#endif > p->flags = 0; > if (!(swap_flags & SWAP_FLAG_PREFER)) > ++least_priority; > spin_unlock(&swap_lock); > vfree(swap_map); > +#ifdef CONFIG_CGROUP_SWAP_LIMIT > + vfree(swap_cgroup); > +#endif > if (swap_file) > filp_close(swap_file, NULL); > out: > > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
[parent not found: <47CE5AE2.2050303-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>]
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-05 8:33 ` Pavel Emelyanov @ 2008-03-05 8:51 ` Daisuke Nishimura -1 siblings, 0 replies; 50+ messages in thread From: Daisuke Nishimura @ 2008-03-05 8:51 UTC (permalink / raw) To: Pavel Emelyanov Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, hugh-DTz5qymZ9yRBDgjK7y7TUQ, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 Hi. >> @@ -664,6 +665,10 @@ retry: >> pc->flags = PAGE_CGROUP_FLAG_ACTIVE; >> if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) >> pc->flags |= PAGE_CGROUP_FLAG_CACHE; >> +#ifdef CONFIG_CGROUP_SWAP_LIMIT >> + atomic_inc(&mm->mm_count); >> + pc->pc_mm = mm; >> +#endif > > What kernel is this patch for? I cannot find this code in 2.6.25-rc3-mm1 > For linux-2.6.24-mm1. Thanks, Daisuke Nishimura. ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-05 8:51 ` Daisuke Nishimura 0 siblings, 0 replies; 50+ messages in thread From: Daisuke Nishimura @ 2008-03-05 8:51 UTC (permalink / raw) To: Pavel Emelyanov; +Cc: containers, linux-mm, balbir, kamezawa.hiroyu, hugh Hi. >> @@ -664,6 +665,10 @@ retry: >> pc->flags = PAGE_CGROUP_FLAG_ACTIVE; >> if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) >> pc->flags |= PAGE_CGROUP_FLAG_CACHE; >> +#ifdef CONFIG_CGROUP_SWAP_LIMIT >> + atomic_inc(&mm->mm_count); >> + pc->pc_mm = mm; >> +#endif > > What kernel is this patch for? I cannot find this code in 2.6.25-rc3-mm1 > For linux-2.6.24-mm1. Thanks, Daisuke Nishimura. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-05 8:33 ` Pavel Emelyanov @ 2008-03-05 14:07 ` Hugh Dickins -1 siblings, 0 replies; 50+ messages in thread From: Hugh Dickins @ 2008-03-05 14:07 UTC (permalink / raw) To: Pavel Emelyanov Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 On Wed, 5 Mar 2008, Pavel Emelyanov wrote: > Daisuke Nishimura wrote: > > > > Todo: > > - rebase new kernel, and split into some patches. > > - Merge with memory subsystem (if it would be better), or > > remove dependency on CONFIG_CGROUP_MEM_CONT if possible > > (needs to make page_cgroup more generic one). > > Merge is a must IMHO. I can hardly imagine a situation in which > someone would need these two separately. Strongly agree. Nobody's interested in swap as such: it's just secondary memory, where RAM is primary memory. People want to control memory as the sum of the two; and I expect they may also want to control primary memory (all that the current memcg does) within that. I wonder if such nesting of limits fits easily into cgroups or will be problematic. Hugh ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-05 14:07 ` Hugh Dickins 0 siblings, 0 replies; 50+ messages in thread From: Hugh Dickins @ 2008-03-05 14:07 UTC (permalink / raw) To: Pavel Emelyanov Cc: Daisuke Nishimura, containers, linux-mm, balbir, kamezawa.hiroyu On Wed, 5 Mar 2008, Pavel Emelyanov wrote: > Daisuke Nishimura wrote: > > > > Todo: > > - rebase new kernel, and split into some patches. > > - Merge with memory subsystem (if it would be better), or > > remove dependency on CONFIG_CGROUP_MEM_CONT if possible > > (needs to make page_cgroup more generic one). > > Merge is a must IMHO. I can hardly imagine a situation in which > someone would need these two separately. Strongly agree. Nobody's interested in swap as such: it's just secondary memory, where RAM is primary memory. People want to control memory as the sum of the two; and I expect they may also want to control primary memory (all that the current memcg does) within that. I wonder if such nesting of limits fits easily into cgroups or will be problematic. Hugh -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
[parent not found: <Pine.LNX.4.64.0803051400000.22243-popGQ1T0qN76K7/ahGyk6A@public.gmane.org>]
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-05 14:07 ` Hugh Dickins @ 2008-03-05 14:14 ` Pavel Emelyanov -1 siblings, 0 replies; 50+ messages in thread From: Pavel Emelyanov @ 2008-03-05 14:14 UTC (permalink / raw) To: Hugh Dickins Cc: containers-qjLDD68F18O7TbgM5vRIOg, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 Hugh Dickins wrote: > On Wed, 5 Mar 2008, Pavel Emelyanov wrote: >> Daisuke Nishimura wrote: >>> Todo: >>> - rebase new kernel, and split into some patches. >>> - Merge with memory subsystem (if it would be better), or >>> remove dependency on CONFIG_CGROUP_MEM_CONT if possible >>> (needs to make page_cgroup more generic one). >> Merge is a must IMHO. I can hardly imagine a situation in which >> someone would need these two separately. > > Strongly agree. Nobody's interested in swap as such: it's just > secondary memory, where RAM is primary memory. People want to > control memory as the sum of the two; and I expect they may also > want to control primary memory (all that the current memcg does) > within that. I wonder if such nesting of limits fits easily > into cgroups or will be problematic. This nesting would affect the res_couter abstraction, not the cgroup infrastructure. Current design of resource counters doesn't allow for such thing, but the extension is a couple-of-lines patch :) > Hugh > ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-05 14:14 ` Pavel Emelyanov 0 siblings, 0 replies; 50+ messages in thread From: Pavel Emelyanov @ 2008-03-05 14:14 UTC (permalink / raw) To: Hugh Dickins Cc: Daisuke Nishimura, containers, linux-mm, balbir, kamezawa.hiroyu Hugh Dickins wrote: > On Wed, 5 Mar 2008, Pavel Emelyanov wrote: >> Daisuke Nishimura wrote: >>> Todo: >>> - rebase new kernel, and split into some patches. >>> - Merge with memory subsystem (if it would be better), or >>> remove dependency on CONFIG_CGROUP_MEM_CONT if possible >>> (needs to make page_cgroup more generic one). >> Merge is a must IMHO. I can hardly imagine a situation in which >> someone would need these two separately. > > Strongly agree. Nobody's interested in swap as such: it's just > secondary memory, where RAM is primary memory. People want to > control memory as the sum of the two; and I expect they may also > want to control primary memory (all that the current memcg does) > within that. I wonder if such nesting of limits fits easily > into cgroups or will be problematic. This nesting would affect the res_couter abstraction, not the cgroup infrastructure. Current design of resource counters doesn't allow for such thing, but the extension is a couple-of-lines patch :) > Hugh > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
[parent not found: <47CEAAB4.8070208-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>]
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-05 14:14 ` Pavel Emelyanov @ 2008-03-06 0:33 ` KAMEZAWA Hiroyuki -1 siblings, 0 replies; 50+ messages in thread From: KAMEZAWA Hiroyuki @ 2008-03-06 0:33 UTC (permalink / raw) To: Pavel Emelyanov Cc: containers-qjLDD68F18O7TbgM5vRIOg, Hugh Dickins, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8, linux-mm-Bw31MaZKKs3YtjvyW6yDsg On Wed, 05 Mar 2008 17:14:12 +0300 Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote: > > Strongly agree. Nobody's interested in swap as such: it's just > > secondary memory, where RAM is primary memory. People want to > > control memory as the sum of the two; and I expect they may also > > want to control primary memory (all that the current memcg does) > > within that. I wonder if such nesting of limits fits easily > > into cgroups or will be problematic. > > This nesting would affect the res_couter abstraction, not the > cgroup infrastructure. Current design of resource counters doesn't > allow for such thing, but the extension is a couple-of-lines patch :) > IMHO, keeping res_counter simple is better. Is this kind of new entry in mem_cgroup not good ? == struct mem_cgroup { ... struct res_counter memory_limit. struct res_counter swap_limit. .. } ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-06 0:33 ` KAMEZAWA Hiroyuki 0 siblings, 0 replies; 50+ messages in thread From: KAMEZAWA Hiroyuki @ 2008-03-06 0:33 UTC (permalink / raw) To: Pavel Emelyanov Cc: Hugh Dickins, Daisuke Nishimura, containers, linux-mm, balbir On Wed, 05 Mar 2008 17:14:12 +0300 Pavel Emelyanov <xemul@openvz.org> wrote: > > Strongly agree. Nobody's interested in swap as such: it's just > > secondary memory, where RAM is primary memory. People want to > > control memory as the sum of the two; and I expect they may also > > want to control primary memory (all that the current memcg does) > > within that. I wonder if such nesting of limits fits easily > > into cgroups or will be problematic. > > This nesting would affect the res_couter abstraction, not the > cgroup infrastructure. Current design of resource counters doesn't > allow for such thing, but the extension is a couple-of-lines patch :) > IMHO, keeping res_counter simple is better. Is this kind of new entry in mem_cgroup not good ? == struct mem_cgroup { ... struct res_counter memory_limit. struct res_counter swap_limit. .. } -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
[parent not found: <20080306093324.77c6d7f4.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>]
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-06 0:33 ` KAMEZAWA Hiroyuki @ 2008-03-06 0:35 ` Paul Menage -1 siblings, 0 replies; 50+ messages in thread From: Paul Menage @ 2008-03-06 0:35 UTC (permalink / raw) To: KAMEZAWA Hiroyuki Cc: linux-mm-Bw31MaZKKs3YtjvyW6yDsg, containers-qjLDD68F18O7TbgM5vRIOg, Hugh Dickins, Pavel Emelyanov, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 On Wed, Mar 5, 2008 at 4:33 PM, KAMEZAWA Hiroyuki <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org> wrote: > Is this kind of new entry in mem_cgroup not good ? > == > struct mem_cgroup { > ... > struct res_counter memory_limit. > struct res_counter swap_limit. > .. I agree with this - main memory and swap memory are rather different kinds of resources, with very different performance characteristics. It should be possible to control them completely independently (e.g. this job gets 100M of main memory, and doesn't swap at all). Paul ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-06 0:35 ` Paul Menage 0 siblings, 0 replies; 50+ messages in thread From: Paul Menage @ 2008-03-06 0:35 UTC (permalink / raw) To: KAMEZAWA Hiroyuki Cc: Pavel Emelyanov, Hugh Dickins, Daisuke Nishimura, containers, linux-mm, balbir On Wed, Mar 5, 2008 at 4:33 PM, KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote: > Is this kind of new entry in mem_cgroup not good ? > == > struct mem_cgroup { > ... > struct res_counter memory_limit. > struct res_counter swap_limit. > .. I agree with this - main memory and swap memory are rather different kinds of resources, with very different performance characteristics. It should be possible to control them completely independently (e.g. this job gets 100M of main memory, and doesn't swap at all). Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-06 0:33 ` KAMEZAWA Hiroyuki @ 2008-03-06 8:20 ` Pavel Emelyanov -1 siblings, 0 replies; 50+ messages in thread From: Pavel Emelyanov @ 2008-03-06 8:20 UTC (permalink / raw) To: KAMEZAWA Hiroyuki Cc: containers-qjLDD68F18O7TbgM5vRIOg, Hugh Dickins, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8, linux-mm-Bw31MaZKKs3YtjvyW6yDsg KAMEZAWA Hiroyuki wrote: > On Wed, 05 Mar 2008 17:14:12 +0300 > Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote: >>> Strongly agree. Nobody's interested in swap as such: it's just >>> secondary memory, where RAM is primary memory. People want to >>> control memory as the sum of the two; and I expect they may also >>> want to control primary memory (all that the current memcg does) >>> within that. I wonder if such nesting of limits fits easily >>> into cgroups or will be problematic. >> This nesting would affect the res_couter abstraction, not the >> cgroup infrastructure. Current design of resource counters doesn't >> allow for such thing, but the extension is a couple-of-lines patch :) >> > IMHO, keeping res_counter simple is better. > > Is this kind of new entry in mem_cgroup not good ? > == > struct mem_cgroup { > ... > struct res_counter memory_limit. > struct res_counter swap_limit. > .. > } I meant the same thing actually. By "nesting would affect" I meant, that we might want to make res_counters hierarchical. That would kill two birds with one stone - we will make a true hierarchical memory accounting and let charging of two counters with one call. > ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-06 8:20 ` Pavel Emelyanov 0 siblings, 0 replies; 50+ messages in thread From: Pavel Emelyanov @ 2008-03-06 8:20 UTC (permalink / raw) To: KAMEZAWA Hiroyuki Cc: Hugh Dickins, Daisuke Nishimura, containers, linux-mm, balbir KAMEZAWA Hiroyuki wrote: > On Wed, 05 Mar 2008 17:14:12 +0300 > Pavel Emelyanov <xemul@openvz.org> wrote: >>> Strongly agree. Nobody's interested in swap as such: it's just >>> secondary memory, where RAM is primary memory. People want to >>> control memory as the sum of the two; and I expect they may also >>> want to control primary memory (all that the current memcg does) >>> within that. I wonder if such nesting of limits fits easily >>> into cgroups or will be problematic. >> This nesting would affect the res_couter abstraction, not the >> cgroup infrastructure. Current design of resource counters doesn't >> allow for such thing, but the extension is a couple-of-lines patch :) >> > IMHO, keeping res_counter simple is better. > > Is this kind of new entry in mem_cgroup not good ? > == > struct mem_cgroup { > ... > struct res_counter memory_limit. > struct res_counter swap_limit. > .. > } I meant the same thing actually. By "nesting would affect" I meant, that we might want to make res_counters hierarchical. That would kill two birds with one stone - we will make a true hierarchical memory accounting and let charging of two counters with one call. > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
[parent not found: <47CFA941.4070507-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>]
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-06 8:20 ` Pavel Emelyanov @ 2008-03-06 8:33 ` KAMEZAWA Hiroyuki -1 siblings, 0 replies; 50+ messages in thread From: KAMEZAWA Hiroyuki @ 2008-03-06 8:33 UTC (permalink / raw) To: Pavel Emelyanov Cc: containers-qjLDD68F18O7TbgM5vRIOg, Hugh Dickins, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8, linux-mm-Bw31MaZKKs3YtjvyW6yDsg On Thu, 06 Mar 2008 11:20:17 +0300 Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote: > KAMEZAWA Hiroyuki wrote: > > On Wed, 05 Mar 2008 17:14:12 +0300 > > Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote: > >>> Strongly agree. Nobody's interested in swap as such: it's just > >>> secondary memory, where RAM is primary memory. People want to > >>> control memory as the sum of the two; and I expect they may also > >>> want to control primary memory (all that the current memcg does) > >>> within that. I wonder if such nesting of limits fits easily > >>> into cgroups or will be problematic. > >> This nesting would affect the res_couter abstraction, not the > >> cgroup infrastructure. Current design of resource counters doesn't > >> allow for such thing, but the extension is a couple-of-lines patch :) > >> > > IMHO, keeping res_counter simple is better. > > > > Is this kind of new entry in mem_cgroup not good ? > > == > > struct mem_cgroup { > > ... > > struct res_counter memory_limit. > > struct res_counter swap_limit. > > .. > > } > > I meant the same thing actually. By "nesting would affect" I > meant, that we might want to make res_counters hierarchical. > > That would kill two birds with one stone - we will make a true > hierarchical memory accounting and let charging of two counters > with one call. Hierarchical res_counter makes sense. Making it in simple/reasonable style will be our challenge. Thanks, -Kame ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-06 8:33 ` KAMEZAWA Hiroyuki 0 siblings, 0 replies; 50+ messages in thread From: KAMEZAWA Hiroyuki @ 2008-03-06 8:33 UTC (permalink / raw) To: Pavel Emelyanov Cc: Hugh Dickins, Daisuke Nishimura, containers, linux-mm, balbir On Thu, 06 Mar 2008 11:20:17 +0300 Pavel Emelyanov <xemul@openvz.org> wrote: > KAMEZAWA Hiroyuki wrote: > > On Wed, 05 Mar 2008 17:14:12 +0300 > > Pavel Emelyanov <xemul@openvz.org> wrote: > >>> Strongly agree. Nobody's interested in swap as such: it's just > >>> secondary memory, where RAM is primary memory. People want to > >>> control memory as the sum of the two; and I expect they may also > >>> want to control primary memory (all that the current memcg does) > >>> within that. I wonder if such nesting of limits fits easily > >>> into cgroups or will be problematic. > >> This nesting would affect the res_couter abstraction, not the > >> cgroup infrastructure. Current design of resource counters doesn't > >> allow for such thing, but the extension is a couple-of-lines patch :) > >> > > IMHO, keeping res_counter simple is better. > > > > Is this kind of new entry in mem_cgroup not good ? > > == > > struct mem_cgroup { > > ... > > struct res_counter memory_limit. > > struct res_counter swap_limit. > > .. > > } > > I meant the same thing actually. By "nesting would affect" I > meant, that we might want to make res_counters hierarchical. > > That would kill two birds with one stone - we will make a true > hierarchical memory accounting and let charging of two counters > with one call. Hierarchical res_counter makes sense. Making it in simple/reasonable style will be our challenge. Thanks, -Kame -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
[parent not found: <20080306173347.f6c5c84c.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>]
* Re: [RFC/PATCH] cgroup swap subsystem 2008-03-06 8:33 ` KAMEZAWA Hiroyuki @ 2008-03-06 8:38 ` Pavel Emelyanov -1 siblings, 0 replies; 50+ messages in thread From: Pavel Emelyanov @ 2008-03-06 8:38 UTC (permalink / raw) To: KAMEZAWA Hiroyuki, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 Cc: containers-qjLDD68F18O7TbgM5vRIOg, Hugh Dickins, linux-mm-Bw31MaZKKs3YtjvyW6yDsg KAMEZAWA Hiroyuki wrote: > On Thu, 06 Mar 2008 11:20:17 +0300 > Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote: > >> KAMEZAWA Hiroyuki wrote: >>> On Wed, 05 Mar 2008 17:14:12 +0300 >>> Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote: >>>>> Strongly agree. Nobody's interested in swap as such: it's just >>>>> secondary memory, where RAM is primary memory. People want to >>>>> control memory as the sum of the two; and I expect they may also >>>>> want to control primary memory (all that the current memcg does) >>>>> within that. I wonder if such nesting of limits fits easily >>>>> into cgroups or will be problematic. >>>> This nesting would affect the res_couter abstraction, not the >>>> cgroup infrastructure. Current design of resource counters doesn't >>>> allow for such thing, but the extension is a couple-of-lines patch :) >>>> >>> IMHO, keeping res_counter simple is better. >>> >>> Is this kind of new entry in mem_cgroup not good ? >>> == >>> struct mem_cgroup { >>> ... >>> struct res_counter memory_limit. >>> struct res_counter swap_limit. >>> .. >>> } >> I meant the same thing actually. By "nesting would affect" I >> meant, that we might want to make res_counters hierarchical. >> >> That would kill two birds with one stone - we will make a true >> hierarchical memory accounting and let charging of two counters >> with one call. > > Hierarchical res_counter makes sense. > Making it in simple/reasonable style will be our challenge. I have this in my TODO list. Since this is not so urgent, then if you don't mind I can prepare the patches next week - after I set the git tree up. This change doesn't seem that big. > Thanks, > -Kame > > ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-06 8:38 ` Pavel Emelyanov 0 siblings, 0 replies; 50+ messages in thread From: Pavel Emelyanov @ 2008-03-06 8:38 UTC (permalink / raw) To: KAMEZAWA Hiroyuki, balbir Cc: Hugh Dickins, Daisuke Nishimura, containers, linux-mm KAMEZAWA Hiroyuki wrote: > On Thu, 06 Mar 2008 11:20:17 +0300 > Pavel Emelyanov <xemul@openvz.org> wrote: > >> KAMEZAWA Hiroyuki wrote: >>> On Wed, 05 Mar 2008 17:14:12 +0300 >>> Pavel Emelyanov <xemul@openvz.org> wrote: >>>>> Strongly agree. Nobody's interested in swap as such: it's just >>>>> secondary memory, where RAM is primary memory. People want to >>>>> control memory as the sum of the two; and I expect they may also >>>>> want to control primary memory (all that the current memcg does) >>>>> within that. I wonder if such nesting of limits fits easily >>>>> into cgroups or will be problematic. >>>> This nesting would affect the res_couter abstraction, not the >>>> cgroup infrastructure. Current design of resource counters doesn't >>>> allow for such thing, but the extension is a couple-of-lines patch :) >>>> >>> IMHO, keeping res_counter simple is better. >>> >>> Is this kind of new entry in mem_cgroup not good ? >>> == >>> struct mem_cgroup { >>> ... >>> struct res_counter memory_limit. >>> struct res_counter swap_limit. >>> .. >>> } >> I meant the same thing actually. By "nesting would affect" I >> meant, that we might want to make res_counters hierarchical. >> >> That would kill two birds with one stone - we will make a true >> hierarchical memory accounting and let charging of two counters >> with one call. > > Hierarchical res_counter makes sense. > Making it in simple/reasonable style will be our challenge. I have this in my TODO list. Since this is not so urgent, then if you don't mind I can prepare the patches next week - after I set the git tree up. This change doesn't seem that big. > Thanks, > -Kame > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
[parent not found: <47CFAD69.6000909-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>]
* Re: [Devel] Re: [RFC/PATCH] cgroup swap subsystem 2008-03-06 8:38 ` Pavel Emelyanov @ 2008-03-06 8:48 ` Paul Menage -1 siblings, 0 replies; 50+ messages in thread From: Paul Menage @ 2008-03-06 8:48 UTC (permalink / raw) To: Pavel Emelyanov Cc: containers-qjLDD68F18O7TbgM5vRIOg, Hugh Dickins, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 On Thu, Mar 6, 2008 at 12:38 AM, Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote: > > Hierarchical res_counter makes sense. > > Making it in simple/reasonable style will be our challenge. > > I have this in my TODO list. Since this is not so urgent, then if you > don't mind I can prepare the patches next week - after I set the git > tree up. This change doesn't seem that big. > The change that you're referring to is allowing a cgroup to have a total memory limit for itself and all its children, and then giving that cgroup's children separate memory limits within that overall limit? Paul ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [Devel] Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-06 8:48 ` Paul Menage 0 siblings, 0 replies; 50+ messages in thread From: Paul Menage @ 2008-03-06 8:48 UTC (permalink / raw) To: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki, balbir, containers, Hugh Dickins, linux-mm On Thu, Mar 6, 2008 at 12:38 AM, Pavel Emelyanov <xemul@openvz.org> wrote: > > Hierarchical res_counter makes sense. > > Making it in simple/reasonable style will be our challenge. > > I have this in my TODO list. Since this is not so urgent, then if you > don't mind I can prepare the patches next week - after I set the git > tree up. This change doesn't seem that big. > The change that you're referring to is allowing a cgroup to have a total memory limit for itself and all its children, and then giving that cgroup's children separate memory limits within that overall limit? Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
[parent not found: <6599ad830803060048sb39735an765a62e6b928657e-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [Devel] Re: [RFC/PATCH] cgroup swap subsystem 2008-03-06 8:48 ` Paul Menage @ 2008-03-06 8:50 ` Pavel Emelyanov -1 siblings, 0 replies; 50+ messages in thread From: Pavel Emelyanov @ 2008-03-06 8:50 UTC (permalink / raw) To: Paul Menage Cc: containers-qjLDD68F18O7TbgM5vRIOg, Hugh Dickins, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 Paul Menage wrote: > On Thu, Mar 6, 2008 at 12:38 AM, Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote: >> > Hierarchical res_counter makes sense. >> > Making it in simple/reasonable style will be our challenge. >> >> I have this in my TODO list. Since this is not so urgent, then if you >> don't mind I can prepare the patches next week - after I set the git >> tree up. This change doesn't seem that big. >> > > The change that you're referring to is allowing a cgroup to have a > total memory limit for itself and all its children, and then giving > that cgroup's children separate memory limits within that overall > limit? Yup. Isn't this reasonable? Without this, if I'm a task in a 1GB limited cgroup, I can create a new one, set 2GB limit and spawn a kid into it (or move there myself) and be happy with 2GB of memory... With the proposed change, even if I set a 2GB for a subgroup it will not pass _my_ (1GB) limit. > Paul > ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [Devel] Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-06 8:50 ` Pavel Emelyanov 0 siblings, 0 replies; 50+ messages in thread From: Pavel Emelyanov @ 2008-03-06 8:50 UTC (permalink / raw) To: Paul Menage; +Cc: KAMEZAWA Hiroyuki, balbir, containers, Hugh Dickins, linux-mm Paul Menage wrote: > On Thu, Mar 6, 2008 at 12:38 AM, Pavel Emelyanov <xemul@openvz.org> wrote: >> > Hierarchical res_counter makes sense. >> > Making it in simple/reasonable style will be our challenge. >> >> I have this in my TODO list. Since this is not so urgent, then if you >> don't mind I can prepare the patches next week - after I set the git >> tree up. This change doesn't seem that big. >> > > The change that you're referring to is allowing a cgroup to have a > total memory limit for itself and all its children, and then giving > that cgroup's children separate memory limits within that overall > limit? Yup. Isn't this reasonable? Without this, if I'm a task in a 1GB limited cgroup, I can create a new one, set 2GB limit and spawn a kid into it (or move there myself) and be happy with 2GB of memory... With the proposed change, even if I set a 2GB for a subgroup it will not pass _my_ (1GB) limit. > Paul > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
[parent not found: <47CFB065.3080200-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>]
* Re: [Devel] Re: [RFC/PATCH] cgroup swap subsystem 2008-03-06 8:50 ` Pavel Emelyanov @ 2008-03-06 8:52 ` Paul Menage -1 siblings, 0 replies; 50+ messages in thread From: Paul Menage @ 2008-03-06 8:52 UTC (permalink / raw) To: Pavel Emelyanov Cc: containers-qjLDD68F18O7TbgM5vRIOg, Hugh Dickins, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 On Thu, Mar 6, 2008 at 12:50 AM, Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> wrote: > > The change that you're referring to is allowing a cgroup to have a > > total memory limit for itself and all its children, and then giving > > that cgroup's children separate memory limits within that overall > > limit? > > Yup. Isn't this reasonable? Yes, sounds like a good plan. Paul ^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [Devel] Re: [RFC/PATCH] cgroup swap subsystem @ 2008-03-06 8:52 ` Paul Menage 0 siblings, 0 replies; 50+ messages in thread From: Paul Menage @ 2008-03-06 8:52 UTC (permalink / raw) To: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki, balbir, containers, Hugh Dickins, linux-mm On Thu, Mar 6, 2008 at 12:50 AM, Pavel Emelyanov <xemul@openvz.org> wrote: > > The change that you're referring to is allowing a cgroup to have a > > total memory limit for itself and all its children, and then giving > > that cgroup's children separate memory limits within that overall > > limit? > > Yup. Isn't this reasonable? Yes, sounds like a good plan. Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 50+ messages in thread
end of thread, other threads:[~2008-03-12 22:57 UTC | newest]
Thread overview: 50+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-03-05 5:59 [RFC/PATCH] cgroup swap subsystem Daisuke Nishimura
2008-03-05 5:59 ` Daisuke Nishimura
[not found] ` <47CE36A9.3060204-YQH0OdQVrdy45+QrQBaojngSJqDPrsil@public.gmane.org>
2008-03-05 6:36 ` Paul Menage
2008-03-05 6:36 ` Paul Menage
[not found] ` <6599ad830803042236x3e5fdf0dmaf4119997025ba40-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2008-03-06 12:20 ` Daisuke Nishimura
2008-03-06 12:20 ` Daisuke Nishimura
2008-03-05 6:53 ` KAMEZAWA Hiroyuki
2008-03-05 6:53 ` KAMEZAWA Hiroyuki
[not found] ` <20080305155329.60e02f48.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2008-03-05 21:51 ` Hirokazu Takahashi
2008-03-05 21:51 ` Hirokazu Takahashi
2008-03-06 11:45 ` Daisuke Nishimura
2008-03-06 11:45 ` Daisuke Nishimura
[not found] ` <47CFD957.3060402-YQH0OdQVrdy45+QrQBaojngSJqDPrsil@public.gmane.org>
2008-03-06 12:25 ` Pavel Emelyanov
2008-03-06 12:25 ` Pavel Emelyanov
2008-03-06 12:56 ` kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A
2008-03-06 12:56 ` kamezawa.hiroyu
[not found] ` <6197904.1204808216900.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2008-03-07 8:22 ` Daisuke Nishimura
2008-03-07 8:22 ` Daisuke Nishimura
2008-03-12 22:57 ` YAMAMOTO Takashi
2008-03-12 22:57 ` YAMAMOTO Takashi
2008-03-05 7:03 ` KAMEZAWA Hiroyuki
2008-03-05 7:03 ` KAMEZAWA Hiroyuki
2008-03-05 7:28 ` Balbir Singh
2008-03-05 7:28 ` Balbir Singh
[not found] ` <47CE4BB6.8050803-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2008-03-07 4:23 ` Daisuke Nishimura
2008-03-07 4:23 ` Daisuke Nishimura
2008-03-05 8:33 ` Pavel Emelyanov
2008-03-05 8:33 ` Pavel Emelyanov
[not found] ` <47CE5AE2.2050303-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2008-03-05 8:51 ` Daisuke Nishimura
2008-03-05 8:51 ` Daisuke Nishimura
2008-03-05 14:07 ` Hugh Dickins
2008-03-05 14:07 ` Hugh Dickins
[not found] ` <Pine.LNX.4.64.0803051400000.22243-popGQ1T0qN76K7/ahGyk6A@public.gmane.org>
2008-03-05 14:14 ` Pavel Emelyanov
2008-03-05 14:14 ` Pavel Emelyanov
[not found] ` <47CEAAB4.8070208-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2008-03-06 0:33 ` KAMEZAWA Hiroyuki
2008-03-06 0:33 ` KAMEZAWA Hiroyuki
[not found] ` <20080306093324.77c6d7f4.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2008-03-06 0:35 ` Paul Menage
2008-03-06 0:35 ` Paul Menage
2008-03-06 8:20 ` Pavel Emelyanov
2008-03-06 8:20 ` Pavel Emelyanov
[not found] ` <47CFA941.4070507-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2008-03-06 8:33 ` KAMEZAWA Hiroyuki
2008-03-06 8:33 ` KAMEZAWA Hiroyuki
[not found] ` <20080306173347.f6c5c84c.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2008-03-06 8:38 ` Pavel Emelyanov
2008-03-06 8:38 ` Pavel Emelyanov
[not found] ` <47CFAD69.6000909-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2008-03-06 8:48 ` [Devel] " Paul Menage
2008-03-06 8:48 ` Paul Menage
[not found] ` <6599ad830803060048sb39735an765a62e6b928657e-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2008-03-06 8:50 ` Pavel Emelyanov
2008-03-06 8:50 ` Pavel Emelyanov
[not found] ` <47CFB065.3080200-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2008-03-06 8:52 ` Paul Menage
2008-03-06 8:52 ` Paul Menage
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.