From: Jeff Liu <jeff.liu@oracle.com>
To: linux-mm@kvack.org
Cc: cgroups@vger.kernel.org, Glauber Costa <glommer@parallels.com>
Subject: [RFC PATCH 2/3] memcg: disable pages allocation for swap cgroup on system booting up
Date: Tue, 04 Dec 2012 16:36:11 +0800 [thread overview]
Message-ID: <50BDB5FB.6080707@oracle.com> (raw)
- Disable pages allocation for swap cgroup at system boot up stage.
- Perform page allocation if there have child memcg alive, because the user
might disabled one/more swap files/partitions for some reason.
- Introduce a couple of helpers to deal with page allocation/free for swap cgroup.
- Introduce a new static variable to indicate the status of child memcg create/remove.
Signed-off-by: Jie Liu <jeff.liu@oracle.com>
CC: Glauber Costa <glommer@parallels.com>
CC: Michal Hocko <mhocko@suse.cz>
CC: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
CC: Johannes Weiner <hannes@cmpxchg.org>
CC: Mel Gorman <mgorman@suse.de>
CC: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/page_cgroup.h | 12 +++++
mm/page_cgroup.c | 109 ++++++++++++++++++++++++++++++++++++++-----
2 files changed, 110 insertions(+), 11 deletions(-)
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 777a524..2b94fc0 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -113,6 +113,8 @@ extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent);
extern int swap_cgroup_swapon(int type, unsigned long max_pages);
extern void swap_cgroup_swapoff(int type);
+extern int swap_cgroup_init(void);
+extern void swap_cgroup_destroy(void);
#else
static inline
@@ -138,6 +140,16 @@ static inline void swap_cgroup_swapoff(int type)
return;
}
+static inline int swap_cgroup_init(void)
+{
+ return 0;
+}
+
+static inline void swap_cgroup_destroy(void)
+{
+ return;
+}
+
#endif /* CONFIG_MEMCG_SWAP */
#endif /* !__GENERATING_BOUNDS_H */
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 76b1344..f1b257b 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -321,8 +321,8 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
static DEFINE_MUTEX(swap_cgroup_mutex);
struct swap_cgroup_ctrl {
- struct page **map;
- unsigned long length;
+ struct page **map;
+ unsigned long length;
spinlock_t lock;
};
@@ -410,6 +410,8 @@ static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
return sc + offset % SC_PER_PAGE;
}
+static atomic_t swap_cgroup_initialized = ATOMIC_INIT(0);
+
/**
* swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
* @ent: swap entry to be cmpxchged
@@ -497,17 +499,36 @@ int swap_cgroup_swapon(int type, unsigned long max_pages)
ctrl->length = length;
ctrl->map = array;
spin_lock_init(&ctrl->lock);
- if (swap_cgroup_alloc_pages(type)) {
- /* memory shortage */
- ctrl->map = NULL;
- ctrl->length = 0;
- mutex_unlock(&swap_cgroup_mutex);
- vfree(array);
- goto nomem;
+
+ /*
+ * We would delay page allocation for swap cgroup if swapon(2)
+ * is occurred at system boot phase until the first none-parent
+ * memcg was created.
+ *
+ * However, we might run into the following scenarios:
+ * 1) one/more new swap partitions/files are being enabled
+ * with non-parent memcg+swap_cgroup is/are active.
+ * 2) keep memcg+swap_cgroup being active, but the user has
+ * performed swapoff(2) against the given type of swap
+ * partition or file for some reason, and then the user
+ * turn it on again.
+ * In those cases, we have to allocate the pages in swapon(2)
+ * stage since we have no chance to make it in swap_cgroup_init()
+ * until a new child memcg was created.
+ */
+ if (atomic_read(&swap_cgroup_initialized)) {
+ if (swap_cgroup_alloc_pages(type)) {
+ /* memory shortage */
+ ctrl->map = NULL;
+ ctrl->length = 0;
+ mutex_unlock(&swap_cgroup_mutex);
+ vfree(array);
+ goto nomem;
+ }
}
mutex_unlock(&swap_cgroup_mutex);
-
return 0;
+
nomem:
printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n");
printk(KERN_INFO
@@ -515,10 +536,74 @@ nomem:
return -ENOMEM;
}
+/*
+ * This function is called per child memcg created so that we might
+ * arrive here multiple times. But we only allocate pages for swap
+ * cgroup when the first child memcg was created.
+ */
+int swap_cgroup_init(void)
+{
+ int type;
+
+ if (!do_swap_account)
+ return 0;
+
+ if (atomic_add_return(1, &swap_cgroup_initialized) != 1)
+ return 0;
+
+ mutex_lock(&swap_cgroup_mutex);
+ for (type = 0; type < MAX_SWAPFILES; type++) {
+ if (swap_cgroup_alloc_pages(type) < 0) {
+ struct swap_cgroup_ctrl *ctrl;
+
+ ctrl = &swap_cgroup_ctrl[type];
+ mutex_unlock(&swap_cgroup_mutex);
+ ctrl->length = 0;
+ if (ctrl->map) {
+ vfree(ctrl->map);
+ ctrl->map = NULL;
+ }
+ goto nomem;
+ }
+ }
+ mutex_unlock(&swap_cgroup_mutex);
+
+ return 0;
+
+nomem:
+ pr_info("couldn't initialize swap_cgroup, no enough memory.\n");
+ pr_info("swap_cgroup can be disabled by swapaccount=0 boot option\n");
+ return -ENOMEM;
+}
+
+/*
+ * This function is called per memcg removed so that we might arrive
+ * here multiple times, but we only free pages when the last memcg
+ * was removed. Note that:
+ * We won't clean the map pointer and the length which were calculated
+ * at swapon(2) stage because of that we need those info to re-allocate
+ * pages if a child memcg was created again.
+ */
+void swap_cgroup_destroy(void)
+{
+ int type;
+
+ if (!do_swap_account)
+ return;
+
+ if (atomic_sub_return(1, &swap_cgroup_initialized))
+ return;
+
+ mutex_lock(&swap_cgroup_mutex);
+ for (type = 0; type < MAX_SWAPFILES; type++)
+ swap_cgroup_free_pages(type);
+ mutex_unlock(&swap_cgroup_mutex);
+}
+
void swap_cgroup_swapoff(int type)
{
struct page **map;
- unsigned long i, length;
+ unsigned long length;
struct swap_cgroup_ctrl *ctrl;
if (!do_swap_account)
@@ -533,6 +618,8 @@ void swap_cgroup_swapoff(int type)
mutex_unlock(&swap_cgroup_mutex);
if (map) {
+ unsigned long i;
+
for (i = 0; i < length; i++) {
struct page *page = map[i];
if (page)
--
1.7.9.5
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev reply other threads:[~2012-12-04 8:36 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-12-04 8:35 [RFC PATCH 0/3] Disable swap cgroup allocation at system boot stage Jeff Liu
2012-12-04 8:35 ` [RFC PATCH 1/3] memcg: refactor pages allocation/free for swap_cgroup Jeff Liu
2012-12-04 10:11 ` Michal Hocko
2012-12-04 10:46 ` Jeff Liu
2012-12-04 8:36 ` Jeff Liu [this message]
2012-12-04 11:17 ` [RFC PATCH 2/3] memcg: disable pages allocation for swap cgroup on system booting up Michal Hocko
[not found] ` <20121204111721.GB1343-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
2012-12-04 11:22 ` Michal Hocko
2012-12-04 11:22 ` Michal Hocko
2012-12-04 12:34 ` Michal Hocko
2012-12-04 12:34 ` Michal Hocko
2012-12-04 12:51 ` Jeff Liu
2012-12-04 8:36 ` [RFC PATCH 3/3] memcg: allocate pages for swap cgroup until the first child memcg is alive Jeff Liu
[not found] ` <50BDB601.4090205-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2012-12-04 12:54 ` Michal Hocko
2012-12-04 12:54 ` Michal Hocko
2012-12-04 13:14 ` Jeff Liu
[not found] ` <50BDB5E0.7030906-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2012-12-04 13:18 ` [RFC PATCH 0/3] Disable swap cgroup allocation at system boot stage Michal Hocko
2012-12-04 13:18 ` Michal Hocko
2012-12-04 14:00 ` Jeff Liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=50BDB5FB.6080707@oracle.com \
--to=jeff.liu@oracle.com \
--cc=cgroups@vger.kernel.org \
--cc=glommer@parallels.com \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.