linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: linux-mm@kvack.org, kamezawa.hiroyu@jp.fujitsu.com,
	dhillf@gmail.com, rientjes@google.com, mhocko@suse.cz,
	akpm@linux-foundation.org, hannes@cmpxchg.org
Cc: linux-kernel@vger.kernel.org, cgroups@vger.kernel.org,
	"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Subject: [PATCH -V8 09/16] mm/hugetlb: Add new HugeTLB cgroup
Date: Sat,  9 Jun 2012 14:29:54 +0530	[thread overview]
Message-ID: <1339232401-14392-10-git-send-email-aneesh.kumar@linux.vnet.ibm.com> (raw)
In-Reply-To: <1339232401-14392-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This patch implements a new controller that allows us to control HugeTLB
allocations. The extension allows limiting the HugeTLB usage per control
group and enforces the controller limit during page fault.  Since HugeTLB
doesn't support page reclaim, enforcing the limit at page fault time implies
that the application will get a SIGBUS signal if it tries to access HugeTLB
pages beyond its limit. This requires the application to know beforehand
how many HugeTLB pages it will require for its use.

The charge/uncharge calls will be added to HugeTLB code in a later patch.
Support for cgroup removal will be added in later patches.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 include/linux/cgroup_subsys.h  |    6 +++
 include/linux/hugetlb_cgroup.h |   79 ++++++++++++++++++++++++++++
 init/Kconfig                   |   16 ++++++
 mm/Makefile                    |    1 +
 mm/hugetlb_cgroup.c            |  114 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 216 insertions(+)
 create mode 100644 include/linux/hugetlb_cgroup.h
 create mode 100644 mm/hugetlb_cgroup.c

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 0bd390c..895923a 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -72,3 +72,9 @@ SUBSYS(net_prio)
 #endif
 
 /* */
+
+#ifdef CONFIG_CGROUP_HUGETLB_RES_CTLR
+SUBSYS(hugetlb)
+#endif
+
+/* */
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
new file mode 100644
index 0000000..5794be4
--- /dev/null
+++ b/include/linux/hugetlb_cgroup.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright IBM Corporation, 2012
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#ifndef _LINUX_HUGETLB_CGROUP_H
+#define _LINUX_HUGETLB_CGROUP_H
+
+#include <linux/res_counter.h>
+
+struct hugetlb_cgroup {
+	struct cgroup_subsys_state css;
+	/*
+	 * the counter to account for hugepages from hugetlb.
+	 */
+	struct res_counter hugepage[HUGE_MAX_HSTATE];
+};
+
+#ifdef CONFIG_CGROUP_HUGETLB_RES_CTLR
+static inline bool hugetlb_cgroup_disabled(void)
+{
+	if (hugetlb_subsys.disabled)
+		return true;
+	return false;
+}
+
+extern int hugetlb_cgroup_charge_page(int idx, unsigned long nr_pages,
+				      struct hugetlb_cgroup **ptr);
+extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
+					 struct hugetlb_cgroup *h_cg,
+					 struct page *page);
+extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
+					 struct page *page);
+extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
+					   struct hugetlb_cgroup *h_cg);
+#else
+static inline bool hugetlb_cgroup_disabled(void)
+{
+	return true;
+}
+
+static inline int
+hugetlb_cgroup_charge_page(int idx, unsigned long nr_pages,
+			   struct hugetlb_cgroup **ptr)
+{
+	return 0;
+}
+
+static inline void
+hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
+			     struct hugetlb_cgroup *h_cg,
+			     struct page *page)
+{
+	return;
+}
+
+static inline void
+hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page)
+{
+	return;
+}
+
+static inline void
+hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
+			       struct hugetlb_cgroup *h_cg)
+{
+	return;
+}
+#endif  /* CONFIG_CGROUP_HUGETLB_RES_CTLR */
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index d07dcf9..b9a0d0a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -751,6 +751,22 @@ config CGROUP_MEM_RES_CTLR_KMEM
 	  the kmem extension can use it to guarantee that no group of processes
 	  will ever exhaust kernel resources alone.
 
+config CGROUP_HUGETLB_RES_CTLR
+	bool "HugeTLB Resource Controller for Control Groups"
+	depends on RESOURCE_COUNTERS && HUGETLB_PAGE && EXPERIMENTAL
+	select PAGE_CGROUP
+	default n
+	help
+	  Provides a simple cgroup Resource Controller for HugeTLB pages.
+	  When you enable this, you can put a per cgroup limit on HugeTLB usage.
+	  The limit is enforced during page fault. Since HugeTLB doesn't
+	  support page reclaim, enforcing the limit at page fault time implies
+	  that the application will get a SIGBUS signal if it tries to access
+	  HugeTLB pages beyond its limit. This requires the application to know
+	  beforehand how many HugeTLB pages it will require for its use. The
+	  control group is tracked in the third page lru pointer, so the
+	  controller cannot be used with huge pages smaller than 3 pages.
+
 config CGROUP_PERF
 	bool "Enable perf_event per-cpu per-container group (cgroup) monitoring"
 	depends on PERF_EVENTS && CGROUPS
diff --git a/mm/Makefile b/mm/Makefile
index a156285..a8dd8d5 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o
 obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
+obj-$(CONFIG_CGROUP_HUGETLB_RES_CTLR) += hugetlb_cgroup.o
 obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
 obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
 obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
new file mode 100644
index 0000000..20a32c5
--- /dev/null
+++ b/mm/hugetlb_cgroup.c
@@ -0,0 +1,114 @@
+/*
+ *
+ * Copyright IBM Corporation, 2012
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/cgroup.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+#include <linux/hugetlb_cgroup.h>
+
+struct cgroup_subsys hugetlb_subsys __read_mostly;
+struct hugetlb_cgroup *root_h_cgroup __read_mostly;
+
+static inline
+struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
+{
+	if (s)
+		return container_of(s, struct hugetlb_cgroup, css);
+	return NULL;
+}
+
+static inline
+struct hugetlb_cgroup *hugetlb_cgroup_from_cgroup(struct cgroup *cgroup)
+{
+	return hugetlb_cgroup_from_css(cgroup_subsys_state(cgroup,
+							   hugetlb_subsys_id));
+}
+
+static inline
+struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
+{
+	return hugetlb_cgroup_from_css(task_subsys_state(task,
+							 hugetlb_subsys_id));
+}
+
+static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
+{
+	return (h_cg == root_h_cgroup);
+}
+
+static inline struct hugetlb_cgroup *parent_hugetlb_cgroup(struct cgroup *cg)
+{
+	if (!cg->parent)
+		return NULL;
+	return hugetlb_cgroup_from_cgroup(cg->parent);
+}
+
+static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg)
+{
+	int idx;
+	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cg);
+
+	for (idx = 0; idx < hugetlb_max_hstate; idx++) {
+		if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0)
+			return 1;
+	}
+	return 0;
+}
+
+static struct cgroup_subsys_state *hugetlb_cgroup_create(struct cgroup *cgroup)
+{
+	int idx;
+	struct cgroup *parent_cgroup;
+	struct hugetlb_cgroup *h_cgroup, *parent_h_cgroup;
+
+	h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
+	if (!h_cgroup)
+		return ERR_PTR(-ENOMEM);
+
+	parent_cgroup = cgroup->parent;
+	if (parent_cgroup) {
+		parent_h_cgroup = hugetlb_cgroup_from_cgroup(parent_cgroup);
+		for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
+			res_counter_init(&h_cgroup->hugepage[idx],
+					 &parent_h_cgroup->hugepage[idx]);
+	} else {
+		root_h_cgroup = h_cgroup;
+		for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
+			res_counter_init(&h_cgroup->hugepage[idx], NULL);
+	}
+	return &h_cgroup->css;
+}
+
+static void hugetlb_cgroup_destroy(struct cgroup *cgroup)
+{
+	struct hugetlb_cgroup *h_cgroup;
+
+	h_cgroup = hugetlb_cgroup_from_cgroup(cgroup);
+	kfree(h_cgroup);
+}
+
+static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
+{
+	/* We will add the cgroup removal support in later patches */
+	   return -EBUSY;
+}
+
+struct cgroup_subsys hugetlb_subsys = {
+	.name = "hugetlb",
+	.create     = hugetlb_cgroup_create,
+	.pre_destroy = hugetlb_cgroup_pre_destroy,
+	.destroy    = hugetlb_cgroup_destroy,
+	.subsys_id  = hugetlb_subsys_id,
+};
-- 
1.7.10

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2012-06-09  9:00 UTC|newest]

Thread overview: 71+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-06-09  8:59 [PATCH -V8 00/16] hugetlb: Add HugeTLB controller to control HugeTLB allocation Aneesh Kumar K.V
2012-06-09  8:59 ` [PATCH -V8 01/16] hugetlb: rename max_hstate to hugetlb_max_hstate Aneesh Kumar K.V
2012-06-09  8:59 ` [PATCH -V8 02/16] hugetlb: don't use ERR_PTR with VM_FAULT* values Aneesh Kumar K.V
2012-06-09 11:10   ` Konrad Rzeszutek Wilk
2012-06-09 13:17     ` Aneesh Kumar K.V
2012-06-09 19:28   ` KOSAKI Motohiro
2012-06-10  1:59   ` Hillf Danton
2012-06-09  8:59 ` [PATCH -V8 03/16] hugetlb: add an inline helper for finding hstate index Aneesh Kumar K.V
2012-06-09  8:59 ` [PATCH -V8 04/16] hugetlb: use mmu_gather instead of a temporary linked list for accumulating pages Aneesh Kumar K.V
2012-06-09  8:59 ` [PATCH -V8 05/16] hugetlb: avoid taking i_mmap_mutex in unmap_single_vma() for hugetlb Aneesh Kumar K.V
2012-06-09  9:44   ` Johannes Weiner
2012-06-09 13:03     ` Aneesh Kumar K.V
2012-06-09 14:49       ` Johannes Weiner
2012-06-09  8:59 ` [PATCH -V8 06/16] hugetlb: simplify migrate_huge_page() Aneesh Kumar K.V
2012-06-12  7:31   ` Kamezawa Hiroyuki
2012-06-09  8:59 ` [PATCH -V8 07/16] hugetlb: add a list for tracking in-use HugeTLB pages Aneesh Kumar K.V
2012-06-09  8:59 ` [PATCH -V8 08/16] hugetlb: Make some static variables global Aneesh Kumar K.V
2012-06-12  7:35   ` Kamezawa Hiroyuki
2012-06-09  8:59 ` Aneesh Kumar K.V [this message]
2012-06-12  7:46   ` [PATCH -V8 09/16] mm/hugetlb: Add new HugeTLB cgroup Kamezawa Hiroyuki
2012-06-12  9:37     ` Aneesh Kumar K.V
2012-06-09  8:59 ` [PATCH -V8 10/16] hugetlb/cgroup: Add the cgroup pointer to page lru Aneesh Kumar K.V
2012-06-09 11:14   ` Konrad Rzeszutek Wilk
2012-06-09 17:04   ` Aneesh Kumar K.V
2012-06-11  8:16   ` Michal Hocko
2012-06-11  9:03     ` Aneesh Kumar K.V
2012-06-11  9:16       ` Michal Hocko
2012-06-11  9:33         ` Aneesh Kumar K.V
2012-06-11  8:41   ` Kamezawa Hiroyuki
2012-06-12  7:52   ` Kamezawa Hiroyuki
2012-06-12  9:40     ` Aneesh Kumar K.V
2012-06-09  8:59 ` [PATCH -V8 11/16] hugetlb/cgroup: Add charge/uncharge routines for hugetlb cgroup Aneesh Kumar K.V
2012-06-11  8:38   ` Michal Hocko
2012-06-11  9:10     ` Michal Hocko
2012-06-11  9:28     ` Aneesh Kumar K.V
2012-06-11 12:59       ` Michal Hocko
2012-06-11 15:35         ` Aneesh Kumar K.V
2012-06-12  8:08   ` Kamezawa Hiroyuki
2012-06-12 10:50     ` Aneesh Kumar K.V
2012-06-13 11:02       ` Kamezawa Hiroyuki
2012-06-09  8:59 ` [PATCH -V8 12/16] hugetlb/cgroup: Add support for cgroup removal Aneesh Kumar K.V
2012-06-11  8:52   ` Michal Hocko
2012-06-11  9:40     ` Aneesh Kumar K.V
2012-06-11 13:14       ` Michal Hocko
2012-06-11 15:34         ` Aneesh Kumar K.V
2012-06-12  8:23   ` Kamezawa Hiroyuki
2012-06-12 10:52     ` Aneesh Kumar K.V
2012-06-09  8:59 ` [PATCH -V8 13/16] hugetlb/cgroup: add hugetlb cgroup control files Aneesh Kumar K.V
2012-06-09 11:19   ` Konrad Rzeszutek Wilk
2012-06-11  9:02   ` Michal Hocko
2012-06-11  9:43     ` Aneesh Kumar K.V
2012-06-11 13:15       ` Michal Hocko
2012-06-12  8:35   ` Kamezawa Hiroyuki
2012-06-12 10:58     ` Aneesh Kumar K.V
2012-06-13  0:16       ` Kamezawa Hiroyuki
2012-06-09  8:59 ` [PATCH -V8 14/16] hugetlb/cgroup: add charge/uncharge calls for HugeTLB alloc/free Aneesh Kumar K.V
2012-06-09  9:23   ` Johannes Weiner
2012-06-09 13:09     ` Aneesh Kumar K.V
2012-06-09 14:30       ` Johannes Weiner
2012-06-09 15:55         ` Aneesh Kumar K.V
2012-06-11  9:19         ` Michal Hocko
2012-06-11  9:21   ` Michal Hocko
2012-06-11 10:29     ` Aneesh Kumar K.V
2012-06-09  9:00 ` [PATCH -V8 15/16] hugetlb/cgroup: migrate hugetlb cgroup info from oldpage to new page during migration Aneesh Kumar K.V
2012-06-11  9:24   ` Michal Hocko
2012-06-11 10:17     ` Aneesh Kumar K.V
2012-06-12  8:51   ` Kamezawa Hiroyuki
2012-06-12 11:00     ` Aneesh Kumar K.V
2012-06-13 11:15       ` Kamezawa Hiroyuki
2012-06-09  9:00 ` [PATCH -V8 16/16] hugetlb/cgroup: add HugeTLB controller documentation Aneesh Kumar K.V
2012-06-12  8:57   ` Kamezawa Hiroyuki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1339232401-14392-10-git-send-email-aneesh.kumar@linux.vnet.ibm.com \
    --to=aneesh.kumar@linux.vnet.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=cgroups@vger.kernel.org \
    --cc=dhillf@gmail.com \
    --cc=hannes@cmpxchg.org \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.cz \
    --cc=rientjes@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).